diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,19619 +1,2819 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.22624617199557204, + "epoch": 0.03308929974769409, "eval_steps": 500, - "global_step": 28000, + "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 8.080220428413287e-05, - "grad_norm": 120784.5234375, - "learning_rate": 4.040404040404041e-08, - "loss": 6344.0191, + "epoch": 8.272324936923522e-05, + "grad_norm": 290864.625, + "learning_rate": 8.080808080808081e-09, + "loss": 7347.5086, "step": 10 }, { - "epoch": 0.00016160440856826573, - "grad_norm": 246101.4375, - "learning_rate": 8.080808080808082e-08, - "loss": 7230.2391, + "epoch": 0.00016544649873847045, + "grad_norm": 132409.484375, + "learning_rate": 1.6161616161616162e-08, + "loss": 8777.1484, "step": 20 }, { - "epoch": 0.00024240661285239863, - "grad_norm": 220140.828125, - "learning_rate": 1.2121212121212122e-07, - "loss": 7248.1844, + "epoch": 0.0002481697481077057, + "grad_norm": 271826.25, + "learning_rate": 2.4242424242424243e-08, + "loss": 8795.7484, "step": 30 }, { - "epoch": 0.00032320881713653147, - "grad_norm": 468224.40625, - "learning_rate": 1.6161616161616163e-07, - "loss": 8245.5844, + "epoch": 0.0003308929974769409, + "grad_norm": 206802.0625, + "learning_rate": 3.2323232323232324e-08, + "loss": 9771.4797, "step": 40 }, { - "epoch": 0.00040401102142066436, - "grad_norm": 129730.1171875, - "learning_rate": 2.0202020202020202e-07, - "loss": 5464.3164, + "epoch": 0.0004136162468461761, + "grad_norm": 373988.6875, + "learning_rate": 4.040404040404041e-08, + "loss": 9177.75, "step": 50 }, { - "epoch": 0.00048481322570479725, - "grad_norm": 37984.21484375, - "learning_rate": 2.4242424242424244e-07, - "loss": 7551.0562, + "epoch": 0.0004963394962154114, + "grad_norm": 228453.421875, + "learning_rate": 4.8484848484848486e-08, + "loss": 7514.507, "step": 60 }, { - "epoch": 0.0005656154299889301, - "grad_norm": 170187.078125, - "learning_rate": 2.8282828282828283e-07, - "loss": 6856.2141, + "epoch": 0.0005790627455846465, + "grad_norm": 40292.42578125, + "learning_rate": 5.656565656565657e-08, + "loss": 8429.1922, "step": 70 }, { - "epoch": 0.0006464176342730629, - "grad_norm": 64738.05078125, - "learning_rate": 3.2323232323232327e-07, - "loss": 7045.2477, + "epoch": 0.0006617859949538818, + "grad_norm": 118632.5078125, + "learning_rate": 6.464646464646465e-08, + "loss": 10504.1547, "step": 80 }, { - "epoch": 0.0007272198385571959, - "grad_norm": 114463.8515625, - "learning_rate": 3.6363636363636366e-07, - "loss": 5803.459, + "epoch": 0.000744509244323117, + "grad_norm": 243644.078125, + "learning_rate": 7.272727272727274e-08, + "loss": 10121.1352, "step": 90 }, { - "epoch": 0.0008080220428413287, - "grad_norm": 102587.6796875, - "learning_rate": 4.0404040404040405e-07, - "loss": 3976.1211, + "epoch": 0.0008272324936923522, + "grad_norm": 133929.265625, + "learning_rate": 8.080808080808082e-08, + "loss": 6190.8469, "step": 100 }, { - "epoch": 0.0008888242471254616, - "grad_norm": 78112.53125, - "learning_rate": 4.444444444444445e-07, - "loss": 3535.432, + "epoch": 0.0009099557430615875, + "grad_norm": 77254.4375, + "learning_rate": 8.88888888888889e-08, + "loss": 7874.6172, "step": 110 }, { - "epoch": 0.0009696264514095945, - "grad_norm": 100944.5703125, - "learning_rate": 4.848484848484849e-07, - "loss": 4244.4645, + "epoch": 0.0009926789924308227, + "grad_norm": 25441.220703125, + "learning_rate": 9.696969696969697e-08, + "loss": 5206.3652, "step": 120 }, { - "epoch": 0.0010504286556937273, - "grad_norm": 178465.359375, - "learning_rate": 5.252525252525253e-07, - "loss": 5655.2863, + "epoch": 0.0010754022418000579, + "grad_norm": 214560.640625, + "learning_rate": 1.0505050505050506e-07, + "loss": 5062.5816, "step": 130 }, { - "epoch": 0.0011312308599778602, - "grad_norm": 70585.359375, - "learning_rate": 5.656565656565657e-07, - "loss": 3637.5656, + "epoch": 0.001158125491169293, + "grad_norm": 271677.8125, + "learning_rate": 1.1313131313131314e-07, + "loss": 6290.3078, "step": 140 }, { - "epoch": 0.001212033064261993, - "grad_norm": 51223.41015625, - "learning_rate": 6.060606060606061e-07, - "loss": 2629.8635, + "epoch": 0.0012408487405385284, + "grad_norm": 181916.046875, + "learning_rate": 1.2121212121212122e-07, + "loss": 6170.8242, "step": 150 }, { - "epoch": 0.0012928352685461259, - "grad_norm": 53755.01953125, - "learning_rate": 6.464646464646465e-07, - "loss": 4522.2559, + "epoch": 0.0013235719899077636, + "grad_norm": 87781.90625, + "learning_rate": 1.292929292929293e-07, + "loss": 4545.9688, "step": 160 }, { - "epoch": 0.0013736374728302587, - "grad_norm": 72124.0625, - "learning_rate": 6.868686868686869e-07, - "loss": 3395.8777, + "epoch": 0.0014062952392769987, + "grad_norm": 68055.6171875, + "learning_rate": 1.3737373737373738e-07, + "loss": 4016.2902, "step": 170 }, { - "epoch": 0.0014544396771143918, - "grad_norm": 11780.193359375, - "learning_rate": 7.272727272727273e-07, - "loss": 1784.9486, + "epoch": 0.001489018488646234, + "grad_norm": 112705.1328125, + "learning_rate": 1.4545454545454548e-07, + "loss": 4939.2008, "step": 180 }, { - "epoch": 0.0015352418813985246, - "grad_norm": 62803.8828125, - "learning_rate": 7.676767676767678e-07, - "loss": 2220.277, + "epoch": 0.0015717417380154692, + "grad_norm": 22221.66796875, + "learning_rate": 1.5353535353535356e-07, + "loss": 4647.8523, "step": 190 }, { - "epoch": 0.0016160440856826574, - "grad_norm": 118829.34375, - "learning_rate": 8.080808080808081e-07, - "loss": 2133.5133, + "epoch": 0.0016544649873847044, + "grad_norm": 47219.8125, + "learning_rate": 1.6161616161616163e-07, + "loss": 3720.5625, "step": 200 }, { - "epoch": 0.0016968462899667903, - "grad_norm": 7391.849609375, - "learning_rate": 8.484848484848486e-07, - "loss": 1618.4662, + "epoch": 0.0017371882367539398, + "grad_norm": 22008.935546875, + "learning_rate": 1.6969696969696974e-07, + "loss": 3481.5031, "step": 210 }, { - "epoch": 0.0017776484942509231, - "grad_norm": 7889.263671875, - "learning_rate": 8.88888888888889e-07, - "loss": 1578.3401, + "epoch": 0.001819911486123175, + "grad_norm": 48396.8359375, + "learning_rate": 1.777777777777778e-07, + "loss": 3470.4133, "step": 220 }, { - "epoch": 0.001858450698535056, - "grad_norm": 10828.140625, - "learning_rate": 9.292929292929294e-07, - "loss": 1327.2748, + "epoch": 0.00190263473549241, + "grad_norm": 29807.96875, + "learning_rate": 1.858585858585859e-07, + "loss": 4089.068, "step": 230 }, { - "epoch": 0.001939252902819189, - "grad_norm": 15991.2119140625, - "learning_rate": 9.696969696969698e-07, - "loss": 1342.3516, + "epoch": 0.0019853579848616455, + "grad_norm": 110733.6328125, + "learning_rate": 1.9393939393939395e-07, + "loss": 3982.75, "step": 240 }, { - "epoch": 0.0020200551071033216, - "grad_norm": 7872.46484375, - "learning_rate": 1.0101010101010103e-06, - "loss": 1077.598, + "epoch": 0.0020680812342308806, + "grad_norm": 86890.3671875, + "learning_rate": 2.0202020202020205e-07, + "loss": 2655.7609, "step": 250 }, { - "epoch": 0.0021008573113874547, - "grad_norm": 3977.63623046875, - "learning_rate": 1.0505050505050506e-06, - "loss": 835.4583, + "epoch": 0.0021508044836001158, + "grad_norm": 53112.5, + "learning_rate": 2.1010101010101013e-07, + "loss": 2693.0553, "step": 260 }, { - "epoch": 0.0021816595156715873, - "grad_norm": 3596.71923828125, - "learning_rate": 1.090909090909091e-06, - "loss": 766.0804, + "epoch": 0.002233527732969351, + "grad_norm": 32703.505859375, + "learning_rate": 2.181818181818182e-07, + "loss": 1969.2211, "step": 270 }, { - "epoch": 0.0022624617199557204, - "grad_norm": 10194.791015625, - "learning_rate": 1.1313131313131313e-06, - "loss": 777.5695, + "epoch": 0.002316250982338586, + "grad_norm": 63525.1875, + "learning_rate": 2.2626262626262628e-07, + "loss": 2238.1707, "step": 280 }, { - "epoch": 0.0023432639242398534, - "grad_norm": 7022.6103515625, - "learning_rate": 1.1717171717171719e-06, - "loss": 639.281, + "epoch": 0.0023989742317078217, + "grad_norm": 11086.0263671875, + "learning_rate": 2.343434343434344e-07, + "loss": 1978.3666, "step": 290 }, { - "epoch": 0.002424066128523986, - "grad_norm": 2920.81787109375, - "learning_rate": 1.2121212121212122e-06, - "loss": 703.318, + "epoch": 0.002481697481077057, + "grad_norm": 26394.287109375, + "learning_rate": 2.4242424242424244e-07, + "loss": 2122.5781, "step": 300 }, { - "epoch": 0.002504868332808119, - "grad_norm": 4137.4970703125, - "learning_rate": 1.2525252525252527e-06, - "loss": 766.8718, + "epoch": 0.002564420730446292, + "grad_norm": 13995.0908203125, + "learning_rate": 2.505050505050505e-07, + "loss": 2075.9496, "step": 310 }, { - "epoch": 0.0025856705370922517, - "grad_norm": 8379.064453125, - "learning_rate": 1.292929292929293e-06, - "loss": 625.7259, + "epoch": 0.002647143979815527, + "grad_norm": 19229.74609375, + "learning_rate": 2.585858585858586e-07, + "loss": 1616.6876, "step": 320 }, { - "epoch": 0.002666472741376385, - "grad_norm": 2632.939697265625, - "learning_rate": 1.3333333333333334e-06, - "loss": 529.0049, + "epoch": 0.0027298672291847623, + "grad_norm": 37227.74609375, + "learning_rate": 2.666666666666667e-07, + "loss": 1571.374, "step": 330 }, { - "epoch": 0.0027472749456605174, - "grad_norm": 3072.3486328125, - "learning_rate": 1.3737373737373738e-06, - "loss": 557.5195, + "epoch": 0.0028125904785539974, + "grad_norm": 4862.91650390625, + "learning_rate": 2.7474747474747475e-07, + "loss": 1395.156, "step": 340 }, { - "epoch": 0.0028280771499446505, - "grad_norm": 2430.994384765625, - "learning_rate": 1.4141414141414143e-06, - "loss": 548.776, + "epoch": 0.002895313727923233, + "grad_norm": 10541.6279296875, + "learning_rate": 2.828282828282829e-07, + "loss": 1026.758, "step": 350 }, { - "epoch": 0.0029088793542287835, - "grad_norm": 2531.90771484375, - "learning_rate": 1.4545454545454546e-06, - "loss": 602.2291, + "epoch": 0.002978036977292468, + "grad_norm": 6956.09228515625, + "learning_rate": 2.9090909090909096e-07, + "loss": 1170.659, "step": 360 }, { - "epoch": 0.002989681558512916, - "grad_norm": 5374.16552734375, - "learning_rate": 1.4949494949494952e-06, - "loss": 447.6404, + "epoch": 0.0030607602266617033, + "grad_norm": 21385.865234375, + "learning_rate": 2.9898989898989904e-07, + "loss": 1054.5773, "step": 370 }, { - "epoch": 0.003070483762797049, - "grad_norm": 1357.3726806640625, - "learning_rate": 1.5353535353535355e-06, - "loss": 342.7574, + "epoch": 0.0031434834760309385, + "grad_norm": 11527.4638671875, + "learning_rate": 3.070707070707071e-07, + "loss": 953.5405, "step": 380 }, { - "epoch": 0.003151285967081182, - "grad_norm": 1249.0936279296875, - "learning_rate": 1.5757575757575759e-06, - "loss": 562.0835, + "epoch": 0.0032262067254001736, + "grad_norm": 5473.7568359375, + "learning_rate": 3.151515151515152e-07, + "loss": 924.337, "step": 390 }, { - "epoch": 0.003232088171365315, - "grad_norm": 1270.3609619140625, - "learning_rate": 1.6161616161616162e-06, - "loss": 467.299, + "epoch": 0.003308929974769409, + "grad_norm": 4726.9072265625, + "learning_rate": 3.2323232323232327e-07, + "loss": 866.3893, "step": 400 }, { - "epoch": 0.0033128903756494475, - "grad_norm": 2576.8291015625, - "learning_rate": 1.6565656565656565e-06, - "loss": 448.897, + "epoch": 0.003391653224138644, + "grad_norm": 3261.58837890625, + "learning_rate": 3.3131313131313135e-07, + "loss": 701.5376, "step": 410 }, { - "epoch": 0.0033936925799335806, - "grad_norm": 1376.2908935546875, - "learning_rate": 1.6969696969696973e-06, - "loss": 382.242, + "epoch": 0.0034743764735078795, + "grad_norm": 2715.71728515625, + "learning_rate": 3.393939393939395e-07, + "loss": 588.6865, "step": 420 }, { - "epoch": 0.0034744947842177136, - "grad_norm": 1212.802490234375, - "learning_rate": 1.7373737373737376e-06, - "loss": 486.5346, + "epoch": 0.0035570997228771147, + "grad_norm": 4931.7373046875, + "learning_rate": 3.474747474747475e-07, + "loss": 972.5657, "step": 430 }, { - "epoch": 0.0035552969885018462, - "grad_norm": 801.7883911132812, - "learning_rate": 1.777777777777778e-06, - "loss": 452.1537, + "epoch": 0.00363982297224635, + "grad_norm": 4389.5400390625, + "learning_rate": 3.555555555555556e-07, + "loss": 615.0299, "step": 440 }, { - "epoch": 0.0036360991927859793, - "grad_norm": 1590.736572265625, - "learning_rate": 1.818181818181818e-06, - "loss": 449.5157, + "epoch": 0.003722546221615585, + "grad_norm": 4088.788330078125, + "learning_rate": 3.6363636363636366e-07, + "loss": 706.1961, "step": 450 }, { - "epoch": 0.003716901397070112, - "grad_norm": 3368.974853515625, - "learning_rate": 1.8585858585858588e-06, - "loss": 445.9489, + "epoch": 0.00380526947098482, + "grad_norm": 7562.54638671875, + "learning_rate": 3.717171717171718e-07, + "loss": 889.6355, "step": 460 }, { - "epoch": 0.003797703601354245, - "grad_norm": 2164.530517578125, - "learning_rate": 1.8989898989898992e-06, - "loss": 449.8674, + "epoch": 0.0038879927203540553, + "grad_norm": 2788.27978515625, + "learning_rate": 3.7979797979797987e-07, + "loss": 602.9548, "step": 470 }, { - "epoch": 0.003878505805638378, - "grad_norm": 1234.2860107421875, - "learning_rate": 1.9393939393939395e-06, - "loss": 375.5616, + "epoch": 0.003970715969723291, + "grad_norm": 2657.68994140625, + "learning_rate": 3.878787878787879e-07, + "loss": 637.7567, "step": 480 }, { - "epoch": 0.003959308009922511, - "grad_norm": 1191.9912109375, - "learning_rate": 1.9797979797979796e-06, - "loss": 394.9626, + "epoch": 0.004053439219092526, + "grad_norm": 2286.638916015625, + "learning_rate": 3.9595959595959597e-07, + "loss": 632.9385, "step": 490 }, { - "epoch": 0.004040110214206643, - "grad_norm": 2829.901611328125, - "learning_rate": 2.0202020202020206e-06, - "loss": 398.194, + "epoch": 0.004136162468461761, + "grad_norm": 2290.5087890625, + "learning_rate": 4.040404040404041e-07, + "loss": 624.0311, "step": 500 }, { - "epoch": 0.004120912418490777, - "grad_norm": 3573.80322265625, - "learning_rate": 2.0606060606060607e-06, - "loss": 440.5374, + "epoch": 0.004218885717830996, + "grad_norm": 6668.29296875, + "learning_rate": 4.121212121212122e-07, + "loss": 695.544, "step": 510 }, { - "epoch": 0.004201714622774909, - "grad_norm": 991.9559326171875, - "learning_rate": 2.1010101010101013e-06, - "loss": 552.4083, + "epoch": 0.0043016089672002315, + "grad_norm": 1093.6478271484375, + "learning_rate": 4.2020202020202026e-07, + "loss": 679.0598, "step": 520 }, { - "epoch": 0.004282516827059042, - "grad_norm": 2654.274658203125, - "learning_rate": 2.1414141414141414e-06, - "loss": 414.1354, + "epoch": 0.004384332216569467, + "grad_norm": 2005.0426025390625, + "learning_rate": 4.282828282828283e-07, + "loss": 625.4617, "step": 530 }, { - "epoch": 0.004363319031343175, - "grad_norm": 1416.258056640625, - "learning_rate": 2.181818181818182e-06, - "loss": 443.3496, + "epoch": 0.004467055465938702, + "grad_norm": 1259.7838134765625, + "learning_rate": 4.363636363636364e-07, + "loss": 527.8661, "step": 540 }, { - "epoch": 0.004444121235627308, - "grad_norm": 1433.0880126953125, - "learning_rate": 2.2222222222222225e-06, - "loss": 476.5702, + "epoch": 0.004549778715307937, + "grad_norm": 3519.5986328125, + "learning_rate": 4.444444444444445e-07, + "loss": 649.1768, "step": 550 }, { - "epoch": 0.004524923439911441, - "grad_norm": 852.6603393554688, - "learning_rate": 2.2626262626262626e-06, - "loss": 425.1744, + "epoch": 0.004632501964677172, + "grad_norm": 11240.7333984375, + "learning_rate": 4.5252525252525257e-07, + "loss": 575.8672, "step": 560 }, { - "epoch": 0.004605725644195573, - "grad_norm": 1662.8759765625, - "learning_rate": 2.303030303030303e-06, - "loss": 432.2859, + "epoch": 0.004715225214046408, + "grad_norm": 1956.348388671875, + "learning_rate": 4.6060606060606064e-07, + "loss": 551.9122, "step": 570 }, { - "epoch": 0.004686527848479707, - "grad_norm": 1177.7154541015625, - "learning_rate": 2.3434343434343437e-06, - "loss": 472.836, + "epoch": 0.004797948463415643, + "grad_norm": 1233.365478515625, + "learning_rate": 4.686868686868688e-07, + "loss": 544.6224, "step": 580 }, { - "epoch": 0.0047673300527638395, - "grad_norm": 1203.2510986328125, - "learning_rate": 2.383838383838384e-06, - "loss": 400.3267, + "epoch": 0.0048806717127848785, + "grad_norm": 5443.150390625, + "learning_rate": 4.767676767676768e-07, + "loss": 538.1981, "step": 590 }, { - "epoch": 0.004848132257047972, - "grad_norm": 806.4482421875, - "learning_rate": 2.4242424242424244e-06, - "loss": 381.2281, + "epoch": 0.004963394962154114, + "grad_norm": 2709.167236328125, + "learning_rate": 4.848484848484849e-07, + "loss": 520.9679, "step": 600 }, { - "epoch": 0.004928934461332105, - "grad_norm": 1223.798095703125, - "learning_rate": 2.4646464646464645e-06, - "loss": 314.522, + "epoch": 0.005046118211523349, + "grad_norm": 1369.0693359375, + "learning_rate": 4.929292929292929e-07, + "loss": 575.6082, "step": 610 }, { - "epoch": 0.005009736665616238, - "grad_norm": 1220.419189453125, - "learning_rate": 2.5050505050505055e-06, - "loss": 391.7079, + "epoch": 0.005128841460892584, + "grad_norm": 1747.572998046875, + "learning_rate": 5.01010101010101e-07, + "loss": 475.4926, "step": 620 }, { - "epoch": 0.005090538869900371, - "grad_norm": 1846.539306640625, - "learning_rate": 2.5454545454545456e-06, - "loss": 415.6819, + "epoch": 0.005211564710261819, + "grad_norm": 1369.4842529296875, + "learning_rate": 5.090909090909092e-07, + "loss": 561.2948, "step": 630 }, { - "epoch": 0.0051713410741845035, - "grad_norm": 1243.7620849609375, - "learning_rate": 2.585858585858586e-06, - "loss": 437.1002, + "epoch": 0.005294287959631054, + "grad_norm": 914.9776000976562, + "learning_rate": 5.171717171717172e-07, + "loss": 507.4795, "step": 640 }, { - "epoch": 0.005252143278468637, - "grad_norm": 1140.9453125, - "learning_rate": 2.6262626262626263e-06, - "loss": 415.1159, + "epoch": 0.005377011209000289, + "grad_norm": 3704.3095703125, + "learning_rate": 5.252525252525253e-07, + "loss": 548.0711, "step": 650 }, { - "epoch": 0.00533294548275277, - "grad_norm": 4821.37060546875, - "learning_rate": 2.666666666666667e-06, - "loss": 464.5417, + "epoch": 0.005459734458369525, + "grad_norm": 1198.6290283203125, + "learning_rate": 5.333333333333335e-07, + "loss": 435.2668, "step": 660 }, { - "epoch": 0.005413747687036902, - "grad_norm": 1713.6087646484375, - "learning_rate": 2.7070707070707074e-06, - "loss": 375.5785, + "epoch": 0.00554245770773876, + "grad_norm": 2036.1495361328125, + "learning_rate": 5.414141414141415e-07, + "loss": 517.1832, "step": 670 }, { - "epoch": 0.005494549891321035, - "grad_norm": 2620.644287109375, - "learning_rate": 2.7474747474747475e-06, - "loss": 299.2374, + "epoch": 0.005625180957107995, + "grad_norm": 9169.388671875, + "learning_rate": 5.494949494949495e-07, + "loss": 573.4803, "step": 680 }, { - "epoch": 0.005575352095605168, - "grad_norm": 1759.0950927734375, - "learning_rate": 2.787878787878788e-06, - "loss": 415.4854, + "epoch": 0.00570790420647723, + "grad_norm": 1509.4022216796875, + "learning_rate": 5.575757575757576e-07, + "loss": 534.0094, "step": 690 }, { - "epoch": 0.005656154299889301, - "grad_norm": 1536.8719482421875, - "learning_rate": 2.8282828282828286e-06, - "loss": 449.5622, + "epoch": 0.005790627455846466, + "grad_norm": 1220.3211669921875, + "learning_rate": 5.656565656565658e-07, + "loss": 516.8536, "step": 700 }, { - "epoch": 0.0057369565041734336, - "grad_norm": 2095.785400390625, - "learning_rate": 2.8686868686868687e-06, - "loss": 380.9472, + "epoch": 0.005873350705215701, + "grad_norm": 976.4949951171875, + "learning_rate": 5.737373737373738e-07, + "loss": 385.3858, "step": 710 }, { - "epoch": 0.005817758708457567, - "grad_norm": 1478.4825439453125, - "learning_rate": 2.9090909090909093e-06, - "loss": 390.7729, + "epoch": 0.005956073954584936, + "grad_norm": 1088.63916015625, + "learning_rate": 5.818181818181819e-07, + "loss": 468.5048, "step": 720 }, { - "epoch": 0.0058985609127417, - "grad_norm": 1876.679443359375, - "learning_rate": 2.9494949494949494e-06, - "loss": 391.9601, + "epoch": 0.0060387972039541715, + "grad_norm": 2710.740966796875, + "learning_rate": 5.898989898989899e-07, + "loss": 532.3609, "step": 730 }, { - "epoch": 0.005979363117025832, - "grad_norm": 749.0634765625, - "learning_rate": 2.9898989898989904e-06, - "loss": 253.9756, + "epoch": 0.006121520453323407, + "grad_norm": 1125.921630859375, + "learning_rate": 5.979797979797981e-07, + "loss": 583.5362, "step": 740 }, { - "epoch": 0.006060165321309965, - "grad_norm": 906.431396484375, - "learning_rate": 3.0303030303030305e-06, - "loss": 347.2193, + "epoch": 0.006204243702692642, + "grad_norm": 7684.47314453125, + "learning_rate": 6.060606060606061e-07, + "loss": 554.1026, "step": 750 }, { - "epoch": 0.006140967525594098, - "grad_norm": 1034.1180419921875, - "learning_rate": 3.070707070707071e-06, - "loss": 311.8948, + "epoch": 0.006286966952061877, + "grad_norm": 2504.31005859375, + "learning_rate": 6.141414141414142e-07, + "loss": 478.0655, "step": 760 }, { - "epoch": 0.006221769729878231, - "grad_norm": 2706.747802734375, - "learning_rate": 3.111111111111111e-06, - "loss": 329.9375, + "epoch": 0.006369690201431112, + "grad_norm": 4207.77490234375, + "learning_rate": 6.222222222222223e-07, + "loss": 495.5465, "step": 770 }, { - "epoch": 0.006302571934162364, - "grad_norm": 1195.091064453125, - "learning_rate": 3.1515151515151517e-06, - "loss": 303.2004, + "epoch": 0.006452413450800347, + "grad_norm": 1670.755126953125, + "learning_rate": 6.303030303030304e-07, + "loss": 494.9138, "step": 780 }, { - "epoch": 0.006383374138446497, - "grad_norm": 1622.099609375, - "learning_rate": 3.191919191919192e-06, - "loss": 359.3405, + "epoch": 0.0065351367001695825, + "grad_norm": 1391.4010009765625, + "learning_rate": 6.383838383838384e-07, + "loss": 460.6794, "step": 790 }, { - "epoch": 0.00646417634273063, - "grad_norm": 1255.0582275390625, - "learning_rate": 3.2323232323232324e-06, - "loss": 362.8006, + "epoch": 0.006617859949538818, + "grad_norm": 1217.7286376953125, + "learning_rate": 6.464646464646465e-07, + "loss": 579.6249, "step": 800 }, { - "epoch": 0.006544978547014762, - "grad_norm": 1571.966552734375, - "learning_rate": 3.2727272727272733e-06, - "loss": 372.003, + "epoch": 0.006700583198908053, + "grad_norm": 1830.9696044921875, + "learning_rate": 6.545454545454547e-07, + "loss": 559.8664, "step": 810 }, { - "epoch": 0.006625780751298895, - "grad_norm": 1350.624755859375, - "learning_rate": 3.313131313131313e-06, - "loss": 391.2471, + "epoch": 0.006783306448277288, + "grad_norm": 1603.93505859375, + "learning_rate": 6.626262626262627e-07, + "loss": 399.0898, "step": 820 }, { - "epoch": 0.0067065829555830285, - "grad_norm": 1288.0430908203125, - "learning_rate": 3.3535353535353536e-06, - "loss": 381.0373, + "epoch": 0.006866029697646524, + "grad_norm": 1301.9254150390625, + "learning_rate": 6.707070707070708e-07, + "loss": 475.2639, "step": 830 }, { - "epoch": 0.006787385159867161, - "grad_norm": 1756.347900390625, - "learning_rate": 3.3939393939393946e-06, - "loss": 366.8195, + "epoch": 0.006948752947015759, + "grad_norm": 1286.05126953125, + "learning_rate": 6.78787878787879e-07, + "loss": 449.3797, "step": 840 }, { - "epoch": 0.006868187364151294, - "grad_norm": 1164.05224609375, - "learning_rate": 3.4343434343434343e-06, - "loss": 340.2402, + "epoch": 0.007031476196384994, + "grad_norm": 1305.4154052734375, + "learning_rate": 6.868686868686869e-07, + "loss": 458.745, "step": 850 }, { - "epoch": 0.006948989568435427, - "grad_norm": 6004.3291015625, - "learning_rate": 3.4747474747474752e-06, - "loss": 397.7737, + "epoch": 0.007114199445754229, + "grad_norm": 1364.9923095703125, + "learning_rate": 6.94949494949495e-07, + "loss": 350.3186, "step": 860 }, { - "epoch": 0.00702979177271956, - "grad_norm": 1949.6370849609375, - "learning_rate": 3.515151515151515e-06, - "loss": 352.9498, + "epoch": 0.0071969226951234646, + "grad_norm": 1167.2642822265625, + "learning_rate": 7.03030303030303e-07, + "loss": 480.6291, "step": 870 }, { - "epoch": 0.0071105939770036925, - "grad_norm": 823.8360595703125, - "learning_rate": 3.555555555555556e-06, - "loss": 432.1595, + "epoch": 0.0072796459444927, + "grad_norm": 2920.170166015625, + "learning_rate": 7.111111111111112e-07, + "loss": 472.5841, "step": 880 }, { - "epoch": 0.007191396181287825, - "grad_norm": 3512.72607421875, - "learning_rate": 3.5959595959595965e-06, - "loss": 342.5901, + "epoch": 0.007362369193861935, + "grad_norm": 5099.63525390625, + "learning_rate": 7.191919191919193e-07, + "loss": 393.5459, "step": 890 }, { - "epoch": 0.007272198385571959, - "grad_norm": 1352.2506103515625, - "learning_rate": 3.636363636363636e-06, - "loss": 306.887, + "epoch": 0.00744509244323117, + "grad_norm": 1362.518798828125, + "learning_rate": 7.272727272727273e-07, + "loss": 408.5471, "step": 900 }, { - "epoch": 0.007353000589856091, - "grad_norm": 2983.867919921875, - "learning_rate": 3.676767676767677e-06, - "loss": 381.7238, + "epoch": 0.007527815692600405, + "grad_norm": 946.1234130859375, + "learning_rate": 7.353535353535354e-07, + "loss": 424.991, "step": 910 }, { - "epoch": 0.007433802794140224, - "grad_norm": 2423.1806640625, - "learning_rate": 3.7171717171717177e-06, - "loss": 303.3808, + "epoch": 0.00761053894196964, + "grad_norm": 2249.044677734375, + "learning_rate": 7.434343434343436e-07, + "loss": 587.6763, "step": 920 }, { - "epoch": 0.007514604998424357, - "grad_norm": 953.1580810546875, - "learning_rate": 3.757575757575758e-06, - "loss": 380.6255, + "epoch": 0.0076932621913388755, + "grad_norm": 934.8756713867188, + "learning_rate": 7.515151515151516e-07, + "loss": 376.3088, "step": 930 }, { - "epoch": 0.00759540720270849, - "grad_norm": 5818.9609375, - "learning_rate": 3.7979797979797984e-06, - "loss": 372.4149, + "epoch": 0.007775985440708111, + "grad_norm": 1107.6741943359375, + "learning_rate": 7.595959595959597e-07, + "loss": 488.3801, "step": 940 }, { - "epoch": 0.007676209406992623, - "grad_norm": 1317.5467529296875, - "learning_rate": 3.8383838383838385e-06, - "loss": 369.7815, + "epoch": 0.007858708690077347, + "grad_norm": 11725.5283203125, + "learning_rate": 7.676767676767677e-07, + "loss": 529.7495, "step": 950 }, { - "epoch": 0.007757011611276756, - "grad_norm": 929.1298217773438, - "learning_rate": 3.878787878787879e-06, - "loss": 395.9197, + "epoch": 0.007941431939446582, + "grad_norm": 2125.631103515625, + "learning_rate": 7.757575757575758e-07, + "loss": 470.4663, "step": 960 }, { - "epoch": 0.007837813815560889, - "grad_norm": 1342.6861572265625, - "learning_rate": 3.9191919191919196e-06, - "loss": 384.7229, + "epoch": 0.008024155188815817, + "grad_norm": 2510.724609375, + "learning_rate": 7.838383838383839e-07, + "loss": 414.8792, "step": 970 }, { - "epoch": 0.007918616019845021, - "grad_norm": 1253.3720703125, - "learning_rate": 3.959595959595959e-06, - "loss": 302.0778, + "epoch": 0.008106878438185052, + "grad_norm": 4541.27294921875, + "learning_rate": 7.919191919191919e-07, + "loss": 415.6392, "step": 980 }, { - "epoch": 0.007999418224129154, - "grad_norm": 966.1517333984375, - "learning_rate": 4.000000000000001e-06, - "loss": 407.2742, + "epoch": 0.008189601687554287, + "grad_norm": 1016.0551147460938, + "learning_rate": 8.000000000000001e-07, + "loss": 346.6074, "step": 990 }, { - "epoch": 0.008080220428413287, - "grad_norm": 949.992919921875, - "learning_rate": 4.040404040404041e-06, - "loss": 333.9852, + "epoch": 0.008272324936923522, + "grad_norm": 1090.1876220703125, + "learning_rate": 8.080808080808082e-07, + "loss": 344.406, "step": 1000 }, { - "epoch": 0.00816102263269742, - "grad_norm": 2160.320068359375, - "learning_rate": 4.080808080808081e-06, - "loss": 320.1485, + "epoch": 0.008355048186292758, + "grad_norm": 4475.55908203125, + "learning_rate": 8.161616161616162e-07, + "loss": 463.194, "step": 1010 }, { - "epoch": 0.008241824836981554, - "grad_norm": 1784.5238037109375, - "learning_rate": 4.1212121212121215e-06, - "loss": 388.0309, + "epoch": 0.008437771435661993, + "grad_norm": 1601.031982421875, + "learning_rate": 8.242424242424244e-07, + "loss": 392.6079, "step": 1020 }, { - "epoch": 0.008322627041265686, - "grad_norm": 11426.04296875, - "learning_rate": 4.161616161616161e-06, - "loss": 342.7975, + "epoch": 0.008520494685031228, + "grad_norm": 1435.5068359375, + "learning_rate": 8.323232323232324e-07, + "loss": 414.8346, "step": 1030 }, { - "epoch": 0.008403429245549819, - "grad_norm": 1648.6806640625, - "learning_rate": 4.2020202020202026e-06, - "loss": 377.397, + "epoch": 0.008603217934400463, + "grad_norm": 1454.9761962890625, + "learning_rate": 8.404040404040405e-07, + "loss": 401.0342, "step": 1040 }, { - "epoch": 0.008484231449833951, - "grad_norm": 929.9481811523438, - "learning_rate": 4.242424242424243e-06, - "loss": 316.3978, + "epoch": 0.008685941183769698, + "grad_norm": 1803.5755615234375, + "learning_rate": 8.484848484848486e-07, + "loss": 465.8911, "step": 1050 }, { - "epoch": 0.008565033654118084, - "grad_norm": 1129.7996826171875, - "learning_rate": 4.282828282828283e-06, - "loss": 299.9879, + "epoch": 0.008768664433138933, + "grad_norm": 1001.1389770507812, + "learning_rate": 8.565656565656566e-07, + "loss": 519.1788, "step": 1060 }, { - "epoch": 0.008645835858402217, - "grad_norm": 1400.36376953125, - "learning_rate": 4.323232323232323e-06, - "loss": 350.1171, + "epoch": 0.008851387682508169, + "grad_norm": 1133.9569091796875, + "learning_rate": 8.646464646464647e-07, + "loss": 338.6231, "step": 1070 }, { - "epoch": 0.00872663806268635, - "grad_norm": 1833.9061279296875, - "learning_rate": 4.363636363636364e-06, - "loss": 325.1123, + "epoch": 0.008934110931877404, + "grad_norm": 2342.049560546875, + "learning_rate": 8.727272727272728e-07, + "loss": 480.9177, "step": 1080 }, { - "epoch": 0.008807440266970484, - "grad_norm": 1330.033203125, - "learning_rate": 4.4040404040404044e-06, - "loss": 325.3823, + "epoch": 0.009016834181246639, + "grad_norm": 1551.6846923828125, + "learning_rate": 8.808080808080808e-07, + "loss": 422.791, "step": 1090 }, { - "epoch": 0.008888242471254616, - "grad_norm": 910.3088989257812, - "learning_rate": 4.444444444444445e-06, - "loss": 301.4958, + "epoch": 0.009099557430615874, + "grad_norm": 2366.242431640625, + "learning_rate": 8.88888888888889e-07, + "loss": 365.7335, "step": 1100 }, { - "epoch": 0.008969044675538749, - "grad_norm": 1182.816162109375, - "learning_rate": 4.484848484848485e-06, - "loss": 277.6404, + "epoch": 0.00918228067998511, + "grad_norm": 973.9622192382812, + "learning_rate": 8.96969696969697e-07, + "loss": 378.5789, "step": 1110 }, { - "epoch": 0.009049846879822881, - "grad_norm": 4707.03857421875, - "learning_rate": 4.525252525252525e-06, - "loss": 424.5438, + "epoch": 0.009265003929354344, + "grad_norm": 1168.2000732421875, + "learning_rate": 9.050505050505051e-07, + "loss": 373.7866, "step": 1120 }, { - "epoch": 0.009130649084107014, - "grad_norm": 2059.0185546875, - "learning_rate": 4.565656565656566e-06, - "loss": 324.0304, + "epoch": 0.00934772717872358, + "grad_norm": 1147.5294189453125, + "learning_rate": 9.131313131313133e-07, + "loss": 412.2723, "step": 1130 }, { - "epoch": 0.009211451288391147, - "grad_norm": 3679.1044921875, - "learning_rate": 4.606060606060606e-06, - "loss": 303.365, + "epoch": 0.009430450428092816, + "grad_norm": 2847.016357421875, + "learning_rate": 9.212121212121213e-07, + "loss": 492.1345, "step": 1140 }, { - "epoch": 0.00929225349267528, - "grad_norm": 2518.44970703125, - "learning_rate": 4.646464646464647e-06, - "loss": 257.8196, + "epoch": 0.009513173677462052, + "grad_norm": 1180.3143310546875, + "learning_rate": 9.292929292929294e-07, + "loss": 352.6749, "step": 1150 }, { - "epoch": 0.009373055696959414, - "grad_norm": 1017.4381713867188, - "learning_rate": 4.6868686868686874e-06, - "loss": 308.7409, + "epoch": 0.009595896926831287, + "grad_norm": 2237.19189453125, + "learning_rate": 9.373737373737376e-07, + "loss": 461.0708, "step": 1160 }, { - "epoch": 0.009453857901243546, - "grad_norm": 1016.4625244140625, - "learning_rate": 4.727272727272727e-06, - "loss": 309.7148, + "epoch": 0.009678620176200522, + "grad_norm": 6046.27490234375, + "learning_rate": 9.454545454545455e-07, + "loss": 410.0307, "step": 1170 }, { - "epoch": 0.009534660105527679, - "grad_norm": 2000.1339111328125, - "learning_rate": 4.767676767676768e-06, - "loss": 416.5233, + "epoch": 0.009761343425569757, + "grad_norm": 1292.1455078125, + "learning_rate": 9.535353535353536e-07, + "loss": 380.5105, "step": 1180 }, { - "epoch": 0.009615462309811812, - "grad_norm": 1349.24755859375, - "learning_rate": 4.808080808080808e-06, - "loss": 312.0022, + "epoch": 0.009844066674938992, + "grad_norm": 1147.40478515625, + "learning_rate": 9.616161616161617e-07, + "loss": 424.6558, "step": 1190 }, { - "epoch": 0.009696264514095944, - "grad_norm": 605.5498046875, - "learning_rate": 4.848484848484849e-06, - "loss": 373.616, + "epoch": 0.009926789924308227, + "grad_norm": 4430.99072265625, + "learning_rate": 9.696969696969698e-07, + "loss": 410.329, "step": 1200 }, { - "epoch": 0.009777066718380077, - "grad_norm": 1045.7061767578125, - "learning_rate": 4.888888888888889e-06, - "loss": 263.9121, + "epoch": 0.010009513173677462, + "grad_norm": 1044.046142578125, + "learning_rate": 9.77777777777778e-07, + "loss": 296.1801, "step": 1210 }, { - "epoch": 0.00985786892266421, - "grad_norm": 997.4849243164062, - "learning_rate": 4.929292929292929e-06, - "loss": 299.2633, + "epoch": 0.010092236423046698, + "grad_norm": 1115.4732666015625, + "learning_rate": 9.858585858585858e-07, + "loss": 324.9412, "step": 1220 }, { - "epoch": 0.009938671126948344, - "grad_norm": 690.6622314453125, - "learning_rate": 4.96969696969697e-06, - "loss": 216.8853, + "epoch": 0.010174959672415933, + "grad_norm": 1223.4896240234375, + "learning_rate": 9.93939393939394e-07, + "loss": 464.8189, "step": 1230 }, { - "epoch": 0.010019473331232476, - "grad_norm": 1591.2972412109375, - "learning_rate": 5.010101010101011e-06, - "loss": 339.3444, + "epoch": 0.010257682921785168, + "grad_norm": 1541.18408203125, + "learning_rate": 1.002020202020202e-06, + "loss": 378.3653, "step": 1240 }, { - "epoch": 0.010100275535516609, - "grad_norm": 749.3450317382812, - "learning_rate": 5.050505050505051e-06, - "loss": 315.4824, + "epoch": 0.010340406171154403, + "grad_norm": 3600.49951171875, + "learning_rate": 1.01010101010101e-06, + "loss": 420.5468, "step": 1250 }, { - "epoch": 0.010181077739800742, - "grad_norm": 923.2822265625, - "learning_rate": 5.090909090909091e-06, - "loss": 265.8641, + "epoch": 0.010423129420523638, + "grad_norm": 1217.7330322265625, + "learning_rate": 1.0181818181818183e-06, + "loss": 297.3635, "step": 1260 }, { - "epoch": 0.010261879944084874, - "grad_norm": 1161.5048828125, - "learning_rate": 5.131313131313131e-06, - "loss": 255.6592, + "epoch": 0.010505852669892873, + "grad_norm": 938.6069946289062, + "learning_rate": 1.0262626262626264e-06, + "loss": 302.24, "step": 1270 }, { - "epoch": 0.010342682148369007, - "grad_norm": 949.8252563476562, - "learning_rate": 5.171717171717172e-06, - "loss": 350.1398, + "epoch": 0.010588575919262109, + "grad_norm": 1441.960693359375, + "learning_rate": 1.0343434343434344e-06, + "loss": 338.1778, "step": 1280 }, { - "epoch": 0.01042348435265314, - "grad_norm": 1018.5875854492188, - "learning_rate": 5.212121212121213e-06, - "loss": 313.8918, + "epoch": 0.010671299168631344, + "grad_norm": 1427.3394775390625, + "learning_rate": 1.0424242424242426e-06, + "loss": 356.3167, "step": 1290 }, { - "epoch": 0.010504286556937274, - "grad_norm": 3170.640869140625, - "learning_rate": 5.2525252525252526e-06, - "loss": 305.4674, + "epoch": 0.010754022418000579, + "grad_norm": 1234.087646484375, + "learning_rate": 1.0505050505050506e-06, + "loss": 368.8047, "step": 1300 }, { - "epoch": 0.010585088761221407, - "grad_norm": 1600.813720703125, - "learning_rate": 5.292929292929293e-06, - "loss": 331.4024, + "epoch": 0.010836745667369814, + "grad_norm": 3589.67529296875, + "learning_rate": 1.0585858585858587e-06, + "loss": 371.8811, "step": 1310 }, { - "epoch": 0.01066589096550554, - "grad_norm": 1063.93408203125, - "learning_rate": 5.333333333333334e-06, - "loss": 373.562, + "epoch": 0.01091946891673905, + "grad_norm": 1112.5706787109375, + "learning_rate": 1.066666666666667e-06, + "loss": 408.1004, "step": 1320 }, { - "epoch": 0.010746693169789672, - "grad_norm": 665.6146240234375, - "learning_rate": 5.373737373737374e-06, - "loss": 235.0867, + "epoch": 0.011002192166108284, + "grad_norm": 1094.4530029296875, + "learning_rate": 1.0747474747474747e-06, + "loss": 341.6985, "step": 1330 }, { - "epoch": 0.010827495374073804, - "grad_norm": 1380.6151123046875, - "learning_rate": 5.414141414141415e-06, - "loss": 324.2148, + "epoch": 0.01108491541547752, + "grad_norm": 773.48876953125, + "learning_rate": 1.082828282828283e-06, + "loss": 323.1021, "step": 1340 }, { - "epoch": 0.010908297578357937, - "grad_norm": 3627.843994140625, - "learning_rate": 5.4545454545454545e-06, - "loss": 364.2515, + "epoch": 0.011167638664846755, + "grad_norm": 1287.7620849609375, + "learning_rate": 1.090909090909091e-06, + "loss": 359.5697, "step": 1350 }, { - "epoch": 0.01098909978264207, - "grad_norm": 850.1503295898438, - "learning_rate": 5.494949494949495e-06, - "loss": 408.481, + "epoch": 0.01125036191421599, + "grad_norm": 1018.9624633789062, + "learning_rate": 1.098989898989899e-06, + "loss": 347.1984, "step": 1360 }, { - "epoch": 0.011069901986926204, - "grad_norm": 2091.482421875, - "learning_rate": 5.5353535353535355e-06, - "loss": 267.4172, + "epoch": 0.011333085163585225, + "grad_norm": 11387.1826171875, + "learning_rate": 1.1070707070707072e-06, + "loss": 413.8385, "step": 1370 }, { - "epoch": 0.011150704191210337, - "grad_norm": 1496.604248046875, - "learning_rate": 5.575757575757576e-06, - "loss": 295.7435, + "epoch": 0.01141580841295446, + "grad_norm": 1670.8966064453125, + "learning_rate": 1.1151515151515153e-06, + "loss": 401.206, "step": 1380 }, { - "epoch": 0.01123150639549447, - "grad_norm": 2673.033203125, - "learning_rate": 5.616161616161617e-06, - "loss": 251.5225, + "epoch": 0.011498531662323695, + "grad_norm": 1236.286865234375, + "learning_rate": 1.1232323232323233e-06, + "loss": 362.1751, "step": 1390 }, { - "epoch": 0.011312308599778602, - "grad_norm": 1121.48779296875, - "learning_rate": 5.656565656565657e-06, - "loss": 318.3079, + "epoch": 0.011581254911692932, + "grad_norm": 1091.8187255859375, + "learning_rate": 1.1313131313131315e-06, + "loss": 309.3313, "step": 1400 }, { - "epoch": 0.011393110804062734, - "grad_norm": 866.069091796875, - "learning_rate": 5.696969696969697e-06, - "loss": 283.861, + "epoch": 0.011663978161062167, + "grad_norm": 758.419677734375, + "learning_rate": 1.1393939393939395e-06, + "loss": 313.1746, "step": 1410 }, { - "epoch": 0.011473913008346867, - "grad_norm": 1132.9764404296875, - "learning_rate": 5.7373737373737374e-06, - "loss": 312.9168, + "epoch": 0.011746701410431402, + "grad_norm": 729.5891723632812, + "learning_rate": 1.1474747474747476e-06, + "loss": 412.6428, "step": 1420 }, { - "epoch": 0.011554715212631, - "grad_norm": 1029.2037353515625, - "learning_rate": 5.777777777777778e-06, - "loss": 307.1583, + "epoch": 0.011829424659800638, + "grad_norm": 1166.0693359375, + "learning_rate": 1.1555555555555556e-06, + "loss": 341.9258, "step": 1430 }, { - "epoch": 0.011635517416915134, - "grad_norm": 1138.4461669921875, - "learning_rate": 5.8181818181818185e-06, - "loss": 266.8528, + "epoch": 0.011912147909169873, + "grad_norm": 3680.154052734375, + "learning_rate": 1.1636363636363638e-06, + "loss": 379.1943, "step": 1440 }, { - "epoch": 0.011716319621199267, - "grad_norm": 1278.30224609375, - "learning_rate": 5.858585858585859e-06, - "loss": 339.6735, + "epoch": 0.011994871158539108, + "grad_norm": 9175.6337890625, + "learning_rate": 1.1717171717171719e-06, + "loss": 368.5319, "step": 1450 }, { - "epoch": 0.0117971218254834, - "grad_norm": 908.4046630859375, - "learning_rate": 5.898989898989899e-06, - "loss": 331.0705, + "epoch": 0.012077594407908343, + "grad_norm": 1366.3729248046875, + "learning_rate": 1.1797979797979799e-06, + "loss": 345.4706, "step": 1460 }, { - "epoch": 0.011877924029767532, - "grad_norm": 1165.220947265625, - "learning_rate": 5.93939393939394e-06, - "loss": 267.2519, + "epoch": 0.012160317657277578, + "grad_norm": 1024.5416259765625, + "learning_rate": 1.187878787878788e-06, + "loss": 300.8002, "step": 1470 }, { - "epoch": 0.011958726234051665, - "grad_norm": 3219.732177734375, - "learning_rate": 5.979797979797981e-06, - "loss": 485.5872, + "epoch": 0.012243040906646813, + "grad_norm": 1628.740966796875, + "learning_rate": 1.1959595959595961e-06, + "loss": 314.1938, "step": 1480 }, { - "epoch": 0.012039528438335797, - "grad_norm": 1050.6778564453125, - "learning_rate": 6.0202020202020204e-06, - "loss": 325.8213, + "epoch": 0.012325764156016049, + "grad_norm": 887.1139526367188, + "learning_rate": 1.2040404040404042e-06, + "loss": 294.3438, "step": 1490 }, { - "epoch": 0.01212033064261993, - "grad_norm": 1031.156005859375, - "learning_rate": 6.060606060606061e-06, - "loss": 302.1713, + "epoch": 0.012408487405385284, + "grad_norm": 1261.7779541015625, + "learning_rate": 1.2121212121212122e-06, + "loss": 389.399, "step": 1500 }, { - "epoch": 0.012201132846904064, - "grad_norm": 1457.485107421875, - "learning_rate": 6.101010101010101e-06, - "loss": 267.4604, + "epoch": 0.012491210654754519, + "grad_norm": 3676.649169921875, + "learning_rate": 1.2202020202020202e-06, + "loss": 501.5868, "step": 1510 }, { - "epoch": 0.012281935051188197, - "grad_norm": 1509.94091796875, - "learning_rate": 6.141414141414142e-06, - "loss": 368.4667, + "epoch": 0.012573933904123754, + "grad_norm": 1744.2930908203125, + "learning_rate": 1.2282828282828285e-06, + "loss": 438.0144, "step": 1520 }, { - "epoch": 0.01236273725547233, - "grad_norm": 1406.15673828125, - "learning_rate": 6.181818181818183e-06, - "loss": 349.9011, + "epoch": 0.012656657153492989, + "grad_norm": 1423.72509765625, + "learning_rate": 1.2363636363636365e-06, + "loss": 393.2618, "step": 1530 }, { - "epoch": 0.012443539459756462, - "grad_norm": 1459.2613525390625, - "learning_rate": 6.222222222222222e-06, - "loss": 325.6512, + "epoch": 0.012739380402862224, + "grad_norm": 1478.4072265625, + "learning_rate": 1.2444444444444445e-06, + "loss": 469.231, "step": 1540 }, { - "epoch": 0.012524341664040595, - "grad_norm": 3242.78271484375, - "learning_rate": 6.262626262626263e-06, - "loss": 375.0032, + "epoch": 0.01282210365223146, + "grad_norm": 1770.692626953125, + "learning_rate": 1.2525252525252527e-06, + "loss": 322.2968, "step": 1550 }, { - "epoch": 0.012605143868324727, - "grad_norm": 2443.515625, - "learning_rate": 6.303030303030303e-06, - "loss": 379.789, + "epoch": 0.012904826901600695, + "grad_norm": 2104.556640625, + "learning_rate": 1.2606060606060608e-06, + "loss": 404.0126, "step": 1560 }, { - "epoch": 0.01268594607260886, - "grad_norm": 1770.516845703125, - "learning_rate": 6.343434343434344e-06, - "loss": 251.652, + "epoch": 0.01298755015096993, + "grad_norm": 4723.49658203125, + "learning_rate": 1.268686868686869e-06, + "loss": 335.9013, "step": 1570 }, { - "epoch": 0.012766748276892994, - "grad_norm": 1482.888671875, - "learning_rate": 6.383838383838384e-06, - "loss": 324.2996, + "epoch": 0.013070273400339165, + "grad_norm": 1277.7611083984375, + "learning_rate": 1.2767676767676768e-06, + "loss": 351.2187, "step": 1580 }, { - "epoch": 0.012847550481177127, - "grad_norm": 1663.3935546875, - "learning_rate": 6.424242424242424e-06, - "loss": 301.8472, + "epoch": 0.0131529966497084, + "grad_norm": 1605.273681640625, + "learning_rate": 1.2848484848484848e-06, + "loss": 339.6704, "step": 1590 }, { - "epoch": 0.01292835268546126, - "grad_norm": 23524.42578125, - "learning_rate": 6.464646464646465e-06, - "loss": 335.0892, + "epoch": 0.013235719899077635, + "grad_norm": 4790.10888671875, + "learning_rate": 1.292929292929293e-06, + "loss": 382.8409, "step": 1600 }, { - "epoch": 0.013009154889745392, - "grad_norm": 862.7949829101562, - "learning_rate": 6.505050505050505e-06, - "loss": 306.2031, + "epoch": 0.01331844314844687, + "grad_norm": 1960.6728515625, + "learning_rate": 1.301010101010101e-06, + "loss": 401.6021, "step": 1610 }, { - "epoch": 0.013089957094029525, - "grad_norm": 2391.976806640625, - "learning_rate": 6.545454545454547e-06, - "loss": 310.1344, + "epoch": 0.013401166397816106, + "grad_norm": 804.7484741210938, + "learning_rate": 1.3090909090909093e-06, + "loss": 273.4538, "step": 1620 }, { - "epoch": 0.013170759298313657, - "grad_norm": 1260.0029296875, - "learning_rate": 6.5858585858585856e-06, - "loss": 372.1161, + "epoch": 0.01348388964718534, + "grad_norm": 1295.1436767578125, + "learning_rate": 1.3171717171717172e-06, + "loss": 464.695, "step": 1630 }, { - "epoch": 0.01325156150259779, - "grad_norm": 1587.1514892578125, - "learning_rate": 6.626262626262626e-06, - "loss": 351.7325, + "epoch": 0.013566612896554576, + "grad_norm": 1814.4156494140625, + "learning_rate": 1.3252525252525254e-06, + "loss": 263.6925, "step": 1640 }, { - "epoch": 0.013332363706881924, - "grad_norm": 1152.1556396484375, - "learning_rate": 6.666666666666667e-06, - "loss": 373.3706, + "epoch": 0.013649336145923813, + "grad_norm": 916.8629760742188, + "learning_rate": 1.3333333333333334e-06, + "loss": 307.6685, "step": 1650 }, { - "epoch": 0.013413165911166057, - "grad_norm": 1005.15771484375, - "learning_rate": 6.707070707070707e-06, - "loss": 317.8097, + "epoch": 0.013732059395293048, + "grad_norm": 12093.0126953125, + "learning_rate": 1.3414141414141417e-06, + "loss": 356.9628, "step": 1660 }, { - "epoch": 0.01349396811545019, - "grad_norm": 1090.3779296875, - "learning_rate": 6.747474747474749e-06, - "loss": 315.5722, + "epoch": 0.013814782644662283, + "grad_norm": 5740.71142578125, + "learning_rate": 1.3494949494949497e-06, + "loss": 298.0004, "step": 1670 }, { - "epoch": 0.013574770319734322, - "grad_norm": 1011.2723388671875, - "learning_rate": 6.787878787878789e-06, - "loss": 231.6832, + "epoch": 0.013897505894031518, + "grad_norm": 1078.6505126953125, + "learning_rate": 1.357575757575758e-06, + "loss": 340.6031, "step": 1680 }, { - "epoch": 0.013655572524018455, - "grad_norm": 798.0405883789062, - "learning_rate": 6.828282828282828e-06, - "loss": 306.8582, + "epoch": 0.013980229143400753, + "grad_norm": 1931.4029541015625, + "learning_rate": 1.3656565656565657e-06, + "loss": 327.5082, "step": 1690 }, { - "epoch": 0.013736374728302587, - "grad_norm": 855.2308959960938, - "learning_rate": 6.8686868686868685e-06, - "loss": 282.2944, + "epoch": 0.014062952392769988, + "grad_norm": 1247.7496337890625, + "learning_rate": 1.3737373737373738e-06, + "loss": 428.9282, "step": 1700 }, { - "epoch": 0.01381717693258672, - "grad_norm": 1325.1092529296875, - "learning_rate": 6.909090909090909e-06, - "loss": 427.1749, + "epoch": 0.014145675642139224, + "grad_norm": 1388.3594970703125, + "learning_rate": 1.381818181818182e-06, + "loss": 374.589, "step": 1710 }, { - "epoch": 0.013897979136870854, - "grad_norm": 1027.2860107421875, - "learning_rate": 6.9494949494949505e-06, - "loss": 232.57, + "epoch": 0.014228398891508459, + "grad_norm": 1618.57763671875, + "learning_rate": 1.38989898989899e-06, + "loss": 293.1184, "step": 1720 }, { - "epoch": 0.013978781341154987, - "grad_norm": 1047.0118408203125, - "learning_rate": 6.989898989898991e-06, - "loss": 272.4593, + "epoch": 0.014311122140877694, + "grad_norm": 1560.1385498046875, + "learning_rate": 1.3979797979797982e-06, + "loss": 420.2343, "step": 1730 }, { - "epoch": 0.01405958354543912, - "grad_norm": 1453.4931640625, - "learning_rate": 7.03030303030303e-06, - "loss": 287.978, + "epoch": 0.014393845390246929, + "grad_norm": 1528.8956298828125, + "learning_rate": 1.406060606060606e-06, + "loss": 300.293, "step": 1740 }, { - "epoch": 0.014140385749723252, - "grad_norm": 1631.84521484375, - "learning_rate": 7.0707070707070704e-06, - "loss": 222.9585, + "epoch": 0.014476568639616164, + "grad_norm": 1254.3798828125, + "learning_rate": 1.4141414141414143e-06, + "loss": 397.9587, "step": 1750 }, { - "epoch": 0.014221187954007385, - "grad_norm": 1109.9012451171875, - "learning_rate": 7.111111111111112e-06, - "loss": 263.4153, + "epoch": 0.0145592918889854, + "grad_norm": 1065.2459716796875, + "learning_rate": 1.4222222222222223e-06, + "loss": 362.8052, "step": 1760 }, { - "epoch": 0.014301990158291518, - "grad_norm": 1374.5731201171875, - "learning_rate": 7.151515151515152e-06, - "loss": 224.7149, + "epoch": 0.014642015138354635, + "grad_norm": 1418.1273193359375, + "learning_rate": 1.4303030303030306e-06, + "loss": 328.7451, "step": 1770 }, { - "epoch": 0.01438279236257565, - "grad_norm": 700.2552490234375, - "learning_rate": 7.191919191919193e-06, - "loss": 243.2667, + "epoch": 0.01472473838772387, + "grad_norm": 952.8336791992188, + "learning_rate": 1.4383838383838386e-06, + "loss": 336.5588, "step": 1780 }, { - "epoch": 0.014463594566859785, - "grad_norm": 689.7608032226562, - "learning_rate": 7.232323232323232e-06, - "loss": 209.3831, + "epoch": 0.014807461637093105, + "grad_norm": 1089.526123046875, + "learning_rate": 1.4464646464646464e-06, + "loss": 342.3787, "step": 1790 }, { - "epoch": 0.014544396771143917, - "grad_norm": 1072.593994140625, - "learning_rate": 7.272727272727272e-06, - "loss": 233.4359, + "epoch": 0.01489018488646234, + "grad_norm": 1695.346435546875, + "learning_rate": 1.4545454545454546e-06, + "loss": 308.9384, "step": 1800 }, { - "epoch": 0.01462519897542805, - "grad_norm": 783.2555541992188, - "learning_rate": 7.313131313131314e-06, - "loss": 259.1145, + "epoch": 0.014972908135831575, + "grad_norm": 1330.6956787109375, + "learning_rate": 1.4626262626262627e-06, + "loss": 353.8277, "step": 1810 }, { - "epoch": 0.014706001179712182, - "grad_norm": 1364.049560546875, - "learning_rate": 7.353535353535354e-06, - "loss": 244.7944, + "epoch": 0.01505563138520081, + "grad_norm": 1485.4267578125, + "learning_rate": 1.470707070707071e-06, + "loss": 301.8971, "step": 1820 }, { - "epoch": 0.014786803383996315, - "grad_norm": 1947.3690185546875, - "learning_rate": 7.393939393939395e-06, - "loss": 276.0544, + "epoch": 0.015138354634570046, + "grad_norm": 1166.4459228515625, + "learning_rate": 1.478787878787879e-06, + "loss": 288.5717, "step": 1830 }, { - "epoch": 0.014867605588280448, - "grad_norm": 1572.007568359375, - "learning_rate": 7.434343434343435e-06, - "loss": 310.638, + "epoch": 0.01522107788393928, + "grad_norm": 1435.5469970703125, + "learning_rate": 1.4868686868686872e-06, + "loss": 263.6362, "step": 1840 }, { - "epoch": 0.01494840779256458, - "grad_norm": 1419.4361572265625, - "learning_rate": 7.474747474747475e-06, - "loss": 409.634, + "epoch": 0.015303801133308516, + "grad_norm": 807.6802978515625, + "learning_rate": 1.494949494949495e-06, + "loss": 273.6689, "step": 1850 }, { - "epoch": 0.015029209996848715, - "grad_norm": 1355.0137939453125, - "learning_rate": 7.515151515151516e-06, - "loss": 253.9533, + "epoch": 0.015386524382677751, + "grad_norm": 1584.3016357421875, + "learning_rate": 1.5030303030303032e-06, + "loss": 292.5073, "step": 1860 }, { - "epoch": 0.015110012201132847, - "grad_norm": 768.96923828125, - "learning_rate": 7.555555555555556e-06, - "loss": 304.4757, + "epoch": 0.015469247632046986, + "grad_norm": 1016.5427856445312, + "learning_rate": 1.5111111111111112e-06, + "loss": 346.5906, "step": 1870 }, { - "epoch": 0.01519081440541698, - "grad_norm": 959.4989624023438, - "learning_rate": 7.595959595959597e-06, - "loss": 345.032, + "epoch": 0.015551970881416221, + "grad_norm": 1506.8829345703125, + "learning_rate": 1.5191919191919195e-06, + "loss": 345.6554, "step": 1880 }, { - "epoch": 0.015271616609701113, - "grad_norm": 1339.84228515625, - "learning_rate": 7.636363636363638e-06, - "loss": 296.0798, + "epoch": 0.015634694130785456, + "grad_norm": 2679.667236328125, + "learning_rate": 1.5272727272727275e-06, + "loss": 376.285, "step": 1890 }, { - "epoch": 0.015352418813985245, - "grad_norm": 1036.2491455078125, - "learning_rate": 7.676767676767677e-06, - "loss": 300.197, + "epoch": 0.015717417380154693, + "grad_norm": 880.42529296875, + "learning_rate": 1.5353535353535353e-06, + "loss": 353.0767, "step": 1900 }, { - "epoch": 0.015433221018269378, - "grad_norm": 707.27587890625, - "learning_rate": 7.717171717171717e-06, - "loss": 277.1603, + "epoch": 0.015800140629523927, + "grad_norm": 875.5301513671875, + "learning_rate": 1.5434343434343435e-06, + "loss": 266.2872, "step": 1910 }, { - "epoch": 0.015514023222553512, - "grad_norm": 1203.453857421875, - "learning_rate": 7.757575757575758e-06, - "loss": 303.6785, + "epoch": 0.015882863878893164, + "grad_norm": 2602.2958984375, + "learning_rate": 1.5515151515151516e-06, + "loss": 295.0359, "step": 1920 }, { - "epoch": 0.015594825426837645, - "grad_norm": 1172.7025146484375, - "learning_rate": 7.797979797979799e-06, - "loss": 246.9346, + "epoch": 0.015965587128262397, + "grad_norm": 1463.206298828125, + "learning_rate": 1.5595959595959598e-06, + "loss": 237.1384, "step": 1930 }, { - "epoch": 0.015675627631121777, - "grad_norm": 1082.4605712890625, - "learning_rate": 7.838383838383839e-06, - "loss": 296.1259, + "epoch": 0.016048310377631634, + "grad_norm": 3299.566650390625, + "learning_rate": 1.5676767676767678e-06, + "loss": 324.5822, "step": 1940 }, { - "epoch": 0.01575642983540591, - "grad_norm": 1456.42529296875, - "learning_rate": 7.878787878787878e-06, - "loss": 269.7506, + "epoch": 0.016131033627000867, + "grad_norm": 1199.87158203125, + "learning_rate": 1.5757575757575759e-06, + "loss": 328.9643, "step": 1950 }, { - "epoch": 0.015837232039690043, - "grad_norm": 1794.72119140625, - "learning_rate": 7.919191919191919e-06, - "loss": 246.411, + "epoch": 0.016213756876370104, + "grad_norm": 1164.5804443359375, + "learning_rate": 1.5838383838383839e-06, + "loss": 363.4335, "step": 1960 }, { - "epoch": 0.015918034243974175, - "grad_norm": 3157.114990234375, - "learning_rate": 7.959595959595959e-06, - "loss": 256.127, + "epoch": 0.016296480125739338, + "grad_norm": 2804.115478515625, + "learning_rate": 1.5919191919191921e-06, + "loss": 406.4945, "step": 1970 }, { - "epoch": 0.015998836448258308, - "grad_norm": 1361.6929931640625, - "learning_rate": 8.000000000000001e-06, - "loss": 308.1984, + "epoch": 0.016379203375108575, + "grad_norm": 1725.375, + "learning_rate": 1.6000000000000001e-06, + "loss": 391.5324, "step": 1980 }, { - "epoch": 0.01607963865254244, - "grad_norm": 1006.965087890625, - "learning_rate": 8.040404040404042e-06, - "loss": 341.8693, + "epoch": 0.016461926624477808, + "grad_norm": 1829.0943603515625, + "learning_rate": 1.6080808080808084e-06, + "loss": 306.0722, "step": 1990 }, { - "epoch": 0.016160440856826573, - "grad_norm": 1541.38720703125, - "learning_rate": 8.080808080808082e-06, - "loss": 274.4633, + "epoch": 0.016544649873847045, + "grad_norm": 7947.86962890625, + "learning_rate": 1.6161616161616164e-06, + "loss": 314.3117, "step": 2000 }, { - "epoch": 0.016241243061110706, - "grad_norm": 1735.104248046875, - "learning_rate": 8.121212121212121e-06, - "loss": 258.4104, + "epoch": 0.01662737312321628, + "grad_norm": 1242.45361328125, + "learning_rate": 1.6242424242424242e-06, + "loss": 384.1134, "step": 2010 }, { - "epoch": 0.01632204526539484, - "grad_norm": 2176.154052734375, - "learning_rate": 8.161616161616162e-06, - "loss": 314.5508, + "epoch": 0.016710096372585515, + "grad_norm": 1351.123291015625, + "learning_rate": 1.6323232323232325e-06, + "loss": 423.7686, "step": 2020 }, { - "epoch": 0.016402847469678974, - "grad_norm": 1052.533447265625, - "learning_rate": 8.202020202020202e-06, - "loss": 312.927, + "epoch": 0.016792819621954752, + "grad_norm": 932.1561279296875, + "learning_rate": 1.6404040404040405e-06, + "loss": 296.208, "step": 2030 }, { - "epoch": 0.016483649673963107, - "grad_norm": 1208.69189453125, - "learning_rate": 8.242424242424243e-06, - "loss": 254.359, + "epoch": 0.016875542871323986, + "grad_norm": 1880.4168701171875, + "learning_rate": 1.6484848484848487e-06, + "loss": 272.4264, "step": 2040 }, { - "epoch": 0.01656445187824724, - "grad_norm": 1978.5025634765625, - "learning_rate": 8.282828282828283e-06, - "loss": 299.8744, + "epoch": 0.016958266120693222, + "grad_norm": 1092.3876953125, + "learning_rate": 1.6565656565656567e-06, + "loss": 259.9056, "step": 2050 }, { - "epoch": 0.016645254082531372, - "grad_norm": 3605.6494140625, - "learning_rate": 8.323232323232322e-06, - "loss": 298.715, + "epoch": 0.017040989370062456, + "grad_norm": 1369.13671875, + "learning_rate": 1.6646464646464648e-06, + "loss": 310.8441, "step": 2060 }, { - "epoch": 0.016726056286815505, - "grad_norm": 1599.973876953125, - "learning_rate": 8.363636363636365e-06, - "loss": 258.9758, + "epoch": 0.017123712619431693, + "grad_norm": 2696.009765625, + "learning_rate": 1.6727272727272728e-06, + "loss": 384.094, "step": 2070 }, { - "epoch": 0.016806858491099638, - "grad_norm": 1183.451904296875, - "learning_rate": 8.404040404040405e-06, - "loss": 352.8176, + "epoch": 0.017206435868800926, + "grad_norm": 1928.6448974609375, + "learning_rate": 1.680808080808081e-06, + "loss": 289.1333, "step": 2080 }, { - "epoch": 0.01688766069538377, - "grad_norm": 1582.7120361328125, - "learning_rate": 8.444444444444446e-06, - "loss": 309.3484, + "epoch": 0.017289159118170163, + "grad_norm": 2971.282958984375, + "learning_rate": 1.688888888888889e-06, + "loss": 321.7417, "step": 2090 }, { - "epoch": 0.016968462899667903, - "grad_norm": 932.9716796875, - "learning_rate": 8.484848484848486e-06, - "loss": 255.265, + "epoch": 0.017371882367539396, + "grad_norm": 1133.2125244140625, + "learning_rate": 1.6969696969696973e-06, + "loss": 356.1534, "step": 2100 }, { - "epoch": 0.017049265103952035, - "grad_norm": 922.5059814453125, - "learning_rate": 8.525252525252525e-06, - "loss": 253.6583, + "epoch": 0.017454605616908633, + "grad_norm": 4175.73876953125, + "learning_rate": 1.705050505050505e-06, + "loss": 323.3943, "step": 2110 }, { - "epoch": 0.017130067308236168, - "grad_norm": 1196.361083984375, - "learning_rate": 8.565656565656566e-06, - "loss": 323.4844, + "epoch": 0.017537328866277867, + "grad_norm": 1043.2705078125, + "learning_rate": 1.7131313131313131e-06, + "loss": 332.0161, "step": 2120 }, { - "epoch": 0.0172108695125203, - "grad_norm": 1005.9546508789062, - "learning_rate": 8.606060606060606e-06, - "loss": 351.4896, + "epoch": 0.017620052115647104, + "grad_norm": 3233.656005859375, + "learning_rate": 1.7212121212121214e-06, + "loss": 335.2546, "step": 2130 }, { - "epoch": 0.017291671716804433, - "grad_norm": 1585.8636474609375, - "learning_rate": 8.646464646464647e-06, - "loss": 325.0701, + "epoch": 0.017702775365016337, + "grad_norm": 907.5848388671875, + "learning_rate": 1.7292929292929294e-06, + "loss": 295.7123, "step": 2140 }, { - "epoch": 0.017372473921088566, - "grad_norm": 3758.6982421875, - "learning_rate": 8.686868686868687e-06, - "loss": 203.9509, + "epoch": 0.017785498614385574, + "grad_norm": 3490.914306640625, + "learning_rate": 1.7373737373737376e-06, + "loss": 337.3698, "step": 2150 }, { - "epoch": 0.0174532761253727, - "grad_norm": 1602.480224609375, - "learning_rate": 8.727272727272728e-06, - "loss": 294.8089, + "epoch": 0.017868221863754807, + "grad_norm": 1934.054931640625, + "learning_rate": 1.7454545454545456e-06, + "loss": 323.8587, "step": 2160 }, { - "epoch": 0.017534078329656835, - "grad_norm": 1571.1812744140625, - "learning_rate": 8.767676767676768e-06, - "loss": 220.656, + "epoch": 0.017950945113124044, + "grad_norm": 3566.7275390625, + "learning_rate": 1.7535353535353537e-06, + "loss": 411.7551, "step": 2170 }, { - "epoch": 0.017614880533940967, - "grad_norm": 1073.2261962890625, - "learning_rate": 8.808080808080809e-06, - "loss": 221.6837, + "epoch": 0.018033668362493278, + "grad_norm": 1062.2769775390625, + "learning_rate": 1.7616161616161617e-06, + "loss": 316.7116, "step": 2180 }, { - "epoch": 0.0176956827382251, - "grad_norm": 1125.1983642578125, - "learning_rate": 8.84848484848485e-06, - "loss": 286.0061, + "epoch": 0.018116391611862515, + "grad_norm": 726.226806640625, + "learning_rate": 1.76969696969697e-06, + "loss": 310.7243, "step": 2190 }, { - "epoch": 0.017776484942509233, - "grad_norm": 1008.795654296875, - "learning_rate": 8.88888888888889e-06, - "loss": 282.5281, + "epoch": 0.018199114861231748, + "grad_norm": 1068.4840087890625, + "learning_rate": 1.777777777777778e-06, + "loss": 250.8001, "step": 2200 }, { - "epoch": 0.017857287146793365, - "grad_norm": 1810.7894287109375, - "learning_rate": 8.92929292929293e-06, - "loss": 226.4816, + "epoch": 0.018281838110600985, + "grad_norm": 1519.0250244140625, + "learning_rate": 1.7858585858585862e-06, + "loss": 279.3612, "step": 2210 }, { - "epoch": 0.017938089351077498, - "grad_norm": 894.2589721679688, - "learning_rate": 8.96969696969697e-06, - "loss": 251.6401, + "epoch": 0.01836456135997022, + "grad_norm": 1655.1064453125, + "learning_rate": 1.793939393939394e-06, + "loss": 288.0882, "step": 2220 }, { - "epoch": 0.01801889155536163, - "grad_norm": 1232.9827880859375, - "learning_rate": 9.01010101010101e-06, - "loss": 248.7544, + "epoch": 0.018447284609339455, + "grad_norm": 775.435546875, + "learning_rate": 1.802020202020202e-06, + "loss": 240.9307, "step": 2230 }, { - "epoch": 0.018099693759645763, - "grad_norm": 1993.9267578125, - "learning_rate": 9.05050505050505e-06, - "loss": 256.8296, + "epoch": 0.01853000785870869, + "grad_norm": 894.8203125, + "learning_rate": 1.8101010101010103e-06, + "loss": 237.9998, "step": 2240 }, { - "epoch": 0.018180495963929896, - "grad_norm": 967.433837890625, - "learning_rate": 9.090909090909091e-06, - "loss": 245.6321, + "epoch": 0.018612731108077925, + "grad_norm": 1792.8360595703125, + "learning_rate": 1.8181818181818183e-06, + "loss": 259.3344, "step": 2250 }, { - "epoch": 0.018261298168214028, - "grad_norm": 2560.1728515625, - "learning_rate": 9.131313131313132e-06, - "loss": 234.9478, + "epoch": 0.01869545435744716, + "grad_norm": 1591.714111328125, + "learning_rate": 1.8262626262626265e-06, + "loss": 406.4088, "step": 2260 }, { - "epoch": 0.01834210037249816, - "grad_norm": 590.1747436523438, - "learning_rate": 9.171717171717172e-06, - "loss": 236.6796, + "epoch": 0.018778177606816396, + "grad_norm": 1374.53271484375, + "learning_rate": 1.8343434343434343e-06, + "loss": 265.1345, "step": 2270 }, { - "epoch": 0.018422902576782293, - "grad_norm": 1504.942626953125, - "learning_rate": 9.212121212121213e-06, - "loss": 367.3362, + "epoch": 0.018860900856185633, + "grad_norm": 984.4681396484375, + "learning_rate": 1.8424242424242426e-06, + "loss": 302.0863, "step": 2280 }, { - "epoch": 0.018503704781066426, - "grad_norm": 977.7069091796875, - "learning_rate": 9.252525252525253e-06, - "loss": 276.6067, + "epoch": 0.018943624105554866, + "grad_norm": 4490.89306640625, + "learning_rate": 1.8505050505050506e-06, + "loss": 341.7106, "step": 2290 }, { - "epoch": 0.01858450698535056, - "grad_norm": 1384.83203125, - "learning_rate": 9.292929292929294e-06, - "loss": 340.7896, + "epoch": 0.019026347354924103, + "grad_norm": 959.8450317382812, + "learning_rate": 1.8585858585858588e-06, + "loss": 295.2025, "step": 2300 }, { - "epoch": 0.018665309189634695, - "grad_norm": 1302.6343994140625, - "learning_rate": 9.333333333333334e-06, - "loss": 198.1527, + "epoch": 0.019109070604293336, + "grad_norm": 1515.1561279296875, + "learning_rate": 1.8666666666666669e-06, + "loss": 319.7544, "step": 2310 }, { - "epoch": 0.018746111393918827, - "grad_norm": 836.4732666015625, - "learning_rate": 9.373737373737375e-06, - "loss": 209.2565, + "epoch": 0.019191793853662573, + "grad_norm": 1125.7613525390625, + "learning_rate": 1.874747474747475e-06, + "loss": 275.1258, "step": 2320 }, { - "epoch": 0.01882691359820296, - "grad_norm": 1400.0604248046875, - "learning_rate": 9.414141414141414e-06, - "loss": 305.2968, + "epoch": 0.019274517103031807, + "grad_norm": 988.5458984375, + "learning_rate": 1.882828282828283e-06, + "loss": 315.6667, "step": 2330 }, { - "epoch": 0.018907715802487093, - "grad_norm": 1454.78125, - "learning_rate": 9.454545454545454e-06, - "loss": 260.4474, + "epoch": 0.019357240352401044, + "grad_norm": 733.443359375, + "learning_rate": 1.890909090909091e-06, + "loss": 279.1915, "step": 2340 }, { - "epoch": 0.018988518006771225, - "grad_norm": 4679.6923828125, - "learning_rate": 9.494949494949495e-06, - "loss": 247.9314, + "epoch": 0.019439963601770277, + "grad_norm": 1048.5247802734375, + "learning_rate": 1.8989898989898992e-06, + "loss": 289.4639, "step": 2350 }, { - "epoch": 0.019069320211055358, - "grad_norm": 6640.1201171875, - "learning_rate": 9.535353535353535e-06, - "loss": 259.6065, + "epoch": 0.019522686851139514, + "grad_norm": 932.3818969726562, + "learning_rate": 1.9070707070707072e-06, + "loss": 319.7495, "step": 2360 }, { - "epoch": 0.01915012241533949, - "grad_norm": 1564.781982421875, - "learning_rate": 9.575757575757578e-06, - "loss": 245.3016, + "epoch": 0.019605410100508747, + "grad_norm": 1501.8258056640625, + "learning_rate": 1.9151515151515154e-06, + "loss": 286.4116, "step": 2370 }, { - "epoch": 0.019230924619623623, - "grad_norm": 2078.74267578125, - "learning_rate": 9.616161616161616e-06, - "loss": 284.7133, + "epoch": 0.019688133349877984, + "grad_norm": 2098.0625, + "learning_rate": 1.9232323232323235e-06, + "loss": 323.738, "step": 2380 }, { - "epoch": 0.019311726823907756, - "grad_norm": 1441.0360107421875, - "learning_rate": 9.656565656565657e-06, - "loss": 270.4787, + "epoch": 0.019770856599247218, + "grad_norm": 1237.71240234375, + "learning_rate": 1.9313131313131315e-06, + "loss": 290.7764, "step": 2390 }, { - "epoch": 0.01939252902819189, - "grad_norm": 744.0514526367188, - "learning_rate": 9.696969696969698e-06, - "loss": 200.2359, + "epoch": 0.019853579848616455, + "grad_norm": 942.9974365234375, + "learning_rate": 1.9393939393939395e-06, + "loss": 340.0305, "step": 2400 }, { - "epoch": 0.01947333123247602, - "grad_norm": 1168.913818359375, - "learning_rate": 9.737373737373738e-06, - "loss": 269.4149, + "epoch": 0.019936303097985688, + "grad_norm": 1180.946533203125, + "learning_rate": 1.9474747474747475e-06, + "loss": 345.4843, "step": 2410 }, { - "epoch": 0.019554133436760154, - "grad_norm": 3848.146484375, - "learning_rate": 9.777777777777779e-06, - "loss": 250.6479, + "epoch": 0.020019026347354925, + "grad_norm": 727.6260375976562, + "learning_rate": 1.955555555555556e-06, + "loss": 277.7417, "step": 2420 }, { - "epoch": 0.019634935641044286, - "grad_norm": 798.60595703125, - "learning_rate": 9.818181818181818e-06, - "loss": 245.7341, + "epoch": 0.02010174959672416, + "grad_norm": 3368.737548828125, + "learning_rate": 1.9636363636363636e-06, + "loss": 352.602, "step": 2430 }, { - "epoch": 0.01971573784532842, - "grad_norm": 2139.72265625, - "learning_rate": 9.858585858585858e-06, - "loss": 275.653, + "epoch": 0.020184472846093395, + "grad_norm": 1234.5382080078125, + "learning_rate": 1.9717171717171716e-06, + "loss": 236.3891, "step": 2440 }, { - "epoch": 0.019796540049612555, - "grad_norm": 1223.6392822265625, - "learning_rate": 9.898989898989899e-06, - "loss": 271.2685, + "epoch": 0.02026719609546263, + "grad_norm": 1076.5107421875, + "learning_rate": 1.97979797979798e-06, + "loss": 245.4711, "step": 2450 }, { - "epoch": 0.019877342253896688, - "grad_norm": 1007.25439453125, - "learning_rate": 9.93939393939394e-06, - "loss": 235.147, + "epoch": 0.020349919344831865, + "grad_norm": 3362.482421875, + "learning_rate": 1.987878787878788e-06, + "loss": 309.1249, "step": 2460 }, { - "epoch": 0.01995814445818082, - "grad_norm": 1195.012939453125, - "learning_rate": 9.979797979797981e-06, - "loss": 302.4475, + "epoch": 0.0204326425942011, + "grad_norm": 2115.099609375, + "learning_rate": 1.995959595959596e-06, + "loss": 291.4714, "step": 2470 }, { - "epoch": 0.020038946662464953, - "grad_norm": 1530.9473876953125, - "learning_rate": 1.0020202020202022e-05, - "loss": 259.1597, + "epoch": 0.020515365843570336, + "grad_norm": 2554.067626953125, + "learning_rate": 2.004040404040404e-06, + "loss": 398.9078, "step": 2480 }, { - "epoch": 0.020119748866749085, - "grad_norm": 1403.546142578125, - "learning_rate": 1.006060606060606e-05, - "loss": 348.2433, + "epoch": 0.02059808909293957, + "grad_norm": 843.080810546875, + "learning_rate": 2.012121212121212e-06, + "loss": 398.1104, "step": 2490 }, { - "epoch": 0.020200551071033218, - "grad_norm": 1328.4873046875, - "learning_rate": 1.0101010101010101e-05, - "loss": 289.0594, + "epoch": 0.020680812342308806, + "grad_norm": 1753.0567626953125, + "learning_rate": 2.02020202020202e-06, + "loss": 269.6314, "step": 2500 }, { - "epoch": 0.02028135327531735, - "grad_norm": 1171.5048828125, - "learning_rate": 1.0141414141414142e-05, - "loss": 197.3946, + "epoch": 0.02076353559167804, + "grad_norm": 1189.378173828125, + "learning_rate": 2.0282828282828286e-06, + "loss": 263.313, "step": 2510 }, { - "epoch": 0.020362155479601483, - "grad_norm": 1274.6544189453125, - "learning_rate": 1.0181818181818182e-05, - "loss": 295.2945, + "epoch": 0.020846258841047276, + "grad_norm": 972.114990234375, + "learning_rate": 2.0363636363636367e-06, + "loss": 276.6843, "step": 2520 }, { - "epoch": 0.020442957683885616, - "grad_norm": 2023.71337890625, - "learning_rate": 1.0222222222222223e-05, - "loss": 271.9707, + "epoch": 0.02092898209041651, + "grad_norm": 1066.5135498046875, + "learning_rate": 2.0444444444444447e-06, + "loss": 262.9993, "step": 2530 }, { - "epoch": 0.02052375988816975, - "grad_norm": 1765.538818359375, - "learning_rate": 1.0262626262626262e-05, - "loss": 246.6141, + "epoch": 0.021011705339785747, + "grad_norm": 1628.2537841796875, + "learning_rate": 2.0525252525252527e-06, + "loss": 295.3019, "step": 2540 }, { - "epoch": 0.02060456209245388, - "grad_norm": 859.3914794921875, - "learning_rate": 1.0303030303030304e-05, - "loss": 276.1375, + "epoch": 0.021094428589154984, + "grad_norm": 1901.3768310546875, + "learning_rate": 2.0606060606060607e-06, + "loss": 240.3958, "step": 2550 }, { - "epoch": 0.020685364296738014, - "grad_norm": 1024.8955078125, - "learning_rate": 1.0343434343434345e-05, - "loss": 239.6529, + "epoch": 0.021177151838524217, + "grad_norm": 1032.462158203125, + "learning_rate": 2.0686868686868688e-06, + "loss": 262.1258, "step": 2560 }, { - "epoch": 0.020766166501022146, - "grad_norm": 1012.91455078125, - "learning_rate": 1.0383838383838385e-05, - "loss": 199.6656, + "epoch": 0.021259875087893454, + "grad_norm": 1653.3731689453125, + "learning_rate": 2.0767676767676768e-06, + "loss": 279.4022, "step": 2570 }, { - "epoch": 0.02084696870530628, - "grad_norm": 1371.8551025390625, - "learning_rate": 1.0424242424242426e-05, - "loss": 247.0821, + "epoch": 0.021342598337262687, + "grad_norm": 2378.767822265625, + "learning_rate": 2.0848484848484852e-06, + "loss": 374.8497, "step": 2580 }, { - "epoch": 0.020927770909590415, - "grad_norm": 1338.343017578125, - "learning_rate": 1.0464646464646465e-05, - "loss": 260.7634, + "epoch": 0.021425321586631924, + "grad_norm": 1281.254150390625, + "learning_rate": 2.092929292929293e-06, + "loss": 312.5471, "step": 2590 }, { - "epoch": 0.021008573113874548, - "grad_norm": 917.8023071289062, - "learning_rate": 1.0505050505050505e-05, - "loss": 255.2869, + "epoch": 0.021508044836001158, + "grad_norm": 1831.2857666015625, + "learning_rate": 2.1010101010101013e-06, + "loss": 341.7124, "step": 2600 }, { - "epoch": 0.02108937531815868, - "grad_norm": 1167.3427734375, - "learning_rate": 1.0545454545454546e-05, - "loss": 255.108, + "epoch": 0.021590768085370395, + "grad_norm": 5672.0419921875, + "learning_rate": 2.1090909090909093e-06, + "loss": 357.9832, "step": 2610 }, { - "epoch": 0.021170177522442813, - "grad_norm": 1662.1556396484375, - "learning_rate": 1.0585858585858586e-05, - "loss": 304.7596, + "epoch": 0.021673491334739628, + "grad_norm": 1445.11474609375, + "learning_rate": 2.1171717171717173e-06, + "loss": 302.0483, "step": 2620 }, { - "epoch": 0.021250979726726946, - "grad_norm": 1393.7713623046875, - "learning_rate": 1.0626262626262627e-05, - "loss": 298.9948, + "epoch": 0.021756214584108865, + "grad_norm": 908.9044799804688, + "learning_rate": 2.1252525252525254e-06, + "loss": 292.4746, "step": 2630 }, { - "epoch": 0.02133178193101108, - "grad_norm": 4169.89306640625, - "learning_rate": 1.0666666666666667e-05, - "loss": 256.329, + "epoch": 0.0218389378334781, + "grad_norm": 1368.536865234375, + "learning_rate": 2.133333333333334e-06, + "loss": 240.9777, "step": 2640 }, { - "epoch": 0.02141258413529521, - "grad_norm": 961.8526000976562, - "learning_rate": 1.0707070707070708e-05, - "loss": 268.2124, + "epoch": 0.021921661082847335, + "grad_norm": 1520.9796142578125, + "learning_rate": 2.1414141414141414e-06, + "loss": 394.0476, "step": 2650 }, { - "epoch": 0.021493386339579344, - "grad_norm": 2141.90869140625, - "learning_rate": 1.0747474747474748e-05, - "loss": 279.6773, + "epoch": 0.02200438433221657, + "grad_norm": 1067.6510009765625, + "learning_rate": 2.1494949494949494e-06, + "loss": 306.7722, "step": 2660 }, { - "epoch": 0.021574188543863476, - "grad_norm": 1454.77392578125, - "learning_rate": 1.0787878787878789e-05, - "loss": 250.6374, + "epoch": 0.022087107581585805, + "grad_norm": 1416.1361083984375, + "learning_rate": 2.157575757575758e-06, + "loss": 309.6038, "step": 2670 }, { - "epoch": 0.02165499074814761, - "grad_norm": 1119.782958984375, - "learning_rate": 1.082828282828283e-05, - "loss": 299.24, + "epoch": 0.02216983083095504, + "grad_norm": 1390.99169921875, + "learning_rate": 2.165656565656566e-06, + "loss": 245.2896, "step": 2680 }, { - "epoch": 0.02173579295243174, - "grad_norm": 1507.88916015625, - "learning_rate": 1.086868686868687e-05, - "loss": 304.945, + "epoch": 0.022252554080324276, + "grad_norm": 2862.983642578125, + "learning_rate": 2.173737373737374e-06, + "loss": 265.1056, "step": 2690 }, { - "epoch": 0.021816595156715874, - "grad_norm": 1235.4326171875, - "learning_rate": 1.0909090909090909e-05, - "loss": 329.6972, + "epoch": 0.02233527732969351, + "grad_norm": 16051.9775390625, + "learning_rate": 2.181818181818182e-06, + "loss": 358.6156, "step": 2700 }, { - "epoch": 0.021897397361000007, - "grad_norm": 1516.6436767578125, - "learning_rate": 1.094949494949495e-05, - "loss": 276.193, + "epoch": 0.022418000579062746, + "grad_norm": 1990.9959716796875, + "learning_rate": 2.18989898989899e-06, + "loss": 339.7691, "step": 2710 }, { - "epoch": 0.02197819956528414, - "grad_norm": 1332.3309326171875, - "learning_rate": 1.098989898989899e-05, - "loss": 309.7016, + "epoch": 0.02250072382843198, + "grad_norm": 1169.6275634765625, + "learning_rate": 2.197979797979798e-06, + "loss": 278.6436, "step": 2720 }, { - "epoch": 0.022059001769568275, - "grad_norm": 1349.0360107421875, - "learning_rate": 1.103030303030303e-05, - "loss": 423.0254, + "epoch": 0.022583447077801216, + "grad_norm": 981.5079956054688, + "learning_rate": 2.2060606060606064e-06, + "loss": 315.041, "step": 2730 }, { - "epoch": 0.022139803973852408, - "grad_norm": 2262.348876953125, - "learning_rate": 1.1070707070707071e-05, - "loss": 260.9447, + "epoch": 0.02266617032717045, + "grad_norm": 889.581298828125, + "learning_rate": 2.2141414141414145e-06, + "loss": 256.0375, "step": 2740 }, { - "epoch": 0.02222060617813654, - "grad_norm": 1374.3009033203125, - "learning_rate": 1.1111111111111112e-05, - "loss": 283.2908, + "epoch": 0.022748893576539687, + "grad_norm": 1031.492431640625, + "learning_rate": 2.222222222222222e-06, + "loss": 295.4076, "step": 2750 }, { - "epoch": 0.022301408382420673, - "grad_norm": 768.2625732421875, - "learning_rate": 1.1151515151515152e-05, - "loss": 243.0171, + "epoch": 0.02283161682590892, + "grad_norm": 1507.5657958984375, + "learning_rate": 2.2303030303030305e-06, + "loss": 269.3946, "step": 2760 }, { - "epoch": 0.022382210586704806, - "grad_norm": 4175.06396484375, - "learning_rate": 1.1191919191919193e-05, - "loss": 236.5765, + "epoch": 0.022914340075278157, + "grad_norm": 1775.590087890625, + "learning_rate": 2.2383838383838385e-06, + "loss": 268.5406, "step": 2770 }, { - "epoch": 0.02246301279098894, - "grad_norm": 1220.933837890625, - "learning_rate": 1.1232323232323233e-05, - "loss": 254.5181, + "epoch": 0.02299706332464739, + "grad_norm": 1084.8333740234375, + "learning_rate": 2.2464646464646466e-06, + "loss": 248.5186, "step": 2780 }, { - "epoch": 0.02254381499527307, - "grad_norm": 1589.7581787109375, - "learning_rate": 1.1272727272727274e-05, - "loss": 183.8809, + "epoch": 0.023079786574016627, + "grad_norm": 1082.241943359375, + "learning_rate": 2.254545454545455e-06, + "loss": 292.8328, "step": 2790 }, { - "epoch": 0.022624617199557204, - "grad_norm": 1031.3692626953125, - "learning_rate": 1.1313131313131314e-05, - "loss": 246.612, + "epoch": 0.023162509823385864, + "grad_norm": 1380.9404296875, + "learning_rate": 2.262626262626263e-06, + "loss": 284.8308, "step": 2800 }, { - "epoch": 0.022705419403841336, - "grad_norm": 1396.37744140625, - "learning_rate": 1.1353535353535353e-05, - "loss": 224.2803, + "epoch": 0.023245233072755098, + "grad_norm": 2408.440185546875, + "learning_rate": 2.2707070707070706e-06, + "loss": 255.7943, "step": 2810 }, { - "epoch": 0.02278622160812547, - "grad_norm": 1879.5634765625, - "learning_rate": 1.1393939393939394e-05, - "loss": 287.104, + "epoch": 0.023327956322124335, + "grad_norm": 913.3507080078125, + "learning_rate": 2.278787878787879e-06, + "loss": 303.4039, "step": 2820 }, { - "epoch": 0.0228670238124096, - "grad_norm": 1376.0625, - "learning_rate": 1.1434343434343434e-05, - "loss": 284.0272, + "epoch": 0.023410679571493568, + "grad_norm": 1832.5792236328125, + "learning_rate": 2.286868686868687e-06, + "loss": 279.6091, "step": 2830 }, { - "epoch": 0.022947826016693734, - "grad_norm": 682.961181640625, - "learning_rate": 1.1474747474747475e-05, - "loss": 210.3876, + "epoch": 0.023493402820862805, + "grad_norm": 2071.0654296875, + "learning_rate": 2.294949494949495e-06, + "loss": 335.2055, "step": 2840 }, { - "epoch": 0.023028628220977867, - "grad_norm": 1677.2169189453125, - "learning_rate": 1.1515151515151517e-05, - "loss": 326.8289, + "epoch": 0.02357612607023204, + "grad_norm": 1121.7471923828125, + "learning_rate": 2.303030303030303e-06, + "loss": 241.0812, "step": 2850 }, { - "epoch": 0.023109430425262, - "grad_norm": 733.2987060546875, - "learning_rate": 1.1555555555555556e-05, - "loss": 230.43, + "epoch": 0.023658849319601275, + "grad_norm": 2208.54150390625, + "learning_rate": 2.311111111111111e-06, + "loss": 403.2151, "step": 2860 }, { - "epoch": 0.023190232629546136, - "grad_norm": 630.212890625, - "learning_rate": 1.1595959595959597e-05, - "loss": 205.1732, + "epoch": 0.02374157256897051, + "grad_norm": 1187.6275634765625, + "learning_rate": 2.3191919191919192e-06, + "loss": 338.0163, "step": 2870 }, { - "epoch": 0.023271034833830268, - "grad_norm": 1535.36572265625, - "learning_rate": 1.1636363636363637e-05, - "loss": 304.9371, + "epoch": 0.023824295818339745, + "grad_norm": 1252.1275634765625, + "learning_rate": 2.3272727272727277e-06, + "loss": 275.806, "step": 2880 }, { - "epoch": 0.0233518370381144, - "grad_norm": 1065.3255615234375, - "learning_rate": 1.1676767676767678e-05, - "loss": 219.3441, + "epoch": 0.02390701906770898, + "grad_norm": 1646.0361328125, + "learning_rate": 2.3353535353535357e-06, + "loss": 278.4115, "step": 2890 }, { - "epoch": 0.023432639242398533, - "grad_norm": 2319.004638671875, - "learning_rate": 1.1717171717171718e-05, - "loss": 274.9734, + "epoch": 0.023989742317078216, + "grad_norm": 1795.2572021484375, + "learning_rate": 2.3434343434343437e-06, + "loss": 343.9991, "step": 2900 }, { - "epoch": 0.023513441446682666, - "grad_norm": 1362.914794921875, - "learning_rate": 1.1757575757575757e-05, - "loss": 214.0071, + "epoch": 0.02407246556644745, + "grad_norm": 2612.888916015625, + "learning_rate": 2.3515151515151517e-06, + "loss": 280.1369, "step": 2910 }, { - "epoch": 0.0235942436509668, - "grad_norm": 1748.42333984375, - "learning_rate": 1.1797979797979798e-05, - "loss": 247.8715, + "epoch": 0.024155188815816686, + "grad_norm": 2178.56884765625, + "learning_rate": 2.3595959595959598e-06, + "loss": 315.9677, "step": 2920 }, { - "epoch": 0.02367504585525093, - "grad_norm": 1357.4864501953125, - "learning_rate": 1.1838383838383838e-05, - "loss": 367.5896, + "epoch": 0.02423791206518592, + "grad_norm": 2697.52490234375, + "learning_rate": 2.367676767676768e-06, + "loss": 263.7618, "step": 2930 }, { - "epoch": 0.023755848059535064, - "grad_norm": 1028.1129150390625, - "learning_rate": 1.187878787878788e-05, - "loss": 248.4116, + "epoch": 0.024320635314555156, + "grad_norm": 1224.319091796875, + "learning_rate": 2.375757575757576e-06, + "loss": 256.7818, "step": 2940 }, { - "epoch": 0.023836650263819197, - "grad_norm": 1497.2218017578125, - "learning_rate": 1.1919191919191921e-05, - "loss": 309.2206, + "epoch": 0.02440335856392439, + "grad_norm": 871.7252807617188, + "learning_rate": 2.3838383838383843e-06, + "loss": 215.4878, "step": 2950 }, { - "epoch": 0.02391745246810333, - "grad_norm": 830.5894775390625, - "learning_rate": 1.1959595959595961e-05, - "loss": 203.645, + "epoch": 0.024486081813293627, + "grad_norm": 1057.8360595703125, + "learning_rate": 2.3919191919191923e-06, + "loss": 249.1406, "step": 2960 }, { - "epoch": 0.023998254672387462, - "grad_norm": 1067.825439453125, - "learning_rate": 1.2e-05, - "loss": 226.1753, + "epoch": 0.02456880506266286, + "grad_norm": 1478.2142333984375, + "learning_rate": 2.4000000000000003e-06, + "loss": 339.5534, "step": 2970 }, { - "epoch": 0.024079056876671594, - "grad_norm": 1088.949462890625, - "learning_rate": 1.2040404040404041e-05, - "loss": 235.4068, + "epoch": 0.024651528312032097, + "grad_norm": 1867.17529296875, + "learning_rate": 2.4080808080808083e-06, + "loss": 280.7602, "step": 2980 }, { - "epoch": 0.024159859080955727, - "grad_norm": 1378.7232666015625, - "learning_rate": 1.2080808080808081e-05, - "loss": 258.9663, + "epoch": 0.02473425156140133, + "grad_norm": 2076.963623046875, + "learning_rate": 2.4161616161616164e-06, + "loss": 259.5281, "step": 2990 }, { - "epoch": 0.02424066128523986, - "grad_norm": 1665.1182861328125, - "learning_rate": 1.2121212121212122e-05, - "loss": 178.7374, + "epoch": 0.024816974810770567, + "grad_norm": 1526.342041015625, + "learning_rate": 2.4242424242424244e-06, + "loss": 382.3653, "step": 3000 }, { - "epoch": 0.024321463489523996, - "grad_norm": 1254.1650390625, - "learning_rate": 1.2161616161616162e-05, - "loss": 196.0946, + "epoch": 0.0248996980601398, + "grad_norm": 3320.03564453125, + "learning_rate": 2.432323232323233e-06, + "loss": 317.5047, "step": 3010 }, { - "epoch": 0.02440226569380813, - "grad_norm": 1680.0679931640625, - "learning_rate": 1.2202020202020201e-05, - "loss": 232.687, + "epoch": 0.024982421309509038, + "grad_norm": 1523.0584716796875, + "learning_rate": 2.4404040404040404e-06, + "loss": 304.8877, "step": 3020 }, { - "epoch": 0.02448306789809226, - "grad_norm": 1063.8585205078125, - "learning_rate": 1.2242424242424242e-05, - "loss": 247.0947, + "epoch": 0.02506514455887827, + "grad_norm": 751.4943237304688, + "learning_rate": 2.4484848484848485e-06, + "loss": 304.1197, "step": 3030 }, { - "epoch": 0.024563870102376394, - "grad_norm": 3708.090087890625, - "learning_rate": 1.2282828282828284e-05, - "loss": 252.8591, + "epoch": 0.025147867808247508, + "grad_norm": 1342.514404296875, + "learning_rate": 2.456565656565657e-06, + "loss": 270.6673, "step": 3040 }, { - "epoch": 0.024644672306660526, - "grad_norm": 1465.7147216796875, - "learning_rate": 1.2323232323232325e-05, - "loss": 262.5838, + "epoch": 0.025230591057616745, + "grad_norm": 1611.482421875, + "learning_rate": 2.464646464646465e-06, + "loss": 237.246, "step": 3050 }, { - "epoch": 0.02472547451094466, - "grad_norm": 1327.937255859375, - "learning_rate": 1.2363636363636365e-05, - "loss": 363.5593, + "epoch": 0.025313314306985978, + "grad_norm": 1269.6123046875, + "learning_rate": 2.472727272727273e-06, + "loss": 253.5089, "step": 3060 }, { - "epoch": 0.02480627671522879, - "grad_norm": 1344.0130615234375, - "learning_rate": 1.2404040404040404e-05, - "loss": 257.7762, + "epoch": 0.025396037556355215, + "grad_norm": 2930.730712890625, + "learning_rate": 2.480808080808081e-06, + "loss": 236.9861, "step": 3070 }, { - "epoch": 0.024887078919512924, - "grad_norm": 1054.320068359375, - "learning_rate": 1.2444444444444445e-05, - "loss": 247.01, + "epoch": 0.02547876080572445, + "grad_norm": 1325.4600830078125, + "learning_rate": 2.488888888888889e-06, + "loss": 270.6385, "step": 3080 }, { - "epoch": 0.024967881123797057, - "grad_norm": 1068.103515625, - "learning_rate": 1.2484848484848485e-05, - "loss": 248.3725, + "epoch": 0.025561484055093685, + "grad_norm": 1557.753662109375, + "learning_rate": 2.496969696969697e-06, + "loss": 262.9895, "step": 3090 }, { - "epoch": 0.02504868332808119, - "grad_norm": 1697.66259765625, - "learning_rate": 1.2525252525252526e-05, - "loss": 258.5855, + "epoch": 0.02564420730446292, + "grad_norm": 2465.635498046875, + "learning_rate": 2.5050505050505055e-06, + "loss": 273.6902, "step": 3100 }, { - "epoch": 0.025129485532365322, - "grad_norm": 2154.507080078125, - "learning_rate": 1.2565656565656566e-05, - "loss": 324.4505, + "epoch": 0.025726930553832156, + "grad_norm": 938.6351318359375, + "learning_rate": 2.5131313131313135e-06, + "loss": 233.0343, "step": 3110 }, { - "epoch": 0.025210287736649455, - "grad_norm": 1405.111083984375, - "learning_rate": 1.2606060606060607e-05, - "loss": 201.9885, + "epoch": 0.02580965380320139, + "grad_norm": 1145.6470947265625, + "learning_rate": 2.5212121212121215e-06, + "loss": 211.7665, "step": 3120 }, { - "epoch": 0.025291089940933587, - "grad_norm": 1943.2344970703125, - "learning_rate": 1.2646464646464647e-05, - "loss": 279.3126, + "epoch": 0.025892377052570626, + "grad_norm": 834.8644409179688, + "learning_rate": 2.5292929292929296e-06, + "loss": 240.4035, "step": 3130 }, { - "epoch": 0.02537189214521772, - "grad_norm": 1883.3538818359375, - "learning_rate": 1.2686868686868688e-05, - "loss": 305.4725, + "epoch": 0.02597510030193986, + "grad_norm": 1026.044921875, + "learning_rate": 2.537373737373738e-06, + "loss": 218.8928, "step": 3140 }, { - "epoch": 0.025452694349501856, - "grad_norm": 2943.758544921875, - "learning_rate": 1.2727272727272727e-05, - "loss": 282.1084, + "epoch": 0.026057823551309096, + "grad_norm": 2245.337890625, + "learning_rate": 2.5454545454545456e-06, + "loss": 214.8017, "step": 3150 }, { - "epoch": 0.02553349655378599, - "grad_norm": 1242.160400390625, - "learning_rate": 1.2767676767676767e-05, - "loss": 220.8162, + "epoch": 0.02614054680067833, + "grad_norm": 2408.834716796875, + "learning_rate": 2.5535353535353536e-06, + "loss": 332.6547, "step": 3160 }, { - "epoch": 0.02561429875807012, - "grad_norm": 2627.211181640625, - "learning_rate": 1.2808080808080808e-05, - "loss": 303.4214, + "epoch": 0.026223270050047567, + "grad_norm": 872.4295043945312, + "learning_rate": 2.5616161616161617e-06, + "loss": 223.9143, "step": 3170 }, { - "epoch": 0.025695100962354254, - "grad_norm": 1310.1988525390625, - "learning_rate": 1.2848484848484848e-05, - "loss": 259.6753, + "epoch": 0.0263059932994168, + "grad_norm": 2037.6937255859375, + "learning_rate": 2.5696969696969697e-06, + "loss": 298.6558, "step": 3180 }, { - "epoch": 0.025775903166638386, - "grad_norm": 1910.4666748046875, - "learning_rate": 1.2888888888888889e-05, - "loss": 273.7454, + "epoch": 0.026388716548786037, + "grad_norm": 945.5786743164062, + "learning_rate": 2.577777777777778e-06, + "loss": 222.1081, "step": 3190 }, { - "epoch": 0.02585670537092252, - "grad_norm": 740.5687255859375, - "learning_rate": 1.292929292929293e-05, - "loss": 292.6491, + "epoch": 0.02647143979815527, + "grad_norm": 1916.262451171875, + "learning_rate": 2.585858585858586e-06, + "loss": 295.9917, "step": 3200 }, { - "epoch": 0.02593750757520665, - "grad_norm": 1202.99462890625, - "learning_rate": 1.296969696969697e-05, - "loss": 243.2686, + "epoch": 0.026554163047524507, + "grad_norm": 4162.72900390625, + "learning_rate": 2.593939393939394e-06, + "loss": 287.0178, "step": 3210 }, { - "epoch": 0.026018309779490784, - "grad_norm": 2174.3525390625, - "learning_rate": 1.301010101010101e-05, - "loss": 276.3461, + "epoch": 0.02663688629689374, + "grad_norm": 1126.4703369140625, + "learning_rate": 2.602020202020202e-06, + "loss": 255.6603, "step": 3220 }, { - "epoch": 0.026099111983774917, - "grad_norm": 1177.9141845703125, - "learning_rate": 1.3050505050505051e-05, - "loss": 246.7616, + "epoch": 0.026719609546262978, + "grad_norm": 850.2581176757812, + "learning_rate": 2.6101010101010107e-06, + "loss": 273.4949, "step": 3230 }, { - "epoch": 0.02617991418805905, - "grad_norm": 1337.3155517578125, - "learning_rate": 1.3090909090909093e-05, - "loss": 279.5415, + "epoch": 0.02680233279563221, + "grad_norm": 1589.9171142578125, + "learning_rate": 2.6181818181818187e-06, + "loss": 175.5844, "step": 3240 }, { - "epoch": 0.026260716392343182, - "grad_norm": 970.5191040039062, - "learning_rate": 1.3131313131313134e-05, - "loss": 240.0207, + "epoch": 0.026885056045001448, + "grad_norm": 1970.279052734375, + "learning_rate": 2.6262626262626267e-06, + "loss": 254.5701, "step": 3250 }, { - "epoch": 0.026341518596627315, - "grad_norm": 1119.1689453125, - "learning_rate": 1.3171717171717171e-05, - "loss": 242.7696, + "epoch": 0.02696777929437068, + "grad_norm": 1464.1839599609375, + "learning_rate": 2.6343434343434343e-06, + "loss": 236.1957, "step": 3260 }, { - "epoch": 0.026422320800911447, - "grad_norm": 4005.26318359375, - "learning_rate": 1.3212121212121212e-05, - "loss": 288.4158, + "epoch": 0.027050502543739918, + "grad_norm": 1190.6724853515625, + "learning_rate": 2.6424242424242423e-06, + "loss": 305.4469, "step": 3270 }, { - "epoch": 0.02650312300519558, - "grad_norm": 2148.187255859375, - "learning_rate": 1.3252525252525252e-05, - "loss": 224.5919, + "epoch": 0.02713322579310915, + "grad_norm": 1271.06640625, + "learning_rate": 2.6505050505050508e-06, + "loss": 311.8845, "step": 3280 }, { - "epoch": 0.026583925209479716, - "grad_norm": 1367.2222900390625, - "learning_rate": 1.3292929292929293e-05, - "loss": 212.8641, + "epoch": 0.02721594904247839, + "grad_norm": 577.0706787109375, + "learning_rate": 2.658585858585859e-06, + "loss": 259.7036, "step": 3290 }, { - "epoch": 0.02666472741376385, - "grad_norm": 1278.0506591796875, - "learning_rate": 1.3333333333333333e-05, - "loss": 222.3518, + "epoch": 0.027298672291847625, + "grad_norm": 3129.1865234375, + "learning_rate": 2.666666666666667e-06, + "loss": 279.0257, "step": 3300 }, { - "epoch": 0.02674552961804798, - "grad_norm": 1361.965087890625, - "learning_rate": 1.3373737373737374e-05, - "loss": 257.0285, + "epoch": 0.02738139554121686, + "grad_norm": 1438.704833984375, + "learning_rate": 2.674747474747475e-06, + "loss": 256.3712, "step": 3310 }, { - "epoch": 0.026826331822332114, - "grad_norm": 1330.8619384765625, - "learning_rate": 1.3414141414141414e-05, - "loss": 293.7213, + "epoch": 0.027464118790586096, + "grad_norm": 1322.8076171875, + "learning_rate": 2.6828282828282833e-06, + "loss": 218.8963, "step": 3320 }, { - "epoch": 0.026907134026616247, - "grad_norm": 1107.8526611328125, - "learning_rate": 1.3454545454545457e-05, - "loss": 198.6048, + "epoch": 0.02754684203995533, + "grad_norm": 1651.8616943359375, + "learning_rate": 2.6909090909090913e-06, + "loss": 253.5638, "step": 3330 }, { - "epoch": 0.02698793623090038, - "grad_norm": 1048.5009765625, - "learning_rate": 1.3494949494949497e-05, - "loss": 197.4301, + "epoch": 0.027629565289324566, + "grad_norm": 1041.0313720703125, + "learning_rate": 2.6989898989898994e-06, + "loss": 283.8154, "step": 3340 }, { - "epoch": 0.027068738435184512, - "grad_norm": 2446.47119140625, - "learning_rate": 1.3535353535353538e-05, - "loss": 287.2842, + "epoch": 0.0277122885386938, + "grad_norm": 995.6533813476562, + "learning_rate": 2.7070707070707074e-06, + "loss": 282.6185, "step": 3350 }, { - "epoch": 0.027149540639468644, - "grad_norm": 1396.544921875, - "learning_rate": 1.3575757575757578e-05, - "loss": 258.2231, + "epoch": 0.027795011788063036, + "grad_norm": 810.1492309570312, + "learning_rate": 2.715151515151516e-06, + "loss": 211.4098, "step": 3360 }, { - "epoch": 0.027230342843752777, - "grad_norm": 904.5388793945312, - "learning_rate": 1.3616161616161615e-05, - "loss": 376.3808, + "epoch": 0.02787773503743227, + "grad_norm": 3923.459228515625, + "learning_rate": 2.7232323232323234e-06, + "loss": 237.4153, "step": 3370 }, { - "epoch": 0.02731114504803691, - "grad_norm": 1247.5994873046875, - "learning_rate": 1.3656565656565656e-05, - "loss": 318.9619, + "epoch": 0.027960458286801507, + "grad_norm": 1535.9481201171875, + "learning_rate": 2.7313131313131315e-06, + "loss": 267.656, "step": 3380 }, { - "epoch": 0.027391947252321042, - "grad_norm": 1123.76220703125, - "learning_rate": 1.3696969696969697e-05, - "loss": 235.3794, + "epoch": 0.02804318153617074, + "grad_norm": 1280.874267578125, + "learning_rate": 2.7393939393939395e-06, + "loss": 300.4168, "step": 3390 }, { - "epoch": 0.027472749456605175, - "grad_norm": 2435.860107421875, - "learning_rate": 1.3737373737373737e-05, - "loss": 290.7648, + "epoch": 0.028125904785539977, + "grad_norm": 1072.8399658203125, + "learning_rate": 2.7474747474747475e-06, + "loss": 218.5466, "step": 3400 }, { - "epoch": 0.027553551660889308, - "grad_norm": 1895.08642578125, - "learning_rate": 1.3777777777777778e-05, - "loss": 228.9499, + "epoch": 0.02820862803490921, + "grad_norm": 3512.74365234375, + "learning_rate": 2.755555555555556e-06, + "loss": 270.4754, "step": 3410 }, { - "epoch": 0.02763435386517344, - "grad_norm": 1057.9578857421875, - "learning_rate": 1.3818181818181818e-05, - "loss": 199.0401, + "epoch": 0.028291351284278447, + "grad_norm": 1300.0826416015625, + "learning_rate": 2.763636363636364e-06, + "loss": 288.5931, "step": 3420 }, { - "epoch": 0.027715156069457576, - "grad_norm": 1296.09130859375, - "learning_rate": 1.385858585858586e-05, - "loss": 238.6514, + "epoch": 0.02837407453364768, + "grad_norm": 1038.111572265625, + "learning_rate": 2.771717171717172e-06, + "loss": 233.5386, "step": 3430 }, { - "epoch": 0.02779595827374171, - "grad_norm": 3411.5341796875, - "learning_rate": 1.3898989898989901e-05, - "loss": 299.6798, + "epoch": 0.028456797783016918, + "grad_norm": 771.6912231445312, + "learning_rate": 2.77979797979798e-06, + "loss": 340.9047, "step": 3440 }, { - "epoch": 0.02787676047802584, - "grad_norm": 1198.8946533203125, - "learning_rate": 1.3939393939393942e-05, - "loss": 258.3361, + "epoch": 0.02853952103238615, + "grad_norm": 996.7304077148438, + "learning_rate": 2.7878787878787885e-06, + "loss": 290.5169, "step": 3450 }, { - "epoch": 0.027957562682309974, - "grad_norm": 839.223388671875, - "learning_rate": 1.3979797979797982e-05, - "loss": 222.8733, + "epoch": 0.028622244281755388, + "grad_norm": 1329.12109375, + "learning_rate": 2.7959595959595965e-06, + "loss": 328.9499, "step": 3460 }, { - "epoch": 0.028038364886594107, - "grad_norm": 1462.1505126953125, - "learning_rate": 1.402020202020202e-05, - "loss": 206.9738, + "epoch": 0.02870496753112462, + "grad_norm": 3462.29248046875, + "learning_rate": 2.804040404040404e-06, + "loss": 302.2541, "step": 3470 }, { - "epoch": 0.02811916709087824, - "grad_norm": 1066.7890625, - "learning_rate": 1.406060606060606e-05, - "loss": 190.5506, + "epoch": 0.028787690780493858, + "grad_norm": 1650.1275634765625, + "learning_rate": 2.812121212121212e-06, + "loss": 266.2151, "step": 3480 }, { - "epoch": 0.028199969295162372, - "grad_norm": 2753.951171875, - "learning_rate": 1.41010101010101e-05, - "loss": 210.7065, + "epoch": 0.02887041402986309, + "grad_norm": 2317.211669921875, + "learning_rate": 2.82020202020202e-06, + "loss": 231.3138, "step": 3490 }, { - "epoch": 0.028280771499446505, - "grad_norm": 1700.187744140625, - "learning_rate": 1.4141414141414141e-05, - "loss": 239.0851, + "epoch": 0.02895313727923233, + "grad_norm": 1898.203125, + "learning_rate": 2.8282828282828286e-06, + "loss": 310.349, "step": 3500 }, { - "epoch": 0.028361573703730637, - "grad_norm": 1913.965576171875, - "learning_rate": 1.4181818181818181e-05, - "loss": 244.5272, + "epoch": 0.029035860528601562, + "grad_norm": 3188.328125, + "learning_rate": 2.8363636363636366e-06, + "loss": 290.0499, "step": 3510 }, { - "epoch": 0.02844237590801477, - "grad_norm": 1347.6934814453125, - "learning_rate": 1.4222222222222224e-05, - "loss": 219.5849, + "epoch": 0.0291185837779708, + "grad_norm": 1257.7547607421875, + "learning_rate": 2.8444444444444446e-06, + "loss": 243.0518, "step": 3520 }, { - "epoch": 0.028523178112298903, - "grad_norm": 894.3698120117188, - "learning_rate": 1.4262626262626264e-05, - "loss": 261.3107, + "epoch": 0.029201307027340032, + "grad_norm": 910.1399536132812, + "learning_rate": 2.8525252525252527e-06, + "loss": 259.2063, "step": 3530 }, { - "epoch": 0.028603980316583035, - "grad_norm": 649.42236328125, - "learning_rate": 1.4303030303030305e-05, - "loss": 202.5557, + "epoch": 0.02928403027670927, + "grad_norm": 899.6647338867188, + "learning_rate": 2.860606060606061e-06, + "loss": 283.3083, "step": 3540 }, { - "epoch": 0.028684782520867168, - "grad_norm": 824.8812255859375, - "learning_rate": 1.4343434343434345e-05, - "loss": 245.8003, + "epoch": 0.029366753526078506, + "grad_norm": 1044.6268310546875, + "learning_rate": 2.868686868686869e-06, + "loss": 180.7959, "step": 3550 }, { - "epoch": 0.0287655847251513, - "grad_norm": 828.0931396484375, - "learning_rate": 1.4383838383838386e-05, - "loss": 260.2875, + "epoch": 0.02944947677544774, + "grad_norm": 593.9415283203125, + "learning_rate": 2.876767676767677e-06, + "loss": 218.3748, "step": 3560 }, { - "epoch": 0.028846386929435437, - "grad_norm": 1334.4947509765625, - "learning_rate": 1.4424242424242426e-05, - "loss": 232.7898, + "epoch": 0.029532200024816976, + "grad_norm": 1013.0276489257812, + "learning_rate": 2.884848484848485e-06, + "loss": 261.4349, "step": 3570 }, { - "epoch": 0.02892718913371957, - "grad_norm": 1371.1171875, - "learning_rate": 1.4464646464646464e-05, - "loss": 418.4771, + "epoch": 0.02961492327418621, + "grad_norm": 1403.456787109375, + "learning_rate": 2.892929292929293e-06, + "loss": 290.2407, "step": 3580 }, { - "epoch": 0.029007991338003702, - "grad_norm": 18497.5234375, - "learning_rate": 1.4505050505050504e-05, - "loss": 303.3979, + "epoch": 0.029697646523555447, + "grad_norm": 1189.52783203125, + "learning_rate": 2.9010101010101012e-06, + "loss": 268.2149, "step": 3590 }, { - "epoch": 0.029088793542287834, - "grad_norm": 1640.417724609375, - "learning_rate": 1.4545454545454545e-05, - "loss": 246.1203, + "epoch": 0.02978036977292468, + "grad_norm": 6295.24853515625, + "learning_rate": 2.9090909090909093e-06, + "loss": 312.3619, "step": 3600 }, { - "epoch": 0.029169595746571967, - "grad_norm": 866.4635620117188, - "learning_rate": 1.4585858585858587e-05, - "loss": 227.0032, + "epoch": 0.029863093022293917, + "grad_norm": 2903.958251953125, + "learning_rate": 2.9171717171717173e-06, + "loss": 252.4466, "step": 3610 }, { - "epoch": 0.0292503979508561, - "grad_norm": 1206.3389892578125, - "learning_rate": 1.4626262626262627e-05, - "loss": 240.7797, + "epoch": 0.02994581627166315, + "grad_norm": 1029.76708984375, + "learning_rate": 2.9252525252525253e-06, + "loss": 303.6254, "step": 3620 }, { - "epoch": 0.029331200155140232, - "grad_norm": 1930.5679931640625, - "learning_rate": 1.4666666666666668e-05, - "loss": 244.9207, + "epoch": 0.030028539521032387, + "grad_norm": 894.25439453125, + "learning_rate": 2.9333333333333338e-06, + "loss": 212.2832, "step": 3630 }, { - "epoch": 0.029412002359424365, - "grad_norm": 1362.0755615234375, - "learning_rate": 1.4707070707070709e-05, - "loss": 223.5896, + "epoch": 0.03011126277040162, + "grad_norm": 1585.9566650390625, + "learning_rate": 2.941414141414142e-06, + "loss": 304.3559, "step": 3640 }, { - "epoch": 0.029492804563708497, - "grad_norm": 1778.240478515625, - "learning_rate": 1.4747474747474749e-05, - "loss": 233.0804, + "epoch": 0.030193986019770858, + "grad_norm": 1158.5279541015625, + "learning_rate": 2.94949494949495e-06, + "loss": 183.8355, "step": 3650 }, { - "epoch": 0.02957360676799263, - "grad_norm": 1185.7432861328125, - "learning_rate": 1.478787878787879e-05, - "loss": 269.5211, + "epoch": 0.03027670926914009, + "grad_norm": 1179.15673828125, + "learning_rate": 2.957575757575758e-06, + "loss": 377.2282, "step": 3660 }, { - "epoch": 0.029654408972276763, - "grad_norm": 1272.7274169921875, - "learning_rate": 1.482828282828283e-05, - "loss": 256.0854, + "epoch": 0.030359432518509328, + "grad_norm": 3017.98486328125, + "learning_rate": 2.9656565656565663e-06, + "loss": 298.7828, "step": 3670 }, { - "epoch": 0.029735211176560895, - "grad_norm": 3724.482421875, - "learning_rate": 1.486868686868687e-05, - "loss": 220.0564, + "epoch": 0.03044215576787856, + "grad_norm": 1472.5626220703125, + "learning_rate": 2.9737373737373743e-06, + "loss": 227.1208, "step": 3680 }, { - "epoch": 0.029816013380845028, - "grad_norm": 1362.2408447265625, - "learning_rate": 1.4909090909090908e-05, - "loss": 196.9579, + "epoch": 0.030524879017247798, + "grad_norm": 1372.93017578125, + "learning_rate": 2.981818181818182e-06, + "loss": 229.019, "step": 3690 }, { - "epoch": 0.02989681558512916, - "grad_norm": 1142.985107421875, - "learning_rate": 1.494949494949495e-05, - "loss": 298.3712, + "epoch": 0.03060760226661703, + "grad_norm": 1071.147705078125, + "learning_rate": 2.98989898989899e-06, + "loss": 279.2174, "step": 3700 }, { - "epoch": 0.029977617789413297, - "grad_norm": 1711.4461669921875, - "learning_rate": 1.498989898989899e-05, - "loss": 248.673, + "epoch": 0.03069032551598627, + "grad_norm": 1144.8431396484375, + "learning_rate": 2.997979797979798e-06, + "loss": 253.8975, "step": 3710 }, { - "epoch": 0.03005841999369743, - "grad_norm": 1854.973876953125, - "learning_rate": 1.5030303030303031e-05, - "loss": 178.4528, + "epoch": 0.030773048765355502, + "grad_norm": 1811.742431640625, + "learning_rate": 3.0060606060606064e-06, + "loss": 265.0974, "step": 3720 }, { - "epoch": 0.030139222197981562, - "grad_norm": 2415.3564453125, - "learning_rate": 1.5070707070707072e-05, - "loss": 279.0313, + "epoch": 0.03085577201472474, + "grad_norm": 1689.624755859375, + "learning_rate": 3.0141414141414144e-06, + "loss": 351.1895, "step": 3730 }, { - "epoch": 0.030220024402265695, - "grad_norm": 1113.0447998046875, - "learning_rate": 1.5111111111111112e-05, - "loss": 263.2642, + "epoch": 0.030938495264093972, + "grad_norm": 1279.59765625, + "learning_rate": 3.0222222222222225e-06, + "loss": 278.09, "step": 3740 }, { - "epoch": 0.030300826606549827, - "grad_norm": 1523.1632080078125, - "learning_rate": 1.5151515151515153e-05, - "loss": 292.6833, + "epoch": 0.03102121851346321, + "grad_norm": 919.4520263671875, + "learning_rate": 3.0303030303030305e-06, + "loss": 265.7346, "step": 3750 }, { - "epoch": 0.03038162881083396, - "grad_norm": 1810.5382080078125, - "learning_rate": 1.5191919191919193e-05, - "loss": 260.2465, + "epoch": 0.031103941762832443, + "grad_norm": 1729.0640869140625, + "learning_rate": 3.038383838383839e-06, + "loss": 276.5641, "step": 3760 }, { - "epoch": 0.030462431015118092, - "grad_norm": 2051.318115234375, - "learning_rate": 1.5232323232323234e-05, - "loss": 249.5686, + "epoch": 0.03118666501220168, + "grad_norm": 1261.3045654296875, + "learning_rate": 3.046464646464647e-06, + "loss": 271.3371, "step": 3770 }, { - "epoch": 0.030543233219402225, - "grad_norm": 1145.482421875, - "learning_rate": 1.5272727272727276e-05, - "loss": 217.0, + "epoch": 0.03126938826157091, + "grad_norm": 2631.84619140625, + "learning_rate": 3.054545454545455e-06, + "loss": 324.4734, "step": 3780 }, { - "epoch": 0.030624035423686358, - "grad_norm": 1456.9969482421875, - "learning_rate": 1.531313131313131e-05, - "loss": 247.1355, + "epoch": 0.031352111510940146, + "grad_norm": 1084.5216064453125, + "learning_rate": 3.0626262626262626e-06, + "loss": 262.7237, "step": 3790 }, { - "epoch": 0.03070483762797049, - "grad_norm": 2063.9072265625, - "learning_rate": 1.5353535353535354e-05, - "loss": 317.8373, + "epoch": 0.03143483476030939, + "grad_norm": 1438.6495361328125, + "learning_rate": 3.0707070707070706e-06, + "loss": 369.0003, "step": 3800 }, { - "epoch": 0.030785639832254623, - "grad_norm": 1188.59130859375, - "learning_rate": 1.5393939393939393e-05, - "loss": 251.0659, + "epoch": 0.03151755800967862, + "grad_norm": 1348.9072265625, + "learning_rate": 3.078787878787879e-06, + "loss": 279.291, "step": 3810 }, { - "epoch": 0.030866442036538756, - "grad_norm": 542.1653442382812, - "learning_rate": 1.5434343434343435e-05, - "loss": 205.6288, + "epoch": 0.031600281259047854, + "grad_norm": 1024.0528564453125, + "learning_rate": 3.086868686868687e-06, + "loss": 209.3468, "step": 3820 }, { - "epoch": 0.030947244240822888, - "grad_norm": 858.66552734375, - "learning_rate": 1.5474747474747474e-05, - "loss": 261.1724, + "epoch": 0.031683004508417094, + "grad_norm": 957.8978881835938, + "learning_rate": 3.094949494949495e-06, + "loss": 243.3487, "step": 3830 }, { - "epoch": 0.031028046445107024, - "grad_norm": 1392.4208984375, - "learning_rate": 1.5515151515151516e-05, - "loss": 263.898, + "epoch": 0.03176572775778633, + "grad_norm": 2287.8779296875, + "learning_rate": 3.103030303030303e-06, + "loss": 267.3116, "step": 3840 }, { - "epoch": 0.031108848649391157, - "grad_norm": 1089.10888671875, - "learning_rate": 1.5555555555555555e-05, - "loss": 263.4895, + "epoch": 0.03184845100715556, + "grad_norm": 975.30078125, + "learning_rate": 3.1111111111111116e-06, + "loss": 248.6962, "step": 3850 }, { - "epoch": 0.03118965085367529, - "grad_norm": 1323.1083984375, - "learning_rate": 1.5595959595959597e-05, - "loss": 224.5914, + "epoch": 0.031931174256524794, + "grad_norm": 1127.2760009765625, + "learning_rate": 3.1191919191919196e-06, + "loss": 266.4931, "step": 3860 }, { - "epoch": 0.03127045305795942, - "grad_norm": 748.7206420898438, - "learning_rate": 1.563636363636364e-05, - "loss": 185.8181, + "epoch": 0.032013897505894034, + "grad_norm": 1335.565673828125, + "learning_rate": 3.1272727272727276e-06, + "loss": 323.9828, "step": 3870 }, { - "epoch": 0.031351255262243555, - "grad_norm": 1530.072021484375, - "learning_rate": 1.5676767676767678e-05, - "loss": 295.1081, + "epoch": 0.03209662075526327, + "grad_norm": 1591.8233642578125, + "learning_rate": 3.1353535353535357e-06, + "loss": 302.0961, "step": 3880 }, { - "epoch": 0.031432057466527684, - "grad_norm": 1390.1978759765625, - "learning_rate": 1.571717171717172e-05, - "loss": 221.7333, + "epoch": 0.0321793440046325, + "grad_norm": 1125.4427490234375, + "learning_rate": 3.143434343434344e-06, + "loss": 273.0575, "step": 3890 }, { - "epoch": 0.03151285967081182, - "grad_norm": 1188.6934814453125, - "learning_rate": 1.5757575757575756e-05, - "loss": 231.4922, + "epoch": 0.032262067254001735, + "grad_norm": 2481.316650390625, + "learning_rate": 3.1515151515151517e-06, + "loss": 216.6466, "step": 3900 }, { - "epoch": 0.031593661875095956, - "grad_norm": 1810.8616943359375, - "learning_rate": 1.5797979797979798e-05, - "loss": 226.2008, + "epoch": 0.032344790503370975, + "grad_norm": 1574.20654296875, + "learning_rate": 3.1595959595959597e-06, + "loss": 265.3724, "step": 3910 }, { - "epoch": 0.031674464079380085, - "grad_norm": 1351.2021484375, - "learning_rate": 1.5838383838383837e-05, - "loss": 213.8082, + "epoch": 0.03242751375274021, + "grad_norm": 2719.501220703125, + "learning_rate": 3.1676767676767678e-06, + "loss": 257.04, "step": 3920 }, { - "epoch": 0.03175526628366422, - "grad_norm": 1504.8511962890625, - "learning_rate": 1.587878787878788e-05, - "loss": 237.731, + "epoch": 0.03251023700210944, + "grad_norm": 1456.691162109375, + "learning_rate": 3.1757575757575758e-06, + "loss": 268.7118, "step": 3930 }, { - "epoch": 0.03183606848794835, - "grad_norm": 3990.205810546875, - "learning_rate": 1.5919191919191918e-05, - "loss": 308.5875, + "epoch": 0.032592960251478675, + "grad_norm": 1164.705810546875, + "learning_rate": 3.1838383838383842e-06, + "loss": 212.9811, "step": 3940 }, { - "epoch": 0.03191687069223249, - "grad_norm": 1052.4140625, - "learning_rate": 1.595959595959596e-05, - "loss": 173.6135, + "epoch": 0.032675683500847916, + "grad_norm": 1381.672119140625, + "learning_rate": 3.1919191919191923e-06, + "loss": 267.6111, "step": 3950 }, { - "epoch": 0.031997672896516616, - "grad_norm": 818.5986328125, - "learning_rate": 1.6000000000000003e-05, - "loss": 255.928, + "epoch": 0.03275840675021715, + "grad_norm": 1761.30908203125, + "learning_rate": 3.2000000000000003e-06, + "loss": 218.0799, "step": 3960 }, { - "epoch": 0.03207847510080075, - "grad_norm": 3015.482666015625, - "learning_rate": 1.604040404040404e-05, - "loss": 246.6157, + "epoch": 0.03284112999958638, + "grad_norm": 1165.8375244140625, + "learning_rate": 3.2080808080808083e-06, + "loss": 264.5883, "step": 3970 }, { - "epoch": 0.03215927730508488, - "grad_norm": 1520.350341796875, - "learning_rate": 1.6080808080808084e-05, - "loss": 294.5478, + "epoch": 0.032923853248955616, + "grad_norm": 1598.2080078125, + "learning_rate": 3.2161616161616168e-06, + "loss": 205.0413, "step": 3980 }, { - "epoch": 0.03224007950936902, - "grad_norm": 1362.8385009765625, - "learning_rate": 1.6121212121212123e-05, - "loss": 263.2382, + "epoch": 0.033006576498324856, + "grad_norm": 911.1261596679688, + "learning_rate": 3.2242424242424248e-06, + "loss": 276.2504, "step": 3990 }, { - "epoch": 0.032320881713653146, - "grad_norm": 1330.2135009765625, - "learning_rate": 1.6161616161616165e-05, - "loss": 227.9952, + "epoch": 0.03308929974769409, + "grad_norm": 1438.65869140625, + "learning_rate": 3.232323232323233e-06, + "loss": 251.1899, "step": 4000 - }, - { - "epoch": 0.03240168391793728, - "grad_norm": 2001.479248046875, - "learning_rate": 1.62020202020202e-05, - "loss": 373.7298, - "step": 4010 - }, - { - "epoch": 0.03248248612222141, - "grad_norm": 670.8789672851562, - "learning_rate": 1.6242424242424243e-05, - "loss": 252.2481, - "step": 4020 - }, - { - "epoch": 0.03256328832650555, - "grad_norm": 1504.35205078125, - "learning_rate": 1.628282828282828e-05, - "loss": 259.7328, - "step": 4030 - }, - { - "epoch": 0.03264409053078968, - "grad_norm": 1177.47509765625, - "learning_rate": 1.6323232323232324e-05, - "loss": 220.6592, - "step": 4040 - }, - { - "epoch": 0.03272489273507381, - "grad_norm": 889.9537353515625, - "learning_rate": 1.6363636363636366e-05, - "loss": 210.0868, - "step": 4050 - }, - { - "epoch": 0.03280569493935795, - "grad_norm": 1655.767333984375, - "learning_rate": 1.6404040404040405e-05, - "loss": 247.7082, - "step": 4060 - }, - { - "epoch": 0.03288649714364208, - "grad_norm": 1741.26416015625, - "learning_rate": 1.6444444444444447e-05, - "loss": 213.1305, - "step": 4070 - }, - { - "epoch": 0.032967299347926214, - "grad_norm": 1701.3470458984375, - "learning_rate": 1.6484848484848486e-05, - "loss": 197.4172, - "step": 4080 - }, - { - "epoch": 0.03304810155221034, - "grad_norm": 1241.48876953125, - "learning_rate": 1.6525252525252528e-05, - "loss": 200.895, - "step": 4090 - }, - { - "epoch": 0.03312890375649448, - "grad_norm": 4305.5234375, - "learning_rate": 1.6565656565656567e-05, - "loss": 270.1561, - "step": 4100 - }, - { - "epoch": 0.03320970596077861, - "grad_norm": 1233.9559326171875, - "learning_rate": 1.6606060606060606e-05, - "loss": 234.697, - "step": 4110 - }, - { - "epoch": 0.033290508165062745, - "grad_norm": 1864.9722900390625, - "learning_rate": 1.6646464646464645e-05, - "loss": 207.7519, - "step": 4120 - }, - { - "epoch": 0.033371310369346874, - "grad_norm": 696.45654296875, - "learning_rate": 1.6686868686868687e-05, - "loss": 260.9977, - "step": 4130 - }, - { - "epoch": 0.03345211257363101, - "grad_norm": 1083.8914794921875, - "learning_rate": 1.672727272727273e-05, - "loss": 296.5648, - "step": 4140 - }, - { - "epoch": 0.03353291477791514, - "grad_norm": 787.8980102539062, - "learning_rate": 1.6767676767676768e-05, - "loss": 252.0068, - "step": 4150 - }, - { - "epoch": 0.033613716982199275, - "grad_norm": 3963.899658203125, - "learning_rate": 1.680808080808081e-05, - "loss": 239.0976, - "step": 4160 - }, - { - "epoch": 0.033694519186483404, - "grad_norm": 1345.8841552734375, - "learning_rate": 1.684848484848485e-05, - "loss": 200.5568, - "step": 4170 - }, - { - "epoch": 0.03377532139076754, - "grad_norm": 1667.1441650390625, - "learning_rate": 1.688888888888889e-05, - "loss": 246.528, - "step": 4180 - }, - { - "epoch": 0.033856123595051676, - "grad_norm": 941.3829956054688, - "learning_rate": 1.692929292929293e-05, - "loss": 188.0032, - "step": 4190 - }, - { - "epoch": 0.033936925799335806, - "grad_norm": 2224.00048828125, - "learning_rate": 1.6969696969696972e-05, - "loss": 233.2688, - "step": 4200 - }, - { - "epoch": 0.03401772800361994, - "grad_norm": 990.577880859375, - "learning_rate": 1.701010101010101e-05, - "loss": 229.0408, - "step": 4210 - }, - { - "epoch": 0.03409853020790407, - "grad_norm": 1741.591064453125, - "learning_rate": 1.705050505050505e-05, - "loss": 210.8973, - "step": 4220 - }, - { - "epoch": 0.03417933241218821, - "grad_norm": 1565.2149658203125, - "learning_rate": 1.7090909090909092e-05, - "loss": 172.9691, - "step": 4230 - }, - { - "epoch": 0.034260134616472336, - "grad_norm": 1411.6668701171875, - "learning_rate": 1.713131313131313e-05, - "loss": 223.8018, - "step": 4240 - }, - { - "epoch": 0.03434093682075647, - "grad_norm": 849.447998046875, - "learning_rate": 1.7171717171717173e-05, - "loss": 277.72, - "step": 4250 - }, - { - "epoch": 0.0344217390250406, - "grad_norm": 1456.3353271484375, - "learning_rate": 1.7212121212121212e-05, - "loss": 269.1795, - "step": 4260 - }, - { - "epoch": 0.03450254122932474, - "grad_norm": 2039.048583984375, - "learning_rate": 1.7252525252525255e-05, - "loss": 203.6644, - "step": 4270 - }, - { - "epoch": 0.03458334343360887, - "grad_norm": 1037.1063232421875, - "learning_rate": 1.7292929292929293e-05, - "loss": 268.1442, - "step": 4280 - }, - { - "epoch": 0.034664145637893, - "grad_norm": 1481.98095703125, - "learning_rate": 1.7333333333333336e-05, - "loss": 246.1609, - "step": 4290 - }, - { - "epoch": 0.03474494784217713, - "grad_norm": 1042.147216796875, - "learning_rate": 1.7373737373737375e-05, - "loss": 360.0711, - "step": 4300 - }, - { - "epoch": 0.03482575004646127, - "grad_norm": 1008.8258666992188, - "learning_rate": 1.7414141414141417e-05, - "loss": 254.1684, - "step": 4310 - }, - { - "epoch": 0.0349065522507454, - "grad_norm": 1818.73681640625, - "learning_rate": 1.7454545454545456e-05, - "loss": 248.8469, - "step": 4320 - }, - { - "epoch": 0.03498735445502953, - "grad_norm": 2598.832763671875, - "learning_rate": 1.7494949494949494e-05, - "loss": 215.4962, - "step": 4330 - }, - { - "epoch": 0.03506815665931367, - "grad_norm": 5505.1572265625, - "learning_rate": 1.7535353535353537e-05, - "loss": 206.1825, - "step": 4340 - }, - { - "epoch": 0.0351489588635978, - "grad_norm": 872.9111328125, - "learning_rate": 1.7575757575757576e-05, - "loss": 226.9096, - "step": 4350 - }, - { - "epoch": 0.035229761067881935, - "grad_norm": 1309.483154296875, - "learning_rate": 1.7616161616161618e-05, - "loss": 347.4825, - "step": 4360 - }, - { - "epoch": 0.035310563272166064, - "grad_norm": 1847.357666015625, - "learning_rate": 1.7656565656565657e-05, - "loss": 283.7126, - "step": 4370 - }, - { - "epoch": 0.0353913654764502, - "grad_norm": 1132.7510986328125, - "learning_rate": 1.76969696969697e-05, - "loss": 238.7522, - "step": 4380 - }, - { - "epoch": 0.03547216768073433, - "grad_norm": 1338.4906005859375, - "learning_rate": 1.7737373737373738e-05, - "loss": 206.4677, - "step": 4390 - }, - { - "epoch": 0.035552969885018465, - "grad_norm": 889.9144897460938, - "learning_rate": 1.777777777777778e-05, - "loss": 218.258, - "step": 4400 - }, - { - "epoch": 0.035633772089302594, - "grad_norm": 1081.747314453125, - "learning_rate": 1.781818181818182e-05, - "loss": 227.4267, - "step": 4410 - }, - { - "epoch": 0.03571457429358673, - "grad_norm": 1337.2747802734375, - "learning_rate": 1.785858585858586e-05, - "loss": 216.5905, - "step": 4420 - }, - { - "epoch": 0.03579537649787086, - "grad_norm": 1070.0733642578125, - "learning_rate": 1.78989898989899e-05, - "loss": 244.2413, - "step": 4430 - }, - { - "epoch": 0.035876178702154995, - "grad_norm": 2713.52392578125, - "learning_rate": 1.793939393939394e-05, - "loss": 214.338, - "step": 4440 - }, - { - "epoch": 0.035956980906439125, - "grad_norm": 1579.244873046875, - "learning_rate": 1.797979797979798e-05, - "loss": 236.9733, - "step": 4450 - }, - { - "epoch": 0.03603778311072326, - "grad_norm": 1429.0421142578125, - "learning_rate": 1.802020202020202e-05, - "loss": 210.1912, - "step": 4460 - }, - { - "epoch": 0.0361185853150074, - "grad_norm": 1236.484375, - "learning_rate": 1.8060606060606062e-05, - "loss": 209.4003, - "step": 4470 - }, - { - "epoch": 0.036199387519291526, - "grad_norm": 2510.634521484375, - "learning_rate": 1.81010101010101e-05, - "loss": 261.3127, - "step": 4480 - }, - { - "epoch": 0.03628018972357566, - "grad_norm": 697.04345703125, - "learning_rate": 1.8141414141414143e-05, - "loss": 202.3308, - "step": 4490 - }, - { - "epoch": 0.03636099192785979, - "grad_norm": 1664.605712890625, - "learning_rate": 1.8181818181818182e-05, - "loss": 230.7549, - "step": 4500 - }, - { - "epoch": 0.03644179413214393, - "grad_norm": 1968.6279296875, - "learning_rate": 1.8222222222222224e-05, - "loss": 222.0125, - "step": 4510 - }, - { - "epoch": 0.036522596336428056, - "grad_norm": 1813.247314453125, - "learning_rate": 1.8262626262626263e-05, - "loss": 206.1146, - "step": 4520 - }, - { - "epoch": 0.03660339854071219, - "grad_norm": 1681.3162841796875, - "learning_rate": 1.8303030303030305e-05, - "loss": 281.5203, - "step": 4530 - }, - { - "epoch": 0.03668420074499632, - "grad_norm": 813.0327758789062, - "learning_rate": 1.8343434343434344e-05, - "loss": 241.7396, - "step": 4540 - }, - { - "epoch": 0.03676500294928046, - "grad_norm": 1714.4927978515625, - "learning_rate": 1.8383838383838383e-05, - "loss": 229.0337, - "step": 4550 - }, - { - "epoch": 0.03684580515356459, - "grad_norm": 1173.26318359375, - "learning_rate": 1.8424242424242425e-05, - "loss": 167.8814, - "step": 4560 - }, - { - "epoch": 0.03692660735784872, - "grad_norm": 1044.22509765625, - "learning_rate": 1.8464646464646464e-05, - "loss": 181.4134, - "step": 4570 - }, - { - "epoch": 0.03700740956213285, - "grad_norm": 1544.4964599609375, - "learning_rate": 1.8505050505050506e-05, - "loss": 264.1711, - "step": 4580 - }, - { - "epoch": 0.03708821176641699, - "grad_norm": 3204.8271484375, - "learning_rate": 1.8545454545454545e-05, - "loss": 209.0515, - "step": 4590 - }, - { - "epoch": 0.03716901397070112, - "grad_norm": 1948.9998779296875, - "learning_rate": 1.8585858585858588e-05, - "loss": 204.1481, - "step": 4600 - }, - { - "epoch": 0.037249816174985254, - "grad_norm": 985.3388671875, - "learning_rate": 1.8626262626262626e-05, - "loss": 251.0652, - "step": 4610 - }, - { - "epoch": 0.03733061837926939, - "grad_norm": 4716.29833984375, - "learning_rate": 1.866666666666667e-05, - "loss": 234.005, - "step": 4620 - }, - { - "epoch": 0.03741142058355352, - "grad_norm": 2745.129150390625, - "learning_rate": 1.8707070707070707e-05, - "loss": 222.8053, - "step": 4630 - }, - { - "epoch": 0.037492222787837655, - "grad_norm": 852.2494506835938, - "learning_rate": 1.874747474747475e-05, - "loss": 244.6, - "step": 4640 - }, - { - "epoch": 0.037573024992121784, - "grad_norm": 1276.906494140625, - "learning_rate": 1.878787878787879e-05, - "loss": 243.4739, - "step": 4650 - }, - { - "epoch": 0.03765382719640592, - "grad_norm": 2488.490478515625, - "learning_rate": 1.8828282828282827e-05, - "loss": 241.5105, - "step": 4660 - }, - { - "epoch": 0.03773462940069005, - "grad_norm": 1208.5731201171875, - "learning_rate": 1.886868686868687e-05, - "loss": 266.4298, - "step": 4670 - }, - { - "epoch": 0.037815431604974185, - "grad_norm": 1110.9935302734375, - "learning_rate": 1.890909090909091e-05, - "loss": 220.2013, - "step": 4680 - }, - { - "epoch": 0.037896233809258315, - "grad_norm": 966.4763793945312, - "learning_rate": 1.894949494949495e-05, - "loss": 213.4089, - "step": 4690 - }, - { - "epoch": 0.03797703601354245, - "grad_norm": 888.4136352539062, - "learning_rate": 1.898989898989899e-05, - "loss": 192.6133, - "step": 4700 - }, - { - "epoch": 0.03805783821782658, - "grad_norm": 1441.930419921875, - "learning_rate": 1.9030303030303032e-05, - "loss": 210.6855, - "step": 4710 - }, - { - "epoch": 0.038138640422110716, - "grad_norm": 1268.2919921875, - "learning_rate": 1.907070707070707e-05, - "loss": 196.9399, - "step": 4720 - }, - { - "epoch": 0.038219442626394845, - "grad_norm": 714.101318359375, - "learning_rate": 1.9111111111111113e-05, - "loss": 236.1493, - "step": 4730 - }, - { - "epoch": 0.03830024483067898, - "grad_norm": 1360.3662109375, - "learning_rate": 1.9151515151515155e-05, - "loss": 277.1614, - "step": 4740 - }, - { - "epoch": 0.03838104703496312, - "grad_norm": 857.1802368164062, - "learning_rate": 1.919191919191919e-05, - "loss": 233.6975, - "step": 4750 - }, - { - "epoch": 0.038461849239247246, - "grad_norm": 1430.3370361328125, - "learning_rate": 1.9232323232323233e-05, - "loss": 206.9375, - "step": 4760 - }, - { - "epoch": 0.03854265144353138, - "grad_norm": 999.745849609375, - "learning_rate": 1.9272727272727272e-05, - "loss": 177.6682, - "step": 4770 - }, - { - "epoch": 0.03862345364781551, - "grad_norm": 1979.0234375, - "learning_rate": 1.9313131313131314e-05, - "loss": 237.5471, - "step": 4780 - }, - { - "epoch": 0.03870425585209965, - "grad_norm": 1399.9544677734375, - "learning_rate": 1.9353535353535353e-05, - "loss": 209.8267, - "step": 4790 - }, - { - "epoch": 0.03878505805638378, - "grad_norm": 1058.5128173828125, - "learning_rate": 1.9393939393939395e-05, - "loss": 206.1269, - "step": 4800 - }, - { - "epoch": 0.03886586026066791, - "grad_norm": 1852.674072265625, - "learning_rate": 1.9434343434343434e-05, - "loss": 192.6013, - "step": 4810 - }, - { - "epoch": 0.03894666246495204, - "grad_norm": 1104.2967529296875, - "learning_rate": 1.9474747474747476e-05, - "loss": 252.5522, - "step": 4820 - }, - { - "epoch": 0.03902746466923618, - "grad_norm": 1426.0396728515625, - "learning_rate": 1.951515151515152e-05, - "loss": 250.6448, - "step": 4830 - }, - { - "epoch": 0.03910826687352031, - "grad_norm": 1632.4510498046875, - "learning_rate": 1.9555555555555557e-05, - "loss": 163.3638, - "step": 4840 - }, - { - "epoch": 0.03918906907780444, - "grad_norm": 700.0907592773438, - "learning_rate": 1.95959595959596e-05, - "loss": 236.7388, - "step": 4850 - }, - { - "epoch": 0.03926987128208857, - "grad_norm": 1205.572265625, - "learning_rate": 1.9636363636363635e-05, - "loss": 272.2705, - "step": 4860 - }, - { - "epoch": 0.03935067348637271, - "grad_norm": 799.412353515625, - "learning_rate": 1.9676767676767677e-05, - "loss": 171.4291, - "step": 4870 - }, - { - "epoch": 0.03943147569065684, - "grad_norm": 1350.2025146484375, - "learning_rate": 1.9717171717171716e-05, - "loss": 233.9921, - "step": 4880 - }, - { - "epoch": 0.039512277894940974, - "grad_norm": 976.219970703125, - "learning_rate": 1.975757575757576e-05, - "loss": 189.0711, - "step": 4890 - }, - { - "epoch": 0.03959308009922511, - "grad_norm": 947.8401489257812, - "learning_rate": 1.9797979797979797e-05, - "loss": 207.2786, - "step": 4900 - }, - { - "epoch": 0.03967388230350924, - "grad_norm": 1402.2440185546875, - "learning_rate": 1.983838383838384e-05, - "loss": 233.6717, - "step": 4910 - }, - { - "epoch": 0.039754684507793375, - "grad_norm": 2319.2314453125, - "learning_rate": 1.987878787878788e-05, - "loss": 268.4254, - "step": 4920 - }, - { - "epoch": 0.039835486712077504, - "grad_norm": 1344.019775390625, - "learning_rate": 1.991919191919192e-05, - "loss": 215.5304, - "step": 4930 - }, - { - "epoch": 0.03991628891636164, - "grad_norm": 1209.1622314453125, - "learning_rate": 1.9959595959595963e-05, - "loss": 202.8059, - "step": 4940 - }, - { - "epoch": 0.03999709112064577, - "grad_norm": 1872.3892822265625, - "learning_rate": 2e-05, - "loss": 193.5764, - "step": 4950 - }, - { - "epoch": 0.040077893324929906, - "grad_norm": 1944.2449951171875, - "learning_rate": 2.0040404040404044e-05, - "loss": 273.8487, - "step": 4960 - }, - { - "epoch": 0.040158695529214035, - "grad_norm": 988.1495361328125, - "learning_rate": 2.008080808080808e-05, - "loss": 202.3245, - "step": 4970 - }, - { - "epoch": 0.04023949773349817, - "grad_norm": 1082.6280517578125, - "learning_rate": 2.012121212121212e-05, - "loss": 190.6009, - "step": 4980 - }, - { - "epoch": 0.0403202999377823, - "grad_norm": 1510.5738525390625, - "learning_rate": 2.016161616161616e-05, - "loss": 262.4282, - "step": 4990 - }, - { - "epoch": 0.040401102142066436, - "grad_norm": 1080.0328369140625, - "learning_rate": 2.0202020202020203e-05, - "loss": 179.5178, - "step": 5000 - }, - { - "epoch": 0.040481904346350565, - "grad_norm": 1204.5341796875, - "learning_rate": 2.0242424242424245e-05, - "loss": 208.4234, - "step": 5010 - }, - { - "epoch": 0.0405627065506347, - "grad_norm": 788.6203002929688, - "learning_rate": 2.0282828282828284e-05, - "loss": 222.1854, - "step": 5020 - }, - { - "epoch": 0.04064350875491884, - "grad_norm": 2447.934326171875, - "learning_rate": 2.0323232323232326e-05, - "loss": 183.1969, - "step": 5030 - }, - { - "epoch": 0.04072431095920297, - "grad_norm": 1879.5914306640625, - "learning_rate": 2.0363636363636365e-05, - "loss": 235.8428, - "step": 5040 - }, - { - "epoch": 0.0408051131634871, - "grad_norm": 859.5083618164062, - "learning_rate": 2.0404040404040407e-05, - "loss": 223.1974, - "step": 5050 - }, - { - "epoch": 0.04088591536777123, - "grad_norm": 591.982421875, - "learning_rate": 2.0444444444444446e-05, - "loss": 195.905, - "step": 5060 - }, - { - "epoch": 0.04096671757205537, - "grad_norm": 2516.256103515625, - "learning_rate": 2.0484848484848485e-05, - "loss": 224.4586, - "step": 5070 - }, - { - "epoch": 0.0410475197763395, - "grad_norm": 1155.78271484375, - "learning_rate": 2.0525252525252524e-05, - "loss": 237.8034, - "step": 5080 - }, - { - "epoch": 0.04112832198062363, - "grad_norm": 760.8511962890625, - "learning_rate": 2.0565656565656566e-05, - "loss": 213.4372, - "step": 5090 - }, - { - "epoch": 0.04120912418490776, - "grad_norm": 746.3182983398438, - "learning_rate": 2.0606060606060608e-05, - "loss": 246.9279, - "step": 5100 - }, - { - "epoch": 0.0412899263891919, - "grad_norm": 1112.6119384765625, - "learning_rate": 2.0646464646464647e-05, - "loss": 215.4636, - "step": 5110 - }, - { - "epoch": 0.04137072859347603, - "grad_norm": 1308.880126953125, - "learning_rate": 2.068686868686869e-05, - "loss": 184.3576, - "step": 5120 - }, - { - "epoch": 0.041451530797760164, - "grad_norm": 1182.3695068359375, - "learning_rate": 2.0727272727272728e-05, - "loss": 251.3663, - "step": 5130 - }, - { - "epoch": 0.04153233300204429, - "grad_norm": 3545.449951171875, - "learning_rate": 2.076767676767677e-05, - "loss": 221.7183, - "step": 5140 - }, - { - "epoch": 0.04161313520632843, - "grad_norm": 1155.616455078125, - "learning_rate": 2.080808080808081e-05, - "loss": 181.7703, - "step": 5150 - }, - { - "epoch": 0.04169393741061256, - "grad_norm": 927.0892333984375, - "learning_rate": 2.084848484848485e-05, - "loss": 242.7771, - "step": 5160 - }, - { - "epoch": 0.041774739614896694, - "grad_norm": 1621.09326171875, - "learning_rate": 2.088888888888889e-05, - "loss": 168.8398, - "step": 5170 - }, - { - "epoch": 0.04185554181918083, - "grad_norm": 1823.0281982421875, - "learning_rate": 2.092929292929293e-05, - "loss": 226.3993, - "step": 5180 - }, - { - "epoch": 0.04193634402346496, - "grad_norm": 1904.581298828125, - "learning_rate": 2.096969696969697e-05, - "loss": 274.6904, - "step": 5190 - }, - { - "epoch": 0.042017146227749096, - "grad_norm": 1195.8973388671875, - "learning_rate": 2.101010101010101e-05, - "loss": 193.929, - "step": 5200 - }, - { - "epoch": 0.042097948432033225, - "grad_norm": 809.5712890625, - "learning_rate": 2.1050505050505052e-05, - "loss": 183.259, - "step": 5210 - }, - { - "epoch": 0.04217875063631736, - "grad_norm": 1392.5491943359375, - "learning_rate": 2.109090909090909e-05, - "loss": 220.0326, - "step": 5220 - }, - { - "epoch": 0.04225955284060149, - "grad_norm": 1818.6051025390625, - "learning_rate": 2.1131313131313134e-05, - "loss": 209.3423, - "step": 5230 - }, - { - "epoch": 0.042340355044885626, - "grad_norm": 756.583740234375, - "learning_rate": 2.1171717171717172e-05, - "loss": 152.79, - "step": 5240 - }, - { - "epoch": 0.042421157249169755, - "grad_norm": 1358.5194091796875, - "learning_rate": 2.1212121212121215e-05, - "loss": 223.6846, - "step": 5250 - }, - { - "epoch": 0.04250195945345389, - "grad_norm": 2302.727783203125, - "learning_rate": 2.1252525252525254e-05, - "loss": 206.5412, - "step": 5260 - }, - { - "epoch": 0.04258276165773802, - "grad_norm": 1090.666259765625, - "learning_rate": 2.1292929292929296e-05, - "loss": 197.9379, - "step": 5270 - }, - { - "epoch": 0.04266356386202216, - "grad_norm": 1535.5264892578125, - "learning_rate": 2.1333333333333335e-05, - "loss": 172.5529, - "step": 5280 - }, - { - "epoch": 0.042744366066306286, - "grad_norm": 1242.1055908203125, - "learning_rate": 2.1373737373737373e-05, - "loss": 182.2667, - "step": 5290 - }, - { - "epoch": 0.04282516827059042, - "grad_norm": 1571.221923828125, - "learning_rate": 2.1414141414141416e-05, - "loss": 206.9152, - "step": 5300 - }, - { - "epoch": 0.04290597047487456, - "grad_norm": 1733.92578125, - "learning_rate": 2.1454545454545455e-05, - "loss": 253.6228, - "step": 5310 - }, - { - "epoch": 0.04298677267915869, - "grad_norm": 1736.4722900390625, - "learning_rate": 2.1494949494949497e-05, - "loss": 209.5105, - "step": 5320 - }, - { - "epoch": 0.04306757488344282, - "grad_norm": 846.6854248046875, - "learning_rate": 2.1535353535353536e-05, - "loss": 227.6331, - "step": 5330 - }, - { - "epoch": 0.04314837708772695, - "grad_norm": 793.491943359375, - "learning_rate": 2.1575757575757578e-05, - "loss": 190.9206, - "step": 5340 - }, - { - "epoch": 0.04322917929201109, - "grad_norm": 1314.4940185546875, - "learning_rate": 2.1616161616161617e-05, - "loss": 278.1586, - "step": 5350 - }, - { - "epoch": 0.04330998149629522, - "grad_norm": 1807.1669921875, - "learning_rate": 2.165656565656566e-05, - "loss": 246.6954, - "step": 5360 - }, - { - "epoch": 0.043390783700579354, - "grad_norm": 1456.6739501953125, - "learning_rate": 2.1696969696969698e-05, - "loss": 198.1051, - "step": 5370 - }, - { - "epoch": 0.04347158590486348, - "grad_norm": 2645.863037109375, - "learning_rate": 2.173737373737374e-05, - "loss": 263.7012, - "step": 5380 - }, - { - "epoch": 0.04355238810914762, - "grad_norm": 890.2818603515625, - "learning_rate": 2.177777777777778e-05, - "loss": 206.08, - "step": 5390 - }, - { - "epoch": 0.04363319031343175, - "grad_norm": 1066.948974609375, - "learning_rate": 2.1818181818181818e-05, - "loss": 203.3024, - "step": 5400 - }, - { - "epoch": 0.043713992517715884, - "grad_norm": 1678.3651123046875, - "learning_rate": 2.185858585858586e-05, - "loss": 287.4994, - "step": 5410 - }, - { - "epoch": 0.04379479472200001, - "grad_norm": 1427.133544921875, - "learning_rate": 2.18989898989899e-05, - "loss": 236.3808, - "step": 5420 - }, - { - "epoch": 0.04387559692628415, - "grad_norm": 993.3723754882812, - "learning_rate": 2.193939393939394e-05, - "loss": 221.2247, - "step": 5430 - }, - { - "epoch": 0.04395639913056828, - "grad_norm": 919.2279663085938, - "learning_rate": 2.197979797979798e-05, - "loss": 232.8961, - "step": 5440 - }, - { - "epoch": 0.044037201334852415, - "grad_norm": 1196.51904296875, - "learning_rate": 2.2020202020202022e-05, - "loss": 208.7773, - "step": 5450 - }, - { - "epoch": 0.04411800353913655, - "grad_norm": 937.6903076171875, - "learning_rate": 2.206060606060606e-05, - "loss": 159.7425, - "step": 5460 - }, - { - "epoch": 0.04419880574342068, - "grad_norm": 2946.419921875, - "learning_rate": 2.2101010101010103e-05, - "loss": 201.0844, - "step": 5470 - }, - { - "epoch": 0.044279607947704816, - "grad_norm": 1663.4422607421875, - "learning_rate": 2.2141414141414142e-05, - "loss": 140.8333, - "step": 5480 - }, - { - "epoch": 0.044360410151988945, - "grad_norm": 1202.589599609375, - "learning_rate": 2.2181818181818184e-05, - "loss": 210.6169, - "step": 5490 - }, - { - "epoch": 0.04444121235627308, - "grad_norm": 1676.0555419921875, - "learning_rate": 2.2222222222222223e-05, - "loss": 300.5972, - "step": 5500 - }, - { - "epoch": 0.04452201456055721, - "grad_norm": 1122.7333984375, - "learning_rate": 2.2262626262626262e-05, - "loss": 223.7688, - "step": 5510 - }, - { - "epoch": 0.044602816764841346, - "grad_norm": 842.3754272460938, - "learning_rate": 2.2303030303030304e-05, - "loss": 231.1573, - "step": 5520 - }, - { - "epoch": 0.044683618969125476, - "grad_norm": 912.3519897460938, - "learning_rate": 2.2343434343434343e-05, - "loss": 161.5479, - "step": 5530 - }, - { - "epoch": 0.04476442117340961, - "grad_norm": 2117.377197265625, - "learning_rate": 2.2383838383838385e-05, - "loss": 178.251, - "step": 5540 - }, - { - "epoch": 0.04484522337769374, - "grad_norm": 1402.2164306640625, - "learning_rate": 2.2424242424242424e-05, - "loss": 209.2086, - "step": 5550 - }, - { - "epoch": 0.04492602558197788, - "grad_norm": 1458.323974609375, - "learning_rate": 2.2464646464646467e-05, - "loss": 243.9479, - "step": 5560 - }, - { - "epoch": 0.045006827786262006, - "grad_norm": 2175.216796875, - "learning_rate": 2.2505050505050505e-05, - "loss": 189.8892, - "step": 5570 - }, - { - "epoch": 0.04508762999054614, - "grad_norm": 1899.4354248046875, - "learning_rate": 2.2545454545454548e-05, - "loss": 335.0552, - "step": 5580 - }, - { - "epoch": 0.04516843219483028, - "grad_norm": 1230.814697265625, - "learning_rate": 2.2585858585858587e-05, - "loss": 194.9335, - "step": 5590 - }, - { - "epoch": 0.04524923439911441, - "grad_norm": 2101.527587890625, - "learning_rate": 2.262626262626263e-05, - "loss": 257.3806, - "step": 5600 - }, - { - "epoch": 0.045330036603398544, - "grad_norm": 1695.30810546875, - "learning_rate": 2.2666666666666668e-05, - "loss": 219.7137, - "step": 5610 - }, - { - "epoch": 0.04541083880768267, - "grad_norm": 1386.2855224609375, - "learning_rate": 2.2707070707070706e-05, - "loss": 236.2214, - "step": 5620 - }, - { - "epoch": 0.04549164101196681, - "grad_norm": 1138.779052734375, - "learning_rate": 2.274747474747475e-05, - "loss": 192.9845, - "step": 5630 - }, - { - "epoch": 0.04557244321625094, - "grad_norm": 2650.991943359375, - "learning_rate": 2.2787878787878788e-05, - "loss": 233.3904, - "step": 5640 - }, - { - "epoch": 0.045653245420535074, - "grad_norm": 1309.0333251953125, - "learning_rate": 2.282828282828283e-05, - "loss": 225.3846, - "step": 5650 - }, - { - "epoch": 0.0457340476248192, - "grad_norm": 930.385009765625, - "learning_rate": 2.286868686868687e-05, - "loss": 236.1336, - "step": 5660 - }, - { - "epoch": 0.04581484982910334, - "grad_norm": 1646.2891845703125, - "learning_rate": 2.290909090909091e-05, - "loss": 227.3526, - "step": 5670 - }, - { - "epoch": 0.04589565203338747, - "grad_norm": 2285.751708984375, - "learning_rate": 2.294949494949495e-05, - "loss": 236.6346, - "step": 5680 - }, - { - "epoch": 0.045976454237671605, - "grad_norm": 3180.75537109375, - "learning_rate": 2.2989898989898992e-05, - "loss": 177.5457, - "step": 5690 - }, - { - "epoch": 0.046057256441955734, - "grad_norm": 1423.35009765625, - "learning_rate": 2.3030303030303034e-05, - "loss": 194.2139, - "step": 5700 - }, - { - "epoch": 0.04613805864623987, - "grad_norm": 1577.701171875, - "learning_rate": 2.307070707070707e-05, - "loss": 183.8717, - "step": 5710 - }, - { - "epoch": 0.046218860850524, - "grad_norm": 1255.1485595703125, - "learning_rate": 2.3111111111111112e-05, - "loss": 213.8492, - "step": 5720 - }, - { - "epoch": 0.046299663054808135, - "grad_norm": 1154.9453125, - "learning_rate": 2.315151515151515e-05, - "loss": 219.6154, - "step": 5730 - }, - { - "epoch": 0.04638046525909227, - "grad_norm": 3208.9140625, - "learning_rate": 2.3191919191919193e-05, - "loss": 212.2527, - "step": 5740 - }, - { - "epoch": 0.0464612674633764, - "grad_norm": 826.8831787109375, - "learning_rate": 2.3232323232323232e-05, - "loss": 193.0573, - "step": 5750 - }, - { - "epoch": 0.046542069667660536, - "grad_norm": 953.578369140625, - "learning_rate": 2.3272727272727274e-05, - "loss": 200.1285, - "step": 5760 - }, - { - "epoch": 0.046622871871944666, - "grad_norm": 948.6517944335938, - "learning_rate": 2.3313131313131313e-05, - "loss": 226.3946, - "step": 5770 - }, - { - "epoch": 0.0467036740762288, - "grad_norm": 1502.9415283203125, - "learning_rate": 2.3353535353535355e-05, - "loss": 301.4247, - "step": 5780 - }, - { - "epoch": 0.04678447628051293, - "grad_norm": 592.7190551757812, - "learning_rate": 2.3393939393939397e-05, - "loss": 171.6613, - "step": 5790 - }, - { - "epoch": 0.04686527848479707, - "grad_norm": 774.3163452148438, - "learning_rate": 2.3434343434343436e-05, - "loss": 174.7567, - "step": 5800 - }, - { - "epoch": 0.046946080689081196, - "grad_norm": 1000.3840942382812, - "learning_rate": 2.347474747474748e-05, - "loss": 140.1143, - "step": 5810 - }, - { - "epoch": 0.04702688289336533, - "grad_norm": 1050.761474609375, - "learning_rate": 2.3515151515151514e-05, - "loss": 234.2542, - "step": 5820 - }, - { - "epoch": 0.04710768509764946, - "grad_norm": 1076.979248046875, - "learning_rate": 2.3555555555555556e-05, - "loss": 170.2877, - "step": 5830 - }, - { - "epoch": 0.0471884873019336, - "grad_norm": 1528.865478515625, - "learning_rate": 2.3595959595959595e-05, - "loss": 280.3715, - "step": 5840 - }, - { - "epoch": 0.047269289506217727, - "grad_norm": 1554.0205078125, - "learning_rate": 2.3636363636363637e-05, - "loss": 258.9206, - "step": 5850 - }, - { - "epoch": 0.04735009171050186, - "grad_norm": 969.7879028320312, - "learning_rate": 2.3676767676767676e-05, - "loss": 173.4592, - "step": 5860 - }, - { - "epoch": 0.047430893914786, - "grad_norm": 1271.55322265625, - "learning_rate": 2.371717171717172e-05, - "loss": 187.7373, - "step": 5870 - }, - { - "epoch": 0.04751169611907013, - "grad_norm": 757.3799438476562, - "learning_rate": 2.375757575757576e-05, - "loss": 206.0978, - "step": 5880 - }, - { - "epoch": 0.047592498323354264, - "grad_norm": 1099.2119140625, - "learning_rate": 2.37979797979798e-05, - "loss": 191.4486, - "step": 5890 - }, - { - "epoch": 0.04767330052763839, - "grad_norm": 895.0558471679688, - "learning_rate": 2.3838383838383842e-05, - "loss": 197.1677, - "step": 5900 - }, - { - "epoch": 0.04775410273192253, - "grad_norm": 900.752685546875, - "learning_rate": 2.387878787878788e-05, - "loss": 209.3482, - "step": 5910 - }, - { - "epoch": 0.04783490493620666, - "grad_norm": 865.3425903320312, - "learning_rate": 2.3919191919191923e-05, - "loss": 211.704, - "step": 5920 - }, - { - "epoch": 0.047915707140490794, - "grad_norm": 1376.961181640625, - "learning_rate": 2.395959595959596e-05, - "loss": 197.2012, - "step": 5930 - }, - { - "epoch": 0.047996509344774924, - "grad_norm": 2671.92236328125, - "learning_rate": 2.4e-05, - "loss": 262.8319, - "step": 5940 - }, - { - "epoch": 0.04807731154905906, - "grad_norm": 4328.66552734375, - "learning_rate": 2.404040404040404e-05, - "loss": 263.4226, - "step": 5950 - }, - { - "epoch": 0.04815811375334319, - "grad_norm": 1454.4398193359375, - "learning_rate": 2.4080808080808082e-05, - "loss": 173.7909, - "step": 5960 - }, - { - "epoch": 0.048238915957627325, - "grad_norm": 1238.2913818359375, - "learning_rate": 2.4121212121212124e-05, - "loss": 190.8571, - "step": 5970 - }, - { - "epoch": 0.048319718161911454, - "grad_norm": 1106.6146240234375, - "learning_rate": 2.4161616161616163e-05, - "loss": 252.6962, - "step": 5980 - }, - { - "epoch": 0.04840052036619559, - "grad_norm": 1612.1171875, - "learning_rate": 2.4202020202020205e-05, - "loss": 176.1714, - "step": 5990 - }, - { - "epoch": 0.04848132257047972, - "grad_norm": 684.4707641601562, - "learning_rate": 2.4242424242424244e-05, - "loss": 236.6299, - "step": 6000 - }, - { - "epoch": 0.048562124774763855, - "grad_norm": 5278.638671875, - "learning_rate": 2.4282828282828286e-05, - "loss": 200.9588, - "step": 6010 - }, - { - "epoch": 0.04864292697904799, - "grad_norm": 2136.859375, - "learning_rate": 2.4323232323232325e-05, - "loss": 249.8048, - "step": 6020 - }, - { - "epoch": 0.04872372918333212, - "grad_norm": 704.8456420898438, - "learning_rate": 2.4363636363636364e-05, - "loss": 210.816, - "step": 6030 - }, - { - "epoch": 0.04880453138761626, - "grad_norm": 2405.291259765625, - "learning_rate": 2.4404040404040403e-05, - "loss": 180.7068, - "step": 6040 - }, - { - "epoch": 0.048885333591900386, - "grad_norm": 1121.5928955078125, - "learning_rate": 2.4444444444444445e-05, - "loss": 268.1764, - "step": 6050 - }, - { - "epoch": 0.04896613579618452, - "grad_norm": 1185.4925537109375, - "learning_rate": 2.4484848484848484e-05, - "loss": 252.5901, - "step": 6060 - }, - { - "epoch": 0.04904693800046865, - "grad_norm": 1037.7261962890625, - "learning_rate": 2.4525252525252526e-05, - "loss": 217.7089, - "step": 6070 - }, - { - "epoch": 0.04912774020475279, - "grad_norm": 3574.91943359375, - "learning_rate": 2.4565656565656568e-05, - "loss": 248.0757, - "step": 6080 - }, - { - "epoch": 0.049208542409036916, - "grad_norm": 1335.7510986328125, - "learning_rate": 2.4606060606060607e-05, - "loss": 243.5903, - "step": 6090 - }, - { - "epoch": 0.04928934461332105, - "grad_norm": 1548.2281494140625, - "learning_rate": 2.464646464646465e-05, - "loss": 204.2808, - "step": 6100 - }, - { - "epoch": 0.04937014681760518, - "grad_norm": 1327.641357421875, - "learning_rate": 2.4686868686868688e-05, - "loss": 175.3226, - "step": 6110 - }, - { - "epoch": 0.04945094902188932, - "grad_norm": 1096.567626953125, - "learning_rate": 2.472727272727273e-05, - "loss": 251.3891, - "step": 6120 - }, - { - "epoch": 0.04953175122617345, - "grad_norm": 916.0780639648438, - "learning_rate": 2.476767676767677e-05, - "loss": 265.5964, - "step": 6130 - }, - { - "epoch": 0.04961255343045758, - "grad_norm": 3319.821533203125, - "learning_rate": 2.4808080808080808e-05, - "loss": 199.656, - "step": 6140 - }, - { - "epoch": 0.04969335563474172, - "grad_norm": 804.5398559570312, - "learning_rate": 2.4848484848484847e-05, - "loss": 176.793, - "step": 6150 - }, - { - "epoch": 0.04977415783902585, - "grad_norm": 1266.6590576171875, - "learning_rate": 2.488888888888889e-05, - "loss": 172.6065, - "step": 6160 - }, - { - "epoch": 0.049854960043309984, - "grad_norm": 953.1856689453125, - "learning_rate": 2.492929292929293e-05, - "loss": 259.3056, - "step": 6170 - }, - { - "epoch": 0.049935762247594113, - "grad_norm": 1643.679443359375, - "learning_rate": 2.496969696969697e-05, - "loss": 227.5671, - "step": 6180 - }, - { - "epoch": 0.05001656445187825, - "grad_norm": 2092.3837890625, - "learning_rate": 2.5010101010101013e-05, - "loss": 231.7141, - "step": 6190 - }, - { - "epoch": 0.05009736665616238, - "grad_norm": 5872.7822265625, - "learning_rate": 2.505050505050505e-05, - "loss": 307.3282, - "step": 6200 - }, - { - "epoch": 0.050178168860446515, - "grad_norm": 1653.10888671875, - "learning_rate": 2.5090909090909094e-05, - "loss": 290.71, - "step": 6210 - }, - { - "epoch": 0.050258971064730644, - "grad_norm": 5940.2861328125, - "learning_rate": 2.5131313131313133e-05, - "loss": 298.1718, - "step": 6220 - }, - { - "epoch": 0.05033977326901478, - "grad_norm": 1056.6617431640625, - "learning_rate": 2.5171717171717175e-05, - "loss": 167.573, - "step": 6230 - }, - { - "epoch": 0.05042057547329891, - "grad_norm": 1492.5479736328125, - "learning_rate": 2.5212121212121214e-05, - "loss": 209.9481, - "step": 6240 - }, - { - "epoch": 0.050501377677583045, - "grad_norm": 764.5651245117188, - "learning_rate": 2.5252525252525256e-05, - "loss": 189.9945, - "step": 6250 - }, - { - "epoch": 0.050582179881867174, - "grad_norm": 2933.18603515625, - "learning_rate": 2.5292929292929295e-05, - "loss": 228.2252, - "step": 6260 - }, - { - "epoch": 0.05066298208615131, - "grad_norm": 2692.583740234375, - "learning_rate": 2.5333333333333337e-05, - "loss": 217.1123, - "step": 6270 - }, - { - "epoch": 0.05074378429043544, - "grad_norm": 1611.5694580078125, - "learning_rate": 2.5373737373737376e-05, - "loss": 199.2745, - "step": 6280 - }, - { - "epoch": 0.050824586494719576, - "grad_norm": 638.3251953125, - "learning_rate": 2.5414141414141418e-05, - "loss": 283.4336, - "step": 6290 - }, - { - "epoch": 0.05090538869900371, - "grad_norm": 960.7551879882812, - "learning_rate": 2.5454545454545454e-05, - "loss": 199.2895, - "step": 6300 - }, - { - "epoch": 0.05098619090328784, - "grad_norm": 1416.6865234375, - "learning_rate": 2.5494949494949492e-05, - "loss": 247.6437, - "step": 6310 - }, - { - "epoch": 0.05106699310757198, - "grad_norm": 962.5587158203125, - "learning_rate": 2.5535353535353535e-05, - "loss": 222.514, - "step": 6320 - }, - { - "epoch": 0.051147795311856106, - "grad_norm": 1019.0704956054688, - "learning_rate": 2.5575757575757573e-05, - "loss": 233.7968, - "step": 6330 - }, - { - "epoch": 0.05122859751614024, - "grad_norm": 1380.1087646484375, - "learning_rate": 2.5616161616161616e-05, - "loss": 203.472, - "step": 6340 - }, - { - "epoch": 0.05130939972042437, - "grad_norm": 765.1551513671875, - "learning_rate": 2.5656565656565658e-05, - "loss": 202.9591, - "step": 6350 - }, - { - "epoch": 0.05139020192470851, - "grad_norm": 854.4512329101562, - "learning_rate": 2.5696969696969697e-05, - "loss": 152.8654, - "step": 6360 - }, - { - "epoch": 0.05147100412899264, - "grad_norm": 1366.1529541015625, - "learning_rate": 2.573737373737374e-05, - "loss": 202.4912, - "step": 6370 - }, - { - "epoch": 0.05155180633327677, - "grad_norm": 812.153564453125, - "learning_rate": 2.5777777777777778e-05, - "loss": 190.2283, - "step": 6380 - }, - { - "epoch": 0.0516326085375609, - "grad_norm": 2072.30029296875, - "learning_rate": 2.581818181818182e-05, - "loss": 250.4601, - "step": 6390 - }, - { - "epoch": 0.05171341074184504, - "grad_norm": 1064.25732421875, - "learning_rate": 2.585858585858586e-05, - "loss": 243.8253, - "step": 6400 - }, - { - "epoch": 0.05179421294612917, - "grad_norm": 1004.585205078125, - "learning_rate": 2.58989898989899e-05, - "loss": 233.6981, - "step": 6410 - }, - { - "epoch": 0.0518750151504133, - "grad_norm": 781.0443115234375, - "learning_rate": 2.593939393939394e-05, - "loss": 231.5708, - "step": 6420 - }, - { - "epoch": 0.05195581735469744, - "grad_norm": 1038.6923828125, - "learning_rate": 2.5979797979797982e-05, - "loss": 166.9408, - "step": 6430 - }, - { - "epoch": 0.05203661955898157, - "grad_norm": 1369.49560546875, - "learning_rate": 2.602020202020202e-05, - "loss": 212.7086, - "step": 6440 - }, - { - "epoch": 0.052117421763265705, - "grad_norm": 1065.5115966796875, - "learning_rate": 2.6060606060606063e-05, - "loss": 197.232, - "step": 6450 - }, - { - "epoch": 0.052198223967549834, - "grad_norm": 1192.5135498046875, - "learning_rate": 2.6101010101010102e-05, - "loss": 210.3559, - "step": 6460 - }, - { - "epoch": 0.05227902617183397, - "grad_norm": 2817.4658203125, - "learning_rate": 2.6141414141414145e-05, - "loss": 218.5129, - "step": 6470 - }, - { - "epoch": 0.0523598283761181, - "grad_norm": 1661.2547607421875, - "learning_rate": 2.6181818181818187e-05, - "loss": 251.3214, - "step": 6480 - }, - { - "epoch": 0.052440630580402235, - "grad_norm": 1465.83251953125, - "learning_rate": 2.6222222222222226e-05, - "loss": 167.4487, - "step": 6490 - }, - { - "epoch": 0.052521432784686364, - "grad_norm": 1172.0814208984375, - "learning_rate": 2.6262626262626268e-05, - "loss": 202.9199, - "step": 6500 - }, - { - "epoch": 0.0526022349889705, - "grad_norm": 845.3886108398438, - "learning_rate": 2.63030303030303e-05, - "loss": 245.5335, - "step": 6510 - }, - { - "epoch": 0.05268303719325463, - "grad_norm": 1505.1903076171875, - "learning_rate": 2.6343434343434342e-05, - "loss": 189.6107, - "step": 6520 - }, - { - "epoch": 0.052763839397538766, - "grad_norm": 855.8611450195312, - "learning_rate": 2.6383838383838384e-05, - "loss": 168.8279, - "step": 6530 - }, - { - "epoch": 0.052844641601822895, - "grad_norm": 1719.1915283203125, - "learning_rate": 2.6424242424242423e-05, - "loss": 266.6213, - "step": 6540 - }, - { - "epoch": 0.05292544380610703, - "grad_norm": 1334.455322265625, - "learning_rate": 2.6464646464646466e-05, - "loss": 154.8023, - "step": 6550 - }, - { - "epoch": 0.05300624601039116, - "grad_norm": 1549.58154296875, - "learning_rate": 2.6505050505050504e-05, - "loss": 188.1264, - "step": 6560 - }, - { - "epoch": 0.053087048214675296, - "grad_norm": 740.02587890625, - "learning_rate": 2.6545454545454547e-05, - "loss": 241.7192, - "step": 6570 - }, - { - "epoch": 0.05316785041895943, - "grad_norm": 1759.15869140625, - "learning_rate": 2.6585858585858585e-05, - "loss": 249.789, - "step": 6580 - }, - { - "epoch": 0.05324865262324356, - "grad_norm": 1615.3770751953125, - "learning_rate": 2.6626262626262628e-05, - "loss": 256.1343, - "step": 6590 - }, - { - "epoch": 0.0533294548275277, - "grad_norm": 1187.103515625, - "learning_rate": 2.6666666666666667e-05, - "loss": 153.0139, - "step": 6600 - }, - { - "epoch": 0.05341025703181183, - "grad_norm": 835.982177734375, - "learning_rate": 2.670707070707071e-05, - "loss": 240.5858, - "step": 6610 - }, - { - "epoch": 0.05349105923609596, - "grad_norm": 658.9365234375, - "learning_rate": 2.6747474747474748e-05, - "loss": 212.2474, - "step": 6620 - }, - { - "epoch": 0.05357186144038009, - "grad_norm": 836.15185546875, - "learning_rate": 2.678787878787879e-05, - "loss": 203.4595, - "step": 6630 - }, - { - "epoch": 0.05365266364466423, - "grad_norm": 1312.960205078125, - "learning_rate": 2.682828282828283e-05, - "loss": 178.5055, - "step": 6640 - }, - { - "epoch": 0.05373346584894836, - "grad_norm": 2402.58642578125, - "learning_rate": 2.686868686868687e-05, - "loss": 204.5425, - "step": 6650 - }, - { - "epoch": 0.05381426805323249, - "grad_norm": 743.2178344726562, - "learning_rate": 2.6909090909090913e-05, - "loss": 151.0765, - "step": 6660 - }, - { - "epoch": 0.05389507025751662, - "grad_norm": 2009.14599609375, - "learning_rate": 2.6949494949494952e-05, - "loss": 255.5723, - "step": 6670 - }, - { - "epoch": 0.05397587246180076, - "grad_norm": 1129.924560546875, - "learning_rate": 2.6989898989898994e-05, - "loss": 249.5275, - "step": 6680 - }, - { - "epoch": 0.05405667466608489, - "grad_norm": 1877.5682373046875, - "learning_rate": 2.7030303030303033e-05, - "loss": 201.4787, - "step": 6690 - }, - { - "epoch": 0.054137476870369024, - "grad_norm": 1205.5860595703125, - "learning_rate": 2.7070707070707075e-05, - "loss": 165.1917, - "step": 6700 - }, - { - "epoch": 0.05421827907465316, - "grad_norm": 833.5079956054688, - "learning_rate": 2.7111111111111114e-05, - "loss": 222.6354, - "step": 6710 - }, - { - "epoch": 0.05429908127893729, - "grad_norm": 1644.57470703125, - "learning_rate": 2.7151515151515157e-05, - "loss": 183.0618, - "step": 6720 - }, - { - "epoch": 0.054379883483221425, - "grad_norm": 1261.3482666015625, - "learning_rate": 2.7191919191919192e-05, - "loss": 204.8876, - "step": 6730 - }, - { - "epoch": 0.054460685687505554, - "grad_norm": 1064.4910888671875, - "learning_rate": 2.723232323232323e-05, - "loss": 228.8735, - "step": 6740 - }, - { - "epoch": 0.05454148789178969, - "grad_norm": 1227.28369140625, - "learning_rate": 2.7272727272727273e-05, - "loss": 244.5206, - "step": 6750 - }, - { - "epoch": 0.05462229009607382, - "grad_norm": 747.6671142578125, - "learning_rate": 2.7313131313131312e-05, - "loss": 171.5991, - "step": 6760 - }, - { - "epoch": 0.054703092300357956, - "grad_norm": 1191.174560546875, - "learning_rate": 2.7353535353535354e-05, - "loss": 183.062, - "step": 6770 - }, - { - "epoch": 0.054783894504642085, - "grad_norm": 1179.271484375, - "learning_rate": 2.7393939393939393e-05, - "loss": 203.4914, - "step": 6780 - }, - { - "epoch": 0.05486469670892622, - "grad_norm": 1980.94287109375, - "learning_rate": 2.7434343434343435e-05, - "loss": 190.7682, - "step": 6790 - }, - { - "epoch": 0.05494549891321035, - "grad_norm": 1313.760498046875, - "learning_rate": 2.7474747474747474e-05, - "loss": 179.4395, - "step": 6800 - }, - { - "epoch": 0.055026301117494486, - "grad_norm": 818.7135620117188, - "learning_rate": 2.7515151515151516e-05, - "loss": 240.7207, - "step": 6810 - }, - { - "epoch": 0.055107103321778615, - "grad_norm": 1303.9735107421875, - "learning_rate": 2.7555555555555555e-05, - "loss": 197.5866, - "step": 6820 - }, - { - "epoch": 0.05518790552606275, - "grad_norm": 4817.638671875, - "learning_rate": 2.7595959595959597e-05, - "loss": 236.2139, - "step": 6830 - }, - { - "epoch": 0.05526870773034688, - "grad_norm": 1369.7080078125, - "learning_rate": 2.7636363636363636e-05, - "loss": 155.043, - "step": 6840 - }, - { - "epoch": 0.05534950993463102, - "grad_norm": 1351.29150390625, - "learning_rate": 2.767676767676768e-05, - "loss": 193.5722, - "step": 6850 - }, - { - "epoch": 0.05543031213891515, - "grad_norm": 1340.113525390625, - "learning_rate": 2.771717171717172e-05, - "loss": 196.4928, - "step": 6860 - }, - { - "epoch": 0.05551111434319928, - "grad_norm": 1829.1298828125, - "learning_rate": 2.775757575757576e-05, - "loss": 207.2559, - "step": 6870 - }, - { - "epoch": 0.05559191654748342, - "grad_norm": 1614.317138671875, - "learning_rate": 2.7797979797979802e-05, - "loss": 191.8481, - "step": 6880 - }, - { - "epoch": 0.05567271875176755, - "grad_norm": 1409.754150390625, - "learning_rate": 2.783838383838384e-05, - "loss": 172.1698, - "step": 6890 - }, - { - "epoch": 0.05575352095605168, - "grad_norm": 1008.7220458984375, - "learning_rate": 2.7878787878787883e-05, - "loss": 204.197, - "step": 6900 - }, - { - "epoch": 0.05583432316033581, - "grad_norm": 1213.98291015625, - "learning_rate": 2.7919191919191922e-05, - "loss": 176.116, - "step": 6910 - }, - { - "epoch": 0.05591512536461995, - "grad_norm": 1919.146484375, - "learning_rate": 2.7959595959595964e-05, - "loss": 205.5731, - "step": 6920 - }, - { - "epoch": 0.05599592756890408, - "grad_norm": 1582.1240234375, - "learning_rate": 2.8000000000000003e-05, - "loss": 204.0053, - "step": 6930 - }, - { - "epoch": 0.056076729773188214, - "grad_norm": 1082.2257080078125, - "learning_rate": 2.804040404040404e-05, - "loss": 216.7668, - "step": 6940 - }, - { - "epoch": 0.05615753197747234, - "grad_norm": 1451.9715576171875, - "learning_rate": 2.808080808080808e-05, - "loss": 128.3858, - "step": 6950 - }, - { - "epoch": 0.05623833418175648, - "grad_norm": 2580.067138671875, - "learning_rate": 2.812121212121212e-05, - "loss": 187.2646, - "step": 6960 - }, - { - "epoch": 0.05631913638604061, - "grad_norm": 1153.5308837890625, - "learning_rate": 2.8161616161616162e-05, - "loss": 170.1935, - "step": 6970 - }, - { - "epoch": 0.056399938590324744, - "grad_norm": 842.653076171875, - "learning_rate": 2.82020202020202e-05, - "loss": 229.306, - "step": 6980 - }, - { - "epoch": 0.05648074079460888, - "grad_norm": 1086.96337890625, - "learning_rate": 2.8242424242424243e-05, - "loss": 180.9517, - "step": 6990 - }, - { - "epoch": 0.05656154299889301, - "grad_norm": 963.1438598632812, - "learning_rate": 2.8282828282828282e-05, - "loss": 186.2078, - "step": 7000 - }, - { - "epoch": 0.056642345203177145, - "grad_norm": 1010.3299560546875, - "learning_rate": 2.8323232323232324e-05, - "loss": 223.6001, - "step": 7010 - }, - { - "epoch": 0.056723147407461275, - "grad_norm": 1217.844482421875, - "learning_rate": 2.8363636363636363e-05, - "loss": 179.5198, - "step": 7020 - }, - { - "epoch": 0.05680394961174541, - "grad_norm": 1364.8577880859375, - "learning_rate": 2.8404040404040405e-05, - "loss": 212.5286, - "step": 7030 - }, - { - "epoch": 0.05688475181602954, - "grad_norm": 804.541748046875, - "learning_rate": 2.8444444444444447e-05, - "loss": 201.4965, - "step": 7040 - }, - { - "epoch": 0.056965554020313676, - "grad_norm": 2093.808349609375, - "learning_rate": 2.8484848484848486e-05, - "loss": 202.5039, - "step": 7050 - }, - { - "epoch": 0.057046356224597805, - "grad_norm": 1088.9471435546875, - "learning_rate": 2.852525252525253e-05, - "loss": 164.6322, - "step": 7060 - }, - { - "epoch": 0.05712715842888194, - "grad_norm": 1510.014404296875, - "learning_rate": 2.8565656565656567e-05, - "loss": 246.0487, - "step": 7070 - }, - { - "epoch": 0.05720796063316607, - "grad_norm": 617.3926391601562, - "learning_rate": 2.860606060606061e-05, - "loss": 166.5255, - "step": 7080 - }, - { - "epoch": 0.057288762837450206, - "grad_norm": 1088.094482421875, - "learning_rate": 2.864646464646465e-05, - "loss": 180.0012, - "step": 7090 - }, - { - "epoch": 0.057369565041734336, - "grad_norm": 754.35400390625, - "learning_rate": 2.868686868686869e-05, - "loss": 165.6718, - "step": 7100 - }, - { - "epoch": 0.05745036724601847, - "grad_norm": 847.3502197265625, - "learning_rate": 2.872727272727273e-05, - "loss": 150.3254, - "step": 7110 - }, - { - "epoch": 0.0575311694503026, - "grad_norm": 3462.79541015625, - "learning_rate": 2.876767676767677e-05, - "loss": 206.9913, - "step": 7120 - }, - { - "epoch": 0.05761197165458674, - "grad_norm": 1302.846923828125, - "learning_rate": 2.880808080808081e-05, - "loss": 218.2749, - "step": 7130 - }, - { - "epoch": 0.05769277385887087, - "grad_norm": 1508.3194580078125, - "learning_rate": 2.8848484848484853e-05, - "loss": 198.5009, - "step": 7140 - }, - { - "epoch": 0.057773576063155, - "grad_norm": 1260.8990478515625, - "learning_rate": 2.8888888888888888e-05, - "loss": 287.7319, - "step": 7150 - }, - { - "epoch": 0.05785437826743914, - "grad_norm": 2510.641357421875, - "learning_rate": 2.8929292929292927e-05, - "loss": 212.435, - "step": 7160 - }, - { - "epoch": 0.05793518047172327, - "grad_norm": 1610.3782958984375, - "learning_rate": 2.896969696969697e-05, - "loss": 195.9904, - "step": 7170 - }, - { - "epoch": 0.058015982676007403, - "grad_norm": 2051.1611328125, - "learning_rate": 2.9010101010101008e-05, - "loss": 230.746, - "step": 7180 - }, - { - "epoch": 0.05809678488029153, - "grad_norm": 1708.345703125, - "learning_rate": 2.905050505050505e-05, - "loss": 202.1169, - "step": 7190 - }, - { - "epoch": 0.05817758708457567, - "grad_norm": 991.0370483398438, - "learning_rate": 2.909090909090909e-05, - "loss": 182.8259, - "step": 7200 - }, - { - "epoch": 0.0582583892888598, - "grad_norm": 1151.1380615234375, - "learning_rate": 2.913131313131313e-05, - "loss": 241.0473, - "step": 7210 - }, - { - "epoch": 0.058339191493143934, - "grad_norm": 1103.3897705078125, - "learning_rate": 2.9171717171717174e-05, - "loss": 151.7667, - "step": 7220 - }, - { - "epoch": 0.05841999369742806, - "grad_norm": 1151.0849609375, - "learning_rate": 2.9212121212121213e-05, - "loss": 197.9749, - "step": 7230 - }, - { - "epoch": 0.0585007959017122, - "grad_norm": 983.3527221679688, - "learning_rate": 2.9252525252525255e-05, - "loss": 186.8989, - "step": 7240 - }, - { - "epoch": 0.05858159810599633, - "grad_norm": 669.5452880859375, - "learning_rate": 2.9292929292929294e-05, - "loss": 179.281, - "step": 7250 - }, - { - "epoch": 0.058662400310280464, - "grad_norm": 1186.9957275390625, - "learning_rate": 2.9333333333333336e-05, - "loss": 170.5622, - "step": 7260 - }, - { - "epoch": 0.0587432025145646, - "grad_norm": 1314.4376220703125, - "learning_rate": 2.9373737373737375e-05, - "loss": 175.4961, - "step": 7270 - }, - { - "epoch": 0.05882400471884873, - "grad_norm": 1278.834716796875, - "learning_rate": 2.9414141414141417e-05, - "loss": 183.9097, - "step": 7280 - }, - { - "epoch": 0.058904806923132866, - "grad_norm": 1116.2734375, - "learning_rate": 2.9454545454545456e-05, - "loss": 143.516, - "step": 7290 - }, - { - "epoch": 0.058985609127416995, - "grad_norm": 1352.628173828125, - "learning_rate": 2.9494949494949498e-05, - "loss": 204.3025, - "step": 7300 - }, - { - "epoch": 0.05906641133170113, - "grad_norm": 1091.3201904296875, - "learning_rate": 2.9535353535353537e-05, - "loss": 181.4761, - "step": 7310 - }, - { - "epoch": 0.05914721353598526, - "grad_norm": 1040.334716796875, - "learning_rate": 2.957575757575758e-05, - "loss": 170.6319, - "step": 7320 - }, - { - "epoch": 0.059228015740269396, - "grad_norm": 1476.125732421875, - "learning_rate": 2.9616161616161618e-05, - "loss": 161.3749, - "step": 7330 - }, - { - "epoch": 0.059308817944553525, - "grad_norm": 1488.0325927734375, - "learning_rate": 2.965656565656566e-05, - "loss": 183.3941, - "step": 7340 - }, - { - "epoch": 0.05938962014883766, - "grad_norm": 481.60833740234375, - "learning_rate": 2.96969696969697e-05, - "loss": 199.2278, - "step": 7350 - }, - { - "epoch": 0.05947042235312179, - "grad_norm": 1610.34521484375, - "learning_rate": 2.973737373737374e-05, - "loss": 201.723, - "step": 7360 - }, - { - "epoch": 0.05955122455740593, - "grad_norm": 1576.0423583984375, - "learning_rate": 2.9777777777777777e-05, - "loss": 222.0852, - "step": 7370 - }, - { - "epoch": 0.059632026761690056, - "grad_norm": 889.7515258789062, - "learning_rate": 2.9818181818181816e-05, - "loss": 193.5616, - "step": 7380 - }, - { - "epoch": 0.05971282896597419, - "grad_norm": 746.6514282226562, - "learning_rate": 2.9858585858585858e-05, - "loss": 166.2696, - "step": 7390 - }, - { - "epoch": 0.05979363117025832, - "grad_norm": 1730.69580078125, - "learning_rate": 2.98989898989899e-05, - "loss": 209.8799, - "step": 7400 - }, - { - "epoch": 0.05987443337454246, - "grad_norm": 690.6642456054688, - "learning_rate": 2.993939393939394e-05, - "loss": 230.9101, - "step": 7410 - }, - { - "epoch": 0.05995523557882659, - "grad_norm": 863.1697387695312, - "learning_rate": 2.997979797979798e-05, - "loss": 150.7177, - "step": 7420 - }, - { - "epoch": 0.06003603778311072, - "grad_norm": 1267.2069091796875, - "learning_rate": 3.002020202020202e-05, - "loss": 210.8308, - "step": 7430 - }, - { - "epoch": 0.06011683998739486, - "grad_norm": 1010.417724609375, - "learning_rate": 3.0060606060606062e-05, - "loss": 191.3645, - "step": 7440 - }, - { - "epoch": 0.06019764219167899, - "grad_norm": 689.7382202148438, - "learning_rate": 3.01010101010101e-05, - "loss": 187.7134, - "step": 7450 - }, - { - "epoch": 0.060278444395963124, - "grad_norm": 1864.760986328125, - "learning_rate": 3.0141414141414144e-05, - "loss": 214.7331, - "step": 7460 - }, - { - "epoch": 0.06035924660024725, - "grad_norm": 1038.37353515625, - "learning_rate": 3.0181818181818182e-05, - "loss": 217.9106, - "step": 7470 - }, - { - "epoch": 0.06044004880453139, - "grad_norm": 622.6604614257812, - "learning_rate": 3.0222222222222225e-05, - "loss": 155.4263, - "step": 7480 - }, - { - "epoch": 0.06052085100881552, - "grad_norm": 878.7538452148438, - "learning_rate": 3.0262626262626263e-05, - "loss": 231.1667, - "step": 7490 - }, - { - "epoch": 0.060601653213099654, - "grad_norm": 1581.2225341796875, - "learning_rate": 3.0303030303030306e-05, - "loss": 163.4888, - "step": 7500 - }, - { - "epoch": 0.060682455417383784, - "grad_norm": 1152.7149658203125, - "learning_rate": 3.0343434343434345e-05, - "loss": 182.3645, - "step": 7510 - }, - { - "epoch": 0.06076325762166792, - "grad_norm": 1109.6708984375, - "learning_rate": 3.0383838383838387e-05, - "loss": 175.0838, - "step": 7520 - }, - { - "epoch": 0.06084405982595205, - "grad_norm": 1053.8270263671875, - "learning_rate": 3.0424242424242426e-05, - "loss": 181.691, - "step": 7530 - }, - { - "epoch": 0.060924862030236185, - "grad_norm": 2113.046875, - "learning_rate": 3.0464646464646468e-05, - "loss": 224.7368, - "step": 7540 - }, - { - "epoch": 0.06100566423452032, - "grad_norm": 1166.90478515625, - "learning_rate": 3.050505050505051e-05, - "loss": 206.4759, - "step": 7550 - }, - { - "epoch": 0.06108646643880445, - "grad_norm": 1273.3836669921875, - "learning_rate": 3.054545454545455e-05, - "loss": 171.2801, - "step": 7560 - }, - { - "epoch": 0.061167268643088586, - "grad_norm": 2534.885498046875, - "learning_rate": 3.058585858585859e-05, - "loss": 159.7586, - "step": 7570 - }, - { - "epoch": 0.061248070847372715, - "grad_norm": 3763.103515625, - "learning_rate": 3.062626262626262e-05, - "loss": 323.4677, - "step": 7580 - }, - { - "epoch": 0.06132887305165685, - "grad_norm": 1977.9522705078125, - "learning_rate": 3.066666666666667e-05, - "loss": 227.4736, - "step": 7590 - }, - { - "epoch": 0.06140967525594098, - "grad_norm": 1690.8280029296875, - "learning_rate": 3.070707070707071e-05, - "loss": 192.4362, - "step": 7600 - }, - { - "epoch": 0.06149047746022512, - "grad_norm": 1523.7828369140625, - "learning_rate": 3.074747474747475e-05, - "loss": 234.8535, - "step": 7610 - }, - { - "epoch": 0.061571279664509246, - "grad_norm": 1146.36865234375, - "learning_rate": 3.0787878787878786e-05, - "loss": 142.9756, - "step": 7620 - }, - { - "epoch": 0.06165208186879338, - "grad_norm": 895.3403930664062, - "learning_rate": 3.082828282828283e-05, - "loss": 201.5379, - "step": 7630 - }, - { - "epoch": 0.06173288407307751, - "grad_norm": 1039.900634765625, - "learning_rate": 3.086868686868687e-05, - "loss": 230.2974, - "step": 7640 - }, - { - "epoch": 0.06181368627736165, - "grad_norm": 1130.9986572265625, - "learning_rate": 3.090909090909091e-05, - "loss": 189.1531, - "step": 7650 - }, - { - "epoch": 0.061894488481645776, - "grad_norm": 1224.142822265625, - "learning_rate": 3.094949494949495e-05, - "loss": 204.0206, - "step": 7660 - }, - { - "epoch": 0.06197529068592991, - "grad_norm": 2115.472412109375, - "learning_rate": 3.098989898989899e-05, - "loss": 180.536, - "step": 7670 - }, - { - "epoch": 0.06205609289021405, - "grad_norm": 779.9313354492188, - "learning_rate": 3.103030303030303e-05, - "loss": 158.623, - "step": 7680 - }, - { - "epoch": 0.06213689509449818, - "grad_norm": 1337.7568359375, - "learning_rate": 3.107070707070707e-05, - "loss": 159.0383, - "step": 7690 - }, - { - "epoch": 0.062217697298782314, - "grad_norm": 1851.648193359375, - "learning_rate": 3.111111111111111e-05, - "loss": 179.8161, - "step": 7700 - }, - { - "epoch": 0.06229849950306644, - "grad_norm": 1469.6453857421875, - "learning_rate": 3.1151515151515156e-05, - "loss": 187.596, - "step": 7710 - }, - { - "epoch": 0.06237930170735058, - "grad_norm": 1624.6527099609375, - "learning_rate": 3.1191919191919194e-05, - "loss": 214.8479, - "step": 7720 - }, - { - "epoch": 0.06246010391163471, - "grad_norm": 1006.6346435546875, - "learning_rate": 3.123232323232323e-05, - "loss": 154.5748, - "step": 7730 - }, - { - "epoch": 0.06254090611591884, - "grad_norm": 1002.5286254882812, - "learning_rate": 3.127272727272728e-05, - "loss": 184.4432, - "step": 7740 - }, - { - "epoch": 0.06262170832020297, - "grad_norm": 1352.4193115234375, - "learning_rate": 3.131313131313132e-05, - "loss": 237.0036, - "step": 7750 - }, - { - "epoch": 0.06270251052448711, - "grad_norm": 1084.147216796875, - "learning_rate": 3.1353535353535357e-05, - "loss": 164.8318, - "step": 7760 - }, - { - "epoch": 0.06278331272877125, - "grad_norm": 1302.1048583984375, - "learning_rate": 3.1393939393939395e-05, - "loss": 164.3788, - "step": 7770 - }, - { - "epoch": 0.06286411493305537, - "grad_norm": 1383.396484375, - "learning_rate": 3.143434343434344e-05, - "loss": 175.5805, - "step": 7780 - }, - { - "epoch": 0.0629449171373395, - "grad_norm": 1246.53857421875, - "learning_rate": 3.147474747474747e-05, - "loss": 210.3966, - "step": 7790 - }, - { - "epoch": 0.06302571934162364, - "grad_norm": 1285.145263671875, - "learning_rate": 3.151515151515151e-05, - "loss": 246.1903, - "step": 7800 - }, - { - "epoch": 0.06310652154590778, - "grad_norm": 1620.3326416015625, - "learning_rate": 3.155555555555556e-05, - "loss": 196.0127, - "step": 7810 - }, - { - "epoch": 0.06318732375019191, - "grad_norm": 1016.9979858398438, - "learning_rate": 3.1595959595959596e-05, - "loss": 210.5301, - "step": 7820 - }, - { - "epoch": 0.06326812595447603, - "grad_norm": 1945.8780517578125, - "learning_rate": 3.1636363636363635e-05, - "loss": 239.3266, - "step": 7830 - }, - { - "epoch": 0.06334892815876017, - "grad_norm": 1864.5794677734375, - "learning_rate": 3.1676767676767674e-05, - "loss": 193.7567, - "step": 7840 - }, - { - "epoch": 0.0634297303630443, - "grad_norm": 1095.450927734375, - "learning_rate": 3.171717171717172e-05, - "loss": 191.1735, - "step": 7850 - }, - { - "epoch": 0.06351053256732844, - "grad_norm": 1031.504150390625, - "learning_rate": 3.175757575757576e-05, - "loss": 185.8655, - "step": 7860 - }, - { - "epoch": 0.06359133477161256, - "grad_norm": 1385.5076904296875, - "learning_rate": 3.17979797979798e-05, - "loss": 177.6908, - "step": 7870 - }, - { - "epoch": 0.0636721369758967, - "grad_norm": 1074.5181884765625, - "learning_rate": 3.1838383838383836e-05, - "loss": 204.0031, - "step": 7880 - }, - { - "epoch": 0.06375293918018084, - "grad_norm": 953.3314208984375, - "learning_rate": 3.187878787878788e-05, - "loss": 180.9185, - "step": 7890 - }, - { - "epoch": 0.06383374138446497, - "grad_norm": 868.8043823242188, - "learning_rate": 3.191919191919192e-05, - "loss": 220.1422, - "step": 7900 - }, - { - "epoch": 0.0639145435887491, - "grad_norm": 5921.494140625, - "learning_rate": 3.195959595959596e-05, - "loss": 167.039, - "step": 7910 - }, - { - "epoch": 0.06399534579303323, - "grad_norm": 1500.1710205078125, - "learning_rate": 3.2000000000000005e-05, - "loss": 138.7559, - "step": 7920 - }, - { - "epoch": 0.06407614799731737, - "grad_norm": 1143.7266845703125, - "learning_rate": 3.2040404040404044e-05, - "loss": 195.0978, - "step": 7930 - }, - { - "epoch": 0.0641569502016015, - "grad_norm": 523.0445556640625, - "learning_rate": 3.208080808080808e-05, - "loss": 151.1692, - "step": 7940 - }, - { - "epoch": 0.06423775240588563, - "grad_norm": 2158.39013671875, - "learning_rate": 3.212121212121212e-05, - "loss": 236.7984, - "step": 7950 - }, - { - "epoch": 0.06431855461016976, - "grad_norm": 659.3209228515625, - "learning_rate": 3.216161616161617e-05, - "loss": 181.1136, - "step": 7960 - }, - { - "epoch": 0.0643993568144539, - "grad_norm": 608.638671875, - "learning_rate": 3.2202020202020206e-05, - "loss": 194.2183, - "step": 7970 - }, - { - "epoch": 0.06448015901873803, - "grad_norm": 1122.7078857421875, - "learning_rate": 3.2242424242424245e-05, - "loss": 160.3627, - "step": 7980 - }, - { - "epoch": 0.06456096122302217, - "grad_norm": 1686.80810546875, - "learning_rate": 3.2282828282828284e-05, - "loss": 223.456, - "step": 7990 - }, - { - "epoch": 0.06464176342730629, - "grad_norm": 1573.1317138671875, - "learning_rate": 3.232323232323233e-05, - "loss": 224.0322, - "step": 8000 - }, - { - "epoch": 0.06472256563159043, - "grad_norm": 1321.1458740234375, - "learning_rate": 3.236363636363636e-05, - "loss": 252.9104, - "step": 8010 - }, - { - "epoch": 0.06480336783587456, - "grad_norm": 1179.701171875, - "learning_rate": 3.24040404040404e-05, - "loss": 223.4346, - "step": 8020 - }, - { - "epoch": 0.0648841700401587, - "grad_norm": 977.9105224609375, - "learning_rate": 3.2444444444444446e-05, - "loss": 152.0468, - "step": 8030 - }, - { - "epoch": 0.06496497224444282, - "grad_norm": 2066.90380859375, - "learning_rate": 3.2484848484848485e-05, - "loss": 190.8667, - "step": 8040 - }, - { - "epoch": 0.06504577444872696, - "grad_norm": 3095.08935546875, - "learning_rate": 3.2525252525252524e-05, - "loss": 192.0303, - "step": 8050 - }, - { - "epoch": 0.0651265766530111, - "grad_norm": 2343.95947265625, - "learning_rate": 3.256565656565656e-05, - "loss": 157.5384, - "step": 8060 - }, - { - "epoch": 0.06520737885729523, - "grad_norm": 1510.8023681640625, - "learning_rate": 3.260606060606061e-05, - "loss": 239.6892, - "step": 8070 - }, - { - "epoch": 0.06528818106157935, - "grad_norm": 1445.597900390625, - "learning_rate": 3.264646464646465e-05, - "loss": 177.4803, - "step": 8080 - }, - { - "epoch": 0.06536898326586349, - "grad_norm": 1667.5521240234375, - "learning_rate": 3.2686868686868686e-05, - "loss": 190.0459, - "step": 8090 - }, - { - "epoch": 0.06544978547014763, - "grad_norm": 925.2418212890625, - "learning_rate": 3.272727272727273e-05, - "loss": 190.8257, - "step": 8100 - }, - { - "epoch": 0.06553058767443176, - "grad_norm": 1247.4376220703125, - "learning_rate": 3.276767676767677e-05, - "loss": 203.9773, - "step": 8110 - }, - { - "epoch": 0.0656113898787159, - "grad_norm": 1212.892822265625, - "learning_rate": 3.280808080808081e-05, - "loss": 203.7381, - "step": 8120 - }, - { - "epoch": 0.06569219208300002, - "grad_norm": 1091.890380859375, - "learning_rate": 3.284848484848485e-05, - "loss": 194.8187, - "step": 8130 - }, - { - "epoch": 0.06577299428728416, - "grad_norm": 2029.2864990234375, - "learning_rate": 3.2888888888888894e-05, - "loss": 246.8937, - "step": 8140 - }, - { - "epoch": 0.06585379649156829, - "grad_norm": 920.1378784179688, - "learning_rate": 3.292929292929293e-05, - "loss": 215.9934, - "step": 8150 - }, - { - "epoch": 0.06593459869585243, - "grad_norm": 1521.0574951171875, - "learning_rate": 3.296969696969697e-05, - "loss": 167.3099, - "step": 8160 - }, - { - "epoch": 0.06601540090013655, - "grad_norm": 1420.7525634765625, - "learning_rate": 3.301010101010101e-05, - "loss": 206.7512, - "step": 8170 - }, - { - "epoch": 0.06609620310442069, - "grad_norm": 840.5839233398438, - "learning_rate": 3.3050505050505056e-05, - "loss": 202.7185, - "step": 8180 - }, - { - "epoch": 0.06617700530870482, - "grad_norm": 1193.502197265625, - "learning_rate": 3.3090909090909095e-05, - "loss": 160.3612, - "step": 8190 - }, - { - "epoch": 0.06625780751298896, - "grad_norm": 2222.778564453125, - "learning_rate": 3.3131313131313134e-05, - "loss": 150.92, - "step": 8200 - }, - { - "epoch": 0.06633860971727308, - "grad_norm": 776.4454956054688, - "learning_rate": 3.317171717171717e-05, - "loss": 159.6749, - "step": 8210 - }, - { - "epoch": 0.06641941192155722, - "grad_norm": 1179.86279296875, - "learning_rate": 3.321212121212121e-05, - "loss": 147.1537, - "step": 8220 - }, - { - "epoch": 0.06650021412584135, - "grad_norm": 1168.2757568359375, - "learning_rate": 3.325252525252525e-05, - "loss": 163.5715, - "step": 8230 - }, - { - "epoch": 0.06658101633012549, - "grad_norm": 996.3876953125, - "learning_rate": 3.329292929292929e-05, - "loss": 156.5557, - "step": 8240 - }, - { - "epoch": 0.06666181853440963, - "grad_norm": 1006.9996337890625, - "learning_rate": 3.3333333333333335e-05, - "loss": 176.6802, - "step": 8250 - }, - { - "epoch": 0.06674262073869375, - "grad_norm": 877.4000854492188, - "learning_rate": 3.3373737373737374e-05, - "loss": 182.8363, - "step": 8260 - }, - { - "epoch": 0.06682342294297788, - "grad_norm": 2153.091552734375, - "learning_rate": 3.341414141414141e-05, - "loss": 184.7595, - "step": 8270 - }, - { - "epoch": 0.06690422514726202, - "grad_norm": 1884.7989501953125, - "learning_rate": 3.345454545454546e-05, - "loss": 197.672, - "step": 8280 - }, - { - "epoch": 0.06698502735154616, - "grad_norm": 1494.185791015625, - "learning_rate": 3.34949494949495e-05, - "loss": 178.7865, - "step": 8290 - }, - { - "epoch": 0.06706582955583028, - "grad_norm": 2600.398193359375, - "learning_rate": 3.3535353535353536e-05, - "loss": 222.7885, - "step": 8300 - }, - { - "epoch": 0.06714663176011441, - "grad_norm": 1300.013671875, - "learning_rate": 3.3575757575757575e-05, - "loss": 182.5449, - "step": 8310 - }, - { - "epoch": 0.06722743396439855, - "grad_norm": 2145.218505859375, - "learning_rate": 3.361616161616162e-05, - "loss": 281.7514, - "step": 8320 - }, - { - "epoch": 0.06730823616868269, - "grad_norm": 1519.411865234375, - "learning_rate": 3.365656565656566e-05, - "loss": 167.7319, - "step": 8330 - }, - { - "epoch": 0.06738903837296681, - "grad_norm": 750.0274047851562, - "learning_rate": 3.36969696969697e-05, - "loss": 173.5281, - "step": 8340 - }, - { - "epoch": 0.06746984057725094, - "grad_norm": 1222.1435546875, - "learning_rate": 3.373737373737374e-05, - "loss": 162.324, - "step": 8350 - }, - { - "epoch": 0.06755064278153508, - "grad_norm": 955.3302001953125, - "learning_rate": 3.377777777777778e-05, - "loss": 184.845, - "step": 8360 - }, - { - "epoch": 0.06763144498581922, - "grad_norm": 1112.2943115234375, - "learning_rate": 3.381818181818182e-05, - "loss": 160.8671, - "step": 8370 - }, - { - "epoch": 0.06771224719010335, - "grad_norm": 1163.462646484375, - "learning_rate": 3.385858585858586e-05, - "loss": 147.4451, - "step": 8380 - }, - { - "epoch": 0.06779304939438748, - "grad_norm": 925.3172607421875, - "learning_rate": 3.38989898989899e-05, - "loss": 175.2849, - "step": 8390 - }, - { - "epoch": 0.06787385159867161, - "grad_norm": 1990.568359375, - "learning_rate": 3.3939393939393945e-05, - "loss": 199.808, - "step": 8400 - }, - { - "epoch": 0.06795465380295575, - "grad_norm": 2128.471923828125, - "learning_rate": 3.3979797979797984e-05, - "loss": 205.3155, - "step": 8410 - }, - { - "epoch": 0.06803545600723988, - "grad_norm": 2691.37353515625, - "learning_rate": 3.402020202020202e-05, - "loss": 216.1895, - "step": 8420 - }, - { - "epoch": 0.068116258211524, - "grad_norm": 3363.869140625, - "learning_rate": 3.406060606060606e-05, - "loss": 201.8374, - "step": 8430 - }, - { - "epoch": 0.06819706041580814, - "grad_norm": 1438.0633544921875, - "learning_rate": 3.41010101010101e-05, - "loss": 222.5396, - "step": 8440 - }, - { - "epoch": 0.06827786262009228, - "grad_norm": 1703.8653564453125, - "learning_rate": 3.414141414141414e-05, - "loss": 213.8756, - "step": 8450 - }, - { - "epoch": 0.06835866482437641, - "grad_norm": 1938.7177734375, - "learning_rate": 3.4181818181818185e-05, - "loss": 215.6468, - "step": 8460 - }, - { - "epoch": 0.06843946702866054, - "grad_norm": 851.2493896484375, - "learning_rate": 3.4222222222222224e-05, - "loss": 182.1204, - "step": 8470 - }, - { - "epoch": 0.06852026923294467, - "grad_norm": 1202.3365478515625, - "learning_rate": 3.426262626262626e-05, - "loss": 183.2559, - "step": 8480 - }, - { - "epoch": 0.06860107143722881, - "grad_norm": 1543.7257080078125, - "learning_rate": 3.43030303030303e-05, - "loss": 239.1145, - "step": 8490 - }, - { - "epoch": 0.06868187364151294, - "grad_norm": 748.701171875, - "learning_rate": 3.434343434343435e-05, - "loss": 285.6954, - "step": 8500 - }, - { - "epoch": 0.06876267584579707, - "grad_norm": 5747.30224609375, - "learning_rate": 3.4383838383838386e-05, - "loss": 194.7742, - "step": 8510 - }, - { - "epoch": 0.0688434780500812, - "grad_norm": 1587.478271484375, - "learning_rate": 3.4424242424242425e-05, - "loss": 213.0056, - "step": 8520 - }, - { - "epoch": 0.06892428025436534, - "grad_norm": 907.9869995117188, - "learning_rate": 3.4464646464646463e-05, - "loss": 183.2854, - "step": 8530 - }, - { - "epoch": 0.06900508245864947, - "grad_norm": 1065.7462158203125, - "learning_rate": 3.450505050505051e-05, - "loss": 147.3901, - "step": 8540 - }, - { - "epoch": 0.06908588466293361, - "grad_norm": 1654.4375, - "learning_rate": 3.454545454545455e-05, - "loss": 152.4811, - "step": 8550 - }, - { - "epoch": 0.06916668686721773, - "grad_norm": 1075.144775390625, - "learning_rate": 3.458585858585859e-05, - "loss": 217.3591, - "step": 8560 - }, - { - "epoch": 0.06924748907150187, - "grad_norm": 1216.4287109375, - "learning_rate": 3.4626262626262626e-05, - "loss": 184.7878, - "step": 8570 - }, - { - "epoch": 0.069328291275786, - "grad_norm": 1143.3253173828125, - "learning_rate": 3.466666666666667e-05, - "loss": 185.8244, - "step": 8580 - }, - { - "epoch": 0.06940909348007014, - "grad_norm": 2943.891357421875, - "learning_rate": 3.470707070707071e-05, - "loss": 203.6546, - "step": 8590 - }, - { - "epoch": 0.06948989568435426, - "grad_norm": 562.7566528320312, - "learning_rate": 3.474747474747475e-05, - "loss": 224.7354, - "step": 8600 - }, - { - "epoch": 0.0695706978886384, - "grad_norm": 1995.734130859375, - "learning_rate": 3.4787878787878795e-05, - "loss": 178.8969, - "step": 8610 - }, - { - "epoch": 0.06965150009292254, - "grad_norm": 1593.8944091796875, - "learning_rate": 3.4828282828282834e-05, - "loss": 164.9677, - "step": 8620 - }, - { - "epoch": 0.06973230229720667, - "grad_norm": 715.6300048828125, - "learning_rate": 3.486868686868687e-05, - "loss": 147.6377, - "step": 8630 - }, - { - "epoch": 0.0698131045014908, - "grad_norm": 3263.21044921875, - "learning_rate": 3.490909090909091e-05, - "loss": 235.9829, - "step": 8640 - }, - { - "epoch": 0.06989390670577493, - "grad_norm": 3928.5576171875, - "learning_rate": 3.494949494949495e-05, - "loss": 220.889, - "step": 8650 - }, - { - "epoch": 0.06997470891005907, - "grad_norm": 1176.6265869140625, - "learning_rate": 3.498989898989899e-05, - "loss": 173.9785, - "step": 8660 - }, - { - "epoch": 0.0700555111143432, - "grad_norm": 1472.33349609375, - "learning_rate": 3.503030303030303e-05, - "loss": 166.185, - "step": 8670 - }, - { - "epoch": 0.07013631331862734, - "grad_norm": 943.4843139648438, - "learning_rate": 3.5070707070707073e-05, - "loss": 196.9754, - "step": 8680 - }, - { - "epoch": 0.07021711552291146, - "grad_norm": 1376.169189453125, - "learning_rate": 3.511111111111111e-05, - "loss": 249.9044, - "step": 8690 - }, - { - "epoch": 0.0702979177271956, - "grad_norm": 862.705078125, - "learning_rate": 3.515151515151515e-05, - "loss": 134.6473, - "step": 8700 - }, - { - "epoch": 0.07037871993147973, - "grad_norm": 1661.3258056640625, - "learning_rate": 3.519191919191919e-05, - "loss": 260.4335, - "step": 8710 - }, - { - "epoch": 0.07045952213576387, - "grad_norm": 858.2864379882812, - "learning_rate": 3.5232323232323236e-05, - "loss": 156.1466, - "step": 8720 - }, - { - "epoch": 0.07054032434004799, - "grad_norm": 1033.8033447265625, - "learning_rate": 3.5272727272727274e-05, - "loss": 158.8132, - "step": 8730 - }, - { - "epoch": 0.07062112654433213, - "grad_norm": 2244.4833984375, - "learning_rate": 3.531313131313131e-05, - "loss": 185.2664, - "step": 8740 - }, - { - "epoch": 0.07070192874861626, - "grad_norm": 828.0194091796875, - "learning_rate": 3.535353535353535e-05, - "loss": 189.655, - "step": 8750 - }, - { - "epoch": 0.0707827309529004, - "grad_norm": 764.8339233398438, - "learning_rate": 3.53939393939394e-05, - "loss": 169.7833, - "step": 8760 - }, - { - "epoch": 0.07086353315718452, - "grad_norm": 1434.6533203125, - "learning_rate": 3.543434343434344e-05, - "loss": 169.9032, - "step": 8770 - }, - { - "epoch": 0.07094433536146866, - "grad_norm": 1811.5740966796875, - "learning_rate": 3.5474747474747475e-05, - "loss": 256.6501, - "step": 8780 - }, - { - "epoch": 0.0710251375657528, - "grad_norm": 923.5958251953125, - "learning_rate": 3.551515151515152e-05, - "loss": 157.0084, - "step": 8790 - }, - { - "epoch": 0.07110593977003693, - "grad_norm": 1671.8385009765625, - "learning_rate": 3.555555555555556e-05, - "loss": 206.8505, - "step": 8800 - }, - { - "epoch": 0.07118674197432107, - "grad_norm": 2508.17626953125, - "learning_rate": 3.55959595959596e-05, - "loss": 204.866, - "step": 8810 - }, - { - "epoch": 0.07126754417860519, - "grad_norm": 852.1519775390625, - "learning_rate": 3.563636363636364e-05, - "loss": 147.0554, - "step": 8820 - }, - { - "epoch": 0.07134834638288932, - "grad_norm": 925.072021484375, - "learning_rate": 3.567676767676768e-05, - "loss": 205.7363, - "step": 8830 - }, - { - "epoch": 0.07142914858717346, - "grad_norm": 1310.513916015625, - "learning_rate": 3.571717171717172e-05, - "loss": 252.9427, - "step": 8840 - }, - { - "epoch": 0.0715099507914576, - "grad_norm": 1795.476806640625, - "learning_rate": 3.575757575757576e-05, - "loss": 184.422, - "step": 8850 - }, - { - "epoch": 0.07159075299574172, - "grad_norm": 1071.3101806640625, - "learning_rate": 3.57979797979798e-05, - "loss": 165.8845, - "step": 8860 - }, - { - "epoch": 0.07167155520002585, - "grad_norm": 724.8527221679688, - "learning_rate": 3.583838383838384e-05, - "loss": 180.9637, - "step": 8870 - }, - { - "epoch": 0.07175235740430999, - "grad_norm": 999.9872436523438, - "learning_rate": 3.587878787878788e-05, - "loss": 186.5519, - "step": 8880 - }, - { - "epoch": 0.07183315960859413, - "grad_norm": 1380.1075439453125, - "learning_rate": 3.5919191919191916e-05, - "loss": 224.0065, - "step": 8890 - }, - { - "epoch": 0.07191396181287825, - "grad_norm": 1093.9498291015625, - "learning_rate": 3.595959595959596e-05, - "loss": 165.8288, - "step": 8900 - }, - { - "epoch": 0.07199476401716239, - "grad_norm": 2711.7353515625, - "learning_rate": 3.6e-05, - "loss": 163.5886, - "step": 8910 - }, - { - "epoch": 0.07207556622144652, - "grad_norm": 1537.253662109375, - "learning_rate": 3.604040404040404e-05, - "loss": 165.1541, - "step": 8920 - }, - { - "epoch": 0.07215636842573066, - "grad_norm": 1100.548095703125, - "learning_rate": 3.608080808080808e-05, - "loss": 169.8857, - "step": 8930 - }, - { - "epoch": 0.0722371706300148, - "grad_norm": 1088.0587158203125, - "learning_rate": 3.6121212121212124e-05, - "loss": 186.5488, - "step": 8940 - }, - { - "epoch": 0.07231797283429892, - "grad_norm": 1160.9769287109375, - "learning_rate": 3.616161616161616e-05, - "loss": 190.4372, - "step": 8950 - }, - { - "epoch": 0.07239877503858305, - "grad_norm": 1117.179443359375, - "learning_rate": 3.62020202020202e-05, - "loss": 134.2678, - "step": 8960 - }, - { - "epoch": 0.07247957724286719, - "grad_norm": 1956.5089111328125, - "learning_rate": 3.624242424242425e-05, - "loss": 172.5972, - "step": 8970 - }, - { - "epoch": 0.07256037944715132, - "grad_norm": 2694.300537109375, - "learning_rate": 3.6282828282828286e-05, - "loss": 200.7171, - "step": 8980 - }, - { - "epoch": 0.07264118165143545, - "grad_norm": 1112.63720703125, - "learning_rate": 3.6323232323232325e-05, - "loss": 151.1802, - "step": 8990 - }, - { - "epoch": 0.07272198385571958, - "grad_norm": 2176.663330078125, - "learning_rate": 3.6363636363636364e-05, - "loss": 162.1838, - "step": 9000 - }, - { - "epoch": 0.07280278606000372, - "grad_norm": 1126.674072265625, - "learning_rate": 3.640404040404041e-05, - "loss": 180.7603, - "step": 9010 - }, - { - "epoch": 0.07288358826428785, - "grad_norm": 1244.1241455078125, - "learning_rate": 3.644444444444445e-05, - "loss": 114.9891, - "step": 9020 - }, - { - "epoch": 0.07296439046857198, - "grad_norm": 2782.807373046875, - "learning_rate": 3.648484848484849e-05, - "loss": 199.3326, - "step": 9030 - }, - { - "epoch": 0.07304519267285611, - "grad_norm": 1218.721435546875, - "learning_rate": 3.6525252525252526e-05, - "loss": 143.516, - "step": 9040 - }, - { - "epoch": 0.07312599487714025, - "grad_norm": 799.7451782226562, - "learning_rate": 3.656565656565657e-05, - "loss": 131.4932, - "step": 9050 - }, - { - "epoch": 0.07320679708142439, - "grad_norm": 547.2342529296875, - "learning_rate": 3.660606060606061e-05, - "loss": 151.0718, - "step": 9060 - }, - { - "epoch": 0.07328759928570851, - "grad_norm": 1480.14501953125, - "learning_rate": 3.664646464646464e-05, - "loss": 201.2714, - "step": 9070 - }, - { - "epoch": 0.07336840148999264, - "grad_norm": 516.29931640625, - "learning_rate": 3.668686868686869e-05, - "loss": 174.2223, - "step": 9080 - }, - { - "epoch": 0.07344920369427678, - "grad_norm": 912.0347900390625, - "learning_rate": 3.672727272727273e-05, - "loss": 180.7495, - "step": 9090 - }, - { - "epoch": 0.07353000589856092, - "grad_norm": 1243.236083984375, - "learning_rate": 3.6767676767676766e-05, - "loss": 189.5693, - "step": 9100 - }, - { - "epoch": 0.07361080810284505, - "grad_norm": 742.5632934570312, - "learning_rate": 3.6808080808080805e-05, - "loss": 194.6184, - "step": 9110 - }, - { - "epoch": 0.07369161030712917, - "grad_norm": 1145.2069091796875, - "learning_rate": 3.684848484848485e-05, - "loss": 185.7565, - "step": 9120 - }, - { - "epoch": 0.07377241251141331, - "grad_norm": 1086.181640625, - "learning_rate": 3.688888888888889e-05, - "loss": 167.5214, - "step": 9130 - }, - { - "epoch": 0.07385321471569745, - "grad_norm": 1321.85400390625, - "learning_rate": 3.692929292929293e-05, - "loss": 221.075, - "step": 9140 - }, - { - "epoch": 0.07393401691998158, - "grad_norm": 2168.907958984375, - "learning_rate": 3.6969696969696974e-05, - "loss": 219.8519, - "step": 9150 - }, - { - "epoch": 0.0740148191242657, - "grad_norm": 1007.1217041015625, - "learning_rate": 3.701010101010101e-05, - "loss": 194.1429, - "step": 9160 - }, - { - "epoch": 0.07409562132854984, - "grad_norm": 1099.997802734375, - "learning_rate": 3.705050505050505e-05, - "loss": 220.093, - "step": 9170 - }, - { - "epoch": 0.07417642353283398, - "grad_norm": 745.4526977539062, - "learning_rate": 3.709090909090909e-05, - "loss": 143.6606, - "step": 9180 - }, - { - "epoch": 0.07425722573711811, - "grad_norm": 2050.18212890625, - "learning_rate": 3.7131313131313136e-05, - "loss": 222.1462, - "step": 9190 - }, - { - "epoch": 0.07433802794140223, - "grad_norm": 2366.43896484375, - "learning_rate": 3.7171717171717175e-05, - "loss": 191.5783, - "step": 9200 - }, - { - "epoch": 0.07441883014568637, - "grad_norm": 905.9826049804688, - "learning_rate": 3.7212121212121214e-05, - "loss": 168.3594, - "step": 9210 - }, - { - "epoch": 0.07449963234997051, - "grad_norm": 1320.197509765625, - "learning_rate": 3.725252525252525e-05, - "loss": 140.7596, - "step": 9220 - }, - { - "epoch": 0.07458043455425464, - "grad_norm": 3290.6748046875, - "learning_rate": 3.72929292929293e-05, - "loss": 161.625, - "step": 9230 - }, - { - "epoch": 0.07466123675853878, - "grad_norm": 1262.2984619140625, - "learning_rate": 3.733333333333334e-05, - "loss": 163.4713, - "step": 9240 - }, - { - "epoch": 0.0747420389628229, - "grad_norm": 1434.2923583984375, - "learning_rate": 3.7373737373737376e-05, - "loss": 168.6003, - "step": 9250 - }, - { - "epoch": 0.07482284116710704, - "grad_norm": 716.86083984375, - "learning_rate": 3.7414141414141415e-05, - "loss": 165.9515, - "step": 9260 - }, - { - "epoch": 0.07490364337139117, - "grad_norm": 4190.2119140625, - "learning_rate": 3.745454545454546e-05, - "loss": 201.2678, - "step": 9270 - }, - { - "epoch": 0.07498444557567531, - "grad_norm": 1093.3636474609375, - "learning_rate": 3.74949494949495e-05, - "loss": 145.2475, - "step": 9280 - }, - { - "epoch": 0.07506524777995943, - "grad_norm": 1217.2969970703125, - "learning_rate": 3.753535353535353e-05, - "loss": 219.3316, - "step": 9290 - }, - { - "epoch": 0.07514604998424357, - "grad_norm": 1122.0589599609375, - "learning_rate": 3.757575757575758e-05, - "loss": 201.7304, - "step": 9300 - }, - { - "epoch": 0.0752268521885277, - "grad_norm": 779.33447265625, - "learning_rate": 3.7616161616161616e-05, - "loss": 199.0558, - "step": 9310 - }, - { - "epoch": 0.07530765439281184, - "grad_norm": 1110.2554931640625, - "learning_rate": 3.7656565656565655e-05, - "loss": 190.4773, - "step": 9320 - }, - { - "epoch": 0.07538845659709596, - "grad_norm": 1762.0330810546875, - "learning_rate": 3.76969696969697e-05, - "loss": 151.1484, - "step": 9330 - }, - { - "epoch": 0.0754692588013801, - "grad_norm": 1020.6593627929688, - "learning_rate": 3.773737373737374e-05, - "loss": 169.2854, - "step": 9340 - }, - { - "epoch": 0.07555006100566423, - "grad_norm": 1202.7464599609375, - "learning_rate": 3.777777777777778e-05, - "loss": 164.9156, - "step": 9350 - }, - { - "epoch": 0.07563086320994837, - "grad_norm": 1249.380126953125, - "learning_rate": 3.781818181818182e-05, - "loss": 227.8868, - "step": 9360 - }, - { - "epoch": 0.0757116654142325, - "grad_norm": 1847.7193603515625, - "learning_rate": 3.785858585858586e-05, - "loss": 176.2305, - "step": 9370 - }, - { - "epoch": 0.07579246761851663, - "grad_norm": 1446.559814453125, - "learning_rate": 3.78989898989899e-05, - "loss": 147.3983, - "step": 9380 - }, - { - "epoch": 0.07587326982280077, - "grad_norm": 1216.9862060546875, - "learning_rate": 3.793939393939394e-05, - "loss": 165.7989, - "step": 9390 - }, - { - "epoch": 0.0759540720270849, - "grad_norm": 2848.1337890625, - "learning_rate": 3.797979797979798e-05, - "loss": 157.2326, - "step": 9400 - }, - { - "epoch": 0.07603487423136904, - "grad_norm": 940.07177734375, - "learning_rate": 3.8020202020202025e-05, - "loss": 221.2202, - "step": 9410 - }, - { - "epoch": 0.07611567643565316, - "grad_norm": 1160.6881103515625, - "learning_rate": 3.8060606060606064e-05, - "loss": 158.6488, - "step": 9420 - }, - { - "epoch": 0.0761964786399373, - "grad_norm": 2879.822998046875, - "learning_rate": 3.81010101010101e-05, - "loss": 223.0843, - "step": 9430 - }, - { - "epoch": 0.07627728084422143, - "grad_norm": 1498.8753662109375, - "learning_rate": 3.814141414141414e-05, - "loss": 161.5878, - "step": 9440 - }, - { - "epoch": 0.07635808304850557, - "grad_norm": 1229.455078125, - "learning_rate": 3.818181818181819e-05, - "loss": 198.7026, - "step": 9450 - }, - { - "epoch": 0.07643888525278969, - "grad_norm": 929.4089965820312, - "learning_rate": 3.8222222222222226e-05, - "loss": 251.8946, - "step": 9460 - }, - { - "epoch": 0.07651968745707383, - "grad_norm": 934.8760375976562, - "learning_rate": 3.8262626262626265e-05, - "loss": 162.8998, - "step": 9470 - }, - { - "epoch": 0.07660048966135796, - "grad_norm": 915.323974609375, - "learning_rate": 3.830303030303031e-05, - "loss": 171.6096, - "step": 9480 - }, - { - "epoch": 0.0766812918656421, - "grad_norm": 1115.416748046875, - "learning_rate": 3.834343434343435e-05, - "loss": 163.5847, - "step": 9490 - }, - { - "epoch": 0.07676209406992623, - "grad_norm": 2073.390869140625, - "learning_rate": 3.838383838383838e-05, - "loss": 261.5975, - "step": 9500 - }, - { - "epoch": 0.07684289627421036, - "grad_norm": 1260.9718017578125, - "learning_rate": 3.842424242424243e-05, - "loss": 131.5614, - "step": 9510 - }, - { - "epoch": 0.07692369847849449, - "grad_norm": 1239.3568115234375, - "learning_rate": 3.8464646464646466e-05, - "loss": 164.9494, - "step": 9520 - }, - { - "epoch": 0.07700450068277863, - "grad_norm": 1221.8023681640625, - "learning_rate": 3.8505050505050505e-05, - "loss": 245.5353, - "step": 9530 - }, - { - "epoch": 0.07708530288706276, - "grad_norm": 533.4956665039062, - "learning_rate": 3.8545454545454544e-05, - "loss": 149.1819, - "step": 9540 - }, - { - "epoch": 0.07716610509134689, - "grad_norm": 1410.545166015625, - "learning_rate": 3.858585858585859e-05, - "loss": 172.351, - "step": 9550 - }, - { - "epoch": 0.07724690729563102, - "grad_norm": 927.8252563476562, - "learning_rate": 3.862626262626263e-05, - "loss": 152.7654, - "step": 9560 - }, - { - "epoch": 0.07732770949991516, - "grad_norm": 1244.0257568359375, - "learning_rate": 3.866666666666667e-05, - "loss": 171.8292, - "step": 9570 - }, - { - "epoch": 0.0774085117041993, - "grad_norm": 664.2005615234375, - "learning_rate": 3.8707070707070706e-05, - "loss": 109.5876, - "step": 9580 - }, - { - "epoch": 0.07748931390848342, - "grad_norm": 1334.42626953125, - "learning_rate": 3.874747474747475e-05, - "loss": 129.1926, - "step": 9590 - }, - { - "epoch": 0.07757011611276755, - "grad_norm": 1236.3963623046875, - "learning_rate": 3.878787878787879e-05, - "loss": 195.8564, - "step": 9600 - }, - { - "epoch": 0.07765091831705169, - "grad_norm": 517.2808227539062, - "learning_rate": 3.882828282828283e-05, - "loss": 213.7913, - "step": 9610 - }, - { - "epoch": 0.07773172052133583, - "grad_norm": 1618.37890625, - "learning_rate": 3.886868686868687e-05, - "loss": 187.1561, - "step": 9620 - }, - { - "epoch": 0.07781252272561995, - "grad_norm": 1312.72705078125, - "learning_rate": 3.8909090909090914e-05, - "loss": 192.0711, - "step": 9630 - }, - { - "epoch": 0.07789332492990408, - "grad_norm": 970.0208129882812, - "learning_rate": 3.894949494949495e-05, - "loss": 150.2422, - "step": 9640 - }, - { - "epoch": 0.07797412713418822, - "grad_norm": 1302.1982421875, - "learning_rate": 3.898989898989899e-05, - "loss": 149.0684, - "step": 9650 - }, - { - "epoch": 0.07805492933847236, - "grad_norm": 3663.638427734375, - "learning_rate": 3.903030303030304e-05, - "loss": 197.8055, - "step": 9660 - }, - { - "epoch": 0.07813573154275649, - "grad_norm": 1510.8233642578125, - "learning_rate": 3.9070707070707076e-05, - "loss": 217.5386, - "step": 9670 - }, - { - "epoch": 0.07821653374704061, - "grad_norm": 4103.01904296875, - "learning_rate": 3.9111111111111115e-05, - "loss": 198.1855, - "step": 9680 - }, - { - "epoch": 0.07829733595132475, - "grad_norm": 1483.061279296875, - "learning_rate": 3.9151515151515153e-05, - "loss": 200.777, - "step": 9690 - }, - { - "epoch": 0.07837813815560889, - "grad_norm": 1666.9429931640625, - "learning_rate": 3.91919191919192e-05, - "loss": 233.7256, - "step": 9700 - }, - { - "epoch": 0.07845894035989302, - "grad_norm": 1422.605224609375, - "learning_rate": 3.923232323232323e-05, - "loss": 198.8265, - "step": 9710 - }, - { - "epoch": 0.07853974256417715, - "grad_norm": 955.90869140625, - "learning_rate": 3.927272727272727e-05, - "loss": 178.4951, - "step": 9720 - }, - { - "epoch": 0.07862054476846128, - "grad_norm": 768.09228515625, - "learning_rate": 3.9313131313131316e-05, - "loss": 169.9865, - "step": 9730 - }, - { - "epoch": 0.07870134697274542, - "grad_norm": 2241.572998046875, - "learning_rate": 3.9353535353535355e-05, - "loss": 171.3657, - "step": 9740 - }, - { - "epoch": 0.07878214917702955, - "grad_norm": 828.8177490234375, - "learning_rate": 3.939393939393939e-05, - "loss": 212.0979, - "step": 9750 - }, - { - "epoch": 0.07886295138131368, - "grad_norm": 1248.2691650390625, - "learning_rate": 3.943434343434343e-05, - "loss": 144.412, - "step": 9760 - }, - { - "epoch": 0.07894375358559781, - "grad_norm": 1106.5013427734375, - "learning_rate": 3.947474747474748e-05, - "loss": 170.0778, - "step": 9770 - }, - { - "epoch": 0.07902455578988195, - "grad_norm": 1183.6558837890625, - "learning_rate": 3.951515151515152e-05, - "loss": 183.0114, - "step": 9780 - }, - { - "epoch": 0.07910535799416608, - "grad_norm": 790.3275146484375, - "learning_rate": 3.9555555555555556e-05, - "loss": 148.8441, - "step": 9790 - }, - { - "epoch": 0.07918616019845022, - "grad_norm": 1040.2529296875, - "learning_rate": 3.9595959595959594e-05, - "loss": 181.9307, - "step": 9800 - }, - { - "epoch": 0.07926696240273434, - "grad_norm": 1023.8417358398438, - "learning_rate": 3.963636363636364e-05, - "loss": 160.768, - "step": 9810 - }, - { - "epoch": 0.07934776460701848, - "grad_norm": 1530.9327392578125, - "learning_rate": 3.967676767676768e-05, - "loss": 189.5177, - "step": 9820 - }, - { - "epoch": 0.07942856681130261, - "grad_norm": 1020.0157470703125, - "learning_rate": 3.971717171717172e-05, - "loss": 175.2649, - "step": 9830 - }, - { - "epoch": 0.07950936901558675, - "grad_norm": 1115.394287109375, - "learning_rate": 3.975757575757576e-05, - "loss": 180.9754, - "step": 9840 - }, - { - "epoch": 0.07959017121987087, - "grad_norm": 596.3128051757812, - "learning_rate": 3.97979797979798e-05, - "loss": 190.0906, - "step": 9850 - }, - { - "epoch": 0.07967097342415501, - "grad_norm": 1229.2056884765625, - "learning_rate": 3.983838383838384e-05, - "loss": 153.1082, - "step": 9860 - }, - { - "epoch": 0.07975177562843914, - "grad_norm": 1460.8936767578125, - "learning_rate": 3.987878787878788e-05, - "loss": 176.2776, - "step": 9870 - }, - { - "epoch": 0.07983257783272328, - "grad_norm": 4583.7373046875, - "learning_rate": 3.9919191919191926e-05, - "loss": 163.6398, - "step": 9880 - }, - { - "epoch": 0.0799133800370074, - "grad_norm": 1115.4329833984375, - "learning_rate": 3.9959595959595964e-05, - "loss": 174.1616, - "step": 9890 - }, - { - "epoch": 0.07999418224129154, - "grad_norm": 2389.8232421875, - "learning_rate": 4e-05, - "loss": 163.833, - "step": 9900 - }, - { - "epoch": 0.08007498444557568, - "grad_norm": 2180.552978515625, - "learning_rate": 4.004040404040404e-05, - "loss": 189.1826, - "step": 9910 - }, - { - "epoch": 0.08015578664985981, - "grad_norm": 2478.71533203125, - "learning_rate": 4.008080808080809e-05, - "loss": 262.7207, - "step": 9920 - }, - { - "epoch": 0.08023658885414395, - "grad_norm": 952.7739868164062, - "learning_rate": 4.012121212121212e-05, - "loss": 152.5153, - "step": 9930 - }, - { - "epoch": 0.08031739105842807, - "grad_norm": 1591.5555419921875, - "learning_rate": 4.016161616161616e-05, - "loss": 161.1086, - "step": 9940 - }, - { - "epoch": 0.0803981932627122, - "grad_norm": 2502.85400390625, - "learning_rate": 4.0202020202020204e-05, - "loss": 180.6969, - "step": 9950 - }, - { - "epoch": 0.08047899546699634, - "grad_norm": 1107.453857421875, - "learning_rate": 4.024242424242424e-05, - "loss": 245.7, - "step": 9960 - }, - { - "epoch": 0.08055979767128048, - "grad_norm": 732.3422241210938, - "learning_rate": 4.028282828282828e-05, - "loss": 128.3301, - "step": 9970 - }, - { - "epoch": 0.0806405998755646, - "grad_norm": 852.4077758789062, - "learning_rate": 4.032323232323232e-05, - "loss": 171.862, - "step": 9980 - }, - { - "epoch": 0.08072140207984874, - "grad_norm": 974.1900634765625, - "learning_rate": 4.0363636363636367e-05, - "loss": 245.3339, - "step": 9990 - }, - { - "epoch": 0.08080220428413287, - "grad_norm": 917.6868286132812, - "learning_rate": 4.0404040404040405e-05, - "loss": 151.893, - "step": 10000 - }, - { - "epoch": 0.08088300648841701, - "grad_norm": 1009.7120971679688, - "learning_rate": 4.0444444444444444e-05, - "loss": 196.4933, - "step": 10010 - }, - { - "epoch": 0.08096380869270113, - "grad_norm": 2075.980224609375, - "learning_rate": 4.048484848484849e-05, - "loss": 190.6988, - "step": 10020 - }, - { - "epoch": 0.08104461089698527, - "grad_norm": 2236.189697265625, - "learning_rate": 4.052525252525253e-05, - "loss": 173.7965, - "step": 10030 - }, - { - "epoch": 0.0811254131012694, - "grad_norm": 1387.155517578125, - "learning_rate": 4.056565656565657e-05, - "loss": 287.3005, - "step": 10040 - }, - { - "epoch": 0.08120621530555354, - "grad_norm": 1775.0162353515625, - "learning_rate": 4.0606060606060606e-05, - "loss": 161.304, - "step": 10050 - }, - { - "epoch": 0.08128701750983768, - "grad_norm": 2991.034423828125, - "learning_rate": 4.064646464646465e-05, - "loss": 183.5822, - "step": 10060 - }, - { - "epoch": 0.0813678197141218, - "grad_norm": 2393.831298828125, - "learning_rate": 4.068686868686869e-05, - "loss": 216.6813, - "step": 10070 - }, - { - "epoch": 0.08144862191840593, - "grad_norm": 753.2874755859375, - "learning_rate": 4.072727272727273e-05, - "loss": 176.5726, - "step": 10080 - }, - { - "epoch": 0.08152942412269007, - "grad_norm": 917.6944580078125, - "learning_rate": 4.076767676767677e-05, - "loss": 182.8443, - "step": 10090 - }, - { - "epoch": 0.0816102263269742, - "grad_norm": 1497.6976318359375, - "learning_rate": 4.0808080808080814e-05, - "loss": 179.7216, - "step": 10100 - }, - { - "epoch": 0.08169102853125833, - "grad_norm": 896.7678833007812, - "learning_rate": 4.084848484848485e-05, - "loss": 171.1982, - "step": 10110 - }, - { - "epoch": 0.08177183073554246, - "grad_norm": 2560.443115234375, - "learning_rate": 4.088888888888889e-05, - "loss": 242.9725, - "step": 10120 - }, - { - "epoch": 0.0818526329398266, - "grad_norm": 914.347900390625, - "learning_rate": 4.092929292929293e-05, - "loss": 166.4831, - "step": 10130 - }, - { - "epoch": 0.08193343514411074, - "grad_norm": 1105.142822265625, - "learning_rate": 4.096969696969697e-05, - "loss": 182.9737, - "step": 10140 - }, - { - "epoch": 0.08201423734839486, - "grad_norm": 1321.1329345703125, - "learning_rate": 4.101010101010101e-05, - "loss": 152.9314, - "step": 10150 - }, - { - "epoch": 0.082095039552679, - "grad_norm": 924.438720703125, - "learning_rate": 4.105050505050505e-05, - "loss": 187.9318, - "step": 10160 - }, - { - "epoch": 0.08217584175696313, - "grad_norm": 834.27685546875, - "learning_rate": 4.109090909090909e-05, - "loss": 177.3875, - "step": 10170 - }, - { - "epoch": 0.08225664396124727, - "grad_norm": 996.1378173828125, - "learning_rate": 4.113131313131313e-05, - "loss": 163.1871, - "step": 10180 - }, - { - "epoch": 0.08233744616553139, - "grad_norm": 1657.6314697265625, - "learning_rate": 4.117171717171717e-05, - "loss": 195.5306, - "step": 10190 - }, - { - "epoch": 0.08241824836981552, - "grad_norm": 1040.5526123046875, - "learning_rate": 4.1212121212121216e-05, - "loss": 179.7722, - "step": 10200 - }, - { - "epoch": 0.08249905057409966, - "grad_norm": 1405.0408935546875, - "learning_rate": 4.1252525252525255e-05, - "loss": 177.9881, - "step": 10210 - }, - { - "epoch": 0.0825798527783838, - "grad_norm": 1484.392333984375, - "learning_rate": 4.1292929292929294e-05, - "loss": 170.4384, - "step": 10220 - }, - { - "epoch": 0.08266065498266793, - "grad_norm": 533.9537963867188, - "learning_rate": 4.133333333333333e-05, - "loss": 166.7928, - "step": 10230 - }, - { - "epoch": 0.08274145718695206, - "grad_norm": 1133.5531005859375, - "learning_rate": 4.137373737373738e-05, - "loss": 185.8696, - "step": 10240 - }, - { - "epoch": 0.08282225939123619, - "grad_norm": 1964.5546875, - "learning_rate": 4.141414141414142e-05, - "loss": 247.6705, - "step": 10250 - }, - { - "epoch": 0.08290306159552033, - "grad_norm": 1816.8203125, - "learning_rate": 4.1454545454545456e-05, - "loss": 188.6205, - "step": 10260 - }, - { - "epoch": 0.08298386379980446, - "grad_norm": 809.494873046875, - "learning_rate": 4.1494949494949495e-05, - "loss": 186.0191, - "step": 10270 - }, - { - "epoch": 0.08306466600408859, - "grad_norm": 1444.1771240234375, - "learning_rate": 4.153535353535354e-05, - "loss": 171.1266, - "step": 10280 - }, - { - "epoch": 0.08314546820837272, - "grad_norm": 1594.9212646484375, - "learning_rate": 4.157575757575758e-05, - "loss": 152.8558, - "step": 10290 - }, - { - "epoch": 0.08322627041265686, - "grad_norm": 1367.26318359375, - "learning_rate": 4.161616161616162e-05, - "loss": 215.3903, - "step": 10300 - }, - { - "epoch": 0.083307072616941, - "grad_norm": 1395.453857421875, - "learning_rate": 4.165656565656566e-05, - "loss": 219.8608, - "step": 10310 - }, - { - "epoch": 0.08338787482122512, - "grad_norm": 823.7109375, - "learning_rate": 4.16969696969697e-05, - "loss": 198.1472, - "step": 10320 - }, - { - "epoch": 0.08346867702550925, - "grad_norm": 4030.11083984375, - "learning_rate": 4.173737373737374e-05, - "loss": 191.6869, - "step": 10330 - }, - { - "epoch": 0.08354947922979339, - "grad_norm": 1047.6395263671875, - "learning_rate": 4.177777777777778e-05, - "loss": 198.535, - "step": 10340 - }, - { - "epoch": 0.08363028143407752, - "grad_norm": 1213.222412109375, - "learning_rate": 4.181818181818182e-05, - "loss": 209.4711, - "step": 10350 - }, - { - "epoch": 0.08371108363836166, - "grad_norm": 862.3009643554688, - "learning_rate": 4.185858585858586e-05, - "loss": 197.0447, - "step": 10360 - }, - { - "epoch": 0.08379188584264578, - "grad_norm": 7555.03271484375, - "learning_rate": 4.18989898989899e-05, - "loss": 257.4577, - "step": 10370 - }, - { - "epoch": 0.08387268804692992, - "grad_norm": 1448.51806640625, - "learning_rate": 4.193939393939394e-05, - "loss": 145.9192, - "step": 10380 - }, - { - "epoch": 0.08395349025121406, - "grad_norm": 563.4491577148438, - "learning_rate": 4.197979797979798e-05, - "loss": 185.379, - "step": 10390 - }, - { - "epoch": 0.08403429245549819, - "grad_norm": 2999.943603515625, - "learning_rate": 4.202020202020202e-05, - "loss": 207.6808, - "step": 10400 - }, - { - "epoch": 0.08411509465978231, - "grad_norm": 1272.3822021484375, - "learning_rate": 4.206060606060606e-05, - "loss": 203.2208, - "step": 10410 - }, - { - "epoch": 0.08419589686406645, - "grad_norm": 1877.1287841796875, - "learning_rate": 4.2101010101010105e-05, - "loss": 208.5024, - "step": 10420 - }, - { - "epoch": 0.08427669906835059, - "grad_norm": 880.4778442382812, - "learning_rate": 4.2141414141414144e-05, - "loss": 114.9268, - "step": 10430 - }, - { - "epoch": 0.08435750127263472, - "grad_norm": 2282.030517578125, - "learning_rate": 4.218181818181818e-05, - "loss": 185.7249, - "step": 10440 - }, - { - "epoch": 0.08443830347691884, - "grad_norm": 1241.139892578125, - "learning_rate": 4.222222222222222e-05, - "loss": 171.4185, - "step": 10450 - }, - { - "epoch": 0.08451910568120298, - "grad_norm": 1106.116943359375, - "learning_rate": 4.226262626262627e-05, - "loss": 233.517, - "step": 10460 - }, - { - "epoch": 0.08459990788548712, - "grad_norm": 1352.7723388671875, - "learning_rate": 4.2303030303030306e-05, - "loss": 164.0137, - "step": 10470 - }, - { - "epoch": 0.08468071008977125, - "grad_norm": 824.0071411132812, - "learning_rate": 4.2343434343434345e-05, - "loss": 149.2571, - "step": 10480 - }, - { - "epoch": 0.08476151229405539, - "grad_norm": 1494.287841796875, - "learning_rate": 4.2383838383838384e-05, - "loss": 158.6048, - "step": 10490 - }, - { - "epoch": 0.08484231449833951, - "grad_norm": 928.8883056640625, - "learning_rate": 4.242424242424243e-05, - "loss": 160.3929, - "step": 10500 - }, - { - "epoch": 0.08492311670262365, - "grad_norm": 3102.35791015625, - "learning_rate": 4.246464646464647e-05, - "loss": 262.7529, - "step": 10510 - }, - { - "epoch": 0.08500391890690778, - "grad_norm": 753.5589599609375, - "learning_rate": 4.250505050505051e-05, - "loss": 147.7691, - "step": 10520 - }, - { - "epoch": 0.08508472111119192, - "grad_norm": 1001.4116821289062, - "learning_rate": 4.254545454545455e-05, - "loss": 137.3231, - "step": 10530 - }, - { - "epoch": 0.08516552331547604, - "grad_norm": 813.2144775390625, - "learning_rate": 4.258585858585859e-05, - "loss": 201.1259, - "step": 10540 - }, - { - "epoch": 0.08524632551976018, - "grad_norm": 1098.597900390625, - "learning_rate": 4.262626262626263e-05, - "loss": 165.8937, - "step": 10550 - }, - { - "epoch": 0.08532712772404431, - "grad_norm": 1298.8853759765625, - "learning_rate": 4.266666666666667e-05, - "loss": 198.1985, - "step": 10560 - }, - { - "epoch": 0.08540792992832845, - "grad_norm": 1016.0570678710938, - "learning_rate": 4.270707070707071e-05, - "loss": 206.1271, - "step": 10570 - }, - { - "epoch": 0.08548873213261257, - "grad_norm": 2057.573974609375, - "learning_rate": 4.274747474747475e-05, - "loss": 172.7861, - "step": 10580 - }, - { - "epoch": 0.08556953433689671, - "grad_norm": 1261.774169921875, - "learning_rate": 4.2787878787878786e-05, - "loss": 149.1268, - "step": 10590 - }, - { - "epoch": 0.08565033654118084, - "grad_norm": 1344.2037353515625, - "learning_rate": 4.282828282828283e-05, - "loss": 195.2029, - "step": 10600 - }, - { - "epoch": 0.08573113874546498, - "grad_norm": 3034.935546875, - "learning_rate": 4.286868686868687e-05, - "loss": 222.7399, - "step": 10610 - }, - { - "epoch": 0.08581194094974912, - "grad_norm": 970.0159912109375, - "learning_rate": 4.290909090909091e-05, - "loss": 168.5838, - "step": 10620 - }, - { - "epoch": 0.08589274315403324, - "grad_norm": 953.5883178710938, - "learning_rate": 4.294949494949495e-05, - "loss": 162.8468, - "step": 10630 - }, - { - "epoch": 0.08597354535831737, - "grad_norm": 2465.924072265625, - "learning_rate": 4.2989898989898994e-05, - "loss": 204.2057, - "step": 10640 - }, - { - "epoch": 0.08605434756260151, - "grad_norm": 1194.1285400390625, - "learning_rate": 4.303030303030303e-05, - "loss": 197.6671, - "step": 10650 - }, - { - "epoch": 0.08613514976688565, - "grad_norm": 881.1695556640625, - "learning_rate": 4.307070707070707e-05, - "loss": 163.8398, - "step": 10660 - }, - { - "epoch": 0.08621595197116977, - "grad_norm": 3846.036376953125, - "learning_rate": 4.311111111111111e-05, - "loss": 178.4458, - "step": 10670 - }, - { - "epoch": 0.0862967541754539, - "grad_norm": 1206.0906982421875, - "learning_rate": 4.3151515151515156e-05, - "loss": 153.2348, - "step": 10680 - }, - { - "epoch": 0.08637755637973804, - "grad_norm": 898.4664306640625, - "learning_rate": 4.3191919191919195e-05, - "loss": 143.3541, - "step": 10690 - }, - { - "epoch": 0.08645835858402218, - "grad_norm": 2398.2255859375, - "learning_rate": 4.3232323232323234e-05, - "loss": 212.5595, - "step": 10700 - }, - { - "epoch": 0.0865391607883063, - "grad_norm": 1220.5733642578125, - "learning_rate": 4.327272727272728e-05, - "loss": 179.1572, - "step": 10710 - }, - { - "epoch": 0.08661996299259044, - "grad_norm": 1974.5457763671875, - "learning_rate": 4.331313131313132e-05, - "loss": 190.0654, - "step": 10720 - }, - { - "epoch": 0.08670076519687457, - "grad_norm": 2125.98583984375, - "learning_rate": 4.335353535353536e-05, - "loss": 168.3283, - "step": 10730 - }, - { - "epoch": 0.08678156740115871, - "grad_norm": 1389.6546630859375, - "learning_rate": 4.3393939393939396e-05, - "loss": 159.5211, - "step": 10740 - }, - { - "epoch": 0.08686236960544283, - "grad_norm": 807.5780029296875, - "learning_rate": 4.343434343434344e-05, - "loss": 192.4806, - "step": 10750 - }, - { - "epoch": 0.08694317180972697, - "grad_norm": 1139.113037109375, - "learning_rate": 4.347474747474748e-05, - "loss": 155.3915, - "step": 10760 - }, - { - "epoch": 0.0870239740140111, - "grad_norm": 1003.0131225585938, - "learning_rate": 4.351515151515152e-05, - "loss": 150.0795, - "step": 10770 - }, - { - "epoch": 0.08710477621829524, - "grad_norm": 962.0811767578125, - "learning_rate": 4.355555555555556e-05, - "loss": 194.0887, - "step": 10780 - }, - { - "epoch": 0.08718557842257937, - "grad_norm": 626.3510131835938, - "learning_rate": 4.35959595959596e-05, - "loss": 148.0051, - "step": 10790 - }, - { - "epoch": 0.0872663806268635, - "grad_norm": 2664.0517578125, - "learning_rate": 4.3636363636363636e-05, - "loss": 199.106, - "step": 10800 - }, - { - "epoch": 0.08734718283114763, - "grad_norm": 895.5698852539062, - "learning_rate": 4.3676767676767674e-05, - "loss": 172.3583, - "step": 10810 - }, - { - "epoch": 0.08742798503543177, - "grad_norm": 1103.621826171875, - "learning_rate": 4.371717171717172e-05, - "loss": 203.0746, - "step": 10820 - }, - { - "epoch": 0.0875087872397159, - "grad_norm": 1323.6517333984375, - "learning_rate": 4.375757575757576e-05, - "loss": 158.2676, - "step": 10830 - }, - { - "epoch": 0.08758958944400003, - "grad_norm": 1170.48779296875, - "learning_rate": 4.37979797979798e-05, - "loss": 151.4179, - "step": 10840 - }, - { - "epoch": 0.08767039164828416, - "grad_norm": 1171.2379150390625, - "learning_rate": 4.383838383838384e-05, - "loss": 181.4644, - "step": 10850 - }, - { - "epoch": 0.0877511938525683, - "grad_norm": 1495.01025390625, - "learning_rate": 4.387878787878788e-05, - "loss": 167.6518, - "step": 10860 - }, - { - "epoch": 0.08783199605685243, - "grad_norm": 1283.5498046875, - "learning_rate": 4.391919191919192e-05, - "loss": 138.4191, - "step": 10870 - }, - { - "epoch": 0.08791279826113656, - "grad_norm": 1028.198974609375, - "learning_rate": 4.395959595959596e-05, - "loss": 167.4624, - "step": 10880 - }, - { - "epoch": 0.0879936004654207, - "grad_norm": 958.3167114257812, - "learning_rate": 4.4000000000000006e-05, - "loss": 187.0603, - "step": 10890 - }, - { - "epoch": 0.08807440266970483, - "grad_norm": 1248.95556640625, - "learning_rate": 4.4040404040404044e-05, - "loss": 194.5838, - "step": 10900 - }, - { - "epoch": 0.08815520487398897, - "grad_norm": 1088.8775634765625, - "learning_rate": 4.408080808080808e-05, - "loss": 137.5682, - "step": 10910 - }, - { - "epoch": 0.0882360070782731, - "grad_norm": 1130.275146484375, - "learning_rate": 4.412121212121212e-05, - "loss": 180.1215, - "step": 10920 - }, - { - "epoch": 0.08831680928255722, - "grad_norm": 1201.9453125, - "learning_rate": 4.416161616161617e-05, - "loss": 157.9624, - "step": 10930 - }, - { - "epoch": 0.08839761148684136, - "grad_norm": 1291.0989990234375, - "learning_rate": 4.420202020202021e-05, - "loss": 159.1208, - "step": 10940 - }, - { - "epoch": 0.0884784136911255, - "grad_norm": 946.185546875, - "learning_rate": 4.4242424242424246e-05, - "loss": 200.6266, - "step": 10950 - }, - { - "epoch": 0.08855921589540963, - "grad_norm": 2330.45361328125, - "learning_rate": 4.4282828282828284e-05, - "loss": 163.2997, - "step": 10960 - }, - { - "epoch": 0.08864001809969375, - "grad_norm": 959.3818359375, - "learning_rate": 4.432323232323233e-05, - "loss": 200.18, - "step": 10970 - }, - { - "epoch": 0.08872082030397789, - "grad_norm": 1215.0078125, - "learning_rate": 4.436363636363637e-05, - "loss": 194.7184, - "step": 10980 - }, - { - "epoch": 0.08880162250826203, - "grad_norm": 731.89501953125, - "learning_rate": 4.44040404040404e-05, - "loss": 183.9504, - "step": 10990 - }, - { - "epoch": 0.08888242471254616, - "grad_norm": 751.2623291015625, - "learning_rate": 4.4444444444444447e-05, - "loss": 164.3441, - "step": 11000 - }, - { - "epoch": 0.08896322691683028, - "grad_norm": 1424.0635986328125, - "learning_rate": 4.4484848484848485e-05, - "loss": 202.2657, - "step": 11010 - }, - { - "epoch": 0.08904402912111442, - "grad_norm": 1039.608642578125, - "learning_rate": 4.4525252525252524e-05, - "loss": 177.8795, - "step": 11020 - }, - { - "epoch": 0.08912483132539856, - "grad_norm": 1338.993408203125, - "learning_rate": 4.456565656565656e-05, - "loss": 167.3348, - "step": 11030 - }, - { - "epoch": 0.08920563352968269, - "grad_norm": 2591.8984375, - "learning_rate": 4.460606060606061e-05, - "loss": 165.2155, - "step": 11040 - }, - { - "epoch": 0.08928643573396683, - "grad_norm": 931.5535888671875, - "learning_rate": 4.464646464646465e-05, - "loss": 198.5493, - "step": 11050 - }, - { - "epoch": 0.08936723793825095, - "grad_norm": 1028.25927734375, - "learning_rate": 4.4686868686868686e-05, - "loss": 169.1925, - "step": 11060 - }, - { - "epoch": 0.08944804014253509, - "grad_norm": 1156.0810546875, - "learning_rate": 4.472727272727273e-05, - "loss": 172.742, - "step": 11070 - }, - { - "epoch": 0.08952884234681922, - "grad_norm": 1409.868408203125, - "learning_rate": 4.476767676767677e-05, - "loss": 152.6085, - "step": 11080 - }, - { - "epoch": 0.08960964455110336, - "grad_norm": 1091.4266357421875, - "learning_rate": 4.480808080808081e-05, - "loss": 149.4833, - "step": 11090 - }, - { - "epoch": 0.08969044675538748, - "grad_norm": 1274.9849853515625, - "learning_rate": 4.484848484848485e-05, - "loss": 239.724, - "step": 11100 - }, - { - "epoch": 0.08977124895967162, - "grad_norm": 2974.341552734375, - "learning_rate": 4.4888888888888894e-05, - "loss": 184.0551, - "step": 11110 - }, - { - "epoch": 0.08985205116395575, - "grad_norm": 893.275390625, - "learning_rate": 4.492929292929293e-05, - "loss": 163.8494, - "step": 11120 - }, - { - "epoch": 0.08993285336823989, - "grad_norm": 1236.2047119140625, - "learning_rate": 4.496969696969697e-05, - "loss": 223.2573, - "step": 11130 - }, - { - "epoch": 0.09001365557252401, - "grad_norm": 560.5221557617188, - "learning_rate": 4.501010101010101e-05, - "loss": 143.0442, - "step": 11140 - }, - { - "epoch": 0.09009445777680815, - "grad_norm": 1671.4537353515625, - "learning_rate": 4.5050505050505056e-05, - "loss": 230.4551, - "step": 11150 - }, - { - "epoch": 0.09017525998109228, - "grad_norm": 2295.0419921875, - "learning_rate": 4.5090909090909095e-05, - "loss": 191.5067, - "step": 11160 - }, - { - "epoch": 0.09025606218537642, - "grad_norm": 1291.7230224609375, - "learning_rate": 4.5131313131313134e-05, - "loss": 165.677, - "step": 11170 - }, - { - "epoch": 0.09033686438966056, - "grad_norm": 817.165771484375, - "learning_rate": 4.517171717171717e-05, - "loss": 145.0678, - "step": 11180 - }, - { - "epoch": 0.09041766659394468, - "grad_norm": 938.4746704101562, - "learning_rate": 4.521212121212122e-05, - "loss": 168.7315, - "step": 11190 - }, - { - "epoch": 0.09049846879822881, - "grad_norm": 820.7261352539062, - "learning_rate": 4.525252525252526e-05, - "loss": 174.4509, - "step": 11200 - }, - { - "epoch": 0.09057927100251295, - "grad_norm": 899.671875, - "learning_rate": 4.529292929292929e-05, - "loss": 153.883, - "step": 11210 - }, - { - "epoch": 0.09066007320679709, - "grad_norm": 2744.694091796875, - "learning_rate": 4.5333333333333335e-05, - "loss": 152.8033, - "step": 11220 - }, - { - "epoch": 0.09074087541108121, - "grad_norm": 1821.427734375, - "learning_rate": 4.5373737373737374e-05, - "loss": 203.8027, - "step": 11230 - }, - { - "epoch": 0.09082167761536535, - "grad_norm": 937.9207763671875, - "learning_rate": 4.541414141414141e-05, - "loss": 183.0782, - "step": 11240 - }, - { - "epoch": 0.09090247981964948, - "grad_norm": 1872.185546875, - "learning_rate": 4.545454545454546e-05, - "loss": 153.9125, - "step": 11250 - }, - { - "epoch": 0.09098328202393362, - "grad_norm": 772.832275390625, - "learning_rate": 4.54949494949495e-05, - "loss": 143.9607, - "step": 11260 - }, - { - "epoch": 0.09106408422821774, - "grad_norm": 2842.652587890625, - "learning_rate": 4.5535353535353536e-05, - "loss": 189.6786, - "step": 11270 - }, - { - "epoch": 0.09114488643250188, - "grad_norm": 1738.2589111328125, - "learning_rate": 4.5575757575757575e-05, - "loss": 163.1939, - "step": 11280 - }, - { - "epoch": 0.09122568863678601, - "grad_norm": 1521.9814453125, - "learning_rate": 4.561616161616162e-05, - "loss": 208.5482, - "step": 11290 - }, - { - "epoch": 0.09130649084107015, - "grad_norm": 1132.692138671875, - "learning_rate": 4.565656565656566e-05, - "loss": 161.8789, - "step": 11300 - }, - { - "epoch": 0.09138729304535427, - "grad_norm": 1395.38671875, - "learning_rate": 4.56969696969697e-05, - "loss": 164.5156, - "step": 11310 - }, - { - "epoch": 0.0914680952496384, - "grad_norm": 661.7669067382812, - "learning_rate": 4.573737373737374e-05, - "loss": 163.1058, - "step": 11320 - }, - { - "epoch": 0.09154889745392254, - "grad_norm": 1353.42578125, - "learning_rate": 4.577777777777778e-05, - "loss": 178.0265, - "step": 11330 - }, - { - "epoch": 0.09162969965820668, - "grad_norm": 758.9215087890625, - "learning_rate": 4.581818181818182e-05, - "loss": 186.8539, - "step": 11340 - }, - { - "epoch": 0.09171050186249081, - "grad_norm": 926.5440673828125, - "learning_rate": 4.585858585858586e-05, - "loss": 174.9289, - "step": 11350 - }, - { - "epoch": 0.09179130406677494, - "grad_norm": 1032.7493896484375, - "learning_rate": 4.58989898989899e-05, - "loss": 157.8875, - "step": 11360 - }, - { - "epoch": 0.09187210627105907, - "grad_norm": 1104.685302734375, - "learning_rate": 4.5939393939393945e-05, - "loss": 180.8059, - "step": 11370 - }, - { - "epoch": 0.09195290847534321, - "grad_norm": 807.3258056640625, - "learning_rate": 4.5979797979797984e-05, - "loss": 133.7691, - "step": 11380 - }, - { - "epoch": 0.09203371067962735, - "grad_norm": 1296.3505859375, - "learning_rate": 4.602020202020202e-05, - "loss": 168.8188, - "step": 11390 - }, - { - "epoch": 0.09211451288391147, - "grad_norm": 717.891357421875, - "learning_rate": 4.606060606060607e-05, - "loss": 143.9607, - "step": 11400 - }, - { - "epoch": 0.0921953150881956, - "grad_norm": 841.2793579101562, - "learning_rate": 4.610101010101011e-05, - "loss": 185.6871, - "step": 11410 - }, - { - "epoch": 0.09227611729247974, - "grad_norm": 1319.2064208984375, - "learning_rate": 4.614141414141414e-05, - "loss": 163.8516, - "step": 11420 - }, - { - "epoch": 0.09235691949676388, - "grad_norm": 1566.4168701171875, - "learning_rate": 4.618181818181818e-05, - "loss": 165.3434, - "step": 11430 - }, - { - "epoch": 0.092437721701048, - "grad_norm": 919.2448120117188, - "learning_rate": 4.6222222222222224e-05, - "loss": 133.6576, - "step": 11440 - }, - { - "epoch": 0.09251852390533213, - "grad_norm": 976.5399780273438, - "learning_rate": 4.626262626262626e-05, - "loss": 172.4401, - "step": 11450 - }, - { - "epoch": 0.09259932610961627, - "grad_norm": 1343.291015625, - "learning_rate": 4.63030303030303e-05, - "loss": 203.6782, - "step": 11460 - }, - { - "epoch": 0.0926801283139004, - "grad_norm": 839.6242065429688, - "learning_rate": 4.634343434343435e-05, - "loss": 175.6062, - "step": 11470 - }, - { - "epoch": 0.09276093051818454, - "grad_norm": 909.182861328125, - "learning_rate": 4.6383838383838386e-05, - "loss": 159.7895, - "step": 11480 - }, - { - "epoch": 0.09284173272246866, - "grad_norm": 2260.60107421875, - "learning_rate": 4.6424242424242425e-05, - "loss": 168.3035, - "step": 11490 - }, - { - "epoch": 0.0929225349267528, - "grad_norm": 1016.5995483398438, - "learning_rate": 4.6464646464646464e-05, - "loss": 149.6134, - "step": 11500 - }, - { - "epoch": 0.09300333713103694, - "grad_norm": 1180.2608642578125, - "learning_rate": 4.650505050505051e-05, - "loss": 123.0571, - "step": 11510 - }, - { - "epoch": 0.09308413933532107, - "grad_norm": 1054.5244140625, - "learning_rate": 4.654545454545455e-05, - "loss": 180.0858, - "step": 11520 - }, - { - "epoch": 0.0931649415396052, - "grad_norm": 1064.1981201171875, - "learning_rate": 4.658585858585859e-05, - "loss": 305.0233, - "step": 11530 - }, - { - "epoch": 0.09324574374388933, - "grad_norm": 1455.9857177734375, - "learning_rate": 4.6626262626262626e-05, - "loss": 174.1713, - "step": 11540 - }, - { - "epoch": 0.09332654594817347, - "grad_norm": 801.254150390625, - "learning_rate": 4.666666666666667e-05, - "loss": 175.7632, - "step": 11550 - }, - { - "epoch": 0.0934073481524576, - "grad_norm": 811.2909545898438, - "learning_rate": 4.670707070707071e-05, - "loss": 152.5526, - "step": 11560 - }, - { - "epoch": 0.09348815035674173, - "grad_norm": 860.8350830078125, - "learning_rate": 4.674747474747475e-05, - "loss": 159.9338, - "step": 11570 - }, - { - "epoch": 0.09356895256102586, - "grad_norm": 1820.26318359375, - "learning_rate": 4.6787878787878795e-05, - "loss": 205.222, - "step": 11580 - }, - { - "epoch": 0.09364975476531, - "grad_norm": 1027.9521484375, - "learning_rate": 4.6828282828282834e-05, - "loss": 167.9507, - "step": 11590 - }, - { - "epoch": 0.09373055696959413, - "grad_norm": 3551.64599609375, - "learning_rate": 4.686868686868687e-05, - "loss": 194.2529, - "step": 11600 - }, - { - "epoch": 0.09381135917387827, - "grad_norm": 957.3357543945312, - "learning_rate": 4.690909090909091e-05, - "loss": 185.5651, - "step": 11610 - }, - { - "epoch": 0.09389216137816239, - "grad_norm": 587.98828125, - "learning_rate": 4.694949494949496e-05, - "loss": 208.2654, - "step": 11620 - }, - { - "epoch": 0.09397296358244653, - "grad_norm": 824.1953735351562, - "learning_rate": 4.698989898989899e-05, - "loss": 118.9412, - "step": 11630 - }, - { - "epoch": 0.09405376578673066, - "grad_norm": 997.8128051757812, - "learning_rate": 4.703030303030303e-05, - "loss": 144.5133, - "step": 11640 - }, - { - "epoch": 0.0941345679910148, - "grad_norm": 825.6588745117188, - "learning_rate": 4.7070707070707074e-05, - "loss": 130.3822, - "step": 11650 - }, - { - "epoch": 0.09421537019529892, - "grad_norm": 1590.2271728515625, - "learning_rate": 4.711111111111111e-05, - "loss": 147.783, - "step": 11660 - }, - { - "epoch": 0.09429617239958306, - "grad_norm": 831.8695068359375, - "learning_rate": 4.715151515151515e-05, - "loss": 150.0897, - "step": 11670 - }, - { - "epoch": 0.0943769746038672, - "grad_norm": 879.4678955078125, - "learning_rate": 4.719191919191919e-05, - "loss": 134.1156, - "step": 11680 - }, - { - "epoch": 0.09445777680815133, - "grad_norm": 1955.2484130859375, - "learning_rate": 4.7232323232323236e-05, - "loss": 155.8984, - "step": 11690 - }, - { - "epoch": 0.09453857901243545, - "grad_norm": 1074.5509033203125, - "learning_rate": 4.7272727272727275e-05, - "loss": 186.7514, - "step": 11700 - }, - { - "epoch": 0.09461938121671959, - "grad_norm": 992.748046875, - "learning_rate": 4.7313131313131314e-05, - "loss": 155.1787, - "step": 11710 - }, - { - "epoch": 0.09470018342100373, - "grad_norm": 990.5753784179688, - "learning_rate": 4.735353535353535e-05, - "loss": 140.0309, - "step": 11720 - }, - { - "epoch": 0.09478098562528786, - "grad_norm": 792.1434936523438, - "learning_rate": 4.73939393939394e-05, - "loss": 138.4607, - "step": 11730 - }, - { - "epoch": 0.094861787829572, - "grad_norm": 1121.582763671875, - "learning_rate": 4.743434343434344e-05, - "loss": 121.2844, - "step": 11740 - }, - { - "epoch": 0.09494259003385612, - "grad_norm": 1351.6878662109375, - "learning_rate": 4.7474747474747476e-05, - "loss": 198.7702, - "step": 11750 - }, - { - "epoch": 0.09502339223814026, - "grad_norm": 2031.31494140625, - "learning_rate": 4.751515151515152e-05, - "loss": 164.1413, - "step": 11760 - }, - { - "epoch": 0.09510419444242439, - "grad_norm": 761.006103515625, - "learning_rate": 4.755555555555556e-05, - "loss": 143.7779, - "step": 11770 - }, - { - "epoch": 0.09518499664670853, - "grad_norm": 1228.6676025390625, - "learning_rate": 4.75959595959596e-05, - "loss": 177.4899, - "step": 11780 - }, - { - "epoch": 0.09526579885099265, - "grad_norm": 817.8377075195312, - "learning_rate": 4.763636363636364e-05, - "loss": 158.3827, - "step": 11790 - }, - { - "epoch": 0.09534660105527679, - "grad_norm": 1481.2501220703125, - "learning_rate": 4.7676767676767684e-05, - "loss": 144.1627, - "step": 11800 - }, - { - "epoch": 0.09542740325956092, - "grad_norm": 899.5111083984375, - "learning_rate": 4.771717171717172e-05, - "loss": 188.1043, - "step": 11810 - }, - { - "epoch": 0.09550820546384506, - "grad_norm": 766.15869140625, - "learning_rate": 4.775757575757576e-05, - "loss": 190.8986, - "step": 11820 - }, - { - "epoch": 0.09558900766812918, - "grad_norm": 818.2703857421875, - "learning_rate": 4.77979797979798e-05, - "loss": 226.6272, - "step": 11830 - }, - { - "epoch": 0.09566980987241332, - "grad_norm": 1306.8607177734375, - "learning_rate": 4.7838383838383846e-05, - "loss": 179.7575, - "step": 11840 - }, - { - "epoch": 0.09575061207669745, - "grad_norm": 1371.1048583984375, - "learning_rate": 4.787878787878788e-05, - "loss": 184.5432, - "step": 11850 - }, - { - "epoch": 0.09583141428098159, - "grad_norm": 1219.8428955078125, - "learning_rate": 4.791919191919192e-05, - "loss": 256.0484, - "step": 11860 - }, - { - "epoch": 0.09591221648526572, - "grad_norm": 1769.408935546875, - "learning_rate": 4.795959595959596e-05, - "loss": 216.8512, - "step": 11870 - }, - { - "epoch": 0.09599301868954985, - "grad_norm": 1021.0985107421875, - "learning_rate": 4.8e-05, - "loss": 150.8418, - "step": 11880 - }, - { - "epoch": 0.09607382089383398, - "grad_norm": 789.3172607421875, - "learning_rate": 4.804040404040404e-05, - "loss": 168.713, - "step": 11890 - }, - { - "epoch": 0.09615462309811812, - "grad_norm": 1145.4168701171875, - "learning_rate": 4.808080808080808e-05, - "loss": 248.8971, - "step": 11900 - }, - { - "epoch": 0.09623542530240226, - "grad_norm": 1330.3175048828125, - "learning_rate": 4.8121212121212125e-05, - "loss": 143.9111, - "step": 11910 - }, - { - "epoch": 0.09631622750668638, - "grad_norm": 1145.8402099609375, - "learning_rate": 4.8161616161616163e-05, - "loss": 196.2298, - "step": 11920 - }, - { - "epoch": 0.09639702971097051, - "grad_norm": 3719.409423828125, - "learning_rate": 4.82020202020202e-05, - "loss": 141.981, - "step": 11930 - }, - { - "epoch": 0.09647783191525465, - "grad_norm": 783.9396362304688, - "learning_rate": 4.824242424242425e-05, - "loss": 139.5111, - "step": 11940 - }, - { - "epoch": 0.09655863411953879, - "grad_norm": 817.3587036132812, - "learning_rate": 4.828282828282829e-05, - "loss": 193.1281, - "step": 11950 - }, - { - "epoch": 0.09663943632382291, - "grad_norm": 2150.12939453125, - "learning_rate": 4.8323232323232326e-05, - "loss": 173.1304, - "step": 11960 - }, - { - "epoch": 0.09672023852810704, - "grad_norm": 1299.8162841796875, - "learning_rate": 4.8363636363636364e-05, - "loss": 181.7705, - "step": 11970 - }, - { - "epoch": 0.09680104073239118, - "grad_norm": 936.7069091796875, - "learning_rate": 4.840404040404041e-05, - "loss": 173.4242, - "step": 11980 - }, - { - "epoch": 0.09688184293667532, - "grad_norm": 613.501708984375, - "learning_rate": 4.844444444444445e-05, - "loss": 153.117, - "step": 11990 - }, - { - "epoch": 0.09696264514095944, - "grad_norm": 1406.6314697265625, - "learning_rate": 4.848484848484849e-05, - "loss": 185.4404, - "step": 12000 - }, - { - "epoch": 0.09704344734524357, - "grad_norm": 1215.42138671875, - "learning_rate": 4.852525252525253e-05, - "loss": 209.0183, - "step": 12010 - }, - { - "epoch": 0.09712424954952771, - "grad_norm": 1375.722412109375, - "learning_rate": 4.856565656565657e-05, - "loss": 201.9598, - "step": 12020 - }, - { - "epoch": 0.09720505175381185, - "grad_norm": 987.8963623046875, - "learning_rate": 4.860606060606061e-05, - "loss": 151.2616, - "step": 12030 - }, - { - "epoch": 0.09728585395809598, - "grad_norm": 1280.0211181640625, - "learning_rate": 4.864646464646465e-05, - "loss": 137.8948, - "step": 12040 - }, - { - "epoch": 0.0973666561623801, - "grad_norm": 1247.3785400390625, - "learning_rate": 4.868686868686869e-05, - "loss": 154.3858, - "step": 12050 - }, - { - "epoch": 0.09744745836666424, - "grad_norm": 1069.8919677734375, - "learning_rate": 4.872727272727273e-05, - "loss": 154.6211, - "step": 12060 - }, - { - "epoch": 0.09752826057094838, - "grad_norm": 1880.9874267578125, - "learning_rate": 4.8767676767676767e-05, - "loss": 149.5197, - "step": 12070 - }, - { - "epoch": 0.09760906277523251, - "grad_norm": 1230.8167724609375, - "learning_rate": 4.8808080808080805e-05, - "loss": 149.4375, - "step": 12080 - }, - { - "epoch": 0.09768986497951664, - "grad_norm": 1191.7757568359375, - "learning_rate": 4.884848484848485e-05, - "loss": 170.0373, - "step": 12090 - }, - { - "epoch": 0.09777066718380077, - "grad_norm": 1369.3248291015625, - "learning_rate": 4.888888888888889e-05, - "loss": 161.5461, - "step": 12100 - }, - { - "epoch": 0.09785146938808491, - "grad_norm": 864.84423828125, - "learning_rate": 4.892929292929293e-05, - "loss": 185.1635, - "step": 12110 - }, - { - "epoch": 0.09793227159236904, - "grad_norm": 1059.955810546875, - "learning_rate": 4.896969696969697e-05, - "loss": 190.465, - "step": 12120 - }, - { - "epoch": 0.09801307379665317, - "grad_norm": 1468.4659423828125, - "learning_rate": 4.901010101010101e-05, - "loss": 209.5102, - "step": 12130 - }, - { - "epoch": 0.0980938760009373, - "grad_norm": 1922.64404296875, - "learning_rate": 4.905050505050505e-05, - "loss": 136.4074, - "step": 12140 - }, - { - "epoch": 0.09817467820522144, - "grad_norm": 731.4076538085938, - "learning_rate": 4.909090909090909e-05, - "loss": 132.6805, - "step": 12150 - }, - { - "epoch": 0.09825548040950557, - "grad_norm": 1013.0802612304688, - "learning_rate": 4.9131313131313137e-05, - "loss": 169.2464, - "step": 12160 - }, - { - "epoch": 0.09833628261378971, - "grad_norm": 1180.7650146484375, - "learning_rate": 4.9171717171717175e-05, - "loss": 134.0749, - "step": 12170 - }, - { - "epoch": 0.09841708481807383, - "grad_norm": 1976.8056640625, - "learning_rate": 4.9212121212121214e-05, - "loss": 173.6256, - "step": 12180 - }, - { - "epoch": 0.09849788702235797, - "grad_norm": 1068.174560546875, - "learning_rate": 4.925252525252525e-05, - "loss": 172.0746, - "step": 12190 - }, - { - "epoch": 0.0985786892266421, - "grad_norm": 707.3936157226562, - "learning_rate": 4.92929292929293e-05, - "loss": 186.8521, - "step": 12200 - }, - { - "epoch": 0.09865949143092624, - "grad_norm": 1194.782470703125, - "learning_rate": 4.933333333333334e-05, - "loss": 218.8846, - "step": 12210 - }, - { - "epoch": 0.09874029363521036, - "grad_norm": 1251.564453125, - "learning_rate": 4.9373737373737376e-05, - "loss": 163.8027, - "step": 12220 - }, - { - "epoch": 0.0988210958394945, - "grad_norm": 1061.1964111328125, - "learning_rate": 4.9414141414141415e-05, - "loss": 189.2458, - "step": 12230 - }, - { - "epoch": 0.09890189804377864, - "grad_norm": 946.7278442382812, - "learning_rate": 4.945454545454546e-05, - "loss": 147.6934, - "step": 12240 - }, - { - "epoch": 0.09898270024806277, - "grad_norm": 1294.2952880859375, - "learning_rate": 4.94949494949495e-05, - "loss": 224.1921, - "step": 12250 - }, - { - "epoch": 0.0990635024523469, - "grad_norm": 1992.6143798828125, - "learning_rate": 4.953535353535354e-05, - "loss": 168.6858, - "step": 12260 - }, - { - "epoch": 0.09914430465663103, - "grad_norm": 931.4608764648438, - "learning_rate": 4.957575757575758e-05, - "loss": 164.2498, - "step": 12270 - }, - { - "epoch": 0.09922510686091517, - "grad_norm": 928.4422607421875, - "learning_rate": 4.9616161616161616e-05, - "loss": 157.498, - "step": 12280 - }, - { - "epoch": 0.0993059090651993, - "grad_norm": 834.3302612304688, - "learning_rate": 4.9656565656565655e-05, - "loss": 195.0842, - "step": 12290 - }, - { - "epoch": 0.09938671126948344, - "grad_norm": 1873.80517578125, - "learning_rate": 4.9696969696969694e-05, - "loss": 224.0035, - "step": 12300 - }, - { - "epoch": 0.09946751347376756, - "grad_norm": 1335.8856201171875, - "learning_rate": 4.973737373737374e-05, - "loss": 128.0538, - "step": 12310 - }, - { - "epoch": 0.0995483156780517, - "grad_norm": 887.349853515625, - "learning_rate": 4.977777777777778e-05, - "loss": 140.0356, - "step": 12320 - }, - { - "epoch": 0.09962911788233583, - "grad_norm": 1496.796630859375, - "learning_rate": 4.981818181818182e-05, - "loss": 143.5405, - "step": 12330 - }, - { - "epoch": 0.09970992008661997, - "grad_norm": 1359.33544921875, - "learning_rate": 4.985858585858586e-05, - "loss": 163.7442, - "step": 12340 - }, - { - "epoch": 0.09979072229090409, - "grad_norm": 762.21923828125, - "learning_rate": 4.98989898989899e-05, - "loss": 159.0385, - "step": 12350 - }, - { - "epoch": 0.09987152449518823, - "grad_norm": 1305.80615234375, - "learning_rate": 4.993939393939394e-05, - "loss": 191.1846, - "step": 12360 - }, - { - "epoch": 0.09995232669947236, - "grad_norm": 918.177978515625, - "learning_rate": 4.997979797979798e-05, - "loss": 162.5437, - "step": 12370 - }, - { - "epoch": 0.1000331289037565, - "grad_norm": 620.300048828125, - "learning_rate": 4.9999999751358095e-05, - "loss": 183.1986, - "step": 12380 - }, - { - "epoch": 0.10011393110804062, - "grad_norm": 1200.64794921875, - "learning_rate": 4.999999776222285e-05, - "loss": 125.1974, - "step": 12390 - }, - { - "epoch": 0.10019473331232476, - "grad_norm": 1327.4674072265625, - "learning_rate": 4.9999993783952516e-05, - "loss": 203.2498, - "step": 12400 - }, - { - "epoch": 0.1002755355166089, - "grad_norm": 1414.9403076171875, - "learning_rate": 4.999998781654741e-05, - "loss": 183.1395, - "step": 12410 - }, - { - "epoch": 0.10035633772089303, - "grad_norm": 1874.2359619140625, - "learning_rate": 4.9999979860008006e-05, - "loss": 238.1009, - "step": 12420 - }, - { - "epoch": 0.10043713992517717, - "grad_norm": 1188.5574951171875, - "learning_rate": 4.9999969914334944e-05, - "loss": 181.5796, - "step": 12430 - }, - { - "epoch": 0.10051794212946129, - "grad_norm": 1656.1356201171875, - "learning_rate": 4.999995797952901e-05, - "loss": 192.1247, - "step": 12440 - }, - { - "epoch": 0.10059874433374542, - "grad_norm": 636.4390869140625, - "learning_rate": 4.9999944055591154e-05, - "loss": 144.699, - "step": 12450 - }, - { - "epoch": 0.10067954653802956, - "grad_norm": 958.838134765625, - "learning_rate": 4.999992814252249e-05, - "loss": 157.3529, - "step": 12460 - }, - { - "epoch": 0.1007603487423137, - "grad_norm": 1260.2628173828125, - "learning_rate": 4.999991024032426e-05, - "loss": 146.3447, - "step": 12470 - }, - { - "epoch": 0.10084115094659782, - "grad_norm": 1077.9141845703125, - "learning_rate": 4.9999890348997925e-05, - "loss": 143.8297, - "step": 12480 - }, - { - "epoch": 0.10092195315088195, - "grad_norm": 1348.9508056640625, - "learning_rate": 4.999986846854504e-05, - "loss": 173.3844, - "step": 12490 - }, - { - "epoch": 0.10100275535516609, - "grad_norm": 736.1795654296875, - "learning_rate": 4.999984459896735e-05, - "loss": 173.8864, - "step": 12500 - }, - { - "epoch": 0.10108355755945023, - "grad_norm": 2159.8544921875, - "learning_rate": 4.9999818740266766e-05, - "loss": 262.527, - "step": 12510 - }, - { - "epoch": 0.10116435976373435, - "grad_norm": 1445.3455810546875, - "learning_rate": 4.999979089244534e-05, - "loss": 127.1179, - "step": 12520 - }, - { - "epoch": 0.10124516196801848, - "grad_norm": 1050.8997802734375, - "learning_rate": 4.999976105550528e-05, - "loss": 171.1639, - "step": 12530 - }, - { - "epoch": 0.10132596417230262, - "grad_norm": 593.4418334960938, - "learning_rate": 4.9999729229448975e-05, - "loss": 148.7333, - "step": 12540 - }, - { - "epoch": 0.10140676637658676, - "grad_norm": 1813.0040283203125, - "learning_rate": 4.9999695414278944e-05, - "loss": 169.583, - "step": 12550 - }, - { - "epoch": 0.10148756858087088, - "grad_norm": 1632.8077392578125, - "learning_rate": 4.9999659609997875e-05, - "loss": 178.7264, - "step": 12560 - }, - { - "epoch": 0.10156837078515502, - "grad_norm": 1241.4422607421875, - "learning_rate": 4.9999621816608634e-05, - "loss": 195.7228, - "step": 12570 - }, - { - "epoch": 0.10164917298943915, - "grad_norm": 963.0867309570312, - "learning_rate": 4.999958203411421e-05, - "loss": 196.8765, - "step": 12580 - }, - { - "epoch": 0.10172997519372329, - "grad_norm": 879.3704833984375, - "learning_rate": 4.999954026251778e-05, - "loss": 172.8657, - "step": 12590 - }, - { - "epoch": 0.10181077739800742, - "grad_norm": 1103.077880859375, - "learning_rate": 4.999949650182266e-05, - "loss": 168.6242, - "step": 12600 - }, - { - "epoch": 0.10189157960229155, - "grad_norm": 679.7638549804688, - "learning_rate": 4.9999450752032345e-05, - "loss": 130.0992, - "step": 12610 - }, - { - "epoch": 0.10197238180657568, - "grad_norm": 3675.49951171875, - "learning_rate": 4.999940301315046e-05, - "loss": 209.7475, - "step": 12620 - }, - { - "epoch": 0.10205318401085982, - "grad_norm": 862.3643188476562, - "learning_rate": 4.999935328518081e-05, - "loss": 160.7178, - "step": 12630 - }, - { - "epoch": 0.10213398621514395, - "grad_norm": 894.45068359375, - "learning_rate": 4.999930156812734e-05, - "loss": 165.6423, - "step": 12640 - }, - { - "epoch": 0.10221478841942808, - "grad_norm": 856.2412719726562, - "learning_rate": 4.9999247861994194e-05, - "loss": 140.6881, - "step": 12650 - }, - { - "epoch": 0.10229559062371221, - "grad_norm": 1286.828125, - "learning_rate": 4.999919216678561e-05, - "loss": 177.9921, - "step": 12660 - }, - { - "epoch": 0.10237639282799635, - "grad_norm": 1610.512939453125, - "learning_rate": 4.999913448250605e-05, - "loss": 197.9213, - "step": 12670 - }, - { - "epoch": 0.10245719503228048, - "grad_norm": 690.97998046875, - "learning_rate": 4.999907480916009e-05, - "loss": 148.6186, - "step": 12680 - }, - { - "epoch": 0.10253799723656461, - "grad_norm": 1106.3350830078125, - "learning_rate": 4.999901314675246e-05, - "loss": 209.407, - "step": 12690 - }, - { - "epoch": 0.10261879944084874, - "grad_norm": 1395.5164794921875, - "learning_rate": 4.99989494952881e-05, - "loss": 182.4703, - "step": 12700 - }, - { - "epoch": 0.10269960164513288, - "grad_norm": 1877.5772705078125, - "learning_rate": 4.999888385477205e-05, - "loss": 156.0251, - "step": 12710 - }, - { - "epoch": 0.10278040384941702, - "grad_norm": 1271.5089111328125, - "learning_rate": 4.9998816225209544e-05, - "loss": 154.951, - "step": 12720 - }, - { - "epoch": 0.10286120605370115, - "grad_norm": 1295.865966796875, - "learning_rate": 4.9998746606605964e-05, - "loss": 217.4705, - "step": 12730 - }, - { - "epoch": 0.10294200825798527, - "grad_norm": 1123.7264404296875, - "learning_rate": 4.999867499896684e-05, - "loss": 162.33, - "step": 12740 - }, - { - "epoch": 0.10302281046226941, - "grad_norm": 1871.114990234375, - "learning_rate": 4.999860140229788e-05, - "loss": 166.8098, - "step": 12750 - }, - { - "epoch": 0.10310361266655355, - "grad_norm": 1002.0478515625, - "learning_rate": 4.999852581660493e-05, - "loss": 163.5526, - "step": 12760 - }, - { - "epoch": 0.10318441487083768, - "grad_norm": 1245.7467041015625, - "learning_rate": 4.9998448241894006e-05, - "loss": 216.9302, - "step": 12770 - }, - { - "epoch": 0.1032652170751218, - "grad_norm": 1028.039306640625, - "learning_rate": 4.999836867817129e-05, - "loss": 203.9537, - "step": 12780 - }, - { - "epoch": 0.10334601927940594, - "grad_norm": 562.0195922851562, - "learning_rate": 4.99982871254431e-05, - "loss": 129.1438, - "step": 12790 - }, - { - "epoch": 0.10342682148369008, - "grad_norm": 849.0570068359375, - "learning_rate": 4.999820358371593e-05, - "loss": 169.0486, - "step": 12800 - }, - { - "epoch": 0.10350762368797421, - "grad_norm": 1529.56103515625, - "learning_rate": 4.999811805299643e-05, - "loss": 141.6357, - "step": 12810 - }, - { - "epoch": 0.10358842589225833, - "grad_norm": 1446.593505859375, - "learning_rate": 4.999803053329141e-05, - "loss": 195.0215, - "step": 12820 - }, - { - "epoch": 0.10366922809654247, - "grad_norm": 1069.249755859375, - "learning_rate": 4.999794102460781e-05, - "loss": 188.44, - "step": 12830 - }, - { - "epoch": 0.1037500303008266, - "grad_norm": 1103.1290283203125, - "learning_rate": 4.999784952695278e-05, - "loss": 131.1704, - "step": 12840 - }, - { - "epoch": 0.10383083250511074, - "grad_norm": 3719.030029296875, - "learning_rate": 4.999775604033358e-05, - "loss": 186.096, - "step": 12850 - }, - { - "epoch": 0.10391163470939488, - "grad_norm": 934.4454345703125, - "learning_rate": 4.9997660564757654e-05, - "loss": 162.9967, - "step": 12860 - }, - { - "epoch": 0.103992436913679, - "grad_norm": 881.9823608398438, - "learning_rate": 4.999756310023261e-05, - "loss": 189.1937, - "step": 12870 - }, - { - "epoch": 0.10407323911796314, - "grad_norm": 4495.25634765625, - "learning_rate": 4.9997463646766185e-05, - "loss": 167.4466, - "step": 12880 - }, - { - "epoch": 0.10415404132224727, - "grad_norm": 981.443359375, - "learning_rate": 4.99973622043663e-05, - "loss": 145.4228, - "step": 12890 - }, - { - "epoch": 0.10423484352653141, - "grad_norm": 1170.6580810546875, - "learning_rate": 4.9997258773041036e-05, - "loss": 194.973, - "step": 12900 - }, - { - "epoch": 0.10431564573081553, - "grad_norm": 909.8919677734375, - "learning_rate": 4.999715335279861e-05, - "loss": 173.4462, - "step": 12910 - }, - { - "epoch": 0.10439644793509967, - "grad_norm": 756.8289184570312, - "learning_rate": 4.9997045943647415e-05, - "loss": 177.1729, - "step": 12920 - }, - { - "epoch": 0.1044772501393838, - "grad_norm": 1074.613037109375, - "learning_rate": 4.9996936545595986e-05, - "loss": 152.6241, - "step": 12930 - }, - { - "epoch": 0.10455805234366794, - "grad_norm": 948.0105590820312, - "learning_rate": 4.999682515865304e-05, - "loss": 160.605, - "step": 12940 - }, - { - "epoch": 0.10463885454795206, - "grad_norm": 986.80419921875, - "learning_rate": 4.999671178282744e-05, - "loss": 195.0648, - "step": 12950 - }, - { - "epoch": 0.1047196567522362, - "grad_norm": 1015.398193359375, - "learning_rate": 4.99965964181282e-05, - "loss": 152.1088, - "step": 12960 - }, - { - "epoch": 0.10480045895652033, - "grad_norm": 867.7134399414062, - "learning_rate": 4.999647906456451e-05, - "loss": 158.7183, - "step": 12970 - }, - { - "epoch": 0.10488126116080447, - "grad_norm": 1498.0318603515625, - "learning_rate": 4.9996359722145694e-05, - "loss": 178.2639, - "step": 12980 - }, - { - "epoch": 0.1049620633650886, - "grad_norm": 1281.1707763671875, - "learning_rate": 4.999623839088125e-05, - "loss": 167.5088, - "step": 12990 - }, - { - "epoch": 0.10504286556937273, - "grad_norm": 1084.959228515625, - "learning_rate": 4.999611507078083e-05, - "loss": 157.0286, - "step": 13000 - }, - { - "epoch": 0.10512366777365686, - "grad_norm": 1000.6400756835938, - "learning_rate": 4.9995989761854254e-05, - "loss": 146.6623, - "step": 13010 - }, - { - "epoch": 0.105204469977941, - "grad_norm": 1509.530029296875, - "learning_rate": 4.99958624641115e-05, - "loss": 177.9203, - "step": 13020 - }, - { - "epoch": 0.10528527218222514, - "grad_norm": 1420.79345703125, - "learning_rate": 4.999573317756267e-05, - "loss": 148.3134, - "step": 13030 - }, - { - "epoch": 0.10536607438650926, - "grad_norm": 1180.6531982421875, - "learning_rate": 4.999560190221807e-05, - "loss": 170.9963, - "step": 13040 - }, - { - "epoch": 0.1054468765907934, - "grad_norm": 1092.0330810546875, - "learning_rate": 4.999546863808815e-05, - "loss": 133.3349, - "step": 13050 - }, - { - "epoch": 0.10552767879507753, - "grad_norm": 1312.984619140625, - "learning_rate": 4.99953333851835e-05, - "loss": 122.4938, - "step": 13060 - }, - { - "epoch": 0.10560848099936167, - "grad_norm": 1187.057861328125, - "learning_rate": 4.999519614351488e-05, - "loss": 137.0068, - "step": 13070 - }, - { - "epoch": 0.10568928320364579, - "grad_norm": 1474.0391845703125, - "learning_rate": 4.999505691309322e-05, - "loss": 215.5605, - "step": 13080 - }, - { - "epoch": 0.10577008540792993, - "grad_norm": 739.2951049804688, - "learning_rate": 4.9994915693929586e-05, - "loss": 153.0441, - "step": 13090 - }, - { - "epoch": 0.10585088761221406, - "grad_norm": 576.087890625, - "learning_rate": 4.9994772486035225e-05, - "loss": 151.3807, - "step": 13100 - }, - { - "epoch": 0.1059316898164982, - "grad_norm": 1152.282958984375, - "learning_rate": 4.9994627289421534e-05, - "loss": 190.3949, - "step": 13110 - }, - { - "epoch": 0.10601249202078232, - "grad_norm": 2059.785888671875, - "learning_rate": 4.999448010410005e-05, - "loss": 140.3872, - "step": 13120 - }, - { - "epoch": 0.10609329422506646, - "grad_norm": 784.2423095703125, - "learning_rate": 4.99943309300825e-05, - "loss": 103.543, - "step": 13130 - }, - { - "epoch": 0.10617409642935059, - "grad_norm": 1098.57958984375, - "learning_rate": 4.9994179767380746e-05, - "loss": 150.3387, - "step": 13140 - }, - { - "epoch": 0.10625489863363473, - "grad_norm": 1574.0692138671875, - "learning_rate": 4.999402661600682e-05, - "loss": 192.4264, - "step": 13150 - }, - { - "epoch": 0.10633570083791886, - "grad_norm": 2210.006591796875, - "learning_rate": 4.9993871475972895e-05, - "loss": 125.3871, - "step": 13160 - }, - { - "epoch": 0.10641650304220299, - "grad_norm": 1684.879150390625, - "learning_rate": 4.999371434729132e-05, - "loss": 205.1452, - "step": 13170 - }, - { - "epoch": 0.10649730524648712, - "grad_norm": 1359.27490234375, - "learning_rate": 4.999355522997461e-05, - "loss": 203.7965, - "step": 13180 - }, - { - "epoch": 0.10657810745077126, - "grad_norm": 916.1061401367188, - "learning_rate": 4.999339412403541e-05, - "loss": 162.6433, - "step": 13190 - }, - { - "epoch": 0.1066589096550554, - "grad_norm": 1082.037109375, - "learning_rate": 4.9993231029486544e-05, - "loss": 175.1734, - "step": 13200 - }, - { - "epoch": 0.10673971185933952, - "grad_norm": 1730.4796142578125, - "learning_rate": 4.9993065946340986e-05, - "loss": 206.4621, - "step": 13210 - }, - { - "epoch": 0.10682051406362365, - "grad_norm": 1461.9183349609375, - "learning_rate": 4.999289887461188e-05, - "loss": 180.7627, - "step": 13220 - }, - { - "epoch": 0.10690131626790779, - "grad_norm": 1014.8677368164062, - "learning_rate": 4.999272981431251e-05, - "loss": 179.8561, - "step": 13230 - }, - { - "epoch": 0.10698211847219193, - "grad_norm": 1612.005859375, - "learning_rate": 4.9992558765456334e-05, - "loss": 142.1859, - "step": 13240 - }, - { - "epoch": 0.10706292067647605, - "grad_norm": 958.360107421875, - "learning_rate": 4.999238572805694e-05, - "loss": 185.2089, - "step": 13250 - }, - { - "epoch": 0.10714372288076018, - "grad_norm": 810.4140625, - "learning_rate": 4.999221070212813e-05, - "loss": 154.9561, - "step": 13260 - }, - { - "epoch": 0.10722452508504432, - "grad_norm": 1180.060546875, - "learning_rate": 4.999203368768381e-05, - "loss": 152.1395, - "step": 13270 - }, - { - "epoch": 0.10730532728932846, - "grad_norm": 820.518310546875, - "learning_rate": 4.9991854684738066e-05, - "loss": 162.0506, - "step": 13280 - }, - { - "epoch": 0.10738612949361259, - "grad_norm": 654.80078125, - "learning_rate": 4.999167369330514e-05, - "loss": 227.117, - "step": 13290 - }, - { - "epoch": 0.10746693169789671, - "grad_norm": 2003.2425537109375, - "learning_rate": 4.9991490713399436e-05, - "loss": 192.4266, - "step": 13300 - }, - { - "epoch": 0.10754773390218085, - "grad_norm": 598.6465454101562, - "learning_rate": 4.9991305745035514e-05, - "loss": 153.8487, - "step": 13310 - }, - { - "epoch": 0.10762853610646499, - "grad_norm": 1834.0950927734375, - "learning_rate": 4.9991118788228084e-05, - "loss": 177.134, - "step": 13320 - }, - { - "epoch": 0.10770933831074912, - "grad_norm": 1758.453125, - "learning_rate": 4.9990929842992026e-05, - "loss": 170.0616, - "step": 13330 - }, - { - "epoch": 0.10779014051503324, - "grad_norm": 1039.255859375, - "learning_rate": 4.9990738909342384e-05, - "loss": 146.2972, - "step": 13340 - }, - { - "epoch": 0.10787094271931738, - "grad_norm": 1003.7833862304688, - "learning_rate": 4.9990545987294324e-05, - "loss": 151.9346, - "step": 13350 - }, - { - "epoch": 0.10795174492360152, - "grad_norm": 5873.03857421875, - "learning_rate": 4.999035107686322e-05, - "loss": 144.1906, - "step": 13360 - }, - { - "epoch": 0.10803254712788565, - "grad_norm": 2187.155517578125, - "learning_rate": 4.999015417806457e-05, - "loss": 196.4291, - "step": 13370 - }, - { - "epoch": 0.10811334933216978, - "grad_norm": 1240.0516357421875, - "learning_rate": 4.998995529091404e-05, - "loss": 155.5472, - "step": 13380 - }, - { - "epoch": 0.10819415153645391, - "grad_norm": 1135.625, - "learning_rate": 4.998975441542745e-05, - "loss": 197.4514, - "step": 13390 - }, - { - "epoch": 0.10827495374073805, - "grad_norm": 1170.193603515625, - "learning_rate": 4.9989551551620794e-05, - "loss": 166.823, - "step": 13400 - }, - { - "epoch": 0.10835575594502218, - "grad_norm": 1421.60791015625, - "learning_rate": 4.998934669951021e-05, - "loss": 154.3157, - "step": 13410 - }, - { - "epoch": 0.10843655814930632, - "grad_norm": 927.9348754882812, - "learning_rate": 4.9989139859111995e-05, - "loss": 168.5901, - "step": 13420 - }, - { - "epoch": 0.10851736035359044, - "grad_norm": 1038.8873291015625, - "learning_rate": 4.99889310304426e-05, - "loss": 152.3316, - "step": 13430 - }, - { - "epoch": 0.10859816255787458, - "grad_norm": 947.6807250976562, - "learning_rate": 4.9988720213518655e-05, - "loss": 135.1435, - "step": 13440 - }, - { - "epoch": 0.10867896476215871, - "grad_norm": 1577.244384765625, - "learning_rate": 4.998850740835692e-05, - "loss": 178.216, - "step": 13450 - }, - { - "epoch": 0.10875976696644285, - "grad_norm": 885.4010620117188, - "learning_rate": 4.998829261497433e-05, - "loss": 203.6382, - "step": 13460 - }, - { - "epoch": 0.10884056917072697, - "grad_norm": 847.8015747070312, - "learning_rate": 4.998807583338798e-05, - "loss": 153.3626, - "step": 13470 - }, - { - "epoch": 0.10892137137501111, - "grad_norm": 1210.1986083984375, - "learning_rate": 4.998785706361512e-05, - "loss": 145.362, - "step": 13480 - }, - { - "epoch": 0.10900217357929524, - "grad_norm": 1167.423095703125, - "learning_rate": 4.9987636305673144e-05, - "loss": 162.2581, - "step": 13490 - }, - { - "epoch": 0.10908297578357938, - "grad_norm": 1180.3104248046875, - "learning_rate": 4.9987413559579636e-05, - "loss": 149.3035, - "step": 13500 - }, - { - "epoch": 0.1091637779878635, - "grad_norm": 1402.9918212890625, - "learning_rate": 4.9987188825352294e-05, - "loss": 169.6259, - "step": 13510 - }, - { - "epoch": 0.10924458019214764, - "grad_norm": 2665.6416015625, - "learning_rate": 4.998696210300902e-05, - "loss": 192.3846, - "step": 13520 - }, - { - "epoch": 0.10932538239643178, - "grad_norm": 725.3150634765625, - "learning_rate": 4.9986733392567845e-05, - "loss": 197.2868, - "step": 13530 - }, - { - "epoch": 0.10940618460071591, - "grad_norm": 974.5929565429688, - "learning_rate": 4.998650269404697e-05, - "loss": 178.5823, - "step": 13540 - }, - { - "epoch": 0.10948698680500005, - "grad_norm": 1542.777587890625, - "learning_rate": 4.998627000746475e-05, - "loss": 200.5336, - "step": 13550 - }, - { - "epoch": 0.10956778900928417, - "grad_norm": 1063.777099609375, - "learning_rate": 4.9986035332839694e-05, - "loss": 164.4375, - "step": 13560 - }, - { - "epoch": 0.1096485912135683, - "grad_norm": 1302.0216064453125, - "learning_rate": 4.998579867019048e-05, - "loss": 135.7528, - "step": 13570 - }, - { - "epoch": 0.10972939341785244, - "grad_norm": 807.3289794921875, - "learning_rate": 4.998556001953593e-05, - "loss": 156.5582, - "step": 13580 - }, - { - "epoch": 0.10981019562213658, - "grad_norm": 727.23974609375, - "learning_rate": 4.998531938089503e-05, - "loss": 180.1156, - "step": 13590 - }, - { - "epoch": 0.1098909978264207, - "grad_norm": 1151.44189453125, - "learning_rate": 4.998507675428695e-05, - "loss": 138.4998, - "step": 13600 - }, - { - "epoch": 0.10997180003070484, - "grad_norm": 1855.6485595703125, - "learning_rate": 4.998483213973098e-05, - "loss": 166.6201, - "step": 13610 - }, - { - "epoch": 0.11005260223498897, - "grad_norm": 1196.5108642578125, - "learning_rate": 4.9984585537246566e-05, - "loss": 135.4699, - "step": 13620 - }, - { - "epoch": 0.11013340443927311, - "grad_norm": 1511.992919921875, - "learning_rate": 4.998433694685335e-05, - "loss": 136.3776, - "step": 13630 - }, - { - "epoch": 0.11021420664355723, - "grad_norm": 1018.0720825195312, - "learning_rate": 4.998408636857111e-05, - "loss": 142.9319, - "step": 13640 - }, - { - "epoch": 0.11029500884784137, - "grad_norm": 1056.8865966796875, - "learning_rate": 4.998383380241978e-05, - "loss": 163.6253, - "step": 13650 - }, - { - "epoch": 0.1103758110521255, - "grad_norm": 568.1683959960938, - "learning_rate": 4.9983579248419445e-05, - "loss": 156.528, - "step": 13660 - }, - { - "epoch": 0.11045661325640964, - "grad_norm": 2031.0875244140625, - "learning_rate": 4.998332270659037e-05, - "loss": 188.1777, - "step": 13670 - }, - { - "epoch": 0.11053741546069376, - "grad_norm": 682.0467529296875, - "learning_rate": 4.9983064176952976e-05, - "loss": 214.7207, - "step": 13680 - }, - { - "epoch": 0.1106182176649779, - "grad_norm": 1406.7552490234375, - "learning_rate": 4.998280365952782e-05, - "loss": 177.2549, - "step": 13690 - }, - { - "epoch": 0.11069901986926203, - "grad_norm": 922.1409301757812, - "learning_rate": 4.998254115433563e-05, - "loss": 174.0214, - "step": 13700 - }, - { - "epoch": 0.11077982207354617, - "grad_norm": 3660.348388671875, - "learning_rate": 4.9982276661397286e-05, - "loss": 184.1588, - "step": 13710 - }, - { - "epoch": 0.1108606242778303, - "grad_norm": 1597.2998046875, - "learning_rate": 4.998201018073385e-05, - "loss": 152.5763, - "step": 13720 - }, - { - "epoch": 0.11094142648211443, - "grad_norm": 1081.3948974609375, - "learning_rate": 4.9981741712366515e-05, - "loss": 179.4719, - "step": 13730 - }, - { - "epoch": 0.11102222868639856, - "grad_norm": 1722.0684814453125, - "learning_rate": 4.9981471256316645e-05, - "loss": 166.4671, - "step": 13740 - }, - { - "epoch": 0.1111030308906827, - "grad_norm": 1164.61083984375, - "learning_rate": 4.998119881260576e-05, - "loss": 168.6889, - "step": 13750 - }, - { - "epoch": 0.11118383309496684, - "grad_norm": 798.0043334960938, - "learning_rate": 4.998092438125552e-05, - "loss": 120.9161, - "step": 13760 - }, - { - "epoch": 0.11126463529925096, - "grad_norm": 1977.904052734375, - "learning_rate": 4.998064796228779e-05, - "loss": 138.4463, - "step": 13770 - }, - { - "epoch": 0.1113454375035351, - "grad_norm": 784.3284301757812, - "learning_rate": 4.998036955572453e-05, - "loss": 160.8799, - "step": 13780 - }, - { - "epoch": 0.11142623970781923, - "grad_norm": 1237.744873046875, - "learning_rate": 4.9980089161587916e-05, - "loss": 161.2431, - "step": 13790 - }, - { - "epoch": 0.11150704191210337, - "grad_norm": 1126.8914794921875, - "learning_rate": 4.9979806779900255e-05, - "loss": 188.3983, - "step": 13800 - }, - { - "epoch": 0.11158784411638749, - "grad_norm": 1386.9434814453125, - "learning_rate": 4.997952241068401e-05, - "loss": 192.1466, - "step": 13810 - }, - { - "epoch": 0.11166864632067162, - "grad_norm": 680.4060668945312, - "learning_rate": 4.99792360539618e-05, - "loss": 120.4083, - "step": 13820 - }, - { - "epoch": 0.11174944852495576, - "grad_norm": 1295.848388671875, - "learning_rate": 4.997894770975643e-05, - "loss": 166.7768, - "step": 13830 - }, - { - "epoch": 0.1118302507292399, - "grad_norm": 1008.2403564453125, - "learning_rate": 4.9978657378090814e-05, - "loss": 144.6575, - "step": 13840 - }, - { - "epoch": 0.11191105293352403, - "grad_norm": 1428.162353515625, - "learning_rate": 4.997836505898807e-05, - "loss": 154.2511, - "step": 13850 - }, - { - "epoch": 0.11199185513780816, - "grad_norm": 825.681640625, - "learning_rate": 4.997807075247146e-05, - "loss": 121.8282, - "step": 13860 - }, - { - "epoch": 0.11207265734209229, - "grad_norm": 815.547119140625, - "learning_rate": 4.997777445856439e-05, - "loss": 161.0432, - "step": 13870 - }, - { - "epoch": 0.11215345954637643, - "grad_norm": 715.5890502929688, - "learning_rate": 4.997747617729044e-05, - "loss": 152.8754, - "step": 13880 - }, - { - "epoch": 0.11223426175066056, - "grad_norm": 965.2642822265625, - "learning_rate": 4.997717590867335e-05, - "loss": 119.2692, - "step": 13890 - }, - { - "epoch": 0.11231506395494469, - "grad_norm": 2111.7919921875, - "learning_rate": 4.997687365273699e-05, - "loss": 158.9874, - "step": 13900 - }, - { - "epoch": 0.11239586615922882, - "grad_norm": 879.88037109375, - "learning_rate": 4.9976569409505424e-05, - "loss": 176.3235, - "step": 13910 - }, - { - "epoch": 0.11247666836351296, - "grad_norm": 1380.322998046875, - "learning_rate": 4.997626317900286e-05, - "loss": 137.2757, - "step": 13920 - }, - { - "epoch": 0.1125574705677971, - "grad_norm": 718.0297241210938, - "learning_rate": 4.997595496125366e-05, - "loss": 191.654, - "step": 13930 - }, - { - "epoch": 0.11263827277208122, - "grad_norm": 1362.7103271484375, - "learning_rate": 4.997564475628234e-05, - "loss": 134.5998, - "step": 13940 - }, - { - "epoch": 0.11271907497636535, - "grad_norm": 842.7426147460938, - "learning_rate": 4.99753325641136e-05, - "loss": 143.3749, - "step": 13950 - }, - { - "epoch": 0.11279987718064949, - "grad_norm": 1636.0693359375, - "learning_rate": 4.997501838477226e-05, - "loss": 132.4023, - "step": 13960 - }, - { - "epoch": 0.11288067938493362, - "grad_norm": 1336.4649658203125, - "learning_rate": 4.997470221828334e-05, - "loss": 136.2148, - "step": 13970 - }, - { - "epoch": 0.11296148158921776, - "grad_norm": 906.9151000976562, - "learning_rate": 4.997438406467197e-05, - "loss": 175.0376, - "step": 13980 - }, - { - "epoch": 0.11304228379350188, - "grad_norm": 1826.1988525390625, - "learning_rate": 4.997406392396349e-05, - "loss": 152.145, - "step": 13990 - }, - { - "epoch": 0.11312308599778602, - "grad_norm": 1275.584228515625, - "learning_rate": 4.997374179618335e-05, - "loss": 190.0866, - "step": 14000 - }, - { - "epoch": 0.11320388820207015, - "grad_norm": 937.593017578125, - "learning_rate": 4.997341768135719e-05, - "loss": 141.1656, - "step": 14010 - }, - { - "epoch": 0.11328469040635429, - "grad_norm": 945.1254272460938, - "learning_rate": 4.99730915795108e-05, - "loss": 180.2019, - "step": 14020 - }, - { - "epoch": 0.11336549261063841, - "grad_norm": 867.3080444335938, - "learning_rate": 4.9972763490670116e-05, - "loss": 161.3901, - "step": 14030 - }, - { - "epoch": 0.11344629481492255, - "grad_norm": 1019.8523559570312, - "learning_rate": 4.997243341486127e-05, - "loss": 198.7109, - "step": 14040 - }, - { - "epoch": 0.11352709701920669, - "grad_norm": 1486.674072265625, - "learning_rate": 4.9972101352110476e-05, - "loss": 223.1289, - "step": 14050 - }, - { - "epoch": 0.11360789922349082, - "grad_norm": 1696.5440673828125, - "learning_rate": 4.9971767302444204e-05, - "loss": 145.1186, - "step": 14060 - }, - { - "epoch": 0.11368870142777494, - "grad_norm": 673.3330688476562, - "learning_rate": 4.9971431265889014e-05, - "loss": 140.5065, - "step": 14070 - }, - { - "epoch": 0.11376950363205908, - "grad_norm": 3967.926513671875, - "learning_rate": 4.997109324247163e-05, - "loss": 163.7622, - "step": 14080 - }, - { - "epoch": 0.11385030583634322, - "grad_norm": 1310.7440185546875, - "learning_rate": 4.997075323221897e-05, - "loss": 165.0551, - "step": 14090 - }, - { - "epoch": 0.11393110804062735, - "grad_norm": 887.0441284179688, - "learning_rate": 4.9970411235158066e-05, - "loss": 164.2679, - "step": 14100 - }, - { - "epoch": 0.11401191024491149, - "grad_norm": 1011.033935546875, - "learning_rate": 4.997006725131615e-05, - "loss": 126.0271, - "step": 14110 - }, - { - "epoch": 0.11409271244919561, - "grad_norm": 1387.5933837890625, - "learning_rate": 4.996972128072057e-05, - "loss": 159.5976, - "step": 14120 - }, - { - "epoch": 0.11417351465347975, - "grad_norm": 1430.6734619140625, - "learning_rate": 4.996937332339887e-05, - "loss": 142.3303, - "step": 14130 - }, - { - "epoch": 0.11425431685776388, - "grad_norm": 1413.59814453125, - "learning_rate": 4.9969023379378724e-05, - "loss": 212.5765, - "step": 14140 - }, - { - "epoch": 0.11433511906204802, - "grad_norm": 981.312744140625, - "learning_rate": 4.996867144868798e-05, - "loss": 123.0717, - "step": 14150 - }, - { - "epoch": 0.11441592126633214, - "grad_norm": 1553.310546875, - "learning_rate": 4.996831753135464e-05, - "loss": 187.7103, - "step": 14160 - }, - { - "epoch": 0.11449672347061628, - "grad_norm": 1003.1934204101562, - "learning_rate": 4.996796162740686e-05, - "loss": 140.4176, - "step": 14170 - }, - { - "epoch": 0.11457752567490041, - "grad_norm": 916.1061401367188, - "learning_rate": 4.996760373687297e-05, - "loss": 162.5518, - "step": 14180 - }, - { - "epoch": 0.11465832787918455, - "grad_norm": 946.1961669921875, - "learning_rate": 4.9967243859781426e-05, - "loss": 156.3168, - "step": 14190 - }, - { - "epoch": 0.11473913008346867, - "grad_norm": 775.9445190429688, - "learning_rate": 4.9966881996160876e-05, - "loss": 156.1551, - "step": 14200 - }, - { - "epoch": 0.11481993228775281, - "grad_norm": 1236.88427734375, - "learning_rate": 4.996651814604011e-05, - "loss": 174.8899, - "step": 14210 - }, - { - "epoch": 0.11490073449203694, - "grad_norm": 1576.7642822265625, - "learning_rate": 4.9966152309448076e-05, - "loss": 158.9026, - "step": 14220 - }, - { - "epoch": 0.11498153669632108, - "grad_norm": 1118.7216796875, - "learning_rate": 4.996578448641388e-05, - "loss": 138.2308, - "step": 14230 - }, - { - "epoch": 0.1150623389006052, - "grad_norm": 1023.9251708984375, - "learning_rate": 4.9965414676966796e-05, - "loss": 129.1291, - "step": 14240 - }, - { - "epoch": 0.11514314110488934, - "grad_norm": 1101.9742431640625, - "learning_rate": 4.996504288113624e-05, - "loss": 196.3162, - "step": 14250 - }, - { - "epoch": 0.11522394330917347, - "grad_norm": 796.50048828125, - "learning_rate": 4.9964669098951786e-05, - "loss": 147.224, - "step": 14260 - }, - { - "epoch": 0.11530474551345761, - "grad_norm": 1557.001708984375, - "learning_rate": 4.996429333044319e-05, - "loss": 134.4548, - "step": 14270 - }, - { - "epoch": 0.11538554771774175, - "grad_norm": 1414.4412841796875, - "learning_rate": 4.996391557564035e-05, - "loss": 153.0477, - "step": 14280 - }, - { - "epoch": 0.11546634992202587, - "grad_norm": 950.5034790039062, - "learning_rate": 4.996353583457331e-05, - "loss": 151.1424, - "step": 14290 - }, - { - "epoch": 0.11554715212631, - "grad_norm": 5219.78955078125, - "learning_rate": 4.9963154107272295e-05, - "loss": 194.4314, - "step": 14300 - }, - { - "epoch": 0.11562795433059414, - "grad_norm": 915.6011962890625, - "learning_rate": 4.996277039376767e-05, - "loss": 173.4856, - "step": 14310 - }, - { - "epoch": 0.11570875653487828, - "grad_norm": 817.5308837890625, - "learning_rate": 4.996238469408997e-05, - "loss": 108.486, - "step": 14320 - }, - { - "epoch": 0.1157895587391624, - "grad_norm": 2541.830078125, - "learning_rate": 4.996199700826988e-05, - "loss": 220.6018, - "step": 14330 - }, - { - "epoch": 0.11587036094344653, - "grad_norm": 1460.224853515625, - "learning_rate": 4.996160733633824e-05, - "loss": 185.1798, - "step": 14340 - }, - { - "epoch": 0.11595116314773067, - "grad_norm": 1051.0694580078125, - "learning_rate": 4.996121567832608e-05, - "loss": 148.2247, - "step": 14350 - }, - { - "epoch": 0.11603196535201481, - "grad_norm": 1205.9510498046875, - "learning_rate": 4.9960822034264534e-05, - "loss": 169.8678, - "step": 14360 - }, - { - "epoch": 0.11611276755629893, - "grad_norm": 1407.5836181640625, - "learning_rate": 4.996042640418494e-05, - "loss": 158.6641, - "step": 14370 - }, - { - "epoch": 0.11619356976058307, - "grad_norm": 1719.08349609375, - "learning_rate": 4.996002878811876e-05, - "loss": 112.0756, - "step": 14380 - }, - { - "epoch": 0.1162743719648672, - "grad_norm": 1174.4710693359375, - "learning_rate": 4.995962918609766e-05, - "loss": 168.0695, - "step": 14390 - }, - { - "epoch": 0.11635517416915134, - "grad_norm": 1517.498291015625, - "learning_rate": 4.995922759815339e-05, - "loss": 190.5525, - "step": 14400 - }, - { - "epoch": 0.11643597637343547, - "grad_norm": 920.9449462890625, - "learning_rate": 4.995882402431794e-05, - "loss": 165.8069, - "step": 14410 - }, - { - "epoch": 0.1165167785777196, - "grad_norm": 741.5789794921875, - "learning_rate": 4.995841846462341e-05, - "loss": 148.2098, - "step": 14420 - }, - { - "epoch": 0.11659758078200373, - "grad_norm": 995.2381591796875, - "learning_rate": 4.995801091910206e-05, - "loss": 138.0731, - "step": 14430 - }, - { - "epoch": 0.11667838298628787, - "grad_norm": 877.2784423828125, - "learning_rate": 4.995760138778633e-05, - "loss": 148.9934, - "step": 14440 - }, - { - "epoch": 0.116759185190572, - "grad_norm": 1467.7303466796875, - "learning_rate": 4.995718987070879e-05, - "loss": 228.5873, - "step": 14450 - }, - { - "epoch": 0.11683998739485613, - "grad_norm": 1128.6104736328125, - "learning_rate": 4.99567763679022e-05, - "loss": 149.8821, - "step": 14460 - }, - { - "epoch": 0.11692078959914026, - "grad_norm": 932.7119140625, - "learning_rate": 4.9956360879399444e-05, - "loss": 129.9151, - "step": 14470 - }, - { - "epoch": 0.1170015918034244, - "grad_norm": 1253.4259033203125, - "learning_rate": 4.9955943405233584e-05, - "loss": 205.2673, - "step": 14480 - }, - { - "epoch": 0.11708239400770853, - "grad_norm": 2555.6826171875, - "learning_rate": 4.995552394543784e-05, - "loss": 143.3954, - "step": 14490 - }, - { - "epoch": 0.11716319621199266, - "grad_norm": 855.253662109375, - "learning_rate": 4.995510250004559e-05, - "loss": 154.8423, - "step": 14500 - }, - { - "epoch": 0.11724399841627679, - "grad_norm": 2018.5384521484375, - "learning_rate": 4.9954679069090364e-05, - "loss": 234.7133, - "step": 14510 - }, - { - "epoch": 0.11732480062056093, - "grad_norm": 1564.721435546875, - "learning_rate": 4.995425365260585e-05, - "loss": 139.8708, - "step": 14520 - }, - { - "epoch": 0.11740560282484507, - "grad_norm": 688.4728393554688, - "learning_rate": 4.9953826250625896e-05, - "loss": 149.8579, - "step": 14530 - }, - { - "epoch": 0.1174864050291292, - "grad_norm": 2625.092529296875, - "learning_rate": 4.995339686318451e-05, - "loss": 158.0993, - "step": 14540 - }, - { - "epoch": 0.11756720723341332, - "grad_norm": 1898.60546875, - "learning_rate": 4.995296549031585e-05, - "loss": 132.3351, - "step": 14550 - }, - { - "epoch": 0.11764800943769746, - "grad_norm": 911.2061767578125, - "learning_rate": 4.995253213205425e-05, - "loss": 141.7144, - "step": 14560 - }, - { - "epoch": 0.1177288116419816, - "grad_norm": 1163.8433837890625, - "learning_rate": 4.9952096788434186e-05, - "loss": 130.6005, - "step": 14570 - }, - { - "epoch": 0.11780961384626573, - "grad_norm": 1011.4424438476562, - "learning_rate": 4.9951659459490294e-05, - "loss": 150.9337, - "step": 14580 - }, - { - "epoch": 0.11789041605054985, - "grad_norm": 845.7263793945312, - "learning_rate": 4.9951220145257374e-05, - "loss": 286.6193, - "step": 14590 - }, - { - "epoch": 0.11797121825483399, - "grad_norm": 1084.8226318359375, - "learning_rate": 4.9950778845770376e-05, - "loss": 188.6081, - "step": 14600 - }, - { - "epoch": 0.11805202045911813, - "grad_norm": 1993.1123046875, - "learning_rate": 4.9950335561064423e-05, - "loss": 177.9271, - "step": 14610 - }, - { - "epoch": 0.11813282266340226, - "grad_norm": 2498.151611328125, - "learning_rate": 4.994989029117476e-05, - "loss": 185.4942, - "step": 14620 - }, - { - "epoch": 0.11821362486768638, - "grad_norm": 1064.155517578125, - "learning_rate": 4.994944303613684e-05, - "loss": 157.5091, - "step": 14630 - }, - { - "epoch": 0.11829442707197052, - "grad_norm": 880.9674072265625, - "learning_rate": 4.994899379598623e-05, - "loss": 180.4067, - "step": 14640 - }, - { - "epoch": 0.11837522927625466, - "grad_norm": 1241.9085693359375, - "learning_rate": 4.99485425707587e-05, - "loss": 203.0391, - "step": 14650 - }, - { - "epoch": 0.11845603148053879, - "grad_norm": 803.6256713867188, - "learning_rate": 4.994808936049013e-05, - "loss": 183.2648, - "step": 14660 - }, - { - "epoch": 0.11853683368482293, - "grad_norm": 1581.531005859375, - "learning_rate": 4.9947634165216584e-05, - "loss": 186.4468, - "step": 14670 - }, - { - "epoch": 0.11861763588910705, - "grad_norm": 791.8948974609375, - "learning_rate": 4.994717698497428e-05, - "loss": 179.2291, - "step": 14680 - }, - { - "epoch": 0.11869843809339119, - "grad_norm": 1237.831298828125, - "learning_rate": 4.994671781979959e-05, - "loss": 139.9062, - "step": 14690 - }, - { - "epoch": 0.11877924029767532, - "grad_norm": 835.0073852539062, - "learning_rate": 4.994625666972906e-05, - "loss": 156.4611, - "step": 14700 - }, - { - "epoch": 0.11886004250195946, - "grad_norm": 1219.32958984375, - "learning_rate": 4.994579353479938e-05, - "loss": 160.4347, - "step": 14710 - }, - { - "epoch": 0.11894084470624358, - "grad_norm": 1276.5611572265625, - "learning_rate": 4.9945328415047385e-05, - "loss": 167.3067, - "step": 14720 - }, - { - "epoch": 0.11902164691052772, - "grad_norm": 1150.7158203125, - "learning_rate": 4.994486131051009e-05, - "loss": 143.2454, - "step": 14730 - }, - { - "epoch": 0.11910244911481185, - "grad_norm": 1355.6668701171875, - "learning_rate": 4.994439222122468e-05, - "loss": 156.0403, - "step": 14740 - }, - { - "epoch": 0.11918325131909599, - "grad_norm": 1088.11865234375, - "learning_rate": 4.994392114722845e-05, - "loss": 182.5412, - "step": 14750 - }, - { - "epoch": 0.11926405352338011, - "grad_norm": 896.38232421875, - "learning_rate": 4.994344808855888e-05, - "loss": 170.0941, - "step": 14760 - }, - { - "epoch": 0.11934485572766425, - "grad_norm": 1636.67333984375, - "learning_rate": 4.994297304525363e-05, - "loss": 192.0554, - "step": 14770 - }, - { - "epoch": 0.11942565793194838, - "grad_norm": 1122.7872314453125, - "learning_rate": 4.994249601735049e-05, - "loss": 139.6245, - "step": 14780 - }, - { - "epoch": 0.11950646013623252, - "grad_norm": 999.4352416992188, - "learning_rate": 4.994201700488741e-05, - "loss": 157.1384, - "step": 14790 - }, - { - "epoch": 0.11958726234051664, - "grad_norm": 1360.1336669921875, - "learning_rate": 4.99415360079025e-05, - "loss": 126.2787, - "step": 14800 - }, - { - "epoch": 0.11966806454480078, - "grad_norm": 954.704345703125, - "learning_rate": 4.994105302643404e-05, - "loss": 168.1486, - "step": 14810 - }, - { - "epoch": 0.11974886674908491, - "grad_norm": 817.1705932617188, - "learning_rate": 4.994056806052046e-05, - "loss": 133.3082, - "step": 14820 - }, - { - "epoch": 0.11982966895336905, - "grad_norm": 878.759521484375, - "learning_rate": 4.994008111020033e-05, - "loss": 188.8091, - "step": 14830 - }, - { - "epoch": 0.11991047115765319, - "grad_norm": 2776.26220703125, - "learning_rate": 4.993959217551242e-05, - "loss": 159.8074, - "step": 14840 - }, - { - "epoch": 0.11999127336193731, - "grad_norm": 1184.76171875, - "learning_rate": 4.993910125649561e-05, - "loss": 192.4007, - "step": 14850 - }, - { - "epoch": 0.12007207556622145, - "grad_norm": 2491.531982421875, - "learning_rate": 4.9938608353188966e-05, - "loss": 209.929, - "step": 14860 - }, - { - "epoch": 0.12015287777050558, - "grad_norm": 1279.2767333984375, - "learning_rate": 4.993811346563171e-05, - "loss": 159.6929, - "step": 14870 - }, - { - "epoch": 0.12023367997478972, - "grad_norm": 783.5645751953125, - "learning_rate": 4.993761659386322e-05, - "loss": 164.0424, - "step": 14880 - }, - { - "epoch": 0.12031448217907384, - "grad_norm": 1100.378662109375, - "learning_rate": 4.993711773792302e-05, - "loss": 152.5349, - "step": 14890 - }, - { - "epoch": 0.12039528438335798, - "grad_norm": 2542.6484375, - "learning_rate": 4.993661689785081e-05, - "loss": 131.4937, - "step": 14900 - }, - { - "epoch": 0.12047608658764211, - "grad_norm": 1040.3431396484375, - "learning_rate": 4.9936114073686435e-05, - "loss": 135.133, - "step": 14910 - }, - { - "epoch": 0.12055688879192625, - "grad_norm": 1289.4642333984375, - "learning_rate": 4.9935609265469905e-05, - "loss": 138.4034, - "step": 14920 - }, - { - "epoch": 0.12063769099621037, - "grad_norm": 712.0211181640625, - "learning_rate": 4.993510247324139e-05, - "loss": 120.3609, - "step": 14930 - }, - { - "epoch": 0.1207184932004945, - "grad_norm": 872.5120849609375, - "learning_rate": 4.99345936970412e-05, - "loss": 130.8901, - "step": 14940 - }, - { - "epoch": 0.12079929540477864, - "grad_norm": 887.6231079101562, - "learning_rate": 4.993408293690983e-05, - "loss": 168.3752, - "step": 14950 - }, - { - "epoch": 0.12088009760906278, - "grad_norm": 883.7645874023438, - "learning_rate": 4.993357019288791e-05, - "loss": 178.7722, - "step": 14960 - }, - { - "epoch": 0.12096089981334691, - "grad_norm": 1578.703369140625, - "learning_rate": 4.9933055465016245e-05, - "loss": 226.0479, - "step": 14970 - }, - { - "epoch": 0.12104170201763104, - "grad_norm": 4105.41455078125, - "learning_rate": 4.9932538753335776e-05, - "loss": 205.1221, - "step": 14980 - }, - { - "epoch": 0.12112250422191517, - "grad_norm": 1194.9835205078125, - "learning_rate": 4.9932020057887625e-05, - "loss": 161.8187, - "step": 14990 - }, - { - "epoch": 0.12120330642619931, - "grad_norm": 1620.5238037109375, - "learning_rate": 4.9931499378713064e-05, - "loss": 166.9932, - "step": 15000 - }, - { - "epoch": 0.12128410863048344, - "grad_norm": 1244.3077392578125, - "learning_rate": 4.993097671585352e-05, - "loss": 163.8744, - "step": 15010 - }, - { - "epoch": 0.12136491083476757, - "grad_norm": 639.976318359375, - "learning_rate": 4.9930452069350566e-05, - "loss": 191.1954, - "step": 15020 - }, - { - "epoch": 0.1214457130390517, - "grad_norm": 2938.000244140625, - "learning_rate": 4.992992543924596e-05, - "loss": 200.1803, - "step": 15030 - }, - { - "epoch": 0.12152651524333584, - "grad_norm": 2456.51953125, - "learning_rate": 4.99293968255816e-05, - "loss": 178.8182, - "step": 15040 - }, - { - "epoch": 0.12160731744761998, - "grad_norm": 1298.4368896484375, - "learning_rate": 4.992886622839955e-05, - "loss": 141.6572, - "step": 15050 - }, - { - "epoch": 0.1216881196519041, - "grad_norm": 930.0308227539062, - "learning_rate": 4.9928333647742024e-05, - "loss": 147.9233, - "step": 15060 - }, - { - "epoch": 0.12176892185618823, - "grad_norm": 1263.79345703125, - "learning_rate": 4.9927799083651385e-05, - "loss": 130.5263, - "step": 15070 - }, - { - "epoch": 0.12184972406047237, - "grad_norm": 1646.8670654296875, - "learning_rate": 4.9927262536170183e-05, - "loss": 167.601, - "step": 15080 - }, - { - "epoch": 0.1219305262647565, - "grad_norm": 618.90234375, - "learning_rate": 4.9926724005341095e-05, - "loss": 168.1324, - "step": 15090 - }, - { - "epoch": 0.12201132846904064, - "grad_norm": 724.7408447265625, - "learning_rate": 4.992618349120698e-05, - "loss": 134.3964, - "step": 15100 - }, - { - "epoch": 0.12209213067332476, - "grad_norm": 1000.2115478515625, - "learning_rate": 4.992564099381084e-05, - "loss": 135.418, - "step": 15110 - }, - { - "epoch": 0.1221729328776089, - "grad_norm": 1031.4586181640625, - "learning_rate": 4.9925096513195846e-05, - "loss": 151.0433, - "step": 15120 - }, - { - "epoch": 0.12225373508189304, - "grad_norm": 1462.1236572265625, - "learning_rate": 4.992455004940531e-05, - "loss": 131.2349, - "step": 15130 - }, - { - "epoch": 0.12233453728617717, - "grad_norm": 1075.0465087890625, - "learning_rate": 4.9924001602482705e-05, - "loss": 94.1535, - "step": 15140 - }, - { - "epoch": 0.1224153394904613, - "grad_norm": 1040.51025390625, - "learning_rate": 4.992345117247169e-05, - "loss": 137.572, - "step": 15150 - }, - { - "epoch": 0.12249614169474543, - "grad_norm": 1554.9383544921875, - "learning_rate": 4.9922898759416046e-05, - "loss": 188.1611, - "step": 15160 - }, - { - "epoch": 0.12257694389902957, - "grad_norm": 1242.22705078125, - "learning_rate": 4.992234436335972e-05, - "loss": 175.7165, - "step": 15170 - }, - { - "epoch": 0.1226577461033137, - "grad_norm": 1831.4871826171875, - "learning_rate": 4.9921787984346846e-05, - "loss": 166.0569, - "step": 15180 - }, - { - "epoch": 0.12273854830759783, - "grad_norm": 775.4189453125, - "learning_rate": 4.992122962242167e-05, - "loss": 161.268, - "step": 15190 - }, - { - "epoch": 0.12281935051188196, - "grad_norm": 1125.548095703125, - "learning_rate": 4.992066927762862e-05, - "loss": 149.2984, - "step": 15200 - }, - { - "epoch": 0.1229001527161661, - "grad_norm": 732.4982299804688, - "learning_rate": 4.992010695001229e-05, - "loss": 108.5541, - "step": 15210 - }, - { - "epoch": 0.12298095492045023, - "grad_norm": 980.6292724609375, - "learning_rate": 4.9919542639617425e-05, - "loss": 149.5483, - "step": 15220 - }, - { - "epoch": 0.12306175712473437, - "grad_norm": 1520.5997314453125, - "learning_rate": 4.991897634648891e-05, - "loss": 135.0774, - "step": 15230 - }, - { - "epoch": 0.12314255932901849, - "grad_norm": 1217.220458984375, - "learning_rate": 4.991840807067181e-05, - "loss": 187.3661, - "step": 15240 - }, - { - "epoch": 0.12322336153330263, - "grad_norm": 821.0241088867188, - "learning_rate": 4.991783781221134e-05, - "loss": 190.1053, - "step": 15250 - }, - { - "epoch": 0.12330416373758676, - "grad_norm": 1041.9854736328125, - "learning_rate": 4.9917265571152875e-05, - "loss": 157.6719, - "step": 15260 - }, - { - "epoch": 0.1233849659418709, - "grad_norm": 1650.58984375, - "learning_rate": 4.9916691347541946e-05, - "loss": 148.1537, - "step": 15270 - }, - { - "epoch": 0.12346576814615502, - "grad_norm": 1302.0968017578125, - "learning_rate": 4.9916115141424235e-05, - "loss": 146.3277, - "step": 15280 - }, - { - "epoch": 0.12354657035043916, - "grad_norm": 1479.9271240234375, - "learning_rate": 4.991553695284559e-05, - "loss": 135.4141, - "step": 15290 - }, - { - "epoch": 0.1236273725547233, - "grad_norm": 992.5084838867188, - "learning_rate": 4.991495678185202e-05, - "loss": 145.0812, - "step": 15300 - }, - { - "epoch": 0.12370817475900743, - "grad_norm": 1374.5244140625, - "learning_rate": 4.991437462848968e-05, - "loss": 199.361, - "step": 15310 - }, - { - "epoch": 0.12378897696329155, - "grad_norm": 1825.560791015625, - "learning_rate": 4.991379049280489e-05, - "loss": 199.0117, - "step": 15320 - }, - { - "epoch": 0.12386977916757569, - "grad_norm": 1683.6776123046875, - "learning_rate": 4.991320437484414e-05, - "loss": 132.4031, - "step": 15330 - }, - { - "epoch": 0.12395058137185982, - "grad_norm": 1264.279052734375, - "learning_rate": 4.991261627465404e-05, - "loss": 157.1223, - "step": 15340 - }, - { - "epoch": 0.12403138357614396, - "grad_norm": 1643.26171875, - "learning_rate": 4.99120261922814e-05, - "loss": 170.2161, - "step": 15350 - }, - { - "epoch": 0.1241121857804281, - "grad_norm": 5203.81591796875, - "learning_rate": 4.9911434127773176e-05, - "loss": 145.5372, - "step": 15360 - }, - { - "epoch": 0.12419298798471222, - "grad_norm": 1837.298828125, - "learning_rate": 4.991084008117646e-05, - "loss": 135.7863, - "step": 15370 - }, - { - "epoch": 0.12427379018899636, - "grad_norm": 699.8900756835938, - "learning_rate": 4.991024405253852e-05, - "loss": 112.2561, - "step": 15380 - }, - { - "epoch": 0.12435459239328049, - "grad_norm": 970.3558959960938, - "learning_rate": 4.990964604190679e-05, - "loss": 133.0993, - "step": 15390 - }, - { - "epoch": 0.12443539459756463, - "grad_norm": 1397.918701171875, - "learning_rate": 4.9909046049328846e-05, - "loss": 201.8272, - "step": 15400 - }, - { - "epoch": 0.12451619680184875, - "grad_norm": 939.9736328125, - "learning_rate": 4.990844407485242e-05, - "loss": 127.3693, - "step": 15410 - }, - { - "epoch": 0.12459699900613289, - "grad_norm": 875.4765625, - "learning_rate": 4.9907840118525415e-05, - "loss": 160.38, - "step": 15420 - }, - { - "epoch": 0.12467780121041702, - "grad_norm": 1410.2291259765625, - "learning_rate": 4.990723418039588e-05, - "loss": 152.7454, - "step": 15430 - }, - { - "epoch": 0.12475860341470116, - "grad_norm": 1288.0728759765625, - "learning_rate": 4.9906626260512036e-05, - "loss": 150.0691, - "step": 15440 - }, - { - "epoch": 0.12483940561898528, - "grad_norm": 1040.0684814453125, - "learning_rate": 4.9906016358922246e-05, - "loss": 176.1957, - "step": 15450 - }, - { - "epoch": 0.12492020782326942, - "grad_norm": 944.9694213867188, - "learning_rate": 4.990540447567503e-05, - "loss": 129.6401, - "step": 15460 - }, - { - "epoch": 0.12500101002755354, - "grad_norm": 947.33642578125, - "learning_rate": 4.990479061081908e-05, - "loss": 168.0807, - "step": 15470 - }, - { - "epoch": 0.12508181223183767, - "grad_norm": 1530.232666015625, - "learning_rate": 4.9904174764403255e-05, - "loss": 192.025, - "step": 15480 - }, - { - "epoch": 0.1251626144361218, - "grad_norm": 1547.114013671875, - "learning_rate": 4.9903556936476524e-05, - "loss": 137.4716, - "step": 15490 - }, - { - "epoch": 0.12524341664040595, - "grad_norm": 1204.82177734375, - "learning_rate": 4.9902937127088065e-05, - "loss": 163.7098, - "step": 15500 - }, - { - "epoch": 0.12532421884469008, - "grad_norm": 1420.2991943359375, - "learning_rate": 4.9902315336287184e-05, - "loss": 159.6538, - "step": 15510 - }, - { - "epoch": 0.12540502104897422, - "grad_norm": 1532.7950439453125, - "learning_rate": 4.990169156412336e-05, - "loss": 190.6898, - "step": 15520 - }, - { - "epoch": 0.12548582325325836, - "grad_norm": 4929.6416015625, - "learning_rate": 4.990106581064622e-05, - "loss": 149.6133, - "step": 15530 - }, - { - "epoch": 0.1255666254575425, - "grad_norm": 1571.04736328125, - "learning_rate": 4.9900438075905555e-05, - "loss": 148.4885, - "step": 15540 - }, - { - "epoch": 0.12564742766182663, - "grad_norm": 724.03564453125, - "learning_rate": 4.9899808359951314e-05, - "loss": 172.4704, - "step": 15550 - }, - { - "epoch": 0.12572822986611074, - "grad_norm": 860.8834838867188, - "learning_rate": 4.989917666283359e-05, - "loss": 128.226, - "step": 15560 - }, - { - "epoch": 0.12580903207039487, - "grad_norm": 1097.7288818359375, - "learning_rate": 4.9898542984602656e-05, - "loss": 144.4699, - "step": 15570 - }, - { - "epoch": 0.125889834274679, - "grad_norm": 2218.3935546875, - "learning_rate": 4.989790732530892e-05, - "loss": 148.3845, - "step": 15580 - }, - { - "epoch": 0.12597063647896314, - "grad_norm": 548.2178955078125, - "learning_rate": 4.989726968500297e-05, - "loss": 122.0915, - "step": 15590 - }, - { - "epoch": 0.12605143868324728, - "grad_norm": 1305.503173828125, - "learning_rate": 4.989663006373553e-05, - "loss": 135.4369, - "step": 15600 - }, - { - "epoch": 0.12613224088753142, - "grad_norm": 1803.6846923828125, - "learning_rate": 4.9895988461557494e-05, - "loss": 174.447, - "step": 15610 - }, - { - "epoch": 0.12621304309181555, - "grad_norm": 844.888671875, - "learning_rate": 4.989534487851992e-05, - "loss": 143.4764, - "step": 15620 - }, - { - "epoch": 0.1262938452960997, - "grad_norm": 2333.817626953125, - "learning_rate": 4.9894699314674006e-05, - "loss": 192.7881, - "step": 15630 - }, - { - "epoch": 0.12637464750038382, - "grad_norm": 1151.9573974609375, - "learning_rate": 4.9894051770071113e-05, - "loss": 106.1197, - "step": 15640 - }, - { - "epoch": 0.12645544970466793, - "grad_norm": 803.3490600585938, - "learning_rate": 4.989340224476278e-05, - "loss": 168.2896, - "step": 15650 - }, - { - "epoch": 0.12653625190895207, - "grad_norm": 1133.505126953125, - "learning_rate": 4.9892750738800664e-05, - "loss": 156.3058, - "step": 15660 - }, - { - "epoch": 0.1266170541132362, - "grad_norm": 1112.0980224609375, - "learning_rate": 4.989209725223662e-05, - "loss": 99.6213, - "step": 15670 - }, - { - "epoch": 0.12669785631752034, - "grad_norm": 1125.371337890625, - "learning_rate": 4.989144178512263e-05, - "loss": 120.4679, - "step": 15680 - }, - { - "epoch": 0.12677865852180448, - "grad_norm": 1283.361083984375, - "learning_rate": 4.9890784337510865e-05, - "loss": 177.7757, - "step": 15690 - }, - { - "epoch": 0.1268594607260886, - "grad_norm": 982.0589599609375, - "learning_rate": 4.9890124909453615e-05, - "loss": 154.7183, - "step": 15700 - }, - { - "epoch": 0.12694026293037275, - "grad_norm": 810.0673828125, - "learning_rate": 4.988946350100336e-05, - "loss": 186.0556, - "step": 15710 - }, - { - "epoch": 0.12702106513465689, - "grad_norm": 1088.3094482421875, - "learning_rate": 4.988880011221272e-05, - "loss": 139.6886, - "step": 15720 - }, - { - "epoch": 0.127101867338941, - "grad_norm": 1257.6068115234375, - "learning_rate": 4.9888134743134484e-05, - "loss": 174.5065, - "step": 15730 - }, - { - "epoch": 0.12718266954322513, - "grad_norm": 1530.7177734375, - "learning_rate": 4.988746739382158e-05, - "loss": 166.6874, - "step": 15740 - }, - { - "epoch": 0.12726347174750927, - "grad_norm": 1231.8719482421875, - "learning_rate": 4.988679806432712e-05, - "loss": 170.5947, - "step": 15750 - }, - { - "epoch": 0.1273442739517934, - "grad_norm": 919.1317138671875, - "learning_rate": 4.988612675470435e-05, - "loss": 158.5722, - "step": 15760 - }, - { - "epoch": 0.12742507615607754, - "grad_norm": 691.2894897460938, - "learning_rate": 4.988545346500668e-05, - "loss": 170.0254, - "step": 15770 - }, - { - "epoch": 0.12750587836036167, - "grad_norm": 698.147705078125, - "learning_rate": 4.9884778195287695e-05, - "loss": 163.0237, - "step": 15780 - }, - { - "epoch": 0.1275866805646458, - "grad_norm": 1487.1177978515625, - "learning_rate": 4.988410094560111e-05, - "loss": 186.4031, - "step": 15790 - }, - { - "epoch": 0.12766748276892995, - "grad_norm": 1065.00537109375, - "learning_rate": 4.988342171600082e-05, - "loss": 127.2453, - "step": 15800 - }, - { - "epoch": 0.12774828497321408, - "grad_norm": 1005.9658203125, - "learning_rate": 4.988274050654086e-05, - "loss": 141.061, - "step": 15810 - }, - { - "epoch": 0.1278290871774982, - "grad_norm": 1164.0673828125, - "learning_rate": 4.988205731727544e-05, - "loss": 165.4086, - "step": 15820 - }, - { - "epoch": 0.12790988938178233, - "grad_norm": 7027.51513671875, - "learning_rate": 4.988137214825891e-05, - "loss": 220.1764, - "step": 15830 - }, - { - "epoch": 0.12799069158606646, - "grad_norm": 1196.4290771484375, - "learning_rate": 4.988068499954578e-05, - "loss": 147.7958, - "step": 15840 - }, - { - "epoch": 0.1280714937903506, - "grad_norm": 932.5397338867188, - "learning_rate": 4.9879995871190743e-05, - "loss": 190.3842, - "step": 15850 - }, - { - "epoch": 0.12815229599463474, - "grad_norm": 864.31884765625, - "learning_rate": 4.9879304763248615e-05, - "loss": 149.0053, - "step": 15860 - }, - { - "epoch": 0.12823309819891887, - "grad_norm": 2883.14306640625, - "learning_rate": 4.9878611675774375e-05, - "loss": 176.4589, - "step": 15870 - }, - { - "epoch": 0.128313900403203, - "grad_norm": 950.1251831054688, - "learning_rate": 4.9877916608823196e-05, - "loss": 172.9278, - "step": 15880 - }, - { - "epoch": 0.12839470260748714, - "grad_norm": 1128.0977783203125, - "learning_rate": 4.9877219562450364e-05, - "loss": 164.3597, - "step": 15890 - }, - { - "epoch": 0.12847550481177125, - "grad_norm": 952.9031982421875, - "learning_rate": 4.987652053671134e-05, - "loss": 159.8279, - "step": 15900 - }, - { - "epoch": 0.1285563070160554, - "grad_norm": 3416.225341796875, - "learning_rate": 4.987581953166175e-05, - "loss": 160.0613, - "step": 15910 - }, - { - "epoch": 0.12863710922033952, - "grad_norm": 807.5946044921875, - "learning_rate": 4.9875116547357356e-05, - "loss": 150.7103, - "step": 15920 - }, - { - "epoch": 0.12871791142462366, - "grad_norm": 688.88427734375, - "learning_rate": 4.9874411583854106e-05, - "loss": 148.7598, - "step": 15930 - }, - { - "epoch": 0.1287987136289078, - "grad_norm": 914.3491821289062, - "learning_rate": 4.987370464120808e-05, - "loss": 124.2359, - "step": 15940 - }, - { - "epoch": 0.12887951583319193, - "grad_norm": 5167.775390625, - "learning_rate": 4.987299571947553e-05, - "loss": 146.1354, - "step": 15950 - }, - { - "epoch": 0.12896031803747607, - "grad_norm": 1595.9141845703125, - "learning_rate": 4.9872284818712865e-05, - "loss": 139.1633, - "step": 15960 - }, - { - "epoch": 0.1290411202417602, - "grad_norm": 533.0267333984375, - "learning_rate": 4.9871571938976645e-05, - "loss": 167.4221, - "step": 15970 - }, - { - "epoch": 0.12912192244604434, - "grad_norm": 960.4555053710938, - "learning_rate": 4.98708570803236e-05, - "loss": 129.6839, - "step": 15980 - }, - { - "epoch": 0.12920272465032845, - "grad_norm": 603.4066772460938, - "learning_rate": 4.9870140242810585e-05, - "loss": 105.636, - "step": 15990 - }, - { - "epoch": 0.12928352685461258, - "grad_norm": 1337.03369140625, - "learning_rate": 4.986942142649465e-05, - "loss": 168.6452, - "step": 16000 - }, - { - "epoch": 0.12936432905889672, - "grad_norm": 2447.898193359375, - "learning_rate": 4.9868700631432995e-05, - "loss": 206.3615, - "step": 16010 - }, - { - "epoch": 0.12944513126318086, - "grad_norm": 736.8400268554688, - "learning_rate": 4.9867977857682965e-05, - "loss": 145.3305, - "step": 16020 - }, - { - "epoch": 0.129525933467465, - "grad_norm": 1626.4791259765625, - "learning_rate": 4.986725310530206e-05, - "loss": 128.715, - "step": 16030 - }, - { - "epoch": 0.12960673567174913, - "grad_norm": 612.1223754882812, - "learning_rate": 4.986652637434795e-05, - "loss": 186.9655, - "step": 16040 - }, - { - "epoch": 0.12968753787603327, - "grad_norm": 1173.4058837890625, - "learning_rate": 4.9865797664878456e-05, - "loss": 147.4328, - "step": 16050 - }, - { - "epoch": 0.1297683400803174, - "grad_norm": 1522.8951416015625, - "learning_rate": 4.986506697695157e-05, - "loss": 156.7457, - "step": 16060 - }, - { - "epoch": 0.12984914228460154, - "grad_norm": 520.1161499023438, - "learning_rate": 4.986433431062541e-05, - "loss": 114.943, - "step": 16070 - }, - { - "epoch": 0.12992994448888565, - "grad_norm": 885.0980834960938, - "learning_rate": 4.986359966595828e-05, - "loss": 161.2493, - "step": 16080 - }, - { - "epoch": 0.13001074669316978, - "grad_norm": 1161.5257568359375, - "learning_rate": 4.9862863043008645e-05, - "loss": 169.9536, - "step": 16090 - }, - { - "epoch": 0.13009154889745392, - "grad_norm": 1123.99560546875, - "learning_rate": 4.986212444183509e-05, - "loss": 171.4671, - "step": 16100 - }, - { - "epoch": 0.13017235110173805, - "grad_norm": 1075.032958984375, - "learning_rate": 4.9861383862496405e-05, - "loss": 143.3025, - "step": 16110 - }, - { - "epoch": 0.1302531533060222, - "grad_norm": 2145.561279296875, - "learning_rate": 4.9860641305051496e-05, - "loss": 150.4522, - "step": 16120 - }, - { - "epoch": 0.13033395551030633, - "grad_norm": 960.9412231445312, - "learning_rate": 4.9859896769559454e-05, - "loss": 125.8122, - "step": 16130 - }, - { - "epoch": 0.13041475771459046, - "grad_norm": 1217.901611328125, - "learning_rate": 4.985915025607952e-05, - "loss": 133.196, - "step": 16140 - }, - { - "epoch": 0.1304955599188746, - "grad_norm": 767.8223876953125, - "learning_rate": 4.9858401764671095e-05, - "loss": 150.1083, - "step": 16150 - }, - { - "epoch": 0.1305763621231587, - "grad_norm": 3365.16259765625, - "learning_rate": 4.9857651295393716e-05, - "loss": 170.0273, - "step": 16160 - }, - { - "epoch": 0.13065716432744284, - "grad_norm": 758.3485107421875, - "learning_rate": 4.985689884830711e-05, - "loss": 138.8482, - "step": 16170 - }, - { - "epoch": 0.13073796653172698, - "grad_norm": 1844.4720458984375, - "learning_rate": 4.985614442347114e-05, - "loss": 192.0719, - "step": 16180 - }, - { - "epoch": 0.13081876873601112, - "grad_norm": 785.1895751953125, - "learning_rate": 4.985538802094583e-05, - "loss": 132.7963, - "step": 16190 - }, - { - "epoch": 0.13089957094029525, - "grad_norm": 1228.786376953125, - "learning_rate": 4.985462964079137e-05, - "loss": 150.1391, - "step": 16200 - }, - { - "epoch": 0.1309803731445794, - "grad_norm": 1071.201904296875, - "learning_rate": 4.9853869283068086e-05, - "loss": 154.3323, - "step": 16210 - }, - { - "epoch": 0.13106117534886352, - "grad_norm": 1365.28759765625, - "learning_rate": 4.9853106947836504e-05, - "loss": 133.5463, - "step": 16220 - }, - { - "epoch": 0.13114197755314766, - "grad_norm": 1116.1046142578125, - "learning_rate": 4.985234263515725e-05, - "loss": 172.6865, - "step": 16230 - }, - { - "epoch": 0.1312227797574318, - "grad_norm": 1164.0814208984375, - "learning_rate": 4.985157634509115e-05, - "loss": 140.3057, - "step": 16240 - }, - { - "epoch": 0.1313035819617159, - "grad_norm": 1354.85986328125, - "learning_rate": 4.985080807769918e-05, - "loss": 170.2302, - "step": 16250 - }, - { - "epoch": 0.13138438416600004, - "grad_norm": 1436.9825439453125, - "learning_rate": 4.9850037833042463e-05, - "loss": 154.777, - "step": 16260 - }, - { - "epoch": 0.13146518637028418, - "grad_norm": 3008.28662109375, - "learning_rate": 4.984926561118227e-05, - "loss": 193.904, - "step": 16270 - }, - { - "epoch": 0.1315459885745683, - "grad_norm": 2124.60693359375, - "learning_rate": 4.984849141218007e-05, - "loss": 181.6443, - "step": 16280 - }, - { - "epoch": 0.13162679077885245, - "grad_norm": 1305.1060791015625, - "learning_rate": 4.984771523609744e-05, - "loss": 135.3111, - "step": 16290 - }, - { - "epoch": 0.13170759298313658, - "grad_norm": 1435.72021484375, - "learning_rate": 4.984693708299614e-05, - "loss": 144.1642, - "step": 16300 - }, - { - "epoch": 0.13178839518742072, - "grad_norm": 2482.44970703125, - "learning_rate": 4.984615695293809e-05, - "loss": 168.4511, - "step": 16310 - }, - { - "epoch": 0.13186919739170486, - "grad_norm": 1040.33154296875, - "learning_rate": 4.984537484598536e-05, - "loss": 151.9577, - "step": 16320 - }, - { - "epoch": 0.13194999959598896, - "grad_norm": 2147.656494140625, - "learning_rate": 4.9844590762200185e-05, - "loss": 156.3386, - "step": 16330 - }, - { - "epoch": 0.1320308018002731, - "grad_norm": 852.7472534179688, - "learning_rate": 4.9843804701644936e-05, - "loss": 116.6514, - "step": 16340 - }, - { - "epoch": 0.13211160400455724, - "grad_norm": 1123.03076171875, - "learning_rate": 4.984301666438217e-05, - "loss": 135.8654, - "step": 16350 - }, - { - "epoch": 0.13219240620884137, - "grad_norm": 960.448974609375, - "learning_rate": 4.9842226650474574e-05, - "loss": 152.3355, - "step": 16360 - }, - { - "epoch": 0.1322732084131255, - "grad_norm": 1971.15380859375, - "learning_rate": 4.984143465998502e-05, - "loss": 137.8897, - "step": 16370 - }, - { - "epoch": 0.13235401061740965, - "grad_norm": 944.2211303710938, - "learning_rate": 4.984064069297652e-05, - "loss": 200.7082, - "step": 16380 - }, - { - "epoch": 0.13243481282169378, - "grad_norm": 1767.5118408203125, - "learning_rate": 4.9839844749512245e-05, - "loss": 135.5678, - "step": 16390 - }, - { - "epoch": 0.13251561502597792, - "grad_norm": 1431.9781494140625, - "learning_rate": 4.983904682965551e-05, - "loss": 159.7814, - "step": 16400 - }, - { - "epoch": 0.13259641723026205, - "grad_norm": 722.2365112304688, - "learning_rate": 4.9838246933469826e-05, - "loss": 176.3924, - "step": 16410 - }, - { - "epoch": 0.13267721943454616, - "grad_norm": 1308.9915771484375, - "learning_rate": 4.9837445061018825e-05, - "loss": 179.9182, - "step": 16420 - }, - { - "epoch": 0.1327580216388303, - "grad_norm": 3178.224365234375, - "learning_rate": 4.98366412123663e-05, - "loss": 197.8065, - "step": 16430 - }, - { - "epoch": 0.13283882384311443, - "grad_norm": 1109.3902587890625, - "learning_rate": 4.9835835387576226e-05, - "loss": 137.1438, - "step": 16440 - }, - { - "epoch": 0.13291962604739857, - "grad_norm": 785.2881469726562, - "learning_rate": 4.983502758671271e-05, - "loss": 156.4888, - "step": 16450 - }, - { - "epoch": 0.1330004282516827, - "grad_norm": 1051.4027099609375, - "learning_rate": 4.9834217809840027e-05, - "loss": 109.1863, - "step": 16460 - }, - { - "epoch": 0.13308123045596684, - "grad_norm": 924.291748046875, - "learning_rate": 4.98334060570226e-05, - "loss": 178.5278, - "step": 16470 - }, - { - "epoch": 0.13316203266025098, - "grad_norm": 842.4341430664062, - "learning_rate": 4.983259232832503e-05, - "loss": 166.3891, - "step": 16480 - }, - { - "epoch": 0.13324283486453511, - "grad_norm": 1710.2845458984375, - "learning_rate": 4.983177662381205e-05, - "loss": 152.9183, - "step": 16490 - }, - { - "epoch": 0.13332363706881925, - "grad_norm": 776.0635986328125, - "learning_rate": 4.983095894354858e-05, - "loss": 150.6734, - "step": 16500 - }, - { - "epoch": 0.13340443927310336, - "grad_norm": 738.4081420898438, - "learning_rate": 4.983013928759965e-05, - "loss": 130.03, - "step": 16510 - }, - { - "epoch": 0.1334852414773875, - "grad_norm": 1212.0933837890625, - "learning_rate": 4.98293176560305e-05, - "loss": 159.4638, - "step": 16520 - }, - { - "epoch": 0.13356604368167163, - "grad_norm": 1721.5943603515625, - "learning_rate": 4.982849404890649e-05, - "loss": 146.906, - "step": 16530 - }, - { - "epoch": 0.13364684588595577, - "grad_norm": 1130.90283203125, - "learning_rate": 4.982766846629316e-05, - "loss": 157.2213, - "step": 16540 - }, - { - "epoch": 0.1337276480902399, - "grad_norm": 878.40234375, - "learning_rate": 4.982684090825619e-05, - "loss": 183.1443, - "step": 16550 - }, - { - "epoch": 0.13380845029452404, - "grad_norm": 896.69384765625, - "learning_rate": 4.9826011374861435e-05, - "loss": 150.7616, - "step": 16560 - }, - { - "epoch": 0.13388925249880818, - "grad_norm": 1271.7767333984375, - "learning_rate": 4.982517986617489e-05, - "loss": 151.5091, - "step": 16570 - }, - { - "epoch": 0.1339700547030923, - "grad_norm": 786.6751098632812, - "learning_rate": 4.982434638226271e-05, - "loss": 140.3826, - "step": 16580 - }, - { - "epoch": 0.13405085690737642, - "grad_norm": 777.1525268554688, - "learning_rate": 4.982351092319122e-05, - "loss": 121.1654, - "step": 16590 - }, - { - "epoch": 0.13413165911166056, - "grad_norm": 1195.732666015625, - "learning_rate": 4.982267348902688e-05, - "loss": 129.7749, - "step": 16600 - }, - { - "epoch": 0.1342124613159447, - "grad_norm": 730.3426513671875, - "learning_rate": 4.982183407983635e-05, - "loss": 99.6988, - "step": 16610 - }, - { - "epoch": 0.13429326352022883, - "grad_norm": 1521.4287109375, - "learning_rate": 4.982099269568639e-05, - "loss": 134.8955, - "step": 16620 - }, - { - "epoch": 0.13437406572451296, - "grad_norm": 2275.628662109375, - "learning_rate": 4.982014933664395e-05, - "loss": 132.2674, - "step": 16630 - }, - { - "epoch": 0.1344548679287971, - "grad_norm": 1096.541259765625, - "learning_rate": 4.981930400277614e-05, - "loss": 169.6705, - "step": 16640 - }, - { - "epoch": 0.13453567013308124, - "grad_norm": 1358.02001953125, - "learning_rate": 4.981845669415022e-05, - "loss": 143.6309, - "step": 16650 - }, - { - "epoch": 0.13461647233736537, - "grad_norm": 655.5990600585938, - "learning_rate": 4.9817607410833586e-05, - "loss": 157.5329, - "step": 16660 - }, - { - "epoch": 0.1346972745416495, - "grad_norm": 4219.76171875, - "learning_rate": 4.9816756152893845e-05, - "loss": 184.4929, - "step": 16670 - }, - { - "epoch": 0.13477807674593362, - "grad_norm": 1044.2806396484375, - "learning_rate": 4.98159029203987e-05, - "loss": 145.4762, - "step": 16680 - }, - { - "epoch": 0.13485887895021775, - "grad_norm": 786.1893920898438, - "learning_rate": 4.9815047713416067e-05, - "loss": 124.9497, - "step": 16690 - }, - { - "epoch": 0.1349396811545019, - "grad_norm": 790.2650756835938, - "learning_rate": 4.9814190532013955e-05, - "loss": 136.2918, - "step": 16700 - }, - { - "epoch": 0.13502048335878603, - "grad_norm": 2891.427978515625, - "learning_rate": 4.9813331376260585e-05, - "loss": 168.4338, - "step": 16710 - }, - { - "epoch": 0.13510128556307016, - "grad_norm": 947.1802978515625, - "learning_rate": 4.981247024622432e-05, - "loss": 196.8272, - "step": 16720 - }, - { - "epoch": 0.1351820877673543, - "grad_norm": 3245.217041015625, - "learning_rate": 4.9811607141973674e-05, - "loss": 161.3576, - "step": 16730 - }, - { - "epoch": 0.13526288997163843, - "grad_norm": 970.99609375, - "learning_rate": 4.981074206357731e-05, - "loss": 118.8778, - "step": 16740 - }, - { - "epoch": 0.13534369217592257, - "grad_norm": 818.5444946289062, - "learning_rate": 4.980987501110408e-05, - "loss": 111.06, - "step": 16750 - }, - { - "epoch": 0.1354244943802067, - "grad_norm": 1678.4248046875, - "learning_rate": 4.980900598462295e-05, - "loss": 209.277, - "step": 16760 - }, - { - "epoch": 0.13550529658449081, - "grad_norm": 782.6038208007812, - "learning_rate": 4.980813498420306e-05, - "loss": 148.4335, - "step": 16770 - }, - { - "epoch": 0.13558609878877495, - "grad_norm": 2577.811767578125, - "learning_rate": 4.980726200991374e-05, - "loss": 122.7456, - "step": 16780 - }, - { - "epoch": 0.1356669009930591, - "grad_norm": 1101.7432861328125, - "learning_rate": 4.980638706182442e-05, - "loss": 146.7527, - "step": 16790 - }, - { - "epoch": 0.13574770319734322, - "grad_norm": 1047.087646484375, - "learning_rate": 4.980551014000474e-05, - "loss": 153.0954, - "step": 16800 - }, - { - "epoch": 0.13582850540162736, - "grad_norm": 616.9617309570312, - "learning_rate": 4.9804631244524445e-05, - "loss": 169.0794, - "step": 16810 - }, - { - "epoch": 0.1359093076059115, - "grad_norm": 3564.375, - "learning_rate": 4.980375037545349e-05, - "loss": 139.7387, - "step": 16820 - }, - { - "epoch": 0.13599010981019563, - "grad_norm": 869.0906982421875, - "learning_rate": 4.980286753286195e-05, - "loss": 214.401, - "step": 16830 - }, - { - "epoch": 0.13607091201447977, - "grad_norm": 1770.462646484375, - "learning_rate": 4.980198271682007e-05, - "loss": 164.5436, - "step": 16840 - }, - { - "epoch": 0.13615171421876388, - "grad_norm": 861.6243286132812, - "learning_rate": 4.980109592739825e-05, - "loss": 140.7934, - "step": 16850 - }, - { - "epoch": 0.136232516423048, - "grad_norm": 815.6874389648438, - "learning_rate": 4.9800207164667044e-05, - "loss": 148.0081, - "step": 16860 - }, - { - "epoch": 0.13631331862733215, - "grad_norm": 1228.7813720703125, - "learning_rate": 4.979931642869717e-05, - "loss": 186.2767, - "step": 16870 - }, - { - "epoch": 0.13639412083161628, - "grad_norm": 1478.2159423828125, - "learning_rate": 4.979842371955952e-05, - "loss": 131.0911, - "step": 16880 - }, - { - "epoch": 0.13647492303590042, - "grad_norm": 2854.59228515625, - "learning_rate": 4.979752903732509e-05, - "loss": 165.121, - "step": 16890 - }, - { - "epoch": 0.13655572524018456, - "grad_norm": 762.3851318359375, - "learning_rate": 4.979663238206508e-05, - "loss": 147.5221, - "step": 16900 - }, - { - "epoch": 0.1366365274444687, - "grad_norm": 1374.7017822265625, - "learning_rate": 4.979573375385083e-05, - "loss": 170.8515, - "step": 16910 - }, - { - "epoch": 0.13671732964875283, - "grad_norm": 914.9188232421875, - "learning_rate": 4.979483315275385e-05, - "loss": 180.3649, - "step": 16920 - }, - { - "epoch": 0.13679813185303696, - "grad_norm": 787.9354248046875, - "learning_rate": 4.979393057884578e-05, - "loss": 137.5717, - "step": 16930 - }, - { - "epoch": 0.13687893405732107, - "grad_norm": 1231.185302734375, - "learning_rate": 4.9793026032198453e-05, - "loss": 160.6839, - "step": 16940 - }, - { - "epoch": 0.1369597362616052, - "grad_norm": 933.662841796875, - "learning_rate": 4.9792119512883816e-05, - "loss": 147.167, - "step": 16950 - }, - { - "epoch": 0.13704053846588934, - "grad_norm": 3033.047119140625, - "learning_rate": 4.979121102097402e-05, - "loss": 187.1447, - "step": 16960 - }, - { - "epoch": 0.13712134067017348, - "grad_norm": 4561.677734375, - "learning_rate": 4.9790300556541334e-05, - "loss": 158.3624, - "step": 16970 - }, - { - "epoch": 0.13720214287445762, - "grad_norm": 1468.9378662109375, - "learning_rate": 4.978938811965821e-05, - "loss": 164.9483, - "step": 16980 - }, - { - "epoch": 0.13728294507874175, - "grad_norm": 995.0773315429688, - "learning_rate": 4.978847371039724e-05, - "loss": 174.3868, - "step": 16990 - }, - { - "epoch": 0.1373637472830259, - "grad_norm": 1231.4886474609375, - "learning_rate": 4.978755732883118e-05, - "loss": 183.0145, - "step": 17000 - }, - { - "epoch": 0.13744454948731002, - "grad_norm": 1484.8287353515625, - "learning_rate": 4.978663897503294e-05, - "loss": 156.891, - "step": 17010 - }, - { - "epoch": 0.13752535169159413, - "grad_norm": 1120.732421875, - "learning_rate": 4.97857186490756e-05, - "loss": 119.2117, - "step": 17020 - }, - { - "epoch": 0.13760615389587827, - "grad_norm": 1300.58642578125, - "learning_rate": 4.978479635103237e-05, - "loss": 168.5496, - "step": 17030 - }, - { - "epoch": 0.1376869561001624, - "grad_norm": 780.037841796875, - "learning_rate": 4.978387208097665e-05, - "loss": 126.0169, - "step": 17040 - }, - { - "epoch": 0.13776775830444654, - "grad_norm": 1085.820068359375, - "learning_rate": 4.978294583898196e-05, - "loss": 143.0688, - "step": 17050 - }, - { - "epoch": 0.13784856050873068, - "grad_norm": 1167.7269287109375, - "learning_rate": 4.978201762512201e-05, - "loss": 147.1071, - "step": 17060 - }, - { - "epoch": 0.1379293627130148, - "grad_norm": 2116.8076171875, - "learning_rate": 4.978108743947066e-05, - "loss": 176.9654, - "step": 17070 - }, - { - "epoch": 0.13801016491729895, - "grad_norm": 1101.4508056640625, - "learning_rate": 4.97801552821019e-05, - "loss": 147.9685, - "step": 17080 - }, - { - "epoch": 0.13809096712158309, - "grad_norm": 1551.89306640625, - "learning_rate": 4.977922115308992e-05, - "loss": 150.4693, - "step": 17090 - }, - { - "epoch": 0.13817176932586722, - "grad_norm": 1168.867431640625, - "learning_rate": 4.977828505250903e-05, - "loss": 161.2141, - "step": 17100 - }, - { - "epoch": 0.13825257153015133, - "grad_norm": 759.61962890625, - "learning_rate": 4.977734698043371e-05, - "loss": 150.161, - "step": 17110 - }, - { - "epoch": 0.13833337373443547, - "grad_norm": 757.785400390625, - "learning_rate": 4.977640693693862e-05, - "loss": 135.1811, - "step": 17120 - }, - { - "epoch": 0.1384141759387196, - "grad_norm": 2548.685302734375, - "learning_rate": 4.9775464922098524e-05, - "loss": 188.9629, - "step": 17130 - }, - { - "epoch": 0.13849497814300374, - "grad_norm": 911.2066650390625, - "learning_rate": 4.977452093598839e-05, - "loss": 138.8588, - "step": 17140 - }, - { - "epoch": 0.13857578034728787, - "grad_norm": 1549.0982666015625, - "learning_rate": 4.977357497868334e-05, - "loss": 162.1509, - "step": 17150 - }, - { - "epoch": 0.138656582551572, - "grad_norm": 1119.6712646484375, - "learning_rate": 4.9772627050258604e-05, - "loss": 181.102, - "step": 17160 - }, - { - "epoch": 0.13873738475585615, - "grad_norm": 817.0682983398438, - "learning_rate": 4.977167715078963e-05, - "loss": 167.9592, - "step": 17170 - }, - { - "epoch": 0.13881818696014028, - "grad_norm": 734.8985595703125, - "learning_rate": 4.977072528035199e-05, - "loss": 134.1706, - "step": 17180 - }, - { - "epoch": 0.13889898916442442, - "grad_norm": 927.8806762695312, - "learning_rate": 4.976977143902143e-05, - "loss": 150.6093, - "step": 17190 - }, - { - "epoch": 0.13897979136870853, - "grad_norm": 1223.3267822265625, - "learning_rate": 4.9768815626873836e-05, - "loss": 140.2831, - "step": 17200 - }, - { - "epoch": 0.13906059357299266, - "grad_norm": 966.7178955078125, - "learning_rate": 4.9767857843985245e-05, - "loss": 137.6861, - "step": 17210 - }, - { - "epoch": 0.1391413957772768, - "grad_norm": 1357.1478271484375, - "learning_rate": 4.976689809043188e-05, - "loss": 188.3903, - "step": 17220 - }, - { - "epoch": 0.13922219798156094, - "grad_norm": 1198.27001953125, - "learning_rate": 4.97659363662901e-05, - "loss": 112.1936, - "step": 17230 - }, - { - "epoch": 0.13930300018584507, - "grad_norm": 1542.5234375, - "learning_rate": 4.976497267163642e-05, - "loss": 178.1322, - "step": 17240 - }, - { - "epoch": 0.1393838023901292, - "grad_norm": 574.7208862304688, - "learning_rate": 4.9764007006547516e-05, - "loss": 130.098, - "step": 17250 - }, - { - "epoch": 0.13946460459441334, - "grad_norm": 1796.7242431640625, - "learning_rate": 4.976303937110024e-05, - "loss": 145.7055, - "step": 17260 - }, - { - "epoch": 0.13954540679869748, - "grad_norm": 958.0619506835938, - "learning_rate": 4.9762069765371556e-05, - "loss": 134.8362, - "step": 17270 - }, - { - "epoch": 0.1396262090029816, - "grad_norm": 940.2138671875, - "learning_rate": 4.976109818943863e-05, - "loss": 182.4838, - "step": 17280 - }, - { - "epoch": 0.13970701120726572, - "grad_norm": 547.7529907226562, - "learning_rate": 4.976012464337876e-05, - "loss": 112.5585, - "step": 17290 - }, - { - "epoch": 0.13978781341154986, - "grad_norm": 1002.335693359375, - "learning_rate": 4.97591491272694e-05, - "loss": 158.1674, - "step": 17300 - }, - { - "epoch": 0.139868615615834, - "grad_norm": 1832.1202392578125, - "learning_rate": 4.9758171641188174e-05, - "loss": 159.6637, - "step": 17310 - }, - { - "epoch": 0.13994941782011813, - "grad_norm": 2267.85546875, - "learning_rate": 4.975719218521285e-05, - "loss": 121.5045, - "step": 17320 - }, - { - "epoch": 0.14003022002440227, - "grad_norm": 1080.1688232421875, - "learning_rate": 4.975621075942137e-05, - "loss": 145.8474, - "step": 17330 - }, - { - "epoch": 0.1401110222286864, - "grad_norm": 5002.1943359375, - "learning_rate": 4.975522736389182e-05, - "loss": 162.2757, - "step": 17340 - }, - { - "epoch": 0.14019182443297054, - "grad_norm": 1529.05810546875, - "learning_rate": 4.975424199870244e-05, - "loss": 204.3265, - "step": 17350 - }, - { - "epoch": 0.14027262663725468, - "grad_norm": 827.3128662109375, - "learning_rate": 4.975325466393163e-05, - "loss": 171.37, - "step": 17360 - }, - { - "epoch": 0.14035342884153879, - "grad_norm": 802.5576782226562, - "learning_rate": 4.975226535965795e-05, - "loss": 172.4737, - "step": 17370 - }, - { - "epoch": 0.14043423104582292, - "grad_norm": 954.416015625, - "learning_rate": 4.9751274085960097e-05, - "loss": 198.0529, - "step": 17380 - }, - { - "epoch": 0.14051503325010706, - "grad_norm": 978.6486206054688, - "learning_rate": 4.975028084291697e-05, - "loss": 137.7169, - "step": 17390 - }, - { - "epoch": 0.1405958354543912, - "grad_norm": 785.1177368164062, - "learning_rate": 4.9749285630607587e-05, - "loss": 141.8211, - "step": 17400 - }, - { - "epoch": 0.14067663765867533, - "grad_norm": 1156.6881103515625, - "learning_rate": 4.9748288449111126e-05, - "loss": 147.5348, - "step": 17410 - }, - { - "epoch": 0.14075743986295947, - "grad_norm": 1045.3648681640625, - "learning_rate": 4.974728929850694e-05, - "loss": 161.647, - "step": 17420 - }, - { - "epoch": 0.1408382420672436, - "grad_norm": 1343.872802734375, - "learning_rate": 4.974628817887451e-05, - "loss": 145.1868, - "step": 17430 - }, - { - "epoch": 0.14091904427152774, - "grad_norm": 1589.7122802734375, - "learning_rate": 4.97452850902935e-05, - "loss": 118.9889, - "step": 17440 - }, - { - "epoch": 0.14099984647581185, - "grad_norm": 706.4135131835938, - "learning_rate": 4.9744280032843726e-05, - "loss": 157.0399, - "step": 17450 - }, - { - "epoch": 0.14108064868009598, - "grad_norm": 1923.8131103515625, - "learning_rate": 4.974327300660515e-05, - "loss": 152.5646, - "step": 17460 - }, - { - "epoch": 0.14116145088438012, - "grad_norm": 1136.072021484375, - "learning_rate": 4.974226401165789e-05, - "loss": 163.3207, - "step": 17470 - }, - { - "epoch": 0.14124225308866425, - "grad_norm": 1105.5438232421875, - "learning_rate": 4.974125304808224e-05, - "loss": 150.4518, - "step": 17480 - }, - { - "epoch": 0.1413230552929484, - "grad_norm": 1104.9368896484375, - "learning_rate": 4.974024011595864e-05, - "loss": 163.8363, - "step": 17490 - }, - { - "epoch": 0.14140385749723253, - "grad_norm": 855.8195190429688, - "learning_rate": 4.973922521536766e-05, - "loss": 242.4139, - "step": 17500 - }, - { - "epoch": 0.14148465970151666, - "grad_norm": 1221.6298828125, - "learning_rate": 4.973820834639008e-05, - "loss": 148.5189, - "step": 17510 - }, - { - "epoch": 0.1415654619058008, - "grad_norm": 1131.8109130859375, - "learning_rate": 4.973718950910679e-05, - "loss": 156.5813, - "step": 17520 - }, - { - "epoch": 0.14164626411008494, - "grad_norm": 1203.230712890625, - "learning_rate": 4.973616870359886e-05, - "loss": 149.7097, - "step": 17530 - }, - { - "epoch": 0.14172706631436904, - "grad_norm": 1183.5244140625, - "learning_rate": 4.9735145929947506e-05, - "loss": 159.902, - "step": 17540 - }, - { - "epoch": 0.14180786851865318, - "grad_norm": 1980.1556396484375, - "learning_rate": 4.973412118823412e-05, - "loss": 138.3822, - "step": 17550 - }, - { - "epoch": 0.14188867072293732, - "grad_norm": 2065.0751953125, - "learning_rate": 4.973309447854021e-05, - "loss": 187.6551, - "step": 17560 - }, - { - "epoch": 0.14196947292722145, - "grad_norm": 466.6748962402344, - "learning_rate": 4.973206580094749e-05, - "loss": 164.5834, - "step": 17570 - }, - { - "epoch": 0.1420502751315056, - "grad_norm": 1023.6004028320312, - "learning_rate": 4.9731035155537805e-05, - "loss": 142.399, - "step": 17580 - }, - { - "epoch": 0.14213107733578972, - "grad_norm": 1570.9815673828125, - "learning_rate": 4.973000254239314e-05, - "loss": 150.091, - "step": 17590 - }, - { - "epoch": 0.14221187954007386, - "grad_norm": 886.7949829101562, - "learning_rate": 4.972896796159568e-05, - "loss": 215.1895, - "step": 17600 - }, - { - "epoch": 0.142292681744358, - "grad_norm": 2179.057373046875, - "learning_rate": 4.972793141322773e-05, - "loss": 134.2585, - "step": 17610 - }, - { - "epoch": 0.14237348394864213, - "grad_norm": 1217.0042724609375, - "learning_rate": 4.9726892897371754e-05, - "loss": 172.0116, - "step": 17620 - }, - { - "epoch": 0.14245428615292624, - "grad_norm": 1240.138671875, - "learning_rate": 4.9725852414110396e-05, - "loss": 150.7606, - "step": 17630 - }, - { - "epoch": 0.14253508835721038, - "grad_norm": 1866.2113037109375, - "learning_rate": 4.972480996352644e-05, - "loss": 169.5855, - "step": 17640 - }, - { - "epoch": 0.1426158905614945, - "grad_norm": 1343.4752197265625, - "learning_rate": 4.972376554570282e-05, - "loss": 138.5565, - "step": 17650 - }, - { - "epoch": 0.14269669276577865, - "grad_norm": 3173.331787109375, - "learning_rate": 4.972271916072264e-05, - "loss": 161.6781, - "step": 17660 - }, - { - "epoch": 0.14277749497006278, - "grad_norm": 1009.836669921875, - "learning_rate": 4.972167080866917e-05, - "loss": 167.5559, - "step": 17670 - }, - { - "epoch": 0.14285829717434692, - "grad_norm": 1085.6519775390625, - "learning_rate": 4.9720620489625804e-05, - "loss": 163.0791, - "step": 17680 - }, - { - "epoch": 0.14293909937863106, - "grad_norm": 533.2945556640625, - "learning_rate": 4.971956820367612e-05, - "loss": 136.0262, - "step": 17690 - }, - { - "epoch": 0.1430199015829152, - "grad_norm": 1245.5428466796875, - "learning_rate": 4.971851395090384e-05, - "loss": 175.3457, - "step": 17700 - }, - { - "epoch": 0.1431007037871993, - "grad_norm": 1112.46630859375, - "learning_rate": 4.9717457731392854e-05, - "loss": 153.0563, - "step": 17710 - }, - { - "epoch": 0.14318150599148344, - "grad_norm": 1563.6947021484375, - "learning_rate": 4.971639954522719e-05, - "loss": 170.2951, - "step": 17720 - }, - { - "epoch": 0.14326230819576757, - "grad_norm": 4072.32568359375, - "learning_rate": 4.971533939249105e-05, - "loss": 154.5233, - "step": 17730 - }, - { - "epoch": 0.1433431104000517, - "grad_norm": 995.8030395507812, - "learning_rate": 4.971427727326877e-05, - "loss": 135.2525, - "step": 17740 - }, - { - "epoch": 0.14342391260433585, - "grad_norm": 1153.982666015625, - "learning_rate": 4.971321318764488e-05, - "loss": 145.9184, - "step": 17750 - }, - { - "epoch": 0.14350471480861998, - "grad_norm": 1135.168212890625, - "learning_rate": 4.971214713570403e-05, - "loss": 106.7287, - "step": 17760 - }, - { - "epoch": 0.14358551701290412, - "grad_norm": 2647.79736328125, - "learning_rate": 4.9711079117531054e-05, - "loss": 140.8377, - "step": 17770 - }, - { - "epoch": 0.14366631921718825, - "grad_norm": 720.4559936523438, - "learning_rate": 4.9710009133210915e-05, - "loss": 223.2201, - "step": 17780 - }, - { - "epoch": 0.1437471214214724, - "grad_norm": 877.2671508789062, - "learning_rate": 4.970893718282876e-05, - "loss": 126.1438, - "step": 17790 - }, - { - "epoch": 0.1438279236257565, - "grad_norm": 1463.1614990234375, - "learning_rate": 4.970786326646987e-05, - "loss": 158.2247, - "step": 17800 - }, - { - "epoch": 0.14390872583004063, - "grad_norm": 946.1857299804688, - "learning_rate": 4.970678738421969e-05, - "loss": 124.8285, - "step": 17810 - }, - { - "epoch": 0.14398952803432477, - "grad_norm": 1228.1309814453125, - "learning_rate": 4.9705709536163824e-05, - "loss": 133.9969, - "step": 17820 - }, - { - "epoch": 0.1440703302386089, - "grad_norm": 1009.2578735351562, - "learning_rate": 4.9704629722388035e-05, - "loss": 129.6919, - "step": 17830 - }, - { - "epoch": 0.14415113244289304, - "grad_norm": 1004.6964111328125, - "learning_rate": 4.9703547942978244e-05, - "loss": 162.5422, - "step": 17840 - }, - { - "epoch": 0.14423193464717718, - "grad_norm": 1071.4462890625, - "learning_rate": 4.9702464198020517e-05, - "loss": 118.5496, - "step": 17850 - }, - { - "epoch": 0.14431273685146132, - "grad_norm": 733.4691162109375, - "learning_rate": 4.9701378487601074e-05, - "loss": 183.4323, - "step": 17860 - }, - { - "epoch": 0.14439353905574545, - "grad_norm": 1384.6099853515625, - "learning_rate": 4.970029081180632e-05, - "loss": 126.8512, - "step": 17870 - }, - { - "epoch": 0.1444743412600296, - "grad_norm": 1482.7987060546875, - "learning_rate": 4.969920117072277e-05, - "loss": 193.9891, - "step": 17880 - }, - { - "epoch": 0.1445551434643137, - "grad_norm": 1112.25390625, - "learning_rate": 4.969810956443715e-05, - "loss": 163.0959, - "step": 17890 - }, - { - "epoch": 0.14463594566859783, - "grad_norm": 1225.2054443359375, - "learning_rate": 4.96970159930363e-05, - "loss": 185.9541, - "step": 17900 - }, - { - "epoch": 0.14471674787288197, - "grad_norm": 542.3731689453125, - "learning_rate": 4.9695920456607226e-05, - "loss": 171.242, - "step": 17910 - }, - { - "epoch": 0.1447975500771661, - "grad_norm": 713.3396606445312, - "learning_rate": 4.96948229552371e-05, - "loss": 129.3645, - "step": 17920 - }, - { - "epoch": 0.14487835228145024, - "grad_norm": 1782.2279052734375, - "learning_rate": 4.9693723489013253e-05, - "loss": 149.2307, - "step": 17930 - }, - { - "epoch": 0.14495915448573438, - "grad_norm": 791.181640625, - "learning_rate": 4.969262205802315e-05, - "loss": 186.5776, - "step": 17940 - }, - { - "epoch": 0.1450399566900185, - "grad_norm": 1428.761962890625, - "learning_rate": 4.9691518662354434e-05, - "loss": 142.7595, - "step": 17950 - }, - { - "epoch": 0.14512075889430265, - "grad_norm": 897.8218994140625, - "learning_rate": 4.96904133020949e-05, - "loss": 127.4383, - "step": 17960 - }, - { - "epoch": 0.14520156109858676, - "grad_norm": 1025.088623046875, - "learning_rate": 4.968930597733249e-05, - "loss": 184.0482, - "step": 17970 - }, - { - "epoch": 0.1452823633028709, - "grad_norm": 1242.26220703125, - "learning_rate": 4.968819668815532e-05, - "loss": 102.1975, - "step": 17980 - }, - { - "epoch": 0.14536316550715503, - "grad_norm": 1774.1246337890625, - "learning_rate": 4.9687085434651636e-05, - "loss": 133.0901, - "step": 17990 - }, - { - "epoch": 0.14544396771143916, - "grad_norm": 1936.0460205078125, - "learning_rate": 4.968597221690986e-05, - "loss": 196.2127, - "step": 18000 - }, - { - "epoch": 0.1455247699157233, - "grad_norm": 1869.914306640625, - "learning_rate": 4.968485703501857e-05, - "loss": 112.3861, - "step": 18010 - }, - { - "epoch": 0.14560557212000744, - "grad_norm": 792.3851318359375, - "learning_rate": 4.9683739889066497e-05, - "loss": 169.0835, - "step": 18020 - }, - { - "epoch": 0.14568637432429157, - "grad_norm": 1420.177734375, - "learning_rate": 4.968262077914252e-05, - "loss": 164.7283, - "step": 18030 - }, - { - "epoch": 0.1457671765285757, - "grad_norm": 850.1755981445312, - "learning_rate": 4.9681499705335685e-05, - "loss": 136.3054, - "step": 18040 - }, - { - "epoch": 0.14584797873285985, - "grad_norm": 729.207763671875, - "learning_rate": 4.96803766677352e-05, - "loss": 172.2848, - "step": 18050 - }, - { - "epoch": 0.14592878093714395, - "grad_norm": 1386.603271484375, - "learning_rate": 4.96792516664304e-05, - "loss": 182.5913, - "step": 18060 - }, - { - "epoch": 0.1460095831414281, - "grad_norm": 900.0386962890625, - "learning_rate": 4.967812470151082e-05, - "loss": 165.9354, - "step": 18070 - }, - { - "epoch": 0.14609038534571223, - "grad_norm": 940.3046875, - "learning_rate": 4.9676995773066105e-05, - "loss": 112.6717, - "step": 18080 - }, - { - "epoch": 0.14617118754999636, - "grad_norm": 4443.7900390625, - "learning_rate": 4.967586488118609e-05, - "loss": 161.5711, - "step": 18090 - }, - { - "epoch": 0.1462519897542805, - "grad_norm": 955.7782592773438, - "learning_rate": 4.9674732025960755e-05, - "loss": 194.389, - "step": 18100 - }, - { - "epoch": 0.14633279195856463, - "grad_norm": 2043.8023681640625, - "learning_rate": 4.9673597207480236e-05, - "loss": 147.0529, - "step": 18110 - }, - { - "epoch": 0.14641359416284877, - "grad_norm": 2997.87890625, - "learning_rate": 4.967246042583482e-05, - "loss": 146.364, - "step": 18120 - }, - { - "epoch": 0.1464943963671329, - "grad_norm": 1237.5732421875, - "learning_rate": 4.967132168111496e-05, - "loss": 132.0931, - "step": 18130 - }, - { - "epoch": 0.14657519857141701, - "grad_norm": 1432.5120849609375, - "learning_rate": 4.967018097341126e-05, - "loss": 213.1167, - "step": 18140 - }, - { - "epoch": 0.14665600077570115, - "grad_norm": 1035.17724609375, - "learning_rate": 4.966903830281449e-05, - "loss": 133.9097, - "step": 18150 - }, - { - "epoch": 0.1467368029799853, - "grad_norm": 879.0906982421875, - "learning_rate": 4.9667893669415546e-05, - "loss": 161.3391, - "step": 18160 - }, - { - "epoch": 0.14681760518426942, - "grad_norm": 1065.3148193359375, - "learning_rate": 4.966674707330551e-05, - "loss": 147.773, - "step": 18170 - }, - { - "epoch": 0.14689840738855356, - "grad_norm": 993.1563720703125, - "learning_rate": 4.966559851457562e-05, - "loss": 144.7801, - "step": 18180 - }, - { - "epoch": 0.1469792095928377, - "grad_norm": 551.8864135742188, - "learning_rate": 4.966444799331726e-05, - "loss": 142.696, - "step": 18190 - }, - { - "epoch": 0.14706001179712183, - "grad_norm": 641.4093017578125, - "learning_rate": 4.966329550962196e-05, - "loss": 134.7732, - "step": 18200 - }, - { - "epoch": 0.14714081400140597, - "grad_norm": 853.4235229492188, - "learning_rate": 4.9662141063581436e-05, - "loss": 142.1104, - "step": 18210 - }, - { - "epoch": 0.1472216162056901, - "grad_norm": 1158.81005859375, - "learning_rate": 4.9660984655287525e-05, - "loss": 161.9792, - "step": 18220 - }, - { - "epoch": 0.1473024184099742, - "grad_norm": 1042.7049560546875, - "learning_rate": 4.965982628483224e-05, - "loss": 155.6552, - "step": 18230 - }, - { - "epoch": 0.14738322061425835, - "grad_norm": 746.765380859375, - "learning_rate": 4.965866595230776e-05, - "loss": 121.6951, - "step": 18240 - }, - { - "epoch": 0.14746402281854248, - "grad_norm": 687.2789916992188, - "learning_rate": 4.9657503657806395e-05, - "loss": 113.0879, - "step": 18250 - }, - { - "epoch": 0.14754482502282662, - "grad_norm": 1133.392822265625, - "learning_rate": 4.9656339401420624e-05, - "loss": 166.3725, - "step": 18260 - }, - { - "epoch": 0.14762562722711076, - "grad_norm": 733.4806518554688, - "learning_rate": 4.965517318324308e-05, - "loss": 139.1904, - "step": 18270 - }, - { - "epoch": 0.1477064294313949, - "grad_norm": 1013.0859375, - "learning_rate": 4.9654005003366566e-05, - "loss": 136.2502, - "step": 18280 - }, - { - "epoch": 0.14778723163567903, - "grad_norm": 708.066162109375, - "learning_rate": 4.965283486188401e-05, - "loss": 193.8193, - "step": 18290 - }, - { - "epoch": 0.14786803383996316, - "grad_norm": 1149.5897216796875, - "learning_rate": 4.965166275888854e-05, - "loss": 131.3347, - "step": 18300 - }, - { - "epoch": 0.1479488360442473, - "grad_norm": 728.546142578125, - "learning_rate": 4.965048869447339e-05, - "loss": 170.7203, - "step": 18310 - }, - { - "epoch": 0.1480296382485314, - "grad_norm": 1086.8751220703125, - "learning_rate": 4.964931266873198e-05, - "loss": 140.5396, - "step": 18320 - }, - { - "epoch": 0.14811044045281554, - "grad_norm": 2335.054931640625, - "learning_rate": 4.96481346817579e-05, - "loss": 190.4257, - "step": 18330 - }, - { - "epoch": 0.14819124265709968, - "grad_norm": 889.5890502929688, - "learning_rate": 4.9646954733644856e-05, - "loss": 143.3664, - "step": 18340 - }, - { - "epoch": 0.14827204486138382, - "grad_norm": 638.7859497070312, - "learning_rate": 4.9645772824486734e-05, - "loss": 130.9303, - "step": 18350 - }, - { - "epoch": 0.14835284706566795, - "grad_norm": 429.1936340332031, - "learning_rate": 4.964458895437759e-05, - "loss": 194.556, - "step": 18360 - }, - { - "epoch": 0.1484336492699521, - "grad_norm": 625.8038330078125, - "learning_rate": 4.96434031234116e-05, - "loss": 137.4827, - "step": 18370 - }, - { - "epoch": 0.14851445147423623, - "grad_norm": 1951.56005859375, - "learning_rate": 4.964221533168312e-05, - "loss": 177.7926, - "step": 18380 - }, - { - "epoch": 0.14859525367852036, - "grad_norm": 2383.371337890625, - "learning_rate": 4.9641025579286656e-05, - "loss": 142.2735, - "step": 18390 - }, - { - "epoch": 0.14867605588280447, - "grad_norm": 1063.8594970703125, - "learning_rate": 4.9639833866316874e-05, - "loss": 138.8885, - "step": 18400 - }, - { - "epoch": 0.1487568580870886, - "grad_norm": 1529.0601806640625, - "learning_rate": 4.963864019286859e-05, - "loss": 140.622, - "step": 18410 - }, - { - "epoch": 0.14883766029137274, - "grad_norm": 1062.4075927734375, - "learning_rate": 4.963744455903679e-05, - "loss": 167.9912, - "step": 18420 - }, - { - "epoch": 0.14891846249565688, - "grad_norm": 590.7759399414062, - "learning_rate": 4.963624696491659e-05, - "loss": 130.7837, - "step": 18430 - }, - { - "epoch": 0.14899926469994101, - "grad_norm": 991.8165283203125, - "learning_rate": 4.963504741060329e-05, - "loss": 156.975, - "step": 18440 - }, - { - "epoch": 0.14908006690422515, - "grad_norm": 1038.615966796875, - "learning_rate": 4.963384589619233e-05, - "loss": 135.4892, - "step": 18450 - }, - { - "epoch": 0.1491608691085093, - "grad_norm": 738.695556640625, - "learning_rate": 4.9632642421779295e-05, - "loss": 129.8741, - "step": 18460 - }, - { - "epoch": 0.14924167131279342, - "grad_norm": 1388.894287109375, - "learning_rate": 4.9631436987459964e-05, - "loss": 140.8633, - "step": 18470 - }, - { - "epoch": 0.14932247351707756, - "grad_norm": 1890.2734375, - "learning_rate": 4.9630229593330226e-05, - "loss": 195.7736, - "step": 18480 - }, - { - "epoch": 0.14940327572136167, - "grad_norm": 1176.2431640625, - "learning_rate": 4.9629020239486155e-05, - "loss": 176.4671, - "step": 18490 - }, - { - "epoch": 0.1494840779256458, - "grad_norm": 1625.3804931640625, - "learning_rate": 4.962780892602398e-05, - "loss": 182.2511, - "step": 18500 - }, - { - "epoch": 0.14956488012992994, - "grad_norm": 1016.1923828125, - "learning_rate": 4.962659565304008e-05, - "loss": 139.6862, - "step": 18510 - }, - { - "epoch": 0.14964568233421408, - "grad_norm": 1304.57373046875, - "learning_rate": 4.962538042063097e-05, - "loss": 133.6651, - "step": 18520 - }, - { - "epoch": 0.1497264845384982, - "grad_norm": 2883.068359375, - "learning_rate": 4.962416322889337e-05, - "loss": 155.5502, - "step": 18530 - }, - { - "epoch": 0.14980728674278235, - "grad_norm": 518.0875244140625, - "learning_rate": 4.9622944077924106e-05, - "loss": 138.3091, - "step": 18540 - }, - { - "epoch": 0.14988808894706648, - "grad_norm": 1520.2349853515625, - "learning_rate": 4.9621722967820184e-05, - "loss": 134.8868, - "step": 18550 - }, - { - "epoch": 0.14996889115135062, - "grad_norm": 1127.28662109375, - "learning_rate": 4.962049989867877e-05, - "loss": 149.5082, - "step": 18560 - }, - { - "epoch": 0.15004969335563473, - "grad_norm": 1920.3206787109375, - "learning_rate": 4.961927487059716e-05, - "loss": 206.6959, - "step": 18570 - }, - { - "epoch": 0.15013049555991886, - "grad_norm": 985.6328735351562, - "learning_rate": 4.961804788367285e-05, - "loss": 147.696, - "step": 18580 - }, - { - "epoch": 0.150211297764203, - "grad_norm": 1025.6527099609375, - "learning_rate": 4.961681893800344e-05, - "loss": 142.4606, - "step": 18590 - }, - { - "epoch": 0.15029209996848714, - "grad_norm": 1423.64794921875, - "learning_rate": 4.961558803368673e-05, - "loss": 171.7357, - "step": 18600 - }, - { - "epoch": 0.15037290217277127, - "grad_norm": 1430.884765625, - "learning_rate": 4.961435517082065e-05, - "loss": 176.0427, - "step": 18610 - }, - { - "epoch": 0.1504537043770554, - "grad_norm": 664.1461181640625, - "learning_rate": 4.9613120349503286e-05, - "loss": 93.5966, - "step": 18620 - }, - { - "epoch": 0.15053450658133954, - "grad_norm": 1300.7425537109375, - "learning_rate": 4.961188356983291e-05, - "loss": 171.2061, - "step": 18630 - }, - { - "epoch": 0.15061530878562368, - "grad_norm": 1404.8843994140625, - "learning_rate": 4.9610644831907896e-05, - "loss": 156.9324, - "step": 18640 - }, - { - "epoch": 0.15069611098990782, - "grad_norm": 787.6842651367188, - "learning_rate": 4.960940413582683e-05, - "loss": 151.5972, - "step": 18650 - }, - { - "epoch": 0.15077691319419192, - "grad_norm": 926.42333984375, - "learning_rate": 4.960816148168842e-05, - "loss": 111.1213, - "step": 18660 - }, - { - "epoch": 0.15085771539847606, - "grad_norm": 650.7733154296875, - "learning_rate": 4.9606916869591527e-05, - "loss": 143.3633, - "step": 18670 - }, - { - "epoch": 0.1509385176027602, - "grad_norm": 1304.4024658203125, - "learning_rate": 4.960567029963519e-05, - "loss": 164.9596, - "step": 18680 - }, - { - "epoch": 0.15101931980704433, - "grad_norm": 1975.5609130859375, - "learning_rate": 4.9604421771918594e-05, - "loss": 159.01, - "step": 18690 - }, - { - "epoch": 0.15110012201132847, - "grad_norm": 786.7325439453125, - "learning_rate": 4.960317128654108e-05, - "loss": 131.1655, - "step": 18700 - }, - { - "epoch": 0.1511809242156126, - "grad_norm": 1006.70654296875, - "learning_rate": 4.9601918843602145e-05, - "loss": 172.4386, - "step": 18710 - }, - { - "epoch": 0.15126172641989674, - "grad_norm": 913.4758911132812, - "learning_rate": 4.960066444320143e-05, - "loss": 123.8537, - "step": 18720 - }, - { - "epoch": 0.15134252862418088, - "grad_norm": 1134.485107421875, - "learning_rate": 4.959940808543875e-05, - "loss": 143.2191, - "step": 18730 - }, - { - "epoch": 0.151423330828465, - "grad_norm": 1133.67431640625, - "learning_rate": 4.959814977041406e-05, - "loss": 111.9317, - "step": 18740 - }, - { - "epoch": 0.15150413303274912, - "grad_norm": 982.4358520507812, - "learning_rate": 4.9596889498227486e-05, - "loss": 160.2977, - "step": 18750 - }, - { - "epoch": 0.15158493523703326, - "grad_norm": 816.73681640625, - "learning_rate": 4.9595627268979294e-05, - "loss": 128.3574, - "step": 18760 - }, - { - "epoch": 0.1516657374413174, - "grad_norm": 720.4200439453125, - "learning_rate": 4.9594363082769925e-05, - "loss": 132.2612, - "step": 18770 - }, - { - "epoch": 0.15174653964560153, - "grad_norm": 1521.4068603515625, - "learning_rate": 4.959309693969996e-05, - "loss": 129.5006, - "step": 18780 - }, - { - "epoch": 0.15182734184988567, - "grad_norm": 856.1842041015625, - "learning_rate": 4.959182883987012e-05, - "loss": 145.0418, - "step": 18790 - }, - { - "epoch": 0.1519081440541698, - "grad_norm": 1037.7059326171875, - "learning_rate": 4.959055878338134e-05, - "loss": 156.9543, - "step": 18800 - }, - { - "epoch": 0.15198894625845394, - "grad_norm": 499.3943786621094, - "learning_rate": 4.9589286770334654e-05, - "loss": 127.4943, - "step": 18810 - }, - { - "epoch": 0.15206974846273807, - "grad_norm": 1569.54736328125, - "learning_rate": 4.9588012800831264e-05, - "loss": 186.4054, - "step": 18820 - }, - { - "epoch": 0.15215055066702218, - "grad_norm": 1288.7344970703125, - "learning_rate": 4.9586736874972535e-05, - "loss": 134.4534, - "step": 18830 - }, - { - "epoch": 0.15223135287130632, - "grad_norm": 981.8977661132812, - "learning_rate": 4.958545899285999e-05, - "loss": 147.6222, - "step": 18840 - }, - { - "epoch": 0.15231215507559046, - "grad_norm": 1461.2166748046875, - "learning_rate": 4.958417915459531e-05, - "loss": 136.6087, - "step": 18850 - }, - { - "epoch": 0.1523929572798746, - "grad_norm": 1042.28759765625, - "learning_rate": 4.958289736028032e-05, - "loss": 125.8117, - "step": 18860 - }, - { - "epoch": 0.15247375948415873, - "grad_norm": 1394.02099609375, - "learning_rate": 4.958161361001701e-05, - "loss": 106.0165, - "step": 18870 - }, - { - "epoch": 0.15255456168844286, - "grad_norm": 1479.97509765625, - "learning_rate": 4.9580327903907514e-05, - "loss": 135.8925, - "step": 18880 - }, - { - "epoch": 0.152635363892727, - "grad_norm": 983.6040649414062, - "learning_rate": 4.957904024205414e-05, - "loss": 182.2857, - "step": 18890 - }, - { - "epoch": 0.15271616609701114, - "grad_norm": 952.5586547851562, - "learning_rate": 4.957775062455933e-05, - "loss": 114.4358, - "step": 18900 - }, - { - "epoch": 0.15279696830129527, - "grad_norm": 1429.8126220703125, - "learning_rate": 4.95764590515257e-05, - "loss": 155.2205, - "step": 18910 - }, - { - "epoch": 0.15287777050557938, - "grad_norm": 1141.6102294921875, - "learning_rate": 4.957516552305602e-05, - "loss": 196.4725, - "step": 18920 - }, - { - "epoch": 0.15295857270986352, - "grad_norm": 971.5953369140625, - "learning_rate": 4.957387003925321e-05, - "loss": 143.6367, - "step": 18930 - }, - { - "epoch": 0.15303937491414765, - "grad_norm": 986.298095703125, - "learning_rate": 4.9572572600220323e-05, - "loss": 161.2457, - "step": 18940 - }, - { - "epoch": 0.1531201771184318, - "grad_norm": 618.5545654296875, - "learning_rate": 4.957127320606062e-05, - "loss": 98.7228, - "step": 18950 - }, - { - "epoch": 0.15320097932271592, - "grad_norm": 1332.11572265625, - "learning_rate": 4.956997185687747e-05, - "loss": 119.0759, - "step": 18960 - }, - { - "epoch": 0.15328178152700006, - "grad_norm": 1110.71923828125, - "learning_rate": 4.9568668552774424e-05, - "loss": 157.8614, - "step": 18970 - }, - { - "epoch": 0.1533625837312842, - "grad_norm": 567.2537841796875, - "learning_rate": 4.956736329385517e-05, - "loss": 127.0969, - "step": 18980 - }, - { - "epoch": 0.15344338593556833, - "grad_norm": 828.8348999023438, - "learning_rate": 4.9566056080223574e-05, - "loss": 163.5427, - "step": 18990 - }, - { - "epoch": 0.15352418813985247, - "grad_norm": 1140.49853515625, - "learning_rate": 4.956474691198363e-05, - "loss": 132.9715, - "step": 19000 - }, - { - "epoch": 0.15360499034413658, - "grad_norm": 958.723388671875, - "learning_rate": 4.956343578923952e-05, - "loss": 136.1955, - "step": 19010 - }, - { - "epoch": 0.1536857925484207, - "grad_norm": 990.290771484375, - "learning_rate": 4.956212271209555e-05, - "loss": 133.8689, - "step": 19020 - }, - { - "epoch": 0.15376659475270485, - "grad_norm": 1795.55908203125, - "learning_rate": 4.956080768065621e-05, - "loss": 141.3235, - "step": 19030 - }, - { - "epoch": 0.15384739695698899, - "grad_norm": 794.1259765625, - "learning_rate": 4.9559490695026113e-05, - "loss": 124.8279, - "step": 19040 - }, - { - "epoch": 0.15392819916127312, - "grad_norm": 1143.3475341796875, - "learning_rate": 4.955817175531005e-05, - "loss": 157.4777, - "step": 19050 - }, - { - "epoch": 0.15400900136555726, - "grad_norm": 725.7691650390625, - "learning_rate": 4.9556850861612976e-05, - "loss": 152.8616, - "step": 19060 - }, - { - "epoch": 0.1540898035698414, - "grad_norm": 829.7158813476562, - "learning_rate": 4.955552801403998e-05, - "loss": 127.6073, - "step": 19070 - }, - { - "epoch": 0.15417060577412553, - "grad_norm": 836.8492431640625, - "learning_rate": 4.9554203212696304e-05, - "loss": 128.8572, - "step": 19080 - }, - { - "epoch": 0.15425140797840964, - "grad_norm": 939.9307250976562, - "learning_rate": 4.9552876457687374e-05, - "loss": 132.6982, - "step": 19090 - }, - { - "epoch": 0.15433221018269377, - "grad_norm": 1570.5841064453125, - "learning_rate": 4.955154774911875e-05, - "loss": 101.3377, - "step": 19100 - }, - { - "epoch": 0.1544130123869779, - "grad_norm": 608.4269409179688, - "learning_rate": 4.955021708709614e-05, - "loss": 138.4897, - "step": 19110 - }, - { - "epoch": 0.15449381459126205, - "grad_norm": 1717.3123779296875, - "learning_rate": 4.9548884471725434e-05, - "loss": 183.6861, - "step": 19120 - }, - { - "epoch": 0.15457461679554618, - "grad_norm": 721.2388305664062, - "learning_rate": 4.9547549903112654e-05, - "loss": 183.5123, - "step": 19130 - }, - { - "epoch": 0.15465541899983032, - "grad_norm": 1664.793701171875, - "learning_rate": 4.954621338136398e-05, - "loss": 160.0894, - "step": 19140 - }, - { - "epoch": 0.15473622120411445, - "grad_norm": 810.6116333007812, - "learning_rate": 4.954487490658577e-05, - "loss": 150.3457, - "step": 19150 - }, - { - "epoch": 0.1548170234083986, - "grad_norm": 1055.0728759765625, - "learning_rate": 4.95435344788845e-05, - "loss": 168.4552, - "step": 19160 - }, - { - "epoch": 0.15489782561268273, - "grad_norm": 878.2693481445312, - "learning_rate": 4.954219209836684e-05, - "loss": 123.0568, - "step": 19170 - }, - { - "epoch": 0.15497862781696684, - "grad_norm": 971.3629760742188, - "learning_rate": 4.954084776513957e-05, - "loss": 112.7831, - "step": 19180 - }, - { - "epoch": 0.15505943002125097, - "grad_norm": 1250.1171875, - "learning_rate": 4.953950147930969e-05, - "loss": 128.6498, - "step": 19190 - }, - { - "epoch": 0.1551402322255351, - "grad_norm": 1516.2861328125, - "learning_rate": 4.9538153240984286e-05, - "loss": 135.834, - "step": 19200 - }, - { - "epoch": 0.15522103442981924, - "grad_norm": 917.4509887695312, - "learning_rate": 4.953680305027065e-05, - "loss": 199.9036, - "step": 19210 - }, - { - "epoch": 0.15530183663410338, - "grad_norm": 1195.2305908203125, - "learning_rate": 4.9535450907276204e-05, - "loss": 167.8621, - "step": 19220 - }, - { - "epoch": 0.15538263883838752, - "grad_norm": 1185.2828369140625, - "learning_rate": 4.953409681210853e-05, - "loss": 150.506, - "step": 19230 - }, - { - "epoch": 0.15546344104267165, - "grad_norm": 1028.5968017578125, - "learning_rate": 4.9532740764875377e-05, - "loss": 160.7915, - "step": 19240 - }, - { - "epoch": 0.1555442432469558, - "grad_norm": 1641.87451171875, - "learning_rate": 4.953138276568462e-05, - "loss": 167.9964, - "step": 19250 - }, - { - "epoch": 0.1556250454512399, - "grad_norm": 676.5661010742188, - "learning_rate": 4.953002281464432e-05, - "loss": 168.5508, - "step": 19260 - }, - { - "epoch": 0.15570584765552403, - "grad_norm": 581.885009765625, - "learning_rate": 4.952866091186269e-05, - "loss": 138.8725, - "step": 19270 - }, - { - "epoch": 0.15578664985980817, - "grad_norm": 1236.295166015625, - "learning_rate": 4.952729705744808e-05, - "loss": 123.4911, - "step": 19280 - }, - { - "epoch": 0.1558674520640923, - "grad_norm": 888.4049682617188, - "learning_rate": 4.9525931251509e-05, - "loss": 138.8721, - "step": 19290 - }, - { - "epoch": 0.15594825426837644, - "grad_norm": 656.3297119140625, - "learning_rate": 4.9524563494154145e-05, - "loss": 161.5853, - "step": 19300 - }, - { - "epoch": 0.15602905647266058, - "grad_norm": 899.3927612304688, - "learning_rate": 4.952319378549232e-05, - "loss": 130.1569, - "step": 19310 - }, - { - "epoch": 0.1561098586769447, - "grad_norm": 1051.7786865234375, - "learning_rate": 4.95218221256325e-05, - "loss": 155.7519, - "step": 19320 - }, - { - "epoch": 0.15619066088122885, - "grad_norm": 351.5675048828125, - "learning_rate": 4.952044851468385e-05, - "loss": 118.1119, - "step": 19330 - }, - { - "epoch": 0.15627146308551298, - "grad_norm": 648.6687622070312, - "learning_rate": 4.951907295275563e-05, - "loss": 186.3808, - "step": 19340 - }, - { - "epoch": 0.1563522652897971, - "grad_norm": 737.3397827148438, - "learning_rate": 4.951769543995731e-05, - "loss": 164.0862, - "step": 19350 - }, - { - "epoch": 0.15643306749408123, - "grad_norm": 1181.345458984375, - "learning_rate": 4.951631597639849e-05, - "loss": 95.8942, - "step": 19360 - }, - { - "epoch": 0.15651386969836537, - "grad_norm": 1376.4288330078125, - "learning_rate": 4.9514934562188915e-05, - "loss": 155.4059, - "step": 19370 - }, - { - "epoch": 0.1565946719026495, - "grad_norm": 984.8242797851562, - "learning_rate": 4.951355119743851e-05, - "loss": 136.2228, - "step": 19380 - }, - { - "epoch": 0.15667547410693364, - "grad_norm": 1276.46142578125, - "learning_rate": 4.9512165882257335e-05, - "loss": 134.6281, - "step": 19390 - }, - { - "epoch": 0.15675627631121777, - "grad_norm": 855.7882690429688, - "learning_rate": 4.9510778616755616e-05, - "loss": 127.1215, - "step": 19400 - }, - { - "epoch": 0.1568370785155019, - "grad_norm": 1180.2899169921875, - "learning_rate": 4.9509389401043735e-05, - "loss": 157.9484, - "step": 19410 - }, - { - "epoch": 0.15691788071978605, - "grad_norm": 920.9578857421875, - "learning_rate": 4.950799823523222e-05, - "loss": 140.1964, - "step": 19420 - }, - { - "epoch": 0.15699868292407018, - "grad_norm": 1415.94091796875, - "learning_rate": 4.950660511943176e-05, - "loss": 135.1266, - "step": 19430 - }, - { - "epoch": 0.1570794851283543, - "grad_norm": 396.073974609375, - "learning_rate": 4.95052100537532e-05, - "loss": 144.9773, - "step": 19440 - }, - { - "epoch": 0.15716028733263843, - "grad_norm": 716.8720092773438, - "learning_rate": 4.950381303830755e-05, - "loss": 118.7841, - "step": 19450 - }, - { - "epoch": 0.15724108953692256, - "grad_norm": 798.32666015625, - "learning_rate": 4.950241407320594e-05, - "loss": 136.819, - "step": 19460 - }, - { - "epoch": 0.1573218917412067, - "grad_norm": 1627.865966796875, - "learning_rate": 4.95010131585597e-05, - "loss": 155.6983, - "step": 19470 - }, - { - "epoch": 0.15740269394549083, - "grad_norm": 2705.7021484375, - "learning_rate": 4.9499610294480284e-05, - "loss": 151.8552, - "step": 19480 - }, - { - "epoch": 0.15748349614977497, - "grad_norm": 970.4476928710938, - "learning_rate": 4.9498205481079315e-05, - "loss": 154.0217, - "step": 19490 - }, - { - "epoch": 0.1575642983540591, - "grad_norm": 775.9421997070312, - "learning_rate": 4.949679871846857e-05, - "loss": 171.9853, - "step": 19500 - }, - { - "epoch": 0.15764510055834324, - "grad_norm": 736.22314453125, - "learning_rate": 4.949539000675998e-05, - "loss": 115.1917, - "step": 19510 - }, - { - "epoch": 0.15772590276262735, - "grad_norm": 773.5701293945312, - "learning_rate": 4.9493979346065624e-05, - "loss": 143.0012, - "step": 19520 - }, - { - "epoch": 0.1578067049669115, - "grad_norm": 1225.4534912109375, - "learning_rate": 4.9492566736497744e-05, - "loss": 146.8361, - "step": 19530 - }, - { - "epoch": 0.15788750717119562, - "grad_norm": 849.607421875, - "learning_rate": 4.949115217816873e-05, - "loss": 154.6573, - "step": 19540 - }, - { - "epoch": 0.15796830937547976, - "grad_norm": 1171.7467041015625, - "learning_rate": 4.948973567119114e-05, - "loss": 143.7283, - "step": 19550 - }, - { - "epoch": 0.1580491115797639, - "grad_norm": 1184.739013671875, - "learning_rate": 4.9488317215677673e-05, - "loss": 147.067, - "step": 19560 - }, - { - "epoch": 0.15812991378404803, - "grad_norm": 671.5252075195312, - "learning_rate": 4.948689681174119e-05, - "loss": 134.3241, - "step": 19570 - }, - { - "epoch": 0.15821071598833217, - "grad_norm": 1588.4747314453125, - "learning_rate": 4.948547445949471e-05, - "loss": 132.0108, - "step": 19580 - }, - { - "epoch": 0.1582915181926163, - "grad_norm": 1395.05712890625, - "learning_rate": 4.94840501590514e-05, - "loss": 132.0019, - "step": 19590 - }, - { - "epoch": 0.15837232039690044, - "grad_norm": 1854.0704345703125, - "learning_rate": 4.948262391052458e-05, - "loss": 113.3447, - "step": 19600 - }, - { - "epoch": 0.15845312260118455, - "grad_norm": 1134.071044921875, - "learning_rate": 4.948119571402775e-05, - "loss": 133.9102, - "step": 19610 - }, - { - "epoch": 0.15853392480546868, - "grad_norm": 1435.5302734375, - "learning_rate": 4.947976556967452e-05, - "loss": 142.5358, - "step": 19620 - }, - { - "epoch": 0.15861472700975282, - "grad_norm": 672.7803344726562, - "learning_rate": 4.947833347757869e-05, - "loss": 104.7007, - "step": 19630 - }, - { - "epoch": 0.15869552921403696, - "grad_norm": 597.4839477539062, - "learning_rate": 4.9476899437854205e-05, - "loss": 134.072, - "step": 19640 - }, - { - "epoch": 0.1587763314183211, - "grad_norm": 1603.63525390625, - "learning_rate": 4.9475463450615175e-05, - "loss": 115.9036, - "step": 19650 - }, - { - "epoch": 0.15885713362260523, - "grad_norm": 663.64306640625, - "learning_rate": 4.9474025515975835e-05, - "loss": 112.9661, - "step": 19660 - }, - { - "epoch": 0.15893793582688936, - "grad_norm": 1277.085205078125, - "learning_rate": 4.947258563405061e-05, - "loss": 138.1168, - "step": 19670 - }, - { - "epoch": 0.1590187380311735, - "grad_norm": 1482.751708984375, - "learning_rate": 4.947114380495406e-05, - "loss": 194.949, - "step": 19680 - }, - { - "epoch": 0.15909954023545764, - "grad_norm": 1446.81787109375, - "learning_rate": 4.94697000288009e-05, - "loss": 145.5572, - "step": 19690 - }, - { - "epoch": 0.15918034243974175, - "grad_norm": 932.5813598632812, - "learning_rate": 4.946825430570602e-05, - "loss": 145.6906, - "step": 19700 - }, - { - "epoch": 0.15926114464402588, - "grad_norm": 1143.1041259765625, - "learning_rate": 4.946680663578443e-05, - "loss": 151.2845, - "step": 19710 - }, - { - "epoch": 0.15934194684831002, - "grad_norm": 1063.902587890625, - "learning_rate": 4.9465357019151325e-05, - "loss": 155.4715, - "step": 19720 - }, - { - "epoch": 0.15942274905259415, - "grad_norm": 945.45556640625, - "learning_rate": 4.946390545592204e-05, - "loss": 188.0311, - "step": 19730 - }, - { - "epoch": 0.1595035512568783, - "grad_norm": 1733.0693359375, - "learning_rate": 4.9462451946212085e-05, - "loss": 150.3059, - "step": 19740 - }, - { - "epoch": 0.15958435346116243, - "grad_norm": 934.3817749023438, - "learning_rate": 4.946099649013708e-05, - "loss": 115.0981, - "step": 19750 - }, - { - "epoch": 0.15966515566544656, - "grad_norm": 756.0308227539062, - "learning_rate": 4.945953908781286e-05, - "loss": 126.3076, - "step": 19760 - }, - { - "epoch": 0.1597459578697307, - "grad_norm": 646.1735229492188, - "learning_rate": 4.945807973935536e-05, - "loss": 141.5272, - "step": 19770 - }, - { - "epoch": 0.1598267600740148, - "grad_norm": 900.1630249023438, - "learning_rate": 4.94566184448807e-05, - "loss": 169.0281, - "step": 19780 - }, - { - "epoch": 0.15990756227829894, - "grad_norm": 1312.0203857421875, - "learning_rate": 4.945515520450515e-05, - "loss": 168.3111, - "step": 19790 - }, - { - "epoch": 0.15998836448258308, - "grad_norm": 824.4857177734375, - "learning_rate": 4.9453690018345144e-05, - "loss": 160.6633, - "step": 19800 - }, - { - "epoch": 0.16006916668686721, - "grad_norm": 606.3312377929688, - "learning_rate": 4.945222288651724e-05, - "loss": 167.2263, - "step": 19810 - }, - { - "epoch": 0.16014996889115135, - "grad_norm": 1857.02685546875, - "learning_rate": 4.945075380913819e-05, - "loss": 190.6399, - "step": 19820 - }, - { - "epoch": 0.1602307710954355, - "grad_norm": 1485.077392578125, - "learning_rate": 4.944928278632487e-05, - "loss": 156.7114, - "step": 19830 - }, - { - "epoch": 0.16031157329971962, - "grad_norm": 824.557373046875, - "learning_rate": 4.944780981819433e-05, - "loss": 143.3107, - "step": 19840 - }, - { - "epoch": 0.16039237550400376, - "grad_norm": 987.8336791992188, - "learning_rate": 4.944633490486376e-05, - "loss": 100.8076, - "step": 19850 - }, - { - "epoch": 0.1604731777082879, - "grad_norm": 1033.8603515625, - "learning_rate": 4.944485804645052e-05, - "loss": 162.6909, - "step": 19860 - }, - { - "epoch": 0.160553979912572, - "grad_norm": 1208.977783203125, - "learning_rate": 4.9443379243072094e-05, - "loss": 132.4686, - "step": 19870 - }, - { - "epoch": 0.16063478211685614, - "grad_norm": 719.5220947265625, - "learning_rate": 4.944189849484618e-05, - "loss": 141.4025, - "step": 19880 - }, - { - "epoch": 0.16071558432114028, - "grad_norm": 1385.524658203125, - "learning_rate": 4.9440415801890566e-05, - "loss": 182.8163, - "step": 19890 - }, - { - "epoch": 0.1607963865254244, - "grad_norm": 1521.5345458984375, - "learning_rate": 4.9438931164323236e-05, - "loss": 110.8893, - "step": 19900 - }, - { - "epoch": 0.16087718872970855, - "grad_norm": 931.8245849609375, - "learning_rate": 4.9437444582262316e-05, - "loss": 141.6139, - "step": 19910 - }, - { - "epoch": 0.16095799093399268, - "grad_norm": 1338.6199951171875, - "learning_rate": 4.9435956055826083e-05, - "loss": 167.0462, - "step": 19920 - }, - { - "epoch": 0.16103879313827682, - "grad_norm": 592.0842895507812, - "learning_rate": 4.943446558513297e-05, - "loss": 104.6128, - "step": 19930 - }, - { - "epoch": 0.16111959534256096, - "grad_norm": 765.056640625, - "learning_rate": 4.943297317030156e-05, - "loss": 136.0432, - "step": 19940 - }, - { - "epoch": 0.16120039754684506, - "grad_norm": 896.3486328125, - "learning_rate": 4.943147881145063e-05, - "loss": 152.3156, - "step": 19950 - }, - { - "epoch": 0.1612811997511292, - "grad_norm": 669.0186157226562, - "learning_rate": 4.942998250869904e-05, - "loss": 146.4306, - "step": 19960 - }, - { - "epoch": 0.16136200195541334, - "grad_norm": 738.7496948242188, - "learning_rate": 4.9428484262165865e-05, - "loss": 167.2415, - "step": 19970 - }, - { - "epoch": 0.16144280415969747, - "grad_norm": 1352.85009765625, - "learning_rate": 4.9426984071970305e-05, - "loss": 168.2593, - "step": 19980 - }, - { - "epoch": 0.1615236063639816, - "grad_norm": 1093.584228515625, - "learning_rate": 4.942548193823173e-05, - "loss": 186.0099, - "step": 19990 - }, - { - "epoch": 0.16160440856826574, - "grad_norm": 1222.2086181640625, - "learning_rate": 4.942397786106965e-05, - "loss": 159.3606, - "step": 20000 - }, - { - "epoch": 0.16168521077254988, - "grad_norm": 1016.3326416015625, - "learning_rate": 4.942247184060375e-05, - "loss": 121.1088, - "step": 20010 - }, - { - "epoch": 0.16176601297683402, - "grad_norm": 760.5179443359375, - "learning_rate": 4.942096387695385e-05, - "loss": 126.0567, - "step": 20020 - }, - { - "epoch": 0.16184681518111815, - "grad_norm": 2420.37841796875, - "learning_rate": 4.941945397023993e-05, - "loss": 142.4873, - "step": 20030 - }, - { - "epoch": 0.16192761738540226, - "grad_norm": 1585.40234375, - "learning_rate": 4.9417942120582114e-05, - "loss": 128.2617, - "step": 20040 - }, - { - "epoch": 0.1620084195896864, - "grad_norm": 1405.9205322265625, - "learning_rate": 4.941642832810072e-05, - "loss": 123.314, - "step": 20050 - }, - { - "epoch": 0.16208922179397053, - "grad_norm": 827.2435913085938, - "learning_rate": 4.9414912592916185e-05, - "loss": 143.6495, - "step": 20060 - }, - { - "epoch": 0.16217002399825467, - "grad_norm": 742.4790649414062, - "learning_rate": 4.9413394915149094e-05, - "loss": 200.291, - "step": 20070 - }, - { - "epoch": 0.1622508262025388, - "grad_norm": 1240.279296875, - "learning_rate": 4.9411875294920215e-05, - "loss": 165.2704, - "step": 20080 - }, - { - "epoch": 0.16233162840682294, - "grad_norm": 671.4786987304688, - "learning_rate": 4.9410353732350455e-05, - "loss": 158.4867, - "step": 20090 - }, - { - "epoch": 0.16241243061110708, - "grad_norm": 609.6265869140625, - "learning_rate": 4.940883022756088e-05, - "loss": 164.4025, - "step": 20100 - }, - { - "epoch": 0.16249323281539121, - "grad_norm": 702.2029418945312, - "learning_rate": 4.94073047806727e-05, - "loss": 143.9458, - "step": 20110 - }, - { - "epoch": 0.16257403501967535, - "grad_norm": 658.32763671875, - "learning_rate": 4.94057773918073e-05, - "loss": 116.2497, - "step": 20120 - }, - { - "epoch": 0.16265483722395946, - "grad_norm": 618.864990234375, - "learning_rate": 4.940424806108619e-05, - "loss": 113.3011, - "step": 20130 - }, - { - "epoch": 0.1627356394282436, - "grad_norm": 646.5340576171875, - "learning_rate": 4.9402716788631073e-05, - "loss": 209.5348, - "step": 20140 - }, - { - "epoch": 0.16281644163252773, - "grad_norm": 794.1364135742188, - "learning_rate": 4.940118357456377e-05, - "loss": 112.0908, - "step": 20150 - }, - { - "epoch": 0.16289724383681187, - "grad_norm": 1476.8046875, - "learning_rate": 4.939964841900627e-05, - "loss": 156.0068, - "step": 20160 - }, - { - "epoch": 0.162978046041096, - "grad_norm": 1192.75146484375, - "learning_rate": 4.939811132208073e-05, - "loss": 208.6819, - "step": 20170 - }, - { - "epoch": 0.16305884824538014, - "grad_norm": 1222.39208984375, - "learning_rate": 4.939657228390945e-05, - "loss": 127.3492, - "step": 20180 - }, - { - "epoch": 0.16313965044966428, - "grad_norm": 1381.251708984375, - "learning_rate": 4.939503130461487e-05, - "loss": 141.149, - "step": 20190 - }, - { - "epoch": 0.1632204526539484, - "grad_norm": 948.350341796875, - "learning_rate": 4.9393488384319605e-05, - "loss": 146.2455, - "step": 20200 - }, - { - "epoch": 0.16330125485823252, - "grad_norm": 1100.5667724609375, - "learning_rate": 4.939194352314643e-05, - "loss": 134.0395, - "step": 20210 - }, - { - "epoch": 0.16338205706251666, - "grad_norm": 1312.8271484375, - "learning_rate": 4.939039672121825e-05, - "loss": 162.6504, - "step": 20220 - }, - { - "epoch": 0.1634628592668008, - "grad_norm": 742.9317626953125, - "learning_rate": 4.938884797865814e-05, - "loss": 119.811, - "step": 20230 - }, - { - "epoch": 0.16354366147108493, - "grad_norm": 1479.2552490234375, - "learning_rate": 4.938729729558932e-05, - "loss": 127.0003, - "step": 20240 - }, - { - "epoch": 0.16362446367536906, - "grad_norm": 2271.06982421875, - "learning_rate": 4.938574467213518e-05, - "loss": 180.906, - "step": 20250 - }, - { - "epoch": 0.1637052658796532, - "grad_norm": 848.5759887695312, - "learning_rate": 4.938419010841925e-05, - "loss": 108.292, - "step": 20260 - }, - { - "epoch": 0.16378606808393734, - "grad_norm": 865.2387084960938, - "learning_rate": 4.938263360456523e-05, - "loss": 146.3953, - "step": 20270 - }, - { - "epoch": 0.16386687028822147, - "grad_norm": 1448.966552734375, - "learning_rate": 4.938107516069694e-05, - "loss": 197.9948, - "step": 20280 - }, - { - "epoch": 0.1639476724925056, - "grad_norm": 856.9296875, - "learning_rate": 4.9379514776938405e-05, - "loss": 120.4137, - "step": 20290 - }, - { - "epoch": 0.16402847469678972, - "grad_norm": 1423.47607421875, - "learning_rate": 4.9377952453413765e-05, - "loss": 147.8874, - "step": 20300 - }, - { - "epoch": 0.16410927690107385, - "grad_norm": 1550.7271728515625, - "learning_rate": 4.9376388190247324e-05, - "loss": 153.0022, - "step": 20310 - }, - { - "epoch": 0.164190079105358, - "grad_norm": 1133.5203857421875, - "learning_rate": 4.937482198756355e-05, - "loss": 169.8442, - "step": 20320 - }, - { - "epoch": 0.16427088130964212, - "grad_norm": 921.5442504882812, - "learning_rate": 4.937325384548705e-05, - "loss": 179.4838, - "step": 20330 - }, - { - "epoch": 0.16435168351392626, - "grad_norm": 920.5408935546875, - "learning_rate": 4.9371683764142615e-05, - "loss": 108.6345, - "step": 20340 - }, - { - "epoch": 0.1644324857182104, - "grad_norm": 1286.8828125, - "learning_rate": 4.9370111743655145e-05, - "loss": 148.3994, - "step": 20350 - }, - { - "epoch": 0.16451328792249453, - "grad_norm": 918.86181640625, - "learning_rate": 4.9368537784149724e-05, - "loss": 139.1599, - "step": 20360 - }, - { - "epoch": 0.16459409012677867, - "grad_norm": 1502.0557861328125, - "learning_rate": 4.936696188575159e-05, - "loss": 164.7047, - "step": 20370 - }, - { - "epoch": 0.16467489233106278, - "grad_norm": 921.6201171875, - "learning_rate": 4.9365384048586125e-05, - "loss": 146.2348, - "step": 20380 - }, - { - "epoch": 0.1647556945353469, - "grad_norm": 761.9798583984375, - "learning_rate": 4.936380427277888e-05, - "loss": 122.8679, - "step": 20390 - }, - { - "epoch": 0.16483649673963105, - "grad_norm": 1140.500732421875, - "learning_rate": 4.936222255845554e-05, - "loss": 120.364, - "step": 20400 - }, - { - "epoch": 0.16491729894391519, - "grad_norm": 1554.974609375, - "learning_rate": 4.936063890574196e-05, - "loss": 170.4009, - "step": 20410 - }, - { - "epoch": 0.16499810114819932, - "grad_norm": 756.5431518554688, - "learning_rate": 4.935905331476414e-05, - "loss": 124.2604, - "step": 20420 - }, - { - "epoch": 0.16507890335248346, - "grad_norm": 759.2258911132812, - "learning_rate": 4.9357465785648247e-05, - "loss": 122.2716, - "step": 20430 - }, - { - "epoch": 0.1651597055567676, - "grad_norm": 1130.5928955078125, - "learning_rate": 4.935587631852058e-05, - "loss": 120.7589, - "step": 20440 - }, - { - "epoch": 0.16524050776105173, - "grad_norm": 994.0823364257812, - "learning_rate": 4.935428491350761e-05, - "loss": 147.596, - "step": 20450 - }, - { - "epoch": 0.16532130996533587, - "grad_norm": 1129.5496826171875, - "learning_rate": 4.9352691570735965e-05, - "loss": 175.5725, - "step": 20460 - }, - { - "epoch": 0.16540211216961997, - "grad_norm": 974.0113525390625, - "learning_rate": 4.935109629033242e-05, - "loss": 198.5151, - "step": 20470 - }, - { - "epoch": 0.1654829143739041, - "grad_norm": 1393.72607421875, - "learning_rate": 4.93494990724239e-05, - "loss": 142.9287, - "step": 20480 - }, - { - "epoch": 0.16556371657818825, - "grad_norm": 1063.649169921875, - "learning_rate": 4.934789991713748e-05, - "loss": 123.0873, - "step": 20490 - }, - { - "epoch": 0.16564451878247238, - "grad_norm": 1238.6739501953125, - "learning_rate": 4.9346298824600405e-05, - "loss": 120.6401, - "step": 20500 - }, - { - "epoch": 0.16572532098675652, - "grad_norm": 843.2783203125, - "learning_rate": 4.934469579494008e-05, - "loss": 121.2858, - "step": 20510 - }, - { - "epoch": 0.16580612319104066, - "grad_norm": 983.3422241210938, - "learning_rate": 4.934309082828402e-05, - "loss": 143.3731, - "step": 20520 - }, - { - "epoch": 0.1658869253953248, - "grad_norm": 959.219970703125, - "learning_rate": 4.934148392475996e-05, - "loss": 145.4661, - "step": 20530 - }, - { - "epoch": 0.16596772759960893, - "grad_norm": 733.232666015625, - "learning_rate": 4.933987508449572e-05, - "loss": 126.4923, - "step": 20540 - }, - { - "epoch": 0.16604852980389306, - "grad_norm": 853.0502319335938, - "learning_rate": 4.933826430761933e-05, - "loss": 153.8978, - "step": 20550 - }, - { - "epoch": 0.16612933200817717, - "grad_norm": 1198.431640625, - "learning_rate": 4.933665159425895e-05, - "loss": 166.0935, - "step": 20560 - }, - { - "epoch": 0.1662101342124613, - "grad_norm": 514.8040771484375, - "learning_rate": 4.933503694454289e-05, - "loss": 85.8809, - "step": 20570 - }, - { - "epoch": 0.16629093641674544, - "grad_norm": 791.435302734375, - "learning_rate": 4.9333420358599624e-05, - "loss": 141.1844, - "step": 20580 - }, - { - "epoch": 0.16637173862102958, - "grad_norm": 1096.3251953125, - "learning_rate": 4.9331801836557776e-05, - "loss": 141.8443, - "step": 20590 - }, - { - "epoch": 0.16645254082531372, - "grad_norm": 4899.19921875, - "learning_rate": 4.9330181378546124e-05, - "loss": 176.5427, - "step": 20600 - }, - { - "epoch": 0.16653334302959785, - "grad_norm": 453.80072021484375, - "learning_rate": 4.93285589846936e-05, - "loss": 143.0679, - "step": 20610 - }, - { - "epoch": 0.166614145233882, - "grad_norm": 1067.961669921875, - "learning_rate": 4.9326934655129295e-05, - "loss": 125.034, - "step": 20620 - }, - { - "epoch": 0.16669494743816612, - "grad_norm": 674.2916259765625, - "learning_rate": 4.932530838998244e-05, - "loss": 121.1949, - "step": 20630 - }, - { - "epoch": 0.16677574964245023, - "grad_norm": 2042.013671875, - "learning_rate": 4.9323680189382434e-05, - "loss": 248.2044, - "step": 20640 - }, - { - "epoch": 0.16685655184673437, - "grad_norm": 972.5521240234375, - "learning_rate": 4.932205005345882e-05, - "loss": 152.445, - "step": 20650 - }, - { - "epoch": 0.1669373540510185, - "grad_norm": 659.5952758789062, - "learning_rate": 4.9320417982341313e-05, - "loss": 123.7578, - "step": 20660 - }, - { - "epoch": 0.16701815625530264, - "grad_norm": 1101.409912109375, - "learning_rate": 4.9318783976159765e-05, - "loss": 157.3332, - "step": 20670 - }, - { - "epoch": 0.16709895845958678, - "grad_norm": 1593.623291015625, - "learning_rate": 4.931714803504418e-05, - "loss": 161.4305, - "step": 20680 - }, - { - "epoch": 0.1671797606638709, - "grad_norm": 785.7627563476562, - "learning_rate": 4.9315510159124734e-05, - "loss": 133.448, - "step": 20690 - }, - { - "epoch": 0.16726056286815505, - "grad_norm": 1091.3284912109375, - "learning_rate": 4.931387034853173e-05, - "loss": 167.9184, - "step": 20700 - }, - { - "epoch": 0.16734136507243919, - "grad_norm": 1226.1614990234375, - "learning_rate": 4.931222860339565e-05, - "loss": 127.1648, - "step": 20710 - }, - { - "epoch": 0.16742216727672332, - "grad_norm": 1060.7408447265625, - "learning_rate": 4.931058492384712e-05, - "loss": 131.7696, - "step": 20720 - }, - { - "epoch": 0.16750296948100743, - "grad_norm": 821.4359130859375, - "learning_rate": 4.9308939310016916e-05, - "loss": 135.4715, - "step": 20730 - }, - { - "epoch": 0.16758377168529157, - "grad_norm": 2822.77197265625, - "learning_rate": 4.930729176203598e-05, - "loss": 169.724, - "step": 20740 - }, - { - "epoch": 0.1676645738895757, - "grad_norm": 814.9628295898438, - "learning_rate": 4.930564228003538e-05, - "loss": 111.3484, - "step": 20750 - }, - { - "epoch": 0.16774537609385984, - "grad_norm": 1205.373046875, - "learning_rate": 4.930399086414638e-05, - "loss": 125.1682, - "step": 20760 - }, - { - "epoch": 0.16782617829814397, - "grad_norm": 874.1157836914062, - "learning_rate": 4.9302337514500374e-05, - "loss": 145.6209, - "step": 20770 - }, - { - "epoch": 0.1679069805024281, - "grad_norm": 1192.6041259765625, - "learning_rate": 4.930068223122889e-05, - "loss": 154.289, - "step": 20780 - }, - { - "epoch": 0.16798778270671225, - "grad_norm": 1004.2535400390625, - "learning_rate": 4.929902501446366e-05, - "loss": 183.3328, - "step": 20790 - }, - { - "epoch": 0.16806858491099638, - "grad_norm": 996.1227416992188, - "learning_rate": 4.929736586433652e-05, - "loss": 133.2687, - "step": 20800 - }, - { - "epoch": 0.16814938711528052, - "grad_norm": 676.2308349609375, - "learning_rate": 4.929570478097949e-05, - "loss": 149.5055, - "step": 20810 - }, - { - "epoch": 0.16823018931956463, - "grad_norm": 1042.8621826171875, - "learning_rate": 4.9294041764524726e-05, - "loss": 126.4842, - "step": 20820 - }, - { - "epoch": 0.16831099152384876, - "grad_norm": 1661.6201171875, - "learning_rate": 4.9292376815104566e-05, - "loss": 133.8418, - "step": 20830 - }, - { - "epoch": 0.1683917937281329, - "grad_norm": 1082.744140625, - "learning_rate": 4.929070993285146e-05, - "loss": 175.319, - "step": 20840 - }, - { - "epoch": 0.16847259593241704, - "grad_norm": 943.1874389648438, - "learning_rate": 4.928904111789805e-05, - "loss": 122.9224, - "step": 20850 - }, - { - "epoch": 0.16855339813670117, - "grad_norm": 582.3085327148438, - "learning_rate": 4.9287370370377106e-05, - "loss": 154.6284, - "step": 20860 - }, - { - "epoch": 0.1686342003409853, - "grad_norm": 363.685546875, - "learning_rate": 4.928569769042156e-05, - "loss": 159.0539, - "step": 20870 - }, - { - "epoch": 0.16871500254526944, - "grad_norm": 1049.855224609375, - "learning_rate": 4.928402307816451e-05, - "loss": 140.1967, - "step": 20880 - }, - { - "epoch": 0.16879580474955358, - "grad_norm": 912.501708984375, - "learning_rate": 4.92823465337392e-05, - "loss": 124.3262, - "step": 20890 - }, - { - "epoch": 0.1688766069538377, - "grad_norm": 869.2931518554688, - "learning_rate": 4.9280668057279014e-05, - "loss": 125.99, - "step": 20900 - }, - { - "epoch": 0.16895740915812182, - "grad_norm": 834.9353637695312, - "learning_rate": 4.9278987648917495e-05, - "loss": 121.5135, - "step": 20910 - }, - { - "epoch": 0.16903821136240596, - "grad_norm": 933.3566284179688, - "learning_rate": 4.9277305308788365e-05, - "loss": 135.7604, - "step": 20920 - }, - { - "epoch": 0.1691190135666901, - "grad_norm": 6561.35693359375, - "learning_rate": 4.927562103702547e-05, - "loss": 169.7086, - "step": 20930 - }, - { - "epoch": 0.16919981577097423, - "grad_norm": 1966.267822265625, - "learning_rate": 4.92739348337628e-05, - "loss": 135.3359, - "step": 20940 - }, - { - "epoch": 0.16928061797525837, - "grad_norm": 1062.4327392578125, - "learning_rate": 4.927224669913456e-05, - "loss": 130.8302, - "step": 20950 - }, - { - "epoch": 0.1693614201795425, - "grad_norm": 1002.7949829101562, - "learning_rate": 4.927055663327503e-05, - "loss": 152.034, - "step": 20960 - }, - { - "epoch": 0.16944222238382664, - "grad_norm": 1231.685302734375, - "learning_rate": 4.92688646363187e-05, - "loss": 125.2035, - "step": 20970 - }, - { - "epoch": 0.16952302458811078, - "grad_norm": 1325.4017333984375, - "learning_rate": 4.9267170708400196e-05, - "loss": 153.9901, - "step": 20980 - }, - { - "epoch": 0.16960382679239489, - "grad_norm": 1044.0921630859375, - "learning_rate": 4.9265474849654284e-05, - "loss": 160.7131, - "step": 20990 - }, - { - "epoch": 0.16968462899667902, - "grad_norm": 706.5172119140625, - "learning_rate": 4.92637770602159e-05, - "loss": 150.8885, - "step": 21000 - }, - { - "epoch": 0.16976543120096316, - "grad_norm": 443.4945373535156, - "learning_rate": 4.9262077340220135e-05, - "loss": 137.6814, - "step": 21010 - }, - { - "epoch": 0.1698462334052473, - "grad_norm": 3524.959228515625, - "learning_rate": 4.926037568980223e-05, - "loss": 131.1957, - "step": 21020 - }, - { - "epoch": 0.16992703560953143, - "grad_norm": 1061.660888671875, - "learning_rate": 4.925867210909756e-05, - "loss": 109.0972, - "step": 21030 - }, - { - "epoch": 0.17000783781381557, - "grad_norm": 977.8228759765625, - "learning_rate": 4.925696659824169e-05, - "loss": 140.1199, - "step": 21040 - }, - { - "epoch": 0.1700886400180997, - "grad_norm": 886.8487548828125, - "learning_rate": 4.925525915737031e-05, - "loss": 123.5597, - "step": 21050 - }, - { - "epoch": 0.17016944222238384, - "grad_norm": 2220.47314453125, - "learning_rate": 4.925354978661928e-05, - "loss": 116.6628, - "step": 21060 - }, - { - "epoch": 0.17025024442666795, - "grad_norm": 750.9844970703125, - "learning_rate": 4.925183848612459e-05, - "loss": 150.5052, - "step": 21070 - }, - { - "epoch": 0.17033104663095208, - "grad_norm": 1006.42333984375, - "learning_rate": 4.9250125256022416e-05, - "loss": 119.5853, - "step": 21080 - }, - { - "epoch": 0.17041184883523622, - "grad_norm": 1162.7249755859375, - "learning_rate": 4.9248410096449075e-05, - "loss": 137.6988, - "step": 21090 - }, - { - "epoch": 0.17049265103952035, - "grad_norm": 666.7645263671875, - "learning_rate": 4.9246693007541024e-05, - "loss": 181.7554, - "step": 21100 - }, - { - "epoch": 0.1705734532438045, - "grad_norm": 1177.0545654296875, - "learning_rate": 4.9244973989434886e-05, - "loss": 157.994, - "step": 21110 - }, - { - "epoch": 0.17065425544808863, - "grad_norm": 1034.1756591796875, - "learning_rate": 4.924325304226745e-05, - "loss": 135.4122, - "step": 21120 - }, - { - "epoch": 0.17073505765237276, - "grad_norm": 1536.7457275390625, - "learning_rate": 4.9241530166175614e-05, - "loss": 153.4128, - "step": 21130 - }, - { - "epoch": 0.1708158598566569, - "grad_norm": 881.2988891601562, - "learning_rate": 4.9239805361296485e-05, - "loss": 161.9859, - "step": 21140 - }, - { - "epoch": 0.17089666206094103, - "grad_norm": 1522.52197265625, - "learning_rate": 4.923807862776728e-05, - "loss": 178.6479, - "step": 21150 - }, - { - "epoch": 0.17097746426522514, - "grad_norm": 658.5277709960938, - "learning_rate": 4.9236349965725406e-05, - "loss": 130.6969, - "step": 21160 - }, - { - "epoch": 0.17105826646950928, - "grad_norm": 865.1202392578125, - "learning_rate": 4.9234619375308396e-05, - "loss": 125.0552, - "step": 21170 - }, - { - "epoch": 0.17113906867379342, - "grad_norm": 1105.9573974609375, - "learning_rate": 4.9232886856653936e-05, - "loss": 148.4609, - "step": 21180 - }, - { - "epoch": 0.17121987087807755, - "grad_norm": 762.8492431640625, - "learning_rate": 4.923115240989989e-05, - "loss": 177.1173, - "step": 21190 - }, - { - "epoch": 0.1713006730823617, - "grad_norm": 977.0111083984375, - "learning_rate": 4.922941603518425e-05, - "loss": 131.6727, - "step": 21200 - }, - { - "epoch": 0.17138147528664582, - "grad_norm": 864.1790771484375, - "learning_rate": 4.922767773264517e-05, - "loss": 153.2473, - "step": 21210 - }, - { - "epoch": 0.17146227749092996, - "grad_norm": 576.5042724609375, - "learning_rate": 4.9225937502420974e-05, - "loss": 142.9099, - "step": 21220 - }, - { - "epoch": 0.1715430796952141, - "grad_norm": 826.1639404296875, - "learning_rate": 4.9224195344650105e-05, - "loss": 165.9666, - "step": 21230 - }, - { - "epoch": 0.17162388189949823, - "grad_norm": 1021.5435180664062, - "learning_rate": 4.922245125947119e-05, - "loss": 131.2517, - "step": 21240 - }, - { - "epoch": 0.17170468410378234, - "grad_norm": 750.6788330078125, - "learning_rate": 4.9220705247022985e-05, - "loss": 128.4156, - "step": 21250 - }, - { - "epoch": 0.17178548630806648, - "grad_norm": 1202.5364990234375, - "learning_rate": 4.921895730744443e-05, - "loss": 165.3158, - "step": 21260 - }, - { - "epoch": 0.1718662885123506, - "grad_norm": 695.836181640625, - "learning_rate": 4.921720744087459e-05, - "loss": 108.6388, - "step": 21270 - }, - { - "epoch": 0.17194709071663475, - "grad_norm": 658.8826904296875, - "learning_rate": 4.921545564745269e-05, - "loss": 134.6856, - "step": 21280 - }, - { - "epoch": 0.17202789292091888, - "grad_norm": 2134.118896484375, - "learning_rate": 4.9213701927318134e-05, - "loss": 147.9883, - "step": 21290 - }, - { - "epoch": 0.17210869512520302, - "grad_norm": 981.736083984375, - "learning_rate": 4.921194628061043e-05, - "loss": 134.1546, - "step": 21300 - }, - { - "epoch": 0.17218949732948716, - "grad_norm": 619.3984985351562, - "learning_rate": 4.921018870746928e-05, - "loss": 119.4398, - "step": 21310 - }, - { - "epoch": 0.1722702995337713, - "grad_norm": 710.1690673828125, - "learning_rate": 4.9208429208034525e-05, - "loss": 145.9413, - "step": 21320 - }, - { - "epoch": 0.1723511017380554, - "grad_norm": 1467.351806640625, - "learning_rate": 4.920666778244616e-05, - "loss": 159.9146, - "step": 21330 - }, - { - "epoch": 0.17243190394233954, - "grad_norm": 862.062255859375, - "learning_rate": 4.920490443084433e-05, - "loss": 129.9643, - "step": 21340 - }, - { - "epoch": 0.17251270614662367, - "grad_norm": 1797.9549560546875, - "learning_rate": 4.920313915336934e-05, - "loss": 144.6173, - "step": 21350 - }, - { - "epoch": 0.1725935083509078, - "grad_norm": 1055.6019287109375, - "learning_rate": 4.920137195016165e-05, - "loss": 156.1974, - "step": 21360 - }, - { - "epoch": 0.17267431055519195, - "grad_norm": 8324.7998046875, - "learning_rate": 4.919960282136185e-05, - "loss": 164.71, - "step": 21370 - }, - { - "epoch": 0.17275511275947608, - "grad_norm": 971.8815307617188, - "learning_rate": 4.919783176711074e-05, - "loss": 153.0036, - "step": 21380 - }, - { - "epoch": 0.17283591496376022, - "grad_norm": 988.0467529296875, - "learning_rate": 4.9196058787549184e-05, - "loss": 152.4217, - "step": 21390 - }, - { - "epoch": 0.17291671716804435, - "grad_norm": 570.156494140625, - "learning_rate": 4.919428388281829e-05, - "loss": 127.8879, - "step": 21400 - }, - { - "epoch": 0.1729975193723285, - "grad_norm": 1578.5853271484375, - "learning_rate": 4.9192507053059255e-05, - "loss": 148.3654, - "step": 21410 - }, - { - "epoch": 0.1730783215766126, - "grad_norm": 1456.0301513671875, - "learning_rate": 4.919072829841347e-05, - "loss": 178.0747, - "step": 21420 - }, - { - "epoch": 0.17315912378089673, - "grad_norm": 588.3489990234375, - "learning_rate": 4.918894761902245e-05, - "loss": 143.8035, - "step": 21430 - }, - { - "epoch": 0.17323992598518087, - "grad_norm": 927.64404296875, - "learning_rate": 4.918716501502789e-05, - "loss": 126.7022, - "step": 21440 - }, - { - "epoch": 0.173320728189465, - "grad_norm": 1143.9464111328125, - "learning_rate": 4.9185380486571595e-05, - "loss": 79.5578, - "step": 21450 - }, - { - "epoch": 0.17340153039374914, - "grad_norm": 838.281494140625, - "learning_rate": 4.918359403379559e-05, - "loss": 177.1575, - "step": 21460 - }, - { - "epoch": 0.17348233259803328, - "grad_norm": 1176.1307373046875, - "learning_rate": 4.918180565684198e-05, - "loss": 151.7111, - "step": 21470 - }, - { - "epoch": 0.17356313480231741, - "grad_norm": 1099.8289794921875, - "learning_rate": 4.9180015355853084e-05, - "loss": 152.3932, - "step": 21480 - }, - { - "epoch": 0.17364393700660155, - "grad_norm": 1128.26123046875, - "learning_rate": 4.917822313097134e-05, - "loss": 116.6667, - "step": 21490 - }, - { - "epoch": 0.17372473921088566, - "grad_norm": 563.2874755859375, - "learning_rate": 4.917642898233933e-05, - "loss": 142.594, - "step": 21500 - }, - { - "epoch": 0.1738055414151698, - "grad_norm": 1359.58984375, - "learning_rate": 4.917463291009983e-05, - "loss": 133.1546, - "step": 21510 - }, - { - "epoch": 0.17388634361945393, - "grad_norm": 848.3027954101562, - "learning_rate": 4.917283491439574e-05, - "loss": 152.585, - "step": 21520 - }, - { - "epoch": 0.17396714582373807, - "grad_norm": 857.1946411132812, - "learning_rate": 4.91710349953701e-05, - "loss": 178.7701, - "step": 21530 - }, - { - "epoch": 0.1740479480280222, - "grad_norm": 1514.6451416015625, - "learning_rate": 4.916923315316615e-05, - "loss": 165.9126, - "step": 21540 - }, - { - "epoch": 0.17412875023230634, - "grad_norm": 1154.829833984375, - "learning_rate": 4.916742938792723e-05, - "loss": 129.4643, - "step": 21550 - }, - { - "epoch": 0.17420955243659048, - "grad_norm": 1806.4046630859375, - "learning_rate": 4.9165623699796875e-05, - "loss": 131.133, - "step": 21560 - }, - { - "epoch": 0.1742903546408746, - "grad_norm": 2297.58642578125, - "learning_rate": 4.916381608891874e-05, - "loss": 181.9885, - "step": 21570 - }, - { - "epoch": 0.17437115684515875, - "grad_norm": 889.8167724609375, - "learning_rate": 4.916200655543667e-05, - "loss": 124.0278, - "step": 21580 - }, - { - "epoch": 0.17445195904944286, - "grad_norm": 652.2991333007812, - "learning_rate": 4.916019509949461e-05, - "loss": 129.1368, - "step": 21590 - }, - { - "epoch": 0.174532761253727, - "grad_norm": 2002.193603515625, - "learning_rate": 4.915838172123671e-05, - "loss": 156.8969, - "step": 21600 - }, - { - "epoch": 0.17461356345801113, - "grad_norm": 1632.37646484375, - "learning_rate": 4.915656642080726e-05, - "loss": 134.3347, - "step": 21610 - }, - { - "epoch": 0.17469436566229526, - "grad_norm": 1093.335205078125, - "learning_rate": 4.915474919835067e-05, - "loss": 134.8566, - "step": 21620 - }, - { - "epoch": 0.1747751678665794, - "grad_norm": 1129.8870849609375, - "learning_rate": 4.915293005401155e-05, - "loss": 140.7855, - "step": 21630 - }, - { - "epoch": 0.17485597007086354, - "grad_norm": 1071.5859375, - "learning_rate": 4.915110898793462e-05, - "loss": 164.049, - "step": 21640 - }, - { - "epoch": 0.17493677227514767, - "grad_norm": 729.1155395507812, - "learning_rate": 4.9149286000264805e-05, - "loss": 113.2319, - "step": 21650 - }, - { - "epoch": 0.1750175744794318, - "grad_norm": 753.5103149414062, - "learning_rate": 4.9147461091147125e-05, - "loss": 102.7337, - "step": 21660 - }, - { - "epoch": 0.17509837668371594, - "grad_norm": 531.7659301757812, - "learning_rate": 4.914563426072678e-05, - "loss": 149.2153, - "step": 21670 - }, - { - "epoch": 0.17517917888800005, - "grad_norm": 1287.9884033203125, - "learning_rate": 4.914380550914914e-05, - "loss": 146.6886, - "step": 21680 - }, - { - "epoch": 0.1752599810922842, - "grad_norm": 1483.9490966796875, - "learning_rate": 4.9141974836559704e-05, - "loss": 157.0422, - "step": 21690 - }, - { - "epoch": 0.17534078329656833, - "grad_norm": 899.7554931640625, - "learning_rate": 4.9140142243104116e-05, - "loss": 149.8844, - "step": 21700 - }, - { - "epoch": 0.17542158550085246, - "grad_norm": 1013.6568603515625, - "learning_rate": 4.91383077289282e-05, - "loss": 99.9814, - "step": 21710 - }, - { - "epoch": 0.1755023877051366, - "grad_norm": 1174.3260498046875, - "learning_rate": 4.9136471294177924e-05, - "loss": 138.6684, - "step": 21720 - }, - { - "epoch": 0.17558318990942073, - "grad_norm": 1020.420166015625, - "learning_rate": 4.913463293899939e-05, - "loss": 165.4781, - "step": 21730 - }, - { - "epoch": 0.17566399211370487, - "grad_norm": 1076.771728515625, - "learning_rate": 4.913279266353888e-05, - "loss": 138.3935, - "step": 21740 - }, - { - "epoch": 0.175744794317989, - "grad_norm": 1392.3963623046875, - "learning_rate": 4.9130950467942814e-05, - "loss": 130.0977, - "step": 21750 - }, - { - "epoch": 0.17582559652227311, - "grad_norm": 593.2973022460938, - "learning_rate": 4.912910635235777e-05, - "loss": 116.5755, - "step": 21760 - }, - { - "epoch": 0.17590639872655725, - "grad_norm": 1103.0938720703125, - "learning_rate": 4.9127260316930466e-05, - "loss": 138.2039, - "step": 21770 - }, - { - "epoch": 0.1759872009308414, - "grad_norm": 737.562744140625, - "learning_rate": 4.912541236180779e-05, - "loss": 122.1219, - "step": 21780 - }, - { - "epoch": 0.17606800313512552, - "grad_norm": 860.9588623046875, - "learning_rate": 4.9123562487136774e-05, - "loss": 153.3838, - "step": 21790 - }, - { - "epoch": 0.17614880533940966, - "grad_norm": 1145.1170654296875, - "learning_rate": 4.912171069306459e-05, - "loss": 123.9352, - "step": 21800 - }, - { - "epoch": 0.1762296075436938, - "grad_norm": 940.3277587890625, - "learning_rate": 4.911985697973861e-05, - "loss": 142.1018, - "step": 21810 - }, - { - "epoch": 0.17631040974797793, - "grad_norm": 815.0631713867188, - "learning_rate": 4.911800134730629e-05, - "loss": 151.9103, - "step": 21820 - }, - { - "epoch": 0.17639121195226207, - "grad_norm": 1341.9578857421875, - "learning_rate": 4.9116143795915295e-05, - "loss": 146.6557, - "step": 21830 - }, - { - "epoch": 0.1764720141565462, - "grad_norm": 861.9169311523438, - "learning_rate": 4.9114284325713416e-05, - "loss": 131.5514, - "step": 21840 - }, - { - "epoch": 0.1765528163608303, - "grad_norm": 2296.96484375, - "learning_rate": 4.91124229368486e-05, - "loss": 127.922, - "step": 21850 - }, - { - "epoch": 0.17663361856511445, - "grad_norm": 3126.313232421875, - "learning_rate": 4.911055962946896e-05, - "loss": 178.3461, - "step": 21860 - }, - { - "epoch": 0.17671442076939858, - "grad_norm": 1112.87646484375, - "learning_rate": 4.910869440372274e-05, - "loss": 131.921, - "step": 21870 - }, - { - "epoch": 0.17679522297368272, - "grad_norm": 1277.1387939453125, - "learning_rate": 4.910682725975835e-05, - "loss": 172.0795, - "step": 21880 - }, - { - "epoch": 0.17687602517796686, - "grad_norm": 1765.8804931640625, - "learning_rate": 4.910495819772434e-05, - "loss": 116.7105, - "step": 21890 - }, - { - "epoch": 0.176956827382251, - "grad_norm": 1198.19384765625, - "learning_rate": 4.9103087217769445e-05, - "loss": 145.228, - "step": 21900 - }, - { - "epoch": 0.17703762958653513, - "grad_norm": 926.2941284179688, - "learning_rate": 4.910121432004252e-05, - "loss": 161.9553, - "step": 21910 - }, - { - "epoch": 0.17711843179081926, - "grad_norm": 759.7756958007812, - "learning_rate": 4.9099339504692573e-05, - "loss": 113.5963, - "step": 21920 - }, - { - "epoch": 0.1771992339951034, - "grad_norm": 816.7711181640625, - "learning_rate": 4.909746277186879e-05, - "loss": 145.3244, - "step": 21930 - }, - { - "epoch": 0.1772800361993875, - "grad_norm": 1038.8468017578125, - "learning_rate": 4.909558412172047e-05, - "loss": 175.112, - "step": 21940 - }, - { - "epoch": 0.17736083840367164, - "grad_norm": 1387.3388671875, - "learning_rate": 4.909370355439712e-05, - "loss": 124.688, - "step": 21950 - }, - { - "epoch": 0.17744164060795578, - "grad_norm": 666.5802612304688, - "learning_rate": 4.909182107004835e-05, - "loss": 85.6291, - "step": 21960 - }, - { - "epoch": 0.17752244281223992, - "grad_norm": 1517.434326171875, - "learning_rate": 4.908993666882395e-05, - "loss": 125.7463, - "step": 21970 - }, - { - "epoch": 0.17760324501652405, - "grad_norm": 746.6646118164062, - "learning_rate": 4.9088050350873835e-05, - "loss": 143.7797, - "step": 21980 - }, - { - "epoch": 0.1776840472208082, - "grad_norm": 838.4737548828125, - "learning_rate": 4.9086162116348114e-05, - "loss": 120.7253, - "step": 21990 - }, - { - "epoch": 0.17776484942509233, - "grad_norm": 1266.503662109375, - "learning_rate": 4.9084271965397014e-05, - "loss": 141.1467, - "step": 22000 - }, - { - "epoch": 0.17784565162937646, - "grad_norm": 1920.794677734375, - "learning_rate": 4.9082379898170914e-05, - "loss": 143.4987, - "step": 22010 - }, - { - "epoch": 0.17792645383366057, - "grad_norm": 1311.13720703125, - "learning_rate": 4.908048591482038e-05, - "loss": 152.7263, - "step": 22020 - }, - { - "epoch": 0.1780072560379447, - "grad_norm": 1175.812744140625, - "learning_rate": 4.9078590015496096e-05, - "loss": 129.1362, - "step": 22030 - }, - { - "epoch": 0.17808805824222884, - "grad_norm": 1120.2232666015625, - "learning_rate": 4.907669220034891e-05, - "loss": 138.7439, - "step": 22040 - }, - { - "epoch": 0.17816886044651298, - "grad_norm": 1485.8729248046875, - "learning_rate": 4.9074792469529815e-05, - "loss": 274.2296, - "step": 22050 - }, - { - "epoch": 0.1782496626507971, - "grad_norm": 969.6702270507812, - "learning_rate": 4.9072890823189974e-05, - "loss": 119.1664, - "step": 22060 - }, - { - "epoch": 0.17833046485508125, - "grad_norm": 941.8471069335938, - "learning_rate": 4.907098726148069e-05, - "loss": 160.9961, - "step": 22070 - }, - { - "epoch": 0.17841126705936539, - "grad_norm": 1140.767333984375, - "learning_rate": 4.906908178455342e-05, - "loss": 145.0228, - "step": 22080 - }, - { - "epoch": 0.17849206926364952, - "grad_norm": 715.76904296875, - "learning_rate": 4.9067174392559776e-05, - "loss": 103.8144, - "step": 22090 - }, - { - "epoch": 0.17857287146793366, - "grad_norm": 1049.420654296875, - "learning_rate": 4.906526508565152e-05, - "loss": 124.3415, - "step": 22100 - }, - { - "epoch": 0.17865367367221777, - "grad_norm": 1215.5096435546875, - "learning_rate": 4.9063353863980565e-05, - "loss": 182.3524, - "step": 22110 - }, - { - "epoch": 0.1787344758765019, - "grad_norm": 1063.1181640625, - "learning_rate": 4.906144072769898e-05, - "loss": 122.475, - "step": 22120 - }, - { - "epoch": 0.17881527808078604, - "grad_norm": 1125.4080810546875, - "learning_rate": 4.9059525676958986e-05, - "loss": 139.1991, - "step": 22130 - }, - { - "epoch": 0.17889608028507017, - "grad_norm": 1022.3780517578125, - "learning_rate": 4.905760871191295e-05, - "loss": 195.8249, - "step": 22140 - }, - { - "epoch": 0.1789768824893543, - "grad_norm": 1560.5745849609375, - "learning_rate": 4.9055689832713396e-05, - "loss": 125.5297, - "step": 22150 - }, - { - "epoch": 0.17905768469363845, - "grad_norm": 5475.33447265625, - "learning_rate": 4.9053769039513006e-05, - "loss": 148.3278, - "step": 22160 - }, - { - "epoch": 0.17913848689792258, - "grad_norm": 737.4940795898438, - "learning_rate": 4.90518463324646e-05, - "loss": 140.7979, - "step": 22170 - }, - { - "epoch": 0.17921928910220672, - "grad_norm": 627.595458984375, - "learning_rate": 4.904992171172117e-05, - "loss": 157.4451, - "step": 22180 - }, - { - "epoch": 0.17930009130649083, - "grad_norm": 734.084228515625, - "learning_rate": 4.904799517743585e-05, - "loss": 139.5384, - "step": 22190 - }, - { - "epoch": 0.17938089351077496, - "grad_norm": 1306.1624755859375, - "learning_rate": 4.9046066729761905e-05, - "loss": 118.6943, - "step": 22200 - }, - { - "epoch": 0.1794616957150591, - "grad_norm": 1220.8062744140625, - "learning_rate": 4.904413636885279e-05, - "loss": 168.1177, - "step": 22210 - }, - { - "epoch": 0.17954249791934324, - "grad_norm": 1183.083251953125, - "learning_rate": 4.90422040948621e-05, - "loss": 158.66, - "step": 22220 - }, - { - "epoch": 0.17962330012362737, - "grad_norm": 1026.29345703125, - "learning_rate": 4.904026990794356e-05, - "loss": 128.3199, - "step": 22230 - }, - { - "epoch": 0.1797041023279115, - "grad_norm": 771.4765625, - "learning_rate": 4.9038333808251074e-05, - "loss": 111.4788, - "step": 22240 - }, - { - "epoch": 0.17978490453219564, - "grad_norm": 1059.020751953125, - "learning_rate": 4.90363957959387e-05, - "loss": 134.2409, - "step": 22250 - }, - { - "epoch": 0.17986570673647978, - "grad_norm": 1103.791748046875, - "learning_rate": 4.903445587116061e-05, - "loss": 170.8439, - "step": 22260 - }, - { - "epoch": 0.17994650894076392, - "grad_norm": 622.0547485351562, - "learning_rate": 4.9032514034071175e-05, - "loss": 122.9244, - "step": 22270 - }, - { - "epoch": 0.18002731114504802, - "grad_norm": 958.7100830078125, - "learning_rate": 4.903057028482489e-05, - "loss": 99.9289, - "step": 22280 - }, - { - "epoch": 0.18010811334933216, - "grad_norm": 996.9059448242188, - "learning_rate": 4.902862462357641e-05, - "loss": 164.1096, - "step": 22290 - }, - { - "epoch": 0.1801889155536163, - "grad_norm": 481.1267395019531, - "learning_rate": 4.9026677050480555e-05, - "loss": 118.1814, - "step": 22300 - }, - { - "epoch": 0.18026971775790043, - "grad_norm": 1068.948486328125, - "learning_rate": 4.9024727565692274e-05, - "loss": 144.3041, - "step": 22310 - }, - { - "epoch": 0.18035051996218457, - "grad_norm": 997.0437622070312, - "learning_rate": 4.9022776169366666e-05, - "loss": 155.9985, - "step": 22320 - }, - { - "epoch": 0.1804313221664687, - "grad_norm": 1654.6068115234375, - "learning_rate": 4.902082286165902e-05, - "loss": 164.7646, - "step": 22330 - }, - { - "epoch": 0.18051212437075284, - "grad_norm": 3154.902099609375, - "learning_rate": 4.901886764272474e-05, - "loss": 98.5395, - "step": 22340 - }, - { - "epoch": 0.18059292657503698, - "grad_norm": 2299.2890625, - "learning_rate": 4.901691051271939e-05, - "loss": 146.594, - "step": 22350 - }, - { - "epoch": 0.1806737287793211, - "grad_norm": 1197.87451171875, - "learning_rate": 4.90149514717987e-05, - "loss": 145.7994, - "step": 22360 - }, - { - "epoch": 0.18075453098360522, - "grad_norm": 1042.1507568359375, - "learning_rate": 4.901299052011852e-05, - "loss": 144.4632, - "step": 22370 - }, - { - "epoch": 0.18083533318788936, - "grad_norm": 1308.17041015625, - "learning_rate": 4.90110276578349e-05, - "loss": 119.5589, - "step": 22380 - }, - { - "epoch": 0.1809161353921735, - "grad_norm": 904.1051025390625, - "learning_rate": 4.900906288510401e-05, - "loss": 116.1322, - "step": 22390 - }, - { - "epoch": 0.18099693759645763, - "grad_norm": 807.6878051757812, - "learning_rate": 4.900709620208216e-05, - "loss": 179.056, - "step": 22400 - }, - { - "epoch": 0.18107773980074177, - "grad_norm": 644.4304809570312, - "learning_rate": 4.900512760892585e-05, - "loss": 110.5187, - "step": 22410 - }, - { - "epoch": 0.1811585420050259, - "grad_norm": 924.395263671875, - "learning_rate": 4.9003157105791706e-05, - "loss": 116.3889, - "step": 22420 - }, - { - "epoch": 0.18123934420931004, - "grad_norm": 1086.564208984375, - "learning_rate": 4.9001184692836505e-05, - "loss": 121.7575, - "step": 22430 - }, - { - "epoch": 0.18132014641359417, - "grad_norm": 876.027587890625, - "learning_rate": 4.8999210370217194e-05, - "loss": 159.8317, - "step": 22440 - }, - { - "epoch": 0.18140094861787828, - "grad_norm": 1000.0255126953125, - "learning_rate": 4.899723413809085e-05, - "loss": 139.3787, - "step": 22450 - }, - { - "epoch": 0.18148175082216242, - "grad_norm": 2290.170654296875, - "learning_rate": 4.899525599661472e-05, - "loss": 97.0173, - "step": 22460 - }, - { - "epoch": 0.18156255302644655, - "grad_norm": 1271.3829345703125, - "learning_rate": 4.899327594594619e-05, - "loss": 131.2777, - "step": 22470 - }, - { - "epoch": 0.1816433552307307, - "grad_norm": 907.057861328125, - "learning_rate": 4.899129398624281e-05, - "loss": 101.1429, - "step": 22480 - }, - { - "epoch": 0.18172415743501483, - "grad_norm": 1061.6846923828125, - "learning_rate": 4.898931011766228e-05, - "loss": 107.645, - "step": 22490 - }, - { - "epoch": 0.18180495963929896, - "grad_norm": 1035.3082275390625, - "learning_rate": 4.898732434036244e-05, - "loss": 121.6947, - "step": 22500 - }, - { - "epoch": 0.1818857618435831, - "grad_norm": 1202.3555908203125, - "learning_rate": 4.898533665450128e-05, - "loss": 162.0105, - "step": 22510 - }, - { - "epoch": 0.18196656404786724, - "grad_norm": 1121.3282470703125, - "learning_rate": 4.898334706023696e-05, - "loss": 113.9376, - "step": 22520 - }, - { - "epoch": 0.18204736625215137, - "grad_norm": 1393.017578125, - "learning_rate": 4.8981355557727796e-05, - "loss": 139.8284, - "step": 22530 - }, - { - "epoch": 0.18212816845643548, - "grad_norm": 2622.062744140625, - "learning_rate": 4.897936214713222e-05, - "loss": 168.307, - "step": 22540 - }, - { - "epoch": 0.18220897066071962, - "grad_norm": 804.2191772460938, - "learning_rate": 4.897736682860885e-05, - "loss": 146.0737, - "step": 22550 - }, - { - "epoch": 0.18228977286500375, - "grad_norm": 1040.8131103515625, - "learning_rate": 4.897536960231644e-05, - "loss": 157.3722, - "step": 22560 - }, - { - "epoch": 0.1823705750692879, - "grad_norm": 677.9693603515625, - "learning_rate": 4.89733704684139e-05, - "loss": 103.1337, - "step": 22570 - }, - { - "epoch": 0.18245137727357202, - "grad_norm": 1318.36376953125, - "learning_rate": 4.89713694270603e-05, - "loss": 141.4517, - "step": 22580 - }, - { - "epoch": 0.18253217947785616, - "grad_norm": 1170.07861328125, - "learning_rate": 4.8969366478414854e-05, - "loss": 108.5731, - "step": 22590 - }, - { - "epoch": 0.1826129816821403, - "grad_norm": 1243.060791015625, - "learning_rate": 4.896736162263691e-05, - "loss": 173.4797, - "step": 22600 - }, - { - "epoch": 0.18269378388642443, - "grad_norm": 624.64111328125, - "learning_rate": 4.8965354859886006e-05, - "loss": 103.4098, - "step": 22610 - }, - { - "epoch": 0.18277458609070854, - "grad_norm": 982.3737182617188, - "learning_rate": 4.89633461903218e-05, - "loss": 122.2358, - "step": 22620 - }, - { - "epoch": 0.18285538829499268, - "grad_norm": 1259.17578125, - "learning_rate": 4.8961335614104115e-05, - "loss": 124.4892, - "step": 22630 - }, - { - "epoch": 0.1829361904992768, - "grad_norm": 1049.9534912109375, - "learning_rate": 4.895932313139292e-05, - "loss": 165.0955, - "step": 22640 - }, - { - "epoch": 0.18301699270356095, - "grad_norm": 805.4934692382812, - "learning_rate": 4.895730874234834e-05, - "loss": 114.1204, - "step": 22650 - }, - { - "epoch": 0.18309779490784509, - "grad_norm": 1224.6595458984375, - "learning_rate": 4.895529244713066e-05, - "loss": 115.779, - "step": 22660 - }, - { - "epoch": 0.18317859711212922, - "grad_norm": 1112.3597412109375, - "learning_rate": 4.895327424590029e-05, - "loss": 130.0822, - "step": 22670 - }, - { - "epoch": 0.18325939931641336, - "grad_norm": 1353.516845703125, - "learning_rate": 4.895125413881783e-05, - "loss": 171.6577, - "step": 22680 - }, - { - "epoch": 0.1833402015206975, - "grad_norm": 1338.9501953125, - "learning_rate": 4.8949232126044e-05, - "loss": 109.7441, - "step": 22690 - }, - { - "epoch": 0.18342100372498163, - "grad_norm": 1896.7581787109375, - "learning_rate": 4.894720820773967e-05, - "loss": 174.3409, - "step": 22700 - }, - { - "epoch": 0.18350180592926574, - "grad_norm": 766.3515014648438, - "learning_rate": 4.894518238406589e-05, - "loss": 137.257, - "step": 22710 - }, - { - "epoch": 0.18358260813354987, - "grad_norm": 691.5870971679688, - "learning_rate": 4.8943154655183844e-05, - "loss": 100.2154, - "step": 22720 - }, - { - "epoch": 0.183663410337834, - "grad_norm": 5152.01025390625, - "learning_rate": 4.894112502125487e-05, - "loss": 173.3306, - "step": 22730 - }, - { - "epoch": 0.18374421254211815, - "grad_norm": 788.1614379882812, - "learning_rate": 4.8939093482440444e-05, - "loss": 142.851, - "step": 22740 - }, - { - "epoch": 0.18382501474640228, - "grad_norm": 1022.0597534179688, - "learning_rate": 4.8937060038902224e-05, - "loss": 160.1116, - "step": 22750 - }, - { - "epoch": 0.18390581695068642, - "grad_norm": 757.3877563476562, - "learning_rate": 4.8935024690801986e-05, - "loss": 104.9073, - "step": 22760 - }, - { - "epoch": 0.18398661915497055, - "grad_norm": 1713.2584228515625, - "learning_rate": 4.893298743830168e-05, - "loss": 141.8529, - "step": 22770 - }, - { - "epoch": 0.1840674213592547, - "grad_norm": 1372.67822265625, - "learning_rate": 4.89309482815634e-05, - "loss": 157.4392, - "step": 22780 - }, - { - "epoch": 0.18414822356353883, - "grad_norm": 900.3597412109375, - "learning_rate": 4.892890722074941e-05, - "loss": 161.8838, - "step": 22790 - }, - { - "epoch": 0.18422902576782293, - "grad_norm": 1206.832763671875, - "learning_rate": 4.8926864256022074e-05, - "loss": 142.6541, - "step": 22800 - }, - { - "epoch": 0.18430982797210707, - "grad_norm": 3348.473388671875, - "learning_rate": 4.892481938754396e-05, - "loss": 153.2709, - "step": 22810 - }, - { - "epoch": 0.1843906301763912, - "grad_norm": 1615.8541259765625, - "learning_rate": 4.892277261547778e-05, - "loss": 176.116, - "step": 22820 - }, - { - "epoch": 0.18447143238067534, - "grad_norm": 1190.1331787109375, - "learning_rate": 4.892072393998636e-05, - "loss": 130.4555, - "step": 22830 - }, - { - "epoch": 0.18455223458495948, - "grad_norm": 1158.7845458984375, - "learning_rate": 4.891867336123273e-05, - "loss": 117.6687, - "step": 22840 - }, - { - "epoch": 0.18463303678924362, - "grad_norm": 850.7025756835938, - "learning_rate": 4.891662087938002e-05, - "loss": 125.4999, - "step": 22850 - }, - { - "epoch": 0.18471383899352775, - "grad_norm": 1056.5823974609375, - "learning_rate": 4.891456649459155e-05, - "loss": 104.6559, - "step": 22860 - }, - { - "epoch": 0.1847946411978119, - "grad_norm": 821.6203002929688, - "learning_rate": 4.891251020703078e-05, - "loss": 104.2007, - "step": 22870 - }, - { - "epoch": 0.184875443402096, - "grad_norm": 842.5684814453125, - "learning_rate": 4.8910452016861316e-05, - "loss": 149.866, - "step": 22880 - }, - { - "epoch": 0.18495624560638013, - "grad_norm": 1477.5130615234375, - "learning_rate": 4.890839192424692e-05, - "loss": 118.6691, - "step": 22890 - }, - { - "epoch": 0.18503704781066427, - "grad_norm": 679.1465454101562, - "learning_rate": 4.89063299293515e-05, - "loss": 113.9977, - "step": 22900 - }, - { - "epoch": 0.1851178500149484, - "grad_norm": 1265.08349609375, - "learning_rate": 4.890426603233913e-05, - "loss": 187.4383, - "step": 22910 - }, - { - "epoch": 0.18519865221923254, - "grad_norm": 1642.4959716796875, - "learning_rate": 4.890220023337402e-05, - "loss": 114.8031, - "step": 22920 - }, - { - "epoch": 0.18527945442351668, - "grad_norm": 1289.17529296875, - "learning_rate": 4.890013253262052e-05, - "loss": 139.2596, - "step": 22930 - }, - { - "epoch": 0.1853602566278008, - "grad_norm": 1148.701171875, - "learning_rate": 4.889806293024317e-05, - "loss": 115.5268, - "step": 22940 - }, - { - "epoch": 0.18544105883208495, - "grad_norm": 854.2969970703125, - "learning_rate": 4.889599142640663e-05, - "loss": 104.5058, - "step": 22950 - }, - { - "epoch": 0.18552186103636908, - "grad_norm": 638.8955078125, - "learning_rate": 4.889391802127572e-05, - "loss": 137.5948, - "step": 22960 - }, - { - "epoch": 0.1856026632406532, - "grad_norm": 1359.7989501953125, - "learning_rate": 4.8891842715015415e-05, - "loss": 155.3802, - "step": 22970 - }, - { - "epoch": 0.18568346544493733, - "grad_norm": 900.0406494140625, - "learning_rate": 4.888976550779082e-05, - "loss": 148.2721, - "step": 22980 - }, - { - "epoch": 0.18576426764922147, - "grad_norm": 950.3909912109375, - "learning_rate": 4.888768639976723e-05, - "loss": 128.0501, - "step": 22990 - }, - { - "epoch": 0.1858450698535056, - "grad_norm": 904.05615234375, - "learning_rate": 4.888560539111007e-05, - "loss": 117.1624, - "step": 23000 - }, - { - "epoch": 0.18592587205778974, - "grad_norm": 886.5684814453125, - "learning_rate": 4.888352248198491e-05, - "loss": 98.4731, - "step": 23010 - }, - { - "epoch": 0.18600667426207387, - "grad_norm": 1154.155517578125, - "learning_rate": 4.888143767255746e-05, - "loss": 116.1539, - "step": 23020 - }, - { - "epoch": 0.186087476466358, - "grad_norm": 987.8771362304688, - "learning_rate": 4.887935096299363e-05, - "loss": 158.9487, - "step": 23030 - }, - { - "epoch": 0.18616827867064215, - "grad_norm": 711.0520629882812, - "learning_rate": 4.887726235345943e-05, - "loss": 160.4295, - "step": 23040 - }, - { - "epoch": 0.18624908087492628, - "grad_norm": 723.032958984375, - "learning_rate": 4.887517184412105e-05, - "loss": 138.3186, - "step": 23050 - }, - { - "epoch": 0.1863298830792104, - "grad_norm": 1084.6082763671875, - "learning_rate": 4.887307943514482e-05, - "loss": 168.3783, - "step": 23060 - }, - { - "epoch": 0.18641068528349453, - "grad_norm": 1483.857421875, - "learning_rate": 4.887098512669721e-05, - "loss": 123.126, - "step": 23070 - }, - { - "epoch": 0.18649148748777866, - "grad_norm": 1378.773193359375, - "learning_rate": 4.8868888918944875e-05, - "loss": 131.6848, - "step": 23080 - }, - { - "epoch": 0.1865722896920628, - "grad_norm": 839.9609375, - "learning_rate": 4.88667908120546e-05, - "loss": 159.1631, - "step": 23090 - }, - { - "epoch": 0.18665309189634693, - "grad_norm": 1056.0916748046875, - "learning_rate": 4.88646908061933e-05, - "loss": 153.1073, - "step": 23100 - }, - { - "epoch": 0.18673389410063107, - "grad_norm": 950.5769653320312, - "learning_rate": 4.886258890152808e-05, - "loss": 206.336, - "step": 23110 - }, - { - "epoch": 0.1868146963049152, - "grad_norm": 1698.2298583984375, - "learning_rate": 4.8860485098226186e-05, - "loss": 161.5861, - "step": 23120 - }, - { - "epoch": 0.18689549850919934, - "grad_norm": 1449.29052734375, - "learning_rate": 4.8858379396454986e-05, - "loss": 181.665, - "step": 23130 - }, - { - "epoch": 0.18697630071348345, - "grad_norm": 906.7736206054688, - "learning_rate": 4.885627179638203e-05, - "loss": 121.9543, - "step": 23140 - }, - { - "epoch": 0.1870571029177676, - "grad_norm": 853.1943359375, - "learning_rate": 4.885416229817502e-05, - "loss": 126.3017, - "step": 23150 - }, - { - "epoch": 0.18713790512205172, - "grad_norm": 933.1670532226562, - "learning_rate": 4.88520509020018e-05, - "loss": 131.7164, - "step": 23160 - }, - { - "epoch": 0.18721870732633586, - "grad_norm": 809.3897705078125, - "learning_rate": 4.884993760803034e-05, - "loss": 82.2266, - "step": 23170 - }, - { - "epoch": 0.18729950953062, - "grad_norm": 1107.886962890625, - "learning_rate": 4.8847822416428804e-05, - "loss": 137.3787, - "step": 23180 - }, - { - "epoch": 0.18738031173490413, - "grad_norm": 875.2286987304688, - "learning_rate": 4.8845705327365494e-05, - "loss": 147.1326, - "step": 23190 - }, - { - "epoch": 0.18746111393918827, - "grad_norm": 725.078125, - "learning_rate": 4.884358634100884e-05, - "loss": 116.224, - "step": 23200 - }, - { - "epoch": 0.1875419161434724, - "grad_norm": 709.187744140625, - "learning_rate": 4.884146545752745e-05, - "loss": 161.3454, - "step": 23210 - }, - { - "epoch": 0.18762271834775654, - "grad_norm": 965.8853759765625, - "learning_rate": 4.883934267709007e-05, - "loss": 104.2938, - "step": 23220 - }, - { - "epoch": 0.18770352055204065, - "grad_norm": 720.29541015625, - "learning_rate": 4.88372179998656e-05, - "loss": 127.9189, - "step": 23230 - }, - { - "epoch": 0.18778432275632478, - "grad_norm": 787.7008056640625, - "learning_rate": 4.88350914260231e-05, - "loss": 159.5888, - "step": 23240 - }, - { - "epoch": 0.18786512496060892, - "grad_norm": 770.27099609375, - "learning_rate": 4.883296295573176e-05, - "loss": 111.2395, - "step": 23250 - }, - { - "epoch": 0.18794592716489306, - "grad_norm": 989.0401000976562, - "learning_rate": 4.883083258916094e-05, - "loss": 84.2179, - "step": 23260 - }, - { - "epoch": 0.1880267293691772, - "grad_norm": 622.4349975585938, - "learning_rate": 4.8828700326480124e-05, - "loss": 117.0425, - "step": 23270 - }, - { - "epoch": 0.18810753157346133, - "grad_norm": 782.968505859375, - "learning_rate": 4.8826566167859e-05, - "loss": 98.487, - "step": 23280 - }, - { - "epoch": 0.18818833377774546, - "grad_norm": 1396.0714111328125, - "learning_rate": 4.882443011346734e-05, - "loss": 161.5329, - "step": 23290 - }, - { - "epoch": 0.1882691359820296, - "grad_norm": 3127.1787109375, - "learning_rate": 4.882229216347512e-05, - "loss": 139.1958, - "step": 23300 - }, - { - "epoch": 0.1883499381863137, - "grad_norm": 2043.7386474609375, - "learning_rate": 4.882015231805245e-05, - "loss": 131.3828, - "step": 23310 - }, - { - "epoch": 0.18843074039059785, - "grad_norm": 803.8340454101562, - "learning_rate": 4.881801057736957e-05, - "loss": 142.9152, - "step": 23320 - }, - { - "epoch": 0.18851154259488198, - "grad_norm": 565.1333618164062, - "learning_rate": 4.8815866941596907e-05, - "loss": 125.8542, - "step": 23330 - }, - { - "epoch": 0.18859234479916612, - "grad_norm": 1319.6141357421875, - "learning_rate": 4.881372141090501e-05, - "loss": 118.6022, - "step": 23340 - }, - { - "epoch": 0.18867314700345025, - "grad_norm": 775.401611328125, - "learning_rate": 4.8811573985464583e-05, - "loss": 107.1802, - "step": 23350 - }, - { - "epoch": 0.1887539492077344, - "grad_norm": 2228.206298828125, - "learning_rate": 4.8809424665446505e-05, - "loss": 195.3628, - "step": 23360 - }, - { - "epoch": 0.18883475141201853, - "grad_norm": 928.543212890625, - "learning_rate": 4.8807273451021763e-05, - "loss": 106.0024, - "step": 23370 - }, - { - "epoch": 0.18891555361630266, - "grad_norm": 652.130859375, - "learning_rate": 4.880512034236154e-05, - "loss": 113.7809, - "step": 23380 - }, - { - "epoch": 0.1889963558205868, - "grad_norm": 4711.4775390625, - "learning_rate": 4.880296533963714e-05, - "loss": 181.3036, - "step": 23390 - }, - { - "epoch": 0.1890771580248709, - "grad_norm": 878.5155639648438, - "learning_rate": 4.880080844302004e-05, - "loss": 142.3919, - "step": 23400 - }, - { - "epoch": 0.18915796022915504, - "grad_norm": 902.1356811523438, - "learning_rate": 4.879864965268183e-05, - "loss": 123.8351, - "step": 23410 - }, - { - "epoch": 0.18923876243343918, - "grad_norm": 1122.713134765625, - "learning_rate": 4.87964889687943e-05, - "loss": 135.591, - "step": 23420 - }, - { - "epoch": 0.18931956463772331, - "grad_norm": 1172.5733642578125, - "learning_rate": 4.8794326391529344e-05, - "loss": 126.7694, - "step": 23430 - }, - { - "epoch": 0.18940036684200745, - "grad_norm": 1452.78955078125, - "learning_rate": 4.879216192105904e-05, - "loss": 135.9087, - "step": 23440 - }, - { - "epoch": 0.1894811690462916, - "grad_norm": 963.2405395507812, - "learning_rate": 4.87899955575556e-05, - "loss": 148.8573, - "step": 23450 - }, - { - "epoch": 0.18956197125057572, - "grad_norm": 1660.697265625, - "learning_rate": 4.8787827301191405e-05, - "loss": 129.2729, - "step": 23460 - }, - { - "epoch": 0.18964277345485986, - "grad_norm": 916.353515625, - "learning_rate": 4.8785657152138954e-05, - "loss": 124.947, - "step": 23470 - }, - { - "epoch": 0.189723575659144, - "grad_norm": 1272.26708984375, - "learning_rate": 4.8783485110570926e-05, - "loss": 132.3774, - "step": 23480 - }, - { - "epoch": 0.1898043778634281, - "grad_norm": 1189.0185546875, - "learning_rate": 4.878131117666014e-05, - "loss": 127.1024, - "step": 23490 - }, - { - "epoch": 0.18988518006771224, - "grad_norm": 2101.5771484375, - "learning_rate": 4.8779135350579575e-05, - "loss": 133.1101, - "step": 23500 - }, - { - "epoch": 0.18996598227199638, - "grad_norm": 1055.2603759765625, - "learning_rate": 4.877695763250233e-05, - "loss": 145.5062, - "step": 23510 - }, - { - "epoch": 0.1900467844762805, - "grad_norm": 1036.5927734375, - "learning_rate": 4.877477802260169e-05, - "loss": 117.101, - "step": 23520 - }, - { - "epoch": 0.19012758668056465, - "grad_norm": 1334.359619140625, - "learning_rate": 4.8772596521051074e-05, - "loss": 126.108, - "step": 23530 - }, - { - "epoch": 0.19020838888484878, - "grad_norm": 958.9288940429688, - "learning_rate": 4.8770413128024055e-05, - "loss": 119.5724, - "step": 23540 - }, - { - "epoch": 0.19028919108913292, - "grad_norm": 936.3779907226562, - "learning_rate": 4.8768227843694356e-05, - "loss": 119.2132, - "step": 23550 - }, - { - "epoch": 0.19036999329341706, - "grad_norm": 1063.8311767578125, - "learning_rate": 4.876604066823585e-05, - "loss": 99.2183, - "step": 23560 - }, - { - "epoch": 0.19045079549770116, - "grad_norm": 474.4288330078125, - "learning_rate": 4.8763851601822555e-05, - "loss": 162.7293, - "step": 23570 - }, - { - "epoch": 0.1905315977019853, - "grad_norm": 875.7661743164062, - "learning_rate": 4.876166064462865e-05, - "loss": 155.3719, - "step": 23580 - }, - { - "epoch": 0.19061239990626944, - "grad_norm": 734.3944091796875, - "learning_rate": 4.875946779682846e-05, - "loss": 115.4304, - "step": 23590 - }, - { - "epoch": 0.19069320211055357, - "grad_norm": 1140.4166259765625, - "learning_rate": 4.8757273058596466e-05, - "loss": 111.8617, - "step": 23600 - }, - { - "epoch": 0.1907740043148377, - "grad_norm": 1042.712890625, - "learning_rate": 4.8755076430107275e-05, - "loss": 158.3831, - "step": 23610 - }, - { - "epoch": 0.19085480651912184, - "grad_norm": 942.3005981445312, - "learning_rate": 4.8752877911535674e-05, - "loss": 158.2759, - "step": 23620 - }, - { - "epoch": 0.19093560872340598, - "grad_norm": 672.3939208984375, - "learning_rate": 4.87506775030566e-05, - "loss": 101.9895, - "step": 23630 - }, - { - "epoch": 0.19101641092769012, - "grad_norm": 539.8408813476562, - "learning_rate": 4.8748475204845114e-05, - "loss": 134.8368, - "step": 23640 - }, - { - "epoch": 0.19109721313197425, - "grad_norm": 1047.973876953125, - "learning_rate": 4.874627101707644e-05, - "loss": 134.4578, - "step": 23650 - }, - { - "epoch": 0.19117801533625836, - "grad_norm": 587.66064453125, - "learning_rate": 4.874406493992597e-05, - "loss": 128.3708, - "step": 23660 - }, - { - "epoch": 0.1912588175405425, - "grad_norm": 1052.7977294921875, - "learning_rate": 4.874185697356921e-05, - "loss": 112.2446, - "step": 23670 - }, - { - "epoch": 0.19133961974482663, - "grad_norm": 892.5618896484375, - "learning_rate": 4.8739647118181864e-05, - "loss": 141.6148, - "step": 23680 - }, - { - "epoch": 0.19142042194911077, - "grad_norm": 1009.5902099609375, - "learning_rate": 4.8737435373939745e-05, - "loss": 128.5635, - "step": 23690 - }, - { - "epoch": 0.1915012241533949, - "grad_norm": 834.274658203125, - "learning_rate": 4.873522174101883e-05, - "loss": 125.1892, - "step": 23700 - }, - { - "epoch": 0.19158202635767904, - "grad_norm": 614.75048828125, - "learning_rate": 4.873300621959524e-05, - "loss": 119.3424, - "step": 23710 - }, - { - "epoch": 0.19166282856196318, - "grad_norm": 540.7586059570312, - "learning_rate": 4.8730788809845285e-05, - "loss": 123.1911, - "step": 23720 - }, - { - "epoch": 0.1917436307662473, - "grad_norm": 1392.84521484375, - "learning_rate": 4.8728569511945366e-05, - "loss": 133.2286, - "step": 23730 - }, - { - "epoch": 0.19182443297053145, - "grad_norm": 940.1975708007812, - "learning_rate": 4.872634832607207e-05, - "loss": 175.4847, - "step": 23740 - }, - { - "epoch": 0.19190523517481556, - "grad_norm": 585.4035034179688, - "learning_rate": 4.872412525240213e-05, - "loss": 137.4391, - "step": 23750 - }, - { - "epoch": 0.1919860373790997, - "grad_norm": 567.75146484375, - "learning_rate": 4.8721900291112415e-05, - "loss": 148.0143, - "step": 23760 - }, - { - "epoch": 0.19206683958338383, - "grad_norm": 695.6571655273438, - "learning_rate": 4.8719673442379965e-05, - "loss": 151.841, - "step": 23770 - }, - { - "epoch": 0.19214764178766797, - "grad_norm": 645.0660400390625, - "learning_rate": 4.871744470638196e-05, - "loss": 112.2194, - "step": 23780 - }, - { - "epoch": 0.1922284439919521, - "grad_norm": 1323.8675537109375, - "learning_rate": 4.871521408329573e-05, - "loss": 131.1473, - "step": 23790 - }, - { - "epoch": 0.19230924619623624, - "grad_norm": 779.0706176757812, - "learning_rate": 4.8712981573298754e-05, - "loss": 151.9213, - "step": 23800 - }, - { - "epoch": 0.19239004840052037, - "grad_norm": 1045.4783935546875, - "learning_rate": 4.871074717656866e-05, - "loss": 168.5953, - "step": 23810 - }, - { - "epoch": 0.1924708506048045, - "grad_norm": 994.0507202148438, - "learning_rate": 4.8708510893283233e-05, - "loss": 114.8425, - "step": 23820 - }, - { - "epoch": 0.19255165280908862, - "grad_norm": 2326.3681640625, - "learning_rate": 4.87062727236204e-05, - "loss": 147.102, - "step": 23830 - }, - { - "epoch": 0.19263245501337276, - "grad_norm": 661.2938232421875, - "learning_rate": 4.8704032667758235e-05, - "loss": 125.8939, - "step": 23840 - }, - { - "epoch": 0.1927132572176569, - "grad_norm": 1487.8214111328125, - "learning_rate": 4.870179072587499e-05, - "loss": 145.7569, - "step": 23850 - }, - { - "epoch": 0.19279405942194103, - "grad_norm": 1167.40087890625, - "learning_rate": 4.869954689814903e-05, - "loss": 107.6906, - "step": 23860 - }, - { - "epoch": 0.19287486162622516, - "grad_norm": 1011.26416015625, - "learning_rate": 4.869730118475889e-05, - "loss": 150.3682, - "step": 23870 - }, - { - "epoch": 0.1929556638305093, - "grad_norm": 792.6472778320312, - "learning_rate": 4.869505358588324e-05, - "loss": 125.6492, - "step": 23880 - }, - { - "epoch": 0.19303646603479344, - "grad_norm": 1423.9903564453125, - "learning_rate": 4.869280410170094e-05, - "loss": 122.7359, - "step": 23890 - }, - { - "epoch": 0.19311726823907757, - "grad_norm": 851.192626953125, - "learning_rate": 4.8690552732390934e-05, - "loss": 172.6603, - "step": 23900 - }, - { - "epoch": 0.1931980704433617, - "grad_norm": 906.2518920898438, - "learning_rate": 4.868829947813239e-05, - "loss": 115.3062, - "step": 23910 - }, - { - "epoch": 0.19327887264764582, - "grad_norm": 2573.9697265625, - "learning_rate": 4.8686044339104566e-05, - "loss": 172.7958, - "step": 23920 - }, - { - "epoch": 0.19335967485192995, - "grad_norm": 1765.7132568359375, - "learning_rate": 4.868378731548689e-05, - "loss": 168.2542, - "step": 23930 - }, - { - "epoch": 0.1934404770562141, - "grad_norm": 789.6253662109375, - "learning_rate": 4.868152840745895e-05, - "loss": 129.0756, - "step": 23940 - }, - { - "epoch": 0.19352127926049822, - "grad_norm": 1124.692626953125, - "learning_rate": 4.8679267615200493e-05, - "loss": 135.2715, - "step": 23950 - }, - { - "epoch": 0.19360208146478236, - "grad_norm": 1962.5836181640625, - "learning_rate": 4.867700493889137e-05, - "loss": 145.7144, - "step": 23960 - }, - { - "epoch": 0.1936828836690665, - "grad_norm": 961.2976684570312, - "learning_rate": 4.8674740378711635e-05, - "loss": 163.8356, - "step": 23970 - }, - { - "epoch": 0.19376368587335063, - "grad_norm": 811.3829956054688, - "learning_rate": 4.867247393484146e-05, - "loss": 143.9164, - "step": 23980 - }, - { - "epoch": 0.19384448807763477, - "grad_norm": 999.9976196289062, - "learning_rate": 4.867020560746117e-05, - "loss": 125.3291, - "step": 23990 - }, - { - "epoch": 0.19392529028191888, - "grad_norm": 1201.41943359375, - "learning_rate": 4.866793539675126e-05, - "loss": 168.0006, - "step": 24000 - }, - { - "epoch": 0.194006092486203, - "grad_norm": 762.6439208984375, - "learning_rate": 4.866566330289235e-05, - "loss": 110.0791, - "step": 24010 - }, - { - "epoch": 0.19408689469048715, - "grad_norm": 3061.843505859375, - "learning_rate": 4.866338932606522e-05, - "loss": 175.8718, - "step": 24020 - }, - { - "epoch": 0.19416769689477129, - "grad_norm": 771.3095092773438, - "learning_rate": 4.86611134664508e-05, - "loss": 135.5185, - "step": 24030 - }, - { - "epoch": 0.19424849909905542, - "grad_norm": 503.3309631347656, - "learning_rate": 4.865883572423017e-05, - "loss": 147.0893, - "step": 24040 - }, - { - "epoch": 0.19432930130333956, - "grad_norm": 1468.425048828125, - "learning_rate": 4.865655609958456e-05, - "loss": 139.8076, - "step": 24050 - }, - { - "epoch": 0.1944101035076237, - "grad_norm": 1596.8797607421875, - "learning_rate": 4.865427459269536e-05, - "loss": 102.8356, - "step": 24060 - }, - { - "epoch": 0.19449090571190783, - "grad_norm": 1223.2177734375, - "learning_rate": 4.865199120374408e-05, - "loss": 114.3106, - "step": 24070 - }, - { - "epoch": 0.19457170791619197, - "grad_norm": 948.3101196289062, - "learning_rate": 4.864970593291241e-05, - "loss": 158.6571, - "step": 24080 - }, - { - "epoch": 0.19465251012047607, - "grad_norm": 1182.761962890625, - "learning_rate": 4.8647418780382175e-05, - "loss": 139.4475, - "step": 24090 - }, - { - "epoch": 0.1947333123247602, - "grad_norm": 1569.6025390625, - "learning_rate": 4.864512974633536e-05, - "loss": 119.7811, - "step": 24100 - }, - { - "epoch": 0.19481411452904435, - "grad_norm": 1558.6204833984375, - "learning_rate": 4.8642838830954085e-05, - "loss": 136.1171, - "step": 24110 - }, - { - "epoch": 0.19489491673332848, - "grad_norm": 721.2457885742188, - "learning_rate": 4.864054603442062e-05, - "loss": 131.3715, - "step": 24120 - }, - { - "epoch": 0.19497571893761262, - "grad_norm": 1141.6844482421875, - "learning_rate": 4.863825135691742e-05, - "loss": 107.4046, - "step": 24130 - }, - { - "epoch": 0.19505652114189675, - "grad_norm": 1239.126708984375, - "learning_rate": 4.8635954798627034e-05, - "loss": 96.5175, - "step": 24140 - }, - { - "epoch": 0.1951373233461809, - "grad_norm": 2757.32958984375, - "learning_rate": 4.8633656359732204e-05, - "loss": 133.6097, - "step": 24150 - }, - { - "epoch": 0.19521812555046503, - "grad_norm": 1982.5146484375, - "learning_rate": 4.86313560404158e-05, - "loss": 160.5869, - "step": 24160 - }, - { - "epoch": 0.19529892775474916, - "grad_norm": 1142.5767822265625, - "learning_rate": 4.8629053840860845e-05, - "loss": 116.7254, - "step": 24170 - }, - { - "epoch": 0.19537972995903327, - "grad_norm": 1000.1806640625, - "learning_rate": 4.8626749761250526e-05, - "loss": 154.7734, - "step": 24180 - }, - { - "epoch": 0.1954605321633174, - "grad_norm": 844.1956787109375, - "learning_rate": 4.862444380176815e-05, - "loss": 148.9101, - "step": 24190 - }, - { - "epoch": 0.19554133436760154, - "grad_norm": 2545.150634765625, - "learning_rate": 4.86221359625972e-05, - "loss": 137.8026, - "step": 24200 - }, - { - "epoch": 0.19562213657188568, - "grad_norm": 1389.2906494140625, - "learning_rate": 4.861982624392132e-05, - "loss": 127.8276, - "step": 24210 - }, - { - "epoch": 0.19570293877616982, - "grad_norm": 711.1080322265625, - "learning_rate": 4.861751464592426e-05, - "loss": 110.9277, - "step": 24220 - }, - { - "epoch": 0.19578374098045395, - "grad_norm": 2045.6517333984375, - "learning_rate": 4.8615201168789934e-05, - "loss": 155.059, - "step": 24230 - }, - { - "epoch": 0.1958645431847381, - "grad_norm": 918.80126953125, - "learning_rate": 4.861288581270245e-05, - "loss": 160.8845, - "step": 24240 - }, - { - "epoch": 0.19594534538902222, - "grad_norm": 868.2738647460938, - "learning_rate": 4.8610568577846e-05, - "loss": 134.9177, - "step": 24250 - }, - { - "epoch": 0.19602614759330633, - "grad_norm": 748.8843383789062, - "learning_rate": 4.860824946440497e-05, - "loss": 125.3715, - "step": 24260 - }, - { - "epoch": 0.19610694979759047, - "grad_norm": 1340.7904052734375, - "learning_rate": 4.860592847256388e-05, - "loss": 122.3053, - "step": 24270 - }, - { - "epoch": 0.1961877520018746, - "grad_norm": 913.2391357421875, - "learning_rate": 4.8603605602507394e-05, - "loss": 165.5113, - "step": 24280 - }, - { - "epoch": 0.19626855420615874, - "grad_norm": 1045.2586669921875, - "learning_rate": 4.8601280854420325e-05, - "loss": 140.0982, - "step": 24290 - }, - { - "epoch": 0.19634935641044288, - "grad_norm": 784.5724487304688, - "learning_rate": 4.859895422848767e-05, - "loss": 176.2165, - "step": 24300 - }, - { - "epoch": 0.196430158614727, - "grad_norm": 1729.450927734375, - "learning_rate": 4.8596625724894527e-05, - "loss": 152.0629, - "step": 24310 - }, - { - "epoch": 0.19651096081901115, - "grad_norm": 837.1514282226562, - "learning_rate": 4.859429534382617e-05, - "loss": 141.557, - "step": 24320 - }, - { - "epoch": 0.19659176302329529, - "grad_norm": 1113.6141357421875, - "learning_rate": 4.8591963085468013e-05, - "loss": 141.8944, - "step": 24330 - }, - { - "epoch": 0.19667256522757942, - "grad_norm": 759.0359497070312, - "learning_rate": 4.858962895000563e-05, - "loss": 78.9747, - "step": 24340 - }, - { - "epoch": 0.19675336743186353, - "grad_norm": 1297.1385498046875, - "learning_rate": 4.8587292937624727e-05, - "loss": 90.0207, - "step": 24350 - }, - { - "epoch": 0.19683416963614767, - "grad_norm": 949.4478759765625, - "learning_rate": 4.858495504851118e-05, - "loss": 157.0674, - "step": 24360 - }, - { - "epoch": 0.1969149718404318, - "grad_norm": 1068.3106689453125, - "learning_rate": 4.8582615282851e-05, - "loss": 140.3363, - "step": 24370 - }, - { - "epoch": 0.19699577404471594, - "grad_norm": 3619.373291015625, - "learning_rate": 4.858027364083035e-05, - "loss": 189.3813, - "step": 24380 - }, - { - "epoch": 0.19707657624900007, - "grad_norm": 969.4085083007812, - "learning_rate": 4.857793012263555e-05, - "loss": 136.826, - "step": 24390 - }, - { - "epoch": 0.1971573784532842, - "grad_norm": 3741.0732421875, - "learning_rate": 4.857558472845305e-05, - "loss": 177.6485, - "step": 24400 - }, - { - "epoch": 0.19723818065756835, - "grad_norm": 1330.3629150390625, - "learning_rate": 4.857323745846948e-05, - "loss": 122.367, - "step": 24410 - }, - { - "epoch": 0.19731898286185248, - "grad_norm": 1439.5693359375, - "learning_rate": 4.857088831287158e-05, - "loss": 153.8259, - "step": 24420 - }, - { - "epoch": 0.1973997850661366, - "grad_norm": 1264.6275634765625, - "learning_rate": 4.856853729184628e-05, - "loss": 71.2506, - "step": 24430 - }, - { - "epoch": 0.19748058727042073, - "grad_norm": 784.4644165039062, - "learning_rate": 4.8566184395580635e-05, - "loss": 114.3421, - "step": 24440 - }, - { - "epoch": 0.19756138947470486, - "grad_norm": 838.190673828125, - "learning_rate": 4.856382962426185e-05, - "loss": 105.8242, - "step": 24450 - }, - { - "epoch": 0.197642191678989, - "grad_norm": 1089.3406982421875, - "learning_rate": 4.856147297807728e-05, - "loss": 106.1163, - "step": 24460 - }, - { - "epoch": 0.19772299388327313, - "grad_norm": 1645.0614013671875, - "learning_rate": 4.855911445721444e-05, - "loss": 123.7125, - "step": 24470 - }, - { - "epoch": 0.19780379608755727, - "grad_norm": 1254.783447265625, - "learning_rate": 4.855675406186099e-05, - "loss": 150.2006, - "step": 24480 - }, - { - "epoch": 0.1978845982918414, - "grad_norm": 1671.3790283203125, - "learning_rate": 4.855439179220472e-05, - "loss": 124.3438, - "step": 24490 - }, - { - "epoch": 0.19796540049612554, - "grad_norm": 1314.81103515625, - "learning_rate": 4.8552027648433604e-05, - "loss": 167.1761, - "step": 24500 - }, - { - "epoch": 0.19804620270040968, - "grad_norm": 1306.246826171875, - "learning_rate": 4.854966163073574e-05, - "loss": 141.6939, - "step": 24510 - }, - { - "epoch": 0.1981270049046938, - "grad_norm": 921.46337890625, - "learning_rate": 4.854729373929937e-05, - "loss": 134.5958, - "step": 24520 - }, - { - "epoch": 0.19820780710897792, - "grad_norm": 1050.4246826171875, - "learning_rate": 4.854492397431291e-05, - "loss": 158.4257, - "step": 24530 - }, - { - "epoch": 0.19828860931326206, - "grad_norm": 1319.991455078125, - "learning_rate": 4.8542552335964906e-05, - "loss": 120.2154, - "step": 24540 - }, - { - "epoch": 0.1983694115175462, - "grad_norm": 762.9476928710938, - "learning_rate": 4.854017882444405e-05, - "loss": 118.839, - "step": 24550 - }, - { - "epoch": 0.19845021372183033, - "grad_norm": 1108.251220703125, - "learning_rate": 4.853780343993921e-05, - "loss": 94.9426, - "step": 24560 - }, - { - "epoch": 0.19853101592611447, - "grad_norm": 1241.6214599609375, - "learning_rate": 4.8535426182639366e-05, - "loss": 122.4142, - "step": 24570 - }, - { - "epoch": 0.1986118181303986, - "grad_norm": 967.4368286132812, - "learning_rate": 4.853304705273368e-05, - "loss": 163.3576, - "step": 24580 - }, - { - "epoch": 0.19869262033468274, - "grad_norm": 4249.72265625, - "learning_rate": 4.8530666050411437e-05, - "loss": 146.6209, - "step": 24590 - }, - { - "epoch": 0.19877342253896688, - "grad_norm": 1544.2470703125, - "learning_rate": 4.852828317586209e-05, - "loss": 119.7304, - "step": 24600 - }, - { - "epoch": 0.19885422474325098, - "grad_norm": 750.6110229492188, - "learning_rate": 4.852589842927524e-05, - "loss": 111.4285, - "step": 24610 - }, - { - "epoch": 0.19893502694753512, - "grad_norm": 1012.6857299804688, - "learning_rate": 4.8523511810840606e-05, - "loss": 208.5437, - "step": 24620 - }, - { - "epoch": 0.19901582915181926, - "grad_norm": 728.74755859375, - "learning_rate": 4.85211233207481e-05, - "loss": 117.2628, - "step": 24630 - }, - { - "epoch": 0.1990966313561034, - "grad_norm": 1547.7210693359375, - "learning_rate": 4.851873295918776e-05, - "loss": 121.4661, - "step": 24640 - }, - { - "epoch": 0.19917743356038753, - "grad_norm": 742.6744995117188, - "learning_rate": 4.851634072634978e-05, - "loss": 88.9048, - "step": 24650 - }, - { - "epoch": 0.19925823576467167, - "grad_norm": 1027.68017578125, - "learning_rate": 4.851394662242449e-05, - "loss": 104.3657, - "step": 24660 - }, - { - "epoch": 0.1993390379689558, - "grad_norm": 1489.5638427734375, - "learning_rate": 4.8511550647602375e-05, - "loss": 144.8906, - "step": 24670 - }, - { - "epoch": 0.19941984017323994, - "grad_norm": 469.2421875, - "learning_rate": 4.850915280207409e-05, - "loss": 120.9474, - "step": 24680 - }, - { - "epoch": 0.19950064237752405, - "grad_norm": 951.28564453125, - "learning_rate": 4.8506753086030405e-05, - "loss": 97.7092, - "step": 24690 - }, - { - "epoch": 0.19958144458180818, - "grad_norm": 1149.8328857421875, - "learning_rate": 4.850435149966226e-05, - "loss": 180.1586, - "step": 24700 - }, - { - "epoch": 0.19966224678609232, - "grad_norm": 1594.200927734375, - "learning_rate": 4.850194804316073e-05, - "loss": 120.0714, - "step": 24710 - }, - { - "epoch": 0.19974304899037645, - "grad_norm": 673.1038208007812, - "learning_rate": 4.8499542716717064e-05, - "loss": 191.6669, - "step": 24720 - }, - { - "epoch": 0.1998238511946606, - "grad_norm": 905.4772338867188, - "learning_rate": 4.849713552052262e-05, - "loss": 114.5121, - "step": 24730 - }, - { - "epoch": 0.19990465339894473, - "grad_norm": 987.4473876953125, - "learning_rate": 4.8494726454768946e-05, - "loss": 98.6421, - "step": 24740 - }, - { - "epoch": 0.19998545560322886, - "grad_norm": 884.5498046875, - "learning_rate": 4.849231551964771e-05, - "loss": 100.2495, - "step": 24750 - }, - { - "epoch": 0.200066257807513, - "grad_norm": 656.5623779296875, - "learning_rate": 4.848990271535075e-05, - "loss": 148.5826, - "step": 24760 - }, - { - "epoch": 0.20014706001179713, - "grad_norm": 870.3948364257812, - "learning_rate": 4.8487488042070026e-05, - "loss": 104.2613, - "step": 24770 - }, - { - "epoch": 0.20022786221608124, - "grad_norm": 2163.8056640625, - "learning_rate": 4.848507149999768e-05, - "loss": 161.374, - "step": 24780 - }, - { - "epoch": 0.20030866442036538, - "grad_norm": 638.981689453125, - "learning_rate": 4.848265308932598e-05, - "loss": 122.1095, - "step": 24790 - }, - { - "epoch": 0.20038946662464951, - "grad_norm": 1972.9600830078125, - "learning_rate": 4.848023281024734e-05, - "loss": 130.3094, - "step": 24800 - }, - { - "epoch": 0.20047026882893365, - "grad_norm": 1553.595458984375, - "learning_rate": 4.8477810662954323e-05, - "loss": 150.5294, - "step": 24810 - }, - { - "epoch": 0.2005510710332178, - "grad_norm": 735.73291015625, - "learning_rate": 4.847538664763968e-05, - "loss": 119.3605, - "step": 24820 - }, - { - "epoch": 0.20063187323750192, - "grad_norm": 1192.310302734375, - "learning_rate": 4.8472960764496246e-05, - "loss": 134.4257, - "step": 24830 - }, - { - "epoch": 0.20071267544178606, - "grad_norm": 1546.8162841796875, - "learning_rate": 4.8470533013717055e-05, - "loss": 131.5309, - "step": 24840 - }, - { - "epoch": 0.2007934776460702, - "grad_norm": 1164.3795166015625, - "learning_rate": 4.846810339549527e-05, - "loss": 125.8069, - "step": 24850 - }, - { - "epoch": 0.20087427985035433, - "grad_norm": 776.0889282226562, - "learning_rate": 4.84656719100242e-05, - "loss": 133.5487, - "step": 24860 - }, - { - "epoch": 0.20095508205463844, - "grad_norm": 1108.123046875, - "learning_rate": 4.8463238557497304e-05, - "loss": 167.98, - "step": 24870 - }, - { - "epoch": 0.20103588425892258, - "grad_norm": 2984.786865234375, - "learning_rate": 4.8460803338108194e-05, - "loss": 158.5523, - "step": 24880 - }, - { - "epoch": 0.2011166864632067, - "grad_norm": 955.65234375, - "learning_rate": 4.845836625205065e-05, - "loss": 170.4189, - "step": 24890 - }, - { - "epoch": 0.20119748866749085, - "grad_norm": 1458.0986328125, - "learning_rate": 4.8455927299518547e-05, - "loss": 150.8813, - "step": 24900 - }, - { - "epoch": 0.20127829087177498, - "grad_norm": 1403.1298828125, - "learning_rate": 4.8453486480705964e-05, - "loss": 168.0573, - "step": 24910 - }, - { - "epoch": 0.20135909307605912, - "grad_norm": 667.5961303710938, - "learning_rate": 4.84510437958071e-05, - "loss": 94.6423, - "step": 24920 - }, - { - "epoch": 0.20143989528034326, - "grad_norm": 520.2862548828125, - "learning_rate": 4.84485992450163e-05, - "loss": 150.0534, - "step": 24930 - }, - { - "epoch": 0.2015206974846274, - "grad_norm": 879.5791015625, - "learning_rate": 4.844615282852808e-05, - "loss": 84.0358, - "step": 24940 - }, - { - "epoch": 0.2016014996889115, - "grad_norm": 1120.4345703125, - "learning_rate": 4.8443704546537075e-05, - "loss": 103.1282, - "step": 24950 - }, - { - "epoch": 0.20168230189319564, - "grad_norm": 852.24609375, - "learning_rate": 4.84412543992381e-05, - "loss": 150.6174, - "step": 24960 - }, - { - "epoch": 0.20176310409747977, - "grad_norm": 1034.11962890625, - "learning_rate": 4.843880238682609e-05, - "loss": 139.9309, - "step": 24970 - }, - { - "epoch": 0.2018439063017639, - "grad_norm": 1116.184326171875, - "learning_rate": 4.8436348509496146e-05, - "loss": 151.4813, - "step": 24980 - }, - { - "epoch": 0.20192470850604805, - "grad_norm": 903.5824584960938, - "learning_rate": 4.84338927674435e-05, - "loss": 123.8643, - "step": 24990 - }, - { - "epoch": 0.20200551071033218, - "grad_norm": 736.4754028320312, - "learning_rate": 4.843143516086356e-05, - "loss": 167.6289, - "step": 25000 - }, - { - "epoch": 0.20208631291461632, - "grad_norm": 568.1561279296875, - "learning_rate": 4.842897568995186e-05, - "loss": 142.7303, - "step": 25010 - }, - { - "epoch": 0.20216711511890045, - "grad_norm": 1412.805419921875, - "learning_rate": 4.842651435490409e-05, - "loss": 168.1875, - "step": 25020 - }, - { - "epoch": 0.2022479173231846, - "grad_norm": 878.0985107421875, - "learning_rate": 4.842405115591608e-05, - "loss": 114.086, - "step": 25030 - }, - { - "epoch": 0.2023287195274687, - "grad_norm": 1357.9951171875, - "learning_rate": 4.8421586093183835e-05, - "loss": 124.526, - "step": 25040 - }, - { - "epoch": 0.20240952173175283, - "grad_norm": 1052.0174560546875, - "learning_rate": 4.841911916690346e-05, - "loss": 111.2427, - "step": 25050 - }, - { - "epoch": 0.20249032393603697, - "grad_norm": 840.6262817382812, - "learning_rate": 4.841665037727126e-05, - "loss": 159.0765, - "step": 25060 - }, - { - "epoch": 0.2025711261403211, - "grad_norm": 1067.9989013671875, - "learning_rate": 4.8414179724483655e-05, - "loss": 174.7712, - "step": 25070 - }, - { - "epoch": 0.20265192834460524, - "grad_norm": 1314.8035888671875, - "learning_rate": 4.841170720873723e-05, - "loss": 148.8405, - "step": 25080 - }, - { - "epoch": 0.20273273054888938, - "grad_norm": 948.6197509765625, - "learning_rate": 4.84092328302287e-05, - "loss": 120.2858, - "step": 25090 - }, - { - "epoch": 0.20281353275317351, - "grad_norm": 798.6738891601562, - "learning_rate": 4.840675658915496e-05, - "loss": 121.5462, - "step": 25100 - }, - { - "epoch": 0.20289433495745765, - "grad_norm": 1691.7071533203125, - "learning_rate": 4.8404278485713005e-05, - "loss": 146.666, - "step": 25110 - }, - { - "epoch": 0.20297513716174176, - "grad_norm": 867.3951416015625, - "learning_rate": 4.8401798520100025e-05, - "loss": 188.3305, - "step": 25120 - }, - { - "epoch": 0.2030559393660259, - "grad_norm": 785.4647216796875, - "learning_rate": 4.839931669251335e-05, - "loss": 102.5276, - "step": 25130 - }, - { - "epoch": 0.20313674157031003, - "grad_norm": 914.7631225585938, - "learning_rate": 4.839683300315042e-05, - "loss": 130.4049, - "step": 25140 - }, - { - "epoch": 0.20321754377459417, - "grad_norm": 1125.4306640625, - "learning_rate": 4.839434745220887e-05, - "loss": 96.8723, - "step": 25150 - }, - { - "epoch": 0.2032983459788783, - "grad_norm": 1059.5665283203125, - "learning_rate": 4.839186003988646e-05, - "loss": 142.2179, - "step": 25160 - }, - { - "epoch": 0.20337914818316244, - "grad_norm": 584.8876342773438, - "learning_rate": 4.8389370766381105e-05, - "loss": 117.9172, - "step": 25170 - }, - { - "epoch": 0.20345995038744658, - "grad_norm": 692.2371826171875, - "learning_rate": 4.838687963189085e-05, - "loss": 111.0048, - "step": 25180 - }, - { - "epoch": 0.2035407525917307, - "grad_norm": 874.5255737304688, - "learning_rate": 4.838438663661392e-05, - "loss": 122.8301, - "step": 25190 - }, - { - "epoch": 0.20362155479601485, - "grad_norm": 1296.55078125, - "learning_rate": 4.838189178074867e-05, - "loss": 101.8946, - "step": 25200 - }, - { - "epoch": 0.20370235700029896, - "grad_norm": 1085.98779296875, - "learning_rate": 4.8379395064493596e-05, - "loss": 119.0873, - "step": 25210 - }, - { - "epoch": 0.2037831592045831, - "grad_norm": 1383.353515625, - "learning_rate": 4.837689648804734e-05, - "loss": 131.5083, - "step": 25220 - }, - { - "epoch": 0.20386396140886723, - "grad_norm": 871.8002319335938, - "learning_rate": 4.837439605160873e-05, - "loss": 121.8667, - "step": 25230 - }, - { - "epoch": 0.20394476361315136, - "grad_norm": 1358.469482421875, - "learning_rate": 4.83718937553767e-05, - "loss": 194.0584, - "step": 25240 - }, - { - "epoch": 0.2040255658174355, - "grad_norm": 1498.344970703125, - "learning_rate": 4.836938959955034e-05, - "loss": 121.8171, - "step": 25250 - }, - { - "epoch": 0.20410636802171964, - "grad_norm": 515.283447265625, - "learning_rate": 4.836688358432891e-05, - "loss": 103.4014, - "step": 25260 - }, - { - "epoch": 0.20418717022600377, - "grad_norm": 856.0272827148438, - "learning_rate": 4.836437570991179e-05, - "loss": 125.9105, - "step": 25270 - }, - { - "epoch": 0.2042679724302879, - "grad_norm": 702.4708862304688, - "learning_rate": 4.8361865976498524e-05, - "loss": 135.5884, - "step": 25280 - }, - { - "epoch": 0.20434877463457204, - "grad_norm": 990.8687744140625, - "learning_rate": 4.835935438428879e-05, - "loss": 104.5906, - "step": 25290 - }, - { - "epoch": 0.20442957683885615, - "grad_norm": 1151.5469970703125, - "learning_rate": 4.835684093348244e-05, - "loss": 108.9051, - "step": 25300 - }, - { - "epoch": 0.2045103790431403, - "grad_norm": 1168.942138671875, - "learning_rate": 4.8354325624279444e-05, - "loss": 137.6272, - "step": 25310 - }, - { - "epoch": 0.20459118124742443, - "grad_norm": 998.6651611328125, - "learning_rate": 4.8351808456879945e-05, - "loss": 126.3484, - "step": 25320 - }, - { - "epoch": 0.20467198345170856, - "grad_norm": 559.7073364257812, - "learning_rate": 4.834928943148421e-05, - "loss": 121.3656, - "step": 25330 - }, - { - "epoch": 0.2047527856559927, - "grad_norm": 1198.7279052734375, - "learning_rate": 4.834676854829269e-05, - "loss": 103.3286, - "step": 25340 - }, - { - "epoch": 0.20483358786027683, - "grad_norm": 2654.14892578125, - "learning_rate": 4.8344245807505925e-05, - "loss": 165.9907, - "step": 25350 - }, - { - "epoch": 0.20491439006456097, - "grad_norm": 1472.6622314453125, - "learning_rate": 4.834172120932467e-05, - "loss": 134.8079, - "step": 25360 - }, - { - "epoch": 0.2049951922688451, - "grad_norm": 961.0308837890625, - "learning_rate": 4.8339194753949776e-05, - "loss": 99.5531, - "step": 25370 - }, - { - "epoch": 0.20507599447312921, - "grad_norm": 1237.126953125, - "learning_rate": 4.8336666441582265e-05, - "loss": 121.0663, - "step": 25380 - }, - { - "epoch": 0.20515679667741335, - "grad_norm": 1922.439697265625, - "learning_rate": 4.833413627242331e-05, - "loss": 159.171, - "step": 25390 - }, - { - "epoch": 0.20523759888169749, - "grad_norm": 745.6070556640625, - "learning_rate": 4.833160424667423e-05, - "loss": 113.4854, - "step": 25400 - }, - { - "epoch": 0.20531840108598162, - "grad_norm": 786.2567749023438, - "learning_rate": 4.832907036453647e-05, - "loss": 117.437, - "step": 25410 - }, - { - "epoch": 0.20539920329026576, - "grad_norm": 1301.698974609375, - "learning_rate": 4.8326534626211646e-05, - "loss": 138.3389, - "step": 25420 - }, - { - "epoch": 0.2054800054945499, - "grad_norm": 867.2122802734375, - "learning_rate": 4.8323997031901516e-05, - "loss": 157.0264, - "step": 25430 - }, - { - "epoch": 0.20556080769883403, - "grad_norm": 1433.8521728515625, - "learning_rate": 4.832145758180798e-05, - "loss": 142.7119, - "step": 25440 - }, - { - "epoch": 0.20564160990311817, - "grad_norm": 1219.7191162109375, - "learning_rate": 4.8318916276133106e-05, - "loss": 145.5266, - "step": 25450 - }, - { - "epoch": 0.2057224121074023, - "grad_norm": 878.6344604492188, - "learning_rate": 4.831637311507908e-05, - "loss": 146.1202, - "step": 25460 - }, - { - "epoch": 0.2058032143116864, - "grad_norm": 2418.31103515625, - "learning_rate": 4.831382809884825e-05, - "loss": 179.8549, - "step": 25470 - }, - { - "epoch": 0.20588401651597055, - "grad_norm": 2074.760986328125, - "learning_rate": 4.8311281227643114e-05, - "loss": 168.8855, - "step": 25480 - }, - { - "epoch": 0.20596481872025468, - "grad_norm": 1183.781494140625, - "learning_rate": 4.830873250166632e-05, - "loss": 107.9291, - "step": 25490 - }, - { - "epoch": 0.20604562092453882, - "grad_norm": 800.1903686523438, - "learning_rate": 4.8306181921120645e-05, - "loss": 128.5428, - "step": 25500 - }, - { - "epoch": 0.20612642312882296, - "grad_norm": 1112.31787109375, - "learning_rate": 4.8303629486209043e-05, - "loss": 128.484, - "step": 25510 - }, - { - "epoch": 0.2062072253331071, - "grad_norm": 1296.8709716796875, - "learning_rate": 4.8301075197134595e-05, - "loss": 106.8793, - "step": 25520 - }, - { - "epoch": 0.20628802753739123, - "grad_norm": 1166.2271728515625, - "learning_rate": 4.829851905410052e-05, - "loss": 165.3025, - "step": 25530 - }, - { - "epoch": 0.20636882974167536, - "grad_norm": 863.3107299804688, - "learning_rate": 4.8295961057310215e-05, - "loss": 139.6899, - "step": 25540 - }, - { - "epoch": 0.20644963194595947, - "grad_norm": 1125.9315185546875, - "learning_rate": 4.8293401206967205e-05, - "loss": 191.4613, - "step": 25550 - }, - { - "epoch": 0.2065304341502436, - "grad_norm": 828.1910400390625, - "learning_rate": 4.829083950327516e-05, - "loss": 108.9545, - "step": 25560 - }, - { - "epoch": 0.20661123635452774, - "grad_norm": 1076.5262451171875, - "learning_rate": 4.82882759464379e-05, - "loss": 142.2093, - "step": 25570 - }, - { - "epoch": 0.20669203855881188, - "grad_norm": 710.749267578125, - "learning_rate": 4.828571053665941e-05, - "loss": 134.9571, - "step": 25580 - }, - { - "epoch": 0.20677284076309602, - "grad_norm": 857.0191040039062, - "learning_rate": 4.82831432741438e-05, - "loss": 144.0234, - "step": 25590 - }, - { - "epoch": 0.20685364296738015, - "grad_norm": 854.3779296875, - "learning_rate": 4.8280574159095334e-05, - "loss": 138.9507, - "step": 25600 - }, - { - "epoch": 0.2069344451716643, - "grad_norm": 969.385009765625, - "learning_rate": 4.827800319171842e-05, - "loss": 116.7419, - "step": 25610 - }, - { - "epoch": 0.20701524737594842, - "grad_norm": 797.749267578125, - "learning_rate": 4.8275430372217635e-05, - "loss": 149.8068, - "step": 25620 - }, - { - "epoch": 0.20709604958023256, - "grad_norm": 974.6106567382812, - "learning_rate": 4.827285570079767e-05, - "loss": 117.4761, - "step": 25630 - }, - { - "epoch": 0.20717685178451667, - "grad_norm": 1810.8978271484375, - "learning_rate": 4.827027917766338e-05, - "loss": 127.0933, - "step": 25640 - }, - { - "epoch": 0.2072576539888008, - "grad_norm": 926.6117553710938, - "learning_rate": 4.826770080301978e-05, - "loss": 114.4786, - "step": 25650 - }, - { - "epoch": 0.20733845619308494, - "grad_norm": 1579.2696533203125, - "learning_rate": 4.826512057707201e-05, - "loss": 117.5836, - "step": 25660 - }, - { - "epoch": 0.20741925839736908, - "grad_norm": 912.150634765625, - "learning_rate": 4.826253850002536e-05, - "loss": 121.9128, - "step": 25670 - }, - { - "epoch": 0.2075000606016532, - "grad_norm": 1012.4501342773438, - "learning_rate": 4.825995457208528e-05, - "loss": 122.0198, - "step": 25680 - }, - { - "epoch": 0.20758086280593735, - "grad_norm": 1985.3377685546875, - "learning_rate": 4.825736879345738e-05, - "loss": 130.8035, - "step": 25690 - }, - { - "epoch": 0.20766166501022149, - "grad_norm": 570.6342163085938, - "learning_rate": 4.8254781164347376e-05, - "loss": 115.7723, - "step": 25700 - }, - { - "epoch": 0.20774246721450562, - "grad_norm": 1562.3453369140625, - "learning_rate": 4.8252191684961156e-05, - "loss": 157.302, - "step": 25710 - }, - { - "epoch": 0.20782326941878976, - "grad_norm": 1015.2994384765625, - "learning_rate": 4.8249600355504756e-05, - "loss": 102.85, - "step": 25720 - }, - { - "epoch": 0.20790407162307387, - "grad_norm": 1037.9727783203125, - "learning_rate": 4.824700717618436e-05, - "loss": 115.7656, - "step": 25730 - }, - { - "epoch": 0.207984873827358, - "grad_norm": 1207.3101806640625, - "learning_rate": 4.8244412147206284e-05, - "loss": 147.2329, - "step": 25740 - }, - { - "epoch": 0.20806567603164214, - "grad_norm": 657.489501953125, - "learning_rate": 4.824181526877702e-05, - "loss": 134.383, - "step": 25750 - }, - { - "epoch": 0.20814647823592627, - "grad_norm": 1413.3948974609375, - "learning_rate": 4.823921654110318e-05, - "loss": 135.7521, - "step": 25760 - }, - { - "epoch": 0.2082272804402104, - "grad_norm": 775.7398681640625, - "learning_rate": 4.823661596439153e-05, - "loss": 130.5173, - "step": 25770 - }, - { - "epoch": 0.20830808264449455, - "grad_norm": 828.6028442382812, - "learning_rate": 4.8234013538848996e-05, - "loss": 104.7371, - "step": 25780 - }, - { - "epoch": 0.20838888484877868, - "grad_norm": 1575.2137451171875, - "learning_rate": 4.823140926468262e-05, - "loss": 138.3073, - "step": 25790 - }, - { - "epoch": 0.20846968705306282, - "grad_norm": 1031.439453125, - "learning_rate": 4.8228803142099646e-05, - "loss": 98.9179, - "step": 25800 - }, - { - "epoch": 0.20855048925734693, - "grad_norm": 618.6032104492188, - "learning_rate": 4.8226195171307395e-05, - "loss": 133.4145, - "step": 25810 - }, - { - "epoch": 0.20863129146163106, - "grad_norm": 968.1226806640625, - "learning_rate": 4.8223585352513404e-05, - "loss": 101.0214, - "step": 25820 - }, - { - "epoch": 0.2087120936659152, - "grad_norm": 912.6937866210938, - "learning_rate": 4.822097368592529e-05, - "loss": 112.554, - "step": 25830 - }, - { - "epoch": 0.20879289587019934, - "grad_norm": 1080.557373046875, - "learning_rate": 4.821836017175088e-05, - "loss": 108.6954, - "step": 25840 - }, - { - "epoch": 0.20887369807448347, - "grad_norm": 877.4397583007812, - "learning_rate": 4.821574481019811e-05, - "loss": 139.9172, - "step": 25850 - }, - { - "epoch": 0.2089545002787676, - "grad_norm": 589.3485717773438, - "learning_rate": 4.8213127601475075e-05, - "loss": 103.6113, - "step": 25860 - }, - { - "epoch": 0.20903530248305174, - "grad_norm": 579.3892822265625, - "learning_rate": 4.821050854579e-05, - "loss": 145.4385, - "step": 25870 - }, - { - "epoch": 0.20911610468733588, - "grad_norm": 1156.13623046875, - "learning_rate": 4.8207887643351286e-05, - "loss": 153.2932, - "step": 25880 - }, - { - "epoch": 0.20919690689162002, - "grad_norm": 1016.3140869140625, - "learning_rate": 4.8205264894367465e-05, - "loss": 183.6584, - "step": 25890 - }, - { - "epoch": 0.20927770909590412, - "grad_norm": 918.5465698242188, - "learning_rate": 4.8202640299047206e-05, - "loss": 103.016, - "step": 25900 - }, - { - "epoch": 0.20935851130018826, - "grad_norm": 912.2138061523438, - "learning_rate": 4.820001385759936e-05, - "loss": 116.7937, - "step": 25910 - }, - { - "epoch": 0.2094393135044724, - "grad_norm": 955.6724243164062, - "learning_rate": 4.8197385570232866e-05, - "loss": 129.2811, - "step": 25920 - }, - { - "epoch": 0.20952011570875653, - "grad_norm": 774.6975708007812, - "learning_rate": 4.8194755437156874e-05, - "loss": 167.8948, - "step": 25930 - }, - { - "epoch": 0.20960091791304067, - "grad_norm": 638.8875732421875, - "learning_rate": 4.819212345858064e-05, - "loss": 129.2132, - "step": 25940 - }, - { - "epoch": 0.2096817201173248, - "grad_norm": 552.298583984375, - "learning_rate": 4.818948963471358e-05, - "loss": 97.6886, - "step": 25950 - }, - { - "epoch": 0.20976252232160894, - "grad_norm": 969.4928588867188, - "learning_rate": 4.818685396576526e-05, - "loss": 183.411, - "step": 25960 - }, - { - "epoch": 0.20984332452589308, - "grad_norm": 883.5795288085938, - "learning_rate": 4.8184216451945375e-05, - "loss": 128.6071, - "step": 25970 - }, - { - "epoch": 0.2099241267301772, - "grad_norm": 892.232177734375, - "learning_rate": 4.8181577093463794e-05, - "loss": 121.2464, - "step": 25980 - }, - { - "epoch": 0.21000492893446132, - "grad_norm": 1593.98095703125, - "learning_rate": 4.8178935890530504e-05, - "loss": 115.9629, - "step": 25990 - }, - { - "epoch": 0.21008573113874546, - "grad_norm": 1703.683349609375, - "learning_rate": 4.817629284335567e-05, - "loss": 143.0165, - "step": 26000 - }, - { - "epoch": 0.2101665333430296, - "grad_norm": 761.6806030273438, - "learning_rate": 4.817364795214958e-05, - "loss": 116.2317, - "step": 26010 - }, - { - "epoch": 0.21024733554731373, - "grad_norm": 941.1605224609375, - "learning_rate": 4.817100121712267e-05, - "loss": 115.0284, - "step": 26020 - }, - { - "epoch": 0.21032813775159787, - "grad_norm": 1579.6839599609375, - "learning_rate": 4.816835263848554e-05, - "loss": 111.2469, - "step": 26030 - }, - { - "epoch": 0.210408939955882, - "grad_norm": 665.10693359375, - "learning_rate": 4.816570221644891e-05, - "loss": 123.9348, - "step": 26040 - }, - { - "epoch": 0.21048974216016614, - "grad_norm": 2479.761474609375, - "learning_rate": 4.8163049951223685e-05, - "loss": 148.6897, - "step": 26050 - }, - { - "epoch": 0.21057054436445027, - "grad_norm": 1986.2623291015625, - "learning_rate": 4.8160395843020864e-05, - "loss": 123.8568, - "step": 26060 - }, - { - "epoch": 0.21065134656873438, - "grad_norm": 726.0928955078125, - "learning_rate": 4.815773989205165e-05, - "loss": 158.7447, - "step": 26070 - }, - { - "epoch": 0.21073214877301852, - "grad_norm": 857.0536499023438, - "learning_rate": 4.815508209852735e-05, - "loss": 147.0788, - "step": 26080 - }, - { - "epoch": 0.21081295097730265, - "grad_norm": 2886.379638671875, - "learning_rate": 4.815242246265943e-05, - "loss": 149.7661, - "step": 26090 - }, - { - "epoch": 0.2108937531815868, - "grad_norm": 1591.49462890625, - "learning_rate": 4.8149760984659506e-05, - "loss": 152.9901, - "step": 26100 - }, - { - "epoch": 0.21097455538587093, - "grad_norm": 1228.0751953125, - "learning_rate": 4.814709766473935e-05, - "loss": 146.1103, - "step": 26110 - }, - { - "epoch": 0.21105535759015506, - "grad_norm": 692.44580078125, - "learning_rate": 4.814443250311087e-05, - "loss": 129.157, - "step": 26120 - }, - { - "epoch": 0.2111361597944392, - "grad_norm": 764.2545776367188, - "learning_rate": 4.8141765499986105e-05, - "loss": 148.7673, - "step": 26130 - }, - { - "epoch": 0.21121696199872333, - "grad_norm": 1205.29736328125, - "learning_rate": 4.813909665557727e-05, - "loss": 114.3868, - "step": 26140 - }, - { - "epoch": 0.21129776420300747, - "grad_norm": 982.1507568359375, - "learning_rate": 4.813642597009671e-05, - "loss": 167.1367, - "step": 26150 - }, - { - "epoch": 0.21137856640729158, - "grad_norm": 1066.74951171875, - "learning_rate": 4.813375344375691e-05, - "loss": 135.8896, - "step": 26160 - }, - { - "epoch": 0.21145936861157572, - "grad_norm": 916.42626953125, - "learning_rate": 4.813107907677052e-05, - "loss": 118.465, - "step": 26170 - }, - { - "epoch": 0.21154017081585985, - "grad_norm": 1355.17919921875, - "learning_rate": 4.812840286935033e-05, - "loss": 152.8149, - "step": 26180 - }, - { - "epoch": 0.211620973020144, - "grad_norm": 1254.8470458984375, - "learning_rate": 4.812572482170926e-05, - "loss": 186.1999, - "step": 26190 - }, - { - "epoch": 0.21170177522442812, - "grad_norm": 959.7611083984375, - "learning_rate": 4.8123044934060406e-05, - "loss": 117.468, - "step": 26200 - }, - { - "epoch": 0.21178257742871226, - "grad_norm": 780.098388671875, - "learning_rate": 4.8120363206616984e-05, - "loss": 148.4075, - "step": 26210 - }, - { - "epoch": 0.2118633796329964, - "grad_norm": 1312.219482421875, - "learning_rate": 4.8117679639592374e-05, - "loss": 167.4585, - "step": 26220 - }, - { - "epoch": 0.21194418183728053, - "grad_norm": 1396.0057373046875, - "learning_rate": 4.811499423320008e-05, - "loss": 108.1143, - "step": 26230 - }, - { - "epoch": 0.21202498404156464, - "grad_norm": 938.4962768554688, - "learning_rate": 4.811230698765379e-05, - "loss": 98.6509, - "step": 26240 - }, - { - "epoch": 0.21210578624584878, - "grad_norm": 908.1611328125, - "learning_rate": 4.81096179031673e-05, - "loss": 120.716, - "step": 26250 - }, - { - "epoch": 0.2121865884501329, - "grad_norm": 831.1165771484375, - "learning_rate": 4.810692697995458e-05, - "loss": 146.8217, - "step": 26260 - }, - { - "epoch": 0.21226739065441705, - "grad_norm": 1015.498291015625, - "learning_rate": 4.8104234218229717e-05, - "loss": 105.7738, - "step": 26270 - }, - { - "epoch": 0.21234819285870118, - "grad_norm": 1511.4072265625, - "learning_rate": 4.810153961820697e-05, - "loss": 125.2595, - "step": 26280 - }, - { - "epoch": 0.21242899506298532, - "grad_norm": 3980.236572265625, - "learning_rate": 4.8098843180100745e-05, - "loss": 146.2137, - "step": 26290 - }, - { - "epoch": 0.21250979726726946, - "grad_norm": 1262.1708984375, - "learning_rate": 4.809614490412557e-05, - "loss": 136.2154, - "step": 26300 - }, - { - "epoch": 0.2125905994715536, - "grad_norm": 810.4671020507812, - "learning_rate": 4.8093444790496144e-05, - "loss": 130.6367, - "step": 26310 - }, - { - "epoch": 0.21267140167583773, - "grad_norm": 1622.8140869140625, - "learning_rate": 4.809074283942731e-05, - "loss": 171.1182, - "step": 26320 - }, - { - "epoch": 0.21275220388012184, - "grad_norm": 1370.1827392578125, - "learning_rate": 4.808803905113403e-05, - "loss": 132.9687, - "step": 26330 - }, - { - "epoch": 0.21283300608440597, - "grad_norm": 907.0731201171875, - "learning_rate": 4.8085333425831444e-05, - "loss": 185.5648, - "step": 26340 - }, - { - "epoch": 0.2129138082886901, - "grad_norm": 1234.0059814453125, - "learning_rate": 4.808262596373483e-05, - "loss": 145.0553, - "step": 26350 - }, - { - "epoch": 0.21299461049297425, - "grad_norm": 856.7258911132812, - "learning_rate": 4.80799166650596e-05, - "loss": 106.4131, - "step": 26360 - }, - { - "epoch": 0.21307541269725838, - "grad_norm": 1424.252197265625, - "learning_rate": 4.807720553002132e-05, - "loss": 130.5484, - "step": 26370 - }, - { - "epoch": 0.21315621490154252, - "grad_norm": 1079.7247314453125, - "learning_rate": 4.8074492558835706e-05, - "loss": 141.4675, - "step": 26380 - }, - { - "epoch": 0.21323701710582665, - "grad_norm": 1344.104736328125, - "learning_rate": 4.807177775171861e-05, - "loss": 119.4837, - "step": 26390 - }, - { - "epoch": 0.2133178193101108, - "grad_norm": 1134.1732177734375, - "learning_rate": 4.806906110888606e-05, - "loss": 156.9652, - "step": 26400 - }, - { - "epoch": 0.21339862151439493, - "grad_norm": 1158.4144287109375, - "learning_rate": 4.806634263055418e-05, - "loss": 111.5476, - "step": 26410 - }, - { - "epoch": 0.21347942371867903, - "grad_norm": 2834.169921875, - "learning_rate": 4.8063622316939283e-05, - "loss": 132.3988, - "step": 26420 - }, - { - "epoch": 0.21356022592296317, - "grad_norm": 690.62939453125, - "learning_rate": 4.80609001682578e-05, - "loss": 105.5783, - "step": 26430 - }, - { - "epoch": 0.2136410281272473, - "grad_norm": 1018.9848022460938, - "learning_rate": 4.805817618472633e-05, - "loss": 114.9016, - "step": 26440 - }, - { - "epoch": 0.21372183033153144, - "grad_norm": 582.7994384765625, - "learning_rate": 4.80554503665616e-05, - "loss": 131.1239, - "step": 26450 - }, - { - "epoch": 0.21380263253581558, - "grad_norm": 914.8876953125, - "learning_rate": 4.8052722713980505e-05, - "loss": 147.5573, - "step": 26460 - }, - { - "epoch": 0.21388343474009971, - "grad_norm": 635.0199584960938, - "learning_rate": 4.804999322720005e-05, - "loss": 120.6101, - "step": 26470 - }, - { - "epoch": 0.21396423694438385, - "grad_norm": 1247.41650390625, - "learning_rate": 4.8047261906437424e-05, - "loss": 138.249, - "step": 26480 - }, - { - "epoch": 0.214045039148668, - "grad_norm": 986.372314453125, - "learning_rate": 4.804452875190994e-05, - "loss": 129.5616, - "step": 26490 - }, - { - "epoch": 0.2141258413529521, - "grad_norm": 1141.0712890625, - "learning_rate": 4.8041793763835066e-05, - "loss": 136.7256, - "step": 26500 - }, - { - "epoch": 0.21420664355723623, - "grad_norm": 1876.8280029296875, - "learning_rate": 4.8039056942430404e-05, - "loss": 127.1374, - "step": 26510 - }, - { - "epoch": 0.21428744576152037, - "grad_norm": 694.3611450195312, - "learning_rate": 4.803631828791372e-05, - "loss": 107.3327, - "step": 26520 - }, - { - "epoch": 0.2143682479658045, - "grad_norm": 1999.895263671875, - "learning_rate": 4.8033577800502904e-05, - "loss": 141.0124, - "step": 26530 - }, - { - "epoch": 0.21444905017008864, - "grad_norm": 949.3560791015625, - "learning_rate": 4.8030835480416024e-05, - "loss": 149.314, - "step": 26540 - }, - { - "epoch": 0.21452985237437278, - "grad_norm": 871.9557495117188, - "learning_rate": 4.802809132787125e-05, - "loss": 110.3452, - "step": 26550 - }, - { - "epoch": 0.2146106545786569, - "grad_norm": 1526.6343994140625, - "learning_rate": 4.802534534308695e-05, - "loss": 155.3074, - "step": 26560 - }, - { - "epoch": 0.21469145678294105, - "grad_norm": 1495.3609619140625, - "learning_rate": 4.8022597526281575e-05, - "loss": 118.711, - "step": 26570 - }, - { - "epoch": 0.21477225898722518, - "grad_norm": 950.646484375, - "learning_rate": 4.801984787767379e-05, - "loss": 174.5051, - "step": 26580 - }, - { - "epoch": 0.2148530611915093, - "grad_norm": 1232.76220703125, - "learning_rate": 4.801709639748235e-05, - "loss": 105.6188, - "step": 26590 - }, - { - "epoch": 0.21493386339579343, - "grad_norm": 1269.230712890625, - "learning_rate": 4.801434308592618e-05, - "loss": 143.9606, - "step": 26600 - }, - { - "epoch": 0.21501466560007756, - "grad_norm": 1002.9767456054688, - "learning_rate": 4.801158794322436e-05, - "loss": 119.435, - "step": 26610 - }, - { - "epoch": 0.2150954678043617, - "grad_norm": 702.4388427734375, - "learning_rate": 4.800883096959608e-05, - "loss": 123.8373, - "step": 26620 - }, - { - "epoch": 0.21517627000864584, - "grad_norm": 1378.23583984375, - "learning_rate": 4.8006072165260734e-05, - "loss": 125.6581, - "step": 26630 - }, - { - "epoch": 0.21525707221292997, - "grad_norm": 537.1288452148438, - "learning_rate": 4.800331153043781e-05, - "loss": 98.1134, - "step": 26640 - }, - { - "epoch": 0.2153378744172141, - "grad_norm": 870.8504638671875, - "learning_rate": 4.800054906534694e-05, - "loss": 127.3877, - "step": 26650 - }, - { - "epoch": 0.21541867662149825, - "grad_norm": 1054.9300537109375, - "learning_rate": 4.799778477020795e-05, - "loss": 169.7709, - "step": 26660 - }, - { - "epoch": 0.21549947882578235, - "grad_norm": 806.7195434570312, - "learning_rate": 4.799501864524078e-05, - "loss": 122.2219, - "step": 26670 - }, - { - "epoch": 0.2155802810300665, - "grad_norm": 1314.31103515625, - "learning_rate": 4.79922506906655e-05, - "loss": 135.7121, - "step": 26680 - }, - { - "epoch": 0.21566108323435063, - "grad_norm": 958.393798828125, - "learning_rate": 4.7989480906702357e-05, - "loss": 118.1605, - "step": 26690 - }, - { - "epoch": 0.21574188543863476, - "grad_norm": 1037.980712890625, - "learning_rate": 4.7986709293571716e-05, - "loss": 136.6159, - "step": 26700 - }, - { - "epoch": 0.2158226876429189, - "grad_norm": 938.3712768554688, - "learning_rate": 4.798393585149412e-05, - "loss": 172.5269, - "step": 26710 - }, - { - "epoch": 0.21590348984720303, - "grad_norm": 870.22265625, - "learning_rate": 4.798116058069023e-05, - "loss": 136.1852, - "step": 26720 - }, - { - "epoch": 0.21598429205148717, - "grad_norm": 723.0908203125, - "learning_rate": 4.797838348138086e-05, - "loss": 119.919, - "step": 26730 - }, - { - "epoch": 0.2160650942557713, - "grad_norm": 696.8587646484375, - "learning_rate": 4.7975604553786976e-05, - "loss": 113.919, - "step": 26740 - }, - { - "epoch": 0.21614589646005544, - "grad_norm": 1127.3470458984375, - "learning_rate": 4.797282379812968e-05, - "loss": 138.6845, - "step": 26750 - }, - { - "epoch": 0.21622669866433955, - "grad_norm": 827.1114501953125, - "learning_rate": 4.797004121463022e-05, - "loss": 122.3423, - "step": 26760 - }, - { - "epoch": 0.2163075008686237, - "grad_norm": 868.558349609375, - "learning_rate": 4.7967256803510006e-05, - "loss": 145.1173, - "step": 26770 - }, - { - "epoch": 0.21638830307290782, - "grad_norm": 1364.7239990234375, - "learning_rate": 4.796447056499057e-05, - "loss": 145.0587, - "step": 26780 - }, - { - "epoch": 0.21646910527719196, - "grad_norm": 556.9483032226562, - "learning_rate": 4.79616824992936e-05, - "loss": 103.057, - "step": 26790 - }, - { - "epoch": 0.2165499074814761, - "grad_norm": 1105.702392578125, - "learning_rate": 4.795889260664094e-05, - "loss": 131.9451, - "step": 26800 - }, - { - "epoch": 0.21663070968576023, - "grad_norm": 1220.0301513671875, - "learning_rate": 4.7956100887254564e-05, - "loss": 128.604, - "step": 26810 - }, - { - "epoch": 0.21671151189004437, - "grad_norm": 715.3391723632812, - "learning_rate": 4.795330734135659e-05, - "loss": 173.1665, - "step": 26820 - }, - { - "epoch": 0.2167923140943285, - "grad_norm": 1711.2791748046875, - "learning_rate": 4.7950511969169296e-05, - "loss": 124.8017, - "step": 26830 - }, - { - "epoch": 0.21687311629861264, - "grad_norm": 1356.01904296875, - "learning_rate": 4.7947714770915084e-05, - "loss": 134.385, - "step": 26840 - }, - { - "epoch": 0.21695391850289675, - "grad_norm": 1066.4859619140625, - "learning_rate": 4.794491574681653e-05, - "loss": 137.7477, - "step": 26850 - }, - { - "epoch": 0.21703472070718088, - "grad_norm": 982.4284057617188, - "learning_rate": 4.7942114897096336e-05, - "loss": 150.0189, - "step": 26860 - }, - { - "epoch": 0.21711552291146502, - "grad_norm": 1068.5823974609375, - "learning_rate": 4.7939312221977354e-05, - "loss": 137.9951, - "step": 26870 - }, - { - "epoch": 0.21719632511574916, - "grad_norm": 1095.5313720703125, - "learning_rate": 4.7936507721682564e-05, - "loss": 97.3253, - "step": 26880 - }, - { - "epoch": 0.2172771273200333, - "grad_norm": 801.6238403320312, - "learning_rate": 4.7933701396435124e-05, - "loss": 113.6721, - "step": 26890 - }, - { - "epoch": 0.21735792952431743, - "grad_norm": 1595.135986328125, - "learning_rate": 4.793089324645832e-05, - "loss": 147.374, - "step": 26900 - }, - { - "epoch": 0.21743873172860156, - "grad_norm": 940.8125610351562, - "learning_rate": 4.7928083271975564e-05, - "loss": 134.5849, - "step": 26910 - }, - { - "epoch": 0.2175195339328857, - "grad_norm": 1023.5421142578125, - "learning_rate": 4.792527147321046e-05, - "loss": 200.3715, - "step": 26920 - }, - { - "epoch": 0.2176003361371698, - "grad_norm": 839.22607421875, - "learning_rate": 4.792245785038671e-05, - "loss": 132.2677, - "step": 26930 - }, - { - "epoch": 0.21768113834145394, - "grad_norm": 1018.0061645507812, - "learning_rate": 4.79196424037282e-05, - "loss": 115.3362, - "step": 26940 - }, - { - "epoch": 0.21776194054573808, - "grad_norm": 1957.3524169921875, - "learning_rate": 4.791682513345892e-05, - "loss": 128.3477, - "step": 26950 - }, - { - "epoch": 0.21784274275002222, - "grad_norm": 1235.8717041015625, - "learning_rate": 4.791400603980305e-05, - "loss": 90.8765, - "step": 26960 - }, - { - "epoch": 0.21792354495430635, - "grad_norm": 2822.09228515625, - "learning_rate": 4.791118512298487e-05, - "loss": 144.3291, - "step": 26970 - }, - { - "epoch": 0.2180043471585905, - "grad_norm": 902.145263671875, - "learning_rate": 4.790836238322884e-05, - "loss": 94.8746, - "step": 26980 - }, - { - "epoch": 0.21808514936287463, - "grad_norm": 1357.65869140625, - "learning_rate": 4.790553782075955e-05, - "loss": 124.806, - "step": 26990 - }, - { - "epoch": 0.21816595156715876, - "grad_norm": 1046.5242919921875, - "learning_rate": 4.790271143580174e-05, - "loss": 100.0037, - "step": 27000 - }, - { - "epoch": 0.2182467537714429, - "grad_norm": 1063.6263427734375, - "learning_rate": 4.789988322858029e-05, - "loss": 138.3908, - "step": 27010 - }, - { - "epoch": 0.218327555975727, - "grad_norm": 894.979248046875, - "learning_rate": 4.789705319932023e-05, - "loss": 104.3508, - "step": 27020 - }, - { - "epoch": 0.21840835818001114, - "grad_norm": 794.1494750976562, - "learning_rate": 4.7894221348246724e-05, - "loss": 152.5262, - "step": 27030 - }, - { - "epoch": 0.21848916038429528, - "grad_norm": 1162.4373779296875, - "learning_rate": 4.78913876755851e-05, - "loss": 115.4035, - "step": 27040 - }, - { - "epoch": 0.21856996258857941, - "grad_norm": 1050.3670654296875, - "learning_rate": 4.788855218156082e-05, - "loss": 143.373, - "step": 27050 - }, - { - "epoch": 0.21865076479286355, - "grad_norm": 1464.134765625, - "learning_rate": 4.788571486639948e-05, - "loss": 184.6847, - "step": 27060 - }, - { - "epoch": 0.2187315669971477, - "grad_norm": 1033.2891845703125, - "learning_rate": 4.7882875730326844e-05, - "loss": 145.4166, - "step": 27070 - }, - { - "epoch": 0.21881236920143182, - "grad_norm": 1101.4473876953125, - "learning_rate": 4.7880034773568806e-05, - "loss": 113.4437, - "step": 27080 - }, - { - "epoch": 0.21889317140571596, - "grad_norm": 777.7227783203125, - "learning_rate": 4.78771919963514e-05, - "loss": 111.7925, - "step": 27090 - }, - { - "epoch": 0.2189739736100001, - "grad_norm": 2186.093017578125, - "learning_rate": 4.787434739890082e-05, - "loss": 125.779, - "step": 27100 - }, - { - "epoch": 0.2190547758142842, - "grad_norm": 1114.3084716796875, - "learning_rate": 4.78715009814434e-05, - "loss": 123.3441, - "step": 27110 - }, - { - "epoch": 0.21913557801856834, - "grad_norm": 675.5640258789062, - "learning_rate": 4.7868652744205614e-05, - "loss": 131.156, - "step": 27120 - }, - { - "epoch": 0.21921638022285247, - "grad_norm": 1302.7738037109375, - "learning_rate": 4.786580268741407e-05, - "loss": 142.0352, - "step": 27130 - }, - { - "epoch": 0.2192971824271366, - "grad_norm": 986.2174072265625, - "learning_rate": 4.786295081129556e-05, - "loss": 136.5463, - "step": 27140 - }, - { - "epoch": 0.21937798463142075, - "grad_norm": 1845.0106201171875, - "learning_rate": 4.786009711607697e-05, - "loss": 122.9679, - "step": 27150 - }, - { - "epoch": 0.21945878683570488, - "grad_norm": 732.654052734375, - "learning_rate": 4.785724160198537e-05, - "loss": 88.4697, - "step": 27160 - }, - { - "epoch": 0.21953958903998902, - "grad_norm": 987.9874267578125, - "learning_rate": 4.7854384269247966e-05, - "loss": 113.6512, - "step": 27170 - }, - { - "epoch": 0.21962039124427316, - "grad_norm": 997.5609130859375, - "learning_rate": 4.785152511809208e-05, - "loss": 135.7726, - "step": 27180 - }, - { - "epoch": 0.21970119344855726, - "grad_norm": 793.2268676757812, - "learning_rate": 4.784866414874522e-05, - "loss": 135.3499, - "step": 27190 - }, - { - "epoch": 0.2197819956528414, - "grad_norm": 738.2964477539062, - "learning_rate": 4.7845801361435014e-05, - "loss": 80.5195, - "step": 27200 - }, - { - "epoch": 0.21986279785712554, - "grad_norm": 1306.4818115234375, - "learning_rate": 4.784293675638924e-05, - "loss": 156.4882, - "step": 27210 - }, - { - "epoch": 0.21994360006140967, - "grad_norm": 774.1953125, - "learning_rate": 4.784007033383583e-05, - "loss": 133.7275, - "step": 27220 - }, - { - "epoch": 0.2200244022656938, - "grad_norm": 911.51318359375, - "learning_rate": 4.7837202094002845e-05, - "loss": 114.8217, - "step": 27230 - }, - { - "epoch": 0.22010520446997794, - "grad_norm": 1125.44677734375, - "learning_rate": 4.783433203711849e-05, - "loss": 111.2581, - "step": 27240 - }, - { - "epoch": 0.22018600667426208, - "grad_norm": 765.133056640625, - "learning_rate": 4.7831460163411136e-05, - "loss": 154.4946, - "step": 27250 - }, - { - "epoch": 0.22026680887854622, - "grad_norm": 1673.057861328125, - "learning_rate": 4.7828586473109283e-05, - "loss": 103.1199, - "step": 27260 - }, - { - "epoch": 0.22034761108283035, - "grad_norm": 659.0311889648438, - "learning_rate": 4.782571096644156e-05, - "loss": 110.1531, - "step": 27270 - }, - { - "epoch": 0.22042841328711446, - "grad_norm": 1234.859375, - "learning_rate": 4.7822833643636785e-05, - "loss": 137.5645, - "step": 27280 - }, - { - "epoch": 0.2205092154913986, - "grad_norm": 728.8515014648438, - "learning_rate": 4.781995450492387e-05, - "loss": 106.9196, - "step": 27290 - }, - { - "epoch": 0.22059001769568273, - "grad_norm": 1583.98876953125, - "learning_rate": 4.781707355053191e-05, - "loss": 182.9427, - "step": 27300 - }, - { - "epoch": 0.22067081989996687, - "grad_norm": 1324.136474609375, - "learning_rate": 4.781419078069012e-05, - "loss": 114.648, - "step": 27310 - }, - { - "epoch": 0.220751622104251, - "grad_norm": 1119.7723388671875, - "learning_rate": 4.7811306195627874e-05, - "loss": 144.1063, - "step": 27320 - }, - { - "epoch": 0.22083242430853514, - "grad_norm": 1279.691650390625, - "learning_rate": 4.7808419795574686e-05, - "loss": 133.2886, - "step": 27330 - }, - { - "epoch": 0.22091322651281928, - "grad_norm": 1918.8956298828125, - "learning_rate": 4.780553158076021e-05, - "loss": 159.0203, - "step": 27340 - }, - { - "epoch": 0.2209940287171034, - "grad_norm": 906.7332763671875, - "learning_rate": 4.780264155141425e-05, - "loss": 101.9679, - "step": 27350 - }, - { - "epoch": 0.22107483092138752, - "grad_norm": 897.4724731445312, - "learning_rate": 4.779974970776675e-05, - "loss": 130.738, - "step": 27360 - }, - { - "epoch": 0.22115563312567166, - "grad_norm": 666.8642578125, - "learning_rate": 4.7796856050047805e-05, - "loss": 128.9539, - "step": 27370 - }, - { - "epoch": 0.2212364353299558, - "grad_norm": 4365.91943359375, - "learning_rate": 4.7793960578487644e-05, - "loss": 159.1669, - "step": 27380 - }, - { - "epoch": 0.22131723753423993, - "grad_norm": 621.6768798828125, - "learning_rate": 4.779106329331665e-05, - "loss": 125.7744, - "step": 27390 - }, - { - "epoch": 0.22139803973852407, - "grad_norm": 1102.6993408203125, - "learning_rate": 4.778816419476535e-05, - "loss": 133.3575, - "step": 27400 - }, - { - "epoch": 0.2214788419428082, - "grad_norm": 855.665771484375, - "learning_rate": 4.778526328306441e-05, - "loss": 123.1155, - "step": 27410 - }, - { - "epoch": 0.22155964414709234, - "grad_norm": 837.7122192382812, - "learning_rate": 4.778236055844464e-05, - "loss": 126.1311, - "step": 27420 - }, - { - "epoch": 0.22164044635137647, - "grad_norm": 695.8333129882812, - "learning_rate": 4.7779456021136994e-05, - "loss": 144.2318, - "step": 27430 - }, - { - "epoch": 0.2217212485556606, - "grad_norm": 706.7747802734375, - "learning_rate": 4.777654967137257e-05, - "loss": 131.9998, - "step": 27440 - }, - { - "epoch": 0.22180205075994472, - "grad_norm": 956.3048706054688, - "learning_rate": 4.777364150938263e-05, - "loss": 122.0305, - "step": 27450 - }, - { - "epoch": 0.22188285296422885, - "grad_norm": 1235.3560791015625, - "learning_rate": 4.7770731535398546e-05, - "loss": 109.7588, - "step": 27460 - }, - { - "epoch": 0.221963655168513, - "grad_norm": 902.3396606445312, - "learning_rate": 4.7767819749651865e-05, - "loss": 170.0542, - "step": 27470 - }, - { - "epoch": 0.22204445737279713, - "grad_norm": 694.8578491210938, - "learning_rate": 4.7764906152374245e-05, - "loss": 115.4948, - "step": 27480 - }, - { - "epoch": 0.22212525957708126, - "grad_norm": 1049.1341552734375, - "learning_rate": 4.776199074379753e-05, - "loss": 153.7766, - "step": 27490 - }, - { - "epoch": 0.2222060617813654, - "grad_norm": 2423.690673828125, - "learning_rate": 4.775907352415367e-05, - "loss": 154.215, - "step": 27500 - }, - { - "epoch": 0.22228686398564954, - "grad_norm": 1209.479736328125, - "learning_rate": 4.775615449367478e-05, - "loss": 111.1116, - "step": 27510 - }, - { - "epoch": 0.22236766618993367, - "grad_norm": 626.1124267578125, - "learning_rate": 4.775323365259311e-05, - "loss": 143.7026, - "step": 27520 - }, - { - "epoch": 0.2224484683942178, - "grad_norm": 2049.082275390625, - "learning_rate": 4.775031100114107e-05, - "loss": 114.29, - "step": 27530 - }, - { - "epoch": 0.22252927059850192, - "grad_norm": 939.1179809570312, - "learning_rate": 4.774738653955119e-05, - "loss": 89.146, - "step": 27540 - }, - { - "epoch": 0.22261007280278605, - "grad_norm": 1062.435302734375, - "learning_rate": 4.774446026805616e-05, - "loss": 145.3695, - "step": 27550 - }, - { - "epoch": 0.2226908750070702, - "grad_norm": 1143.159912109375, - "learning_rate": 4.7741532186888805e-05, - "loss": 130.587, - "step": 27560 - }, - { - "epoch": 0.22277167721135432, - "grad_norm": 832.4049682617188, - "learning_rate": 4.7738602296282106e-05, - "loss": 120.3255, - "step": 27570 - }, - { - "epoch": 0.22285247941563846, - "grad_norm": 1231.2093505859375, - "learning_rate": 4.773567059646917e-05, - "loss": 164.9255, - "step": 27580 - }, - { - "epoch": 0.2229332816199226, - "grad_norm": 1225.770263671875, - "learning_rate": 4.773273708768328e-05, - "loss": 113.0409, - "step": 27590 - }, - { - "epoch": 0.22301408382420673, - "grad_norm": 965.025146484375, - "learning_rate": 4.7729801770157824e-05, - "loss": 137.8053, - "step": 27600 - }, - { - "epoch": 0.22309488602849087, - "grad_norm": 1262.69677734375, - "learning_rate": 4.7726864644126354e-05, - "loss": 100.2599, - "step": 27610 - }, - { - "epoch": 0.22317568823277498, - "grad_norm": 904.1385498046875, - "learning_rate": 4.772392570982257e-05, - "loss": 114.3216, - "step": 27620 - }, - { - "epoch": 0.2232564904370591, - "grad_norm": 956.3895874023438, - "learning_rate": 4.772098496748031e-05, - "loss": 129.2148, - "step": 27630 - }, - { - "epoch": 0.22333729264134325, - "grad_norm": 1907.3521728515625, - "learning_rate": 4.771804241733355e-05, - "loss": 165.4693, - "step": 27640 - }, - { - "epoch": 0.22341809484562739, - "grad_norm": 1505.408935546875, - "learning_rate": 4.7715098059616416e-05, - "loss": 122.0059, - "step": 27650 - }, - { - "epoch": 0.22349889704991152, - "grad_norm": 1759.0234375, - "learning_rate": 4.771215189456317e-05, - "loss": 143.4316, - "step": 27660 - }, - { - "epoch": 0.22357969925419566, - "grad_norm": 1282.2437744140625, - "learning_rate": 4.770920392240824e-05, - "loss": 124.8109, - "step": 27670 - }, - { - "epoch": 0.2236605014584798, - "grad_norm": 1193.1700439453125, - "learning_rate": 4.770625414338617e-05, - "loss": 162.2701, - "step": 27680 - }, - { - "epoch": 0.22374130366276393, - "grad_norm": 840.0590209960938, - "learning_rate": 4.770330255773167e-05, - "loss": 123.7889, - "step": 27690 - }, - { - "epoch": 0.22382210586704807, - "grad_norm": 1303.338623046875, - "learning_rate": 4.770034916567958e-05, - "loss": 117.7091, - "step": 27700 - }, - { - "epoch": 0.22390290807133217, - "grad_norm": 928.9658813476562, - "learning_rate": 4.769739396746489e-05, - "loss": 138.8442, - "step": 27710 - }, - { - "epoch": 0.2239837102756163, - "grad_norm": 1008.2692260742188, - "learning_rate": 4.769443696332272e-05, - "loss": 119.3787, - "step": 27720 - }, - { - "epoch": 0.22406451247990045, - "grad_norm": 569.2587280273438, - "learning_rate": 4.7691478153488356e-05, - "loss": 98.2171, - "step": 27730 - }, - { - "epoch": 0.22414531468418458, - "grad_norm": 1066.646240234375, - "learning_rate": 4.768851753819722e-05, - "loss": 135.6997, - "step": 27740 - }, - { - "epoch": 0.22422611688846872, - "grad_norm": 4011.916259765625, - "learning_rate": 4.768555511768487e-05, - "loss": 143.5886, - "step": 27750 - }, - { - "epoch": 0.22430691909275285, - "grad_norm": 1243.5986328125, - "learning_rate": 4.768259089218701e-05, - "loss": 142.0805, - "step": 27760 - }, - { - "epoch": 0.224387721297037, - "grad_norm": 1303.7891845703125, - "learning_rate": 4.767962486193949e-05, - "loss": 130.8731, - "step": 27770 - }, - { - "epoch": 0.22446852350132113, - "grad_norm": 734.0792846679688, - "learning_rate": 4.7676657027178305e-05, - "loss": 131.6446, - "step": 27780 - }, - { - "epoch": 0.22454932570560526, - "grad_norm": 1132.3414306640625, - "learning_rate": 4.7673687388139595e-05, - "loss": 103.6288, - "step": 27790 - }, - { - "epoch": 0.22463012790988937, - "grad_norm": 1001.0497436523438, - "learning_rate": 4.7670715945059635e-05, - "loss": 120.3249, - "step": 27800 - }, - { - "epoch": 0.2247109301141735, - "grad_norm": 731.0053100585938, - "learning_rate": 4.7667742698174855e-05, - "loss": 116.091, - "step": 27810 - }, - { - "epoch": 0.22479173231845764, - "grad_norm": 750.06494140625, - "learning_rate": 4.766476764772182e-05, - "loss": 149.7679, - "step": 27820 - }, - { - "epoch": 0.22487253452274178, - "grad_norm": 997.712646484375, - "learning_rate": 4.766179079393723e-05, - "loss": 160.9675, - "step": 27830 - }, - { - "epoch": 0.22495333672702592, - "grad_norm": 748.0264282226562, - "learning_rate": 4.7658812137057965e-05, - "loss": 128.0552, - "step": 27840 - }, - { - "epoch": 0.22503413893131005, - "grad_norm": 3103.683349609375, - "learning_rate": 4.7655831677320996e-05, - "loss": 178.2293, - "step": 27850 - }, - { - "epoch": 0.2251149411355942, - "grad_norm": 1120.0372314453125, - "learning_rate": 4.765284941496349e-05, - "loss": 142.773, - "step": 27860 - }, - { - "epoch": 0.22519574333987832, - "grad_norm": 786.6668701171875, - "learning_rate": 4.76498653502227e-05, - "loss": 125.9656, - "step": 27870 - }, - { - "epoch": 0.22527654554416243, - "grad_norm": 1269.94140625, - "learning_rate": 4.764687948333609e-05, - "loss": 129.2763, - "step": 27880 - }, - { - "epoch": 0.22535734774844657, - "grad_norm": 1278.632568359375, - "learning_rate": 4.7643891814541206e-05, - "loss": 132.1948, - "step": 27890 - }, - { - "epoch": 0.2254381499527307, - "grad_norm": 566.759033203125, - "learning_rate": 4.764090234407577e-05, - "loss": 116.8105, - "step": 27900 - }, - { - "epoch": 0.22551895215701484, - "grad_norm": 712.4197998046875, - "learning_rate": 4.7637911072177646e-05, - "loss": 118.8212, - "step": 27910 - }, - { - "epoch": 0.22559975436129898, - "grad_norm": 1042.465576171875, - "learning_rate": 4.763491799908484e-05, - "loss": 121.6858, - "step": 27920 - }, - { - "epoch": 0.2256805565655831, - "grad_norm": 1268.334716796875, - "learning_rate": 4.763192312503548e-05, - "loss": 113.3119, - "step": 27930 - }, - { - "epoch": 0.22576135876986725, - "grad_norm": 951.8613891601562, - "learning_rate": 4.7628926450267866e-05, - "loss": 143.2557, - "step": 27940 - }, - { - "epoch": 0.22584216097415138, - "grad_norm": 1221.649169921875, - "learning_rate": 4.7625927975020427e-05, - "loss": 131.5796, - "step": 27950 - }, - { - "epoch": 0.22592296317843552, - "grad_norm": 1194.9288330078125, - "learning_rate": 4.762292769953173e-05, - "loss": 111.9895, - "step": 27960 - }, - { - "epoch": 0.22600376538271963, - "grad_norm": 660.5962524414062, - "learning_rate": 4.761992562404051e-05, - "loss": 133.5966, - "step": 27970 - }, - { - "epoch": 0.22608456758700377, - "grad_norm": 1063.5452880859375, - "learning_rate": 4.7616921748785615e-05, - "loss": 133.8167, - "step": 27980 - }, - { - "epoch": 0.2261653697912879, - "grad_norm": 1873.297119140625, - "learning_rate": 4.761391607400606e-05, - "loss": 99.9937, - "step": 27990 - }, - { - "epoch": 0.22624617199557204, - "grad_norm": 480.669189453125, - "learning_rate": 4.761090859994098e-05, - "loss": 110.9108, - "step": 28000 } ], "logging_steps": 10, "max_steps": 123750, "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 1000, + "num_train_epochs": 2, + "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": {