diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,63085 +1,31585 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.9991117033089054, + "epoch": 4.0, "eval_steps": 500, - "global_step": 9004, + "global_step": 4504, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.00044414834554741284, - "grad_norm": 14.241565838210393, - "learning_rate": 1.1098779134295229e-08, - "loss": 0.533, + "epoch": 0.0008880994671403197, + "grad_norm": 13.040843821412901, + "learning_rate": 2.2172949002217297e-08, + "loss": 0.4427, "step": 1 }, { - "epoch": 0.0008882966910948257, - "grad_norm": 16.278912065775813, - "learning_rate": 2.2197558268590458e-08, - "loss": 0.5359, + "epoch": 0.0017761989342806395, + "grad_norm": 13.677664712800397, + "learning_rate": 4.4345898004434594e-08, + "loss": 0.5223, "step": 2 }, { - "epoch": 0.0013324450366422385, - "grad_norm": 15.38532645021989, - "learning_rate": 3.329633740288568e-08, - "loss": 0.6437, + "epoch": 0.0026642984014209592, + "grad_norm": 13.41303079869153, + "learning_rate": 6.651884700665188e-08, + "loss": 0.4662, "step": 3 }, { - "epoch": 0.0017765933821896514, - "grad_norm": 13.00040105537638, - "learning_rate": 4.4395116537180915e-08, - "loss": 0.5256, + "epoch": 0.003552397868561279, + "grad_norm": 12.591263156172579, + "learning_rate": 8.869179600886919e-08, + "loss": 0.4713, "step": 4 }, { - "epoch": 0.0022207417277370642, - "grad_norm": 13.99913175512498, - "learning_rate": 5.549389567147614e-08, - "loss": 0.559, + "epoch": 0.004440497335701598, + "grad_norm": 12.748122761493525, + "learning_rate": 1.1086474501108649e-07, + "loss": 0.4617, "step": 5 }, { - "epoch": 0.002664890073284477, - "grad_norm": 14.827666239037775, - "learning_rate": 6.659267480577137e-08, - "loss": 0.5342, + "epoch": 0.0053285968028419185, + "grad_norm": 12.251887276789628, + "learning_rate": 1.3303769401330377e-07, + "loss": 0.4433, "step": 6 }, { - "epoch": 0.00310903841883189, - "grad_norm": 14.56618533431443, - "learning_rate": 7.76914539400666e-08, - "loss": 0.5818, + "epoch": 0.006216696269982238, + "grad_norm": 12.117567915620455, + "learning_rate": 1.5521064301552109e-07, + "loss": 0.4138, "step": 7 }, { - "epoch": 0.0035531867643793028, - "grad_norm": 14.021029607148948, - "learning_rate": 8.879023307436183e-08, - "loss": 0.5116, + "epoch": 0.007104795737122558, + "grad_norm": 12.929954917378, + "learning_rate": 1.7738359201773838e-07, + "loss": 0.4779, "step": 8 }, { - "epoch": 0.003997335109926716, - "grad_norm": 13.230591443503124, - "learning_rate": 9.988901220865707e-08, - "loss": 0.5254, + "epoch": 0.007992895204262877, + "grad_norm": 12.912164370309165, + "learning_rate": 1.9955654101995567e-07, + "loss": 0.4406, "step": 9 }, { - "epoch": 0.0044414834554741284, - "grad_norm": 14.105717100587825, - "learning_rate": 1.1098779134295228e-07, - "loss": 0.5469, + "epoch": 0.008880994671403197, + "grad_norm": 12.182085316407958, + "learning_rate": 2.2172949002217298e-07, + "loss": 0.4246, "step": 10 }, { - "epoch": 0.004885631801021541, - "grad_norm": 13.393742170063549, - "learning_rate": 1.220865704772475e-07, - "loss": 0.5273, + "epoch": 0.009769094138543518, + "grad_norm": 13.840302903530718, + "learning_rate": 2.439024390243903e-07, + "loss": 0.4855, "step": 11 }, { - "epoch": 0.005329780146568954, - "grad_norm": 13.614380052118197, - "learning_rate": 1.3318534961154273e-07, - "loss": 0.5127, + "epoch": 0.010657193605683837, + "grad_norm": 12.889473918224763, + "learning_rate": 2.6607538802660754e-07, + "loss": 0.4364, "step": 12 }, { - "epoch": 0.005773928492116367, - "grad_norm": 12.784423466221073, - "learning_rate": 1.4428412874583796e-07, - "loss": 0.4306, + "epoch": 0.011545293072824156, + "grad_norm": 12.388896580352663, + "learning_rate": 2.8824833702882486e-07, + "loss": 0.4101, "step": 13 }, { - "epoch": 0.00621807683766378, - "grad_norm": 13.80866223671461, - "learning_rate": 1.553829078801332e-07, - "loss": 0.5439, + "epoch": 0.012433392539964476, + "grad_norm": 12.49046885123923, + "learning_rate": 3.1042128603104217e-07, + "loss": 0.4726, "step": 14 }, { - "epoch": 0.006662225183211193, - "grad_norm": 14.041859557487046, - "learning_rate": 1.6648168701442844e-07, - "loss": 0.5188, + "epoch": 0.013321492007104795, + "grad_norm": 11.48222167766721, + "learning_rate": 3.3259423503325944e-07, + "loss": 0.3645, "step": 15 }, { - "epoch": 0.0071063735287586055, - "grad_norm": 14.936881953932252, - "learning_rate": 1.7758046614872366e-07, - "loss": 0.5683, + "epoch": 0.014209591474245116, + "grad_norm": 12.157840445103568, + "learning_rate": 3.5476718403547675e-07, + "loss": 0.465, "step": 16 }, { - "epoch": 0.007550521874306018, - "grad_norm": 15.650434961515957, - "learning_rate": 1.886792452830189e-07, - "loss": 0.5353, + "epoch": 0.015097690941385435, + "grad_norm": 11.307771204326675, + "learning_rate": 3.7694013303769407e-07, + "loss": 0.438, "step": 17 }, { - "epoch": 0.007994670219853431, - "grad_norm": 14.344720416498966, - "learning_rate": 1.9977802441731414e-07, - "loss": 0.4958, + "epoch": 0.015985790408525755, + "grad_norm": 11.130951839748358, + "learning_rate": 3.9911308203991133e-07, + "loss": 0.4046, "step": 18 }, { - "epoch": 0.008438818565400843, - "grad_norm": 15.791401491309836, - "learning_rate": 2.1087680355160934e-07, - "loss": 0.5652, + "epoch": 0.016873889875666074, + "grad_norm": 11.745967760805645, + "learning_rate": 4.2128603104212865e-07, + "loss": 0.4274, "step": 19 }, { - "epoch": 0.008882966910948257, - "grad_norm": 13.010603153221817, - "learning_rate": 2.2197558268590456e-07, - "loss": 0.4343, + "epoch": 0.017761989342806393, + "grad_norm": 11.644501698063689, + "learning_rate": 4.4345898004434597e-07, + "loss": 0.4188, "step": 20 }, { - "epoch": 0.009327115256495669, - "grad_norm": 15.52141534233184, - "learning_rate": 2.330743618201998e-07, - "loss": 0.5389, + "epoch": 0.018650088809946713, + "grad_norm": 10.785511103209847, + "learning_rate": 4.6563192904656323e-07, + "loss": 0.4375, "step": 21 }, { - "epoch": 0.009771263602043083, - "grad_norm": 15.756642888500245, - "learning_rate": 2.44173140954495e-07, - "loss": 0.5326, + "epoch": 0.019538188277087035, + "grad_norm": 9.26581226542501, + "learning_rate": 4.878048780487805e-07, + "loss": 0.3922, "step": 22 }, { - "epoch": 0.010215411947590495, - "grad_norm": 15.268952002568577, - "learning_rate": 2.5527192008879024e-07, - "loss": 0.4824, + "epoch": 0.020426287744227355, + "grad_norm": 9.9748547025296, + "learning_rate": 5.099778270509978e-07, + "loss": 0.3854, "step": 23 }, { - "epoch": 0.010659560293137908, - "grad_norm": 15.932949745262599, - "learning_rate": 2.6637069922308547e-07, - "loss": 0.5079, + "epoch": 0.021314387211367674, + "grad_norm": 10.047025994539355, + "learning_rate": 5.321507760532151e-07, + "loss": 0.4155, "step": 24 }, { - "epoch": 0.01110370863868532, - "grad_norm": 15.58611205142026, - "learning_rate": 2.7746947835738074e-07, - "loss": 0.476, + "epoch": 0.022202486678507993, + "grad_norm": 9.0090924049002, + "learning_rate": 5.543237250554324e-07, + "loss": 0.3851, "step": 25 }, { - "epoch": 0.011547856984232734, - "grad_norm": 15.375627217870338, - "learning_rate": 2.885682574916759e-07, - "loss": 0.4744, + "epoch": 0.023090586145648313, + "grad_norm": 8.616619523300912, + "learning_rate": 5.764966740576497e-07, + "loss": 0.3766, "step": 26 }, { - "epoch": 0.011992005329780146, - "grad_norm": 16.173866872463734, - "learning_rate": 2.9966703662597114e-07, - "loss": 0.5543, + "epoch": 0.023978685612788632, + "grad_norm": 8.573705551462735, + "learning_rate": 5.98669623059867e-07, + "loss": 0.3393, "step": 27 }, { - "epoch": 0.01243615367532756, - "grad_norm": 11.889556738869189, - "learning_rate": 3.107658157602664e-07, - "loss": 0.459, + "epoch": 0.02486678507992895, + "grad_norm": 5.89996526738856, + "learning_rate": 6.208425720620843e-07, + "loss": 0.332, "step": 28 }, { - "epoch": 0.012880302020874972, - "grad_norm": 9.805467442732954, - "learning_rate": 3.218645948945616e-07, - "loss": 0.4039, + "epoch": 0.02575488454706927, + "grad_norm": 4.133420274556032, + "learning_rate": 6.430155210643016e-07, + "loss": 0.2742, "step": 29 }, { - "epoch": 0.013324450366422385, - "grad_norm": 9.109396128992588, - "learning_rate": 3.3296337402885687e-07, - "loss": 0.374, + "epoch": 0.02664298401420959, + "grad_norm": 4.562422798003308, + "learning_rate": 6.651884700665189e-07, + "loss": 0.2775, "step": 30 }, { - "epoch": 0.013768598711969797, - "grad_norm": 10.263226704180568, - "learning_rate": 3.440621531631521e-07, - "loss": 0.5212, + "epoch": 0.027531083481349913, + "grad_norm": 4.670481858708041, + "learning_rate": 6.873614190687362e-07, + "loss": 0.2935, "step": 31 }, { - "epoch": 0.014212747057517211, - "grad_norm": 8.67404802135948, - "learning_rate": 3.551609322974473e-07, - "loss": 0.449, + "epoch": 0.028419182948490232, + "grad_norm": 4.509382510160496, + "learning_rate": 7.095343680709535e-07, + "loss": 0.3363, "step": 32 }, { - "epoch": 0.014656895403064623, - "grad_norm": 9.21127577620048, - "learning_rate": 3.6625971143174255e-07, - "loss": 0.4436, + "epoch": 0.02930728241563055, + "grad_norm": 4.363609746277317, + "learning_rate": 7.317073170731707e-07, + "loss": 0.2934, "step": 33 }, { - "epoch": 0.015101043748612037, - "grad_norm": 8.76521126068558, - "learning_rate": 3.773584905660378e-07, - "loss": 0.4802, + "epoch": 0.03019538188277087, + "grad_norm": 3.596275007281521, + "learning_rate": 7.538802660753881e-07, + "loss": 0.2653, "step": 34 }, { - "epoch": 0.015545192094159449, - "grad_norm": 8.042704299848994, - "learning_rate": 3.8845726970033295e-07, - "loss": 0.4144, + "epoch": 0.03108348134991119, + "grad_norm": 3.351254206377662, + "learning_rate": 7.760532150776054e-07, + "loss": 0.2821, "step": 35 }, { - "epoch": 0.015989340439706862, - "grad_norm": 7.470228322799545, - "learning_rate": 3.995560488346283e-07, - "loss": 0.3999, + "epoch": 0.03197158081705151, + "grad_norm": 3.1261834239669195, + "learning_rate": 7.982261640798227e-07, + "loss": 0.2584, "step": 36 }, { - "epoch": 0.016433488785254274, - "grad_norm": 8.608115328544926, - "learning_rate": 4.1065482796892345e-07, - "loss": 0.4636, + "epoch": 0.03285968028419183, + "grad_norm": 3.1711488715341862, + "learning_rate": 8.203991130820401e-07, + "loss": 0.2705, "step": 37 }, { - "epoch": 0.016877637130801686, - "grad_norm": 7.697028697048977, - "learning_rate": 4.217536071032187e-07, - "loss": 0.3997, + "epoch": 0.03374777975133215, + "grad_norm": 2.786764471661445, + "learning_rate": 8.425720620842573e-07, + "loss": 0.2583, "step": 38 }, { - "epoch": 0.017321785476349102, - "grad_norm": 5.663561386344377, - "learning_rate": 4.328523862375139e-07, - "loss": 0.3691, + "epoch": 0.03463587921847247, + "grad_norm": 2.8688909432984784, + "learning_rate": 8.647450110864745e-07, + "loss": 0.2202, "step": 39 }, { - "epoch": 0.017765933821896514, - "grad_norm": 5.938577871272599, - "learning_rate": 4.4395116537180913e-07, - "loss": 0.3926, + "epoch": 0.035523978685612786, + "grad_norm": 2.3720310952864145, + "learning_rate": 8.869179600886919e-07, + "loss": 0.2556, "step": 40 }, { - "epoch": 0.018210082167443926, - "grad_norm": 6.0845722806591365, - "learning_rate": 4.5504994450610435e-07, - "loss": 0.4168, + "epoch": 0.03641207815275311, + "grad_norm": 2.1357508951578352, + "learning_rate": 9.090909090909091e-07, + "loss": 0.2256, "step": 41 }, { - "epoch": 0.018654230512991338, - "grad_norm": 4.988124447126365, - "learning_rate": 4.661487236403996e-07, - "loss": 0.3995, + "epoch": 0.037300177619893425, + "grad_norm": 1.9597318478589312, + "learning_rate": 9.312638580931265e-07, + "loss": 0.2283, "step": 42 }, { - "epoch": 0.019098378858538753, - "grad_norm": 5.157391164042898, - "learning_rate": 4.772475027746949e-07, - "loss": 0.3846, + "epoch": 0.03818827708703375, + "grad_norm": 2.321583307882343, + "learning_rate": 9.534368070953437e-07, + "loss": 0.2276, "step": 43 }, { - "epoch": 0.019542527204086165, - "grad_norm": 4.524592140728303, - "learning_rate": 4.8834628190899e-07, - "loss": 0.3393, + "epoch": 0.03907637655417407, + "grad_norm": 2.5274924171412754, + "learning_rate": 9.75609756097561e-07, + "loss": 0.2243, "step": 44 }, { - "epoch": 0.019986675549633577, - "grad_norm": 4.748116191926127, - "learning_rate": 4.994450610432853e-07, - "loss": 0.3278, + "epoch": 0.03996447602131439, + "grad_norm": 2.5093600192649723, + "learning_rate": 9.977827050997784e-07, + "loss": 0.2387, "step": 45 }, { - "epoch": 0.02043082389518099, - "grad_norm": 5.1431403204013755, - "learning_rate": 5.105438401775805e-07, - "loss": 0.3638, + "epoch": 0.04085257548845471, + "grad_norm": 2.258421609405366, + "learning_rate": 1.0199556541019955e-06, + "loss": 0.2294, "step": 46 }, { - "epoch": 0.020874972240728405, - "grad_norm": 5.298031590299299, - "learning_rate": 5.216426193118758e-07, - "loss": 0.3682, + "epoch": 0.041740674955595025, + "grad_norm": 2.0837448190567245, + "learning_rate": 1.042128603104213e-06, + "loss": 0.2293, "step": 47 }, { - "epoch": 0.021319120586275817, - "grad_norm": 4.678508163207935, - "learning_rate": 5.327413984461709e-07, - "loss": 0.3772, + "epoch": 0.04262877442273535, + "grad_norm": 1.7457513775154567, + "learning_rate": 1.0643015521064302e-06, + "loss": 0.2379, "step": 48 }, { - "epoch": 0.02176326893182323, - "grad_norm": 4.39773491079716, - "learning_rate": 5.438401775804662e-07, - "loss": 0.3682, + "epoch": 0.043516873889875664, + "grad_norm": 1.504770899895591, + "learning_rate": 1.0864745011086475e-06, + "loss": 0.2182, "step": 49 }, { - "epoch": 0.02220741727737064, - "grad_norm": 4.313057042541789, - "learning_rate": 5.549389567147615e-07, - "loss": 0.3237, + "epoch": 0.04440497335701599, + "grad_norm": 1.650451976656802, + "learning_rate": 1.1086474501108648e-06, + "loss": 0.185, "step": 50 }, { - "epoch": 0.022651565622918056, - "grad_norm": 4.449086461913763, - "learning_rate": 5.660377358490567e-07, - "loss": 0.3623, + "epoch": 0.0452930728241563, + "grad_norm": 1.7889570187921355, + "learning_rate": 1.130820399113082e-06, + "loss": 0.2132, "step": 51 }, { - "epoch": 0.023095713968465468, - "grad_norm": 2.9654000926481783, - "learning_rate": 5.771365149833518e-07, - "loss": 0.2696, + "epoch": 0.046181172291296625, + "grad_norm": 1.6099856493045095, + "learning_rate": 1.1529933481152994e-06, + "loss": 0.1983, "step": 52 }, { - "epoch": 0.02353986231401288, - "grad_norm": 2.9606096375757986, - "learning_rate": 5.882352941176471e-07, - "loss": 0.2518, + "epoch": 0.04706927175843695, + "grad_norm": 1.3281445457110155, + "learning_rate": 1.1751662971175167e-06, + "loss": 0.184, "step": 53 }, { - "epoch": 0.023984010659560292, - "grad_norm": 3.4611785952391463, - "learning_rate": 5.993340732519423e-07, - "loss": 0.2782, + "epoch": 0.047957371225577264, + "grad_norm": 1.5880809634733573, + "learning_rate": 1.197339246119734e-06, + "loss": 0.2071, "step": 54 }, { - "epoch": 0.024428159005107707, - "grad_norm": 3.1814895723143284, - "learning_rate": 6.104328523862376e-07, - "loss": 0.2906, + "epoch": 0.04884547069271759, + "grad_norm": 1.489451471752179, + "learning_rate": 1.2195121951219514e-06, + "loss": 0.2037, "step": 55 }, { - "epoch": 0.02487230735065512, - "grad_norm": 3.1808671717848496, - "learning_rate": 6.215316315205328e-07, - "loss": 0.2983, + "epoch": 0.0497335701598579, + "grad_norm": 1.8245481090703408, + "learning_rate": 1.2416851441241687e-06, + "loss": 0.2003, "step": 56 }, { - "epoch": 0.02531645569620253, - "grad_norm": 2.118090185635995, - "learning_rate": 6.32630410654828e-07, - "loss": 0.2158, + "epoch": 0.050621669626998225, + "grad_norm": 1.7421793808491324, + "learning_rate": 1.2638580931263858e-06, + "loss": 0.2021, "step": 57 }, { - "epoch": 0.025760604041749943, - "grad_norm": 2.697428855954616, - "learning_rate": 6.437291897891232e-07, - "loss": 0.2815, + "epoch": 0.05150976909413854, + "grad_norm": 1.478392790648627, + "learning_rate": 1.2860310421286031e-06, + "loss": 0.194, "step": 58 }, { - "epoch": 0.02620475238729736, - "grad_norm": 2.8924667859746926, - "learning_rate": 6.548279689234186e-07, - "loss": 0.2487, + "epoch": 0.052397868561278864, + "grad_norm": 1.1458896328589037, + "learning_rate": 1.3082039911308206e-06, + "loss": 0.1863, "step": 59 }, { - "epoch": 0.02664890073284477, - "grad_norm": 2.346290297240456, - "learning_rate": 6.659267480577137e-07, - "loss": 0.2496, + "epoch": 0.05328596802841918, + "grad_norm": 1.343280607139686, + "learning_rate": 1.3303769401330377e-06, + "loss": 0.1902, "step": 60 }, { - "epoch": 0.027093049078392183, - "grad_norm": 2.498409492992046, - "learning_rate": 6.77025527192009e-07, - "loss": 0.2728, + "epoch": 0.0541740674955595, + "grad_norm": 1.3741303683100614, + "learning_rate": 1.352549889135255e-06, + "loss": 0.2023, "step": 61 }, { - "epoch": 0.027537197423939595, - "grad_norm": 3.674582885160151, - "learning_rate": 6.881243063263042e-07, - "loss": 0.2383, + "epoch": 0.055062166962699825, + "grad_norm": 1.258896205353153, + "learning_rate": 1.3747228381374724e-06, + "loss": 0.2016, "step": 62 }, { - "epoch": 0.02798134576948701, - "grad_norm": 2.799425394175707, - "learning_rate": 6.992230854605994e-07, - "loss": 0.3199, + "epoch": 0.05595026642984014, + "grad_norm": 1.3943019068049274, + "learning_rate": 1.3968957871396897e-06, + "loss": 0.2135, "step": 63 }, { - "epoch": 0.028425494115034422, - "grad_norm": 2.4559947409849805, - "learning_rate": 7.103218645948946e-07, - "loss": 0.2693, + "epoch": 0.056838365896980464, + "grad_norm": 1.3471146628155388, + "learning_rate": 1.419068736141907e-06, + "loss": 0.1809, "step": 64 }, { - "epoch": 0.028869642460581834, - "grad_norm": 2.293048008449068, - "learning_rate": 7.214206437291898e-07, - "loss": 0.2519, + "epoch": 0.05772646536412078, + "grad_norm": 1.5701464198094832, + "learning_rate": 1.4412416851441243e-06, + "loss": 0.1786, "step": 65 }, { - "epoch": 0.029313790806129246, - "grad_norm": 2.442411284742022, - "learning_rate": 7.325194228634851e-07, - "loss": 0.2574, + "epoch": 0.0586145648312611, + "grad_norm": 1.371471043542019, + "learning_rate": 1.4634146341463414e-06, + "loss": 0.1986, "step": 66 }, { - "epoch": 0.02975793915167666, - "grad_norm": 1.9858437264830013, - "learning_rate": 7.436182019977803e-07, - "loss": 0.2059, + "epoch": 0.05950266429840142, + "grad_norm": 1.488399056738155, + "learning_rate": 1.485587583148559e-06, + "loss": 0.2069, "step": 67 }, { - "epoch": 0.030202087497224073, - "grad_norm": 2.561701423318645, - "learning_rate": 7.547169811320755e-07, - "loss": 0.2526, + "epoch": 0.06039076376554174, + "grad_norm": 1.1836349492010712, + "learning_rate": 1.5077605321507763e-06, + "loss": 0.1928, "step": 68 }, { - "epoch": 0.030646235842771485, - "grad_norm": 1.7160142067734883, - "learning_rate": 7.658157602663707e-07, - "loss": 0.2103, + "epoch": 0.06127886323268206, + "grad_norm": 1.5478134200389975, + "learning_rate": 1.5299334811529934e-06, + "loss": 0.183, "step": 69 }, { - "epoch": 0.031090384188318897, - "grad_norm": 3.0262045436370473, - "learning_rate": 7.769145394006659e-07, - "loss": 0.278, + "epoch": 0.06216696269982238, + "grad_norm": 1.2814053714377254, + "learning_rate": 1.5521064301552107e-06, + "loss": 0.2091, "step": 70 }, { - "epoch": 0.03153453253386631, - "grad_norm": 2.1467589387452786, - "learning_rate": 7.880133185349612e-07, - "loss": 0.2033, + "epoch": 0.0630550621669627, + "grad_norm": 1.1829355995137947, + "learning_rate": 1.5742793791574282e-06, + "loss": 0.1475, "step": 71 }, { - "epoch": 0.031978680879413725, - "grad_norm": 2.161876762654834, - "learning_rate": 7.991120976692566e-07, - "loss": 0.2319, + "epoch": 0.06394316163410302, + "grad_norm": 1.3941994415209034, + "learning_rate": 1.5964523281596453e-06, + "loss": 0.1432, "step": 72 }, { - "epoch": 0.03242282922496114, - "grad_norm": 2.376008135141836, - "learning_rate": 8.102108768035517e-07, - "loss": 0.2775, + "epoch": 0.06483126110124333, + "grad_norm": 1.2121269627847069, + "learning_rate": 1.6186252771618627e-06, + "loss": 0.1789, "step": 73 }, { - "epoch": 0.03286697757050855, - "grad_norm": 2.12961751744709, - "learning_rate": 8.213096559378469e-07, - "loss": 0.2066, + "epoch": 0.06571936056838366, + "grad_norm": 1.3375032889817995, + "learning_rate": 1.6407982261640802e-06, + "loss": 0.1879, "step": 74 }, { - "epoch": 0.033311125916055964, - "grad_norm": 2.087503322703807, - "learning_rate": 8.324084350721422e-07, - "loss": 0.2432, + "epoch": 0.06660746003552398, + "grad_norm": 1.1029253885450885, + "learning_rate": 1.6629711751662973e-06, + "loss": 0.1255, "step": 75 }, { - "epoch": 0.03375527426160337, - "grad_norm": 2.5677509345382354, - "learning_rate": 8.435072142064374e-07, - "loss": 0.2027, + "epoch": 0.0674955595026643, + "grad_norm": 1.3094518409858786, + "learning_rate": 1.6851441241685146e-06, + "loss": 0.1831, "step": 76 }, { - "epoch": 0.03419942260715079, - "grad_norm": 1.7422813109091475, - "learning_rate": 8.546059933407326e-07, - "loss": 0.1981, + "epoch": 0.06838365896980461, + "grad_norm": 1.5769224156692587, + "learning_rate": 1.707317073170732e-06, + "loss": 0.1672, "step": 77 }, { - "epoch": 0.034643570952698204, - "grad_norm": 1.9135429042700127, - "learning_rate": 8.657047724750278e-07, - "loss": 0.1914, + "epoch": 0.06927175843694494, + "grad_norm": 1.2775816415734151, + "learning_rate": 1.729490022172949e-06, + "loss": 0.1735, "step": 78 }, { - "epoch": 0.03508771929824561, - "grad_norm": 2.0631119197996357, - "learning_rate": 8.768035516093231e-07, - "loss": 0.2247, + "epoch": 0.07015985790408526, + "grad_norm": 0.9763817638147934, + "learning_rate": 1.7516629711751666e-06, + "loss": 0.1344, "step": 79 }, { - "epoch": 0.03553186764379303, - "grad_norm": 2.3180619400309452, - "learning_rate": 8.879023307436183e-07, - "loss": 0.2491, + "epoch": 0.07104795737122557, + "grad_norm": 1.201528948127603, + "learning_rate": 1.7738359201773839e-06, + "loss": 0.1485, "step": 80 }, { - "epoch": 0.03597601598934044, - "grad_norm": 1.9425485940179281, - "learning_rate": 8.990011098779134e-07, - "loss": 0.2187, + "epoch": 0.0719360568383659, + "grad_norm": 1.1447121851778421, + "learning_rate": 1.796008869179601e-06, + "loss": 0.1699, "step": 81 }, { - "epoch": 0.03642016433488785, - "grad_norm": 1.8295253723979448, - "learning_rate": 9.100998890122087e-07, - "loss": 0.2012, + "epoch": 0.07282415630550622, + "grad_norm": 1.05293863502435, + "learning_rate": 1.8181818181818183e-06, + "loss": 0.1395, "step": 82 }, { - "epoch": 0.03686431268043527, - "grad_norm": 1.9729843745153348, - "learning_rate": 9.211986681465039e-07, - "loss": 0.209, + "epoch": 0.07371225577264653, + "grad_norm": 1.3517411214685857, + "learning_rate": 1.8403547671840354e-06, + "loss": 0.1844, "step": 83 }, { - "epoch": 0.037308461025982675, - "grad_norm": 2.037422711387131, - "learning_rate": 9.322974472807992e-07, - "loss": 0.2094, + "epoch": 0.07460035523978685, + "grad_norm": 1.7577841297827297, + "learning_rate": 1.862527716186253e-06, + "loss": 0.1779, "step": 84 }, { - "epoch": 0.03775260937153009, - "grad_norm": 1.577701916798112, - "learning_rate": 9.433962264150944e-07, - "loss": 0.1858, + "epoch": 0.07548845470692718, + "grad_norm": 1.1023377599109887, + "learning_rate": 1.8847006651884702e-06, + "loss": 0.1633, "step": 85 }, { - "epoch": 0.038196757717077506, - "grad_norm": 2.139670658650865, - "learning_rate": 9.544950055493897e-07, - "loss": 0.2254, + "epoch": 0.0763765541740675, + "grad_norm": 1.1472821024712776, + "learning_rate": 1.9068736141906873e-06, + "loss": 0.1413, "step": 86 }, { - "epoch": 0.038640906062624915, - "grad_norm": 2.8502450993225956, - "learning_rate": 9.65593784683685e-07, - "loss": 0.2145, + "epoch": 0.07726465364120781, + "grad_norm": 1.1520998196670071, + "learning_rate": 1.9290465631929047e-06, + "loss": 0.1801, "step": 87 }, { - "epoch": 0.03908505440817233, - "grad_norm": 1.7436844528376316, - "learning_rate": 9.7669256381798e-07, - "loss": 0.1927, + "epoch": 0.07815275310834814, + "grad_norm": 1.2441514098841544, + "learning_rate": 1.951219512195122e-06, + "loss": 0.16, "step": 88 }, { - "epoch": 0.039529202753719746, - "grad_norm": 2.2129521928377796, - "learning_rate": 9.877913429522753e-07, - "loss": 0.2449, + "epoch": 0.07904085257548846, + "grad_norm": 1.475416128749588, + "learning_rate": 1.9733924611973393e-06, + "loss": 0.1493, "step": 89 }, { - "epoch": 0.039973351099267154, - "grad_norm": 1.7063140977463, - "learning_rate": 9.988901220865706e-07, - "loss": 0.2002, + "epoch": 0.07992895204262877, + "grad_norm": 1.5111225554836951, + "learning_rate": 1.995565410199557e-06, + "loss": 0.1733, "step": 90 }, { - "epoch": 0.04041749944481457, - "grad_norm": 1.7274557393115937, - "learning_rate": 1.009988901220866e-06, - "loss": 0.2143, + "epoch": 0.08081705150976909, + "grad_norm": 1.3681513326820232, + "learning_rate": 2.017738359201774e-06, + "loss": 0.1695, "step": 91 }, { - "epoch": 0.04086164779036198, - "grad_norm": 1.6036310126204065, - "learning_rate": 1.021087680355161e-06, - "loss": 0.209, + "epoch": 0.08170515097690942, + "grad_norm": 1.4294052270061006, + "learning_rate": 2.039911308203991e-06, + "loss": 0.1545, "step": 92 }, { - "epoch": 0.041305796135909394, - "grad_norm": 1.6067518053520686, - "learning_rate": 1.0321864594894562e-06, - "loss": 0.2051, + "epoch": 0.08259325044404973, + "grad_norm": 1.1951778622057059, + "learning_rate": 2.0620842572062086e-06, + "loss": 0.1517, "step": 93 }, { - "epoch": 0.04174994448145681, - "grad_norm": 2.193990373798874, - "learning_rate": 1.0432852386237515e-06, - "loss": 0.2124, + "epoch": 0.08348134991119005, + "grad_norm": 1.4048018125143116, + "learning_rate": 2.084257206208426e-06, + "loss": 0.1379, "step": 94 }, { - "epoch": 0.04219409282700422, - "grad_norm": 2.0886683470736536, - "learning_rate": 1.0543840177580466e-06, - "loss": 0.2283, + "epoch": 0.08436944937833037, + "grad_norm": 1.2064909876304688, + "learning_rate": 2.106430155210643e-06, + "loss": 0.1539, "step": 95 }, { - "epoch": 0.04263824117255163, - "grad_norm": 2.111784187748928, - "learning_rate": 1.0654827968923419e-06, - "loss": 0.2035, + "epoch": 0.0852575488454707, + "grad_norm": 1.2323744596339212, + "learning_rate": 2.1286031042128603e-06, + "loss": 0.1438, "step": 96 }, { - "epoch": 0.04308238951809905, - "grad_norm": 2.082762904011713, - "learning_rate": 1.0765815760266371e-06, - "loss": 0.1979, + "epoch": 0.08614564831261101, + "grad_norm": 1.1549888394827348, + "learning_rate": 2.150776053215078e-06, + "loss": 0.1554, "step": 97 }, { - "epoch": 0.04352653786364646, - "grad_norm": 1.9566752129472822, - "learning_rate": 1.0876803551609324e-06, - "loss": 0.2146, + "epoch": 0.08703374777975133, + "grad_norm": 1.2476661230334434, + "learning_rate": 2.172949002217295e-06, + "loss": 0.1574, "step": 98 }, { - "epoch": 0.04397068620919387, - "grad_norm": 1.553353783765945, - "learning_rate": 1.0987791342952277e-06, - "loss": 0.1682, + "epoch": 0.08792184724689166, + "grad_norm": 1.5260144602597139, + "learning_rate": 2.1951219512195125e-06, + "loss": 0.1605, "step": 99 }, { - "epoch": 0.04441483455474128, - "grad_norm": 1.9387075500236326, - "learning_rate": 1.109877913429523e-06, - "loss": 0.1792, + "epoch": 0.08880994671403197, + "grad_norm": 1.347374395733047, + "learning_rate": 2.2172949002217296e-06, + "loss": 0.155, "step": 100 }, { - "epoch": 0.044858982900288696, - "grad_norm": 1.980937561976891, - "learning_rate": 1.120976692563818e-06, - "loss": 0.1896, + "epoch": 0.08969804618117229, + "grad_norm": 1.0555188775346487, + "learning_rate": 2.2394678492239467e-06, + "loss": 0.139, "step": 101 }, { - "epoch": 0.04530313124583611, - "grad_norm": 2.4054051662002824, - "learning_rate": 1.1320754716981133e-06, - "loss": 0.2097, + "epoch": 0.0905861456483126, + "grad_norm": 1.367331017367306, + "learning_rate": 2.261640798226164e-06, + "loss": 0.1449, "step": 102 }, { - "epoch": 0.04574727959138352, - "grad_norm": 1.8504294534254195, - "learning_rate": 1.1431742508324086e-06, - "loss": 0.1795, + "epoch": 0.09147424511545293, + "grad_norm": 1.3152545619381775, + "learning_rate": 2.2838137472283817e-06, + "loss": 0.1585, "step": 103 }, { - "epoch": 0.046191427936930936, - "grad_norm": 1.8842730303029716, - "learning_rate": 1.1542730299667037e-06, - "loss": 0.1883, + "epoch": 0.09236234458259325, + "grad_norm": 1.0647690513712063, + "learning_rate": 2.305986696230599e-06, + "loss": 0.1325, "step": 104 }, { - "epoch": 0.046635576282478344, - "grad_norm": 1.5610944241824418, - "learning_rate": 1.165371809100999e-06, - "loss": 0.1655, + "epoch": 0.09325044404973357, + "grad_norm": 1.5394023192423911, + "learning_rate": 2.3281596452328164e-06, + "loss": 0.1351, "step": 105 }, { - "epoch": 0.04707972462802576, - "grad_norm": 1.5819842411294622, - "learning_rate": 1.1764705882352942e-06, - "loss": 0.1811, + "epoch": 0.0941385435168739, + "grad_norm": 1.4071751363116185, + "learning_rate": 2.3503325942350335e-06, + "loss": 0.1368, "step": 106 }, { - "epoch": 0.047523872973573175, - "grad_norm": 1.9979272057896553, - "learning_rate": 1.1875693673695895e-06, - "loss": 0.1823, + "epoch": 0.09502664298401421, + "grad_norm": 1.0560128882683737, + "learning_rate": 2.3725055432372506e-06, + "loss": 0.1371, "step": 107 }, { - "epoch": 0.047968021319120584, - "grad_norm": 1.9458215900555198, - "learning_rate": 1.1986681465038846e-06, - "loss": 0.2136, + "epoch": 0.09591474245115453, + "grad_norm": 1.11181193435597, + "learning_rate": 2.394678492239468e-06, + "loss": 0.1649, "step": 108 }, { - "epoch": 0.048412169664668, - "grad_norm": 1.8191486796264364, - "learning_rate": 1.2097669256381799e-06, - "loss": 0.2082, + "epoch": 0.09680284191829484, + "grad_norm": 1.241176938087842, + "learning_rate": 2.4168514412416856e-06, + "loss": 0.1645, "step": 109 }, { - "epoch": 0.048856318010215415, - "grad_norm": 1.9290831624594054, - "learning_rate": 1.2208657047724751e-06, - "loss": 0.1731, + "epoch": 0.09769094138543517, + "grad_norm": 1.40617304043457, + "learning_rate": 2.4390243902439027e-06, + "loss": 0.1468, "step": 110 }, { - "epoch": 0.04930046635576282, - "grad_norm": 2.1157513592152233, - "learning_rate": 1.2319644839067704e-06, - "loss": 0.1883, + "epoch": 0.09857904085257549, + "grad_norm": 1.034869455847088, + "learning_rate": 2.46119733924612e-06, + "loss": 0.1333, "step": 111 }, { - "epoch": 0.04974461470131024, - "grad_norm": 2.0021290506932026, - "learning_rate": 1.2430632630410657e-06, - "loss": 0.1906, + "epoch": 0.0994671403197158, + "grad_norm": 1.1404231888742273, + "learning_rate": 2.4833702882483374e-06, + "loss": 0.1436, "step": 112 }, { - "epoch": 0.05018876304685765, - "grad_norm": 1.9159228327922788, - "learning_rate": 1.254162042175361e-06, - "loss": 0.2176, + "epoch": 0.10035523978685613, + "grad_norm": 1.0366648839999737, + "learning_rate": 2.5055432372505545e-06, + "loss": 0.1557, "step": 113 }, { - "epoch": 0.05063291139240506, - "grad_norm": 1.7650876352312228, - "learning_rate": 1.265260821309656e-06, - "loss": 0.1718, + "epoch": 0.10124333925399645, + "grad_norm": 1.4059313118048324, + "learning_rate": 2.5277161862527716e-06, + "loss": 0.1499, "step": 114 }, { - "epoch": 0.05107705973795248, - "grad_norm": 2.03898072034729, - "learning_rate": 1.2763596004439513e-06, - "loss": 0.1898, + "epoch": 0.10213143872113677, + "grad_norm": 2.3968727543849626, + "learning_rate": 2.549889135254989e-06, + "loss": 0.1517, "step": 115 }, { - "epoch": 0.051521208083499886, - "grad_norm": 1.6693420710972704, - "learning_rate": 1.2874583795782464e-06, - "loss": 0.1759, + "epoch": 0.10301953818827708, + "grad_norm": 1.339768764551887, + "learning_rate": 2.5720620842572062e-06, + "loss": 0.1321, "step": 116 }, { - "epoch": 0.0519653564290473, - "grad_norm": 2.335357803271554, - "learning_rate": 1.2985571587125417e-06, - "loss": 0.1837, + "epoch": 0.10390763765541741, + "grad_norm": 1.4530407107366679, + "learning_rate": 2.5942350332594233e-06, + "loss": 0.1197, "step": 117 }, { - "epoch": 0.05240950477459472, - "grad_norm": 1.682419177219224, - "learning_rate": 1.3096559378468371e-06, - "loss": 0.1808, + "epoch": 0.10479573712255773, + "grad_norm": 1.5225116947078698, + "learning_rate": 2.6164079822616413e-06, + "loss": 0.133, "step": 118 }, { - "epoch": 0.052853653120142126, - "grad_norm": 2.0519984686691126, - "learning_rate": 1.3207547169811322e-06, - "loss": 0.1982, + "epoch": 0.10568383658969804, + "grad_norm": 1.1686399652945325, + "learning_rate": 2.6385809312638584e-06, + "loss": 0.1354, "step": 119 }, { - "epoch": 0.05329780146568954, - "grad_norm": 1.6368796114014708, - "learning_rate": 1.3318534961154275e-06, - "loss": 0.1758, + "epoch": 0.10657193605683836, + "grad_norm": 1.6020618834435372, + "learning_rate": 2.6607538802660755e-06, + "loss": 0.1367, "step": 120 }, { - "epoch": 0.05374194981123695, - "grad_norm": 1.6286801352711888, - "learning_rate": 1.3429522752497226e-06, - "loss": 0.1936, + "epoch": 0.10746003552397869, + "grad_norm": 1.1988715520142583, + "learning_rate": 2.682926829268293e-06, + "loss": 0.1403, "step": 121 }, { - "epoch": 0.054186098156784365, - "grad_norm": 1.8633084178414114, - "learning_rate": 1.354051054384018e-06, - "loss": 0.202, + "epoch": 0.108348134991119, + "grad_norm": 1.0364273812267792, + "learning_rate": 2.70509977827051e-06, + "loss": 0.1318, "step": 122 }, { - "epoch": 0.05463024650233178, - "grad_norm": 1.9496618888186672, - "learning_rate": 1.3651498335183131e-06, - "loss": 0.2025, + "epoch": 0.10923623445825932, + "grad_norm": 1.1057236878883572, + "learning_rate": 2.7272727272727272e-06, + "loss": 0.1392, "step": 123 }, { - "epoch": 0.05507439484787919, - "grad_norm": 2.0859989784182353, - "learning_rate": 1.3762486126526084e-06, - "loss": 0.1903, + "epoch": 0.11012433392539965, + "grad_norm": 1.1277933112704228, + "learning_rate": 2.7494456762749448e-06, + "loss": 0.1256, "step": 124 }, { - "epoch": 0.055518543193426605, - "grad_norm": 1.993272387486733, - "learning_rate": 1.3873473917869035e-06, - "loss": 0.2123, + "epoch": 0.11101243339253997, + "grad_norm": 1.0659897014429838, + "learning_rate": 2.7716186252771623e-06, + "loss": 0.1248, "step": 125 }, { - "epoch": 0.05596269153897402, - "grad_norm": 2.2645503131575735, - "learning_rate": 1.3984461709211987e-06, - "loss": 0.2069, + "epoch": 0.11190053285968028, + "grad_norm": 1.186164779320272, + "learning_rate": 2.7937915742793794e-06, + "loss": 0.116, "step": 126 }, { - "epoch": 0.05640683988452143, - "grad_norm": 1.581545113462678, - "learning_rate": 1.409544950055494e-06, - "loss": 0.1804, + "epoch": 0.1127886323268206, + "grad_norm": 1.6226791741262894, + "learning_rate": 2.815964523281597e-06, + "loss": 0.126, "step": 127 }, { - "epoch": 0.056850988230068844, - "grad_norm": 1.9654703741649047, - "learning_rate": 1.4206437291897893e-06, - "loss": 0.1625, + "epoch": 0.11367673179396093, + "grad_norm": 1.0482165891480628, + "learning_rate": 2.838137472283814e-06, + "loss": 0.1207, "step": 128 }, { - "epoch": 0.05729513657561625, - "grad_norm": 2.452801004200468, - "learning_rate": 1.4317425083240844e-06, - "loss": 0.1779, + "epoch": 0.11456483126110124, + "grad_norm": 1.358096964021043, + "learning_rate": 2.860310421286031e-06, + "loss": 0.1558, "step": 129 }, { - "epoch": 0.05773928492116367, - "grad_norm": 1.847246271423822, - "learning_rate": 1.4428412874583796e-06, - "loss": 0.1749, + "epoch": 0.11545293072824156, + "grad_norm": 1.9653651902270786, + "learning_rate": 2.8824833702882487e-06, + "loss": 0.1464, "step": 130 }, { - "epoch": 0.05818343326671108, - "grad_norm": 1.9357616103684274, - "learning_rate": 1.4539400665926751e-06, - "loss": 0.2213, + "epoch": 0.11634103019538189, + "grad_norm": 0.8888278900485693, + "learning_rate": 2.9046563192904658e-06, + "loss": 0.114, "step": 131 }, { - "epoch": 0.05862758161225849, - "grad_norm": 1.495082490211886, - "learning_rate": 1.4650388457269702e-06, - "loss": 0.1579, + "epoch": 0.1172291296625222, + "grad_norm": 1.0600150312970615, + "learning_rate": 2.926829268292683e-06, + "loss": 0.1442, "step": 132 }, { - "epoch": 0.05907172995780591, - "grad_norm": 1.5859431774202346, - "learning_rate": 1.4761376248612655e-06, - "loss": 0.1606, + "epoch": 0.11811722912966252, + "grad_norm": 2.3980686456454072, + "learning_rate": 2.949002217294901e-06, + "loss": 0.1531, "step": 133 }, { - "epoch": 0.05951587830335332, - "grad_norm": 2.8376409272544687, - "learning_rate": 1.4872364039955605e-06, - "loss": 0.2481, + "epoch": 0.11900532859680284, + "grad_norm": 1.537750142292643, + "learning_rate": 2.971175166297118e-06, + "loss": 0.136, "step": 134 }, { - "epoch": 0.05996002664890073, - "grad_norm": 1.7612190558454792, - "learning_rate": 1.498335183129856e-06, - "loss": 0.1637, + "epoch": 0.11989342806394317, + "grad_norm": 1.1173719039790706, + "learning_rate": 2.993348115299335e-06, + "loss": 0.1227, "step": 135 }, { - "epoch": 0.06040417499444815, - "grad_norm": 2.070041455462641, - "learning_rate": 1.509433962264151e-06, - "loss": 0.2122, + "epoch": 0.12078152753108348, + "grad_norm": 1.2564664316562757, + "learning_rate": 3.0155210643015526e-06, + "loss": 0.1103, "step": 136 }, { - "epoch": 0.060848323339995555, - "grad_norm": 2.3780145580102627, - "learning_rate": 1.5205327413984464e-06, - "loss": 0.186, + "epoch": 0.1216696269982238, + "grad_norm": 1.015330802209704, + "learning_rate": 3.0376940133037697e-06, + "loss": 0.1249, "step": 137 }, { - "epoch": 0.06129247168554297, - "grad_norm": 1.8658905992142587, - "learning_rate": 1.5316315205327414e-06, - "loss": 0.1673, + "epoch": 0.12255772646536411, + "grad_norm": 0.9052921105923755, + "learning_rate": 3.0598669623059868e-06, + "loss": 0.1065, "step": 138 }, { - "epoch": 0.061736620031090386, - "grad_norm": 2.8172930803107077, - "learning_rate": 1.5427302996670367e-06, - "loss": 0.244, + "epoch": 0.12344582593250444, + "grad_norm": 1.554369007870476, + "learning_rate": 3.0820399113082043e-06, + "loss": 0.1253, "step": 139 }, { - "epoch": 0.062180768376637795, - "grad_norm": 1.6725142692022914, - "learning_rate": 1.5538290788013318e-06, - "loss": 0.157, + "epoch": 0.12433392539964476, + "grad_norm": 1.0994838247729177, + "learning_rate": 3.1042128603104214e-06, + "loss": 0.1268, "step": 140 }, { - "epoch": 0.0626249167221852, - "grad_norm": 1.392484954569343, - "learning_rate": 1.5649278579356273e-06, - "loss": 0.1558, + "epoch": 0.12522202486678508, + "grad_norm": 1.289812156765464, + "learning_rate": 3.1263858093126385e-06, + "loss": 0.1316, "step": 141 }, { - "epoch": 0.06306906506773262, - "grad_norm": 1.4158061250880813, - "learning_rate": 1.5760266370699223e-06, - "loss": 0.148, + "epoch": 0.1261101243339254, + "grad_norm": 1.117530196743946, + "learning_rate": 3.1485587583148565e-06, + "loss": 0.1205, "step": 142 }, { - "epoch": 0.06351321341328003, - "grad_norm": 1.9695160587869651, - "learning_rate": 1.5871254162042176e-06, - "loss": 0.1468, + "epoch": 0.1269982238010657, + "grad_norm": 1.4963202257356378, + "learning_rate": 3.1707317073170736e-06, + "loss": 0.1282, "step": 143 }, { - "epoch": 0.06395736175882745, - "grad_norm": 1.6387389663448713, - "learning_rate": 1.5982241953385131e-06, - "loss": 0.1363, + "epoch": 0.12788632326820604, + "grad_norm": 1.2143329167513737, + "learning_rate": 3.1929046563192907e-06, + "loss": 0.1268, "step": 144 }, { - "epoch": 0.06440151010437487, - "grad_norm": 1.977141381047165, - "learning_rate": 1.6093229744728082e-06, - "loss": 0.1775, + "epoch": 0.12877442273534637, + "grad_norm": 1.0842233342893233, + "learning_rate": 3.215077605321508e-06, + "loss": 0.1186, "step": 145 }, { - "epoch": 0.06484565844992228, - "grad_norm": 1.866914454872281, - "learning_rate": 1.6204217536071035e-06, - "loss": 0.1676, + "epoch": 0.12966252220248667, + "grad_norm": 1.7013265505553796, + "learning_rate": 3.2372505543237253e-06, + "loss": 0.1118, "step": 146 }, { - "epoch": 0.06528980679546968, - "grad_norm": 1.8479866982339428, - "learning_rate": 1.6315205327413985e-06, - "loss": 0.1795, + "epoch": 0.130550621669627, + "grad_norm": 1.6188620303249415, + "learning_rate": 3.2594235033259424e-06, + "loss": 0.1589, "step": 147 }, { - "epoch": 0.0657339551410171, - "grad_norm": 2.447758458580566, - "learning_rate": 1.6426193118756938e-06, - "loss": 0.1921, + "epoch": 0.13143872113676733, + "grad_norm": 1.0009419982608598, + "learning_rate": 3.2815964523281604e-06, + "loss": 0.1173, "step": 148 }, { - "epoch": 0.06617810348656451, - "grad_norm": 1.928627100482751, - "learning_rate": 1.653718091009989e-06, - "loss": 0.1236, + "epoch": 0.13232682060390763, + "grad_norm": 1.02275400737328, + "learning_rate": 3.3037694013303775e-06, + "loss": 0.1191, "step": 149 }, { - "epoch": 0.06662225183211193, - "grad_norm": 2.496031778783847, - "learning_rate": 1.6648168701442844e-06, - "loss": 0.1263, + "epoch": 0.13321492007104796, + "grad_norm": 1.0442081136592782, + "learning_rate": 3.3259423503325946e-06, + "loss": 0.1425, "step": 150 }, { - "epoch": 0.06706640017765934, - "grad_norm": 2.543270527791031, - "learning_rate": 1.6759156492785794e-06, - "loss": 0.1798, + "epoch": 0.1341030195381883, + "grad_norm": 1.075564124443683, + "learning_rate": 3.348115299334812e-06, + "loss": 0.0982, "step": 151 }, { - "epoch": 0.06751054852320675, - "grad_norm": 2.6478561509220637, - "learning_rate": 1.6870144284128747e-06, - "loss": 0.1976, + "epoch": 0.1349911190053286, + "grad_norm": 1.064389736484248, + "learning_rate": 3.370288248337029e-06, + "loss": 0.1211, "step": 152 }, { - "epoch": 0.06795469686875416, - "grad_norm": 2.046113685099785, - "learning_rate": 1.6981132075471698e-06, - "loss": 0.1412, + "epoch": 0.13587921847246892, + "grad_norm": 1.0766274819279984, + "learning_rate": 3.3924611973392463e-06, + "loss": 0.1267, "step": 153 }, { - "epoch": 0.06839884521430158, - "grad_norm": 4.398993310024598, - "learning_rate": 1.7092119866814653e-06, - "loss": 0.2004, + "epoch": 0.13676731793960922, + "grad_norm": 0.9372935900244141, + "learning_rate": 3.414634146341464e-06, + "loss": 0.1287, "step": 154 }, { - "epoch": 0.06884299355984899, - "grad_norm": 1.7977539055103122, - "learning_rate": 1.7203107658157603e-06, - "loss": 0.1803, + "epoch": 0.13765541740674955, + "grad_norm": 0.9947383850408118, + "learning_rate": 3.436807095343681e-06, + "loss": 0.1192, "step": 155 }, { - "epoch": 0.06928714190539641, - "grad_norm": 1.9000289734288398, - "learning_rate": 1.7314095449500556e-06, - "loss": 0.1725, + "epoch": 0.13854351687388988, + "grad_norm": 1.1107344059739117, + "learning_rate": 3.458980044345898e-06, + "loss": 0.1169, "step": 156 }, { - "epoch": 0.06973129025094381, - "grad_norm": 2.8300001805659956, - "learning_rate": 1.742508324084351e-06, - "loss": 0.1396, + "epoch": 0.13943161634103018, + "grad_norm": 1.029883982289853, + "learning_rate": 3.481152993348116e-06, + "loss": 0.1256, "step": 157 }, { - "epoch": 0.07017543859649122, - "grad_norm": 1.6221647641516457, - "learning_rate": 1.7536071032186462e-06, - "loss": 0.1377, + "epoch": 0.14031971580817051, + "grad_norm": 1.1727622617313658, + "learning_rate": 3.503325942350333e-06, + "loss": 0.136, "step": 158 }, { - "epoch": 0.07061958694203864, - "grad_norm": 1.8070988364351561, - "learning_rate": 1.7647058823529414e-06, - "loss": 0.1466, + "epoch": 0.14120781527531084, + "grad_norm": 0.8288001336590112, + "learning_rate": 3.52549889135255e-06, + "loss": 0.1064, "step": 159 }, { - "epoch": 0.07106373528758606, - "grad_norm": 1.522016454575872, - "learning_rate": 1.7758046614872365e-06, - "loss": 0.1508, + "epoch": 0.14209591474245115, + "grad_norm": 1.210344473508419, + "learning_rate": 3.5476718403547677e-06, + "loss": 0.1321, "step": 160 }, { - "epoch": 0.07150788363313347, - "grad_norm": 2.5966599384816393, - "learning_rate": 1.7869034406215318e-06, - "loss": 0.1946, + "epoch": 0.14298401420959148, + "grad_norm": 1.0298292706998382, + "learning_rate": 3.569844789356985e-06, + "loss": 0.1063, "step": 161 }, { - "epoch": 0.07195203197868089, - "grad_norm": 2.055913601931369, - "learning_rate": 1.7980022197558269e-06, - "loss": 0.1485, + "epoch": 0.1438721136767318, + "grad_norm": 1.1291599864754656, + "learning_rate": 3.592017738359202e-06, + "loss": 0.1247, "step": 162 }, { - "epoch": 0.07239618032422829, - "grad_norm": 1.716589810835508, - "learning_rate": 1.8091009988901223e-06, - "loss": 0.1552, + "epoch": 0.1447602131438721, + "grad_norm": 1.1019933938699389, + "learning_rate": 3.614190687361419e-06, + "loss": 0.1377, "step": 163 }, { - "epoch": 0.0728403286697757, - "grad_norm": 1.324188540188503, - "learning_rate": 1.8201997780244174e-06, - "loss": 0.1217, + "epoch": 0.14564831261101244, + "grad_norm": 0.8189641520937786, + "learning_rate": 3.6363636363636366e-06, + "loss": 0.0991, "step": 164 }, { - "epoch": 0.07328447701532312, - "grad_norm": 1.7875490213167293, - "learning_rate": 1.8312985571587127e-06, - "loss": 0.1814, + "epoch": 0.14653641207815277, + "grad_norm": 1.5124026105918429, + "learning_rate": 3.6585365853658537e-06, + "loss": 0.1348, "step": 165 }, { - "epoch": 0.07372862536087053, - "grad_norm": 2.111076485427071, - "learning_rate": 1.8423973362930078e-06, - "loss": 0.1858, + "epoch": 0.14742451154529307, + "grad_norm": 1.2711556676869127, + "learning_rate": 3.680709534368071e-06, + "loss": 0.1206, "step": 166 }, { - "epoch": 0.07417277370641795, - "grad_norm": 1.8246301721111953, - "learning_rate": 1.8534961154273032e-06, - "loss": 0.1328, + "epoch": 0.1483126110124334, + "grad_norm": 0.7604195531895337, + "learning_rate": 3.7028824833702887e-06, + "loss": 0.0998, "step": 167 }, { - "epoch": 0.07461692205196535, - "grad_norm": 7.017994935548065, - "learning_rate": 1.8645948945615983e-06, - "loss": 0.2175, + "epoch": 0.1492007104795737, + "grad_norm": 0.901217436396349, + "learning_rate": 3.725055432372506e-06, + "loss": 0.1047, "step": 168 }, { - "epoch": 0.07506107039751277, - "grad_norm": 1.8833148111605775, - "learning_rate": 1.8756936736958936e-06, - "loss": 0.169, + "epoch": 0.15008880994671403, + "grad_norm": 1.3122925202426214, + "learning_rate": 3.747228381374723e-06, + "loss": 0.1368, "step": 169 }, { - "epoch": 0.07550521874306018, - "grad_norm": 2.1063106621517833, - "learning_rate": 1.8867924528301889e-06, - "loss": 0.1723, + "epoch": 0.15097690941385436, + "grad_norm": 1.1806821018774798, + "learning_rate": 3.7694013303769405e-06, + "loss": 0.111, "step": 170 }, { - "epoch": 0.0759493670886076, - "grad_norm": 2.1080508911518603, - "learning_rate": 1.8978912319644842e-06, - "loss": 0.1313, + "epoch": 0.15186500888099466, + "grad_norm": 1.220431749996127, + "learning_rate": 3.7915742793791576e-06, + "loss": 0.1558, "step": 171 }, { - "epoch": 0.07639351543415501, - "grad_norm": 2.042694432552121, - "learning_rate": 1.9089900110987794e-06, - "loss": 0.1561, + "epoch": 0.152753108348135, + "grad_norm": 1.027739869330392, + "learning_rate": 3.8137472283813747e-06, + "loss": 0.1261, "step": 172 }, { - "epoch": 0.07683766377970241, - "grad_norm": 2.275654963885769, - "learning_rate": 1.9200887902330745e-06, - "loss": 0.2056, + "epoch": 0.15364120781527532, + "grad_norm": 0.9958834179246624, + "learning_rate": 3.835920177383592e-06, + "loss": 0.1034, "step": 173 }, { - "epoch": 0.07728181212524983, - "grad_norm": 1.4521350560986936, - "learning_rate": 1.93118756936737e-06, - "loss": 0.1539, + "epoch": 0.15452930728241562, + "grad_norm": 0.9734421309270681, + "learning_rate": 3.858093126385809e-06, + "loss": 0.1164, "step": 174 }, { - "epoch": 0.07772596047079725, - "grad_norm": 1.4721774252433903, - "learning_rate": 1.942286348501665e-06, - "loss": 0.1392, + "epoch": 0.15541740674955595, + "grad_norm": 1.1663068744058451, + "learning_rate": 3.8802660753880264e-06, + "loss": 0.1232, "step": 175 }, { - "epoch": 0.07817010881634466, - "grad_norm": 2.236429259701826, - "learning_rate": 1.95338512763596e-06, - "loss": 0.1851, + "epoch": 0.15630550621669628, + "grad_norm": 1.2152791890173018, + "learning_rate": 3.902439024390244e-06, + "loss": 0.1307, "step": 176 }, { - "epoch": 0.07861425716189208, - "grad_norm": 2.0509681089981973, - "learning_rate": 1.964483906770255e-06, - "loss": 0.1613, + "epoch": 0.15719360568383658, + "grad_norm": 1.1930092764069682, + "learning_rate": 3.9246119733924615e-06, + "loss": 0.12, "step": 177 }, { - "epoch": 0.07905840550743949, - "grad_norm": 1.921297270021361, - "learning_rate": 1.9755826859045507e-06, - "loss": 0.1411, + "epoch": 0.15808170515097691, + "grad_norm": 1.4229478314887962, + "learning_rate": 3.946784922394679e-06, + "loss": 0.1139, "step": 178 }, { - "epoch": 0.07950255385298689, - "grad_norm": 2.3334407767674534, - "learning_rate": 1.9866814650388457e-06, - "loss": 0.1673, + "epoch": 0.15896980461811722, + "grad_norm": 1.1541778733556443, + "learning_rate": 3.9689578713968965e-06, + "loss": 0.1493, "step": 179 }, { - "epoch": 0.07994670219853431, - "grad_norm": 2.4173674731585035, - "learning_rate": 1.9977802441731412e-06, - "loss": 0.1769, + "epoch": 0.15985790408525755, + "grad_norm": 0.8900810526022614, + "learning_rate": 3.991130820399114e-06, + "loss": 0.0989, "step": 180 }, { - "epoch": 0.08039085054408172, - "grad_norm": 2.672814520854723, - "learning_rate": 2.0088790233074363e-06, - "loss": 0.1882, + "epoch": 0.16074600355239788, + "grad_norm": 0.9862154082139323, + "learning_rate": 4.013303769401331e-06, + "loss": 0.1066, "step": 181 }, { - "epoch": 0.08083499888962914, - "grad_norm": 2.082354232015489, - "learning_rate": 2.019977802441732e-06, - "loss": 0.1594, + "epoch": 0.16163410301953818, + "grad_norm": 1.0129924983516876, + "learning_rate": 4.035476718403548e-06, + "loss": 0.1246, "step": 182 }, { - "epoch": 0.08127914723517655, - "grad_norm": 2.0548698535266965, - "learning_rate": 2.031076581576027e-06, - "loss": 0.162, + "epoch": 0.1625222024866785, + "grad_norm": 0.8334997069061231, + "learning_rate": 4.057649667405765e-06, + "loss": 0.1113, "step": 183 }, { - "epoch": 0.08172329558072396, - "grad_norm": 2.352430010700108, - "learning_rate": 2.042175360710322e-06, - "loss": 0.1429, + "epoch": 0.16341030195381884, + "grad_norm": 1.3871494257343306, + "learning_rate": 4.079822616407982e-06, + "loss": 0.1127, "step": 184 }, { - "epoch": 0.08216744392627137, - "grad_norm": 1.8421875981209739, - "learning_rate": 2.0532741398446174e-06, - "loss": 0.1575, + "epoch": 0.16429840142095914, + "grad_norm": 1.3181794682195818, + "learning_rate": 4.1019955654102e-06, + "loss": 0.1345, "step": 185 }, { - "epoch": 0.08261159227181879, - "grad_norm": 1.8037755953700159, - "learning_rate": 2.0643729189789125e-06, - "loss": 0.1474, + "epoch": 0.16518650088809947, + "grad_norm": 0.986271734827267, + "learning_rate": 4.124168514412417e-06, + "loss": 0.1154, "step": 186 }, { - "epoch": 0.0830557406173662, - "grad_norm": 2.2301663182454887, - "learning_rate": 2.075471698113208e-06, - "loss": 0.1381, + "epoch": 0.1660746003552398, + "grad_norm": 0.896943232410562, + "learning_rate": 4.146341463414634e-06, + "loss": 0.1107, "step": 187 }, { - "epoch": 0.08349988896291362, - "grad_norm": 1.8820242106298288, - "learning_rate": 2.086570477247503e-06, - "loss": 0.136, + "epoch": 0.1669626998223801, + "grad_norm": 1.2040214431740082, + "learning_rate": 4.168514412416852e-06, + "loss": 0.121, "step": 188 }, { - "epoch": 0.08394403730846102, - "grad_norm": 2.0480577593839935, - "learning_rate": 2.097669256381798e-06, - "loss": 0.146, + "epoch": 0.16785079928952043, + "grad_norm": 1.1911353300667678, + "learning_rate": 4.190687361419069e-06, + "loss": 0.1208, "step": 189 }, { - "epoch": 0.08438818565400844, - "grad_norm": 1.844554763649842, - "learning_rate": 2.108768035516093e-06, - "loss": 0.1663, + "epoch": 0.16873889875666073, + "grad_norm": 1.2508431970298757, + "learning_rate": 4.212860310421286e-06, + "loss": 0.1024, "step": 190 }, { - "epoch": 0.08483233399955585, - "grad_norm": 1.5207370248204364, - "learning_rate": 2.1198668146503887e-06, - "loss": 0.1453, + "epoch": 0.16962699822380106, + "grad_norm": 1.6842128262347582, + "learning_rate": 4.2350332594235035e-06, + "loss": 0.1081, "step": 191 }, { - "epoch": 0.08527648234510327, - "grad_norm": 1.7288600299730912, - "learning_rate": 2.1309655937846837e-06, - "loss": 0.1492, + "epoch": 0.1705150976909414, + "grad_norm": 1.0369558664708232, + "learning_rate": 4.257206208425721e-06, + "loss": 0.1111, "step": 192 }, { - "epoch": 0.08572063069065068, - "grad_norm": 1.6054586875361534, - "learning_rate": 2.1420643729189792e-06, - "loss": 0.1702, + "epoch": 0.1714031971580817, + "grad_norm": 0.8454133452905808, + "learning_rate": 4.279379157427938e-06, + "loss": 0.1006, "step": 193 }, { - "epoch": 0.0861647790361981, - "grad_norm": 2.082434204878279, - "learning_rate": 2.1531631520532743e-06, - "loss": 0.1417, + "epoch": 0.17229129662522202, + "grad_norm": 1.12897982508959, + "learning_rate": 4.301552106430156e-06, + "loss": 0.1133, "step": 194 }, { - "epoch": 0.0866089273817455, - "grad_norm": 1.893129069527839, - "learning_rate": 2.1642619311875694e-06, - "loss": 0.1746, + "epoch": 0.17317939609236235, + "grad_norm": 0.9532666539200393, + "learning_rate": 4.323725055432373e-06, + "loss": 0.1032, "step": 195 }, { - "epoch": 0.08705307572729291, - "grad_norm": 1.5433606197712828, - "learning_rate": 2.175360710321865e-06, - "loss": 0.1434, + "epoch": 0.17406749555950266, + "grad_norm": 1.0733733404707433, + "learning_rate": 4.34589800443459e-06, + "loss": 0.123, "step": 196 }, { - "epoch": 0.08749722407284033, - "grad_norm": 1.8343180586393633, - "learning_rate": 2.18645948945616e-06, - "loss": 0.1635, + "epoch": 0.17495559502664298, + "grad_norm": 0.7809602870024568, + "learning_rate": 4.368070953436808e-06, + "loss": 0.0967, "step": 197 }, { - "epoch": 0.08794137241838774, - "grad_norm": 1.9644492187331304, - "learning_rate": 2.1975582685904554e-06, - "loss": 0.1613, + "epoch": 0.17584369449378331, + "grad_norm": 1.2871583905784865, + "learning_rate": 4.390243902439025e-06, + "loss": 0.1414, "step": 198 }, { - "epoch": 0.08838552076393516, - "grad_norm": 2.3183987673393784, - "learning_rate": 2.2086570477247505e-06, - "loss": 0.1451, + "epoch": 0.17673179396092362, + "grad_norm": 0.8487919353091347, + "learning_rate": 4.412416851441242e-06, + "loss": 0.099, "step": 199 }, { - "epoch": 0.08882966910948256, - "grad_norm": 1.8970811571077912, - "learning_rate": 2.219755826859046e-06, - "loss": 0.1693, + "epoch": 0.17761989342806395, + "grad_norm": 0.8803605777922451, + "learning_rate": 4.434589800443459e-06, + "loss": 0.1001, "step": 200 }, { - "epoch": 0.08927381745502998, - "grad_norm": 1.3438831493644725, - "learning_rate": 2.230854605993341e-06, - "loss": 0.1442, + "epoch": 0.17850799289520428, + "grad_norm": 0.8101636456213316, + "learning_rate": 4.456762749445676e-06, + "loss": 0.1035, "step": 201 }, { - "epoch": 0.08971796580057739, - "grad_norm": 2.0647130607040687, - "learning_rate": 2.241953385127636e-06, - "loss": 0.1483, + "epoch": 0.17939609236234458, + "grad_norm": 0.9967375332978891, + "learning_rate": 4.478935698447893e-06, + "loss": 0.1132, "step": 202 }, { - "epoch": 0.09016211414612481, - "grad_norm": 1.8137712195107252, - "learning_rate": 2.253052164261931e-06, - "loss": 0.1898, + "epoch": 0.1802841918294849, + "grad_norm": 0.8752029350579277, + "learning_rate": 4.501108647450111e-06, + "loss": 0.1073, "step": 203 }, { - "epoch": 0.09060626249167222, - "grad_norm": 1.5677896114201355, - "learning_rate": 2.2641509433962266e-06, - "loss": 0.1105, + "epoch": 0.1811722912966252, + "grad_norm": 0.9312003004475127, + "learning_rate": 4.523281596452328e-06, + "loss": 0.1437, "step": 204 }, { - "epoch": 0.09105041083721963, - "grad_norm": 1.5774232800494377, - "learning_rate": 2.2752497225305217e-06, - "loss": 0.1439, + "epoch": 0.18206039076376554, + "grad_norm": 0.9249575582110223, + "learning_rate": 4.5454545454545455e-06, + "loss": 0.1321, "step": 205 }, { - "epoch": 0.09149455918276704, - "grad_norm": 2.191208018434963, - "learning_rate": 2.286348501664817e-06, - "loss": 0.1739, + "epoch": 0.18294849023090587, + "grad_norm": 0.9302778787096789, + "learning_rate": 4.5676274944567635e-06, + "loss": 0.1106, "step": 206 }, { - "epoch": 0.09193870752831446, - "grad_norm": 1.8759698513356193, - "learning_rate": 2.2974472807991123e-06, - "loss": 0.1238, + "epoch": 0.18383658969804617, + "grad_norm": 0.997721919364688, + "learning_rate": 4.5898004434589806e-06, + "loss": 0.1197, "step": 207 }, { - "epoch": 0.09238285587386187, - "grad_norm": 2.193936812351504, - "learning_rate": 2.3085460599334073e-06, - "loss": 0.149, + "epoch": 0.1847246891651865, + "grad_norm": 0.8646784783181414, + "learning_rate": 4.611973392461198e-06, + "loss": 0.107, "step": 208 }, { - "epoch": 0.09282700421940929, - "grad_norm": 1.7624384709819947, - "learning_rate": 2.319644839067703e-06, - "loss": 0.1395, + "epoch": 0.18561278863232683, + "grad_norm": 0.878322984730142, + "learning_rate": 4.634146341463416e-06, + "loss": 0.1009, "step": 209 }, { - "epoch": 0.09327115256495669, - "grad_norm": 1.940248444895634, - "learning_rate": 2.330743618201998e-06, - "loss": 0.1321, + "epoch": 0.18650088809946713, + "grad_norm": 1.0258913988437643, + "learning_rate": 4.656319290465633e-06, + "loss": 0.1204, "step": 210 }, { - "epoch": 0.0937153009105041, - "grad_norm": 1.623293710115476, - "learning_rate": 2.3418423973362934e-06, - "loss": 0.1411, + "epoch": 0.18738898756660746, + "grad_norm": 0.7285340355199612, + "learning_rate": 4.67849223946785e-06, + "loss": 0.1026, "step": 211 }, { - "epoch": 0.09415944925605152, - "grad_norm": 2.311237234944747, - "learning_rate": 2.3529411764705885e-06, - "loss": 0.1451, + "epoch": 0.1882770870337478, + "grad_norm": 1.1552769161453678, + "learning_rate": 4.700665188470067e-06, + "loss": 0.1099, "step": 212 }, { - "epoch": 0.09460359760159893, - "grad_norm": 2.0964379988774366, - "learning_rate": 2.364039955604884e-06, - "loss": 0.1628, + "epoch": 0.1891651865008881, + "grad_norm": 0.9048845643790788, + "learning_rate": 4.722838137472284e-06, + "loss": 0.1118, "step": 213 }, { - "epoch": 0.09504774594714635, - "grad_norm": 1.57604767113886, - "learning_rate": 2.375138734739179e-06, - "loss": 0.1282, + "epoch": 0.19005328596802842, + "grad_norm": 0.6551802366319263, + "learning_rate": 4.745011086474501e-06, + "loss": 0.0927, "step": 214 }, { - "epoch": 0.09549189429269377, - "grad_norm": 1.446630991649051, - "learning_rate": 2.386237513873474e-06, - "loss": 0.1392, + "epoch": 0.19094138543516873, + "grad_norm": 1.1989568948073477, + "learning_rate": 4.767184035476719e-06, + "loss": 0.1316, "step": 215 }, { - "epoch": 0.09593604263824117, - "grad_norm": 2.274345473449785, - "learning_rate": 2.397336293007769e-06, - "loss": 0.1962, + "epoch": 0.19182948490230906, + "grad_norm": 1.3977062892280108, + "learning_rate": 4.789356984478936e-06, + "loss": 0.1504, "step": 216 }, { - "epoch": 0.09638019098378858, - "grad_norm": 1.5946628028114809, - "learning_rate": 2.4084350721420646e-06, - "loss": 0.138, + "epoch": 0.19271758436944939, + "grad_norm": 1.0870233932912872, + "learning_rate": 4.811529933481153e-06, + "loss": 0.1156, "step": 217 }, { - "epoch": 0.096824339329336, - "grad_norm": 1.882389417953473, - "learning_rate": 2.4195338512763597e-06, - "loss": 0.1903, + "epoch": 0.1936056838365897, + "grad_norm": 0.8488857557091133, + "learning_rate": 4.833702882483371e-06, + "loss": 0.1092, "step": 218 }, { - "epoch": 0.09726848767488341, - "grad_norm": 1.904432577295931, - "learning_rate": 2.430632630410655e-06, - "loss": 0.1481, + "epoch": 0.19449378330373002, + "grad_norm": 0.9902013418159533, + "learning_rate": 4.855875831485588e-06, + "loss": 0.1165, "step": 219 }, { - "epoch": 0.09771263602043083, - "grad_norm": 2.080561584835578, - "learning_rate": 2.4417314095449503e-06, - "loss": 0.1514, + "epoch": 0.19538188277087035, + "grad_norm": 1.0948237904417308, + "learning_rate": 4.8780487804878055e-06, + "loss": 0.1153, "step": 220 }, { - "epoch": 0.09815678436597823, - "grad_norm": 1.6677943098084358, - "learning_rate": 2.4528301886792453e-06, - "loss": 0.1347, + "epoch": 0.19626998223801065, + "grad_norm": 1.191423921883629, + "learning_rate": 4.900221729490023e-06, + "loss": 0.1291, "step": 221 }, { - "epoch": 0.09860093271152565, - "grad_norm": 1.7294352905463155, - "learning_rate": 2.463928967813541e-06, - "loss": 0.1386, + "epoch": 0.19715808170515098, + "grad_norm": 0.7823243340205541, + "learning_rate": 4.92239467849224e-06, + "loss": 0.0987, "step": 222 }, { - "epoch": 0.09904508105707306, - "grad_norm": 2.0928510956520934, - "learning_rate": 2.475027746947836e-06, - "loss": 0.1611, + "epoch": 0.1980461811722913, + "grad_norm": 1.1222921015784384, + "learning_rate": 4.944567627494457e-06, + "loss": 0.1162, "step": 223 }, { - "epoch": 0.09948922940262048, - "grad_norm": 1.6018044625885859, - "learning_rate": 2.4861265260821314e-06, - "loss": 0.1338, + "epoch": 0.1989342806394316, + "grad_norm": 1.0874257819130313, + "learning_rate": 4.966740576496675e-06, + "loss": 0.1197, "step": 224 }, { - "epoch": 0.09993337774816789, - "grad_norm": 1.9068721122681065, - "learning_rate": 2.4972253052164264e-06, - "loss": 0.1557, + "epoch": 0.19982238010657194, + "grad_norm": 1.0791997810485823, + "learning_rate": 4.988913525498892e-06, + "loss": 0.1133, "step": 225 }, { - "epoch": 0.1003775260937153, - "grad_norm": 1.5854054800345856, - "learning_rate": 2.508324084350722e-06, - "loss": 0.1582, + "epoch": 0.20071047957371227, + "grad_norm": 1.329628162108887, + "learning_rate": 5.011086474501109e-06, + "loss": 0.1158, "step": 226 }, { - "epoch": 0.10082167443926271, - "grad_norm": 1.6327772348060883, - "learning_rate": 2.519422863485017e-06, - "loss": 0.153, + "epoch": 0.20159857904085257, + "grad_norm": 0.7819188176330516, + "learning_rate": 5.033259423503326e-06, + "loss": 0.0937, "step": 227 }, { - "epoch": 0.10126582278481013, - "grad_norm": 1.7221707573000986, - "learning_rate": 2.530521642619312e-06, - "loss": 0.1521, + "epoch": 0.2024866785079929, + "grad_norm": 0.7711644719096397, + "learning_rate": 5.055432372505543e-06, + "loss": 0.101, "step": 228 }, { - "epoch": 0.10170997113035754, - "grad_norm": 3.8860493727438605, - "learning_rate": 2.541620421753607e-06, - "loss": 0.185, + "epoch": 0.2033747779751332, + "grad_norm": 1.2128139947821643, + "learning_rate": 5.077605321507761e-06, + "loss": 0.1025, "step": 229 }, { - "epoch": 0.10215411947590496, - "grad_norm": 2.2982967806121057, - "learning_rate": 2.5527192008879026e-06, - "loss": 0.1268, + "epoch": 0.20426287744227353, + "grad_norm": 1.0640410043385014, + "learning_rate": 5.099778270509978e-06, + "loss": 0.1111, "step": 230 }, { - "epoch": 0.10259826782145237, - "grad_norm": 2.633523229110552, - "learning_rate": 2.563817980022198e-06, - "loss": 0.1312, + "epoch": 0.20515097690941386, + "grad_norm": 1.0170744010005723, + "learning_rate": 5.121951219512195e-06, + "loss": 0.1211, "step": 231 }, { - "epoch": 0.10304241616699977, - "grad_norm": 1.8405348072939953, - "learning_rate": 2.5749167591564928e-06, - "loss": 0.1352, + "epoch": 0.20603907637655416, + "grad_norm": 1.109251802320127, + "learning_rate": 5.1441241685144124e-06, + "loss": 0.1427, "step": 232 }, { - "epoch": 0.10348656451254719, - "grad_norm": 1.5009853192812423, - "learning_rate": 2.5860155382907882e-06, - "loss": 0.1191, + "epoch": 0.2069271758436945, + "grad_norm": 0.9589611675842314, + "learning_rate": 5.1662971175166295e-06, + "loss": 0.1183, "step": 233 }, { - "epoch": 0.1039307128580946, - "grad_norm": 1.3280238597160159, - "learning_rate": 2.5971143174250833e-06, - "loss": 0.1207, + "epoch": 0.20781527531083482, + "grad_norm": 1.109878614980834, + "learning_rate": 5.188470066518847e-06, + "loss": 0.1069, "step": 234 }, { - "epoch": 0.10437486120364202, - "grad_norm": 1.412771085327836, - "learning_rate": 2.608213096559379e-06, - "loss": 0.1343, + "epoch": 0.20870337477797513, + "grad_norm": 0.9525740331445137, + "learning_rate": 5.2106430155210654e-06, + "loss": 0.0877, "step": 235 }, { - "epoch": 0.10481900954918943, - "grad_norm": 1.4216505684340854, - "learning_rate": 2.6193118756936743e-06, - "loss": 0.1312, + "epoch": 0.20959147424511546, + "grad_norm": 1.1332712198064505, + "learning_rate": 5.2328159645232826e-06, + "loss": 0.1179, "step": 236 }, { - "epoch": 0.10526315789473684, - "grad_norm": 1.5128985222362534, - "learning_rate": 2.630410654827969e-06, - "loss": 0.1351, + "epoch": 0.21047957371225579, + "grad_norm": 1.165084609634557, + "learning_rate": 5.2549889135255e-06, + "loss": 0.1158, "step": 237 }, { - "epoch": 0.10570730624028425, - "grad_norm": 1.9599268561293408, - "learning_rate": 2.6415094339622644e-06, - "loss": 0.1403, + "epoch": 0.2113676731793961, + "grad_norm": 0.9551403627646942, + "learning_rate": 5.277161862527717e-06, + "loss": 0.1202, "step": 238 }, { - "epoch": 0.10615145458583167, - "grad_norm": 1.4181236006554954, - "learning_rate": 2.6526082130965595e-06, - "loss": 0.1377, + "epoch": 0.21225577264653642, + "grad_norm": 1.3132605084853501, + "learning_rate": 5.299334811529934e-06, + "loss": 0.1023, "step": 239 }, { - "epoch": 0.10659560293137908, - "grad_norm": 1.9917215037873872, - "learning_rate": 2.663706992230855e-06, - "loss": 0.1451, + "epoch": 0.21314387211367672, + "grad_norm": 1.1918356065854254, + "learning_rate": 5.321507760532151e-06, + "loss": 0.13, "step": 240 }, { - "epoch": 0.1070397512769265, - "grad_norm": 1.683475509658225, - "learning_rate": 2.67480577136515e-06, - "loss": 0.1375, + "epoch": 0.21403197158081705, + "grad_norm": 0.8732190922428062, + "learning_rate": 5.343680709534369e-06, + "loss": 0.0939, "step": 241 }, { - "epoch": 0.1074838996224739, - "grad_norm": 1.8301967264376793, - "learning_rate": 2.685904550499445e-06, - "loss": 0.1464, + "epoch": 0.21492007104795738, + "grad_norm": 0.7943260488358588, + "learning_rate": 5.365853658536586e-06, + "loss": 0.0971, "step": 242 }, { - "epoch": 0.10792804796802132, - "grad_norm": 1.6846261045693358, - "learning_rate": 2.6970033296337406e-06, - "loss": 0.1332, + "epoch": 0.21580817051509768, + "grad_norm": 1.1992570718913784, + "learning_rate": 5.388026607538803e-06, + "loss": 0.1154, "step": 243 }, { - "epoch": 0.10837219631356873, - "grad_norm": 1.2774719377840502, - "learning_rate": 2.708102108768036e-06, - "loss": 0.1292, + "epoch": 0.216696269982238, + "grad_norm": 1.2103166864927497, + "learning_rate": 5.41019955654102e-06, + "loss": 0.1279, "step": 244 }, { - "epoch": 0.10881634465911615, - "grad_norm": 1.595633542836346, - "learning_rate": 2.7192008879023307e-06, - "loss": 0.1386, + "epoch": 0.21758436944937834, + "grad_norm": 0.9755034574356769, + "learning_rate": 5.432372505543237e-06, + "loss": 0.1115, "step": 245 }, { - "epoch": 0.10926049300466356, - "grad_norm": 1.4528662121606482, - "learning_rate": 2.7302996670366262e-06, - "loss": 0.1392, + "epoch": 0.21847246891651864, + "grad_norm": 1.1826367678301064, + "learning_rate": 5.4545454545454545e-06, + "loss": 0.1186, "step": 246 }, { - "epoch": 0.10970464135021098, - "grad_norm": 1.5309789173048087, - "learning_rate": 2.7413984461709213e-06, - "loss": 0.1271, + "epoch": 0.21936056838365897, + "grad_norm": 1.0911959641988822, + "learning_rate": 5.476718403547672e-06, + "loss": 0.1067, "step": 247 }, { - "epoch": 0.11014878969575838, - "grad_norm": 1.518105261331223, - "learning_rate": 2.7524972253052168e-06, - "loss": 0.1237, + "epoch": 0.2202486678507993, + "grad_norm": 0.8868011155114296, + "learning_rate": 5.4988913525498895e-06, + "loss": 0.1179, "step": 248 }, { - "epoch": 0.1105929380413058, - "grad_norm": 1.4676341827820025, - "learning_rate": 2.7635960044395123e-06, - "loss": 0.1317, + "epoch": 0.2211367673179396, + "grad_norm": 1.2833847038326471, + "learning_rate": 5.5210643015521075e-06, + "loss": 0.129, "step": 249 }, { - "epoch": 0.11103708638685321, - "grad_norm": 2.146666992664208, - "learning_rate": 2.774694783573807e-06, - "loss": 0.1266, + "epoch": 0.22202486678507993, + "grad_norm": 1.3643340180187968, + "learning_rate": 5.5432372505543246e-06, + "loss": 0.1212, "step": 250 }, { - "epoch": 0.11148123473240062, - "grad_norm": 1.450876621074019, - "learning_rate": 2.7857935627081024e-06, - "loss": 0.1124, + "epoch": 0.22291296625222023, + "grad_norm": 1.2843488941841317, + "learning_rate": 5.565410199556542e-06, + "loss": 0.1265, "step": 251 }, { - "epoch": 0.11192538307794804, - "grad_norm": 1.6447214379715893, - "learning_rate": 2.7968923418423975e-06, - "loss": 0.1176, + "epoch": 0.22380106571936056, + "grad_norm": 0.812674418729087, + "learning_rate": 5.587583148558759e-06, + "loss": 0.0932, "step": 252 }, { - "epoch": 0.11236953142349544, - "grad_norm": 1.85196408048202, - "learning_rate": 2.807991120976693e-06, - "loss": 0.1196, + "epoch": 0.2246891651865009, + "grad_norm": 0.9792111463263631, + "learning_rate": 5.609756097560977e-06, + "loss": 0.1004, "step": 253 }, { - "epoch": 0.11281367976904286, - "grad_norm": 1.8297536053418253, - "learning_rate": 2.819089900110988e-06, - "loss": 0.1312, + "epoch": 0.2255772646536412, + "grad_norm": 1.8989961235893313, + "learning_rate": 5.631929046563194e-06, + "loss": 0.1256, "step": 254 }, { - "epoch": 0.11325782811459027, - "grad_norm": 1.7773730595281947, - "learning_rate": 2.830188679245283e-06, - "loss": 0.1343, + "epoch": 0.22646536412078153, + "grad_norm": 1.1308233607195404, + "learning_rate": 5.654101995565411e-06, + "loss": 0.1318, "step": 255 }, { - "epoch": 0.11370197646013769, - "grad_norm": 1.5584909202093926, - "learning_rate": 2.8412874583795786e-06, - "loss": 0.1075, + "epoch": 0.22735346358792186, + "grad_norm": 1.675009909952044, + "learning_rate": 5.676274944567628e-06, + "loss": 0.1031, "step": 256 }, { - "epoch": 0.1141461248056851, - "grad_norm": 1.9920621657295152, - "learning_rate": 2.852386237513874e-06, - "loss": 0.1591, + "epoch": 0.22824156305506216, + "grad_norm": 0.8060616010785078, + "learning_rate": 5.698447893569845e-06, + "loss": 0.0893, "step": 257 }, { - "epoch": 0.1145902731512325, - "grad_norm": 2.0535793857620264, - "learning_rate": 2.8634850166481687e-06, - "loss": 0.1459, + "epoch": 0.2291296625222025, + "grad_norm": 1.1704861373713222, + "learning_rate": 5.720620842572062e-06, + "loss": 0.1351, "step": 258 }, { - "epoch": 0.11503442149677992, - "grad_norm": 1.432401039912359, - "learning_rate": 2.8745837957824642e-06, - "loss": 0.1235, + "epoch": 0.23001776198934282, + "grad_norm": 0.8444420732939596, + "learning_rate": 5.742793791574279e-06, + "loss": 0.0969, "step": 259 }, { - "epoch": 0.11547856984232734, - "grad_norm": 2.3190551632714427, - "learning_rate": 2.8856825749167593e-06, - "loss": 0.1608, + "epoch": 0.23090586145648312, + "grad_norm": 1.2877473673171203, + "learning_rate": 5.764966740576497e-06, + "loss": 0.1346, "step": 260 }, { - "epoch": 0.11592271818787475, - "grad_norm": 1.4461950992247072, - "learning_rate": 2.8967813540510548e-06, - "loss": 0.1242, + "epoch": 0.23179396092362345, + "grad_norm": 0.8334646425787372, + "learning_rate": 5.787139689578714e-06, + "loss": 0.0983, "step": 261 }, { - "epoch": 0.11636686653342217, - "grad_norm": 1.60132726584782, - "learning_rate": 2.9078801331853503e-06, - "loss": 0.1057, + "epoch": 0.23268206039076378, + "grad_norm": 0.9171036171575526, + "learning_rate": 5.8093126385809315e-06, + "loss": 0.0939, "step": 262 }, { - "epoch": 0.11681101487896958, - "grad_norm": 2.5527110858786553, - "learning_rate": 2.918978912319645e-06, - "loss": 0.1735, + "epoch": 0.23357015985790408, + "grad_norm": 0.8826773379519395, + "learning_rate": 5.831485587583149e-06, + "loss": 0.1108, "step": 263 }, { - "epoch": 0.11725516322451698, - "grad_norm": 1.587535094612724, - "learning_rate": 2.9300776914539404e-06, - "loss": 0.1145, + "epoch": 0.2344582593250444, + "grad_norm": 0.8803411228533244, + "learning_rate": 5.853658536585366e-06, + "loss": 0.0892, "step": 264 }, { - "epoch": 0.1176993115700644, - "grad_norm": 1.8628254342286168, - "learning_rate": 2.9411764705882355e-06, - "loss": 0.1475, + "epoch": 0.2353463587921847, + "grad_norm": 0.9593996610025081, + "learning_rate": 5.875831485587583e-06, + "loss": 0.1114, "step": 265 }, { - "epoch": 0.11814345991561181, - "grad_norm": 2.029208074213613, - "learning_rate": 2.952275249722531e-06, - "loss": 0.1448, + "epoch": 0.23623445825932504, + "grad_norm": 0.8473811030250791, + "learning_rate": 5.898004434589802e-06, + "loss": 0.0983, "step": 266 }, { - "epoch": 0.11858760826115923, - "grad_norm": 1.788916557618341, - "learning_rate": 2.9633740288568256e-06, - "loss": 0.1223, + "epoch": 0.23712255772646537, + "grad_norm": 0.8900843974703818, + "learning_rate": 5.920177383592019e-06, + "loss": 0.1167, "step": 267 }, { - "epoch": 0.11903175660670665, - "grad_norm": 2.0663932534564147, - "learning_rate": 2.974472807991121e-06, - "loss": 0.1456, + "epoch": 0.23801065719360567, + "grad_norm": 0.9318764854171055, + "learning_rate": 5.942350332594236e-06, + "loss": 0.1007, "step": 268 }, { - "epoch": 0.11947590495225405, - "grad_norm": 1.5929695265003, - "learning_rate": 2.9855715871254166e-06, - "loss": 0.1427, + "epoch": 0.238898756660746, + "grad_norm": 0.9402921543885013, + "learning_rate": 5.964523281596453e-06, + "loss": 0.1084, "step": 269 }, { - "epoch": 0.11992005329780146, - "grad_norm": 1.8366971238631566, - "learning_rate": 2.996670366259712e-06, - "loss": 0.1029, + "epoch": 0.23978685612788633, + "grad_norm": 1.5655719582905685, + "learning_rate": 5.98669623059867e-06, + "loss": 0.1403, "step": 270 }, { - "epoch": 0.12036420164334888, - "grad_norm": 1.4941973532109012, - "learning_rate": 3.0077691453940067e-06, - "loss": 0.1115, + "epoch": 0.24067495559502664, + "grad_norm": 1.0285971062943506, + "learning_rate": 6.008869179600887e-06, + "loss": 0.1148, "step": 271 }, { - "epoch": 0.1208083499888963, - "grad_norm": 2.1604567983633403, - "learning_rate": 3.018867924528302e-06, - "loss": 0.11, + "epoch": 0.24156305506216696, + "grad_norm": 0.88569050368874, + "learning_rate": 6.031042128603105e-06, + "loss": 0.1122, "step": 272 }, { - "epoch": 0.12125249833444371, - "grad_norm": 1.5440708951155746, - "learning_rate": 3.0299667036625973e-06, - "loss": 0.1312, + "epoch": 0.2424511545293073, + "grad_norm": 0.9349873568776979, + "learning_rate": 6.053215077605322e-06, + "loss": 0.0991, "step": 273 }, { - "epoch": 0.12169664667999111, - "grad_norm": 1.1320500690500013, - "learning_rate": 3.0410654827968928e-06, - "loss": 0.1172, + "epoch": 0.2433392539964476, + "grad_norm": 0.9162971549498059, + "learning_rate": 6.075388026607539e-06, + "loss": 0.101, "step": 274 }, { - "epoch": 0.12214079502553853, - "grad_norm": 2.1326279999447326, - "learning_rate": 3.0521642619311882e-06, - "loss": 0.1099, + "epoch": 0.24422735346358793, + "grad_norm": 0.8286328678796847, + "learning_rate": 6.0975609756097564e-06, + "loss": 0.0943, "step": 275 }, { - "epoch": 0.12258494337108594, - "grad_norm": 1.8624808272019928, - "learning_rate": 3.063263041065483e-06, - "loss": 0.1061, + "epoch": 0.24511545293072823, + "grad_norm": 1.1987117793960447, + "learning_rate": 6.1197339246119735e-06, + "loss": 0.1305, "step": 276 }, { - "epoch": 0.12302909171663336, - "grad_norm": 2.836562163833866, - "learning_rate": 3.0743618201997784e-06, - "loss": 0.1173, + "epoch": 0.24600355239786856, + "grad_norm": 1.0475168234127854, + "learning_rate": 6.141906873614191e-06, + "loss": 0.1118, "step": 277 }, { - "epoch": 0.12347324006218077, - "grad_norm": 1.5779920497766018, - "learning_rate": 3.0854605993340734e-06, - "loss": 0.1305, + "epoch": 0.2468916518650089, + "grad_norm": 0.9193825808839732, + "learning_rate": 6.164079822616409e-06, + "loss": 0.1094, "step": 278 }, { - "epoch": 0.12391738840772819, - "grad_norm": 1.4061427971159166, - "learning_rate": 3.096559378468369e-06, - "loss": 0.1063, + "epoch": 0.2477797513321492, + "grad_norm": 0.8372396630709162, + "learning_rate": 6.186252771618626e-06, + "loss": 0.1011, "step": 279 }, { - "epoch": 0.12436153675327559, - "grad_norm": 1.4735808311660463, - "learning_rate": 3.1076581576026636e-06, - "loss": 0.1478, + "epoch": 0.24866785079928952, + "grad_norm": 1.2776492189182604, + "learning_rate": 6.208425720620843e-06, + "loss": 0.1091, "step": 280 }, { - "epoch": 0.124805685098823, - "grad_norm": 1.9063172097566101, - "learning_rate": 3.118756936736959e-06, - "loss": 0.152, + "epoch": 0.24955595026642985, + "grad_norm": 0.9827831306483911, + "learning_rate": 6.23059866962306e-06, + "loss": 0.1033, "step": 281 }, { - "epoch": 0.1252498334443704, - "grad_norm": 1.2460125667748942, - "learning_rate": 3.1298557158712546e-06, - "loss": 0.1078, + "epoch": 0.25044404973357015, + "grad_norm": 0.8117968866243302, + "learning_rate": 6.252771618625277e-06, + "loss": 0.1083, "step": 282 }, { - "epoch": 0.12569398178991784, - "grad_norm": 1.422370758153891, - "learning_rate": 3.1409544950055496e-06, - "loss": 0.1143, + "epoch": 0.25133214920071045, + "grad_norm": 0.9259173802915021, + "learning_rate": 6.274944567627494e-06, + "loss": 0.1191, "step": 283 }, { - "epoch": 0.12613813013546524, - "grad_norm": 1.3901208342210212, - "learning_rate": 3.1520532741398447e-06, - "loss": 0.1232, + "epoch": 0.2522202486678508, + "grad_norm": 0.9560506019339821, + "learning_rate": 6.297117516629713e-06, + "loss": 0.1127, "step": 284 }, { - "epoch": 0.12658227848101267, - "grad_norm": 1.2571280817929795, - "learning_rate": 3.16315205327414e-06, - "loss": 0.1183, + "epoch": 0.2531083481349911, + "grad_norm": 1.0450882846146754, + "learning_rate": 6.31929046563193e-06, + "loss": 0.1047, "step": 285 }, { - "epoch": 0.12702642682656007, - "grad_norm": 1.7211835588268667, - "learning_rate": 3.1742508324084352e-06, - "loss": 0.1376, + "epoch": 0.2539964476021314, + "grad_norm": 1.1396885068276017, + "learning_rate": 6.341463414634147e-06, + "loss": 0.1066, "step": 286 }, { - "epoch": 0.1274705751721075, - "grad_norm": 1.7352722841271955, - "learning_rate": 3.1853496115427307e-06, - "loss": 0.1221, + "epoch": 0.25488454706927177, + "grad_norm": 0.9982877841646272, + "learning_rate": 6.363636363636364e-06, + "loss": 0.1052, "step": 287 }, { - "epoch": 0.1279147235176549, - "grad_norm": 1.608485934770127, - "learning_rate": 3.1964483906770262e-06, - "loss": 0.1287, + "epoch": 0.2557726465364121, + "grad_norm": 1.0559192079092554, + "learning_rate": 6.385809312638581e-06, + "loss": 0.1125, "step": 288 }, { - "epoch": 0.1283588718632023, - "grad_norm": 1.4070749655660284, - "learning_rate": 3.207547169811321e-06, - "loss": 0.1107, + "epoch": 0.2566607460035524, + "grad_norm": 0.8756361747433646, + "learning_rate": 6.4079822616407984e-06, + "loss": 0.0906, "step": 289 }, { - "epoch": 0.12880302020874973, - "grad_norm": 1.8061966734954316, - "learning_rate": 3.2186459489456164e-06, - "loss": 0.1209, + "epoch": 0.25754884547069273, + "grad_norm": 1.8607634927392362, + "learning_rate": 6.430155210643016e-06, + "loss": 0.1199, "step": 290 }, { - "epoch": 0.12924716855429713, - "grad_norm": 1.9290505056364757, - "learning_rate": 3.2297447280799114e-06, - "loss": 0.1088, + "epoch": 0.25843694493783304, + "grad_norm": 1.1933913566515744, + "learning_rate": 6.4523281596452335e-06, + "loss": 0.0967, "step": 291 }, { - "epoch": 0.12969131689984456, - "grad_norm": 1.2873703020140206, - "learning_rate": 3.240843507214207e-06, - "loss": 0.1107, + "epoch": 0.25932504440497334, + "grad_norm": 1.0732491347021387, + "learning_rate": 6.474501108647451e-06, + "loss": 0.1172, "step": 292 }, { - "epoch": 0.13013546524539196, - "grad_norm": 2.100967763487988, - "learning_rate": 3.2519422863485016e-06, - "loss": 0.1746, + "epoch": 0.2602131438721137, + "grad_norm": 1.0464473664142486, + "learning_rate": 6.496674057649668e-06, + "loss": 0.1058, "step": 293 }, { - "epoch": 0.13057961359093936, - "grad_norm": 2.9637529933084785, - "learning_rate": 3.263041065482797e-06, - "loss": 0.1408, + "epoch": 0.261101243339254, + "grad_norm": 1.390606848296661, + "learning_rate": 6.518847006651885e-06, + "loss": 0.1161, "step": 294 }, { - "epoch": 0.1310237619364868, - "grad_norm": 1.8502103362104685, - "learning_rate": 3.2741398446170925e-06, - "loss": 0.108, + "epoch": 0.2619893428063943, + "grad_norm": 0.8469819515334254, + "learning_rate": 6.541019955654102e-06, + "loss": 0.0845, "step": 295 }, { - "epoch": 0.1314679102820342, - "grad_norm": 1.5072896588301588, - "learning_rate": 3.2852386237513876e-06, - "loss": 0.1233, + "epoch": 0.26287744227353466, + "grad_norm": 0.852351395715418, + "learning_rate": 6.563192904656321e-06, + "loss": 0.1043, "step": 296 }, { - "epoch": 0.13191205862758162, - "grad_norm": 1.9060937237646072, - "learning_rate": 3.2963374028856827e-06, - "loss": 0.1137, + "epoch": 0.26376554174067496, + "grad_norm": 0.6890587841071393, + "learning_rate": 6.585365853658538e-06, + "loss": 0.0844, "step": 297 }, { - "epoch": 0.13235620697312903, - "grad_norm": 1.4214687758215054, - "learning_rate": 3.307436182019978e-06, - "loss": 0.1181, + "epoch": 0.26465364120781526, + "grad_norm": 0.9415217807109038, + "learning_rate": 6.607538802660755e-06, + "loss": 0.0876, "step": 298 }, { - "epoch": 0.13280035531867643, - "grad_norm": 1.5173189244791243, - "learning_rate": 3.3185349611542732e-06, - "loss": 0.1221, + "epoch": 0.2655417406749556, + "grad_norm": 1.0001337064125344, + "learning_rate": 6.629711751662972e-06, + "loss": 0.0905, "step": 299 }, { - "epoch": 0.13324450366422386, - "grad_norm": 1.4086327372245158, - "learning_rate": 3.3296337402885687e-06, - "loss": 0.1598, + "epoch": 0.2664298401420959, + "grad_norm": 0.9285830454903911, + "learning_rate": 6.651884700665189e-06, + "loss": 0.1065, "step": 300 }, { - "epoch": 0.13368865200977126, - "grad_norm": 1.3949120100912162, - "learning_rate": 3.3407325194228642e-06, - "loss": 0.0996, + "epoch": 0.2673179396092362, + "grad_norm": 0.7175570567997788, + "learning_rate": 6.674057649667406e-06, + "loss": 0.0941, "step": 301 }, { - "epoch": 0.1341328003553187, - "grad_norm": 1.6249998744801628, - "learning_rate": 3.351831298557159e-06, - "loss": 0.0959, + "epoch": 0.2682060390763766, + "grad_norm": 0.8639952187699179, + "learning_rate": 6.696230598669624e-06, + "loss": 0.1102, "step": 302 }, { - "epoch": 0.1345769487008661, - "grad_norm": 1.7178562509007014, - "learning_rate": 3.3629300776914543e-06, - "loss": 0.1348, + "epoch": 0.2690941385435169, + "grad_norm": 0.8941541155162261, + "learning_rate": 6.718403547671841e-06, + "loss": 0.1015, "step": 303 }, { - "epoch": 0.1350210970464135, - "grad_norm": 1.7790098039504103, - "learning_rate": 3.3740288568257494e-06, - "loss": 0.1011, + "epoch": 0.2699822380106572, + "grad_norm": 0.8487330548553126, + "learning_rate": 6.740576496674058e-06, + "loss": 0.1147, "step": 304 }, { - "epoch": 0.13546524539196092, - "grad_norm": 1.4533709920798474, - "learning_rate": 3.385127635960045e-06, - "loss": 0.1177, + "epoch": 0.27087033747779754, + "grad_norm": 1.0437605088989104, + "learning_rate": 6.7627494456762755e-06, + "loss": 0.1006, "step": 305 }, { - "epoch": 0.13590939373750832, - "grad_norm": 1.7170638072373428, - "learning_rate": 3.3962264150943395e-06, - "loss": 0.1264, + "epoch": 0.27175843694493784, + "grad_norm": 0.9407640851480744, + "learning_rate": 6.784922394678493e-06, + "loss": 0.0969, "step": 306 }, { - "epoch": 0.13635354208305575, - "grad_norm": 1.1622578542744249, - "learning_rate": 3.407325194228635e-06, - "loss": 0.1164, + "epoch": 0.27264653641207814, + "grad_norm": 0.8117833769010849, + "learning_rate": 6.80709534368071e-06, + "loss": 0.1014, "step": 307 }, { - "epoch": 0.13679769042860315, - "grad_norm": 1.7861497563291042, - "learning_rate": 3.4184239733629305e-06, - "loss": 0.1328, + "epoch": 0.27353463587921845, + "grad_norm": 0.8114003548176898, + "learning_rate": 6.829268292682928e-06, + "loss": 0.0979, "step": 308 }, { - "epoch": 0.13724183877415055, - "grad_norm": 1.2393311320446403, - "learning_rate": 3.4295227524972256e-06, - "loss": 0.0994, + "epoch": 0.2744227353463588, + "grad_norm": 0.7633066126158762, + "learning_rate": 6.851441241685145e-06, + "loss": 0.1041, "step": 309 }, { - "epoch": 0.13768598711969798, - "grad_norm": 1.779362058176627, - "learning_rate": 3.4406215316315207e-06, - "loss": 0.131, + "epoch": 0.2753108348134991, + "grad_norm": 1.0186060599613544, + "learning_rate": 6.873614190687362e-06, + "loss": 0.1101, "step": 310 }, { - "epoch": 0.13813013546524538, - "grad_norm": 1.384763433835653, - "learning_rate": 3.4517203107658157e-06, - "loss": 0.1011, + "epoch": 0.2761989342806394, + "grad_norm": 0.8807977402444054, + "learning_rate": 6.895787139689579e-06, + "loss": 0.1067, "step": 311 }, { - "epoch": 0.13857428381079281, - "grad_norm": 1.5455433688862117, - "learning_rate": 3.4628190899001112e-06, - "loss": 0.1216, + "epoch": 0.27708703374777977, + "grad_norm": 0.7810626374595981, + "learning_rate": 6.917960088691796e-06, + "loss": 0.099, "step": 312 }, { - "epoch": 0.13901843215634022, - "grad_norm": 1.3658352699008705, - "learning_rate": 3.4739178690344067e-06, - "loss": 0.123, + "epoch": 0.27797513321492007, + "grad_norm": 0.9333340452246128, + "learning_rate": 6.940133037694013e-06, + "loss": 0.0972, "step": 313 }, { - "epoch": 0.13946258050188762, - "grad_norm": 1.3724682796873768, - "learning_rate": 3.485016648168702e-06, - "loss": 0.1209, + "epoch": 0.27886323268206037, + "grad_norm": 0.9632806942955823, + "learning_rate": 6.962305986696232e-06, + "loss": 0.1069, "step": 314 }, { - "epoch": 0.13990672884743505, - "grad_norm": 1.608691375904217, - "learning_rate": 3.496115427302997e-06, - "loss": 0.1141, + "epoch": 0.2797513321492007, + "grad_norm": 0.8661746123894044, + "learning_rate": 6.984478935698449e-06, + "loss": 0.1104, "step": 315 }, { - "epoch": 0.14035087719298245, - "grad_norm": 1.3598637431605427, - "learning_rate": 3.5072142064372923e-06, - "loss": 0.1528, + "epoch": 0.28063943161634103, + "grad_norm": 0.9726637372731115, + "learning_rate": 7.006651884700666e-06, + "loss": 0.1063, "step": 316 }, { - "epoch": 0.14079502553852988, - "grad_norm": 1.0876962111896626, - "learning_rate": 3.5183129855715874e-06, - "loss": 0.1049, + "epoch": 0.28152753108348133, + "grad_norm": 0.8979446464223533, + "learning_rate": 7.028824833702883e-06, + "loss": 0.0886, "step": 317 }, { - "epoch": 0.14123917388407728, - "grad_norm": 1.3385892109435766, - "learning_rate": 3.529411764705883e-06, - "loss": 0.1037, + "epoch": 0.2824156305506217, + "grad_norm": 0.9287233565372144, + "learning_rate": 7.0509977827051e-06, + "loss": 0.0927, "step": 318 }, { - "epoch": 0.14168332222962468, - "grad_norm": 1.7433775937165439, - "learning_rate": 3.5405105438401775e-06, - "loss": 0.1486, + "epoch": 0.283303730017762, + "grad_norm": 0.9525737964024173, + "learning_rate": 7.0731707317073175e-06, + "loss": 0.097, "step": 319 }, { - "epoch": 0.1421274705751721, - "grad_norm": 1.5533508842477224, - "learning_rate": 3.551609322974473e-06, - "loss": 0.1049, + "epoch": 0.2841918294849023, + "grad_norm": 0.7230385948585585, + "learning_rate": 7.0953436807095355e-06, + "loss": 0.0989, "step": 320 }, { - "epoch": 0.1425716189207195, - "grad_norm": 1.2029122587877374, - "learning_rate": 3.5627081021087685e-06, - "loss": 0.1098, + "epoch": 0.28507992895204265, + "grad_norm": 0.7968698079710659, + "learning_rate": 7.117516629711753e-06, + "loss": 0.0837, "step": 321 }, { - "epoch": 0.14301576726626694, - "grad_norm": 1.8995176312013884, - "learning_rate": 3.5738068812430636e-06, - "loss": 0.0944, + "epoch": 0.28596802841918295, + "grad_norm": 0.8034475120364327, + "learning_rate": 7.13968957871397e-06, + "loss": 0.0896, "step": 322 }, { - "epoch": 0.14345991561181434, - "grad_norm": 1.6602519149722867, - "learning_rate": 3.5849056603773586e-06, - "loss": 0.1243, + "epoch": 0.28685612788632325, + "grad_norm": 1.321430422974807, + "learning_rate": 7.161862527716187e-06, + "loss": 0.1078, "step": 323 }, { - "epoch": 0.14390406395736177, - "grad_norm": 1.6075958194272566, - "learning_rate": 3.5960044395116537e-06, - "loss": 0.1196, + "epoch": 0.2877442273534636, + "grad_norm": 0.9085910562132562, + "learning_rate": 7.184035476718404e-06, + "loss": 0.098, "step": 324 }, { - "epoch": 0.14434821230290917, - "grad_norm": 1.510226320322185, - "learning_rate": 3.607103218645949e-06, - "loss": 0.1422, + "epoch": 0.2886323268206039, + "grad_norm": 0.7201916573679772, + "learning_rate": 7.206208425720621e-06, + "loss": 0.102, "step": 325 }, { - "epoch": 0.14479236064845658, - "grad_norm": 1.2802794537606514, - "learning_rate": 3.6182019977802447e-06, - "loss": 0.1244, + "epoch": 0.2895204262877442, + "grad_norm": 0.9432494926172132, + "learning_rate": 7.228381374722838e-06, + "loss": 0.1075, "step": 326 }, { - "epoch": 0.145236508994004, - "grad_norm": 1.1595946058732067, - "learning_rate": 3.6293007769145398e-06, - "loss": 0.0929, + "epoch": 0.29040852575488457, + "grad_norm": 0.9498069827378723, + "learning_rate": 7.250554323725056e-06, + "loss": 0.0906, "step": 327 }, { - "epoch": 0.1456806573395514, - "grad_norm": 1.2381936978069086, - "learning_rate": 3.640399556048835e-06, - "loss": 0.1002, + "epoch": 0.2912966252220249, + "grad_norm": 0.9154528005863768, + "learning_rate": 7.272727272727273e-06, + "loss": 0.1153, "step": 328 }, { - "epoch": 0.14612480568509884, - "grad_norm": 2.4354535778190742, - "learning_rate": 3.6514983351831303e-06, - "loss": 0.1377, + "epoch": 0.2921847246891652, + "grad_norm": 0.7264447802913219, + "learning_rate": 7.29490022172949e-06, + "loss": 0.09, "step": 329 }, { - "epoch": 0.14656895403064624, - "grad_norm": 1.5157062223087485, - "learning_rate": 3.6625971143174254e-06, - "loss": 0.1206, + "epoch": 0.29307282415630553, + "grad_norm": 0.9685882300583614, + "learning_rate": 7.317073170731707e-06, + "loss": 0.1109, "step": 330 }, { - "epoch": 0.14701310237619364, - "grad_norm": 1.3681542301294034, - "learning_rate": 3.673695893451721e-06, - "loss": 0.1047, + "epoch": 0.29396092362344584, + "grad_norm": 1.1890607203846189, + "learning_rate": 7.3392461197339245e-06, + "loss": 0.1229, "step": 331 }, { - "epoch": 0.14745725072174107, - "grad_norm": 1.9762614541590338, - "learning_rate": 3.6847946725860155e-06, - "loss": 0.1249, + "epoch": 0.29484902309058614, + "grad_norm": 0.7286638790755859, + "learning_rate": 7.361419068736142e-06, + "loss": 0.0779, "step": 332 }, { - "epoch": 0.14790139906728847, - "grad_norm": 1.391859368616253, - "learning_rate": 3.695893451720311e-06, - "loss": 0.1015, + "epoch": 0.29573712255772644, + "grad_norm": 0.8339317001253415, + "learning_rate": 7.38359201773836e-06, + "loss": 0.1076, "step": 333 }, { - "epoch": 0.1483455474128359, - "grad_norm": 1.1072542539549668, - "learning_rate": 3.7069922308546065e-06, - "loss": 0.0931, + "epoch": 0.2966252220248668, + "grad_norm": 1.0598424258062524, + "learning_rate": 7.4057649667405775e-06, + "loss": 0.1043, "step": 334 }, { - "epoch": 0.1487896957583833, - "grad_norm": 1.4909584737380348, - "learning_rate": 3.7180910099889016e-06, - "loss": 0.1141, + "epoch": 0.2975133214920071, + "grad_norm": 0.8775990639677548, + "learning_rate": 7.427937915742795e-06, + "loss": 0.1021, "step": 335 }, { - "epoch": 0.1492338441039307, - "grad_norm": 1.7478929922992545, - "learning_rate": 3.7291897891231966e-06, - "loss": 0.0891, + "epoch": 0.2984014209591474, + "grad_norm": 0.8202595963041531, + "learning_rate": 7.450110864745012e-06, + "loss": 0.0938, "step": 336 }, { - "epoch": 0.14967799244947813, - "grad_norm": 1.5597867645297776, - "learning_rate": 3.7402885682574917e-06, - "loss": 0.128, + "epoch": 0.29928952042628776, + "grad_norm": 0.7157189268067363, + "learning_rate": 7.472283813747229e-06, + "loss": 0.0954, "step": 337 }, { - "epoch": 0.15012214079502553, - "grad_norm": 1.4083772110340225, - "learning_rate": 3.751387347391787e-06, - "loss": 0.1337, + "epoch": 0.30017761989342806, + "grad_norm": 0.9611095780525852, + "learning_rate": 7.494456762749446e-06, + "loss": 0.115, "step": 338 }, { - "epoch": 0.15056628914057296, - "grad_norm": 2.011956681151715, - "learning_rate": 3.7624861265260827e-06, - "loss": 0.1036, + "epoch": 0.30106571936056836, + "grad_norm": 0.7193806879098371, + "learning_rate": 7.516629711751664e-06, + "loss": 0.0838, "step": 339 }, { - "epoch": 0.15101043748612036, - "grad_norm": 2.154404794046358, - "learning_rate": 3.7735849056603777e-06, - "loss": 0.1104, + "epoch": 0.3019538188277087, + "grad_norm": 1.2379064911042004, + "learning_rate": 7.538802660753881e-06, + "loss": 0.1038, "step": 340 }, { - "epoch": 0.15145458583166777, - "grad_norm": 2.1106357904010316, - "learning_rate": 3.784683684794673e-06, - "loss": 0.1686, + "epoch": 0.302841918294849, + "grad_norm": 0.8380165411759865, + "learning_rate": 7.560975609756098e-06, + "loss": 0.096, "step": 341 }, { - "epoch": 0.1518987341772152, - "grad_norm": 1.6933676844964125, - "learning_rate": 3.7957824639289683e-06, - "loss": 0.1375, + "epoch": 0.3037300177619893, + "grad_norm": 0.7842272699282425, + "learning_rate": 7.583148558758315e-06, + "loss": 0.1068, "step": 342 }, { - "epoch": 0.1523428825227626, - "grad_norm": 2.205537988403305, - "learning_rate": 3.8068812430632634e-06, - "loss": 0.1513, + "epoch": 0.3046181172291297, + "grad_norm": 0.7903903611927442, + "learning_rate": 7.605321507760532e-06, + "loss": 0.0989, "step": 343 }, { - "epoch": 0.15278703086831003, - "grad_norm": 1.2058069327729946, - "learning_rate": 3.817980022197559e-06, - "loss": 0.091, + "epoch": 0.30550621669627, + "grad_norm": 0.8711590419288197, + "learning_rate": 7.627494456762749e-06, + "loss": 0.1033, "step": 344 }, { - "epoch": 0.15323117921385743, - "grad_norm": 1.400652563163732, - "learning_rate": 3.829078801331854e-06, - "loss": 0.0826, + "epoch": 0.3063943161634103, + "grad_norm": 0.7914571220374275, + "learning_rate": 7.649667405764967e-06, + "loss": 0.0882, "step": 345 }, { - "epoch": 0.15367532755940483, - "grad_norm": 2.057959821320217, - "learning_rate": 3.840177580466149e-06, - "loss": 0.118, + "epoch": 0.30728241563055064, + "grad_norm": 0.9047258915871558, + "learning_rate": 7.671840354767184e-06, + "loss": 0.1079, "step": 346 }, { - "epoch": 0.15411947590495226, - "grad_norm": 1.6604930714816526, - "learning_rate": 3.851276359600444e-06, - "loss": 0.1301, + "epoch": 0.30817051509769094, + "grad_norm": 0.6793865515086529, + "learning_rate": 7.694013303769402e-06, + "loss": 0.0827, "step": 347 }, { - "epoch": 0.15456362425049966, - "grad_norm": 1.033671229980745, - "learning_rate": 3.86237513873474e-06, - "loss": 0.0924, + "epoch": 0.30905861456483125, + "grad_norm": 0.9312853306792186, + "learning_rate": 7.716186252771619e-06, + "loss": 0.101, "step": 348 }, { - "epoch": 0.1550077725960471, - "grad_norm": 1.17967777436608, - "learning_rate": 3.873473917869034e-06, - "loss": 0.0978, + "epoch": 0.3099467140319716, + "grad_norm": 0.8024490916477357, + "learning_rate": 7.738359201773836e-06, + "loss": 0.1081, "step": 349 }, { - "epoch": 0.1554519209415945, - "grad_norm": 1.650470297879078, - "learning_rate": 3.88457269700333e-06, - "loss": 0.133, + "epoch": 0.3108348134991119, + "grad_norm": 0.5541290987926398, + "learning_rate": 7.760532150776053e-06, + "loss": 0.0715, "step": 350 }, { - "epoch": 0.1558960692871419, - "grad_norm": 1.47519341296619, - "learning_rate": 3.895671476137625e-06, - "loss": 0.1254, + "epoch": 0.3117229129662522, + "grad_norm": 0.8662311135063834, + "learning_rate": 7.782705099778272e-06, + "loss": 0.1014, "step": 351 }, { - "epoch": 0.15634021763268932, - "grad_norm": 1.4035761818876917, - "learning_rate": 3.90677025527192e-06, - "loss": 0.1183, + "epoch": 0.31261101243339257, + "grad_norm": 0.6050107863388078, + "learning_rate": 7.804878048780489e-06, + "loss": 0.0768, "step": 352 }, { - "epoch": 0.15678436597823672, - "grad_norm": 2.2555395385843036, - "learning_rate": 3.917869034406216e-06, - "loss": 0.1202, + "epoch": 0.31349911190053287, + "grad_norm": 1.0089660444158453, + "learning_rate": 7.827050997782706e-06, + "loss": 0.1169, "step": 353 }, { - "epoch": 0.15722851432378415, - "grad_norm": 1.4759637016708067, - "learning_rate": 3.92896781354051e-06, - "loss": 0.1104, + "epoch": 0.31438721136767317, + "grad_norm": 1.0024341093771925, + "learning_rate": 7.849223946784923e-06, + "loss": 0.1027, "step": 354 }, { - "epoch": 0.15767266266933155, - "grad_norm": 2.952687567266444, - "learning_rate": 3.940066592674806e-06, - "loss": 0.122, + "epoch": 0.31527531083481347, + "grad_norm": 0.8423589628695387, + "learning_rate": 7.87139689578714e-06, + "loss": 0.0979, "step": 355 }, { - "epoch": 0.15811681101487898, - "grad_norm": 1.1679111801200106, - "learning_rate": 3.951165371809101e-06, - "loss": 0.0905, + "epoch": 0.31616341030195383, + "grad_norm": 0.9539703915364602, + "learning_rate": 7.893569844789357e-06, + "loss": 0.089, "step": 356 }, { - "epoch": 0.15856095936042638, - "grad_norm": 1.660729161757867, - "learning_rate": 3.962264150943396e-06, - "loss": 0.1521, + "epoch": 0.31705150976909413, + "grad_norm": 1.3130291273077888, + "learning_rate": 7.915742793791576e-06, + "loss": 0.1095, "step": 357 }, { - "epoch": 0.15900510770597379, - "grad_norm": 1.3321535227222834, - "learning_rate": 3.9733629300776915e-06, - "loss": 0.1315, + "epoch": 0.31793960923623443, + "grad_norm": 0.8961830406796122, + "learning_rate": 7.937915742793793e-06, + "loss": 0.109, "step": 358 }, { - "epoch": 0.15944925605152122, - "grad_norm": 1.1798749965091133, - "learning_rate": 3.9844617092119866e-06, - "loss": 0.0852, + "epoch": 0.3188277087033748, + "grad_norm": 0.9541141598854677, + "learning_rate": 7.96008869179601e-06, + "loss": 0.0992, "step": 359 }, { - "epoch": 0.15989340439706862, - "grad_norm": 1.198919817717748, - "learning_rate": 3.9955604883462825e-06, - "loss": 0.1029, + "epoch": 0.3197158081705151, + "grad_norm": 1.139854942538658, + "learning_rate": 7.982261640798227e-06, + "loss": 0.0879, "step": 360 }, { - "epoch": 0.16033755274261605, - "grad_norm": 1.1799761966591196, - "learning_rate": 4.0066592674805775e-06, - "loss": 0.1009, + "epoch": 0.3206039076376554, + "grad_norm": 1.0321508216179291, + "learning_rate": 8.004434589800444e-06, + "loss": 0.0945, "step": 361 }, { - "epoch": 0.16078170108816345, - "grad_norm": 1.3843920770849105, - "learning_rate": 4.017758046614873e-06, - "loss": 0.1078, + "epoch": 0.32149200710479575, + "grad_norm": 0.8779246767220663, + "learning_rate": 8.026607538802662e-06, + "loss": 0.0941, "step": 362 }, { - "epoch": 0.16122584943371085, - "grad_norm": 1.230254034984121, - "learning_rate": 4.028856825749168e-06, - "loss": 0.1218, + "epoch": 0.32238010657193605, + "grad_norm": 1.0379702354670175, + "learning_rate": 8.048780487804879e-06, + "loss": 0.109, "step": 363 }, { - "epoch": 0.16166999777925828, - "grad_norm": 1.0307816837555734, - "learning_rate": 4.039955604883464e-06, - "loss": 0.1138, + "epoch": 0.32326820603907636, + "grad_norm": 0.954746767681055, + "learning_rate": 8.070953436807096e-06, + "loss": 0.1049, "step": 364 }, { - "epoch": 0.16211414612480568, - "grad_norm": 1.1369945329768874, - "learning_rate": 4.051054384017759e-06, - "loss": 0.0917, + "epoch": 0.3241563055062167, + "grad_norm": 0.895515034198231, + "learning_rate": 8.093126385809313e-06, + "loss": 0.0818, "step": 365 }, { - "epoch": 0.1625582944703531, - "grad_norm": 1.3026077981860287, - "learning_rate": 4.062153163152054e-06, - "loss": 0.1209, + "epoch": 0.325044404973357, + "grad_norm": 0.9922258085230082, + "learning_rate": 8.11529933481153e-06, + "loss": 0.1029, "step": 366 }, { - "epoch": 0.1630024428159005, - "grad_norm": 1.8570215631965663, - "learning_rate": 4.073251942286349e-06, - "loss": 0.103, + "epoch": 0.3259325044404973, + "grad_norm": 0.9009685079488943, + "learning_rate": 8.137472283813747e-06, + "loss": 0.095, "step": 367 }, { - "epoch": 0.1634465911614479, - "grad_norm": 1.4261357373143895, - "learning_rate": 4.084350721420644e-06, - "loss": 0.1117, + "epoch": 0.3268206039076377, + "grad_norm": 1.3774247173074172, + "learning_rate": 8.159645232815964e-06, + "loss": 0.1122, "step": 368 }, { - "epoch": 0.16389073950699534, - "grad_norm": 1.2842977064487127, - "learning_rate": 4.09544950055494e-06, - "loss": 0.0945, + "epoch": 0.327708703374778, + "grad_norm": 0.8714121817372873, + "learning_rate": 8.181818181818183e-06, + "loss": 0.0723, "step": 369 }, { - "epoch": 0.16433488785254274, - "grad_norm": 1.7942409210847103, - "learning_rate": 4.106548279689235e-06, - "loss": 0.1658, + "epoch": 0.3285968028419183, + "grad_norm": 0.7075961840882904, + "learning_rate": 8.2039911308204e-06, + "loss": 0.0776, "step": 370 }, { - "epoch": 0.16477903619809017, - "grad_norm": 1.2570893224456356, - "learning_rate": 4.11764705882353e-06, - "loss": 0.1238, + "epoch": 0.32948490230905864, + "grad_norm": 0.6315560397637983, + "learning_rate": 8.226164079822617e-06, + "loss": 0.0834, "step": 371 }, { - "epoch": 0.16522318454363757, - "grad_norm": 1.0685833693361235, - "learning_rate": 4.128745837957825e-06, - "loss": 0.0964, + "epoch": 0.33037300177619894, + "grad_norm": 0.8952416568044784, + "learning_rate": 8.248337028824834e-06, + "loss": 0.1081, "step": 372 }, { - "epoch": 0.16566733288918498, - "grad_norm": 1.2815898248385025, - "learning_rate": 4.13984461709212e-06, - "loss": 0.105, + "epoch": 0.33126110124333924, + "grad_norm": 0.9750507657786389, + "learning_rate": 8.270509977827051e-06, + "loss": 0.1021, "step": 373 }, { - "epoch": 0.1661114812347324, - "grad_norm": 1.64061710866675, - "learning_rate": 4.150943396226416e-06, - "loss": 0.1096, + "epoch": 0.3321492007104796, + "grad_norm": 0.6597951127128676, + "learning_rate": 8.292682926829268e-06, + "loss": 0.0772, "step": 374 }, { - "epoch": 0.1665556295802798, - "grad_norm": 1.3088826974530428, - "learning_rate": 4.16204217536071e-06, - "loss": 0.1211, + "epoch": 0.3330373001776199, + "grad_norm": 1.0302393327528312, + "learning_rate": 8.314855875831487e-06, + "loss": 0.0928, "step": 375 }, { - "epoch": 0.16699977792582724, - "grad_norm": 1.3003151707348597, - "learning_rate": 4.173140954495006e-06, - "loss": 0.1111, + "epoch": 0.3339253996447602, + "grad_norm": 0.714526637794305, + "learning_rate": 8.337028824833704e-06, + "loss": 0.0904, "step": 376 }, { - "epoch": 0.16744392627137464, - "grad_norm": 1.3124364780409907, - "learning_rate": 4.184239733629301e-06, - "loss": 0.1085, + "epoch": 0.33481349911190056, + "grad_norm": 1.16627438770377, + "learning_rate": 8.359201773835921e-06, + "loss": 0.1028, "step": 377 }, { - "epoch": 0.16788807461692204, - "grad_norm": 1.3347095844879298, - "learning_rate": 4.195338512763596e-06, - "loss": 0.1228, + "epoch": 0.33570159857904086, + "grad_norm": 0.7022637891988187, + "learning_rate": 8.381374722838139e-06, + "loss": 0.0864, "step": 378 }, { - "epoch": 0.16833222296246947, - "grad_norm": 1.3151237942729317, - "learning_rate": 4.206437291897892e-06, - "loss": 0.1041, + "epoch": 0.33658969804618116, + "grad_norm": 0.9803104096407456, + "learning_rate": 8.403547671840356e-06, + "loss": 0.1045, "step": 379 }, { - "epoch": 0.16877637130801687, - "grad_norm": 1.6082774402292863, - "learning_rate": 4.217536071032186e-06, - "loss": 0.0915, + "epoch": 0.33747779751332146, + "grad_norm": 0.8890791556325177, + "learning_rate": 8.425720620842573e-06, + "loss": 0.0928, "step": 380 }, { - "epoch": 0.1692205196535643, - "grad_norm": 1.2544764918032303, - "learning_rate": 4.228634850166482e-06, - "loss": 0.0903, + "epoch": 0.3383658969804618, + "grad_norm": 0.8699816063506878, + "learning_rate": 8.44789356984479e-06, + "loss": 0.1072, "step": 381 }, { - "epoch": 0.1696646679991117, - "grad_norm": 1.4709993024116135, - "learning_rate": 4.239733629300777e-06, - "loss": 0.1095, + "epoch": 0.3392539964476021, + "grad_norm": 1.3626912807800942, + "learning_rate": 8.470066518847007e-06, + "loss": 0.113, "step": 382 }, { - "epoch": 0.1701088163446591, - "grad_norm": 1.090059538683836, - "learning_rate": 4.250832408435072e-06, - "loss": 0.09, + "epoch": 0.3401420959147424, + "grad_norm": 0.796850832011232, + "learning_rate": 8.492239467849224e-06, + "loss": 0.0911, "step": 383 }, { - "epoch": 0.17055296469020653, - "grad_norm": 1.936001842072003, - "learning_rate": 4.2619311875693675e-06, - "loss": 0.125, + "epoch": 0.3410301953818828, + "grad_norm": 0.9340650220252851, + "learning_rate": 8.514412416851441e-06, + "loss": 0.0919, "step": 384 }, { - "epoch": 0.17099711303575393, - "grad_norm": 1.7118256852712324, - "learning_rate": 4.2730299667036625e-06, - "loss": 0.095, + "epoch": 0.3419182948490231, + "grad_norm": 0.6680290038713156, + "learning_rate": 8.536585365853658e-06, + "loss": 0.0865, "step": 385 }, { - "epoch": 0.17144126138130136, - "grad_norm": 2.0311105495228268, - "learning_rate": 4.2841287458379584e-06, - "loss": 0.1029, + "epoch": 0.3428063943161634, + "grad_norm": 1.0089022028279644, + "learning_rate": 8.558758314855875e-06, + "loss": 0.0971, "step": 386 }, { - "epoch": 0.17188540972684876, - "grad_norm": 1.444490027117435, - "learning_rate": 4.2952275249722535e-06, - "loss": 0.1232, + "epoch": 0.34369449378330375, + "grad_norm": 0.7667339002753097, + "learning_rate": 8.580931263858093e-06, + "loss": 0.0974, "step": 387 }, { - "epoch": 0.1723295580723962, - "grad_norm": 1.4378003146516516, - "learning_rate": 4.3063263041065486e-06, - "loss": 0.0932, + "epoch": 0.34458259325044405, + "grad_norm": 1.1397829731941995, + "learning_rate": 8.603104212860311e-06, + "loss": 0.0959, "step": 388 }, { - "epoch": 0.1727737064179436, - "grad_norm": 1.1043097695262434, - "learning_rate": 4.317425083240844e-06, - "loss": 0.0994, + "epoch": 0.34547069271758435, + "grad_norm": 0.830151012174381, + "learning_rate": 8.625277161862528e-06, + "loss": 0.0742, "step": 389 }, { - "epoch": 0.173217854763491, - "grad_norm": 1.1954032097891434, - "learning_rate": 4.328523862375139e-06, - "loss": 0.101, + "epoch": 0.3463587921847247, + "grad_norm": 1.0064411564475326, + "learning_rate": 8.647450110864746e-06, + "loss": 0.1114, "step": 390 }, { - "epoch": 0.17366200310903843, - "grad_norm": 1.6447959614931191, - "learning_rate": 4.339622641509435e-06, - "loss": 0.1343, + "epoch": 0.347246891651865, + "grad_norm": 0.9085306016933736, + "learning_rate": 8.669623059866963e-06, + "loss": 0.0977, "step": 391 }, { - "epoch": 0.17410615145458583, - "grad_norm": 1.5660886998105679, - "learning_rate": 4.35072142064373e-06, - "loss": 0.1052, + "epoch": 0.3481349911190053, + "grad_norm": 0.8979237778722817, + "learning_rate": 8.69179600886918e-06, + "loss": 0.1125, "step": 392 }, { - "epoch": 0.17455029980013326, - "grad_norm": 1.129037326613576, - "learning_rate": 4.361820199778025e-06, - "loss": 0.0949, + "epoch": 0.34902309058614567, + "grad_norm": 1.101428725819367, + "learning_rate": 8.713968957871397e-06, + "loss": 0.1224, "step": 393 }, { - "epoch": 0.17499444814568066, - "grad_norm": 0.9299986418563283, - "learning_rate": 4.37291897891232e-06, - "loss": 0.091, + "epoch": 0.34991119005328597, + "grad_norm": 0.7879624686592628, + "learning_rate": 8.736141906873616e-06, + "loss": 0.1049, "step": 394 }, { - "epoch": 0.17543859649122806, - "grad_norm": 1.5748400271295198, - "learning_rate": 4.384017758046616e-06, - "loss": 0.1688, + "epoch": 0.35079928952042627, + "grad_norm": 0.6892827477349056, + "learning_rate": 8.758314855875833e-06, + "loss": 0.0864, "step": 395 }, { - "epoch": 0.1758827448367755, - "grad_norm": 0.8778899838246078, - "learning_rate": 4.395116537180911e-06, - "loss": 0.0983, + "epoch": 0.35168738898756663, + "grad_norm": 0.8862673113489934, + "learning_rate": 8.78048780487805e-06, + "loss": 0.0853, "step": 396 }, { - "epoch": 0.1763268931823229, - "grad_norm": 1.1516100437124952, - "learning_rate": 4.406215316315206e-06, - "loss": 0.1201, + "epoch": 0.35257548845470693, + "grad_norm": 0.8648624534546676, + "learning_rate": 8.802660753880267e-06, + "loss": 0.0985, "step": 397 }, { - "epoch": 0.17677104152787032, - "grad_norm": 0.9883123409205935, - "learning_rate": 4.417314095449501e-06, - "loss": 0.0693, + "epoch": 0.35346358792184723, + "grad_norm": 0.7441627676016863, + "learning_rate": 8.824833702882484e-06, + "loss": 0.0965, "step": 398 }, { - "epoch": 0.17721518987341772, - "grad_norm": 1.367128447993114, - "learning_rate": 4.428412874583796e-06, - "loss": 0.1009, + "epoch": 0.3543516873889876, + "grad_norm": 0.9456738596772384, + "learning_rate": 8.847006651884701e-06, + "loss": 0.1117, "step": 399 }, { - "epoch": 0.17765933821896512, - "grad_norm": 1.2639479044263988, - "learning_rate": 4.439511653718092e-06, - "loss": 0.093, + "epoch": 0.3552397868561279, + "grad_norm": 0.677577657857005, + "learning_rate": 8.869179600886918e-06, + "loss": 0.0689, "step": 400 }, { - "epoch": 0.17810348656451255, - "grad_norm": 1.284067214420426, - "learning_rate": 4.450610432852386e-06, - "loss": 0.1029, + "epoch": 0.3561278863232682, + "grad_norm": 0.7861698907374766, + "learning_rate": 8.891352549889135e-06, + "loss": 0.0894, "step": 401 }, { - "epoch": 0.17854763491005995, - "grad_norm": 0.9723989283162116, - "learning_rate": 4.461709211986682e-06, - "loss": 0.093, + "epoch": 0.35701598579040855, + "grad_norm": 0.9404806421842811, + "learning_rate": 8.913525498891353e-06, + "loss": 0.0966, "step": 402 }, { - "epoch": 0.17899178325560738, - "grad_norm": 1.1814808630811793, - "learning_rate": 4.472807991120977e-06, - "loss": 0.1004, + "epoch": 0.35790408525754885, + "grad_norm": 0.9318679815099119, + "learning_rate": 8.93569844789357e-06, + "loss": 0.0911, "step": 403 }, { - "epoch": 0.17943593160115479, - "grad_norm": 1.2100875095234747, - "learning_rate": 4.483906770255272e-06, - "loss": 0.1165, + "epoch": 0.35879218472468916, + "grad_norm": 0.5912566380660288, + "learning_rate": 8.957871396895787e-06, + "loss": 0.0838, "step": 404 }, { - "epoch": 0.1798800799467022, - "grad_norm": 1.2480781051362362, - "learning_rate": 4.495005549389568e-06, - "loss": 0.1086, + "epoch": 0.35968028419182946, + "grad_norm": 0.9873336619294049, + "learning_rate": 8.980044345898006e-06, + "loss": 0.1021, "step": 405 }, { - "epoch": 0.18032422829224962, - "grad_norm": 1.4659555944931602, - "learning_rate": 4.506104328523862e-06, - "loss": 0.0966, + "epoch": 0.3605683836589698, + "grad_norm": 0.9805051718815688, + "learning_rate": 9.002217294900223e-06, + "loss": 0.1159, "step": 406 }, { - "epoch": 0.18076837663779702, - "grad_norm": 1.2905749286816455, - "learning_rate": 4.517203107658158e-06, - "loss": 0.1308, + "epoch": 0.3614564831261101, + "grad_norm": 0.7038213966191287, + "learning_rate": 9.02439024390244e-06, + "loss": 0.0887, "step": 407 }, { - "epoch": 0.18121252498334445, - "grad_norm": 1.2083135245558752, - "learning_rate": 4.528301886792453e-06, - "loss": 0.139, + "epoch": 0.3623445825932504, + "grad_norm": 0.7583067296422552, + "learning_rate": 9.046563192904657e-06, + "loss": 0.0984, "step": 408 }, { - "epoch": 0.18165667332889185, - "grad_norm": 1.2677821305643562, - "learning_rate": 4.539400665926748e-06, - "loss": 0.1522, + "epoch": 0.3632326820603908, + "grad_norm": 0.7349365833394198, + "learning_rate": 9.068736141906874e-06, + "loss": 0.0868, "step": 409 }, { - "epoch": 0.18210082167443925, - "grad_norm": 0.8309366049632025, - "learning_rate": 4.5504994450610434e-06, - "loss": 0.0955, + "epoch": 0.3641207815275311, + "grad_norm": 0.74961148956145, + "learning_rate": 9.090909090909091e-06, + "loss": 0.0913, "step": 410 }, { - "epoch": 0.18254497001998668, - "grad_norm": 1.6708204443941888, - "learning_rate": 4.5615982241953385e-06, - "loss": 0.0983, + "epoch": 0.3650088809946714, + "grad_norm": 0.6621419287165237, + "learning_rate": 9.113082039911308e-06, + "loss": 0.0869, "step": 411 }, { - "epoch": 0.18298911836553408, - "grad_norm": 1.8028863536801523, - "learning_rate": 4.572697003329634e-06, - "loss": 0.1141, + "epoch": 0.36589698046181174, + "grad_norm": 0.6809137513161271, + "learning_rate": 9.135254988913527e-06, + "loss": 0.0867, "step": 412 }, { - "epoch": 0.1834332667110815, - "grad_norm": 1.6039168610165708, - "learning_rate": 4.5837957824639295e-06, - "loss": 0.14, + "epoch": 0.36678507992895204, + "grad_norm": 0.7615144304741818, + "learning_rate": 9.157427937915744e-06, + "loss": 0.0999, "step": 413 }, { - "epoch": 0.1838774150566289, - "grad_norm": 1.3909894579389195, - "learning_rate": 4.5948945615982245e-06, - "loss": 0.0924, + "epoch": 0.36767317939609234, + "grad_norm": 0.6837257281417417, + "learning_rate": 9.179600886917961e-06, + "loss": 0.0912, "step": 414 }, { - "epoch": 0.1843215634021763, - "grad_norm": 1.444002762358773, - "learning_rate": 4.60599334073252e-06, - "loss": 0.0882, + "epoch": 0.3685612788632327, + "grad_norm": 0.7497215509952461, + "learning_rate": 9.201773835920178e-06, + "loss": 0.1024, "step": 415 }, { - "epoch": 0.18476571174772374, - "grad_norm": 1.3517930787179577, - "learning_rate": 4.617092119866815e-06, - "loss": 0.1198, + "epoch": 0.369449378330373, + "grad_norm": 0.8930543281812162, + "learning_rate": 9.223946784922395e-06, + "loss": 0.0907, "step": 416 }, { - "epoch": 0.18520986009327114, - "grad_norm": 0.9651918865616642, - "learning_rate": 4.628190899001111e-06, - "loss": 0.0794, + "epoch": 0.3703374777975133, + "grad_norm": 0.65736242707052, + "learning_rate": 9.246119733924612e-06, + "loss": 0.087, "step": 417 }, { - "epoch": 0.18565400843881857, - "grad_norm": 1.3087574560794024, - "learning_rate": 4.639289678135406e-06, - "loss": 0.1166, + "epoch": 0.37122557726465366, + "grad_norm": 0.7099665406423, + "learning_rate": 9.268292682926831e-06, + "loss": 0.0999, "step": 418 }, { - "epoch": 0.18609815678436598, - "grad_norm": 1.1029726712835008, - "learning_rate": 4.650388457269701e-06, - "loss": 0.106, + "epoch": 0.37211367673179396, + "grad_norm": 0.686234581166355, + "learning_rate": 9.290465631929048e-06, + "loss": 0.0898, "step": 419 }, { - "epoch": 0.18654230512991338, - "grad_norm": 1.481760865085893, - "learning_rate": 4.661487236403996e-06, - "loss": 0.1225, + "epoch": 0.37300177619893427, + "grad_norm": 0.6747224045550618, + "learning_rate": 9.312638580931265e-06, + "loss": 0.0771, "step": 420 }, { - "epoch": 0.1869864534754608, - "grad_norm": 1.125656650735202, - "learning_rate": 4.672586015538291e-06, - "loss": 0.0894, + "epoch": 0.3738898756660746, + "grad_norm": 0.6014800643412591, + "learning_rate": 9.334811529933483e-06, + "loss": 0.0811, "step": 421 }, { - "epoch": 0.1874306018210082, - "grad_norm": 1.1643460761006563, - "learning_rate": 4.683684794672587e-06, - "loss": 0.1049, + "epoch": 0.3747779751332149, + "grad_norm": 0.7827277868490677, + "learning_rate": 9.3569844789357e-06, + "loss": 0.1097, "step": 422 }, { - "epoch": 0.18787475016655564, - "grad_norm": 1.2081839286402132, - "learning_rate": 4.694783573806882e-06, - "loss": 0.0984, + "epoch": 0.3756660746003552, + "grad_norm": 0.7422145310046832, + "learning_rate": 9.379157427937917e-06, + "loss": 0.0797, "step": 423 }, { - "epoch": 0.18831889851210304, - "grad_norm": 1.0952768068994485, - "learning_rate": 4.705882352941177e-06, - "loss": 0.1083, + "epoch": 0.3765541740674956, + "grad_norm": 0.8317608189040117, + "learning_rate": 9.401330376940134e-06, + "loss": 0.0802, "step": 424 }, { - "epoch": 0.18876304685765047, - "grad_norm": 1.993217784490899, - "learning_rate": 4.716981132075472e-06, - "loss": 0.1198, + "epoch": 0.3774422735346359, + "grad_norm": 0.8116023813284492, + "learning_rate": 9.423503325942351e-06, + "loss": 0.0777, "step": 425 }, { - "epoch": 0.18920719520319787, - "grad_norm": 1.4709865866324339, - "learning_rate": 4.728079911209768e-06, - "loss": 0.1004, + "epoch": 0.3783303730017762, + "grad_norm": 0.8589221898382653, + "learning_rate": 9.445676274944568e-06, + "loss": 0.0873, "step": 426 }, { - "epoch": 0.18965134354874527, - "grad_norm": 1.1800698065694786, - "learning_rate": 4.739178690344062e-06, - "loss": 0.0942, + "epoch": 0.37921847246891655, + "grad_norm": 0.9066303214042967, + "learning_rate": 9.467849223946785e-06, + "loss": 0.0965, "step": 427 }, { - "epoch": 0.1900954918942927, - "grad_norm": 0.9004404155709735, - "learning_rate": 4.750277469478358e-06, - "loss": 0.0879, + "epoch": 0.38010657193605685, + "grad_norm": 1.13614698496287, + "learning_rate": 9.490022172949002e-06, + "loss": 0.0909, "step": 428 }, { - "epoch": 0.1905396402398401, - "grad_norm": 1.3988615855599944, - "learning_rate": 4.761376248612653e-06, - "loss": 0.1313, + "epoch": 0.38099467140319715, + "grad_norm": 0.7486420726064874, + "learning_rate": 9.51219512195122e-06, + "loss": 0.1123, "step": 429 }, { - "epoch": 0.19098378858538753, - "grad_norm": 1.2097973380846636, - "learning_rate": 4.772475027746948e-06, - "loss": 0.1192, + "epoch": 0.38188277087033745, + "grad_norm": 0.6648813988527883, + "learning_rate": 9.534368070953438e-06, + "loss": 0.0729, "step": 430 }, { - "epoch": 0.19142793693093493, - "grad_norm": 1.795772492183597, - "learning_rate": 4.783573806881244e-06, - "loss": 0.1319, + "epoch": 0.3827708703374778, + "grad_norm": 1.0031946962163094, + "learning_rate": 9.556541019955655e-06, + "loss": 0.098, "step": 431 }, { - "epoch": 0.19187208527648233, - "grad_norm": 1.5438576982175642, - "learning_rate": 4.794672586015538e-06, - "loss": 0.156, + "epoch": 0.3836589698046181, + "grad_norm": 0.7284073775670307, + "learning_rate": 9.578713968957872e-06, + "loss": 0.0922, "step": 432 }, { - "epoch": 0.19231623362202976, - "grad_norm": 1.4462148356085274, - "learning_rate": 4.805771365149834e-06, - "loss": 0.0977, + "epoch": 0.3845470692717584, + "grad_norm": 0.8118365633131759, + "learning_rate": 9.60088691796009e-06, + "loss": 0.0969, "step": 433 }, { - "epoch": 0.19276038196757717, - "grad_norm": 1.1013117212825567, - "learning_rate": 4.816870144284129e-06, - "loss": 0.123, + "epoch": 0.38543516873889877, + "grad_norm": 0.6878668150492785, + "learning_rate": 9.623059866962307e-06, + "loss": 0.0864, "step": 434 }, { - "epoch": 0.1932045303131246, - "grad_norm": 1.473729194266236, - "learning_rate": 4.827968923418424e-06, - "loss": 0.1219, + "epoch": 0.38632326820603907, + "grad_norm": 0.8369281571161575, + "learning_rate": 9.645232815964524e-06, + "loss": 0.0905, "step": 435 }, { - "epoch": 0.193648678658672, - "grad_norm": 1.060585950094457, - "learning_rate": 4.839067702552719e-06, - "loss": 0.0867, + "epoch": 0.3872113676731794, + "grad_norm": 0.7848911958376642, + "learning_rate": 9.667405764966743e-06, + "loss": 0.0783, "step": 436 }, { - "epoch": 0.1940928270042194, - "grad_norm": 1.1939948263664504, - "learning_rate": 4.8501664816870145e-06, - "loss": 0.1221, + "epoch": 0.38809946714031973, + "grad_norm": 0.7169928750946686, + "learning_rate": 9.68957871396896e-06, + "loss": 0.0809, "step": 437 }, { - "epoch": 0.19453697534976683, - "grad_norm": 1.0411525410331977, - "learning_rate": 4.86126526082131e-06, - "loss": 0.0955, + "epoch": 0.38898756660746003, + "grad_norm": 0.8595443954435857, + "learning_rate": 9.711751662971177e-06, + "loss": 0.0865, "step": 438 }, { - "epoch": 0.19498112369531423, - "grad_norm": 1.3074349171537876, - "learning_rate": 4.8723640399556054e-06, - "loss": 0.1092, + "epoch": 0.38987566607460034, + "grad_norm": 0.8347165131840893, + "learning_rate": 9.733924611973394e-06, + "loss": 0.0981, "step": 439 }, { - "epoch": 0.19542527204086166, - "grad_norm": 2.556790721991839, - "learning_rate": 4.8834628190899005e-06, - "loss": 0.1144, + "epoch": 0.3907637655417407, + "grad_norm": 0.7491623779751316, + "learning_rate": 9.756097560975611e-06, + "loss": 0.0752, "step": 440 }, { - "epoch": 0.19586942038640906, - "grad_norm": 1.5219706664834065, - "learning_rate": 4.894561598224196e-06, - "loss": 0.1353, + "epoch": 0.391651865008881, + "grad_norm": 0.5719404469911055, + "learning_rate": 9.778270509977828e-06, + "loss": 0.09, "step": 441 }, { - "epoch": 0.19631356873195646, - "grad_norm": 1.5771009476283926, - "learning_rate": 4.905660377358491e-06, - "loss": 0.1139, + "epoch": 0.3925399644760213, + "grad_norm": 1.0813454316817122, + "learning_rate": 9.800443458980045e-06, + "loss": 0.0761, "step": 442 }, { - "epoch": 0.1967577170775039, - "grad_norm": 1.7247841251255984, - "learning_rate": 4.9167591564927866e-06, - "loss": 0.0969, + "epoch": 0.39342806394316165, + "grad_norm": 0.7034557816282575, + "learning_rate": 9.822616407982262e-06, + "loss": 0.0788, "step": 443 }, { - "epoch": 0.1972018654230513, - "grad_norm": 1.118860834302166, - "learning_rate": 4.927857935627082e-06, - "loss": 0.0955, + "epoch": 0.39431616341030196, + "grad_norm": 0.6503808869091405, + "learning_rate": 9.84478935698448e-06, + "loss": 0.0735, "step": 444 }, { - "epoch": 0.19764601376859872, - "grad_norm": 1.4715029598755733, - "learning_rate": 4.938956714761377e-06, - "loss": 0.1001, + "epoch": 0.39520426287744226, + "grad_norm": 0.8255682094309857, + "learning_rate": 9.866962305986696e-06, + "loss": 0.0745, "step": 445 }, { - "epoch": 0.19809016211414612, - "grad_norm": 1.5680153762667055, - "learning_rate": 4.950055493895672e-06, - "loss": 0.1222, + "epoch": 0.3960923623445826, + "grad_norm": 0.623187790209699, + "learning_rate": 9.889135254988914e-06, + "loss": 0.0652, "step": 446 }, { - "epoch": 0.19853431045969352, - "grad_norm": 1.7877852329091748, - "learning_rate": 4.961154273029967e-06, - "loss": 0.1057, + "epoch": 0.3969804618117229, + "grad_norm": 1.0289119443547698, + "learning_rate": 9.91130820399113e-06, + "loss": 0.0912, "step": 447 }, { - "epoch": 0.19897845880524095, - "grad_norm": 1.1650327458270198, - "learning_rate": 4.972253052164263e-06, - "loss": 0.1242, + "epoch": 0.3978685612788632, + "grad_norm": 0.7159877064925825, + "learning_rate": 9.93348115299335e-06, + "loss": 0.094, "step": 448 }, { - "epoch": 0.19942260715078836, - "grad_norm": 1.0753006649574548, - "learning_rate": 4.983351831298557e-06, - "loss": 0.0912, + "epoch": 0.3987566607460036, + "grad_norm": 0.6928649401766506, + "learning_rate": 9.955654101995567e-06, + "loss": 0.0766, "step": 449 }, { - "epoch": 0.19986675549633579, - "grad_norm": 1.546863382694724, - "learning_rate": 4.994450610432853e-06, - "loss": 0.123, + "epoch": 0.3996447602131439, + "grad_norm": 1.7774653552366473, + "learning_rate": 9.977827050997784e-06, + "loss": 0.0987, "step": 450 }, { - "epoch": 0.2003109038418832, - "grad_norm": 1.3219021070497294, - "learning_rate": 5.005549389567148e-06, - "loss": 0.1167, + "epoch": 0.4005328596802842, + "grad_norm": 0.8510762775390016, + "learning_rate": 1e-05, + "loss": 0.0892, "step": 451 }, { - "epoch": 0.2007550521874306, - "grad_norm": 1.3154364501774296, - "learning_rate": 5.016648168701444e-06, - "loss": 0.1023, + "epoch": 0.40142095914742454, + "grad_norm": 0.9978840214000684, + "learning_rate": 9.999998497942616e-06, + "loss": 0.0956, "step": 452 }, { - "epoch": 0.20119920053297802, - "grad_norm": 1.2732051452073168, - "learning_rate": 5.027746947835739e-06, - "loss": 0.0717, + "epoch": 0.40230905861456484, + "grad_norm": 0.7864949846643574, + "learning_rate": 9.999993991771364e-06, + "loss": 0.0835, "step": 453 }, { - "epoch": 0.20164334887852542, - "grad_norm": 1.3206735580145712, - "learning_rate": 5.038845726970034e-06, - "loss": 0.1095, + "epoch": 0.40319715808170514, + "grad_norm": 0.8536233008347437, + "learning_rate": 9.999986481488953e-06, + "loss": 0.0972, "step": 454 }, { - "epoch": 0.20208749722407285, - "grad_norm": 1.0332614998616947, - "learning_rate": 5.049944506104328e-06, - "loss": 0.0845, + "epoch": 0.40408525754884544, + "grad_norm": 0.9969692666322935, + "learning_rate": 9.999975967099894e-06, + "loss": 0.0823, "step": 455 }, { - "epoch": 0.20253164556962025, - "grad_norm": 1.211744462644113, - "learning_rate": 5.061043285238624e-06, - "loss": 0.1066, + "epoch": 0.4049733570159858, + "grad_norm": 0.7039412746032423, + "learning_rate": 9.999962448610504e-06, + "loss": 0.0871, "step": 456 }, { - "epoch": 0.20297579391516768, - "grad_norm": 1.3376009797970827, - "learning_rate": 5.072142064372919e-06, - "loss": 0.0905, + "epoch": 0.4058614564831261, + "grad_norm": 0.7008530432074359, + "learning_rate": 9.999945926028907e-06, + "loss": 0.0772, "step": 457 }, { - "epoch": 0.20341994226071508, - "grad_norm": 1.2905140024192596, - "learning_rate": 5.083240843507214e-06, - "loss": 0.1011, + "epoch": 0.4067495559502664, + "grad_norm": 0.9569874508135047, + "learning_rate": 9.99992639936503e-06, + "loss": 0.1082, "step": 458 }, { - "epoch": 0.20386409060626248, - "grad_norm": 1.1671668945164224, - "learning_rate": 5.09433962264151e-06, - "loss": 0.1027, + "epoch": 0.40763765541740676, + "grad_norm": 0.696882654288798, + "learning_rate": 9.999903868630602e-06, + "loss": 0.0896, "step": 459 }, { - "epoch": 0.2043082389518099, - "grad_norm": 2.1761489486755363, - "learning_rate": 5.105438401775805e-06, - "loss": 0.1093, + "epoch": 0.40852575488454707, + "grad_norm": 0.6689970831734524, + "learning_rate": 9.999878333839165e-06, + "loss": 0.0828, "step": 460 }, { - "epoch": 0.2047523872973573, - "grad_norm": 1.5496188187186781, - "learning_rate": 5.1165371809101e-06, - "loss": 0.1241, + "epoch": 0.40941385435168737, + "grad_norm": 0.9693346223383633, + "learning_rate": 9.999849795006055e-06, + "loss": 0.0935, "step": 461 }, { - "epoch": 0.20519653564290474, - "grad_norm": 1.555761458410687, - "learning_rate": 5.127635960044396e-06, - "loss": 0.1119, + "epoch": 0.4103019538188277, + "grad_norm": 0.9283219979748674, + "learning_rate": 9.999818252148425e-06, + "loss": 0.082, "step": 462 }, { - "epoch": 0.20564068398845214, - "grad_norm": 1.4018925493874805, - "learning_rate": 5.138734739178691e-06, - "loss": 0.149, + "epoch": 0.411190053285968, + "grad_norm": 0.6806422342640777, + "learning_rate": 9.999783705285223e-06, + "loss": 0.0763, "step": 463 }, { - "epoch": 0.20608483233399955, - "grad_norm": 1.2908877898264874, - "learning_rate": 5.1498335183129855e-06, - "loss": 0.1255, + "epoch": 0.41207815275310833, + "grad_norm": 0.8149452612499798, + "learning_rate": 9.999746154437206e-06, + "loss": 0.0974, "step": 464 }, { - "epoch": 0.20652898067954698, - "grad_norm": 1.2526168814511602, - "learning_rate": 5.1609322974472806e-06, - "loss": 0.1145, + "epoch": 0.4129662522202487, + "grad_norm": 0.9378706598051992, + "learning_rate": 9.999705599626935e-06, + "loss": 0.1065, "step": 465 }, { - "epoch": 0.20697312902509438, - "grad_norm": 1.1641091501138674, - "learning_rate": 5.1720310765815765e-06, - "loss": 0.1136, + "epoch": 0.413854351687389, + "grad_norm": 0.6190653939919241, + "learning_rate": 9.999662040878779e-06, + "loss": 0.072, "step": 466 }, { - "epoch": 0.2074172773706418, - "grad_norm": 1.1082496575030598, - "learning_rate": 5.1831298557158716e-06, - "loss": 0.0897, + "epoch": 0.4147424511545293, + "grad_norm": 1.2939605591798196, + "learning_rate": 9.999615478218904e-06, + "loss": 0.0886, "step": 467 }, { - "epoch": 0.2078614257161892, - "grad_norm": 1.174903547638247, - "learning_rate": 5.194228634850167e-06, - "loss": 0.1121, + "epoch": 0.41563055062166965, + "grad_norm": 0.909327391547214, + "learning_rate": 9.99956591167529e-06, + "loss": 0.0864, "step": 468 }, { - "epoch": 0.2083055740617366, - "grad_norm": 1.0600460968712155, - "learning_rate": 5.2053274139844625e-06, - "loss": 0.0819, + "epoch": 0.41651865008880995, + "grad_norm": 0.7099744376001921, + "learning_rate": 9.999513341277718e-06, + "loss": 0.0868, "step": 469 }, { - "epoch": 0.20874972240728404, - "grad_norm": 1.3263800742507028, - "learning_rate": 5.216426193118758e-06, - "loss": 0.084, + "epoch": 0.41740674955595025, + "grad_norm": 0.7836599608091721, + "learning_rate": 9.99945776705777e-06, + "loss": 0.0807, "step": 470 }, { - "epoch": 0.20919387075283144, - "grad_norm": 1.604108989764318, - "learning_rate": 5.227524972253053e-06, - "loss": 0.1337, + "epoch": 0.4182948490230906, + "grad_norm": 0.9494608810490509, + "learning_rate": 9.99939918904884e-06, + "loss": 0.0936, "step": 471 }, { - "epoch": 0.20963801909837887, - "grad_norm": 0.9363413921696734, - "learning_rate": 5.238623751387349e-06, - "loss": 0.0863, + "epoch": 0.4191829484902309, + "grad_norm": 0.8376853679370798, + "learning_rate": 9.99933760728612e-06, + "loss": 0.0889, "step": 472 }, { - "epoch": 0.21008216744392627, - "grad_norm": 2.0569016097307578, - "learning_rate": 5.249722530521643e-06, - "loss": 0.1218, + "epoch": 0.4200710479573712, + "grad_norm": 0.5773413335287602, + "learning_rate": 9.999273021806613e-06, + "loss": 0.0773, "step": 473 }, { - "epoch": 0.21052631578947367, - "grad_norm": 2.0907599336843674, - "learning_rate": 5.260821309655938e-06, - "loss": 0.1032, + "epoch": 0.42095914742451157, + "grad_norm": 1.0064143721408958, + "learning_rate": 9.99920543264912e-06, + "loss": 0.1058, "step": 474 }, { - "epoch": 0.2109704641350211, - "grad_norm": 1.03795187986254, - "learning_rate": 5.271920088790233e-06, - "loss": 0.1002, + "epoch": 0.4218472468916519, + "grad_norm": 0.5748082696643984, + "learning_rate": 9.999134839854252e-06, + "loss": 0.0741, "step": 475 }, { - "epoch": 0.2114146124805685, - "grad_norm": 1.6168543989206048, - "learning_rate": 5.283018867924529e-06, - "loss": 0.1292, + "epoch": 0.4227353463587922, + "grad_norm": 0.878238503380513, + "learning_rate": 9.999061243464424e-06, + "loss": 0.0955, "step": 476 }, { - "epoch": 0.21185876082611593, - "grad_norm": 1.555835178594497, - "learning_rate": 5.294117647058824e-06, - "loss": 0.0956, + "epoch": 0.42362344582593253, + "grad_norm": 0.9398055447662498, + "learning_rate": 9.99898464352385e-06, + "loss": 0.0935, "step": 477 }, { - "epoch": 0.21230290917166333, - "grad_norm": 1.4750911257012305, - "learning_rate": 5.305216426193119e-06, - "loss": 0.1025, + "epoch": 0.42451154529307283, + "grad_norm": 0.8328901593585009, + "learning_rate": 9.998905040078557e-06, + "loss": 0.0806, "step": 478 }, { - "epoch": 0.21274705751721074, - "grad_norm": 1.1903302332979158, - "learning_rate": 5.316315205327415e-06, - "loss": 0.0927, + "epoch": 0.42539964476021314, + "grad_norm": 0.7584912885126325, + "learning_rate": 9.998822433176371e-06, + "loss": 0.0899, "step": 479 }, { - "epoch": 0.21319120586275817, - "grad_norm": 1.799907699381259, - "learning_rate": 5.32741398446171e-06, - "loss": 0.1554, + "epoch": 0.42628774422735344, + "grad_norm": 0.9436305122294443, + "learning_rate": 9.998736822866926e-06, + "loss": 0.0907, "step": 480 }, { - "epoch": 0.21363535420830557, - "grad_norm": 1.1084539632711692, - "learning_rate": 5.338512763596004e-06, - "loss": 0.0963, + "epoch": 0.4271758436944938, + "grad_norm": 0.7921723045923234, + "learning_rate": 9.998648209201655e-06, + "loss": 0.093, "step": 481 }, { - "epoch": 0.214079502553853, - "grad_norm": 1.0531894918937277, - "learning_rate": 5.3496115427303e-06, - "loss": 0.0808, + "epoch": 0.4280639431616341, + "grad_norm": 0.8406677566067806, + "learning_rate": 9.998556592233803e-06, + "loss": 0.1007, "step": 482 }, { - "epoch": 0.2145236508994004, - "grad_norm": 1.4912911807270512, - "learning_rate": 5.360710321864595e-06, - "loss": 0.1053, + "epoch": 0.4289520426287744, + "grad_norm": 0.6616072134539779, + "learning_rate": 9.998461972018414e-06, + "loss": 0.0821, "step": 483 }, { - "epoch": 0.2149677992449478, - "grad_norm": 1.1603558318841067, - "learning_rate": 5.37180910099889e-06, - "loss": 0.0859, + "epoch": 0.42984014209591476, + "grad_norm": 0.921893217099755, + "learning_rate": 9.998364348612338e-06, + "loss": 0.0673, "step": 484 }, { - "epoch": 0.21541194759049523, - "grad_norm": 1.2647862898402058, - "learning_rate": 5.382907880133186e-06, - "loss": 0.1165, + "epoch": 0.43072824156305506, + "grad_norm": 0.7245471556467965, + "learning_rate": 9.998263722074228e-06, + "loss": 0.0726, "step": 485 }, { - "epoch": 0.21585609593604263, - "grad_norm": 1.3294179512793625, - "learning_rate": 5.394006659267481e-06, - "loss": 0.1006, + "epoch": 0.43161634103019536, + "grad_norm": 0.7657455636678407, + "learning_rate": 9.998160092464547e-06, + "loss": 0.0849, "step": 486 }, { - "epoch": 0.21630024428159006, - "grad_norm": 1.288870748090468, - "learning_rate": 5.405105438401776e-06, - "loss": 0.1174, + "epoch": 0.4325044404973357, + "grad_norm": 0.7852019441805916, + "learning_rate": 9.998053459845552e-06, + "loss": 0.0778, "step": 487 }, { - "epoch": 0.21674439262713746, - "grad_norm": 1.6526073205290404, - "learning_rate": 5.416204217536072e-06, - "loss": 0.1278, + "epoch": 0.433392539964476, + "grad_norm": 0.9016164990467911, + "learning_rate": 9.997943824281313e-06, + "loss": 0.0828, "step": 488 }, { - "epoch": 0.2171885409726849, - "grad_norm": 1.1625080059645232, - "learning_rate": 5.427302996670367e-06, - "loss": 0.0962, + "epoch": 0.4342806394316163, + "grad_norm": 0.8165736472227916, + "learning_rate": 9.997831185837705e-06, + "loss": 0.0738, "step": 489 }, { - "epoch": 0.2176326893182323, - "grad_norm": 1.0239562074655422, - "learning_rate": 5.4384017758046615e-06, - "loss": 0.1137, + "epoch": 0.4351687388987567, + "grad_norm": 0.7094229690268178, + "learning_rate": 9.997715544582398e-06, + "loss": 0.0955, "step": 490 }, { - "epoch": 0.2180768376637797, - "grad_norm": 1.1775834154919058, - "learning_rate": 5.4495005549389565e-06, - "loss": 0.1167, + "epoch": 0.436056838365897, + "grad_norm": 0.8289115556465729, + "learning_rate": 9.997596900584875e-06, + "loss": 0.0721, "step": 491 }, { - "epoch": 0.21852098600932712, - "grad_norm": 1.0527464833567977, - "learning_rate": 5.4605993340732525e-06, - "loss": 0.1035, + "epoch": 0.4369449378330373, + "grad_norm": 0.8771832244496556, + "learning_rate": 9.99747525391642e-06, + "loss": 0.1127, "step": 492 }, { - "epoch": 0.21896513435487452, - "grad_norm": 1.0639212306477968, - "learning_rate": 5.4716981132075475e-06, - "loss": 0.0798, + "epoch": 0.43783303730017764, + "grad_norm": 0.6530335329715642, + "learning_rate": 9.997350604650123e-06, + "loss": 0.0788, "step": 493 }, { - "epoch": 0.21940928270042195, - "grad_norm": 1.4130768874633084, - "learning_rate": 5.482796892341843e-06, - "loss": 0.1191, + "epoch": 0.43872113676731794, + "grad_norm": 0.8271275114471274, + "learning_rate": 9.99722295286087e-06, + "loss": 0.0888, "step": 494 }, { - "epoch": 0.21985343104596936, - "grad_norm": 1.1493987018132557, - "learning_rate": 5.4938956714761385e-06, - "loss": 0.0933, + "epoch": 0.43960923623445824, + "grad_norm": 0.580368488902037, + "learning_rate": 9.997092298625365e-06, + "loss": 0.0753, "step": 495 }, { - "epoch": 0.22029757939151676, - "grad_norm": 1.5099978612143894, - "learning_rate": 5.5049944506104336e-06, - "loss": 0.1312, + "epoch": 0.4404973357015986, + "grad_norm": 0.6770794650763458, + "learning_rate": 9.996958642022101e-06, + "loss": 0.0755, "step": 496 }, { - "epoch": 0.2207417277370642, - "grad_norm": 1.7153041600616725, - "learning_rate": 5.516093229744729e-06, - "loss": 0.1631, + "epoch": 0.4413854351687389, + "grad_norm": 0.6100575589466122, + "learning_rate": 9.996821983131385e-06, + "loss": 0.0701, "step": 497 }, { - "epoch": 0.2211858760826116, - "grad_norm": 0.9681698333927948, - "learning_rate": 5.5271920088790245e-06, - "loss": 0.0811, + "epoch": 0.4422735346358792, + "grad_norm": 0.6053371540629051, + "learning_rate": 9.996682322035328e-06, + "loss": 0.0697, "step": 498 }, { - "epoch": 0.22163002442815902, - "grad_norm": 1.2720779336432644, - "learning_rate": 5.538290788013319e-06, - "loss": 0.1305, + "epoch": 0.44316163410301956, + "grad_norm": 0.7941985186400321, + "learning_rate": 9.996539658817835e-06, + "loss": 0.0914, "step": 499 }, { - "epoch": 0.22207417277370642, - "grad_norm": 1.2223556006920575, - "learning_rate": 5.549389567147614e-06, - "loss": 0.0942, + "epoch": 0.44404973357015987, + "grad_norm": 0.6539713119848678, + "learning_rate": 9.996393993564626e-06, + "loss": 0.0952, "step": 500 }, { - "epoch": 0.22251832111925382, - "grad_norm": 1.0558717161626225, - "learning_rate": 5.560488346281909e-06, - "loss": 0.1077, + "epoch": 0.44493783303730017, + "grad_norm": 0.7945600797488689, + "learning_rate": 9.996245326363218e-06, + "loss": 0.0773, "step": 501 }, { - "epoch": 0.22296246946480125, - "grad_norm": 1.1170017223897397, - "learning_rate": 5.571587125416205e-06, - "loss": 0.1309, + "epoch": 0.44582593250444047, + "grad_norm": 0.7157119118716787, + "learning_rate": 9.996093657302937e-06, + "loss": 0.0703, "step": 502 }, { - "epoch": 0.22340661781034865, - "grad_norm": 0.9964488397086242, - "learning_rate": 5.5826859045505e-06, - "loss": 0.0834, + "epoch": 0.4467140319715808, + "grad_norm": 0.8554534407228245, + "learning_rate": 9.995938986474905e-06, + "loss": 0.0727, "step": 503 }, { - "epoch": 0.22385076615589608, - "grad_norm": 1.2815734363162072, - "learning_rate": 5.593784683684795e-06, - "loss": 0.0901, + "epoch": 0.44760213143872113, + "grad_norm": 0.8600565466728031, + "learning_rate": 9.995781313972054e-06, + "loss": 0.0771, "step": 504 }, { - "epoch": 0.22429491450144348, - "grad_norm": 1.0732922534576572, - "learning_rate": 5.604883462819091e-06, - "loss": 0.0918, + "epoch": 0.44849023090586143, + "grad_norm": 0.7743541986016661, + "learning_rate": 9.995620639889117e-06, + "loss": 0.1039, "step": 505 }, { - "epoch": 0.22473906284699088, - "grad_norm": 1.0346342912485391, - "learning_rate": 5.615982241953386e-06, - "loss": 0.0983, + "epoch": 0.4493783303730018, + "grad_norm": 0.7509971028833511, + "learning_rate": 9.99545696432263e-06, + "loss": 0.0951, "step": 506 }, { - "epoch": 0.2251832111925383, - "grad_norm": 1.2353611959723767, - "learning_rate": 5.62708102108768e-06, - "loss": 0.1066, + "epoch": 0.4502664298401421, + "grad_norm": 0.5619687860129344, + "learning_rate": 9.995290287370933e-06, + "loss": 0.0731, "step": 507 }, { - "epoch": 0.22562735953808571, - "grad_norm": 1.5571341771119367, - "learning_rate": 5.638179800221976e-06, - "loss": 0.1197, + "epoch": 0.4511545293072824, + "grad_norm": 0.7054714896174964, + "learning_rate": 9.99512060913417e-06, + "loss": 0.0833, "step": 508 }, { - "epoch": 0.22607150788363314, - "grad_norm": 1.8593279138829277, - "learning_rate": 5.649278579356271e-06, - "loss": 0.1539, + "epoch": 0.45204262877442275, + "grad_norm": 0.6964202593719336, + "learning_rate": 9.994947929714288e-06, + "loss": 0.0939, "step": 509 }, { - "epoch": 0.22651565622918055, - "grad_norm": 1.3210670897027246, - "learning_rate": 5.660377358490566e-06, - "loss": 0.0972, + "epoch": 0.45293072824156305, + "grad_norm": 0.7197509181407117, + "learning_rate": 9.994772249215036e-06, + "loss": 0.0924, "step": 510 }, { - "epoch": 0.22695980457472795, - "grad_norm": 1.555152024387198, - "learning_rate": 5.671476137624862e-06, - "loss": 0.1017, + "epoch": 0.45381882770870335, + "grad_norm": 0.7115124670291502, + "learning_rate": 9.994593567741966e-06, + "loss": 0.0789, "step": 511 }, { - "epoch": 0.22740395292027538, - "grad_norm": 1.1784666679583122, - "learning_rate": 5.682574916759157e-06, - "loss": 0.0896, + "epoch": 0.4547069271758437, + "grad_norm": 0.49087310212235263, + "learning_rate": 9.994411885402437e-06, + "loss": 0.0711, "step": 512 }, { - "epoch": 0.22784810126582278, - "grad_norm": 0.897552288214822, - "learning_rate": 5.693673695893452e-06, - "loss": 0.0748, + "epoch": 0.455595026642984, + "grad_norm": 0.8345608095728034, + "learning_rate": 9.994227202305604e-06, + "loss": 0.086, "step": 513 }, { - "epoch": 0.2282922496113702, - "grad_norm": 0.9984908709528371, - "learning_rate": 5.704772475027748e-06, - "loss": 0.0962, + "epoch": 0.4564831261101243, + "grad_norm": 0.5517219664337538, + "learning_rate": 9.994039518562433e-06, + "loss": 0.0662, "step": 514 }, { - "epoch": 0.2287363979569176, - "grad_norm": 1.1522533946932125, - "learning_rate": 5.715871254162043e-06, - "loss": 0.1175, + "epoch": 0.4573712255772647, + "grad_norm": 0.7304217736850901, + "learning_rate": 9.993848834285685e-06, + "loss": 0.0932, "step": 515 }, { - "epoch": 0.229180546302465, - "grad_norm": 1.0543599181977559, - "learning_rate": 5.7269700332963374e-06, - "loss": 0.1386, + "epoch": 0.458259325044405, + "grad_norm": 0.7885844049103651, + "learning_rate": 9.99365514958993e-06, + "loss": 0.0913, "step": 516 }, { - "epoch": 0.22962469464801244, - "grad_norm": 0.8788825627877698, - "learning_rate": 5.7380688124306325e-06, - "loss": 0.0947, + "epoch": 0.4591474245115453, + "grad_norm": 0.7683444302338543, + "learning_rate": 9.993458464591534e-06, + "loss": 0.0954, "step": 517 }, { - "epoch": 0.23006884299355984, - "grad_norm": 0.8916267274537737, - "learning_rate": 5.7491675915649284e-06, - "loss": 0.0875, + "epoch": 0.46003552397868563, + "grad_norm": 0.8089449634286497, + "learning_rate": 9.993258779408676e-06, + "loss": 0.0772, "step": 518 }, { - "epoch": 0.23051299133910727, - "grad_norm": 1.3741271628041778, - "learning_rate": 5.7602663706992235e-06, - "loss": 0.1431, + "epoch": 0.46092362344582594, + "grad_norm": 0.6423905451403036, + "learning_rate": 9.993056094161326e-06, + "loss": 0.0784, "step": 519 }, { - "epoch": 0.23095713968465467, - "grad_norm": 1.158157014144474, - "learning_rate": 5.7713651498335186e-06, - "loss": 0.1079, + "epoch": 0.46181172291296624, + "grad_norm": 0.4846261470710185, + "learning_rate": 9.992850408971267e-06, + "loss": 0.0795, "step": 520 }, { - "epoch": 0.2314012880302021, - "grad_norm": 0.9669483042814996, - "learning_rate": 5.7824639289678145e-06, - "loss": 0.0756, + "epoch": 0.4626998223801066, + "grad_norm": 0.6644049663648053, + "learning_rate": 9.992641723962076e-06, + "loss": 0.0747, "step": 521 }, { - "epoch": 0.2318454363757495, - "grad_norm": 0.9858439751900236, - "learning_rate": 5.7935627081021095e-06, - "loss": 0.1091, + "epoch": 0.4635879218472469, + "grad_norm": 0.6315516410011365, + "learning_rate": 9.992430039259135e-06, + "loss": 0.0772, "step": 522 }, { - "epoch": 0.2322895847212969, - "grad_norm": 1.0356934556615558, - "learning_rate": 5.804661487236405e-06, - "loss": 0.0899, + "epoch": 0.4644760213143872, + "grad_norm": 1.2012473367972696, + "learning_rate": 9.992215354989633e-06, + "loss": 0.0963, "step": 523 }, { - "epoch": 0.23273373306684433, - "grad_norm": 3.900766522495628, - "learning_rate": 5.8157602663707005e-06, - "loss": 0.0926, + "epoch": 0.46536412078152756, + "grad_norm": 0.6833716695406203, + "learning_rate": 9.991997671282554e-06, + "loss": 0.0752, "step": 524 }, { - "epoch": 0.23317788141239174, - "grad_norm": 1.1976168770257003, - "learning_rate": 5.826859045504995e-06, - "loss": 0.0963, + "epoch": 0.46625222024866786, + "grad_norm": 0.5524496659287114, + "learning_rate": 9.991776988268687e-06, + "loss": 0.082, "step": 525 }, { - "epoch": 0.23362202975793916, - "grad_norm": 1.2441372325560522, - "learning_rate": 5.83795782463929e-06, - "loss": 0.1105, + "epoch": 0.46714031971580816, + "grad_norm": 0.883266570689509, + "learning_rate": 9.991553306080627e-06, + "loss": 0.0835, "step": 526 }, { - "epoch": 0.23406617810348657, - "grad_norm": 1.379627337877928, - "learning_rate": 5.849056603773585e-06, - "loss": 0.0851, + "epoch": 0.46802841918294846, + "grad_norm": 0.540053134317723, + "learning_rate": 9.991326624852763e-06, + "loss": 0.0619, "step": 527 }, { - "epoch": 0.23451032644903397, - "grad_norm": 1.0553207077914868, - "learning_rate": 5.860155382907881e-06, - "loss": 0.0876, + "epoch": 0.4689165186500888, + "grad_norm": 0.8470097790352878, + "learning_rate": 9.991096944721292e-06, + "loss": 0.0597, "step": 528 }, { - "epoch": 0.2349544747945814, - "grad_norm": 1.0117503811303656, - "learning_rate": 5.871254162042176e-06, - "loss": 0.0925, + "epoch": 0.4698046181172291, + "grad_norm": 0.8245285829911205, + "learning_rate": 9.990864265824212e-06, + "loss": 0.0915, "step": 529 }, { - "epoch": 0.2353986231401288, - "grad_norm": 1.2155239899129966, - "learning_rate": 5.882352941176471e-06, - "loss": 0.1177, + "epoch": 0.4706927175843694, + "grad_norm": 0.7529196897577872, + "learning_rate": 9.990628588301321e-06, + "loss": 0.0722, "step": 530 }, { - "epoch": 0.23584277148567623, - "grad_norm": 1.0134886033118073, - "learning_rate": 5.893451720310767e-06, - "loss": 0.0868, + "epoch": 0.4715808170515098, + "grad_norm": 0.5935927793864627, + "learning_rate": 9.99038991229422e-06, + "loss": 0.0847, "step": 531 }, { - "epoch": 0.23628691983122363, - "grad_norm": 1.2415034220382457, - "learning_rate": 5.904550499445062e-06, - "loss": 0.1016, + "epoch": 0.4724689165186501, + "grad_norm": 0.8460052408178522, + "learning_rate": 9.99014823794631e-06, + "loss": 0.0928, "step": 532 }, { - "epoch": 0.23673106817677103, - "grad_norm": 0.9266174346376681, - "learning_rate": 5.915649278579356e-06, - "loss": 0.1001, + "epoch": 0.4733570159857904, + "grad_norm": 0.7733706990544129, + "learning_rate": 9.989903565402794e-06, + "loss": 0.0852, "step": 533 }, { - "epoch": 0.23717521652231846, - "grad_norm": 1.3120341529200943, - "learning_rate": 5.926748057713651e-06, - "loss": 0.1248, + "epoch": 0.47424511545293074, + "grad_norm": 0.6112274916254049, + "learning_rate": 9.98965589481068e-06, + "loss": 0.0746, "step": 534 }, { - "epoch": 0.23761936486786586, - "grad_norm": 1.426598559638218, - "learning_rate": 5.937846836847947e-06, - "loss": 0.0932, + "epoch": 0.47513321492007105, + "grad_norm": 0.6754612378104622, + "learning_rate": 9.989405226318772e-06, + "loss": 0.0777, "step": 535 }, { - "epoch": 0.2380635132134133, - "grad_norm": 1.0182331505869047, - "learning_rate": 5.948945615982242e-06, - "loss": 0.1009, + "epoch": 0.47602131438721135, + "grad_norm": 0.5649455662700742, + "learning_rate": 9.989151560077678e-06, + "loss": 0.0808, "step": 536 }, { - "epoch": 0.2385076615589607, - "grad_norm": 1.4554113324351339, - "learning_rate": 5.960044395116537e-06, - "loss": 0.0998, + "epoch": 0.4769094138543517, + "grad_norm": 0.6626006114678178, + "learning_rate": 9.988894896239806e-06, + "loss": 0.0673, "step": 537 }, { - "epoch": 0.2389518099045081, - "grad_norm": 1.6220655384523863, - "learning_rate": 5.971143174250833e-06, - "loss": 0.105, + "epoch": 0.477797513321492, + "grad_norm": 0.7864510413434515, + "learning_rate": 9.988635234959364e-06, + "loss": 0.079, "step": 538 }, { - "epoch": 0.23939595825005552, - "grad_norm": 1.9400783770338148, - "learning_rate": 5.982241953385128e-06, - "loss": 0.1524, + "epoch": 0.4786856127886323, + "grad_norm": 0.7460100071946164, + "learning_rate": 9.988372576392366e-06, + "loss": 0.0875, "step": 539 }, { - "epoch": 0.23984010659560293, - "grad_norm": 1.4635091103881346, - "learning_rate": 5.993340732519424e-06, - "loss": 0.1233, + "epoch": 0.47957371225577267, + "grad_norm": 0.6818272861244811, + "learning_rate": 9.988106920696621e-06, + "loss": 0.0881, "step": 540 }, { - "epoch": 0.24028425494115035, - "grad_norm": 1.2937114895643234, - "learning_rate": 6.004439511653719e-06, - "loss": 0.1169, + "epoch": 0.48046181172291297, + "grad_norm": 0.9752286673524108, + "learning_rate": 9.98783826803174e-06, + "loss": 0.1038, "step": 541 }, { - "epoch": 0.24072840328669776, - "grad_norm": 1.2221129883336008, - "learning_rate": 6.015538290788013e-06, - "loss": 0.1049, + "epoch": 0.48134991119005327, + "grad_norm": 0.7800065206458783, + "learning_rate": 9.987566618559138e-06, + "loss": 0.0824, "step": 542 }, { - "epoch": 0.24117255163224516, - "grad_norm": 1.0735565355706922, - "learning_rate": 6.0266370699223085e-06, - "loss": 0.0886, + "epoch": 0.4822380106571936, + "grad_norm": 0.868202329884969, + "learning_rate": 9.987291972442029e-06, + "loss": 0.0969, "step": 543 }, { - "epoch": 0.2416166999777926, - "grad_norm": 1.3261214209776406, - "learning_rate": 6.037735849056604e-06, - "loss": 0.1245, + "epoch": 0.48312611012433393, + "grad_norm": 0.6019530542460936, + "learning_rate": 9.987014329845422e-06, + "loss": 0.073, "step": 544 }, { - "epoch": 0.24206084832334, - "grad_norm": 1.2122447330179282, - "learning_rate": 6.0488346281908995e-06, - "loss": 0.0824, + "epoch": 0.48401420959147423, + "grad_norm": 0.8756840734553802, + "learning_rate": 9.986733690936136e-06, + "loss": 0.0965, "step": 545 }, { - "epoch": 0.24250499666888742, - "grad_norm": 1.1700065718816541, - "learning_rate": 6.0599334073251945e-06, - "loss": 0.1062, + "epoch": 0.4849023090586146, + "grad_norm": 0.8319714919436229, + "learning_rate": 9.986450055882782e-06, + "loss": 0.0877, "step": 546 }, { - "epoch": 0.24294914501443482, - "grad_norm": 1.0126080671178852, - "learning_rate": 6.0710321864594904e-06, - "loss": 0.0876, + "epoch": 0.4857904085257549, + "grad_norm": 0.8583222321909063, + "learning_rate": 9.986163424855777e-06, + "loss": 0.0896, "step": 547 }, { - "epoch": 0.24339329335998222, - "grad_norm": 1.185450234336463, - "learning_rate": 6.0821309655937855e-06, - "loss": 0.1057, + "epoch": 0.4866785079928952, + "grad_norm": 0.6901166240079477, + "learning_rate": 9.985873798027334e-06, + "loss": 0.108, "step": 548 }, { - "epoch": 0.24383744170552965, - "grad_norm": 0.94702847545811, - "learning_rate": 6.0932297447280806e-06, - "loss": 0.0704, + "epoch": 0.48756660746003555, + "grad_norm": 0.7331274431745097, + "learning_rate": 9.985581175571467e-06, + "loss": 0.096, "step": 549 }, { - "epoch": 0.24428159005107705, - "grad_norm": 1.1538838071376252, - "learning_rate": 6.1043285238623765e-06, - "loss": 0.1062, + "epoch": 0.48845470692717585, + "grad_norm": 0.800265401337623, + "learning_rate": 9.985285557663993e-06, + "loss": 0.0967, "step": 550 }, { - "epoch": 0.24472573839662448, - "grad_norm": 1.2787034775849095, - "learning_rate": 6.115427302996671e-06, - "loss": 0.1053, + "epoch": 0.48934280639431615, + "grad_norm": 0.8809592610672535, + "learning_rate": 9.984986944482523e-06, + "loss": 0.0762, "step": 551 }, { - "epoch": 0.24516988674217188, - "grad_norm": 2.111386576675479, - "learning_rate": 6.126526082130966e-06, - "loss": 0.1423, + "epoch": 0.49023090586145646, + "grad_norm": 0.5636426306677593, + "learning_rate": 9.984685336206472e-06, + "loss": 0.0648, "step": 552 }, { - "epoch": 0.24561403508771928, - "grad_norm": 0.8396851122657492, - "learning_rate": 6.137624861265261e-06, - "loss": 0.0857, + "epoch": 0.4911190053285968, + "grad_norm": 0.678318134751214, + "learning_rate": 9.984380733017052e-06, + "loss": 0.0837, "step": 553 }, { - "epoch": 0.24605818343326671, - "grad_norm": 1.222223712186514, - "learning_rate": 6.148723640399557e-06, - "loss": 0.1241, + "epoch": 0.4920071047957371, + "grad_norm": 0.8204528753002966, + "learning_rate": 9.984073135097277e-06, + "loss": 0.1014, "step": 554 }, { - "epoch": 0.24650233177881412, - "grad_norm": 1.3542921033853719, - "learning_rate": 6.159822419533852e-06, - "loss": 0.1113, + "epoch": 0.4928952042628774, + "grad_norm": 0.6085982428845589, + "learning_rate": 9.983762542631958e-06, + "loss": 0.0678, "step": 555 }, { - "epoch": 0.24694648012436154, - "grad_norm": 0.921783149485065, - "learning_rate": 6.170921198668147e-06, - "loss": 0.0954, + "epoch": 0.4937833037300178, + "grad_norm": 0.5812489068037968, + "learning_rate": 9.983448955807708e-06, + "loss": 0.0842, "step": 556 }, { - "epoch": 0.24739062846990895, - "grad_norm": 1.170580173743532, - "learning_rate": 6.182019977802443e-06, - "loss": 0.0994, + "epoch": 0.4946714031971581, + "grad_norm": 0.6518382690202529, + "learning_rate": 9.983132374812933e-06, + "loss": 0.0738, "step": 557 }, { - "epoch": 0.24783477681545638, - "grad_norm": 1.4092773954441822, - "learning_rate": 6.193118756936738e-06, - "loss": 0.0931, + "epoch": 0.4955595026642984, + "grad_norm": 0.6633074008991864, + "learning_rate": 9.982812799837848e-06, + "loss": 0.0954, "step": 558 }, { - "epoch": 0.24827892516100378, - "grad_norm": 1.0651408845659296, - "learning_rate": 6.204217536071032e-06, - "loss": 0.1083, + "epoch": 0.49644760213143874, + "grad_norm": 0.816328703944021, + "learning_rate": 9.982490231074455e-06, + "loss": 0.0954, "step": 559 }, { - "epoch": 0.24872307350655118, - "grad_norm": 1.2143455853200855, - "learning_rate": 6.215316315205327e-06, - "loss": 0.0952, + "epoch": 0.49733570159857904, + "grad_norm": 0.6281501641841354, + "learning_rate": 9.982164668716565e-06, + "loss": 0.072, "step": 560 }, { - "epoch": 0.2491672218520986, - "grad_norm": 2.64991339168012, - "learning_rate": 6.226415094339623e-06, - "loss": 0.1145, + "epoch": 0.49822380106571934, + "grad_norm": 0.6602879174584156, + "learning_rate": 9.981836112959782e-06, + "loss": 0.0838, "step": 561 }, { - "epoch": 0.249611370197646, - "grad_norm": 0.7270989844733312, - "learning_rate": 6.237513873473918e-06, - "loss": 0.0861, + "epoch": 0.4991119005328597, + "grad_norm": 0.6560392310252683, + "learning_rate": 9.98150456400151e-06, + "loss": 0.0775, "step": 562 }, { - "epoch": 0.2500555185431934, - "grad_norm": 1.1431426285342126, - "learning_rate": 6.248612652608213e-06, - "loss": 0.1162, + "epoch": 0.5, + "grad_norm": 0.512881207998907, + "learning_rate": 9.981170022040949e-06, + "loss": 0.0626, "step": 563 }, { - "epoch": 0.2504996668887408, - "grad_norm": 0.860711327757446, - "learning_rate": 6.259711431742509e-06, - "loss": 0.0935, + "epoch": 0.5008880994671403, + "grad_norm": 0.5481928107670866, + "learning_rate": 9.980832487279102e-06, + "loss": 0.0713, "step": 564 }, { - "epoch": 0.25094381523428827, - "grad_norm": 0.8073003696766194, - "learning_rate": 6.270810210876804e-06, - "loss": 0.1079, + "epoch": 0.5017761989342806, + "grad_norm": 0.6264319183486977, + "learning_rate": 9.980491959918767e-06, + "loss": 0.075, "step": 565 }, { - "epoch": 0.25138796357983567, - "grad_norm": 1.1101487411387352, - "learning_rate": 6.281908990011099e-06, - "loss": 0.1174, + "epoch": 0.5026642984014209, + "grad_norm": 0.5238738729951576, + "learning_rate": 9.980148440164543e-06, + "loss": 0.0698, "step": 566 }, { - "epoch": 0.2518321119253831, - "grad_norm": 1.0549286990788267, - "learning_rate": 6.293007769145395e-06, - "loss": 0.1051, + "epoch": 0.5035523978685613, + "grad_norm": 0.8378395838320835, + "learning_rate": 9.979801928222819e-06, + "loss": 0.0967, "step": 567 }, { - "epoch": 0.2522762602709305, - "grad_norm": 1.2019614422517741, - "learning_rate": 6.304106548279689e-06, - "loss": 0.1063, + "epoch": 0.5044404973357016, + "grad_norm": 0.564341382022705, + "learning_rate": 9.979452424301792e-06, + "loss": 0.0808, "step": 568 }, { - "epoch": 0.2527204086164779, - "grad_norm": 0.9611079197173829, - "learning_rate": 6.3152053274139845e-06, - "loss": 0.0974, + "epoch": 0.5053285968028419, + "grad_norm": 0.631173282575248, + "learning_rate": 9.979099928611449e-06, + "loss": 0.0706, "step": 569 }, { - "epoch": 0.25316455696202533, - "grad_norm": 1.1961495715533894, - "learning_rate": 6.32630410654828e-06, - "loss": 0.0991, + "epoch": 0.5062166962699822, + "grad_norm": 0.5825124500655912, + "learning_rate": 9.978744441363582e-06, + "loss": 0.0727, "step": 570 }, { - "epoch": 0.25360870530757273, - "grad_norm": 1.0576689858819426, - "learning_rate": 6.3374028856825754e-06, - "loss": 0.0953, + "epoch": 0.5071047957371225, + "grad_norm": 0.5797415025353291, + "learning_rate": 9.978385962771771e-06, + "loss": 0.0704, "step": 571 }, { - "epoch": 0.25405285365312014, - "grad_norm": 1.2656186745282128, - "learning_rate": 6.3485016648168705e-06, - "loss": 0.1083, + "epoch": 0.5079928952042628, + "grad_norm": 0.8573349386910596, + "learning_rate": 9.9780244930514e-06, + "loss": 0.0871, "step": 572 }, { - "epoch": 0.25449700199866754, - "grad_norm": 1.1776166049192853, - "learning_rate": 6.359600443951166e-06, - "loss": 0.1118, + "epoch": 0.5088809946714032, + "grad_norm": 0.7752539245927109, + "learning_rate": 9.977660032419647e-06, + "loss": 0.0808, "step": 573 }, { - "epoch": 0.254941150344215, - "grad_norm": 0.8664088781289958, - "learning_rate": 6.3706992230854615e-06, - "loss": 0.0863, + "epoch": 0.5097690941385435, + "grad_norm": 0.5071737030464216, + "learning_rate": 9.977292581095493e-06, + "loss": 0.0735, "step": 574 }, { - "epoch": 0.2553852986897624, - "grad_norm": 0.9925662850080711, - "learning_rate": 6.3817980022197565e-06, - "loss": 0.1058, + "epoch": 0.5106571936056838, + "grad_norm": 0.5589986959748494, + "learning_rate": 9.976922139299706e-06, + "loss": 0.0799, "step": 575 }, { - "epoch": 0.2558294470353098, - "grad_norm": 1.0606625159824246, - "learning_rate": 6.3928967813540525e-06, - "loss": 0.1075, + "epoch": 0.5115452930728241, + "grad_norm": 0.5184552557706661, + "learning_rate": 9.976548707254857e-06, + "loss": 0.0758, "step": 576 }, { - "epoch": 0.2562735953808572, - "grad_norm": 0.9900660508183841, - "learning_rate": 6.403995560488347e-06, - "loss": 0.0829, + "epoch": 0.5124333925399644, + "grad_norm": 0.7888563604598832, + "learning_rate": 9.976172285185315e-06, + "loss": 0.1095, "step": 577 }, { - "epoch": 0.2567177437264046, - "grad_norm": 0.8582749721826495, - "learning_rate": 6.415094339622642e-06, - "loss": 0.0914, + "epoch": 0.5133214920071048, + "grad_norm": 0.5650867261613388, + "learning_rate": 9.97579287331724e-06, + "loss": 0.071, "step": 578 }, { - "epoch": 0.25716189207195206, - "grad_norm": 1.1617033243365338, - "learning_rate": 6.426193118756937e-06, - "loss": 0.1116, + "epoch": 0.5142095914742452, + "grad_norm": 0.6700954589743635, + "learning_rate": 9.975410471878592e-06, + "loss": 0.0839, "step": 579 }, { - "epoch": 0.25760604041749946, - "grad_norm": 1.0362694278635598, - "learning_rate": 6.437291897891233e-06, - "loss": 0.1145, + "epoch": 0.5150976909413855, + "grad_norm": 0.5813447738567292, + "learning_rate": 9.97502508109913e-06, + "loss": 0.0624, "step": 580 }, { - "epoch": 0.25805018876304686, - "grad_norm": 0.8723418719473582, - "learning_rate": 6.448390677025528e-06, - "loss": 0.0885, + "epoch": 0.5159857904085258, + "grad_norm": 0.6158677363797734, + "learning_rate": 9.974636701210402e-06, + "loss": 0.0712, "step": 581 }, { - "epoch": 0.25849433710859426, - "grad_norm": 1.0870215330646826, - "learning_rate": 6.459489456159823e-06, - "loss": 0.0894, + "epoch": 0.5168738898756661, + "grad_norm": 0.62582136981208, + "learning_rate": 9.974245332445756e-06, + "loss": 0.0848, "step": 582 }, { - "epoch": 0.25893848545414166, - "grad_norm": 1.2407360085708667, - "learning_rate": 6.470588235294119e-06, - "loss": 0.0812, + "epoch": 0.5177619893428064, + "grad_norm": 0.5107303299624117, + "learning_rate": 9.973850975040335e-06, + "loss": 0.0654, "step": 583 }, { - "epoch": 0.2593826337996891, - "grad_norm": 1.8006078173259896, - "learning_rate": 6.481687014428414e-06, - "loss": 0.1429, + "epoch": 0.5186500888099467, + "grad_norm": 0.7854970444895641, + "learning_rate": 9.97345362923108e-06, + "loss": 0.0861, "step": 584 }, { - "epoch": 0.2598267821452365, - "grad_norm": 1.4748519282887915, - "learning_rate": 6.492785793562708e-06, - "loss": 0.0867, + "epoch": 0.5195381882770871, + "grad_norm": 0.6754830198888271, + "learning_rate": 9.973053295256725e-06, + "loss": 0.0894, "step": 585 }, { - "epoch": 0.2602709304907839, - "grad_norm": 1.0424639232757755, - "learning_rate": 6.503884572697003e-06, - "loss": 0.1146, + "epoch": 0.5204262877442274, + "grad_norm": 0.6696301919339701, + "learning_rate": 9.972649973357797e-06, + "loss": 0.0745, "step": 586 }, { - "epoch": 0.2607150788363313, - "grad_norm": 1.2786124769190668, - "learning_rate": 6.514983351831299e-06, - "loss": 0.1192, + "epoch": 0.5213143872113677, + "grad_norm": 0.7380562380998862, + "learning_rate": 9.972243663776626e-06, + "loss": 0.0833, "step": 587 }, { - "epoch": 0.26115922718187873, - "grad_norm": 0.9288875670879717, - "learning_rate": 6.526082130965594e-06, - "loss": 0.101, + "epoch": 0.522202486678508, + "grad_norm": 0.8204338382294417, + "learning_rate": 9.971834366757327e-06, + "loss": 0.0768, "step": 588 }, { - "epoch": 0.2616033755274262, - "grad_norm": 0.6884103445991109, - "learning_rate": 6.537180910099889e-06, - "loss": 0.0743, + "epoch": 0.5230905861456483, + "grad_norm": 0.577684776094795, + "learning_rate": 9.971422082545818e-06, + "loss": 0.0847, "step": 589 }, { - "epoch": 0.2620475238729736, - "grad_norm": 1.0193990545252207, - "learning_rate": 6.548279689234185e-06, - "loss": 0.0826, + "epoch": 0.5239786856127886, + "grad_norm": 0.5842234378048432, + "learning_rate": 9.971006811389807e-06, + "loss": 0.0693, "step": 590 }, { - "epoch": 0.262491672218521, - "grad_norm": 1.2098967104098979, - "learning_rate": 6.55937846836848e-06, - "loss": 0.116, + "epoch": 0.5248667850799289, + "grad_norm": 0.6709627494905674, + "learning_rate": 9.970588553538802e-06, + "loss": 0.079, "step": 591 }, { - "epoch": 0.2629358205640684, - "grad_norm": 0.86346939462905, - "learning_rate": 6.570477247502775e-06, - "loss": 0.0833, + "epoch": 0.5257548845470693, + "grad_norm": 0.5657394188287586, + "learning_rate": 9.970167309244097e-06, + "loss": 0.0743, "step": 592 }, { - "epoch": 0.2633799689096158, - "grad_norm": 0.8577523664034169, - "learning_rate": 6.581576026637071e-06, - "loss": 0.0802, + "epoch": 0.5266429840142096, + "grad_norm": 0.5208485085677795, + "learning_rate": 9.969743078758788e-06, + "loss": 0.0669, "step": 593 }, { - "epoch": 0.26382411725516325, - "grad_norm": 0.7836334305793592, - "learning_rate": 6.592674805771365e-06, - "loss": 0.079, + "epoch": 0.5275310834813499, + "grad_norm": 0.5040140180463282, + "learning_rate": 9.969315862337764e-06, + "loss": 0.0538, "step": 594 }, { - "epoch": 0.26426826560071065, - "grad_norm": 0.9292792710410797, - "learning_rate": 6.60377358490566e-06, - "loss": 0.0841, + "epoch": 0.5284191829484902, + "grad_norm": 0.6053908206358658, + "learning_rate": 9.968885660237704e-06, + "loss": 0.0822, "step": 595 }, { - "epoch": 0.26471241394625805, - "grad_norm": 0.949730314196452, - "learning_rate": 6.614872364039956e-06, - "loss": 0.0779, + "epoch": 0.5293072824156305, + "grad_norm": 0.7398344551566501, + "learning_rate": 9.968452472717084e-06, + "loss": 0.0918, "step": 596 }, { - "epoch": 0.26515656229180545, - "grad_norm": 1.067206699117055, - "learning_rate": 6.625971143174251e-06, - "loss": 0.0813, + "epoch": 0.5301953818827708, + "grad_norm": 0.4916738909621316, + "learning_rate": 9.968016300036172e-06, + "loss": 0.073, "step": 597 }, { - "epoch": 0.26560071063735285, - "grad_norm": 1.11737459145968, - "learning_rate": 6.6370699223085465e-06, - "loss": 0.0855, + "epoch": 0.5310834813499112, + "grad_norm": 0.5539634431634507, + "learning_rate": 9.967577142457031e-06, + "loss": 0.0647, "step": 598 }, { - "epoch": 0.2660448589829003, - "grad_norm": 1.3191268336017996, - "learning_rate": 6.648168701442842e-06, - "loss": 0.117, + "epoch": 0.5319715808170515, + "grad_norm": 0.6671542351197542, + "learning_rate": 9.96713500024352e-06, + "loss": 0.0858, "step": 599 }, { - "epoch": 0.2664890073284477, - "grad_norm": 1.0102499350799081, - "learning_rate": 6.6592674805771374e-06, - "loss": 0.0878, + "epoch": 0.5328596802841918, + "grad_norm": 0.4786344609288544, + "learning_rate": 9.966689873661284e-06, + "loss": 0.0811, "step": 600 }, { - "epoch": 0.2669331556739951, - "grad_norm": 0.878524576308141, - "learning_rate": 6.6703662597114325e-06, - "loss": 0.0886, + "epoch": 0.5337477797513321, + "grad_norm": 0.4690021612325154, + "learning_rate": 9.966241762977767e-06, + "loss": 0.0639, "step": 601 }, { - "epoch": 0.2673773040195425, - "grad_norm": 1.0385994736274717, - "learning_rate": 6.6814650388457284e-06, - "loss": 0.0895, + "epoch": 0.5346358792184724, + "grad_norm": 0.6417197726302212, + "learning_rate": 9.965790668462205e-06, + "loss": 0.0753, "step": 602 }, { - "epoch": 0.2678214523650899, - "grad_norm": 2.1746856925927305, - "learning_rate": 6.692563817980023e-06, - "loss": 0.1218, + "epoch": 0.5355239786856127, + "grad_norm": 0.5376695510800252, + "learning_rate": 9.965336590385623e-06, + "loss": 0.0833, "step": 603 }, { - "epoch": 0.2682656007106374, - "grad_norm": 1.1284346260514344, - "learning_rate": 6.703662597114318e-06, - "loss": 0.0984, + "epoch": 0.5364120781527532, + "grad_norm": 0.5569038501440469, + "learning_rate": 9.964879529020844e-06, + "loss": 0.0697, "step": 604 }, { - "epoch": 0.2687097490561848, - "grad_norm": 0.8795466679323811, - "learning_rate": 6.714761376248613e-06, - "loss": 0.1076, + "epoch": 0.5373001776198935, + "grad_norm": 0.5443343147501704, + "learning_rate": 9.964419484642482e-06, + "loss": 0.0761, "step": 605 }, { - "epoch": 0.2691538974017322, - "grad_norm": 0.7889851290279695, - "learning_rate": 6.725860155382909e-06, - "loss": 0.0809, + "epoch": 0.5381882770870338, + "grad_norm": 0.5170897128943753, + "learning_rate": 9.96395645752694e-06, + "loss": 0.064, "step": 606 }, { - "epoch": 0.2695980457472796, - "grad_norm": 1.1495901760021123, - "learning_rate": 6.736958934517204e-06, - "loss": 0.1339, + "epoch": 0.5390763765541741, + "grad_norm": 0.8056275163824247, + "learning_rate": 9.963490447952415e-06, + "loss": 0.092, "step": 607 }, { - "epoch": 0.270042194092827, - "grad_norm": 0.9404581532626506, - "learning_rate": 6.748057713651499e-06, - "loss": 0.0835, + "epoch": 0.5399644760213144, + "grad_norm": 0.5236969420959864, + "learning_rate": 9.963021456198898e-06, + "loss": 0.0742, "step": 608 }, { - "epoch": 0.27048634243837444, - "grad_norm": 1.1614393549481714, - "learning_rate": 6.759156492785795e-06, - "loss": 0.1022, + "epoch": 0.5408525754884547, + "grad_norm": 0.4992794018935628, + "learning_rate": 9.962549482548169e-06, + "loss": 0.0771, "step": 609 }, { - "epoch": 0.27093049078392184, - "grad_norm": 0.7805043342922547, - "learning_rate": 6.77025527192009e-06, - "loss": 0.0872, + "epoch": 0.5417406749555951, + "grad_norm": 0.5755486594928395, + "learning_rate": 9.962074527283803e-06, + "loss": 0.0718, "step": 610 }, { - "epoch": 0.27137463912946924, - "grad_norm": 1.1270320700845156, - "learning_rate": 6.781354051054384e-06, - "loss": 0.106, + "epoch": 0.5426287744227354, + "grad_norm": 0.5339655506708132, + "learning_rate": 9.961596590691159e-06, + "loss": 0.0717, "step": 611 }, { - "epoch": 0.27181878747501664, - "grad_norm": 0.8511881872825724, - "learning_rate": 6.792452830188679e-06, - "loss": 0.077, + "epoch": 0.5435168738898757, + "grad_norm": 0.6875341483650086, + "learning_rate": 9.961115673057397e-06, + "loss": 0.0826, "step": 612 }, { - "epoch": 0.27226293582056404, - "grad_norm": 0.8653445546881817, - "learning_rate": 6.803551609322975e-06, - "loss": 0.0703, + "epoch": 0.544404973357016, + "grad_norm": 0.5518853021351647, + "learning_rate": 9.96063177467146e-06, + "loss": 0.0638, "step": 613 }, { - "epoch": 0.2727070841661115, - "grad_norm": 0.9249325812470924, - "learning_rate": 6.81465038845727e-06, - "loss": 0.1239, + "epoch": 0.5452930728241563, + "grad_norm": 0.5250033178635135, + "learning_rate": 9.960144895824088e-06, + "loss": 0.0533, "step": 614 }, { - "epoch": 0.2731512325116589, - "grad_norm": 1.2035733700107387, - "learning_rate": 6.825749167591565e-06, - "loss": 0.0803, + "epoch": 0.5461811722912966, + "grad_norm": 0.9091750188672192, + "learning_rate": 9.959655036807807e-06, + "loss": 0.0833, "step": 615 }, { - "epoch": 0.2735953808572063, - "grad_norm": 1.058685859950284, - "learning_rate": 6.836847946725861e-06, - "loss": 0.101, + "epoch": 0.5470692717584369, + "grad_norm": 0.5806570052719987, + "learning_rate": 9.959162197916938e-06, + "loss": 0.0765, "step": 616 }, { - "epoch": 0.2740395292027537, - "grad_norm": 0.9291079298055421, - "learning_rate": 6.847946725860156e-06, - "loss": 0.0907, + "epoch": 0.5479573712255773, + "grad_norm": 0.6555721441709699, + "learning_rate": 9.958666379447588e-06, + "loss": 0.0799, "step": 617 }, { - "epoch": 0.2744836775483011, - "grad_norm": 1.1511507883949077, - "learning_rate": 6.859045504994451e-06, - "loss": 0.1077, + "epoch": 0.5488454706927176, + "grad_norm": 0.49620238314036563, + "learning_rate": 9.958167581697656e-06, + "loss": 0.0802, "step": 618 }, { - "epoch": 0.27492782589384857, - "grad_norm": 1.0786240033108858, - "learning_rate": 6.870144284128747e-06, - "loss": 0.1251, + "epoch": 0.5497335701598579, + "grad_norm": 0.6095867263904436, + "learning_rate": 9.95766580496683e-06, + "loss": 0.0661, "step": 619 }, { - "epoch": 0.27537197423939597, - "grad_norm": 0.9285679879223824, - "learning_rate": 6.881243063263041e-06, - "loss": 0.079, + "epoch": 0.5506216696269982, + "grad_norm": 0.5227562383858685, + "learning_rate": 9.957161049556591e-06, + "loss": 0.0613, "step": 620 }, { - "epoch": 0.27581612258494337, - "grad_norm": 0.8117578679372189, - "learning_rate": 6.892341842397336e-06, - "loss": 0.0772, + "epoch": 0.5515097690941385, + "grad_norm": 0.6208242296786288, + "learning_rate": 9.956653315770209e-06, + "loss": 0.075, "step": 621 }, { - "epoch": 0.27626027093049077, - "grad_norm": 1.0138147773665356, - "learning_rate": 6.9034406215316315e-06, - "loss": 0.1238, + "epoch": 0.5523978685612788, + "grad_norm": 0.6046805080838598, + "learning_rate": 9.956142603912737e-06, + "loss": 0.0797, "step": 622 }, { - "epoch": 0.27670441927603817, - "grad_norm": 0.988212212065645, - "learning_rate": 6.914539400665927e-06, - "loss": 0.0726, + "epoch": 0.5532859680284192, + "grad_norm": 0.6444562243401362, + "learning_rate": 9.95562891429103e-06, + "loss": 0.0755, "step": 623 }, { - "epoch": 0.27714856762158563, - "grad_norm": 1.2358456192045688, - "learning_rate": 6.9256381798002224e-06, - "loss": 0.1125, + "epoch": 0.5541740674955595, + "grad_norm": 0.5985558732707539, + "learning_rate": 9.955112247213716e-06, + "loss": 0.0678, "step": 624 }, { - "epoch": 0.27759271596713303, - "grad_norm": 1.2887749392067072, - "learning_rate": 6.9367369589345175e-06, - "loss": 0.1003, + "epoch": 0.5550621669626998, + "grad_norm": 0.6138532225423613, + "learning_rate": 9.954592602991226e-06, + "loss": 0.0854, "step": 625 }, { - "epoch": 0.27803686431268043, - "grad_norm": 0.9864604958886025, - "learning_rate": 6.947835738068813e-06, - "loss": 0.0831, + "epoch": 0.5559502664298401, + "grad_norm": 0.48332537237986817, + "learning_rate": 9.954069981935774e-06, + "loss": 0.0636, "step": 626 }, { - "epoch": 0.27848101265822783, - "grad_norm": 1.088865911862852, - "learning_rate": 6.9589345172031085e-06, - "loss": 0.101, + "epoch": 0.5568383658969804, + "grad_norm": 0.5307808463292392, + "learning_rate": 9.95354438436136e-06, + "loss": 0.0675, "step": 627 }, { - "epoch": 0.27892516100377523, - "grad_norm": 1.409906763744686, - "learning_rate": 6.970033296337404e-06, - "loss": 0.104, + "epoch": 0.5577264653641207, + "grad_norm": 0.5287375678675353, + "learning_rate": 9.953015810583777e-06, + "loss": 0.0728, "step": 628 }, { - "epoch": 0.2793693093493227, - "grad_norm": 1.2302985416041377, - "learning_rate": 6.981132075471699e-06, - "loss": 0.0969, + "epoch": 0.5586145648312612, + "grad_norm": 0.6196806533003894, + "learning_rate": 9.952484260920605e-06, + "loss": 0.0845, "step": 629 }, { - "epoch": 0.2798134576948701, - "grad_norm": 1.2642567912432223, - "learning_rate": 6.992230854605994e-06, - "loss": 0.1116, + "epoch": 0.5595026642984015, + "grad_norm": 0.45677564015470606, + "learning_rate": 9.95194973569121e-06, + "loss": 0.0714, "step": 630 }, { - "epoch": 0.2802576060404175, - "grad_norm": 1.2962548485310728, - "learning_rate": 7.003329633740289e-06, - "loss": 0.1144, + "epoch": 0.5603907637655418, + "grad_norm": 0.4304045479722019, + "learning_rate": 9.951412235216744e-06, + "loss": 0.0592, "step": 631 }, { - "epoch": 0.2807017543859649, - "grad_norm": 0.9021212064599634, - "learning_rate": 7.014428412874585e-06, - "loss": 0.0794, + "epoch": 0.5612788632326821, + "grad_norm": 0.3988334991399962, + "learning_rate": 9.950871759820155e-06, + "loss": 0.0626, "step": 632 }, { - "epoch": 0.2811459027315123, - "grad_norm": 0.9822662924030562, - "learning_rate": 7.02552719200888e-06, - "loss": 0.081, + "epoch": 0.5621669626998224, + "grad_norm": 0.5596339454540106, + "learning_rate": 9.950328309826172e-06, + "loss": 0.0797, "step": 633 }, { - "epoch": 0.28159005107705976, - "grad_norm": 0.8568051773621045, - "learning_rate": 7.036625971143175e-06, - "loss": 0.0838, + "epoch": 0.5630550621669627, + "grad_norm": 0.6027269400477876, + "learning_rate": 9.949781885561309e-06, + "loss": 0.0772, "step": 634 }, { - "epoch": 0.28203419942260716, - "grad_norm": 0.9134125872525329, - "learning_rate": 7.047724750277471e-06, - "loss": 0.0895, + "epoch": 0.5639431616341031, + "grad_norm": 0.5128227811773718, + "learning_rate": 9.949232487353873e-06, + "loss": 0.0666, "step": 635 }, { - "epoch": 0.28247834776815456, - "grad_norm": 1.0203574319172648, - "learning_rate": 7.058823529411766e-06, - "loss": 0.0872, + "epoch": 0.5648312611012434, + "grad_norm": 0.6243259838812658, + "learning_rate": 9.948680115533954e-06, + "loss": 0.0737, "step": 636 }, { - "epoch": 0.28292249611370196, - "grad_norm": 0.8320232571139462, - "learning_rate": 7.06992230854606e-06, - "loss": 0.0778, + "epoch": 0.5657193605683837, + "grad_norm": 0.3721747409496252, + "learning_rate": 9.94812477043343e-06, + "loss": 0.0565, "step": 637 }, { - "epoch": 0.28336664445924936, - "grad_norm": 1.1019251570363913, - "learning_rate": 7.081021087680355e-06, - "loss": 0.101, + "epoch": 0.566607460035524, + "grad_norm": 0.5603791331693248, + "learning_rate": 9.947566452385967e-06, + "loss": 0.0648, "step": 638 }, { - "epoch": 0.2838107928047968, - "grad_norm": 0.9010932622870097, - "learning_rate": 7.092119866814651e-06, - "loss": 0.0852, + "epoch": 0.5674955595026643, + "grad_norm": 0.7236695232741394, + "learning_rate": 9.94700516172701e-06, + "loss": 0.1008, "step": 639 }, { - "epoch": 0.2842549411503442, - "grad_norm": 1.0180000788163226, - "learning_rate": 7.103218645948946e-06, - "loss": 0.0955, + "epoch": 0.5683836589698046, + "grad_norm": 0.5727667142554005, + "learning_rate": 9.9464408987938e-06, + "loss": 0.0821, "step": 640 }, { - "epoch": 0.2846990894958916, - "grad_norm": 0.9251835556617967, - "learning_rate": 7.114317425083241e-06, - "loss": 0.0871, + "epoch": 0.5692717584369449, + "grad_norm": 0.5637693338316091, + "learning_rate": 9.945873663925358e-06, + "loss": 0.0731, "step": 641 }, { - "epoch": 0.285143237841439, - "grad_norm": 0.8901698566560228, - "learning_rate": 7.125416204217537e-06, - "loss": 0.0706, + "epoch": 0.5701598579040853, + "grad_norm": 0.6065169982263043, + "learning_rate": 9.945303457462492e-06, + "loss": 0.0776, "step": 642 }, { - "epoch": 0.2855873861869865, - "grad_norm": 1.0154369539307455, - "learning_rate": 7.136514983351832e-06, - "loss": 0.0887, + "epoch": 0.5710479573712256, + "grad_norm": 0.9659163460815292, + "learning_rate": 9.944730279747795e-06, + "loss": 0.0889, "step": 643 }, { - "epoch": 0.2860315345325339, - "grad_norm": 1.321020159204264, - "learning_rate": 7.147613762486127e-06, - "loss": 0.0865, + "epoch": 0.5719360568383659, + "grad_norm": 0.543692295516828, + "learning_rate": 9.944154131125643e-06, + "loss": 0.0698, "step": 644 }, { - "epoch": 0.2864756828780813, - "grad_norm": 2.535826739420158, - "learning_rate": 7.158712541620423e-06, - "loss": 0.1096, + "epoch": 0.5728241563055062, + "grad_norm": 0.7812550740351492, + "learning_rate": 9.943575011942203e-06, + "loss": 0.0794, "step": 645 }, { - "epoch": 0.2869198312236287, - "grad_norm": 1.0227251731125295, - "learning_rate": 7.169811320754717e-06, - "loss": 0.0977, + "epoch": 0.5737122557726465, + "grad_norm": 0.7273669868701744, + "learning_rate": 9.94299292254542e-06, + "loss": 0.0935, "step": 646 }, { - "epoch": 0.2873639795691761, - "grad_norm": 0.8089367869895305, - "learning_rate": 7.180910099889012e-06, - "loss": 0.0792, + "epoch": 0.5746003552397868, + "grad_norm": 0.6514262782582129, + "learning_rate": 9.94240786328503e-06, + "loss": 0.074, "step": 647 }, { - "epoch": 0.28780812791472354, - "grad_norm": 0.9374423150392824, - "learning_rate": 7.1920088790233074e-06, - "loss": 0.1055, + "epoch": 0.5754884547069272, + "grad_norm": 0.6704934102357321, + "learning_rate": 9.941819834512547e-06, + "loss": 0.0844, "step": 648 }, { - "epoch": 0.28825227626027095, - "grad_norm": 1.07643087941822, - "learning_rate": 7.203107658157603e-06, - "loss": 0.0957, + "epoch": 0.5763765541740675, + "grad_norm": 0.6232798380743291, + "learning_rate": 9.941228836581273e-06, + "loss": 0.0791, "step": 649 }, { - "epoch": 0.28869642460581835, - "grad_norm": 0.9052355958500774, - "learning_rate": 7.214206437291898e-06, - "loss": 0.097, + "epoch": 0.5772646536412078, + "grad_norm": 0.8328031251816983, + "learning_rate": 9.940634869846293e-06, + "loss": 0.0672, "step": 650 }, { - "epoch": 0.28914057295136575, - "grad_norm": 1.1103580563722126, - "learning_rate": 7.2253052164261935e-06, - "loss": 0.1111, + "epoch": 0.5781527531083481, + "grad_norm": 0.5491182560832706, + "learning_rate": 9.940037934664476e-06, + "loss": 0.075, "step": 651 }, { - "epoch": 0.28958472129691315, - "grad_norm": 1.7388144567675092, - "learning_rate": 7.236403995560489e-06, - "loss": 0.0939, + "epoch": 0.5790408525754884, + "grad_norm": 0.6980263709487374, + "learning_rate": 9.939438031394477e-06, + "loss": 0.0854, "step": 652 }, { - "epoch": 0.2900288696424606, - "grad_norm": 1.078770189866491, - "learning_rate": 7.2475027746947845e-06, - "loss": 0.0845, + "epoch": 0.5799289520426287, + "grad_norm": 0.762719484435552, + "learning_rate": 9.938835160396727e-06, + "loss": 0.0659, "step": 653 }, { - "epoch": 0.290473017988008, - "grad_norm": 1.2137588838688103, - "learning_rate": 7.2586015538290795e-06, - "loss": 0.0868, + "epoch": 0.5808170515097691, + "grad_norm": 0.6497096424621961, + "learning_rate": 9.938229322033448e-06, + "loss": 0.0687, "step": 654 }, { - "epoch": 0.2909171663335554, - "grad_norm": 1.3727496776393755, - "learning_rate": 7.269700332963375e-06, - "loss": 0.0922, + "epoch": 0.5817051509769094, + "grad_norm": 0.5232236761833539, + "learning_rate": 9.93762051666864e-06, + "loss": 0.0723, "step": 655 }, { - "epoch": 0.2913613146791028, - "grad_norm": 1.2519592442230572, - "learning_rate": 7.28079911209767e-06, - "loss": 0.126, + "epoch": 0.5825932504440497, + "grad_norm": 1.9373735232345077, + "learning_rate": 9.937008744668089e-06, + "loss": 0.0759, "step": 656 }, { - "epoch": 0.2918054630246502, - "grad_norm": 0.8673663222128764, - "learning_rate": 7.291897891231965e-06, - "loss": 0.0778, + "epoch": 0.58348134991119, + "grad_norm": 0.6686288174204, + "learning_rate": 9.93639400639936e-06, + "loss": 0.0817, "step": 657 }, { - "epoch": 0.29224961137019767, - "grad_norm": 1.1245127772074948, - "learning_rate": 7.302996670366261e-06, - "loss": 0.094, + "epoch": 0.5843694493783304, + "grad_norm": 0.6311724633616251, + "learning_rate": 9.935776302231801e-06, + "loss": 0.0781, "step": 658 }, { - "epoch": 0.29269375971574507, - "grad_norm": 1.156956668230168, - "learning_rate": 7.314095449500556e-06, - "loss": 0.108, + "epoch": 0.5852575488454707, + "grad_norm": 0.7500877855603757, + "learning_rate": 9.935155632536544e-06, + "loss": 0.082, "step": 659 }, { - "epoch": 0.2931379080612925, - "grad_norm": 0.8322393629574484, - "learning_rate": 7.325194228634851e-06, - "loss": 0.0933, + "epoch": 0.5861456483126111, + "grad_norm": 0.726336288113051, + "learning_rate": 9.934531997686503e-06, + "loss": 0.0879, "step": 660 }, { - "epoch": 0.2935820564068399, - "grad_norm": 1.1303584309903236, - "learning_rate": 7.336293007769147e-06, - "loss": 0.1144, + "epoch": 0.5870337477797514, + "grad_norm": 0.6749131743501463, + "learning_rate": 9.933905398056371e-06, + "loss": 0.0781, "step": 661 }, { - "epoch": 0.2940262047523873, - "grad_norm": 1.1516682415849495, - "learning_rate": 7.347391786903442e-06, - "loss": 0.1161, + "epoch": 0.5879218472468917, + "grad_norm": 0.6732350602477372, + "learning_rate": 9.933275834022623e-06, + "loss": 0.0677, "step": 662 }, { - "epoch": 0.29447035309793473, - "grad_norm": 0.6573879684754841, - "learning_rate": 7.358490566037736e-06, - "loss": 0.0743, + "epoch": 0.588809946714032, + "grad_norm": 0.6598835277716724, + "learning_rate": 9.932643305963516e-06, + "loss": 0.0652, "step": 663 }, { - "epoch": 0.29491450144348214, - "grad_norm": 0.7675390269253073, - "learning_rate": 7.369589345172031e-06, - "loss": 0.0733, + "epoch": 0.5896980461811723, + "grad_norm": 0.6889485952784423, + "learning_rate": 9.932007814259088e-06, + "loss": 0.0643, "step": 664 }, { - "epoch": 0.29535864978902954, - "grad_norm": 0.9852104843013336, - "learning_rate": 7.380688124306327e-06, - "loss": 0.1057, + "epoch": 0.5905861456483126, + "grad_norm": 0.609717080517239, + "learning_rate": 9.931369359291154e-06, + "loss": 0.0872, "step": 665 }, { - "epoch": 0.29580279813457694, - "grad_norm": 1.3140910564234118, - "learning_rate": 7.391786903440622e-06, - "loss": 0.1013, + "epoch": 0.5914742451154529, + "grad_norm": 0.5862816597590875, + "learning_rate": 9.930727941443316e-06, + "loss": 0.0747, "step": 666 }, { - "epoch": 0.29624694648012434, - "grad_norm": 1.1588394898060206, - "learning_rate": 7.402885682574917e-06, - "loss": 0.1089, + "epoch": 0.5923623445825933, + "grad_norm": 0.577048754067645, + "learning_rate": 9.930083561100952e-06, + "loss": 0.0719, "step": 667 }, { - "epoch": 0.2966910948256718, - "grad_norm": 1.1857494935699013, - "learning_rate": 7.413984461709213e-06, - "loss": 0.0865, + "epoch": 0.5932504440497336, + "grad_norm": 0.6316269392471102, + "learning_rate": 9.92943621865122e-06, + "loss": 0.0755, "step": 668 }, { - "epoch": 0.2971352431712192, - "grad_norm": 0.861410639016016, - "learning_rate": 7.425083240843508e-06, - "loss": 0.0771, + "epoch": 0.5941385435168739, + "grad_norm": 0.7232924759518332, + "learning_rate": 9.928785914483056e-06, + "loss": 0.0886, "step": 669 }, { - "epoch": 0.2975793915167666, - "grad_norm": 1.293845000506648, - "learning_rate": 7.436182019977803e-06, - "loss": 0.1212, + "epoch": 0.5950266429840142, + "grad_norm": 0.7645669950177373, + "learning_rate": 9.928132648987181e-06, + "loss": 0.0938, "step": 670 }, { - "epoch": 0.298023539862314, - "grad_norm": 1.0407435776743181, - "learning_rate": 7.447280799112099e-06, - "loss": 0.0948, + "epoch": 0.5959147424511545, + "grad_norm": 0.6739484818221317, + "learning_rate": 9.927476422556091e-06, + "loss": 0.0711, "step": 671 }, { - "epoch": 0.2984676882078614, - "grad_norm": 0.9347120808526156, - "learning_rate": 7.458379578246393e-06, - "loss": 0.0779, + "epoch": 0.5968028419182948, + "grad_norm": 0.5849557845201215, + "learning_rate": 9.92681723558406e-06, + "loss": 0.0771, "step": 672 }, { - "epoch": 0.29891183655340886, - "grad_norm": 0.9205867276903889, - "learning_rate": 7.469478357380688e-06, - "loss": 0.0838, + "epoch": 0.5976909413854352, + "grad_norm": 0.6778196656517596, + "learning_rate": 9.926155088467145e-06, + "loss": 0.0937, "step": 673 }, { - "epoch": 0.29935598489895626, - "grad_norm": 1.388402805970494, - "learning_rate": 7.480577136514983e-06, - "loss": 0.0924, + "epoch": 0.5985790408525755, + "grad_norm": 0.6418708651856079, + "learning_rate": 9.925489981603177e-06, + "loss": 0.0687, "step": 674 }, { - "epoch": 0.29980013324450366, - "grad_norm": 1.6172485445367795, - "learning_rate": 7.491675915649279e-06, - "loss": 0.1132, + "epoch": 0.5994671403197158, + "grad_norm": 0.5525985012114619, + "learning_rate": 9.92482191539177e-06, + "loss": 0.0695, "step": 675 }, { - "epoch": 0.30024428159005107, - "grad_norm": 0.9561370366379224, - "learning_rate": 7.502774694783574e-06, - "loss": 0.1032, + "epoch": 0.6003552397868561, + "grad_norm": 0.6845649631504699, + "learning_rate": 9.924150890234311e-06, + "loss": 0.0694, "step": 676 }, { - "epoch": 0.30068842993559847, - "grad_norm": 1.0171256621828135, - "learning_rate": 7.5138734739178694e-06, - "loss": 0.0763, + "epoch": 0.6012433392539964, + "grad_norm": 0.5137621495039753, + "learning_rate": 9.923476906533971e-06, + "loss": 0.0667, "step": 677 }, { - "epoch": 0.3011325782811459, - "grad_norm": 1.0399008828044272, - "learning_rate": 7.524972253052165e-06, - "loss": 0.0763, + "epoch": 0.6021314387211367, + "grad_norm": 0.6305697583430346, + "learning_rate": 9.922799964695691e-06, + "loss": 0.0814, "step": 678 }, { - "epoch": 0.3015767266266933, - "grad_norm": 1.433840414738971, - "learning_rate": 7.5360710321864604e-06, - "loss": 0.1028, + "epoch": 0.6030195381882771, + "grad_norm": 0.7029756793327437, + "learning_rate": 9.922120065126192e-06, + "loss": 0.0859, "step": 679 }, { - "epoch": 0.3020208749722407, - "grad_norm": 1.0023463068044798, - "learning_rate": 7.5471698113207555e-06, - "loss": 0.0899, + "epoch": 0.6039076376554174, + "grad_norm": 0.4931592497555935, + "learning_rate": 9.92143720823398e-06, + "loss": 0.0632, "step": 680 }, { - "epoch": 0.30246502331778813, - "grad_norm": 0.8983226693149738, - "learning_rate": 7.55826859045505e-06, - "loss": 0.0906, + "epoch": 0.6047957371225577, + "grad_norm": 0.7076085243788406, + "learning_rate": 9.920751394429326e-06, + "loss": 0.0652, "step": 681 }, { - "epoch": 0.30290917166333553, - "grad_norm": 1.0458814923796536, - "learning_rate": 7.569367369589346e-06, - "loss": 0.087, + "epoch": 0.605683836589698, + "grad_norm": 0.5869311269808133, + "learning_rate": 9.920062624124282e-06, + "loss": 0.0733, "step": 682 }, { - "epoch": 0.303353320008883, - "grad_norm": 1.317009232453199, - "learning_rate": 7.580466148723641e-06, - "loss": 0.1185, + "epoch": 0.6065719360568383, + "grad_norm": 0.6789599846653736, + "learning_rate": 9.91937089773268e-06, + "loss": 0.0919, "step": 683 }, { - "epoch": 0.3037974683544304, - "grad_norm": 0.9515944992821098, - "learning_rate": 7.591564927857937e-06, - "loss": 0.0784, + "epoch": 0.6074600355239786, + "grad_norm": 0.7106183658768801, + "learning_rate": 9.918676215670123e-06, + "loss": 0.0626, "step": 684 }, { - "epoch": 0.3042416166999778, - "grad_norm": 0.8794173564696354, - "learning_rate": 7.602663706992232e-06, - "loss": 0.0689, + "epoch": 0.6083481349911191, + "grad_norm": 0.5307298829724526, + "learning_rate": 9.917978578353993e-06, + "loss": 0.0698, "step": 685 }, { - "epoch": 0.3046857650455252, - "grad_norm": 1.3164826313969946, - "learning_rate": 7.613762486126527e-06, - "loss": 0.109, + "epoch": 0.6092362344582594, + "grad_norm": 0.5130192562586083, + "learning_rate": 9.917277986203447e-06, + "loss": 0.0769, "step": 686 }, { - "epoch": 0.3051299133910726, - "grad_norm": 1.208021624972249, - "learning_rate": 7.624861265260823e-06, - "loss": 0.1006, + "epoch": 0.6101243339253997, + "grad_norm": 0.9975118284059644, + "learning_rate": 9.916574439639415e-06, + "loss": 0.0797, "step": 687 }, { - "epoch": 0.30557406173662005, - "grad_norm": 0.9049457634328961, - "learning_rate": 7.635960044395118e-06, - "loss": 0.0898, + "epoch": 0.61101243339254, + "grad_norm": 0.4501001012025509, + "learning_rate": 9.915867939084606e-06, + "loss": 0.0578, "step": 688 }, { - "epoch": 0.30601821008216745, - "grad_norm": 1.715228836512271, - "learning_rate": 7.647058823529411e-06, - "loss": 0.0954, + "epoch": 0.6119005328596803, + "grad_norm": 0.9282646718419376, + "learning_rate": 9.915158484963501e-06, + "loss": 0.076, "step": 689 }, { - "epoch": 0.30646235842771485, - "grad_norm": 0.8243753589934399, - "learning_rate": 7.658157602663708e-06, - "loss": 0.0775, + "epoch": 0.6127886323268206, + "grad_norm": 0.7685809779066356, + "learning_rate": 9.914446077702354e-06, + "loss": 0.0684, "step": 690 }, { - "epoch": 0.30690650677326226, - "grad_norm": 1.322320542517841, - "learning_rate": 7.669256381798003e-06, - "loss": 0.1016, + "epoch": 0.6136767317939609, + "grad_norm": 0.5523703432888231, + "learning_rate": 9.9137307177292e-06, + "loss": 0.0659, "step": 691 }, { - "epoch": 0.30735065511880966, - "grad_norm": 0.9618609396789024, - "learning_rate": 7.680355160932298e-06, - "loss": 0.0973, + "epoch": 0.6145648312611013, + "grad_norm": 0.8742460294151082, + "learning_rate": 9.91301240547384e-06, + "loss": 0.0997, "step": 692 }, { - "epoch": 0.3077948034643571, - "grad_norm": 0.9164782000859841, - "learning_rate": 7.691453940066593e-06, - "loss": 0.0625, + "epoch": 0.6154529307282416, + "grad_norm": 0.4410815923031126, + "learning_rate": 9.912291141367853e-06, + "loss": 0.0615, "step": 693 }, { - "epoch": 0.3082389518099045, - "grad_norm": 0.9162081625393108, - "learning_rate": 7.702552719200888e-06, - "loss": 0.089, + "epoch": 0.6163410301953819, + "grad_norm": 0.7411410182866369, + "learning_rate": 9.911566925844593e-06, + "loss": 0.0665, "step": 694 }, { - "epoch": 0.3086831001554519, - "grad_norm": 1.0274536861399304, - "learning_rate": 7.713651498335183e-06, - "loss": 0.0938, + "epoch": 0.6172291296625222, + "grad_norm": 0.9083393270663752, + "learning_rate": 9.910839759339186e-06, + "loss": 0.0753, "step": 695 }, { - "epoch": 0.3091272485009993, - "grad_norm": 1.2217184648988348, - "learning_rate": 7.72475027746948e-06, - "loss": 0.0944, + "epoch": 0.6181172291296625, + "grad_norm": 0.5717516177319388, + "learning_rate": 9.910109642288526e-06, + "loss": 0.0753, "step": 696 }, { - "epoch": 0.3095713968465467, - "grad_norm": 1.2610843560867633, - "learning_rate": 7.735849056603775e-06, - "loss": 0.1108, + "epoch": 0.6190053285968028, + "grad_norm": 0.7803157417708638, + "learning_rate": 9.909376575131288e-06, + "loss": 0.0783, "step": 697 }, { - "epoch": 0.3100155451920942, - "grad_norm": 0.9157424175535596, - "learning_rate": 7.746947835738068e-06, - "loss": 0.0955, + "epoch": 0.6198934280639432, + "grad_norm": 0.8487363706219908, + "learning_rate": 9.908640558307915e-06, + "loss": 0.0879, "step": 698 }, { - "epoch": 0.3104596935376416, - "grad_norm": 0.8026621451546831, - "learning_rate": 7.758046614872365e-06, - "loss": 0.0659, + "epoch": 0.6207815275310835, + "grad_norm": 0.7202024790138503, + "learning_rate": 9.907901592260619e-06, + "loss": 0.0829, "step": 699 }, { - "epoch": 0.310903841883189, - "grad_norm": 0.8085721007510808, - "learning_rate": 7.76914539400666e-06, - "loss": 0.0677, + "epoch": 0.6216696269982238, + "grad_norm": 0.6796685076427839, + "learning_rate": 9.907159677433394e-06, + "loss": 0.0683, "step": 700 }, { - "epoch": 0.3113479902287364, - "grad_norm": 0.9769745515486552, - "learning_rate": 7.780244173140955e-06, - "loss": 0.0896, + "epoch": 0.6225577264653641, + "grad_norm": 0.6907286160016336, + "learning_rate": 9.906414814271994e-06, + "loss": 0.0823, "step": 701 }, { - "epoch": 0.3117921385742838, - "grad_norm": 1.0969240019260509, - "learning_rate": 7.79134295227525e-06, - "loss": 0.1038, + "epoch": 0.6234458259325044, + "grad_norm": 0.7955497425182856, + "learning_rate": 9.905667003223952e-06, + "loss": 0.0911, "step": 702 }, { - "epoch": 0.31223628691983124, - "grad_norm": 0.6785759710369018, - "learning_rate": 7.802441731409545e-06, - "loss": 0.0559, + "epoch": 0.6243339253996447, + "grad_norm": 0.6476810022421644, + "learning_rate": 9.904916244738572e-06, + "loss": 0.1003, "step": 703 }, { - "epoch": 0.31268043526537864, - "grad_norm": 0.9042685928894448, - "learning_rate": 7.81354051054384e-06, - "loss": 0.0843, + "epoch": 0.6252220248667851, + "grad_norm": 0.8772932993530401, + "learning_rate": 9.904162539266924e-06, + "loss": 0.0747, "step": 704 }, { - "epoch": 0.31312458361092604, - "grad_norm": 0.8411568659072639, - "learning_rate": 7.824639289678137e-06, - "loss": 0.1247, + "epoch": 0.6261101243339254, + "grad_norm": 0.5356612214005964, + "learning_rate": 9.903405887261852e-06, + "loss": 0.0661, "step": 705 }, { - "epoch": 0.31356873195647345, - "grad_norm": 0.9154910517915918, - "learning_rate": 7.835738068812432e-06, - "loss": 0.0964, + "epoch": 0.6269982238010657, + "grad_norm": 0.5612010565144157, + "learning_rate": 9.902646289177971e-06, + "loss": 0.0663, "step": 706 }, { - "epoch": 0.3140128803020209, - "grad_norm": 1.31944775400599, - "learning_rate": 7.846836847946726e-06, - "loss": 0.0986, + "epoch": 0.627886323268206, + "grad_norm": 1.1208224490222134, + "learning_rate": 9.901883745471665e-06, + "loss": 0.0875, "step": 707 }, { - "epoch": 0.3144570286475683, - "grad_norm": 1.0518670866955138, - "learning_rate": 7.85793562708102e-06, - "loss": 0.095, + "epoch": 0.6287744227353463, + "grad_norm": 0.6505646562126589, + "learning_rate": 9.901118256601086e-06, + "loss": 0.0801, "step": 708 }, { - "epoch": 0.3149011769931157, - "grad_norm": 1.580342102818051, - "learning_rate": 7.869034406215318e-06, - "loss": 0.0979, + "epoch": 0.6296625222024866, + "grad_norm": 0.8700126403755859, + "learning_rate": 9.900349823026161e-06, + "loss": 0.0817, "step": 709 }, { - "epoch": 0.3153453253386631, - "grad_norm": 0.9955202031527373, - "learning_rate": 7.880133185349613e-06, - "loss": 0.0941, + "epoch": 0.6305506216696269, + "grad_norm": 0.6490556908485404, + "learning_rate": 9.899578445208578e-06, + "loss": 0.0808, "step": 710 }, { - "epoch": 0.3157894736842105, - "grad_norm": 0.9719600353134535, - "learning_rate": 7.891231964483908e-06, - "loss": 0.0929, + "epoch": 0.6314387211367674, + "grad_norm": 0.5297936162626927, + "learning_rate": 9.8988041236118e-06, + "loss": 0.0602, "step": 711 }, { - "epoch": 0.31623362202975797, - "grad_norm": 1.1433817316569035, - "learning_rate": 7.902330743618203e-06, - "loss": 0.0771, + "epoch": 0.6323268206039077, + "grad_norm": 0.6331575154891161, + "learning_rate": 9.89802685870106e-06, + "loss": 0.0743, "step": 712 }, { - "epoch": 0.31667777037530537, - "grad_norm": 1.3256882881979881, - "learning_rate": 7.913429522752498e-06, - "loss": 0.0926, + "epoch": 0.633214920071048, + "grad_norm": 0.5801710341779982, + "learning_rate": 9.897246650943353e-06, + "loss": 0.0706, "step": 713 }, { - "epoch": 0.31712191872085277, - "grad_norm": 1.0742097800389574, - "learning_rate": 7.924528301886793e-06, - "loss": 0.111, + "epoch": 0.6341030195381883, + "grad_norm": 1.1369421097074788, + "learning_rate": 9.896463500807447e-06, + "loss": 0.1075, "step": 714 }, { - "epoch": 0.31756606706640017, - "grad_norm": 0.8425793522673473, - "learning_rate": 7.935627081021088e-06, - "loss": 0.08, + "epoch": 0.6349911190053286, + "grad_norm": 0.45981576282496633, + "learning_rate": 9.895677408763878e-06, + "loss": 0.0702, "step": 715 }, { - "epoch": 0.31801021541194757, - "grad_norm": 1.2662850654524707, - "learning_rate": 7.946725860155383e-06, - "loss": 0.1285, + "epoch": 0.6358792184724689, + "grad_norm": 0.8763111120413932, + "learning_rate": 9.894888375284945e-06, + "loss": 0.0829, "step": 716 }, { - "epoch": 0.31845436375749503, - "grad_norm": 1.0639958089530621, - "learning_rate": 7.957824639289678e-06, - "loss": 0.0911, + "epoch": 0.6367673179396093, + "grad_norm": 0.5055918300464575, + "learning_rate": 9.89409640084472e-06, + "loss": 0.0748, "step": 717 }, { - "epoch": 0.31889851210304243, - "grad_norm": 0.9354143320657179, - "learning_rate": 7.968923418423973e-06, - "loss": 0.0937, + "epoch": 0.6376554174067496, + "grad_norm": 0.5977473500840715, + "learning_rate": 9.893301485919038e-06, + "loss": 0.0697, "step": 718 }, { - "epoch": 0.31934266044858983, - "grad_norm": 0.864109073064804, - "learning_rate": 7.98002219755827e-06, - "loss": 0.07, + "epoch": 0.6385435168738899, + "grad_norm": 0.6766781995614883, + "learning_rate": 9.892503630985504e-06, + "loss": 0.0833, "step": 719 }, { - "epoch": 0.31978680879413723, - "grad_norm": 0.9458708669966496, - "learning_rate": 7.991120976692565e-06, - "loss": 0.0906, + "epoch": 0.6394316163410302, + "grad_norm": 0.9713972859003385, + "learning_rate": 9.891702836523485e-06, + "loss": 0.0725, "step": 720 }, { - "epoch": 0.32023095713968464, - "grad_norm": 0.9008487720151787, - "learning_rate": 8.00221975582686e-06, - "loss": 0.0707, + "epoch": 0.6403197158081705, + "grad_norm": 0.6557626661480346, + "learning_rate": 9.89089910301412e-06, + "loss": 0.0746, "step": 721 }, { - "epoch": 0.3206751054852321, - "grad_norm": 1.395224062553485, - "learning_rate": 8.013318534961155e-06, - "loss": 0.1097, + "epoch": 0.6412078152753108, + "grad_norm": 0.6767885337992217, + "learning_rate": 9.890092430940308e-06, + "loss": 0.073, "step": 722 }, { - "epoch": 0.3211192538307795, - "grad_norm": 1.1228102547899144, - "learning_rate": 8.02441731409545e-06, - "loss": 0.103, + "epoch": 0.6420959147424512, + "grad_norm": 0.7472719120491773, + "learning_rate": 9.889282820786716e-06, + "loss": 0.067, "step": 723 }, { - "epoch": 0.3215634021763269, - "grad_norm": 0.8489026661679754, - "learning_rate": 8.035516093229745e-06, - "loss": 0.0737, + "epoch": 0.6429840142095915, + "grad_norm": 0.6639286593147884, + "learning_rate": 9.888470273039776e-06, + "loss": 0.0829, "step": 724 }, { - "epoch": 0.3220075505218743, - "grad_norm": 1.116811038077338, - "learning_rate": 8.04661487236404e-06, - "loss": 0.0861, + "epoch": 0.6438721136767318, + "grad_norm": 0.5982769355913197, + "learning_rate": 9.887654788187689e-06, + "loss": 0.0775, "step": 725 }, { - "epoch": 0.3224516988674217, - "grad_norm": 1.29632952775896, - "learning_rate": 8.057713651498335e-06, - "loss": 0.1178, + "epoch": 0.6447602131438721, + "grad_norm": 0.4855877184062672, + "learning_rate": 9.88683636672041e-06, + "loss": 0.0676, "step": 726 }, { - "epoch": 0.32289584721296916, - "grad_norm": 0.9812002112145898, - "learning_rate": 8.06881243063263e-06, - "loss": 0.0832, + "epoch": 0.6456483126110124, + "grad_norm": 0.5340617829892079, + "learning_rate": 9.886015009129674e-06, + "loss": 0.0727, "step": 727 }, { - "epoch": 0.32333999555851656, - "grad_norm": 1.4524326080678385, - "learning_rate": 8.079911209766927e-06, - "loss": 0.1157, + "epoch": 0.6465364120781527, + "grad_norm": 0.6639238564457132, + "learning_rate": 9.885190715908966e-06, + "loss": 0.0802, "step": 728 }, { - "epoch": 0.32378414390406396, - "grad_norm": 1.004750311195787, - "learning_rate": 8.091009988901222e-06, - "loss": 0.0817, + "epoch": 0.6474245115452931, + "grad_norm": 0.6110643471699649, + "learning_rate": 9.884363487553542e-06, + "loss": 0.0771, "step": 729 }, { - "epoch": 0.32422829224961136, - "grad_norm": 0.9522750516409738, - "learning_rate": 8.102108768035517e-06, - "loss": 0.0731, + "epoch": 0.6483126110124334, + "grad_norm": 0.4621339230437264, + "learning_rate": 9.88353332456042e-06, + "loss": 0.0449, "step": 730 }, { - "epoch": 0.32467244059515876, - "grad_norm": 1.0736358930231333, - "learning_rate": 8.113207547169812e-06, - "loss": 0.0994, + "epoch": 0.6492007104795737, + "grad_norm": 0.678858009404123, + "learning_rate": 9.88270022742838e-06, + "loss": 0.0877, "step": 731 }, { - "epoch": 0.3251165889407062, - "grad_norm": 1.8191859683022853, - "learning_rate": 8.124306326304107e-06, - "loss": 0.0937, + "epoch": 0.650088809946714, + "grad_norm": 0.6529691011694954, + "learning_rate": 9.881864196657965e-06, + "loss": 0.0812, "step": 732 }, { - "epoch": 0.3255607372862536, - "grad_norm": 1.5487234761111215, - "learning_rate": 8.135405105438403e-06, - "loss": 0.094, + "epoch": 0.6509769094138543, + "grad_norm": 0.5748660355744435, + "learning_rate": 9.881025232751482e-06, + "loss": 0.068, "step": 733 }, { - "epoch": 0.326004885631801, - "grad_norm": 1.0058113971822433, - "learning_rate": 8.146503884572698e-06, - "loss": 0.0887, + "epoch": 0.6518650088809946, + "grad_norm": 0.6375177785671727, + "learning_rate": 9.880183336213002e-06, + "loss": 0.0732, "step": 734 }, { - "epoch": 0.3264490339773484, - "grad_norm": 2.7718590597147963, - "learning_rate": 8.157602663706993e-06, - "loss": 0.1335, + "epoch": 0.6527531083481349, + "grad_norm": 0.6387740003981374, + "learning_rate": 9.879338507548353e-06, + "loss": 0.0812, "step": 735 }, { - "epoch": 0.3268931823228958, - "grad_norm": 2.05367085918422, - "learning_rate": 8.168701442841288e-06, - "loss": 0.1106, + "epoch": 0.6536412078152753, + "grad_norm": 0.7518231911966612, + "learning_rate": 9.87849074726513e-06, + "loss": 0.0804, "step": 736 }, { - "epoch": 0.3273373306684433, - "grad_norm": 1.115500020772734, - "learning_rate": 8.179800221975583e-06, - "loss": 0.0722, + "epoch": 0.6545293072824157, + "grad_norm": 0.6405075473759618, + "learning_rate": 9.877640055872686e-06, + "loss": 0.0743, "step": 737 }, { - "epoch": 0.3277814790139907, - "grad_norm": 1.1750795388816762, - "learning_rate": 8.19089900110988e-06, - "loss": 0.0625, + "epoch": 0.655417406749556, + "grad_norm": 0.6561685164748122, + "learning_rate": 9.876786433882134e-06, + "loss": 0.0655, "step": 738 }, { - "epoch": 0.3282256273595381, - "grad_norm": 0.9103834127535441, - "learning_rate": 8.201997780244175e-06, - "loss": 0.0796, + "epoch": 0.6563055062166963, + "grad_norm": 0.6639484183813833, + "learning_rate": 9.875929881806353e-06, + "loss": 0.08, "step": 739 }, { - "epoch": 0.3286697757050855, - "grad_norm": 0.9850210452729228, - "learning_rate": 8.21309655937847e-06, - "loss": 0.0658, + "epoch": 0.6571936056838366, + "grad_norm": 0.5959227008610657, + "learning_rate": 9.875070400159975e-06, + "loss": 0.0671, "step": 740 }, { - "epoch": 0.3291139240506329, - "grad_norm": 0.599291679194688, - "learning_rate": 8.224195338512763e-06, - "loss": 0.0528, + "epoch": 0.6580817051509769, + "grad_norm": 0.49191753805945415, + "learning_rate": 9.8742079894594e-06, + "loss": 0.0545, "step": 741 }, { - "epoch": 0.32955807239618035, - "grad_norm": 1.1762184481853932, - "learning_rate": 8.23529411764706e-06, - "loss": 0.101, + "epoch": 0.6589698046181173, + "grad_norm": 0.6813282250979682, + "learning_rate": 9.873342650222783e-06, + "loss": 0.0852, "step": 742 }, { - "epoch": 0.33000222074172775, - "grad_norm": 1.4905112857899239, - "learning_rate": 8.246392896781355e-06, - "loss": 0.1072, + "epoch": 0.6598579040852576, + "grad_norm": 0.9250391619311136, + "learning_rate": 9.87247438297004e-06, + "loss": 0.0565, "step": 743 }, { - "epoch": 0.33044636908727515, - "grad_norm": 0.9022874687789386, - "learning_rate": 8.25749167591565e-06, - "loss": 0.0948, + "epoch": 0.6607460035523979, + "grad_norm": 0.537411527063087, + "learning_rate": 9.871603188222842e-06, + "loss": 0.0639, "step": 744 }, { - "epoch": 0.33089051743282255, - "grad_norm": 0.7870509929638926, - "learning_rate": 8.268590455049945e-06, - "loss": 0.0968, + "epoch": 0.6616341030195382, + "grad_norm": 0.633528068688325, + "learning_rate": 9.870729066504629e-06, + "loss": 0.0761, "step": 745 }, { - "epoch": 0.33133466577836995, - "grad_norm": 0.9927750763334663, - "learning_rate": 8.27968923418424e-06, - "loss": 0.0944, + "epoch": 0.6625222024866785, + "grad_norm": 0.6043145862673834, + "learning_rate": 9.869852018340588e-06, + "loss": 0.0781, "step": 746 }, { - "epoch": 0.3317788141239174, - "grad_norm": 0.6875980968846429, - "learning_rate": 8.290788013318535e-06, - "loss": 0.0636, + "epoch": 0.6634103019538188, + "grad_norm": 0.6414104245118655, + "learning_rate": 9.868972044257675e-06, + "loss": 0.0624, "step": 747 }, { - "epoch": 0.3322229624694648, - "grad_norm": 0.8442026433419401, - "learning_rate": 8.301886792452832e-06, - "loss": 0.0831, + "epoch": 0.6642984014209592, + "grad_norm": 0.5109239331287874, + "learning_rate": 9.868089144784593e-06, + "loss": 0.067, "step": 748 }, { - "epoch": 0.3326671108150122, - "grad_norm": 0.9412453312595589, - "learning_rate": 8.312985571587127e-06, - "loss": 0.0776, + "epoch": 0.6651865008880995, + "grad_norm": 1.6962551913633659, + "learning_rate": 9.867203320451814e-06, + "loss": 0.0666, "step": 749 }, { - "epoch": 0.3331112591605596, - "grad_norm": 0.9364463887563221, - "learning_rate": 8.32408435072142e-06, - "loss": 0.0887, + "epoch": 0.6660746003552398, + "grad_norm": 0.813934879598361, + "learning_rate": 9.866314571791557e-06, + "loss": 0.0893, "step": 750 }, { - "epoch": 0.333555407506107, - "grad_norm": 0.7227541011442813, - "learning_rate": 8.335183129855715e-06, - "loss": 0.0857, + "epoch": 0.6669626998223801, + "grad_norm": 0.5511951847347283, + "learning_rate": 9.865422899337805e-06, + "loss": 0.0629, "step": 751 }, { - "epoch": 0.3339995558516545, - "grad_norm": 0.9456038972922441, - "learning_rate": 8.346281908990012e-06, - "loss": 0.0831, + "epoch": 0.6678507992895204, + "grad_norm": 0.5459582838219199, + "learning_rate": 9.864528303626295e-06, + "loss": 0.0785, "step": 752 }, { - "epoch": 0.3344437041972019, - "grad_norm": 0.8779308403041339, - "learning_rate": 8.357380688124307e-06, - "loss": 0.1193, + "epoch": 0.6687388987566607, + "grad_norm": 0.7705512172036647, + "learning_rate": 9.86363078519452e-06, + "loss": 0.0775, "step": 753 }, { - "epoch": 0.3348878525427493, - "grad_norm": 0.9670324236179916, - "learning_rate": 8.368479467258602e-06, - "loss": 0.0701, + "epoch": 0.6696269982238011, + "grad_norm": 0.619884058455073, + "learning_rate": 9.862730344581727e-06, + "loss": 0.0813, "step": 754 }, { - "epoch": 0.3353320008882967, - "grad_norm": 0.661653951539699, - "learning_rate": 8.379578246392897e-06, - "loss": 0.0601, + "epoch": 0.6705150976909414, + "grad_norm": 0.6226342312705461, + "learning_rate": 9.861826982328927e-06, + "loss": 0.071, "step": 755 }, { - "epoch": 0.3357761492338441, - "grad_norm": 0.7315432781058985, - "learning_rate": 8.390677025527192e-06, - "loss": 0.0931, + "epoch": 0.6714031971580817, + "grad_norm": 0.6044498881029556, + "learning_rate": 9.860920698978878e-06, + "loss": 0.075, "step": 756 }, { - "epoch": 0.33622029757939154, - "grad_norm": 0.821857058746102, - "learning_rate": 8.40177580466149e-06, - "loss": 0.0853, + "epoch": 0.672291296625222, + "grad_norm": 0.7913251099265589, + "learning_rate": 9.860011495076094e-06, + "loss": 0.085, "step": 757 }, { - "epoch": 0.33666444592493894, - "grad_norm": 0.8172880203687719, - "learning_rate": 8.412874583795784e-06, - "loss": 0.1047, + "epoch": 0.6731793960923623, + "grad_norm": 0.7381414512209249, + "learning_rate": 9.85909937116685e-06, + "loss": 0.0837, "step": 758 }, { - "epoch": 0.33710859427048634, - "grad_norm": 1.0390709540622176, - "learning_rate": 8.423973362930078e-06, - "loss": 0.0954, + "epoch": 0.6740674955595026, + "grad_norm": 0.6530467510353278, + "learning_rate": 9.858184327799167e-06, + "loss": 0.0914, "step": 759 }, { - "epoch": 0.33755274261603374, - "grad_norm": 0.854833081193649, - "learning_rate": 8.435072142064373e-06, - "loss": 0.07, + "epoch": 0.6749555950266429, + "grad_norm": 0.6030829518086117, + "learning_rate": 9.857266365522825e-06, + "loss": 0.0771, "step": 760 }, { - "epoch": 0.33799689096158114, - "grad_norm": 1.0343320589862304, - "learning_rate": 8.44617092119867e-06, - "loss": 0.1111, + "epoch": 0.6758436944937833, + "grad_norm": 0.5395337483407944, + "learning_rate": 9.856345484889358e-06, + "loss": 0.0699, "step": 761 }, { - "epoch": 0.3384410393071286, - "grad_norm": 0.9843875498635718, - "learning_rate": 8.457269700332965e-06, - "loss": 0.0885, + "epoch": 0.6767317939609236, + "grad_norm": 0.5560718019400371, + "learning_rate": 9.855421686452051e-06, + "loss": 0.0591, "step": 762 }, { - "epoch": 0.338885187652676, - "grad_norm": 1.301338158343054, - "learning_rate": 8.46836847946726e-06, - "loss": 0.0978, + "epoch": 0.677619893428064, + "grad_norm": 0.4739087807150043, + "learning_rate": 9.854494970765944e-06, + "loss": 0.0591, "step": 763 }, { - "epoch": 0.3393293359982234, - "grad_norm": 1.8891259339303466, - "learning_rate": 8.479467258601555e-06, - "loss": 0.1068, + "epoch": 0.6785079928952042, + "grad_norm": 0.5773819216810755, + "learning_rate": 9.853565338387829e-06, + "loss": 0.0705, "step": 764 }, { - "epoch": 0.3397734843437708, - "grad_norm": 0.8759783579336403, - "learning_rate": 8.49056603773585e-06, - "loss": 0.0702, + "epoch": 0.6793960923623446, + "grad_norm": 0.5683151590028005, + "learning_rate": 9.85263278987625e-06, + "loss": 0.0605, "step": 765 }, { - "epoch": 0.3402176326893182, - "grad_norm": 1.0288011027453567, - "learning_rate": 8.501664816870145e-06, - "loss": 0.1032, + "epoch": 0.6802841918294849, + "grad_norm": 0.5668668206312913, + "learning_rate": 9.851697325791505e-06, + "loss": 0.0889, "step": 766 }, { - "epoch": 0.34066178103486566, - "grad_norm": 1.0389974353449243, - "learning_rate": 8.51276359600444e-06, - "loss": 0.0876, + "epoch": 0.6811722912966253, + "grad_norm": 0.4471977890212434, + "learning_rate": 9.850758946695639e-06, + "loss": 0.0541, "step": 767 }, { - "epoch": 0.34110592938041306, - "grad_norm": 0.8136721936839324, - "learning_rate": 8.523862375138735e-06, - "loss": 0.0825, + "epoch": 0.6820603907637656, + "grad_norm": 0.5726134869433405, + "learning_rate": 9.849817653152455e-06, + "loss": 0.0761, "step": 768 }, { - "epoch": 0.34155007772596047, - "grad_norm": 0.7788745832938482, - "learning_rate": 8.53496115427303e-06, - "loss": 0.0841, + "epoch": 0.6829484902309059, + "grad_norm": 0.40904293323042457, + "learning_rate": 9.848873445727505e-06, + "loss": 0.0525, "step": 769 }, { - "epoch": 0.34199422607150787, - "grad_norm": 0.7903184210262955, - "learning_rate": 8.546059933407325e-06, - "loss": 0.0798, + "epoch": 0.6838365896980462, + "grad_norm": 0.5812966102900733, + "learning_rate": 9.847926324988085e-06, + "loss": 0.0682, "step": 770 }, { - "epoch": 0.34243837441705527, - "grad_norm": 0.9106664043393771, - "learning_rate": 8.557158712541622e-06, - "loss": 0.073, + "epoch": 0.6847246891651865, + "grad_norm": 0.6255213845313706, + "learning_rate": 9.846976291503251e-06, + "loss": 0.0989, "step": 771 }, { - "epoch": 0.3428825227626027, - "grad_norm": 1.129345042296519, - "learning_rate": 8.568257491675917e-06, - "loss": 0.1122, + "epoch": 0.6856127886323268, + "grad_norm": 0.5932962594157872, + "learning_rate": 9.846023345843805e-06, + "loss": 0.069, "step": 772 }, { - "epoch": 0.3433266711081501, - "grad_norm": 1.0693131682338626, - "learning_rate": 8.579356270810212e-06, - "loss": 0.0963, + "epoch": 0.6865008880994672, + "grad_norm": 0.4606594098023636, + "learning_rate": 9.845067488582297e-06, + "loss": 0.0616, "step": 773 }, { - "epoch": 0.34377081945369753, - "grad_norm": 0.9775496598799794, - "learning_rate": 8.590455049944507e-06, - "loss": 0.0917, + "epoch": 0.6873889875666075, + "grad_norm": 0.5887167134523461, + "learning_rate": 9.844108720293027e-06, + "loss": 0.0736, "step": 774 }, { - "epoch": 0.34421496779924493, - "grad_norm": 1.734237792325461, - "learning_rate": 8.601553829078802e-06, - "loss": 0.0964, + "epoch": 0.6882770870337478, + "grad_norm": 0.4610259409194756, + "learning_rate": 9.843147041552047e-06, + "loss": 0.064, "step": 775 }, { - "epoch": 0.3446591161447924, - "grad_norm": 0.8386031594806228, - "learning_rate": 8.612652608213097e-06, - "loss": 0.0808, + "epoch": 0.6891651865008881, + "grad_norm": 0.5556439414364968, + "learning_rate": 9.842182452937156e-06, + "loss": 0.0601, "step": 776 }, { - "epoch": 0.3451032644903398, - "grad_norm": 1.0606813630969132, - "learning_rate": 8.623751387347392e-06, - "loss": 0.0689, + "epoch": 0.6900532859680284, + "grad_norm": 0.4963423959312719, + "learning_rate": 9.8412149550279e-06, + "loss": 0.0567, "step": 777 }, { - "epoch": 0.3455474128358872, - "grad_norm": 0.9348314863875357, - "learning_rate": 8.634850166481687e-06, - "loss": 0.0732, + "epoch": 0.6909413854351687, + "grad_norm": 0.4881477349084676, + "learning_rate": 9.840244548405574e-06, + "loss": 0.067, "step": 778 }, { - "epoch": 0.3459915611814346, - "grad_norm": 1.3147018531805297, - "learning_rate": 8.645948945615982e-06, - "loss": 0.1498, + "epoch": 0.6918294849023091, + "grad_norm": 0.5927913645381122, + "learning_rate": 9.83927123365322e-06, + "loss": 0.0706, "step": 779 }, { - "epoch": 0.346435709526982, - "grad_norm": 0.6686475142401406, - "learning_rate": 8.657047724750277e-06, - "loss": 0.0643, + "epoch": 0.6927175843694494, + "grad_norm": 0.6246354132335293, + "learning_rate": 9.83829501135563e-06, + "loss": 0.0714, "step": 780 }, { - "epoch": 0.34687985787252945, - "grad_norm": 1.4294668088170326, - "learning_rate": 8.668146503884574e-06, - "loss": 0.0973, + "epoch": 0.6936056838365897, + "grad_norm": 0.45067414718655535, + "learning_rate": 9.837315882099338e-06, + "loss": 0.0769, "step": 781 }, { - "epoch": 0.34732400621807685, - "grad_norm": 1.0334036928446257, - "learning_rate": 8.67924528301887e-06, - "loss": 0.0907, + "epoch": 0.69449378330373, + "grad_norm": 0.46638603249090305, + "learning_rate": 9.83633384647263e-06, + "loss": 0.0651, "step": 782 }, { - "epoch": 0.34776815456362425, - "grad_norm": 0.787480188698369, - "learning_rate": 8.690344062153164e-06, - "loss": 0.0965, + "epoch": 0.6953818827708703, + "grad_norm": 0.44935838390777366, + "learning_rate": 9.835348905065534e-06, + "loss": 0.0637, "step": 783 }, { - "epoch": 0.34821230290917166, - "grad_norm": 1.1443565433110983, - "learning_rate": 8.70144284128746e-06, - "loss": 0.1128, + "epoch": 0.6962699822380106, + "grad_norm": 0.40165159687498664, + "learning_rate": 9.834361058469824e-06, + "loss": 0.0617, "step": 784 }, { - "epoch": 0.34865645125471906, - "grad_norm": 1.4871900340672362, - "learning_rate": 8.712541620421754e-06, - "loss": 0.1402, + "epoch": 0.6971580817051509, + "grad_norm": 0.5543025139993665, + "learning_rate": 9.833370307279024e-06, + "loss": 0.0761, "step": 785 }, { - "epoch": 0.3491005996002665, - "grad_norm": 1.2517565722671151, - "learning_rate": 8.72364039955605e-06, - "loss": 0.0937, + "epoch": 0.6980461811722913, + "grad_norm": 0.616612079623427, + "learning_rate": 9.832376652088398e-06, + "loss": 0.0874, "step": 786 }, { - "epoch": 0.3495447479458139, - "grad_norm": 1.2066836215604748, - "learning_rate": 8.734739178690345e-06, - "loss": 0.1142, + "epoch": 0.6989342806394316, + "grad_norm": 0.5630055764947307, + "learning_rate": 9.831380093494957e-06, + "loss": 0.0754, "step": 787 }, { - "epoch": 0.3499888962913613, - "grad_norm": 0.9107920976880283, - "learning_rate": 8.74583795782464e-06, - "loss": 0.0885, + "epoch": 0.6998223801065719, + "grad_norm": 0.5215681170244125, + "learning_rate": 9.830380632097458e-06, + "loss": 0.0642, "step": 788 }, { - "epoch": 0.3504330446369087, - "grad_norm": 0.9724494230585177, - "learning_rate": 8.756936736958935e-06, - "loss": 0.0957, + "epoch": 0.7007104795737122, + "grad_norm": 0.6656937827559947, + "learning_rate": 9.8293782684964e-06, + "loss": 0.07, "step": 789 }, { - "epoch": 0.3508771929824561, - "grad_norm": 0.8845187547684428, - "learning_rate": 8.768035516093231e-06, - "loss": 0.0713, + "epoch": 0.7015985790408525, + "grad_norm": 0.6288561251516173, + "learning_rate": 9.828373003294023e-06, + "loss": 0.0914, "step": 790 }, { - "epoch": 0.3513213413280036, - "grad_norm": 0.9555294870332015, - "learning_rate": 8.779134295227527e-06, - "loss": 0.0709, + "epoch": 0.7024866785079928, + "grad_norm": 0.5164186767628194, + "learning_rate": 9.827364837094315e-06, + "loss": 0.0684, "step": 791 }, { - "epoch": 0.351765489673551, - "grad_norm": 0.9971954202922753, - "learning_rate": 8.790233074361822e-06, - "loss": 0.086, + "epoch": 0.7033747779751333, + "grad_norm": 0.5655308094806477, + "learning_rate": 9.826353770503008e-06, + "loss": 0.0722, "step": 792 }, { - "epoch": 0.3522096380190984, - "grad_norm": 0.8799552486323357, - "learning_rate": 8.801331853496115e-06, - "loss": 0.0878, + "epoch": 0.7042628774422736, + "grad_norm": 0.553941699731642, + "learning_rate": 9.82533980412757e-06, + "loss": 0.0696, "step": 793 }, { - "epoch": 0.3526537863646458, - "grad_norm": 1.1586612806863954, - "learning_rate": 8.812430632630412e-06, - "loss": 0.1017, + "epoch": 0.7051509769094139, + "grad_norm": 0.5697946394267486, + "learning_rate": 9.824322938577217e-06, + "loss": 0.0609, "step": 794 }, { - "epoch": 0.3530979347101932, - "grad_norm": 1.0618777970371058, - "learning_rate": 8.823529411764707e-06, - "loss": 0.0932, + "epoch": 0.7060390763765542, + "grad_norm": 0.5673274646378402, + "learning_rate": 9.823303174462908e-06, + "loss": 0.0612, "step": 795 }, { - "epoch": 0.35354208305574064, - "grad_norm": 1.0717541227428644, - "learning_rate": 8.834628190899002e-06, - "loss": 0.095, + "epoch": 0.7069271758436945, + "grad_norm": 0.41718573178468515, + "learning_rate": 9.822280512397336e-06, + "loss": 0.055, "step": 796 }, { - "epoch": 0.35398623140128804, - "grad_norm": 1.1705694879290995, - "learning_rate": 8.845726970033297e-06, - "loss": 0.1118, + "epoch": 0.7078152753108348, + "grad_norm": 0.5566723528174443, + "learning_rate": 9.821254952994942e-06, + "loss": 0.0623, "step": 797 }, { - "epoch": 0.35443037974683544, - "grad_norm": 1.1829108047712253, - "learning_rate": 8.856825749167592e-06, - "loss": 0.1028, + "epoch": 0.7087033747779752, + "grad_norm": 0.5929826177517866, + "learning_rate": 9.820226496871904e-06, + "loss": 0.0623, "step": 798 }, { - "epoch": 0.35487452809238285, - "grad_norm": 0.7445292910176993, - "learning_rate": 8.867924528301887e-06, - "loss": 0.0639, + "epoch": 0.7095914742451155, + "grad_norm": 0.5688187309896156, + "learning_rate": 9.819195144646148e-06, + "loss": 0.0883, "step": 799 }, { - "epoch": 0.35531867643793025, - "grad_norm": 1.0475116637450934, - "learning_rate": 8.879023307436184e-06, - "loss": 0.0706, + "epoch": 0.7104795737122558, + "grad_norm": 0.49122317812039684, + "learning_rate": 9.818160896937326e-06, + "loss": 0.0678, "step": 800 }, { - "epoch": 0.3557628247834777, - "grad_norm": 1.1028250244486228, - "learning_rate": 8.890122086570479e-06, - "loss": 0.0819, + "epoch": 0.7113676731793961, + "grad_norm": 0.6118298125130784, + "learning_rate": 9.817123754366842e-06, + "loss": 0.0768, "step": 801 }, { - "epoch": 0.3562069731290251, - "grad_norm": 1.0807445455171478, - "learning_rate": 8.901220865704772e-06, - "loss": 0.0888, + "epoch": 0.7122557726465364, + "grad_norm": 0.46128906858077734, + "learning_rate": 9.816083717557836e-06, + "loss": 0.0685, "step": 802 }, { - "epoch": 0.3566511214745725, - "grad_norm": 1.0684985644998497, - "learning_rate": 8.912319644839067e-06, - "loss": 0.0822, + "epoch": 0.7131438721136767, + "grad_norm": 0.437684287621171, + "learning_rate": 9.815040787135185e-06, + "loss": 0.0576, "step": 803 }, { - "epoch": 0.3570952698201199, - "grad_norm": 1.3191397870753145, - "learning_rate": 8.923418423973364e-06, - "loss": 0.1026, + "epoch": 0.7140319715808171, + "grad_norm": 0.5342097826992428, + "learning_rate": 9.813994963725502e-06, + "loss": 0.0678, "step": 804 }, { - "epoch": 0.3575394181656673, - "grad_norm": 1.0262852726585219, - "learning_rate": 8.93451720310766e-06, - "loss": 0.0776, + "epoch": 0.7149200710479574, + "grad_norm": 0.5669114346058003, + "learning_rate": 9.812946247957145e-06, + "loss": 0.062, "step": 805 }, { - "epoch": 0.35798356651121477, - "grad_norm": 1.0652219347423726, - "learning_rate": 8.945615982241954e-06, - "loss": 0.0907, + "epoch": 0.7158081705150977, + "grad_norm": 0.4582139918634001, + "learning_rate": 9.81189464046021e-06, + "loss": 0.0655, "step": 806 }, { - "epoch": 0.35842771485676217, - "grad_norm": 1.0116135773749395, - "learning_rate": 8.95671476137625e-06, - "loss": 0.0959, + "epoch": 0.716696269982238, + "grad_norm": 0.5870243211200192, + "learning_rate": 9.810840141866518e-06, + "loss": 0.0775, "step": 807 }, { - "epoch": 0.35887186320230957, - "grad_norm": 0.7818161469235702, - "learning_rate": 8.967813540510544e-06, - "loss": 0.0687, + "epoch": 0.7175843694493783, + "grad_norm": 0.631756840637851, + "learning_rate": 9.809782752809644e-06, + "loss": 0.0736, "step": 808 }, { - "epoch": 0.359316011547857, - "grad_norm": 1.2323283633104487, - "learning_rate": 8.97891231964484e-06, - "loss": 0.1036, + "epoch": 0.7184724689165186, + "grad_norm": 0.6539120301427688, + "learning_rate": 9.808722473924888e-06, + "loss": 0.0959, "step": 809 }, { - "epoch": 0.3597601598934044, - "grad_norm": 1.0483052311963612, - "learning_rate": 8.990011098779136e-06, - "loss": 0.0899, + "epoch": 0.7193605683836589, + "grad_norm": 0.5032683848281911, + "learning_rate": 9.807659305849291e-06, + "loss": 0.064, "step": 810 }, { - "epoch": 0.36020430823895183, - "grad_norm": 0.8215378781764873, - "learning_rate": 9.00110987791343e-06, - "loss": 0.089, + "epoch": 0.7202486678507993, + "grad_norm": 0.610360723221449, + "learning_rate": 9.80659324922163e-06, + "loss": 0.0597, "step": 811 }, { - "epoch": 0.36064845658449923, - "grad_norm": 1.2325336486761531, - "learning_rate": 9.012208657047725e-06, - "loss": 0.1338, + "epoch": 0.7211367673179396, + "grad_norm": 0.6189388212782825, + "learning_rate": 9.805524304682412e-06, + "loss": 0.0613, "step": 812 }, { - "epoch": 0.36109260493004663, - "grad_norm": 1.0281228699327587, - "learning_rate": 9.02330743618202e-06, - "loss": 0.0928, + "epoch": 0.7220248667850799, + "grad_norm": 0.5840790967615844, + "learning_rate": 9.804452472873886e-06, + "loss": 0.0848, "step": 813 }, { - "epoch": 0.36153675327559404, - "grad_norm": 0.9331667240788679, - "learning_rate": 9.034406215316316e-06, - "loss": 0.0795, + "epoch": 0.7229129662522202, + "grad_norm": 0.6329622168240866, + "learning_rate": 9.803377754440036e-06, + "loss": 0.0825, "step": 814 }, { - "epoch": 0.36198090162114144, - "grad_norm": 0.9152493219085753, - "learning_rate": 9.045504994450612e-06, - "loss": 0.1015, + "epoch": 0.7238010657193605, + "grad_norm": 0.619006500413132, + "learning_rate": 9.802300150026573e-06, + "loss": 0.0712, "step": 815 }, { - "epoch": 0.3624250499666889, - "grad_norm": 0.8370218994947535, - "learning_rate": 9.056603773584907e-06, - "loss": 0.0873, + "epoch": 0.7246891651865008, + "grad_norm": 0.6106419407921674, + "learning_rate": 9.801219660280946e-06, + "loss": 0.0663, "step": 816 }, { - "epoch": 0.3628691983122363, - "grad_norm": 0.7774818767814435, - "learning_rate": 9.067702552719202e-06, - "loss": 0.0822, + "epoch": 0.7255772646536413, + "grad_norm": 0.4891178874396289, + "learning_rate": 9.800136285852344e-06, + "loss": 0.0649, "step": 817 }, { - "epoch": 0.3633133466577837, - "grad_norm": 1.6384223947850474, - "learning_rate": 9.078801331853497e-06, - "loss": 0.081, + "epoch": 0.7264653641207816, + "grad_norm": 0.5451059121272476, + "learning_rate": 9.799050027391679e-06, + "loss": 0.0677, "step": 818 }, { - "epoch": 0.3637574950033311, - "grad_norm": 0.991313719663541, - "learning_rate": 9.089900110987792e-06, - "loss": 0.0837, + "epoch": 0.7273534635879219, + "grad_norm": 0.4238153011827328, + "learning_rate": 9.7979608855516e-06, + "loss": 0.0534, "step": 819 }, { - "epoch": 0.3642016433488785, - "grad_norm": 0.8596136661125425, - "learning_rate": 9.100998890122087e-06, - "loss": 0.0918, + "epoch": 0.7282415630550622, + "grad_norm": 0.6330021046191763, + "learning_rate": 9.796868860986489e-06, + "loss": 0.0666, "step": 820 }, { - "epoch": 0.36464579169442596, - "grad_norm": 1.0812055701714487, - "learning_rate": 9.112097669256382e-06, - "loss": 0.0825, + "epoch": 0.7291296625222025, + "grad_norm": 0.6045734694919165, + "learning_rate": 9.795773954352459e-06, + "loss": 0.0547, "step": 821 }, { - "epoch": 0.36508994003997336, - "grad_norm": 0.8070207504890635, - "learning_rate": 9.123196448390677e-06, - "loss": 0.0843, + "epoch": 0.7300177619893428, + "grad_norm": 0.5198037170364895, + "learning_rate": 9.794676166307354e-06, + "loss": 0.0627, "step": 822 }, { - "epoch": 0.36553408838552076, - "grad_norm": 1.1679552400138797, - "learning_rate": 9.134295227524974e-06, - "loss": 0.0914, + "epoch": 0.7309058614564832, + "grad_norm": 0.5554149391898202, + "learning_rate": 9.793575497510753e-06, + "loss": 0.0612, "step": 823 }, { - "epoch": 0.36597823673106816, - "grad_norm": 0.9627855966278458, - "learning_rate": 9.145394006659269e-06, - "loss": 0.08, + "epoch": 0.7317939609236235, + "grad_norm": 0.5343561182075582, + "learning_rate": 9.792471948623964e-06, + "loss": 0.0683, "step": 824 }, { - "epoch": 0.36642238507661556, - "grad_norm": 0.8861606234396192, - "learning_rate": 9.156492785793564e-06, - "loss": 0.0921, + "epoch": 0.7326820603907638, + "grad_norm": 0.5790623408188066, + "learning_rate": 9.791365520310019e-06, + "loss": 0.0724, "step": 825 }, { - "epoch": 0.366866533422163, - "grad_norm": 1.0771512576751088, - "learning_rate": 9.167591564927859e-06, - "loss": 0.0981, + "epoch": 0.7335701598579041, + "grad_norm": 0.8344222032090851, + "learning_rate": 9.790256213233688e-06, + "loss": 0.0799, "step": 826 }, { - "epoch": 0.3673106817677104, - "grad_norm": 0.7667307978284066, - "learning_rate": 9.178690344062154e-06, - "loss": 0.0763, + "epoch": 0.7344582593250444, + "grad_norm": 0.543863835656294, + "learning_rate": 9.789144028061471e-06, + "loss": 0.0665, "step": 827 }, { - "epoch": 0.3677548301132578, - "grad_norm": 0.8789921021859833, - "learning_rate": 9.189789123196449e-06, - "loss": 0.0969, + "epoch": 0.7353463587921847, + "grad_norm": 0.6778994069764115, + "learning_rate": 9.788028965461592e-06, + "loss": 0.0736, "step": 828 }, { - "epoch": 0.3681989784588052, - "grad_norm": 1.100607694945035, - "learning_rate": 9.200887902330744e-06, - "loss": 0.0945, + "epoch": 0.7362344582593251, + "grad_norm": 0.5001185210304049, + "learning_rate": 9.786911026104007e-06, + "loss": 0.0814, "step": 829 }, { - "epoch": 0.3686431268043526, - "grad_norm": 0.8889918245566973, - "learning_rate": 9.21198668146504e-06, - "loss": 0.1027, + "epoch": 0.7371225577264654, + "grad_norm": 0.4318114606035255, + "learning_rate": 9.785790210660397e-06, + "loss": 0.0457, "step": 830 }, { - "epoch": 0.3690872751499001, - "grad_norm": 0.9919838528287025, - "learning_rate": 9.223085460599334e-06, - "loss": 0.0876, + "epoch": 0.7380106571936057, + "grad_norm": 0.5103918503485578, + "learning_rate": 9.784666519804177e-06, + "loss": 0.0584, "step": 831 }, { - "epoch": 0.3695314234954475, - "grad_norm": 0.7322878388650527, - "learning_rate": 9.23418423973363e-06, - "loss": 0.0827, + "epoch": 0.738898756660746, + "grad_norm": 0.998343595704113, + "learning_rate": 9.783539954210486e-06, + "loss": 0.0803, "step": 832 }, { - "epoch": 0.3699755718409949, - "grad_norm": 0.8334148632374379, - "learning_rate": 9.245283018867926e-06, - "loss": 0.0836, + "epoch": 0.7397868561278863, + "grad_norm": 0.6415964902419391, + "learning_rate": 9.782410514556188e-06, + "loss": 0.0678, "step": 833 }, { - "epoch": 0.3704197201865423, - "grad_norm": 1.2813561868661674, - "learning_rate": 9.256381798002221e-06, - "loss": 0.0889, + "epoch": 0.7406749555950266, + "grad_norm": 0.6374999797886973, + "learning_rate": 9.781278201519879e-06, + "loss": 0.0674, "step": 834 }, { - "epoch": 0.3708638685320897, - "grad_norm": 1.339839760620212, - "learning_rate": 9.267480577136516e-06, - "loss": 0.1145, + "epoch": 0.7415630550621669, + "grad_norm": 0.6393001254832945, + "learning_rate": 9.780143015781878e-06, + "loss": 0.0677, "step": 835 }, { - "epoch": 0.37130801687763715, - "grad_norm": 0.9154079084300466, - "learning_rate": 9.278579356270811e-06, - "loss": 0.0816, + "epoch": 0.7424511545293073, + "grad_norm": 0.5441955402643336, + "learning_rate": 9.77900495802423e-06, + "loss": 0.0703, "step": 836 }, { - "epoch": 0.37175216522318455, - "grad_norm": 0.6790049839601768, - "learning_rate": 9.289678135405106e-06, - "loss": 0.052, + "epoch": 0.7433392539964476, + "grad_norm": 0.5118800999904295, + "learning_rate": 9.777864028930705e-06, + "loss": 0.0646, "step": 837 }, { - "epoch": 0.37219631356873195, - "grad_norm": 1.2902350399135438, - "learning_rate": 9.300776914539401e-06, - "loss": 0.1205, + "epoch": 0.7442273534635879, + "grad_norm": 0.4935923907638607, + "learning_rate": 9.7767202291868e-06, + "loss": 0.0512, "step": 838 }, { - "epoch": 0.37264046191427935, - "grad_norm": 0.6777630897746993, - "learning_rate": 9.311875693673697e-06, - "loss": 0.0626, + "epoch": 0.7451154529307282, + "grad_norm": 0.5948126216510479, + "learning_rate": 9.775573559479739e-06, + "loss": 0.0698, "step": 839 }, { - "epoch": 0.37308461025982675, - "grad_norm": 0.974153136606222, - "learning_rate": 9.322974472807992e-06, - "loss": 0.0872, + "epoch": 0.7460035523978685, + "grad_norm": 0.4871301284729971, + "learning_rate": 9.774424020498463e-06, + "loss": 0.0668, "step": 840 }, { - "epoch": 0.3735287586053742, - "grad_norm": 0.8802239921866019, - "learning_rate": 9.334073251942287e-06, - "loss": 0.0705, + "epoch": 0.7468916518650088, + "grad_norm": 0.47479989252048854, + "learning_rate": 9.773271612933646e-06, + "loss": 0.0643, "step": 841 }, { - "epoch": 0.3739729069509216, - "grad_norm": 0.8051312485970141, - "learning_rate": 9.345172031076582e-06, - "loss": 0.0878, + "epoch": 0.7477797513321492, + "grad_norm": 0.4942090866097779, + "learning_rate": 9.772116337477677e-06, + "loss": 0.0797, "step": 842 }, { - "epoch": 0.374417055296469, - "grad_norm": 1.160749633212372, - "learning_rate": 9.356270810210878e-06, - "loss": 0.1112, + "epoch": 0.7486678507992895, + "grad_norm": 0.5473659858639748, + "learning_rate": 9.770958194824673e-06, + "loss": 0.0797, "step": 843 }, { - "epoch": 0.3748612036420164, - "grad_norm": 0.8346210460548523, - "learning_rate": 9.367369589345174e-06, - "loss": 0.1035, + "epoch": 0.7495559502664298, + "grad_norm": 0.424049921592322, + "learning_rate": 9.769797185670475e-06, + "loss": 0.0604, "step": 844 }, { - "epoch": 0.3753053519875639, - "grad_norm": 1.0464653523849117, - "learning_rate": 9.378468368479467e-06, - "loss": 0.0901, + "epoch": 0.7504440497335702, + "grad_norm": 0.5564834103503165, + "learning_rate": 9.768633310712643e-06, + "loss": 0.0657, "step": 845 }, { - "epoch": 0.3757495003331113, - "grad_norm": 0.7515018279210384, - "learning_rate": 9.389567147613764e-06, - "loss": 0.0618, + "epoch": 0.7513321492007105, + "grad_norm": 0.48763222699568676, + "learning_rate": 9.767466570650457e-06, + "loss": 0.0733, "step": 846 }, { - "epoch": 0.3761936486786587, - "grad_norm": 1.1240058131240411, - "learning_rate": 9.400665926748059e-06, - "loss": 0.0682, + "epoch": 0.7522202486678508, + "grad_norm": 1.3304763250830134, + "learning_rate": 9.766296966184925e-06, + "loss": 0.0805, "step": 847 }, { - "epoch": 0.3766377970242061, - "grad_norm": 0.8330871505734428, - "learning_rate": 9.411764705882354e-06, - "loss": 0.0815, + "epoch": 0.7531083481349912, + "grad_norm": 0.48145762725920477, + "learning_rate": 9.765124498018769e-06, + "loss": 0.0547, "step": 848 }, { - "epoch": 0.3770819453697535, - "grad_norm": 0.6597157022162015, - "learning_rate": 9.422863485016649e-06, - "loss": 0.0705, + "epoch": 0.7539964476021315, + "grad_norm": 0.6854989871417136, + "learning_rate": 9.763949166856436e-06, + "loss": 0.0695, "step": 849 }, { - "epoch": 0.37752609371530094, - "grad_norm": 0.870783524840563, - "learning_rate": 9.433962264150944e-06, - "loss": 0.0756, + "epoch": 0.7548845470692718, + "grad_norm": 0.5439120991144568, + "learning_rate": 9.762770973404094e-06, + "loss": 0.0625, "step": 850 }, { - "epoch": 0.37797024206084834, - "grad_norm": 0.7718200428803087, - "learning_rate": 9.445061043285239e-06, - "loss": 0.0688, + "epoch": 0.7557726465364121, + "grad_norm": 0.49542823833573935, + "learning_rate": 9.761589918369626e-06, + "loss": 0.0697, "step": 851 }, { - "epoch": 0.37841439040639574, - "grad_norm": 1.0565515531045717, - "learning_rate": 9.456159822419536e-06, - "loss": 0.096, + "epoch": 0.7566607460035524, + "grad_norm": 0.42224122911916856, + "learning_rate": 9.760406002462639e-06, + "loss": 0.0668, "step": 852 }, { - "epoch": 0.37885853875194314, - "grad_norm": 0.9116267321999116, - "learning_rate": 9.46725860155383e-06, - "loss": 0.1219, + "epoch": 0.7575488454706927, + "grad_norm": 0.45058473601855664, + "learning_rate": 9.759219226394455e-06, + "loss": 0.0571, "step": 853 }, { - "epoch": 0.37930268709749054, - "grad_norm": 0.614891198569231, - "learning_rate": 9.478357380688124e-06, - "loss": 0.0618, + "epoch": 0.7584369449378331, + "grad_norm": 0.6448396174926396, + "learning_rate": 9.758029590878115e-06, + "loss": 0.0865, "step": 854 }, { - "epoch": 0.379746835443038, - "grad_norm": 0.6996810655829085, - "learning_rate": 9.48945615982242e-06, - "loss": 0.0733, + "epoch": 0.7593250444049734, + "grad_norm": 0.5077144999320161, + "learning_rate": 9.756837096628384e-06, + "loss": 0.0657, "step": 855 }, { - "epoch": 0.3801909837885854, - "grad_norm": 0.8742171165547441, - "learning_rate": 9.500554938956716e-06, - "loss": 0.0943, + "epoch": 0.7602131438721137, + "grad_norm": 0.4917255888597828, + "learning_rate": 9.755641744361736e-06, + "loss": 0.066, "step": 856 }, { - "epoch": 0.3806351321341328, - "grad_norm": 0.6941177838819981, - "learning_rate": 9.511653718091011e-06, - "loss": 0.0979, + "epoch": 0.761101243339254, + "grad_norm": 0.5412727578928164, + "learning_rate": 9.754443534796368e-06, + "loss": 0.0695, "step": 857 }, { - "epoch": 0.3810792804796802, - "grad_norm": 0.9912966659857255, - "learning_rate": 9.522752497225306e-06, - "loss": 0.1146, + "epoch": 0.7619893428063943, + "grad_norm": 0.6417173521943089, + "learning_rate": 9.753242468652191e-06, + "loss": 0.0709, "step": 858 }, { - "epoch": 0.3815234288252276, - "grad_norm": 0.8052784568872317, - "learning_rate": 9.533851276359601e-06, - "loss": 0.0765, + "epoch": 0.7628774422735346, + "grad_norm": 0.3981792957258406, + "learning_rate": 9.752038546650832e-06, + "loss": 0.0693, "step": 859 }, { - "epoch": 0.38196757717077506, - "grad_norm": 0.6650776701454606, - "learning_rate": 9.544950055493896e-06, - "loss": 0.0598, + "epoch": 0.7637655417406749, + "grad_norm": 0.523667116627595, + "learning_rate": 9.75083176951564e-06, + "loss": 0.0559, "step": 860 }, { - "epoch": 0.38241172551632246, - "grad_norm": 0.9207926874686408, - "learning_rate": 9.556048834628191e-06, - "loss": 0.0812, + "epoch": 0.7646536412078153, + "grad_norm": 0.5112962424889065, + "learning_rate": 9.749622137971666e-06, + "loss": 0.0671, "step": 861 }, { - "epoch": 0.38285587386186987, - "grad_norm": 0.9371867517767639, - "learning_rate": 9.567147613762488e-06, - "loss": 0.1016, + "epoch": 0.7655417406749556, + "grad_norm": 0.4931265226964857, + "learning_rate": 9.748409652745691e-06, + "loss": 0.0626, "step": 862 }, { - "epoch": 0.38330002220741727, - "grad_norm": 0.8381824885212333, - "learning_rate": 9.578246392896782e-06, - "loss": 0.0975, + "epoch": 0.7664298401420959, + "grad_norm": 0.5921485380445096, + "learning_rate": 9.747194314566203e-06, + "loss": 0.0698, "step": 863 }, { - "epoch": 0.38374417055296467, - "grad_norm": 0.8006129424637977, - "learning_rate": 9.589345172031077e-06, - "loss": 0.0768, + "epoch": 0.7673179396092362, + "grad_norm": 0.47926368786238915, + "learning_rate": 9.745976124163403e-06, + "loss": 0.062, "step": 864 }, { - "epoch": 0.3841883188985121, - "grad_norm": 1.5462086828607524, - "learning_rate": 9.600443951165372e-06, - "loss": 0.0973, + "epoch": 0.7682060390763765, + "grad_norm": 0.41810806221103497, + "learning_rate": 9.744755082269209e-06, + "loss": 0.0569, "step": 865 }, { - "epoch": 0.38463246724405953, - "grad_norm": 0.9098699947791947, - "learning_rate": 9.611542730299668e-06, - "loss": 0.0862, + "epoch": 0.7690941385435168, + "grad_norm": 0.5000498796532304, + "learning_rate": 9.74353118961725e-06, + "loss": 0.0625, "step": 866 }, { - "epoch": 0.38507661558960693, - "grad_norm": 0.6758969469373516, - "learning_rate": 9.622641509433963e-06, - "loss": 0.0926, + "epoch": 0.7699822380106572, + "grad_norm": 0.5931936446327853, + "learning_rate": 9.74230444694287e-06, + "loss": 0.0884, "step": 867 }, { - "epoch": 0.38552076393515433, - "grad_norm": 0.8013932877888278, - "learning_rate": 9.633740288568259e-06, - "loss": 0.0788, + "epoch": 0.7708703374777975, + "grad_norm": 0.44084728448733057, + "learning_rate": 9.741074854983123e-06, + "loss": 0.0701, "step": 868 }, { - "epoch": 0.38596491228070173, - "grad_norm": 0.8330625416886887, - "learning_rate": 9.644839067702554e-06, - "loss": 0.0908, + "epoch": 0.7717584369449378, + "grad_norm": 0.5016984411540918, + "learning_rate": 9.739842414476778e-06, + "loss": 0.06, "step": 869 }, { - "epoch": 0.3864090606262492, - "grad_norm": 0.8653830292054833, - "learning_rate": 9.655937846836849e-06, - "loss": 0.0845, + "epoch": 0.7726465364120781, + "grad_norm": 0.5018618855198906, + "learning_rate": 9.738607126164313e-06, + "loss": 0.0628, "step": 870 }, { - "epoch": 0.3868532089717966, - "grad_norm": 0.9667174949251367, - "learning_rate": 9.667036625971144e-06, - "loss": 0.0661, + "epoch": 0.7735346358792184, + "grad_norm": 0.5148250537317675, + "learning_rate": 9.737368990787917e-06, + "loss": 0.0599, "step": 871 }, { - "epoch": 0.387297357317344, - "grad_norm": 0.9162313384818003, - "learning_rate": 9.678135405105439e-06, - "loss": 0.0839, + "epoch": 0.7744227353463587, + "grad_norm": 0.47205325118182295, + "learning_rate": 9.73612800909149e-06, + "loss": 0.06, "step": 872 }, { - "epoch": 0.3877415056628914, - "grad_norm": 1.05316579924942, - "learning_rate": 9.689234184239734e-06, - "loss": 0.0884, + "epoch": 0.7753108348134992, + "grad_norm": 0.5422210072587286, + "learning_rate": 9.73488418182064e-06, + "loss": 0.0883, "step": 873 }, { - "epoch": 0.3881856540084388, - "grad_norm": 0.6947987167903426, - "learning_rate": 9.700332963374029e-06, - "loss": 0.0676, + "epoch": 0.7761989342806395, + "grad_norm": 0.41956223545218563, + "learning_rate": 9.733637509722693e-06, + "loss": 0.069, "step": 874 }, { - "epoch": 0.38862980235398625, - "grad_norm": 0.7637218765216399, - "learning_rate": 9.711431742508326e-06, - "loss": 0.0855, + "epoch": 0.7770870337477798, + "grad_norm": 0.5573260970497077, + "learning_rate": 9.732387993546672e-06, + "loss": 0.0729, "step": 875 }, { - "epoch": 0.38907395069953365, - "grad_norm": 0.8049041095871476, - "learning_rate": 9.72253052164262e-06, - "loss": 0.0783, + "epoch": 0.7779751332149201, + "grad_norm": 0.4223713644069139, + "learning_rate": 9.731135634043317e-06, + "loss": 0.0561, "step": 876 }, { - "epoch": 0.38951809904508106, - "grad_norm": 1.2616209833129097, - "learning_rate": 9.733629300776916e-06, - "loss": 0.0994, + "epoch": 0.7788632326820604, + "grad_norm": 0.42386931283969925, + "learning_rate": 9.729880431965077e-06, + "loss": 0.0512, "step": 877 }, { - "epoch": 0.38996224739062846, - "grad_norm": 0.8501327438099852, - "learning_rate": 9.744728079911211e-06, - "loss": 0.0865, + "epoch": 0.7797513321492007, + "grad_norm": 0.562790329681012, + "learning_rate": 9.728622388066104e-06, + "loss": 0.0712, "step": 878 }, { - "epoch": 0.39040639573617586, - "grad_norm": 0.8980289128819791, - "learning_rate": 9.755826859045506e-06, - "loss": 0.0644, + "epoch": 0.7806394316163411, + "grad_norm": 0.5223750865670449, + "learning_rate": 9.727361503102259e-06, + "loss": 0.0651, "step": 879 }, { - "epoch": 0.3908505440817233, - "grad_norm": 0.7464217413483473, - "learning_rate": 9.766925638179801e-06, - "loss": 0.0751, + "epoch": 0.7815275310834814, + "grad_norm": 0.46319039337808027, + "learning_rate": 9.726097777831113e-06, + "loss": 0.0639, "step": 880 }, { - "epoch": 0.3912946924272707, - "grad_norm": 1.0405400428578027, - "learning_rate": 9.778024417314096e-06, - "loss": 0.0862, + "epoch": 0.7824156305506217, + "grad_norm": 0.456317921792779, + "learning_rate": 9.724831213011939e-06, + "loss": 0.0621, "step": 881 }, { - "epoch": 0.3917388407728181, - "grad_norm": 0.7773928325897406, - "learning_rate": 9.789123196448391e-06, - "loss": 0.0877, + "epoch": 0.783303730017762, + "grad_norm": 0.5425120205467575, + "learning_rate": 9.723561809405717e-06, + "loss": 0.0671, "step": 882 }, { - "epoch": 0.3921829891183655, - "grad_norm": 0.6831878151686481, - "learning_rate": 9.800221975582686e-06, - "loss": 0.0613, + "epoch": 0.7841918294849023, + "grad_norm": 0.5740922445463003, + "learning_rate": 9.722289567775138e-06, + "loss": 0.0674, "step": 883 }, { - "epoch": 0.3926271374639129, - "grad_norm": 0.8414451878698297, - "learning_rate": 9.811320754716981e-06, - "loss": 0.0742, + "epoch": 0.7850799289520426, + "grad_norm": 0.5297609824168771, + "learning_rate": 9.72101448888459e-06, + "loss": 0.0639, "step": 884 }, { - "epoch": 0.3930712858094604, - "grad_norm": 0.8370630047631802, - "learning_rate": 9.822419533851278e-06, - "loss": 0.0788, + "epoch": 0.7859680284191829, + "grad_norm": 0.4811509902387377, + "learning_rate": 9.719736573500172e-06, + "loss": 0.0605, "step": 885 }, { - "epoch": 0.3935154341550078, - "grad_norm": 0.66195315447052, - "learning_rate": 9.833518312985573e-06, - "loss": 0.0673, + "epoch": 0.7868561278863233, + "grad_norm": 0.5922264117047007, + "learning_rate": 9.718455822389683e-06, + "loss": 0.0669, "step": 886 }, { - "epoch": 0.3939595825005552, - "grad_norm": 0.5443847344778766, - "learning_rate": 9.844617092119868e-06, - "loss": 0.065, + "epoch": 0.7877442273534636, + "grad_norm": 0.8913635608071487, + "learning_rate": 9.71717223632263e-06, + "loss": 0.083, "step": 887 }, { - "epoch": 0.3944037308461026, - "grad_norm": 0.7436541471597082, - "learning_rate": 9.855715871254163e-06, - "loss": 0.0718, + "epoch": 0.7886323268206039, + "grad_norm": 0.5292427013291248, + "learning_rate": 9.715885816070219e-06, + "loss": 0.0748, "step": 888 }, { - "epoch": 0.39484787919165, - "grad_norm": 0.9048716378979127, - "learning_rate": 9.866814650388458e-06, - "loss": 0.0647, + "epoch": 0.7895204262877442, + "grad_norm": 0.5678193666157058, + "learning_rate": 9.714596562405363e-06, + "loss": 0.0602, "step": 889 }, { - "epoch": 0.39529202753719744, - "grad_norm": 0.887466655199119, - "learning_rate": 9.877913429522753e-06, - "loss": 0.0742, + "epoch": 0.7904085257548845, + "grad_norm": 0.5453521799618504, + "learning_rate": 9.713304476102673e-06, + "loss": 0.0668, "step": 890 }, { - "epoch": 0.39573617588274485, - "grad_norm": 0.5558633344968636, - "learning_rate": 9.889012208657048e-06, - "loss": 0.0547, + "epoch": 0.7912966252220248, + "grad_norm": 0.7169261555524354, + "learning_rate": 9.712009557938464e-06, + "loss": 0.0645, "step": 891 }, { - "epoch": 0.39618032422829225, - "grad_norm": 0.6342481865802312, - "learning_rate": 9.900110987791344e-06, - "loss": 0.0652, + "epoch": 0.7921847246891652, + "grad_norm": 0.8218667759261624, + "learning_rate": 9.710711808690754e-06, + "loss": 0.0845, "step": 892 }, { - "epoch": 0.39662447257383965, - "grad_norm": 0.8609031233478771, - "learning_rate": 9.911209766925639e-06, - "loss": 0.0837, + "epoch": 0.7930728241563055, + "grad_norm": 0.5376254187160413, + "learning_rate": 9.70941122913926e-06, + "loss": 0.0823, "step": 893 }, { - "epoch": 0.39706862091938705, - "grad_norm": 0.8888916230177222, - "learning_rate": 9.922308546059934e-06, - "loss": 0.0892, + "epoch": 0.7939609236234458, + "grad_norm": 0.5533386595522959, + "learning_rate": 9.7081078200654e-06, + "loss": 0.0726, "step": 894 }, { - "epoch": 0.3975127692649345, - "grad_norm": 0.8338604612800079, - "learning_rate": 9.93340732519423e-06, - "loss": 0.101, + "epoch": 0.7948490230905861, + "grad_norm": 0.5507402177237228, + "learning_rate": 9.706801582252294e-06, + "loss": 0.0569, "step": 895 }, { - "epoch": 0.3979569176104819, - "grad_norm": 0.689341320326161, - "learning_rate": 9.944506104328525e-06, - "loss": 0.0778, + "epoch": 0.7957371225577264, + "grad_norm": 0.7065760418179104, + "learning_rate": 9.705492516484755e-06, + "loss": 0.064, "step": 896 }, { - "epoch": 0.3984010659560293, - "grad_norm": 0.7629364933455176, - "learning_rate": 9.955604883462819e-06, - "loss": 0.0702, + "epoch": 0.7966252220248667, + "grad_norm": 0.549266372129176, + "learning_rate": 9.704180623549306e-06, + "loss": 0.0748, "step": 897 }, { - "epoch": 0.3988452143015767, - "grad_norm": 0.7205533738637083, - "learning_rate": 9.966703662597114e-06, - "loss": 0.0712, + "epoch": 0.7975133214920072, + "grad_norm": 0.5719391651082385, + "learning_rate": 9.702865904234155e-06, + "loss": 0.0624, "step": 898 }, { - "epoch": 0.3992893626471241, - "grad_norm": 1.1452213067609647, - "learning_rate": 9.97780244173141e-06, - "loss": 0.0868, + "epoch": 0.7984014209591475, + "grad_norm": 0.6379308308407754, + "learning_rate": 9.701548359329223e-06, + "loss": 0.0719, "step": 899 }, { - "epoch": 0.39973351099267157, - "grad_norm": 1.7583678128183422, - "learning_rate": 9.988901220865706e-06, - "loss": 0.0866, + "epoch": 0.7992895204262878, + "grad_norm": 0.5418127960029306, + "learning_rate": 9.700227989626115e-06, + "loss": 0.0623, "step": 900 }, { - "epoch": 0.40017765933821897, - "grad_norm": 1.0237065065243787, - "learning_rate": 1e-05, - "loss": 0.0966, + "epoch": 0.8001776198934281, + "grad_norm": 0.6036706215166676, + "learning_rate": 9.698904795918143e-06, + "loss": 0.0607, "step": 901 }, { - "epoch": 0.4006218076837664, - "grad_norm": 0.9116552545055456, - "learning_rate": 9.999999624207532e-06, - "loss": 0.0752, + "epoch": 0.8010657193605684, + "grad_norm": 0.5752493077654007, + "learning_rate": 9.697578779000311e-06, + "loss": 0.068, "step": 902 }, { - "epoch": 0.4010659560293138, - "grad_norm": 0.9745084022934988, - "learning_rate": 9.999998496830188e-06, - "loss": 0.0816, + "epoch": 0.8019538188277087, + "grad_norm": 0.7663051795905208, + "learning_rate": 9.696249939669322e-06, + "loss": 0.0798, "step": 903 }, { - "epoch": 0.4015101043748612, - "grad_norm": 0.8729574496188063, - "learning_rate": 9.999996617868132e-06, - "loss": 0.096, + "epoch": 0.8028419182948491, + "grad_norm": 0.4723612141454474, + "learning_rate": 9.694918278723573e-06, + "loss": 0.0601, "step": 904 }, { - "epoch": 0.40195425272040863, - "grad_norm": 0.8195978110162689, - "learning_rate": 9.999993987321651e-06, - "loss": 0.0848, + "epoch": 0.8037300177619894, + "grad_norm": 0.5656385272418112, + "learning_rate": 9.693583796963153e-06, + "loss": 0.0619, "step": 905 }, { - "epoch": 0.40239840106595604, - "grad_norm": 1.0642057405652703, - "learning_rate": 9.999990605191136e-06, + "epoch": 0.8046181172291297, + "grad_norm": 0.6699550446405373, + "learning_rate": 9.692246495189851e-06, "loss": 0.0734, "step": 906 }, { - "epoch": 0.40284254941150344, - "grad_norm": 1.055742691312144, - "learning_rate": 9.9999864714771e-06, - "loss": 0.1099, + "epoch": 0.80550621669627, + "grad_norm": 0.5664891190443317, + "learning_rate": 9.69090637420715e-06, + "loss": 0.0708, "step": 907 }, { - "epoch": 0.40328669775705084, - "grad_norm": 1.2115140501137451, - "learning_rate": 9.999981586180161e-06, - "loss": 0.0742, + "epoch": 0.8063943161634103, + "grad_norm": 0.6843597251058838, + "learning_rate": 9.689563434820227e-06, + "loss": 0.0702, "step": 908 }, { - "epoch": 0.4037308461025983, - "grad_norm": 0.899219389432472, - "learning_rate": 9.999975949301057e-06, - "loss": 0.078, + "epoch": 0.8072824156305506, + "grad_norm": 0.683445022498873, + "learning_rate": 9.688217677835946e-06, + "loss": 0.0653, "step": 909 }, { - "epoch": 0.4041749944481457, - "grad_norm": 0.8631042166204541, - "learning_rate": 9.99996956084063e-06, - "loss": 0.0721, + "epoch": 0.8081705150976909, + "grad_norm": 0.5060552768099038, + "learning_rate": 9.68686910406287e-06, + "loss": 0.0556, "step": 910 }, { - "epoch": 0.4046191427936931, - "grad_norm": 0.90232933367849, - "learning_rate": 9.999962420799846e-06, - "loss": 0.0855, + "epoch": 0.8090586145648313, + "grad_norm": 0.7986819442805254, + "learning_rate": 9.685517714311257e-06, + "loss": 0.0795, "step": 911 }, { - "epoch": 0.4050632911392405, - "grad_norm": 0.9832129529060516, - "learning_rate": 9.999954529179773e-06, - "loss": 0.0884, + "epoch": 0.8099467140319716, + "grad_norm": 0.501035935929038, + "learning_rate": 9.68416350939305e-06, + "loss": 0.0666, "step": 912 }, { - "epoch": 0.4055074394847879, - "grad_norm": 0.6541793575769728, - "learning_rate": 9.999945885981603e-06, - "loss": 0.0649, + "epoch": 0.8108348134991119, + "grad_norm": 0.5329884471999715, + "learning_rate": 9.682806490121886e-06, + "loss": 0.0775, "step": 913 }, { - "epoch": 0.40595158783033536, - "grad_norm": 0.7878273299354217, - "learning_rate": 9.999936491206631e-06, - "loss": 0.0806, + "epoch": 0.8117229129662522, + "grad_norm": 0.54135648661835, + "learning_rate": 9.681446657313093e-06, + "loss": 0.0657, "step": 914 }, { - "epoch": 0.40639573617588276, - "grad_norm": 1.1901788044990722, - "learning_rate": 9.99992634485627e-06, - "loss": 0.0926, + "epoch": 0.8126110124333925, + "grad_norm": 0.5134085253589913, + "learning_rate": 9.68008401178369e-06, + "loss": 0.0609, "step": 915 }, { - "epoch": 0.40683988452143016, - "grad_norm": 1.14952509907395, - "learning_rate": 9.999915446932045e-06, - "loss": 0.1071, + "epoch": 0.8134991119005328, + "grad_norm": 0.4434095387180611, + "learning_rate": 9.678718554352388e-06, + "loss": 0.0654, "step": 916 }, { - "epoch": 0.40728403286697756, - "grad_norm": 0.7798635545238589, - "learning_rate": 9.999903797435596e-06, - "loss": 0.0771, + "epoch": 0.8143872113676732, + "grad_norm": 0.5236929010711758, + "learning_rate": 9.677350285839584e-06, + "loss": 0.0764, "step": 917 }, { - "epoch": 0.40772818121252496, - "grad_norm": 1.0917620513168562, - "learning_rate": 9.999891396368672e-06, - "loss": 0.0949, + "epoch": 0.8152753108348135, + "grad_norm": 0.6994365075688941, + "learning_rate": 9.675979207067366e-06, + "loss": 0.0806, "step": 918 }, { - "epoch": 0.4081723295580724, - "grad_norm": 1.0952645685085969, - "learning_rate": 9.999878243733138e-06, - "loss": 0.0817, + "epoch": 0.8161634103019538, + "grad_norm": 0.530891447866391, + "learning_rate": 9.674605318859504e-06, + "loss": 0.0634, "step": 919 }, { - "epoch": 0.4086164779036198, - "grad_norm": 0.8880652433088206, - "learning_rate": 9.99986433953097e-06, - "loss": 0.0776, + "epoch": 0.8170515097690941, + "grad_norm": 0.5765555950626341, + "learning_rate": 9.67322862204147e-06, + "loss": 0.0725, "step": 920 }, { - "epoch": 0.4090606262491672, - "grad_norm": 0.8333025562604276, - "learning_rate": 9.99984968376426e-06, - "loss": 0.0956, + "epoch": 0.8179396092362344, + "grad_norm": 0.5596260200652119, + "learning_rate": 9.671849117440408e-06, + "loss": 0.0714, "step": 921 }, { - "epoch": 0.4095047745947146, - "grad_norm": 1.0999527872907782, - "learning_rate": 9.99983427643521e-06, - "loss": 0.0782, + "epoch": 0.8188277087033747, + "grad_norm": 0.41640827300153266, + "learning_rate": 9.670466805885161e-06, + "loss": 0.0628, "step": 922 }, { - "epoch": 0.40994892294026203, - "grad_norm": 1.0065680465940938, - "learning_rate": 9.999818117546135e-06, - "loss": 0.0862, + "epoch": 0.8197158081705151, + "grad_norm": 0.5640736053598292, + "learning_rate": 9.669081688206252e-06, + "loss": 0.0856, "step": 923 }, { - "epoch": 0.4103930712858095, - "grad_norm": 0.7446846870231809, - "learning_rate": 9.999801207099464e-06, - "loss": 0.0693, + "epoch": 0.8206039076376554, + "grad_norm": 0.8485734792235194, + "learning_rate": 9.667693765235888e-06, + "loss": 0.0747, "step": 924 }, { - "epoch": 0.4108372196313569, - "grad_norm": 0.7664119308596476, - "learning_rate": 9.99978354509774e-06, - "loss": 0.062, + "epoch": 0.8214920071047958, + "grad_norm": 0.5669402076142084, + "learning_rate": 9.66630303780797e-06, + "loss": 0.0686, "step": 925 }, { - "epoch": 0.4112813679769043, - "grad_norm": 0.6216063626642743, - "learning_rate": 9.99976513154362e-06, - "loss": 0.0855, + "epoch": 0.822380106571936, + "grad_norm": 0.4740326434702954, + "learning_rate": 9.664909506758078e-06, + "loss": 0.0534, "step": 926 }, { - "epoch": 0.4117255163224517, - "grad_norm": 0.7583401455167236, - "learning_rate": 9.99974596643987e-06, - "loss": 0.0902, + "epoch": 0.8232682060390764, + "grad_norm": 0.5194347072923514, + "learning_rate": 9.663513172923472e-06, + "loss": 0.069, "step": 927 }, { - "epoch": 0.4121696646679991, - "grad_norm": 0.8236694673839551, - "learning_rate": 9.999726049789367e-06, - "loss": 0.094, + "epoch": 0.8241563055062167, + "grad_norm": 0.6010424988290933, + "learning_rate": 9.662114037143111e-06, + "loss": 0.0738, "step": 928 }, { - "epoch": 0.41261381301354655, - "grad_norm": 1.0278078628460734, - "learning_rate": 9.999705381595111e-06, - "loss": 0.1162, + "epoch": 0.8250444049733571, + "grad_norm": 0.6020466117067801, + "learning_rate": 9.660712100257622e-06, + "loss": 0.0737, "step": 929 }, { - "epoch": 0.41305796135909395, - "grad_norm": 0.9106208454624181, - "learning_rate": 9.999683961860205e-06, - "loss": 0.0832, + "epoch": 0.8259325044404974, + "grad_norm": 0.5396443509643355, + "learning_rate": 9.659307363109318e-06, + "loss": 0.0801, "step": 930 }, { - "epoch": 0.41350210970464135, - "grad_norm": 0.9583162458878111, - "learning_rate": 9.99966179058787e-06, - "loss": 0.0622, + "epoch": 0.8268206039076377, + "grad_norm": 0.5314961474658119, + "learning_rate": 9.657899826542202e-06, + "loss": 0.0517, "step": 931 }, { - "epoch": 0.41394625805018875, - "grad_norm": 1.02310895201938, - "learning_rate": 9.999638867781437e-06, - "loss": 0.0747, + "epoch": 0.827708703374778, + "grad_norm": 0.49738878540759734, + "learning_rate": 9.656489491401954e-06, + "loss": 0.064, "step": 932 }, { - "epoch": 0.41439040639573615, - "grad_norm": 0.9574965389717746, - "learning_rate": 9.999615193444354e-06, - "loss": 0.0826, + "epoch": 0.8285968028419183, + "grad_norm": 0.48381840119758085, + "learning_rate": 9.655076358535936e-06, + "loss": 0.0682, "step": 933 }, { - "epoch": 0.4148345547412836, - "grad_norm": 0.6820969947085748, - "learning_rate": 9.99959076758018e-06, - "loss": 0.0794, + "epoch": 0.8294849023090586, + "grad_norm": 0.44272053171124653, + "learning_rate": 9.653660428793188e-06, + "loss": 0.0581, "step": 934 }, { - "epoch": 0.415278703086831, - "grad_norm": 0.8271166638389166, - "learning_rate": 9.999565590192584e-06, - "loss": 0.0677, + "epoch": 0.8303730017761989, + "grad_norm": 0.4691336287450691, + "learning_rate": 9.652241703024433e-06, + "loss": 0.0642, "step": 935 }, { - "epoch": 0.4157228514323784, - "grad_norm": 0.7913919823378441, - "learning_rate": 9.999539661285354e-06, - "loss": 0.0899, + "epoch": 0.8312611012433393, + "grad_norm": 0.45885267998647256, + "learning_rate": 9.650820182082079e-06, + "loss": 0.0627, "step": 936 }, { - "epoch": 0.4161669997779258, - "grad_norm": 0.6933241808880691, - "learning_rate": 9.999512980862382e-06, - "loss": 0.0762, + "epoch": 0.8321492007104796, + "grad_norm": 1.0219882751792455, + "learning_rate": 9.649395866820204e-06, + "loss": 0.0831, "step": 937 }, { - "epoch": 0.4166111481234732, - "grad_norm": 1.0464472300779635, - "learning_rate": 9.999485548927686e-06, - "loss": 0.0879, + "epoch": 0.8330373001776199, + "grad_norm": 0.5784342450785018, + "learning_rate": 9.64796875809457e-06, + "loss": 0.077, "step": 938 }, { - "epoch": 0.4170552964690207, - "grad_norm": 0.9713981902628005, - "learning_rate": 9.999457365485383e-06, - "loss": 0.0859, + "epoch": 0.8339253996447602, + "grad_norm": 0.49797883919222224, + "learning_rate": 9.646538856762617e-06, + "loss": 0.0716, "step": 939 }, { - "epoch": 0.4174994448145681, - "grad_norm": 0.7157769729378113, - "learning_rate": 9.999428430539713e-06, - "loss": 0.063, + "epoch": 0.8348134991119005, + "grad_norm": 0.5887418894719311, + "learning_rate": 9.645106163683462e-06, + "loss": 0.0714, "step": 940 }, { - "epoch": 0.4179435931601155, - "grad_norm": 1.3110820742809257, - "learning_rate": 9.999398744095024e-06, - "loss": 0.1083, + "epoch": 0.8357015985790408, + "grad_norm": 0.7748244061996492, + "learning_rate": 9.6436706797179e-06, + "loss": 0.0704, "step": 941 }, { - "epoch": 0.4183877415056629, - "grad_norm": 0.9278738003250622, - "learning_rate": 9.999368306155778e-06, - "loss": 0.0682, + "epoch": 0.8365896980461812, + "grad_norm": 0.6855832463744983, + "learning_rate": 9.642232405728405e-06, + "loss": 0.0904, "step": 942 }, { - "epoch": 0.4188318898512103, - "grad_norm": 1.081752257204933, - "learning_rate": 9.999337116726555e-06, - "loss": 0.082, + "epoch": 0.8374777975133215, + "grad_norm": 0.4877569922133212, + "learning_rate": 9.640791342579123e-06, + "loss": 0.0624, "step": 943 }, { - "epoch": 0.41927603819675774, - "grad_norm": 0.7655308857156353, - "learning_rate": 9.999305175812035e-06, - "loss": 0.0824, + "epoch": 0.8383658969804618, + "grad_norm": 0.5089143536321346, + "learning_rate": 9.639347491135877e-06, + "loss": 0.0683, "step": 944 }, { - "epoch": 0.41972018654230514, - "grad_norm": 0.843733636393823, - "learning_rate": 9.999272483417027e-06, - "loss": 0.0583, + "epoch": 0.8392539964476021, + "grad_norm": 0.518918007394712, + "learning_rate": 9.637900852266167e-06, + "loss": 0.0616, "step": 945 }, { - "epoch": 0.42016433488785254, - "grad_norm": 1.0473704718025856, - "learning_rate": 9.99923903954644e-06, - "loss": 0.0893, + "epoch": 0.8401420959147424, + "grad_norm": 0.5840044393755744, + "learning_rate": 9.636451426839168e-06, + "loss": 0.0659, "step": 946 }, { - "epoch": 0.42060848323339994, - "grad_norm": 0.5121181388849327, - "learning_rate": 9.999204844205304e-06, - "loss": 0.0599, + "epoch": 0.8410301953818827, + "grad_norm": 0.45605601768162696, + "learning_rate": 9.634999215725726e-06, + "loss": 0.0668, "step": 947 }, { - "epoch": 0.42105263157894735, - "grad_norm": 0.9066795499623488, - "learning_rate": 9.999169897398757e-06, - "loss": 0.1344, + "epoch": 0.8419182948490231, + "grad_norm": 0.4868724376811619, + "learning_rate": 9.633544219798364e-06, + "loss": 0.0609, "step": 948 }, { - "epoch": 0.4214967799244948, - "grad_norm": 0.910793967335512, - "learning_rate": 9.999134199132054e-06, - "loss": 0.0669, + "epoch": 0.8428063943161634, + "grad_norm": 0.4212420772051227, + "learning_rate": 9.632086439931276e-06, + "loss": 0.0537, "step": 949 }, { - "epoch": 0.4219409282700422, - "grad_norm": 0.5923909271714048, - "learning_rate": 9.999097749410561e-06, - "loss": 0.0739, + "epoch": 0.8436944937833037, + "grad_norm": 0.5078653076714996, + "learning_rate": 9.630625877000333e-06, + "loss": 0.0719, "step": 950 }, { - "epoch": 0.4223850766155896, - "grad_norm": 0.8626478233669734, - "learning_rate": 9.999060548239757e-06, - "loss": 0.085, + "epoch": 0.844582593250444, + "grad_norm": 0.6428732000326673, + "learning_rate": 9.629162531883069e-06, + "loss": 0.0592, "step": 951 }, { - "epoch": 0.422829224961137, - "grad_norm": 1.2157299829969963, - "learning_rate": 9.999022595625233e-06, - "loss": 0.0927, + "epoch": 0.8454706927175843, + "grad_norm": 0.7805808347561376, + "learning_rate": 9.6276964054587e-06, + "loss": 0.0793, "step": 952 }, { - "epoch": 0.4232733733066844, - "grad_norm": 0.8683032499306118, - "learning_rate": 9.998983891572693e-06, - "loss": 0.0857, + "epoch": 0.8463587921847247, + "grad_norm": 0.48470957339414594, + "learning_rate": 9.626227498608106e-06, + "loss": 0.0759, "step": 953 }, { - "epoch": 0.42371752165223187, - "grad_norm": 0.7958109904631795, - "learning_rate": 9.998944436087956e-06, - "loss": 0.0848, + "epoch": 0.8472468916518651, + "grad_norm": 0.5914749471229294, + "learning_rate": 9.624755812213842e-06, + "loss": 0.0731, "step": 954 }, { - "epoch": 0.42416166999777927, - "grad_norm": 0.8911128877165261, - "learning_rate": 9.998904229176955e-06, - "loss": 0.0684, + "epoch": 0.8481349911190054, + "grad_norm": 0.6355778200039421, + "learning_rate": 9.623281347160129e-06, + "loss": 0.0596, "step": 955 }, { - "epoch": 0.42460581834332667, - "grad_norm": 0.8480526510619464, - "learning_rate": 9.998863270845731e-06, - "loss": 0.0819, + "epoch": 0.8490230905861457, + "grad_norm": 0.6496047766540088, + "learning_rate": 9.621804104332859e-06, + "loss": 0.0866, "step": 956 }, { - "epoch": 0.42504996668887407, - "grad_norm": 1.1640161965795854, - "learning_rate": 9.99882156110044e-06, - "loss": 0.1114, + "epoch": 0.849911190053286, + "grad_norm": 0.6056478307229467, + "learning_rate": 9.620324084619597e-06, + "loss": 0.0613, "step": 957 }, { - "epoch": 0.42549411503442147, - "grad_norm": 0.7232203452809441, - "learning_rate": 9.998779099947356e-06, - "loss": 0.0598, + "epoch": 0.8507992895204263, + "grad_norm": 0.546281173408732, + "learning_rate": 9.618841288909568e-06, + "loss": 0.0635, "step": 958 }, { - "epoch": 0.42593826337996893, - "grad_norm": 0.9706452342240306, - "learning_rate": 9.998735887392858e-06, - "loss": 0.083, + "epoch": 0.8516873889875666, + "grad_norm": 0.5559378548585396, + "learning_rate": 9.617355718093673e-06, + "loss": 0.0707, "step": 959 }, { - "epoch": 0.42638241172551633, - "grad_norm": 0.9335641087127532, - "learning_rate": 9.998691923443442e-06, - "loss": 0.0866, + "epoch": 0.8525754884547069, + "grad_norm": 0.5567536912334861, + "learning_rate": 9.615867373064476e-06, + "loss": 0.0826, "step": 960 }, { - "epoch": 0.42682656007106373, - "grad_norm": 0.6482813505555395, - "learning_rate": 9.998647208105717e-06, - "loss": 0.0792, + "epoch": 0.8534635879218473, + "grad_norm": 0.42827668716372647, + "learning_rate": 9.614376254716209e-06, + "loss": 0.0551, "step": 961 }, { - "epoch": 0.42727070841661113, - "grad_norm": 1.0124751766766096, - "learning_rate": 9.998601741386404e-06, - "loss": 0.0981, + "epoch": 0.8543516873889876, + "grad_norm": 0.6093863639678927, + "learning_rate": 9.612882363944772e-06, + "loss": 0.0747, "step": 962 }, { - "epoch": 0.42771485676215854, - "grad_norm": 0.8713994807005938, - "learning_rate": 9.998555523292338e-06, - "loss": 0.112, + "epoch": 0.8552397868561279, + "grad_norm": 0.6073144538624808, + "learning_rate": 9.611385701647724e-06, + "loss": 0.0874, "step": 963 }, { - "epoch": 0.428159005107706, - "grad_norm": 0.7341323526220324, - "learning_rate": 9.998508553830468e-06, - "loss": 0.079, + "epoch": 0.8561278863232682, + "grad_norm": 0.44196140894399816, + "learning_rate": 9.609886268724299e-06, + "loss": 0.0635, "step": 964 }, { - "epoch": 0.4286031534532534, - "grad_norm": 0.7729393765262612, - "learning_rate": 9.99846083300785e-06, - "loss": 0.0783, + "epoch": 0.8570159857904085, + "grad_norm": 0.5774340908065915, + "learning_rate": 9.608384066075387e-06, + "loss": 0.0656, "step": 965 }, { - "epoch": 0.4290473017988008, - "grad_norm": 0.9156169394277074, - "learning_rate": 9.99841236083166e-06, - "loss": 0.0806, + "epoch": 0.8579040852575488, + "grad_norm": 0.44605257552608013, + "learning_rate": 9.60687909460355e-06, + "loss": 0.0652, "step": 966 }, { - "epoch": 0.4294914501443482, - "grad_norm": 0.8551663881257657, - "learning_rate": 9.998363137309187e-06, - "loss": 0.0618, + "epoch": 0.8587921847246892, + "grad_norm": 0.4880651495248036, + "learning_rate": 9.605371355213006e-06, + "loss": 0.0644, "step": 967 }, { - "epoch": 0.4299355984898956, - "grad_norm": 0.5370934826053735, - "learning_rate": 9.998313162447824e-06, - "loss": 0.0652, + "epoch": 0.8596802841918295, + "grad_norm": 0.5163769073738559, + "learning_rate": 9.60386084880964e-06, + "loss": 0.0651, "step": 968 }, { - "epoch": 0.43037974683544306, - "grad_norm": 0.7190678146601934, - "learning_rate": 9.998262436255087e-06, - "loss": 0.0574, + "epoch": 0.8605683836589698, + "grad_norm": 0.484318416820972, + "learning_rate": 9.602347576300998e-06, + "loss": 0.0693, "step": 969 }, { - "epoch": 0.43082389518099046, - "grad_norm": 0.790706849866094, - "learning_rate": 9.998210958738601e-06, - "loss": 0.0805, + "epoch": 0.8614564831261101, + "grad_norm": 0.6745913675404646, + "learning_rate": 9.600831538596294e-06, + "loss": 0.069, "step": 970 }, { - "epoch": 0.43126804352653786, - "grad_norm": 0.7463621831379682, - "learning_rate": 9.998158729906102e-06, - "loss": 0.0712, + "epoch": 0.8623445825932504, + "grad_norm": 0.5244770412855654, + "learning_rate": 9.599312736606393e-06, + "loss": 0.0686, "step": 971 }, { - "epoch": 0.43171219187208526, - "grad_norm": 1.0561578156156401, - "learning_rate": 9.998105749765444e-06, - "loss": 0.0901, + "epoch": 0.8632326820603907, + "grad_norm": 0.5117748422489878, + "learning_rate": 9.597791171243826e-06, + "loss": 0.0695, "step": 972 }, { - "epoch": 0.43215634021763266, - "grad_norm": 0.7180209621862901, - "learning_rate": 9.998052018324586e-06, - "loss": 0.0687, + "epoch": 0.8641207815275311, + "grad_norm": 0.4807594270645188, + "learning_rate": 9.596266843422786e-06, + "loss": 0.0655, "step": 973 }, { - "epoch": 0.4326004885631801, - "grad_norm": 0.8430240030491031, - "learning_rate": 9.99799753559161e-06, - "loss": 0.0763, + "epoch": 0.8650088809946714, + "grad_norm": 0.5078261322695101, + "learning_rate": 9.594739754059126e-06, + "loss": 0.0725, "step": 974 }, { - "epoch": 0.4330446369087275, - "grad_norm": 0.6157610719315397, - "learning_rate": 9.997942301574701e-06, - "loss": 0.0679, + "epoch": 0.8658969804618117, + "grad_norm": 0.5907284204895108, + "learning_rate": 9.593209904070353e-06, + "loss": 0.0641, "step": 975 }, { - "epoch": 0.4334887852542749, - "grad_norm": 0.8284454213431681, - "learning_rate": 9.997886316282167e-06, - "loss": 0.0901, + "epoch": 0.866785079928952, + "grad_norm": 0.6051899041617059, + "learning_rate": 9.591677294375637e-06, + "loss": 0.0765, "step": 976 }, { - "epoch": 0.4339329335998223, - "grad_norm": 0.6138908846381919, - "learning_rate": 9.997829579722418e-06, - "loss": 0.068, + "epoch": 0.8676731793960923, + "grad_norm": 0.48009032801043344, + "learning_rate": 9.590141925895806e-06, + "loss": 0.0616, "step": 977 }, { - "epoch": 0.4343770819453698, - "grad_norm": 0.6977507297268527, - "learning_rate": 9.997772091903984e-06, - "loss": 0.0719, + "epoch": 0.8685612788632326, + "grad_norm": 0.6271830421948442, + "learning_rate": 9.588603799553344e-06, + "loss": 0.0668, "step": 978 }, { - "epoch": 0.4348212302909172, - "grad_norm": 0.672879000533416, - "learning_rate": 9.997713852835509e-06, - "loss": 0.0859, + "epoch": 0.8694493783303731, + "grad_norm": 1.143136210805576, + "learning_rate": 9.587062916272395e-06, + "loss": 0.0892, "step": 979 }, { - "epoch": 0.4352653786364646, - "grad_norm": 1.0953915290421439, - "learning_rate": 9.997654862525746e-06, - "loss": 0.0984, + "epoch": 0.8703374777975134, + "grad_norm": 0.4894418333744091, + "learning_rate": 9.585519276978753e-06, + "loss": 0.067, "step": 980 }, { - "epoch": 0.435709526982012, - "grad_norm": 0.7214317597556656, - "learning_rate": 9.997595120983561e-06, - "loss": 0.0788, + "epoch": 0.8712255772646537, + "grad_norm": 0.5167641117377408, + "learning_rate": 9.583972882599873e-06, + "loss": 0.065, "step": 981 }, { - "epoch": 0.4361536753275594, - "grad_norm": 0.6240169495789859, - "learning_rate": 9.997534628217935e-06, - "loss": 0.0533, + "epoch": 0.872113676731794, + "grad_norm": 0.5190912686806686, + "learning_rate": 9.582423734064866e-06, + "loss": 0.0746, "step": 982 }, { - "epoch": 0.43659782367310684, - "grad_norm": 1.0463824152225416, - "learning_rate": 9.997473384237962e-06, - "loss": 0.1256, + "epoch": 0.8730017761989343, + "grad_norm": 0.6206325896144623, + "learning_rate": 9.580871832304495e-06, + "loss": 0.0567, "step": 983 }, { - "epoch": 0.43704197201865425, - "grad_norm": 0.6075177918870873, - "learning_rate": 9.997411389052846e-06, - "loss": 0.0907, + "epoch": 0.8738898756660746, + "grad_norm": 0.5175071446032825, + "learning_rate": 9.579317178251177e-06, + "loss": 0.0632, "step": 984 }, { - "epoch": 0.43748612036420165, - "grad_norm": 0.8030490613184201, - "learning_rate": 9.997348642671906e-06, - "loss": 0.075, + "epoch": 0.8747779751332149, + "grad_norm": 0.5117114338677473, + "learning_rate": 9.577759772838986e-06, + "loss": 0.0606, "step": 985 }, { - "epoch": 0.43793026870974905, - "grad_norm": 0.6431009442762712, - "learning_rate": 9.997285145104578e-06, - "loss": 0.0681, + "epoch": 0.8756660746003553, + "grad_norm": 0.45216716484221064, + "learning_rate": 9.576199617003646e-06, + "loss": 0.0559, "step": 986 }, { - "epoch": 0.43837441705529645, - "grad_norm": 0.7359898616321635, - "learning_rate": 9.997220896360402e-06, - "loss": 0.0668, + "epoch": 0.8765541740674956, + "grad_norm": 0.5279616323617633, + "learning_rate": 9.574636711682534e-06, + "loss": 0.064, "step": 987 }, { - "epoch": 0.4388185654008439, - "grad_norm": 0.9059235668805204, - "learning_rate": 9.997155896449037e-06, - "loss": 0.1006, + "epoch": 0.8774422735346359, + "grad_norm": 0.5771251219977132, + "learning_rate": 9.57307105781468e-06, + "loss": 0.0597, "step": 988 }, { - "epoch": 0.4392627137463913, - "grad_norm": 0.5647469272415054, - "learning_rate": 9.997090145380253e-06, - "loss": 0.0699, + "epoch": 0.8783303730017762, + "grad_norm": 0.4623690839065104, + "learning_rate": 9.571502656340766e-06, + "loss": 0.0563, "step": 989 }, { - "epoch": 0.4397068620919387, - "grad_norm": 0.7349393202965936, - "learning_rate": 9.997023643163937e-06, - "loss": 0.0731, + "epoch": 0.8792184724689165, + "grad_norm": 0.6946879392998936, + "learning_rate": 9.569931508203119e-06, + "loss": 0.0676, "step": 990 }, { - "epoch": 0.4401510104374861, - "grad_norm": 0.6226852410738429, - "learning_rate": 9.996956389810082e-06, - "loss": 0.0663, + "epoch": 0.8801065719360568, + "grad_norm": 0.4585146234886463, + "learning_rate": 9.568357614345726e-06, + "loss": 0.0733, "step": 991 }, { - "epoch": 0.4405951587830335, - "grad_norm": 0.6263169289849984, - "learning_rate": 9.996888385328798e-06, - "loss": 0.0733, + "epoch": 0.8809946714031972, + "grad_norm": 0.6145459902022079, + "learning_rate": 9.566780975714216e-06, + "loss": 0.0685, "step": 992 }, { - "epoch": 0.44103930712858097, - "grad_norm": 0.7290496435666083, - "learning_rate": 9.996819629730305e-06, - "loss": 0.0664, + "epoch": 0.8818827708703375, + "grad_norm": 0.5042544370825427, + "learning_rate": 9.565201593255871e-06, + "loss": 0.0636, "step": 993 }, { - "epoch": 0.4414834554741284, - "grad_norm": 0.6744456519687402, - "learning_rate": 9.996750123024943e-06, - "loss": 0.0637, + "epoch": 0.8827708703374778, + "grad_norm": 0.527895658494514, + "learning_rate": 9.563619467919618e-06, + "loss": 0.064, "step": 994 }, { - "epoch": 0.4419276038196758, - "grad_norm": 0.9231205047824739, - "learning_rate": 9.996679865223157e-06, - "loss": 0.0598, + "epoch": 0.8836589698046181, + "grad_norm": 0.8562131843407302, + "learning_rate": 9.562034600656037e-06, + "loss": 0.0686, "step": 995 }, { - "epoch": 0.4423717521652232, - "grad_norm": 0.6917241860836084, - "learning_rate": 9.99660885633551e-06, - "loss": 0.07, + "epoch": 0.8845470692717584, + "grad_norm": 0.6663629069562428, + "learning_rate": 9.560446992417352e-06, + "loss": 0.0712, "step": 996 }, { - "epoch": 0.4428159005107706, - "grad_norm": 1.0027314643568717, - "learning_rate": 9.996537096372672e-06, - "loss": 0.0998, + "epoch": 0.8854351687388987, + "grad_norm": 0.5187613898577973, + "learning_rate": 9.558856644157432e-06, + "loss": 0.0772, "step": 997 }, { - "epoch": 0.44326004885631803, - "grad_norm": 0.7262325240442806, - "learning_rate": 9.996464585345433e-06, - "loss": 0.0703, + "epoch": 0.8863232682060391, + "grad_norm": 0.47580394813478066, + "learning_rate": 9.557263556831797e-06, + "loss": 0.0527, "step": 998 }, { - "epoch": 0.44370419720186544, - "grad_norm": 0.7680770645535525, - "learning_rate": 9.996391323264693e-06, - "loss": 0.0884, + "epoch": 0.8872113676731794, + "grad_norm": 0.7369218631811545, + "learning_rate": 9.55566773139761e-06, + "loss": 0.0916, "step": 999 }, { - "epoch": 0.44414834554741284, - "grad_norm": 0.7559490106951665, - "learning_rate": 9.996317310141462e-06, - "loss": 0.0885, + "epoch": 0.8880994671403197, + "grad_norm": 0.5202148270559931, + "learning_rate": 9.55406916881368e-06, + "loss": 0.0719, "step": 1000 }, { - "epoch": 0.44459249389296024, - "grad_norm": 0.648213091365164, - "learning_rate": 9.996242545986868e-06, - "loss": 0.0657, + "epoch": 0.88898756660746, + "grad_norm": 0.4451803025522105, + "learning_rate": 9.55246787004046e-06, + "loss": 0.0651, "step": 1001 }, { - "epoch": 0.44503664223850764, - "grad_norm": 0.941859001821441, - "learning_rate": 9.996167030812146e-06, - "loss": 0.0771, + "epoch": 0.8898756660746003, + "grad_norm": 0.6365441468045436, + "learning_rate": 9.550863836040046e-06, + "loss": 0.0791, "step": 1002 }, { - "epoch": 0.4454807905840551, - "grad_norm": 0.9336722754874243, - "learning_rate": 9.996090764628649e-06, - "loss": 0.062, + "epoch": 0.8907637655417406, + "grad_norm": 0.6417144379958832, + "learning_rate": 9.549257067776179e-06, + "loss": 0.0648, "step": 1003 }, { - "epoch": 0.4459249389296025, - "grad_norm": 0.6689537581560581, - "learning_rate": 9.996013747447844e-06, - "loss": 0.0676, + "epoch": 0.8916518650088809, + "grad_norm": 0.5006334639118971, + "learning_rate": 9.547647566214242e-06, + "loss": 0.0782, "step": 1004 }, { - "epoch": 0.4463690872751499, - "grad_norm": 0.6868609219124203, - "learning_rate": 9.995935979281304e-06, - "loss": 0.0698, + "epoch": 0.8925399644760214, + "grad_norm": 0.6095277759149481, + "learning_rate": 9.546035332321261e-06, + "loss": 0.063, "step": 1005 }, { - "epoch": 0.4468132356206973, - "grad_norm": 0.7598513445760847, - "learning_rate": 9.995857460140719e-06, - "loss": 0.0663, + "epoch": 0.8934280639431617, + "grad_norm": 0.67774395122714, + "learning_rate": 9.544420367065905e-06, + "loss": 0.0716, "step": 1006 }, { - "epoch": 0.4472573839662447, - "grad_norm": 0.8425846771969293, - "learning_rate": 9.995778190037893e-06, - "loss": 0.0709, + "epoch": 0.894316163410302, + "grad_norm": 0.580267254473296, + "learning_rate": 9.542802671418479e-06, + "loss": 0.078, "step": 1007 }, { - "epoch": 0.44770153231179216, - "grad_norm": 0.7973802310676283, - "learning_rate": 9.995698168984743e-06, - "loss": 0.0669, + "epoch": 0.8952042628774423, + "grad_norm": 0.7461816578324983, + "learning_rate": 9.54118224635093e-06, + "loss": 0.0813, "step": 1008 }, { - "epoch": 0.44814568065733956, - "grad_norm": 0.8057530371483355, - "learning_rate": 9.995617396993297e-06, - "loss": 0.105, + "epoch": 0.8960923623445826, + "grad_norm": 0.5764943758208013, + "learning_rate": 9.539559092836853e-06, + "loss": 0.0668, "step": 1009 }, { - "epoch": 0.44858982900288696, - "grad_norm": 0.698450418542813, - "learning_rate": 9.995535874075692e-06, - "loss": 0.0886, + "epoch": 0.8969804618117229, + "grad_norm": 0.5226817090572494, + "learning_rate": 9.53793321185147e-06, + "loss": 0.0665, "step": 1010 }, { - "epoch": 0.44903397734843437, - "grad_norm": 0.6685872712573397, - "learning_rate": 9.99545360024419e-06, - "loss": 0.097, + "epoch": 0.8978685612788633, + "grad_norm": 0.671609926008543, + "learning_rate": 9.536304604371653e-06, + "loss": 0.0601, "step": 1011 }, { - "epoch": 0.44947812569398177, - "grad_norm": 0.7945297170711836, - "learning_rate": 9.995370575511151e-06, - "loss": 0.0841, + "epoch": 0.8987566607460036, + "grad_norm": 0.5644239599358581, + "learning_rate": 9.534673271375902e-06, + "loss": 0.0831, "step": 1012 }, { - "epoch": 0.4499222740395292, - "grad_norm": 0.6520316868875402, - "learning_rate": 9.99528679988906e-06, - "loss": 0.0787, + "epoch": 0.8996447602131439, + "grad_norm": 0.5245491734026599, + "learning_rate": 9.53303921384436e-06, + "loss": 0.0652, "step": 1013 }, { - "epoch": 0.4503664223850766, - "grad_norm": 0.572444172084648, - "learning_rate": 9.995202273390505e-06, - "loss": 0.0594, + "epoch": 0.9005328596802842, + "grad_norm": 0.735750041713909, + "learning_rate": 9.53140243275881e-06, + "loss": 0.0807, "step": 1014 }, { - "epoch": 0.450810570730624, - "grad_norm": 0.6063370258668184, - "learning_rate": 9.995116996028197e-06, - "loss": 0.0683, + "epoch": 0.9014209591474245, + "grad_norm": 0.6719349064728679, + "learning_rate": 9.529762929102662e-06, + "loss": 0.0673, "step": 1015 }, { - "epoch": 0.45125471907617143, - "grad_norm": 0.7022055879284854, - "learning_rate": 9.995030967814952e-06, - "loss": 0.0828, + "epoch": 0.9023090586145648, + "grad_norm": 0.46147220043042025, + "learning_rate": 9.528120703860971e-06, + "loss": 0.0552, "step": 1016 }, { - "epoch": 0.45169886742171883, - "grad_norm": 0.6787805656005202, - "learning_rate": 9.994944188763701e-06, - "loss": 0.0886, + "epoch": 0.9031971580817052, + "grad_norm": 0.6362605578497083, + "learning_rate": 9.526475758020424e-06, + "loss": 0.0662, "step": 1017 }, { - "epoch": 0.4521430157672663, - "grad_norm": 0.6743594234424202, - "learning_rate": 9.994856658887491e-06, - "loss": 0.0858, + "epoch": 0.9040852575488455, + "grad_norm": 0.6092023673577541, + "learning_rate": 9.52482809256934e-06, + "loss": 0.0696, "step": 1018 }, { - "epoch": 0.4525871641128137, - "grad_norm": 0.701751582175111, - "learning_rate": 9.994768378199476e-06, - "loss": 0.0877, + "epoch": 0.9049733570159858, + "grad_norm": 0.5550600604822477, + "learning_rate": 9.523177708497677e-06, + "loss": 0.0674, "step": 1019 }, { - "epoch": 0.4530313124583611, - "grad_norm": 0.7777579093243342, - "learning_rate": 9.994679346712927e-06, - "loss": 0.0809, + "epoch": 0.9058614564831261, + "grad_norm": 0.689868458127882, + "learning_rate": 9.521524606797021e-06, + "loss": 0.0672, "step": 1020 }, { - "epoch": 0.4534754608039085, - "grad_norm": 0.7407021400306011, - "learning_rate": 9.994589564441229e-06, - "loss": 0.0667, + "epoch": 0.9067495559502664, + "grad_norm": 0.5355417556080468, + "learning_rate": 9.519868788460594e-06, + "loss": 0.0597, "step": 1021 }, { - "epoch": 0.4539196091494559, - "grad_norm": 0.8640591667177118, - "learning_rate": 9.994499031397874e-06, - "loss": 0.0779, + "epoch": 0.9076376554174067, + "grad_norm": 0.4786559073365717, + "learning_rate": 9.518210254483252e-06, + "loss": 0.0608, "step": 1022 }, { - "epoch": 0.45436375749500335, - "grad_norm": 0.741626283806636, - "learning_rate": 9.994407747596474e-06, - "loss": 0.0598, + "epoch": 0.9085257548845471, + "grad_norm": 0.5638304194002016, + "learning_rate": 9.516549005861477e-06, + "loss": 0.0749, "step": 1023 }, { - "epoch": 0.45480790584055075, - "grad_norm": 0.7426285442565452, - "learning_rate": 9.994315713050749e-06, - "loss": 0.0763, + "epoch": 0.9094138543516874, + "grad_norm": 0.5388374617346837, + "learning_rate": 9.514885043593387e-06, + "loss": 0.0718, "step": 1024 }, { - "epoch": 0.45525205418609815, - "grad_norm": 1.1459706806907142, - "learning_rate": 9.994222927774535e-06, - "loss": 0.0928, + "epoch": 0.9103019538188277, + "grad_norm": 0.46101867467829, + "learning_rate": 9.513218368678727e-06, + "loss": 0.0646, "step": 1025 }, { - "epoch": 0.45569620253164556, - "grad_norm": 0.9102603719521771, - "learning_rate": 9.994129391781777e-06, - "loss": 0.0695, + "epoch": 0.911190053285968, + "grad_norm": 0.5702452001729812, + "learning_rate": 9.511548982118876e-06, + "loss": 0.07, "step": 1026 }, { - "epoch": 0.45614035087719296, - "grad_norm": 0.8406791604735616, - "learning_rate": 9.994035105086536e-06, - "loss": 0.0655, + "epoch": 0.9120781527531083, + "grad_norm": 0.5077496863804494, + "learning_rate": 9.50987688491684e-06, + "loss": 0.0631, "step": 1027 }, { - "epoch": 0.4565844992227404, - "grad_norm": 0.6764230445649027, - "learning_rate": 9.993940067702985e-06, - "loss": 0.0566, + "epoch": 0.9129662522202486, + "grad_norm": 0.6669979779374497, + "learning_rate": 9.508202078077253e-06, + "loss": 0.0641, "step": 1028 }, { - "epoch": 0.4570286475682878, - "grad_norm": 1.1241952343326866, - "learning_rate": 9.993844279645411e-06, - "loss": 0.1138, + "epoch": 0.9138543516873889, + "grad_norm": 0.5372617408127478, + "learning_rate": 9.506524562606373e-06, + "loss": 0.058, "step": 1029 }, { - "epoch": 0.4574727959138352, - "grad_norm": 0.8203260077691763, - "learning_rate": 9.993747740928207e-06, - "loss": 0.0639, + "epoch": 0.9147424511545293, + "grad_norm": 0.5388729086198248, + "learning_rate": 9.504844339512096e-06, + "loss": 0.0726, "step": 1030 }, { - "epoch": 0.4579169442593826, - "grad_norm": 0.9349985900242503, - "learning_rate": 9.993650451565892e-06, - "loss": 0.078, + "epoch": 0.9156305506216696, + "grad_norm": 0.6114003454684426, + "learning_rate": 9.503161409803936e-06, + "loss": 0.0711, "step": 1031 }, { - "epoch": 0.45836109260493, - "grad_norm": 0.9978670067027727, - "learning_rate": 9.993552411573088e-06, - "loss": 0.0952, + "epoch": 0.91651865008881, + "grad_norm": 0.4485342434354438, + "learning_rate": 9.501475774493034e-06, + "loss": 0.0636, "step": 1032 }, { - "epoch": 0.4588052409504775, - "grad_norm": 1.1441082866740984, - "learning_rate": 9.993453620964529e-06, - "loss": 0.0897, + "epoch": 0.9174067495559503, + "grad_norm": 0.6828271051823804, + "learning_rate": 9.499787434592162e-06, + "loss": 0.0732, "step": 1033 }, { - "epoch": 0.4592493892960249, - "grad_norm": 1.0696466066343144, - "learning_rate": 9.993354079755066e-06, - "loss": 0.0903, + "epoch": 0.9182948490230906, + "grad_norm": 0.599305022824422, + "learning_rate": 9.49809639111571e-06, + "loss": 0.0727, "step": 1034 }, { - "epoch": 0.4596935376415723, - "grad_norm": 0.6531078727573577, - "learning_rate": 9.993253787959664e-06, - "loss": 0.0673, + "epoch": 0.9191829484902309, + "grad_norm": 0.5383385780080625, + "learning_rate": 9.4964026450797e-06, + "loss": 0.0555, "step": 1035 }, { - "epoch": 0.4601376859871197, - "grad_norm": 1.4198972391798237, - "learning_rate": 9.993152745593398e-06, - "loss": 0.083, + "epoch": 0.9200710479573713, + "grad_norm": 0.47192921256501275, + "learning_rate": 9.494706197501768e-06, + "loss": 0.0596, "step": 1036 }, { - "epoch": 0.4605818343326671, - "grad_norm": 1.070478952947314, - "learning_rate": 9.993050952671453e-06, - "loss": 0.0687, + "epoch": 0.9209591474245116, + "grad_norm": 0.46543843473849966, + "learning_rate": 9.493007049401182e-06, + "loss": 0.065, "step": 1037 }, { - "epoch": 0.46102598267821454, - "grad_norm": 0.7417628375073222, - "learning_rate": 9.992948409209134e-06, - "loss": 0.0802, + "epoch": 0.9218472468916519, + "grad_norm": 0.4809339614623937, + "learning_rate": 9.491305201798829e-06, + "loss": 0.0611, "step": 1038 }, { - "epoch": 0.46147013102376194, - "grad_norm": 0.6201081139409721, - "learning_rate": 9.992845115221855e-06, - "loss": 0.0612, + "epoch": 0.9227353463587922, + "grad_norm": 0.5478524759971349, + "learning_rate": 9.489600655717217e-06, + "loss": 0.0664, "step": 1039 }, { - "epoch": 0.46191427936930934, - "grad_norm": 0.8344957050457038, - "learning_rate": 9.992741070725137e-06, - "loss": 0.0897, + "epoch": 0.9236234458259325, + "grad_norm": 0.5590802808369877, + "learning_rate": 9.487893412180478e-06, + "loss": 0.0776, "step": 1040 }, { - "epoch": 0.46235842771485675, - "grad_norm": 0.5628453325687238, - "learning_rate": 9.992636275734629e-06, - "loss": 0.069, + "epoch": 0.9245115452930728, + "grad_norm": 0.5249819885631613, + "learning_rate": 9.48618347221436e-06, + "loss": 0.0654, "step": 1041 }, { - "epoch": 0.4628025760604042, - "grad_norm": 0.6969650084016832, - "learning_rate": 9.992530730266078e-06, - "loss": 0.073, + "epoch": 0.9253996447602132, + "grad_norm": 0.6157061724908073, + "learning_rate": 9.48447083684624e-06, + "loss": 0.0779, "step": 1042 }, { - "epoch": 0.4632467244059516, - "grad_norm": 1.0860876745728663, - "learning_rate": 9.992424434335348e-06, - "loss": 0.0921, + "epoch": 0.9262877442273535, + "grad_norm": 0.5191446647526017, + "learning_rate": 9.482755507105102e-06, + "loss": 0.0693, "step": 1043 }, { - "epoch": 0.463690872751499, - "grad_norm": 0.5795193360907794, - "learning_rate": 9.99231738795842e-06, - "loss": 0.0504, + "epoch": 0.9271758436944938, + "grad_norm": 0.5290036118271478, + "learning_rate": 9.48103748402156e-06, + "loss": 0.0748, "step": 1044 }, { - "epoch": 0.4641350210970464, - "grad_norm": 0.6583352279848995, - "learning_rate": 9.992209591151386e-06, - "loss": 0.0831, + "epoch": 0.9280639431616341, + "grad_norm": 0.4281662222413074, + "learning_rate": 9.47931676862784e-06, + "loss": 0.0578, "step": 1045 }, { - "epoch": 0.4645791694425938, - "grad_norm": 0.5908927184246742, - "learning_rate": 9.992101043930444e-06, - "loss": 0.0996, + "epoch": 0.9289520426287744, + "grad_norm": 0.6704581542680114, + "learning_rate": 9.477593361957786e-06, + "loss": 0.0845, "step": 1046 }, { - "epoch": 0.46502331778814127, - "grad_norm": 0.7629134106102797, - "learning_rate": 9.991991746311916e-06, - "loss": 0.0595, + "epoch": 0.9298401420959147, + "grad_norm": 0.5123744289336686, + "learning_rate": 9.475867265046865e-06, + "loss": 0.0716, "step": 1047 }, { - "epoch": 0.46546746613368867, - "grad_norm": 0.8529582668397104, - "learning_rate": 9.991881698312229e-06, - "loss": 0.076, + "epoch": 0.9307282415630551, + "grad_norm": 0.48433634155634114, + "learning_rate": 9.47413847893215e-06, + "loss": 0.0634, "step": 1048 }, { - "epoch": 0.46591161447923607, - "grad_norm": 0.6804037477070127, - "learning_rate": 9.991770899947925e-06, - "loss": 0.075, + "epoch": 0.9316163410301954, + "grad_norm": 0.728396879512879, + "learning_rate": 9.472407004652337e-06, + "loss": 0.0636, "step": 1049 }, { - "epoch": 0.46635576282478347, - "grad_norm": 0.8612466864991256, - "learning_rate": 9.991659351235662e-06, - "loss": 0.0796, + "epoch": 0.9325044404973357, + "grad_norm": 0.41668367278311613, + "learning_rate": 9.470672843247739e-06, + "loss": 0.0514, "step": 1050 }, { - "epoch": 0.4667999111703309, - "grad_norm": 0.6453565757322578, - "learning_rate": 9.991547052192203e-06, - "loss": 0.0625, + "epoch": 0.933392539964476, + "grad_norm": 0.48971811331996123, + "learning_rate": 9.468935995760275e-06, + "loss": 0.0639, "step": 1051 }, { - "epoch": 0.46724405951587833, - "grad_norm": 1.7133852156999947, - "learning_rate": 9.99143400283443e-06, - "loss": 0.097, + "epoch": 0.9342806394316163, + "grad_norm": 0.5546859724046497, + "learning_rate": 9.467196463233488e-06, + "loss": 0.0558, "step": 1052 }, { - "epoch": 0.46768820786142573, - "grad_norm": 0.7043109079290237, - "learning_rate": 9.991320203179338e-06, - "loss": 0.0556, + "epoch": 0.9351687388987566, + "grad_norm": 0.658139536168307, + "learning_rate": 9.465454246712524e-06, + "loss": 0.0709, "step": 1053 }, { - "epoch": 0.46813235620697313, - "grad_norm": 0.9119065164949084, - "learning_rate": 9.991205653244032e-06, - "loss": 0.0618, + "epoch": 0.9360568383658969, + "grad_norm": 0.623885610850787, + "learning_rate": 9.463709347244147e-06, + "loss": 0.0679, "step": 1054 }, { - "epoch": 0.46857650455252053, - "grad_norm": 0.5740417394473022, - "learning_rate": 9.991090353045729e-06, - "loss": 0.0492, + "epoch": 0.9369449378330373, + "grad_norm": 0.46245415470167184, + "learning_rate": 9.461961765876739e-06, + "loss": 0.0532, "step": 1055 }, { - "epoch": 0.46902065289806794, - "grad_norm": 0.8813556475853607, - "learning_rate": 9.990974302601762e-06, - "loss": 0.0603, + "epoch": 0.9378330373001776, + "grad_norm": 0.4949200069813079, + "learning_rate": 9.46021150366028e-06, + "loss": 0.0693, "step": 1056 }, { - "epoch": 0.4694648012436154, - "grad_norm": 0.6951634771023119, - "learning_rate": 9.990857501929577e-06, - "loss": 0.0641, + "epoch": 0.9387211367673179, + "grad_norm": 0.4441815788870761, + "learning_rate": 9.45845856164637e-06, + "loss": 0.0584, "step": 1057 }, { - "epoch": 0.4699089495891628, - "grad_norm": 0.9703272601786429, - "learning_rate": 9.990739951046729e-06, - "loss": 0.1064, + "epoch": 0.9396092362344582, + "grad_norm": 0.4435068922625452, + "learning_rate": 9.456702940888219e-06, + "loss": 0.0662, "step": 1058 }, { - "epoch": 0.4703530979347102, - "grad_norm": 1.1076310196170094, - "learning_rate": 9.99062164997089e-06, - "loss": 0.077, + "epoch": 0.9404973357015985, + "grad_norm": 0.43640362396963367, + "learning_rate": 9.45494464244064e-06, + "loss": 0.0557, "step": 1059 }, { - "epoch": 0.4707972462802576, - "grad_norm": 0.813678875992106, - "learning_rate": 9.990502598719837e-06, - "loss": 0.0579, + "epoch": 0.9413854351687388, + "grad_norm": 0.6458370718179861, + "learning_rate": 9.453183667360062e-06, + "loss": 0.0677, "step": 1060 }, { - "epoch": 0.471241394625805, - "grad_norm": 0.8445949631628347, - "learning_rate": 9.990382797311474e-06, - "loss": 0.0922, + "epoch": 0.9422735346358793, + "grad_norm": 0.5635397337457626, + "learning_rate": 9.451420016704518e-06, + "loss": 0.0619, "step": 1061 }, { - "epoch": 0.47168554297135246, - "grad_norm": 0.6432477880049939, - "learning_rate": 9.990262245763802e-06, - "loss": 0.066, + "epoch": 0.9431616341030196, + "grad_norm": 0.47572955056498234, + "learning_rate": 9.449653691533651e-06, + "loss": 0.0572, "step": 1062 }, { - "epoch": 0.47212969131689986, - "grad_norm": 1.0190163753358932, - "learning_rate": 9.990140944094946e-06, - "loss": 0.1059, + "epoch": 0.9440497335701599, + "grad_norm": 0.6126902376241518, + "learning_rate": 9.44788469290871e-06, + "loss": 0.0594, "step": 1063 }, { - "epoch": 0.47257383966244726, - "grad_norm": 0.9736504358272349, - "learning_rate": 9.990018892323138e-06, - "loss": 0.0716, + "epoch": 0.9449378330373002, + "grad_norm": 0.5635487410460365, + "learning_rate": 9.446113021892549e-06, + "loss": 0.0649, "step": 1064 }, { - "epoch": 0.47301798800799466, - "grad_norm": 0.8551545600086438, - "learning_rate": 9.989896090466725e-06, - "loss": 0.0721, + "epoch": 0.9458259325044405, + "grad_norm": 0.577763532992864, + "learning_rate": 9.444338679549627e-06, + "loss": 0.0715, "step": 1065 }, { - "epoch": 0.47346213635354206, - "grad_norm": 0.7846245356864615, - "learning_rate": 9.989772538544167e-06, - "loss": 0.0837, + "epoch": 0.9467140319715808, + "grad_norm": 0.7331856587215069, + "learning_rate": 9.442561666946013e-06, + "loss": 0.0591, "step": 1066 }, { - "epoch": 0.4739062846990895, - "grad_norm": 0.6875499187875691, - "learning_rate": 9.989648236574035e-06, - "loss": 0.0604, + "epoch": 0.9476021314387212, + "grad_norm": 0.5643191285005896, + "learning_rate": 9.440781985149375e-06, + "loss": 0.0651, "step": 1067 }, { - "epoch": 0.4743504330446369, - "grad_norm": 1.010871584074772, - "learning_rate": 9.989523184575013e-06, - "loss": 0.0836, + "epoch": 0.9484902309058615, + "grad_norm": 0.5165665085008259, + "learning_rate": 9.438999635228985e-06, + "loss": 0.066, "step": 1068 }, { - "epoch": 0.4747945813901843, - "grad_norm": 0.8312720125551346, - "learning_rate": 9.989397382565898e-06, - "loss": 0.0712, + "epoch": 0.9493783303730018, + "grad_norm": 0.6171838638410556, + "learning_rate": 9.437214618255725e-06, + "loss": 0.0637, "step": 1069 }, { - "epoch": 0.4752387297357317, - "grad_norm": 0.96789104638543, - "learning_rate": 9.989270830565603e-06, - "loss": 0.0784, + "epoch": 0.9502664298401421, + "grad_norm": 0.4693377019678983, + "learning_rate": 9.435426935302068e-06, + "loss": 0.0588, "step": 1070 }, { - "epoch": 0.4756828780812791, - "grad_norm": 0.7404878797473026, - "learning_rate": 9.989143528593149e-06, - "loss": 0.0872, + "epoch": 0.9511545293072824, + "grad_norm": 0.620246955872732, + "learning_rate": 9.4336365874421e-06, + "loss": 0.0589, "step": 1071 }, { - "epoch": 0.4761270264268266, - "grad_norm": 0.6280381360800792, - "learning_rate": 9.98901547666767e-06, - "loss": 0.0661, + "epoch": 0.9520426287744227, + "grad_norm": 0.5373583216159635, + "learning_rate": 9.4318435757515e-06, + "loss": 0.0735, "step": 1072 }, { - "epoch": 0.476571174772374, - "grad_norm": 0.8234412511968835, - "learning_rate": 9.988886674808418e-06, - "loss": 0.0621, + "epoch": 0.9529307282415631, + "grad_norm": 0.5173177306742143, + "learning_rate": 9.43004790130755e-06, + "loss": 0.0587, "step": 1073 }, { - "epoch": 0.4770153231179214, - "grad_norm": 0.5856951522424453, - "learning_rate": 9.988757123034753e-06, - "loss": 0.0657, + "epoch": 0.9538188277087034, + "grad_norm": 0.5668213584644644, + "learning_rate": 9.428249565189136e-06, + "loss": 0.0715, "step": 1074 }, { - "epoch": 0.4774594714634688, - "grad_norm": 0.9863728037623454, - "learning_rate": 9.988626821366147e-06, - "loss": 0.0787, + "epoch": 0.9547069271758437, + "grad_norm": 0.5034740577735468, + "learning_rate": 9.426448568476736e-06, + "loss": 0.0689, "step": 1075 }, { - "epoch": 0.4779036198090162, - "grad_norm": 0.8043968430169456, - "learning_rate": 9.988495769822188e-06, - "loss": 0.0712, + "epoch": 0.955595026642984, + "grad_norm": 0.5051585288129055, + "learning_rate": 9.424644912252434e-06, + "loss": 0.0611, "step": 1076 }, { - "epoch": 0.47834776815456365, - "grad_norm": 0.7330127963179667, - "learning_rate": 9.988363968422577e-06, - "loss": 0.0749, + "epoch": 0.9564831261101243, + "grad_norm": 0.3813464929806569, + "learning_rate": 9.422838597599904e-06, + "loss": 0.0503, "step": 1077 }, { - "epoch": 0.47879191650011105, - "grad_norm": 0.8101029322049607, - "learning_rate": 9.988231417187122e-06, - "loss": 0.0901, + "epoch": 0.9573712255772646, + "grad_norm": 0.4715908502621322, + "learning_rate": 9.421029625604423e-06, + "loss": 0.0626, "step": 1078 }, { - "epoch": 0.47923606484565845, - "grad_norm": 0.9467235647843469, - "learning_rate": 9.98809811613575e-06, - "loss": 0.0923, + "epoch": 0.9582593250444049, + "grad_norm": 0.5345699657625882, + "learning_rate": 9.419217997352863e-06, + "loss": 0.0597, "step": 1079 }, { - "epoch": 0.47968021319120585, - "grad_norm": 0.7955200227969057, - "learning_rate": 9.9879640652885e-06, - "loss": 0.0802, + "epoch": 0.9591474245115453, + "grad_norm": 0.5450263154606977, + "learning_rate": 9.417403713933692e-06, + "loss": 0.0764, "step": 1080 }, { - "epoch": 0.48012436153675325, - "grad_norm": 1.0074849821784144, - "learning_rate": 9.987829264665518e-06, - "loss": 0.1319, + "epoch": 0.9600355239786856, + "grad_norm": 0.5583774630425197, + "learning_rate": 9.415586776436973e-06, + "loss": 0.0706, "step": 1081 }, { - "epoch": 0.4805685098823007, - "grad_norm": 0.680242996169655, - "learning_rate": 9.98769371428707e-06, - "loss": 0.0659, + "epoch": 0.9609236234458259, + "grad_norm": 0.5041062859022016, + "learning_rate": 9.413767185954365e-06, + "loss": 0.0594, "step": 1082 }, { - "epoch": 0.4810126582278481, - "grad_norm": 1.1307628357255757, - "learning_rate": 9.98755741417353e-06, - "loss": 0.0962, + "epoch": 0.9618117229129662, + "grad_norm": 0.46181940102981356, + "learning_rate": 9.411944943579117e-06, + "loss": 0.0726, "step": 1083 }, { - "epoch": 0.4814568065733955, - "grad_norm": 0.7637117418782801, - "learning_rate": 9.987420364345388e-06, - "loss": 0.0596, + "epoch": 0.9626998223801065, + "grad_norm": 0.487345534214178, + "learning_rate": 9.410120050406075e-06, + "loss": 0.0635, "step": 1084 }, { - "epoch": 0.4819009549189429, - "grad_norm": 0.8434389453780773, - "learning_rate": 9.987282564823242e-06, - "loss": 0.0832, + "epoch": 0.9635879218472468, + "grad_norm": 0.5441846328667687, + "learning_rate": 9.408292507531679e-06, + "loss": 0.0583, "step": 1085 }, { - "epoch": 0.4823451032644903, - "grad_norm": 1.3802825505619392, - "learning_rate": 9.98714401562781e-06, - "loss": 0.1092, + "epoch": 0.9644760213143873, + "grad_norm": 0.5494892826459017, + "learning_rate": 9.406462316053954e-06, + "loss": 0.064, "step": 1086 }, { - "epoch": 0.4827892516100378, - "grad_norm": 0.7526559659659204, - "learning_rate": 9.987004716779914e-06, - "loss": 0.0649, + "epoch": 0.9653641207815276, + "grad_norm": 0.43112610113029626, + "learning_rate": 9.404629477072526e-06, + "loss": 0.0498, "step": 1087 }, { - "epoch": 0.4832333999555852, - "grad_norm": 0.8564554305307791, - "learning_rate": 9.986864668300494e-06, - "loss": 0.0745, + "epoch": 0.9662522202486679, + "grad_norm": 0.5123322765087007, + "learning_rate": 9.402793991688604e-06, + "loss": 0.0657, "step": 1088 }, { - "epoch": 0.4836775483011326, - "grad_norm": 0.6121214143253134, - "learning_rate": 9.986723870210605e-06, - "loss": 0.0648, + "epoch": 0.9671403197158082, + "grad_norm": 0.4178380585471896, + "learning_rate": 9.40095586100499e-06, + "loss": 0.0599, "step": 1089 }, { - "epoch": 0.48412169664668, - "grad_norm": 1.2737874840599017, - "learning_rate": 9.986582322531406e-06, - "loss": 0.1167, + "epoch": 0.9680284191829485, + "grad_norm": 0.6153530714811697, + "learning_rate": 9.399115086126077e-06, + "loss": 0.0743, "step": 1090 }, { - "epoch": 0.4845658449922274, - "grad_norm": 1.1300452790006459, - "learning_rate": 9.986440025284177e-06, - "loss": 0.1004, + "epoch": 0.9689165186500888, + "grad_norm": 0.4595165540363119, + "learning_rate": 9.397271668157842e-06, + "loss": 0.0556, "step": 1091 }, { - "epoch": 0.48500999333777484, - "grad_norm": 0.695366891600787, - "learning_rate": 9.986296978490308e-06, - "loss": 0.0631, + "epoch": 0.9698046181172292, + "grad_norm": 0.5700639774522798, + "learning_rate": 9.395425608207854e-06, + "loss": 0.0654, "step": 1092 }, { - "epoch": 0.48545414168332224, - "grad_norm": 0.907315227077863, - "learning_rate": 9.9861531821713e-06, - "loss": 0.0975, + "epoch": 0.9706927175843695, + "grad_norm": 0.5820496812169939, + "learning_rate": 9.393576907385268e-06, + "loss": 0.0561, "step": 1093 }, { - "epoch": 0.48589829002886964, - "grad_norm": 0.8452739013500794, - "learning_rate": 9.986008636348771e-06, - "loss": 0.0725, + "epoch": 0.9715808170515098, + "grad_norm": 0.5104423509205583, + "learning_rate": 9.391725566800828e-06, + "loss": 0.0572, "step": 1094 }, { - "epoch": 0.48634243837441704, - "grad_norm": 0.7641747681242534, - "learning_rate": 9.985863341044444e-06, - "loss": 0.0745, + "epoch": 0.9724689165186501, + "grad_norm": 0.6683733021598959, + "learning_rate": 9.389871587566859e-06, + "loss": 0.0721, "step": 1095 }, { - "epoch": 0.48678658671996444, - "grad_norm": 1.0767424752885635, - "learning_rate": 9.985717296280165e-06, - "loss": 0.1343, + "epoch": 0.9733570159857904, + "grad_norm": 0.5392379578662706, + "learning_rate": 9.388014970797275e-06, + "loss": 0.0653, "step": 1096 }, { - "epoch": 0.4872307350655119, - "grad_norm": 1.082068622065411, - "learning_rate": 9.985570502077881e-06, + "epoch": 0.9742451154529307, + "grad_norm": 0.5184363680081238, + "learning_rate": 9.386155717607575e-06, "loss": 0.0709, "step": 1097 }, { - "epoch": 0.4876748834110593, - "grad_norm": 1.1385183378270511, - "learning_rate": 9.98542295845966e-06, - "loss": 0.1136, + "epoch": 0.9751332149200711, + "grad_norm": 0.4985516172717562, + "learning_rate": 9.384293829114841e-06, + "loss": 0.0585, "step": 1098 }, { - "epoch": 0.4881190317566067, - "grad_norm": 0.8790710582646716, - "learning_rate": 9.985274665447682e-06, - "loss": 0.0796, + "epoch": 0.9760213143872114, + "grad_norm": 0.48932133345360795, + "learning_rate": 9.382429306437738e-06, + "loss": 0.0528, "step": 1099 }, { - "epoch": 0.4885631801021541, - "grad_norm": 0.7645639481923898, - "learning_rate": 9.985125623064238e-06, - "loss": 0.1039, + "epoch": 0.9769094138543517, + "grad_norm": 0.5141853488077307, + "learning_rate": 9.38056215069651e-06, + "loss": 0.0562, "step": 1100 }, { - "epoch": 0.4890073284477015, - "grad_norm": 0.9695248420140831, - "learning_rate": 9.98497583133173e-06, - "loss": 0.0667, + "epoch": 0.977797513321492, + "grad_norm": 0.5853949439809513, + "learning_rate": 9.378692363012995e-06, + "loss": 0.07, "step": 1101 }, { - "epoch": 0.48945147679324896, - "grad_norm": 0.8791125306416613, - "learning_rate": 9.984825290272673e-06, - "loss": 0.0703, + "epoch": 0.9786856127886323, + "grad_norm": 0.6129353009897632, + "learning_rate": 9.376819944510598e-06, + "loss": 0.0687, "step": 1102 }, { - "epoch": 0.48989562513879636, - "grad_norm": 0.6608187335865532, - "learning_rate": 9.984673999909698e-06, - "loss": 0.0607, + "epoch": 0.9795737122557726, + "grad_norm": 0.5566429527473071, + "learning_rate": 9.374944896314314e-06, + "loss": 0.0601, "step": 1103 }, { - "epoch": 0.49033977348434377, - "grad_norm": 0.7238746077559932, - "learning_rate": 9.984521960265545e-06, - "loss": 0.0582, + "epoch": 0.9804618117229129, + "grad_norm": 0.47189365987257625, + "learning_rate": 9.373067219550713e-06, + "loss": 0.0619, "step": 1104 }, { - "epoch": 0.49078392182989117, - "grad_norm": 0.7543190655536871, - "learning_rate": 9.98436917136307e-06, - "loss": 0.075, + "epoch": 0.9813499111900533, + "grad_norm": 0.6488893886846454, + "learning_rate": 9.37118691534795e-06, + "loss": 0.071, "step": 1105 }, { - "epoch": 0.49122807017543857, - "grad_norm": 0.9831510577501872, - "learning_rate": 9.98421563322524e-06, - "loss": 0.0832, + "epoch": 0.9822380106571936, + "grad_norm": 0.3888036759640904, + "learning_rate": 9.36930398483575e-06, + "loss": 0.0549, "step": 1106 }, { - "epoch": 0.491672218520986, - "grad_norm": 1.0953392860217965, - "learning_rate": 9.984061345875133e-06, - "loss": 0.1055, + "epoch": 0.9831261101243339, + "grad_norm": 0.5445655296183051, + "learning_rate": 9.367418429145423e-06, + "loss": 0.0679, "step": 1107 }, { - "epoch": 0.49211636686653343, - "grad_norm": 1.0004839390300868, - "learning_rate": 9.983906309335942e-06, - "loss": 0.0882, + "epoch": 0.9840142095914742, + "grad_norm": 0.3935911621353518, + "learning_rate": 9.365530249409855e-06, + "loss": 0.0549, "step": 1108 }, { - "epoch": 0.49256051521208083, - "grad_norm": 0.7389464810035322, - "learning_rate": 9.98375052363097e-06, - "loss": 0.0608, + "epoch": 0.9849023090586145, + "grad_norm": 0.6394614626639835, + "learning_rate": 9.363639446763508e-06, + "loss": 0.0595, "step": 1109 }, { - "epoch": 0.49300466355762823, - "grad_norm": 0.6469655367019491, - "learning_rate": 9.983593988783634e-06, - "loss": 0.0687, + "epoch": 0.9857904085257548, + "grad_norm": 0.4321186532161739, + "learning_rate": 9.36174602234242e-06, + "loss": 0.0534, "step": 1110 }, { - "epoch": 0.4934488119031757, - "grad_norm": 0.7833813336297293, - "learning_rate": 9.983436704817466e-06, - "loss": 0.0902, + "epoch": 0.9866785079928952, + "grad_norm": 0.4425938193990536, + "learning_rate": 9.359849977284199e-06, + "loss": 0.0647, "step": 1111 }, { - "epoch": 0.4938929602487231, - "grad_norm": 1.1676871219264449, - "learning_rate": 9.983278671756107e-06, - "loss": 0.0741, + "epoch": 0.9875666074600356, + "grad_norm": 0.4764434667907115, + "learning_rate": 9.357951312728037e-06, + "loss": 0.0636, "step": 1112 }, { - "epoch": 0.4943371085942705, - "grad_norm": 0.5948813388699934, - "learning_rate": 9.983119889623314e-06, - "loss": 0.0584, + "epoch": 0.9884547069271759, + "grad_norm": 0.5878975417629865, + "learning_rate": 9.356050029814696e-06, + "loss": 0.0731, "step": 1113 }, { - "epoch": 0.4947812569398179, - "grad_norm": 1.0932192784955042, - "learning_rate": 9.982960358442952e-06, - "loss": 0.0814, + "epoch": 0.9893428063943162, + "grad_norm": 0.3979984912788497, + "learning_rate": 9.354146129686507e-06, + "loss": 0.0661, "step": 1114 }, { - "epoch": 0.4952254052853653, - "grad_norm": 0.9576191702524788, - "learning_rate": 9.982800078239004e-06, - "loss": 0.0939, + "epoch": 0.9902309058614565, + "grad_norm": 0.41184644729096215, + "learning_rate": 9.35223961348738e-06, + "loss": 0.0527, "step": 1115 }, { - "epoch": 0.49566955363091275, - "grad_norm": 0.8091517385774816, - "learning_rate": 9.982639049035559e-06, - "loss": 0.0894, + "epoch": 0.9911190053285968, + "grad_norm": 0.46249426522235615, + "learning_rate": 9.350330482362791e-06, + "loss": 0.0623, "step": 1116 }, { - "epoch": 0.49611370197646015, - "grad_norm": 1.1318080237230808, - "learning_rate": 9.982477270856827e-06, - "loss": 0.0743, + "epoch": 0.9920071047957372, + "grad_norm": 0.36724731132242894, + "learning_rate": 9.34841873745979e-06, + "loss": 0.0458, "step": 1117 }, { - "epoch": 0.49655785032200755, - "grad_norm": 1.2019606615618712, - "learning_rate": 9.982314743727121e-06, - "loss": 0.107, + "epoch": 0.9928952042628775, + "grad_norm": 0.4579120079080996, + "learning_rate": 9.346504379927001e-06, + "loss": 0.0658, "step": 1118 }, { - "epoch": 0.49700199866755496, - "grad_norm": 0.9414473018138827, - "learning_rate": 9.982151467670876e-06, - "loss": 0.0746, + "epoch": 0.9937833037300178, + "grad_norm": 0.5436563697628319, + "learning_rate": 9.34458741091461e-06, + "loss": 0.0713, "step": 1119 }, { - "epoch": 0.49744614701310236, - "grad_norm": 0.7821900901787131, - "learning_rate": 9.981987442712634e-06, - "loss": 0.062, + "epoch": 0.9946714031971581, + "grad_norm": 0.4968627889604334, + "learning_rate": 9.342667831574377e-06, + "loss": 0.06, "step": 1120 }, { - "epoch": 0.4978902953586498, - "grad_norm": 0.8874834331056065, - "learning_rate": 9.981822668877048e-06, - "loss": 0.0964, + "epoch": 0.9955595026642984, + "grad_norm": 0.37477046083848015, + "learning_rate": 9.34074564305963e-06, + "loss": 0.0514, "step": 1121 }, { - "epoch": 0.4983344437041972, - "grad_norm": 0.763549676147823, - "learning_rate": 9.98165714618889e-06, - "loss": 0.0683, + "epoch": 0.9964476021314387, + "grad_norm": 0.4385411957434755, + "learning_rate": 9.338820846525262e-06, + "loss": 0.0507, "step": 1122 }, { - "epoch": 0.4987785920497446, - "grad_norm": 0.7237088556818837, - "learning_rate": 9.98149087467304e-06, - "loss": 0.0706, + "epoch": 0.9973357015985791, + "grad_norm": 0.5427375302508839, + "learning_rate": 9.336893443127739e-06, + "loss": 0.0667, "step": 1123 }, { - "epoch": 0.499222740395292, - "grad_norm": 0.5389621815014336, - "learning_rate": 9.98132385435449e-06, - "loss": 0.0739, + "epoch": 0.9982238010657194, + "grad_norm": 0.7999683100644676, + "learning_rate": 9.334963434025085e-06, + "loss": 0.0729, "step": 1124 }, { - "epoch": 0.4996668887408394, - "grad_norm": 0.5780461463044186, - "learning_rate": 9.981156085258347e-06, - "loss": 0.0532, + "epoch": 0.9991119005328597, + "grad_norm": 0.3699119827224097, + "learning_rate": 9.333030820376896e-06, + "loss": 0.0487, "step": 1125 }, { - "epoch": 0.5001110370863868, - "grad_norm": 0.9337676739733405, - "learning_rate": 9.980987567409829e-06, - "loss": 0.0728, + "epoch": 1.0, + "grad_norm": 0.508829292943249, + "learning_rate": 9.331095603344329e-06, + "loss": 0.0618, "step": 1126 }, { - "epoch": 0.5005551854319342, - "grad_norm": 0.7121233826282255, - "learning_rate": 9.980818300834267e-06, - "loss": 0.0684, + "epoch": 1.0, + "eval_loss": 0.06568732112646103, + "eval_runtime": 81.3182, + "eval_samples_per_second": 186.502, + "eval_steps_per_second": 2.914, + "step": 1126 + }, + { + "epoch": 1.0008880994671403, + "grad_norm": 0.6480601826524256, + "learning_rate": 9.329157784090108e-06, + "loss": 0.0648, "step": 1127 }, { - "epoch": 0.5009993337774816, - "grad_norm": 0.7441687384799486, - "learning_rate": 9.980648285557106e-06, - "loss": 0.0692, + "epoch": 1.0017761989342806, + "grad_norm": 0.4163022145377359, + "learning_rate": 9.32721736377852e-06, + "loss": 0.0537, "step": 1128 }, { - "epoch": 0.5014434821230291, - "grad_norm": 0.6423868990281854, - "learning_rate": 9.980477521603901e-06, - "loss": 0.0677, + "epoch": 1.002664298401421, + "grad_norm": 0.5133094034883717, + "learning_rate": 9.325274343575412e-06, + "loss": 0.0515, "step": 1129 }, { - "epoch": 0.5018876304685765, - "grad_norm": 0.5495871418316753, - "learning_rate": 9.98030600900032e-06, - "loss": 0.0728, + "epoch": 1.0035523978685612, + "grad_norm": 0.5658073414535006, + "learning_rate": 9.323328724648197e-06, + "loss": 0.0601, "step": 1130 }, { - "epoch": 0.5023317788141239, - "grad_norm": 0.6241490380000925, - "learning_rate": 9.980133747772148e-06, - "loss": 0.0662, + "epoch": 1.0044404973357015, + "grad_norm": 0.6245709824367127, + "learning_rate": 9.321380508165848e-06, + "loss": 0.0604, "step": 1131 }, { - "epoch": 0.5027759271596713, - "grad_norm": 0.7343652098087937, - "learning_rate": 9.979960737945273e-06, - "loss": 0.0678, + "epoch": 1.0053285968028418, + "grad_norm": 0.5594061879879259, + "learning_rate": 9.319429695298896e-06, + "loss": 0.0633, "step": 1132 }, { - "epoch": 0.5032200755052187, - "grad_norm": 1.1015252671394224, - "learning_rate": 9.979786979545704e-06, - "loss": 0.0993, + "epoch": 1.0062166962699823, + "grad_norm": 0.6301457475894502, + "learning_rate": 9.317476287219436e-06, + "loss": 0.0629, "step": 1133 }, { - "epoch": 0.5036642238507661, - "grad_norm": 0.8816390886875364, - "learning_rate": 9.979612472599563e-06, - "loss": 0.0839, + "epoch": 1.0071047957371226, + "grad_norm": 0.5679741582137742, + "learning_rate": 9.315520285101118e-06, + "loss": 0.0658, "step": 1134 }, { - "epoch": 0.5041083721963135, - "grad_norm": 0.7833322958956014, - "learning_rate": 9.979437217133077e-06, - "loss": 0.082, + "epoch": 1.007992895204263, + "grad_norm": 0.45827959597629336, + "learning_rate": 9.313561690119157e-06, + "loss": 0.0515, "step": 1135 }, { - "epoch": 0.504552520541861, - "grad_norm": 0.9520003222392113, - "learning_rate": 9.979261213172592e-06, - "loss": 0.0768, + "epoch": 1.0088809946714032, + "grad_norm": 0.6916115911467748, + "learning_rate": 9.311600503450318e-06, + "loss": 0.0691, "step": 1136 }, { - "epoch": 0.5049966688874084, - "grad_norm": 0.6650156355994222, - "learning_rate": 9.979084460744563e-06, - "loss": 0.0594, + "epoch": 1.0097690941385435, + "grad_norm": 0.4706381914559495, + "learning_rate": 9.309636726272929e-06, + "loss": 0.0563, "step": 1137 }, { - "epoch": 0.5054408172329558, - "grad_norm": 0.7702494870526182, - "learning_rate": 9.97890695987556e-06, - "loss": 0.0744, + "epoch": 1.0106571936056838, + "grad_norm": 0.4947347542946586, + "learning_rate": 9.30767035976687e-06, + "loss": 0.0565, "step": 1138 }, { - "epoch": 0.5058849655785033, - "grad_norm": 0.6441998434508551, - "learning_rate": 9.978728710592265e-06, - "loss": 0.0639, + "epoch": 1.0115452930728241, + "grad_norm": 0.5068531468493555, + "learning_rate": 9.305701405113582e-06, + "loss": 0.0556, "step": 1139 }, { - "epoch": 0.5063291139240507, - "grad_norm": 0.8638339516257324, - "learning_rate": 9.97854971292147e-06, - "loss": 0.0737, + "epoch": 1.0124333925399644, + "grad_norm": 0.6182707868884152, + "learning_rate": 9.303729863496057e-06, + "loss": 0.0636, "step": 1140 }, { - "epoch": 0.5067732622695981, - "grad_norm": 0.6599486193027585, - "learning_rate": 9.978369966890082e-06, - "loss": 0.0677, + "epoch": 1.0133214920071048, + "grad_norm": 0.5329607011098876, + "learning_rate": 9.301755736098843e-06, + "loss": 0.0752, "step": 1141 }, { - "epoch": 0.5072174106151455, - "grad_norm": 0.5684535397639588, - "learning_rate": 9.978189472525121e-06, - "loss": 0.0667, + "epoch": 1.014209591474245, + "grad_norm": 0.5715930875894688, + "learning_rate": 9.29977902410804e-06, + "loss": 0.0641, "step": 1142 }, { - "epoch": 0.5076615589606929, - "grad_norm": 0.7930545362878336, - "learning_rate": 9.978008229853717e-06, - "loss": 0.0768, + "epoch": 1.0150976909413854, + "grad_norm": 0.591692404601877, + "learning_rate": 9.297799728711303e-06, + "loss": 0.0589, "step": 1143 }, { - "epoch": 0.5081057073062403, - "grad_norm": 1.013824145483496, - "learning_rate": 9.977826238903116e-06, - "loss": 0.0878, + "epoch": 1.0159857904085257, + "grad_norm": 0.4417333733225122, + "learning_rate": 9.295817851097836e-06, + "loss": 0.0598, "step": 1144 }, { - "epoch": 0.5085498556517877, - "grad_norm": 0.7663207754691962, - "learning_rate": 9.97764349970067e-06, - "loss": 0.0757, + "epoch": 1.0168738898756662, + "grad_norm": 0.5009656149763125, + "learning_rate": 9.2938333924584e-06, + "loss": 0.0411, "step": 1145 }, { - "epoch": 0.5089940039973351, - "grad_norm": 0.7907421181118522, - "learning_rate": 9.977460012273854e-06, - "loss": 0.0784, + "epoch": 1.0177619893428065, + "grad_norm": 0.4530511856701877, + "learning_rate": 9.291846353985301e-06, + "loss": 0.0454, "step": 1146 }, { - "epoch": 0.5094381523428825, - "grad_norm": 0.6096514964784717, - "learning_rate": 9.977275776650244e-06, - "loss": 0.0806, + "epoch": 1.0186500888099468, + "grad_norm": 0.8092573734074219, + "learning_rate": 9.289856736872398e-06, + "loss": 0.0721, "step": 1147 }, { - "epoch": 0.50988230068843, - "grad_norm": 0.5159120034000472, - "learning_rate": 9.977090792857536e-06, - "loss": 0.0598, + "epoch": 1.019538188277087, + "grad_norm": 0.6030690101170599, + "learning_rate": 9.287864542315099e-06, + "loss": 0.0655, "step": 1148 }, { - "epoch": 0.5103264490339774, - "grad_norm": 0.7102391658769992, - "learning_rate": 9.976905060923536e-06, - "loss": 0.077, + "epoch": 1.0204262877442274, + "grad_norm": 1.060691763095867, + "learning_rate": 9.285869771510359e-06, + "loss": 0.0603, "step": 1149 }, { - "epoch": 0.5107705973795248, - "grad_norm": 0.7376867178754969, - "learning_rate": 9.97671858087616e-06, - "loss": 0.0726, + "epoch": 1.0213143872113677, + "grad_norm": 0.7265882392430735, + "learning_rate": 9.283872425656681e-06, + "loss": 0.0803, "step": 1150 }, { - "epoch": 0.5112147457250722, - "grad_norm": 0.7790796748709151, - "learning_rate": 9.976531352743445e-06, - "loss": 0.0806, + "epoch": 1.022202486678508, + "grad_norm": 0.47803413651821886, + "learning_rate": 9.281872505954121e-06, + "loss": 0.062, "step": 1151 }, { - "epoch": 0.5116588940706196, - "grad_norm": 0.618833561044351, - "learning_rate": 9.97634337655353e-06, - "loss": 0.0654, + "epoch": 1.0230905861456483, + "grad_norm": 0.6041289456455284, + "learning_rate": 9.279870013604273e-06, + "loss": 0.0554, "step": 1152 }, { - "epoch": 0.512103042416167, - "grad_norm": 0.808662756842487, - "learning_rate": 9.976154652334673e-06, - "loss": 0.1081, + "epoch": 1.0239786856127886, + "grad_norm": 0.5229855759529586, + "learning_rate": 9.27786494981028e-06, + "loss": 0.0644, "step": 1153 }, { - "epoch": 0.5125471907617144, - "grad_norm": 1.0180790521960883, - "learning_rate": 9.97596518011524e-06, - "loss": 0.1018, + "epoch": 1.024866785079929, + "grad_norm": 0.6121251944543281, + "learning_rate": 9.27585731577683e-06, + "loss": 0.0692, "step": 1154 }, { - "epoch": 0.5129913391072618, - "grad_norm": 0.638695304240369, - "learning_rate": 9.975774959923717e-06, - "loss": 0.0655, + "epoch": 1.0257548845470692, + "grad_norm": 0.5008161653178155, + "learning_rate": 9.273847112710159e-06, + "loss": 0.055, "step": 1155 }, { - "epoch": 0.5134354874528092, - "grad_norm": 0.7566614134333154, - "learning_rate": 9.975583991788691e-06, - "loss": 0.0658, + "epoch": 1.0266429840142095, + "grad_norm": 1.3550527848348757, + "learning_rate": 9.271834341818043e-06, + "loss": 0.0626, "step": 1156 }, { - "epoch": 0.5138796357983566, - "grad_norm": 0.7034717435901088, - "learning_rate": 9.97539227573887e-06, - "loss": 0.0815, + "epoch": 1.0275310834813498, + "grad_norm": 0.6086989420332563, + "learning_rate": 9.269819004309796e-06, + "loss": 0.0696, "step": 1157 }, { - "epoch": 0.5143237841439041, - "grad_norm": 0.7342548435589596, - "learning_rate": 9.975199811803073e-06, - "loss": 0.0755, + "epoch": 1.0284191829484903, + "grad_norm": 0.4398267573695782, + "learning_rate": 9.267801101396284e-06, + "loss": 0.057, "step": 1158 }, { - "epoch": 0.5147679324894515, - "grad_norm": 0.881634196160518, - "learning_rate": 9.975006600010233e-06, - "loss": 0.0649, + "epoch": 1.0293072824156306, + "grad_norm": 0.5023864760744993, + "learning_rate": 9.265780634289905e-06, + "loss": 0.0522, "step": 1159 }, { - "epoch": 0.5152120808349989, - "grad_norm": 0.82644127591562, - "learning_rate": 9.97481264038939e-06, - "loss": 0.0589, + "epoch": 1.030195381882771, + "grad_norm": 0.5446662737483264, + "learning_rate": 9.263757604204607e-06, + "loss": 0.0785, "step": 1160 }, { - "epoch": 0.5156562291805463, - "grad_norm": 0.8462515915216339, - "learning_rate": 9.974617932969697e-06, - "loss": 0.0735, + "epoch": 1.0310834813499112, + "grad_norm": 0.5002911503950366, + "learning_rate": 9.261732012355868e-06, + "loss": 0.0673, "step": 1161 }, { - "epoch": 0.5161003775260937, - "grad_norm": 0.6116708783372564, - "learning_rate": 9.974422477780426e-06, - "loss": 0.0593, + "epoch": 1.0319715808170515, + "grad_norm": 0.534388338135266, + "learning_rate": 9.259703859960713e-06, + "loss": 0.0606, "step": 1162 }, { - "epoch": 0.5165445258716411, - "grad_norm": 0.7646830820557489, - "learning_rate": 9.974226274850956e-06, - "loss": 0.0866, + "epoch": 1.0328596802841918, + "grad_norm": 0.45775255151671196, + "learning_rate": 9.257673148237705e-06, + "loss": 0.0513, "step": 1163 }, { - "epoch": 0.5169886742171885, - "grad_norm": 0.6551362409279565, - "learning_rate": 9.97402932421078e-06, - "loss": 0.0742, + "epoch": 1.0337477797513321, + "grad_norm": 0.5085806099929371, + "learning_rate": 9.255639878406937e-06, + "loss": 0.0571, "step": 1164 }, { - "epoch": 0.5174328225627359, - "grad_norm": 0.5323199794273931, - "learning_rate": 9.973831625889501e-06, - "loss": 0.0599, + "epoch": 1.0346358792184724, + "grad_norm": 0.5955705550431354, + "learning_rate": 9.253604051690047e-06, + "loss": 0.0709, "step": 1165 }, { - "epoch": 0.5178769709082833, - "grad_norm": 0.694095375162108, - "learning_rate": 9.97363317991684e-06, - "loss": 0.0646, + "epoch": 1.0355239786856127, + "grad_norm": 0.5178355763041769, + "learning_rate": 9.251565669310204e-06, + "loss": 0.0601, "step": 1166 }, { - "epoch": 0.5183211192538307, - "grad_norm": 0.9040017314033096, - "learning_rate": 9.973433986322625e-06, - "loss": 0.0755, + "epoch": 1.036412078152753, + "grad_norm": 0.48472634314685387, + "learning_rate": 9.249524732492118e-06, + "loss": 0.0507, "step": 1167 }, { - "epoch": 0.5187652675993782, - "grad_norm": 0.7059881945266824, - "learning_rate": 9.973234045136798e-06, - "loss": 0.0869, + "epoch": 1.0373001776198933, + "grad_norm": 0.5582223693364302, + "learning_rate": 9.24748124246203e-06, + "loss": 0.0582, "step": 1168 }, { - "epoch": 0.5192094159449256, - "grad_norm": 0.7791362124843618, - "learning_rate": 9.973033356389412e-06, - "loss": 0.0912, + "epoch": 1.0381882770870337, + "grad_norm": 0.5548798266580908, + "learning_rate": 9.245435200447715e-06, + "loss": 0.0726, "step": 1169 }, { - "epoch": 0.519653564290473, - "grad_norm": 0.7295114970239928, - "learning_rate": 9.972831920110635e-06, - "loss": 0.0769, + "epoch": 1.0390763765541742, + "grad_norm": 0.5344192014597939, + "learning_rate": 9.243386607678482e-06, + "loss": 0.057, "step": 1170 }, { - "epoch": 0.5200977126360204, - "grad_norm": 0.5612528230166707, - "learning_rate": 9.972629736330748e-06, - "loss": 0.0679, + "epoch": 1.0399644760213145, + "grad_norm": 0.5022659329476004, + "learning_rate": 9.241335465385171e-06, + "loss": 0.0602, "step": 1171 }, { - "epoch": 0.5205418609815678, - "grad_norm": 0.7254809001904728, - "learning_rate": 9.972426805080141e-06, - "loss": 0.0715, + "epoch": 1.0408525754884548, + "grad_norm": 0.6086703551030485, + "learning_rate": 9.239281774800159e-06, + "loss": 0.0581, "step": 1172 }, { - "epoch": 0.5209860093271153, - "grad_norm": 0.7275192896210665, - "learning_rate": 9.97222312638932e-06, - "loss": 0.0841, + "epoch": 1.041740674955595, + "grad_norm": 0.5527578198795071, + "learning_rate": 9.23722553715735e-06, + "loss": 0.0616, "step": 1173 }, { - "epoch": 0.5214301576726627, - "grad_norm": 0.8132581046482065, - "learning_rate": 9.972018700288898e-06, - "loss": 0.0715, + "epoch": 1.0426287744227354, + "grad_norm": 0.5043530641652352, + "learning_rate": 9.235166753692176e-06, + "loss": 0.0641, "step": 1174 }, { - "epoch": 0.52187430601821, - "grad_norm": 0.9966008387669913, - "learning_rate": 9.971813526809606e-06, - "loss": 0.0844, + "epoch": 1.0435168738898757, + "grad_norm": 0.4534779142698993, + "learning_rate": 9.233105425641601e-06, + "loss": 0.048, "step": 1175 }, { - "epoch": 0.5223184543637575, - "grad_norm": 0.621117239882631, - "learning_rate": 9.971607605982285e-06, - "loss": 0.0572, + "epoch": 1.044404973357016, + "grad_norm": 0.45940582209571523, + "learning_rate": 9.23104155424412e-06, + "loss": 0.0547, "step": 1176 }, { - "epoch": 0.5227626027093049, - "grad_norm": 0.8375409793845516, - "learning_rate": 9.971400937837887e-06, - "loss": 0.0728, + "epoch": 1.0452930728241563, + "grad_norm": 0.5998582500885319, + "learning_rate": 9.228975140739756e-06, + "loss": 0.0673, "step": 1177 }, { - "epoch": 0.5232067510548524, - "grad_norm": 0.9177354811192968, - "learning_rate": 9.97119352240748e-06, - "loss": 0.0878, + "epoch": 1.0461811722912966, + "grad_norm": 0.42349229659058435, + "learning_rate": 9.226906186370057e-06, + "loss": 0.0529, "step": 1178 }, { - "epoch": 0.5236508994003998, - "grad_norm": 0.7412042248938593, - "learning_rate": 9.97098535972224e-06, - "loss": 0.0649, + "epoch": 1.047069271758437, + "grad_norm": 0.5217090012758729, + "learning_rate": 9.224834692378095e-06, + "loss": 0.0613, "step": 1179 }, { - "epoch": 0.5240950477459472, - "grad_norm": 0.75392664768525, - "learning_rate": 9.970776449813457e-06, - "loss": 0.0682, + "epoch": 1.0479573712255772, + "grad_norm": 0.5357568673665954, + "learning_rate": 9.222760660008474e-06, + "loss": 0.0563, "step": 1180 }, { - "epoch": 0.5245391960914946, - "grad_norm": 0.7437991387327917, - "learning_rate": 9.970566792712537e-06, - "loss": 0.0646, + "epoch": 1.0488454706927175, + "grad_norm": 0.48336047622715606, + "learning_rate": 9.220684090507318e-06, + "loss": 0.0512, "step": 1181 }, { - "epoch": 0.524983344437042, - "grad_norm": 0.708247278848663, - "learning_rate": 9.970356388450992e-06, - "loss": 0.0781, + "epoch": 1.0497335701598578, + "grad_norm": 0.4624025252082009, + "learning_rate": 9.218604985122282e-06, + "loss": 0.0508, "step": 1182 }, { - "epoch": 0.5254274927825894, - "grad_norm": 0.508933410042531, - "learning_rate": 9.97014523706045e-06, - "loss": 0.0535, + "epoch": 1.0506216696269983, + "grad_norm": 0.46561654488313764, + "learning_rate": 9.216523345102534e-06, + "loss": 0.0517, "step": 1183 }, { - "epoch": 0.5258716411281368, - "grad_norm": 1.0021738372524498, - "learning_rate": 9.96993333857265e-06, - "loss": 0.0925, + "epoch": 1.0515097690941386, + "grad_norm": 0.4648713622907487, + "learning_rate": 9.214439171698777e-06, + "loss": 0.0607, "step": 1184 }, { - "epoch": 0.5263157894736842, - "grad_norm": 0.8285101542802605, - "learning_rate": 9.969720693019447e-06, - "loss": 0.0653, + "epoch": 1.052397868561279, + "grad_norm": 0.5092774520008562, + "learning_rate": 9.212352466163226e-06, + "loss": 0.0682, "step": 1185 }, { - "epoch": 0.5267599378192316, - "grad_norm": 0.6010536869971059, - "learning_rate": 9.9695073004328e-06, - "loss": 0.0601, + "epoch": 1.0532859680284192, + "grad_norm": 0.5088605384399475, + "learning_rate": 9.210263229749626e-06, + "loss": 0.0638, "step": 1186 }, { - "epoch": 0.527204086164779, - "grad_norm": 0.43111203380102636, - "learning_rate": 9.969293160844793e-06, - "loss": 0.0435, + "epoch": 1.0541740674955595, + "grad_norm": 0.3941412719758881, + "learning_rate": 9.208171463713234e-06, + "loss": 0.0516, "step": 1187 }, { - "epoch": 0.5276482345103265, - "grad_norm": 0.7272050961589046, - "learning_rate": 9.969078274287607e-06, - "loss": 0.0582, + "epoch": 1.0550621669626998, + "grad_norm": 0.5232270076417497, + "learning_rate": 9.206077169310833e-06, + "loss": 0.0754, "step": 1188 }, { - "epoch": 0.5280923828558739, - "grad_norm": 1.0434151274290433, - "learning_rate": 9.968862640793547e-06, - "loss": 0.0882, + "epoch": 1.0559502664298401, + "grad_norm": 0.6567846124696248, + "learning_rate": 9.203980347800721e-06, + "loss": 0.0734, "step": 1189 }, { - "epoch": 0.5285365312014213, - "grad_norm": 0.6610761510472204, - "learning_rate": 9.968646260395027e-06, - "loss": 0.0701, + "epoch": 1.0568383658969804, + "grad_norm": 0.5024428301117129, + "learning_rate": 9.20188100044272e-06, + "loss": 0.0683, "step": 1190 }, { - "epoch": 0.5289806795469687, - "grad_norm": 0.7703015492278306, - "learning_rate": 9.96842913312457e-06, - "loss": 0.0755, + "epoch": 1.0577264653641207, + "grad_norm": 0.5645115632471462, + "learning_rate": 9.199779128498163e-06, + "loss": 0.0671, "step": 1191 }, { - "epoch": 0.5294248278925161, - "grad_norm": 1.1334603205830818, - "learning_rate": 9.968211259014817e-06, - "loss": 0.102, + "epoch": 1.058614564831261, + "grad_norm": 0.46395712255910054, + "learning_rate": 9.197674733229904e-06, + "loss": 0.0613, "step": 1192 }, { - "epoch": 0.5298689762380635, - "grad_norm": 0.5680812506173959, - "learning_rate": 9.967992638098517e-06, - "loss": 0.0592, + "epoch": 1.0595026642984013, + "grad_norm": 0.5656733209301367, + "learning_rate": 9.195567815902313e-06, + "loss": 0.0697, "step": 1193 }, { - "epoch": 0.5303131245836109, - "grad_norm": 0.7128376996533422, - "learning_rate": 9.96777327040853e-06, - "loss": 0.0757, + "epoch": 1.0603907637655416, + "grad_norm": 0.45851005309044807, + "learning_rate": 9.193458377781273e-06, + "loss": 0.0638, "step": 1194 }, { - "epoch": 0.5307572729291583, - "grad_norm": 0.6547153588619065, - "learning_rate": 9.967553155977833e-06, - "loss": 0.0513, + "epoch": 1.061278863232682, + "grad_norm": 0.5478213904125951, + "learning_rate": 9.191346420134183e-06, + "loss": 0.0505, "step": 1195 }, { - "epoch": 0.5312014212747057, - "grad_norm": 0.8276344454564539, - "learning_rate": 9.967332294839514e-06, - "loss": 0.0722, + "epoch": 1.0621669626998225, + "grad_norm": 0.5255656506155585, + "learning_rate": 9.189231944229957e-06, + "loss": 0.0658, "step": 1196 }, { - "epoch": 0.5316455696202531, - "grad_norm": 0.5942789130977645, - "learning_rate": 9.967110687026769e-06, - "loss": 0.0735, + "epoch": 1.0630550621669628, + "grad_norm": 0.3767442674250263, + "learning_rate": 9.187114951339021e-06, + "loss": 0.0444, "step": 1197 }, { - "epoch": 0.5320897179658006, - "grad_norm": 0.858420102793841, - "learning_rate": 9.966888332572913e-06, - "loss": 0.0888, + "epoch": 1.063943161634103, + "grad_norm": 0.47321589853516277, + "learning_rate": 9.184995442733309e-06, + "loss": 0.0504, "step": 1198 }, { - "epoch": 0.532533866311348, - "grad_norm": 0.5306419562771065, - "learning_rate": 9.966665231511367e-06, - "loss": 0.0735, + "epoch": 1.0648312611012434, + "grad_norm": 0.48396521102034207, + "learning_rate": 9.182873419686273e-06, + "loss": 0.0501, "step": 1199 }, { - "epoch": 0.5329780146568954, - "grad_norm": 0.8818696703835058, - "learning_rate": 9.96644138387567e-06, - "loss": 0.0836, + "epoch": 1.0657193605683837, + "grad_norm": 0.3783512928266631, + "learning_rate": 9.180748883472874e-06, + "loss": 0.0563, "step": 1200 }, { - "epoch": 0.5334221630024428, - "grad_norm": 0.6953800899178525, - "learning_rate": 9.966216789699466e-06, - "loss": 0.0609, + "epoch": 1.066607460035524, + "grad_norm": 0.5583245420130393, + "learning_rate": 9.178621835369581e-06, + "loss": 0.0535, "step": 1201 }, { - "epoch": 0.5338663113479902, - "grad_norm": 0.6141704558915728, - "learning_rate": 9.965991449016517e-06, - "loss": 0.0593, + "epoch": 1.0674955595026643, + "grad_norm": 0.4365276129838906, + "learning_rate": 9.176492276654373e-06, + "loss": 0.0491, "step": 1202 }, { - "epoch": 0.5343104596935376, - "grad_norm": 0.7784143890008246, - "learning_rate": 9.965765361860696e-06, - "loss": 0.0682, + "epoch": 1.0683836589698046, + "grad_norm": 0.42841506319660755, + "learning_rate": 9.17436020860674e-06, + "loss": 0.053, "step": 1203 }, { - "epoch": 0.534754608039085, - "grad_norm": 0.7591572831776785, - "learning_rate": 9.965538528265986e-06, - "loss": 0.0713, + "epoch": 1.0692717584369449, + "grad_norm": 0.4419723943599086, + "learning_rate": 9.172225632507674e-06, + "loss": 0.0539, "step": 1204 }, { - "epoch": 0.5351987563846324, - "grad_norm": 1.0112012318529477, - "learning_rate": 9.965310948266488e-06, - "loss": 0.0885, + "epoch": 1.0701598579040852, + "grad_norm": 0.5044189696920915, + "learning_rate": 9.170088549639679e-06, + "loss": 0.0582, "step": 1205 }, { - "epoch": 0.5356429047301798, - "grad_norm": 0.540867345550289, - "learning_rate": 9.965082621896407e-06, - "loss": 0.0683, + "epoch": 1.0710479573712255, + "grad_norm": 0.4251103284213423, + "learning_rate": 9.167948961286764e-06, + "loss": 0.0515, "step": 1206 }, { - "epoch": 0.5360870530757272, - "grad_norm": 0.6987948599480821, - "learning_rate": 9.964853549190067e-06, - "loss": 0.0623, + "epoch": 1.071936056838366, + "grad_norm": 0.4946488504568838, + "learning_rate": 9.165806868734444e-06, + "loss": 0.062, "step": 1207 }, { - "epoch": 0.5365312014212748, - "grad_norm": 1.0949702611838528, - "learning_rate": 9.9646237301819e-06, - "loss": 0.0715, + "epoch": 1.0728241563055063, + "grad_norm": 0.5234508822942274, + "learning_rate": 9.163662273269733e-06, + "loss": 0.0669, "step": 1208 }, { - "epoch": 0.5369753497668222, - "grad_norm": 0.5434822885792773, - "learning_rate": 9.964393164906452e-06, - "loss": 0.077, + "epoch": 1.0737122557726466, + "grad_norm": 0.44856465319675254, + "learning_rate": 9.161515176181157e-06, + "loss": 0.0568, "step": 1209 }, { - "epoch": 0.5374194981123696, - "grad_norm": 0.785351221183312, - "learning_rate": 9.964161853398381e-06, - "loss": 0.07, + "epoch": 1.074600355239787, + "grad_norm": 0.5214790110595106, + "learning_rate": 9.15936557875874e-06, + "loss": 0.0607, "step": 1210 }, { - "epoch": 0.537863646457917, - "grad_norm": 0.5762754098127977, - "learning_rate": 9.963929795692458e-06, - "loss": 0.0553, + "epoch": 1.0754884547069272, + "grad_norm": 0.4276901711426315, + "learning_rate": 9.157213482294009e-06, + "loss": 0.0533, "step": 1211 }, { - "epoch": 0.5383077948034644, - "grad_norm": 0.7778031310614973, - "learning_rate": 9.963696991823563e-06, - "loss": 0.0671, + "epoch": 1.0763765541740675, + "grad_norm": 0.4031156583282107, + "learning_rate": 9.155058888079994e-06, + "loss": 0.054, "step": 1212 }, { - "epoch": 0.5387519431490118, - "grad_norm": 0.7236031416359869, - "learning_rate": 9.963463441826693e-06, - "loss": 0.0861, + "epoch": 1.0772646536412078, + "grad_norm": 0.4638513120983917, + "learning_rate": 9.152901797411224e-06, + "loss": 0.0593, "step": 1213 }, { - "epoch": 0.5391960914945592, - "grad_norm": 0.8183548985521056, - "learning_rate": 9.963229145736952e-06, - "loss": 0.0905, + "epoch": 1.0781527531083481, + "grad_norm": 0.46551869913533556, + "learning_rate": 9.150742211583728e-06, + "loss": 0.0636, "step": 1214 }, { - "epoch": 0.5396402398401066, - "grad_norm": 0.8238577196866539, - "learning_rate": 9.96299410358956e-06, - "loss": 0.0791, + "epoch": 1.0790408525754884, + "grad_norm": 0.4950805741312129, + "learning_rate": 9.148580131895037e-06, + "loss": 0.0593, "step": 1215 }, { - "epoch": 0.540084388185654, - "grad_norm": 0.620826351541747, - "learning_rate": 9.962758315419847e-06, - "loss": 0.0627, + "epoch": 1.0799289520426287, + "grad_norm": 0.38004108965114014, + "learning_rate": 9.146415559644176e-06, + "loss": 0.052, "step": 1216 }, { - "epoch": 0.5405285365312015, - "grad_norm": 0.8069644430296897, - "learning_rate": 9.962521781263259e-06, - "loss": 0.0846, + "epoch": 1.080817051509769, + "grad_norm": 0.44474473573525736, + "learning_rate": 9.144248496131669e-06, + "loss": 0.0636, "step": 1217 }, { - "epoch": 0.5409726848767489, - "grad_norm": 0.6551620736321753, - "learning_rate": 9.962284501155347e-06, - "loss": 0.0626, + "epoch": 1.0817051509769093, + "grad_norm": 0.4312387573207775, + "learning_rate": 9.14207894265954e-06, + "loss": 0.0532, "step": 1218 }, { - "epoch": 0.5414168332222963, - "grad_norm": 0.7163185717239511, - "learning_rate": 9.96204647513178e-06, - "loss": 0.0742, + "epoch": 1.0825932504440496, + "grad_norm": 0.3986077843447081, + "learning_rate": 9.139906900531304e-06, + "loss": 0.0623, "step": 1219 }, { - "epoch": 0.5418609815678437, - "grad_norm": 0.6464830809225138, - "learning_rate": 9.96180770322834e-06, - "loss": 0.0597, + "epoch": 1.0834813499111902, + "grad_norm": 0.46785671093620884, + "learning_rate": 9.137732371051978e-06, + "loss": 0.0602, "step": 1220 }, { - "epoch": 0.5423051299133911, - "grad_norm": 0.4702533761313221, - "learning_rate": 9.961568185480912e-06, - "loss": 0.0663, + "epoch": 1.0843694493783305, + "grad_norm": 0.6939585586944105, + "learning_rate": 9.135555355528063e-06, + "loss": 0.0531, "step": 1221 }, { - "epoch": 0.5427492782589385, - "grad_norm": 0.5377074866144479, - "learning_rate": 9.961327921925506e-06, - "loss": 0.0682, + "epoch": 1.0852575488454708, + "grad_norm": 0.5159625584290726, + "learning_rate": 9.133375855267566e-06, + "loss": 0.0733, "step": 1222 }, { - "epoch": 0.5431934266044859, - "grad_norm": 0.6242182477426378, - "learning_rate": 9.961086912598232e-06, - "loss": 0.0558, + "epoch": 1.086145648312611, + "grad_norm": 0.47928519418435783, + "learning_rate": 9.131193871579975e-06, + "loss": 0.0598, "step": 1223 }, { - "epoch": 0.5436375749500333, - "grad_norm": 0.919842928693314, - "learning_rate": 9.960845157535324e-06, - "loss": 0.0979, + "epoch": 1.0870337477797514, + "grad_norm": 0.7058879880927733, + "learning_rate": 9.129009405776281e-06, + "loss": 0.0567, "step": 1224 }, { - "epoch": 0.5440817232955807, - "grad_norm": 0.4865673297809684, - "learning_rate": 9.960602656773118e-06, - "loss": 0.0591, + "epoch": 1.0879218472468917, + "grad_norm": 0.31967130011089134, + "learning_rate": 9.126822459168958e-06, + "loss": 0.0478, "step": 1225 }, { - "epoch": 0.5445258716411281, - "grad_norm": 0.5994196236588832, - "learning_rate": 9.960359410348066e-06, - "loss": 0.06, + "epoch": 1.088809946714032, + "grad_norm": 0.5247237546759348, + "learning_rate": 9.124633033071974e-06, + "loss": 0.0613, "step": 1226 }, { - "epoch": 0.5449700199866756, - "grad_norm": 0.6813026653368286, - "learning_rate": 9.960115418296734e-06, - "loss": 0.0563, + "epoch": 1.0896980461811723, + "grad_norm": 0.43333651070647783, + "learning_rate": 9.12244112880079e-06, + "loss": 0.053, "step": 1227 }, { - "epoch": 0.545414168332223, - "grad_norm": 0.4200726079302436, - "learning_rate": 9.959870680655797e-06, - "loss": 0.0448, + "epoch": 1.0905861456483126, + "grad_norm": 0.6511917697060805, + "learning_rate": 9.120246747672347e-06, + "loss": 0.0712, "step": 1228 }, { - "epoch": 0.5458583166777704, - "grad_norm": 1.3484707743369218, - "learning_rate": 9.959625197462042e-06, - "loss": 0.058, + "epoch": 1.0914742451154529, + "grad_norm": 0.44421544969475796, + "learning_rate": 9.118049891005083e-06, + "loss": 0.0528, "step": 1229 }, { - "epoch": 0.5463024650233178, - "grad_norm": 0.91837601072641, - "learning_rate": 9.959378968752371e-06, - "loss": 0.1046, + "epoch": 1.0923623445825932, + "grad_norm": 0.5075003675357505, + "learning_rate": 9.115850560118919e-06, + "loss": 0.0621, "step": 1230 }, { - "epoch": 0.5467466133688652, - "grad_norm": 0.7675630782674875, - "learning_rate": 9.959131994563795e-06, - "loss": 0.0738, + "epoch": 1.0932504440497335, + "grad_norm": 0.497956216214096, + "learning_rate": 9.113648756335265e-06, + "loss": 0.0594, "step": 1231 }, { - "epoch": 0.5471907617144126, - "grad_norm": 0.5684812066450396, - "learning_rate": 9.958884274933442e-06, - "loss": 0.0699, + "epoch": 1.0941385435168738, + "grad_norm": 0.4603671054013783, + "learning_rate": 9.111444480977011e-06, + "loss": 0.0588, "step": 1232 }, { - "epoch": 0.54763491005996, - "grad_norm": 0.6471523630810303, - "learning_rate": 9.958635809898544e-06, - "loss": 0.0713, + "epoch": 1.0950266429840143, + "grad_norm": 0.4816947592901923, + "learning_rate": 9.109237735368542e-06, + "loss": 0.0687, "step": 1233 }, { - "epoch": 0.5480790584055074, - "grad_norm": 0.6967540553492192, - "learning_rate": 9.95838659949645e-06, - "loss": 0.0776, + "epoch": 1.0959147424511546, + "grad_norm": 0.5551675769805149, + "learning_rate": 9.107028520835717e-06, + "loss": 0.0586, "step": 1234 }, { - "epoch": 0.5485232067510548, - "grad_norm": 0.6469245863206669, - "learning_rate": 9.958136643764624e-06, - "loss": 0.0764, + "epoch": 1.096802841918295, + "grad_norm": 0.47960446136116963, + "learning_rate": 9.104816838705883e-06, + "loss": 0.0678, "step": 1235 }, { - "epoch": 0.5489673550966022, - "grad_norm": 0.8210874368087515, - "learning_rate": 9.957885942740635e-06, - "loss": 0.0779, + "epoch": 1.0976909413854352, + "grad_norm": 0.42950340850108404, + "learning_rate": 9.102602690307873e-06, + "loss": 0.0509, "step": 1236 }, { - "epoch": 0.5494115034421497, - "grad_norm": 0.6891702613627725, - "learning_rate": 9.957634496462169e-06, - "loss": 0.0575, + "epoch": 1.0985790408525755, + "grad_norm": 0.43620658315770067, + "learning_rate": 9.100386076971995e-06, + "loss": 0.062, "step": 1237 }, { - "epoch": 0.5498556517876971, - "grad_norm": 0.5415934981486168, - "learning_rate": 9.957382304967024e-06, - "loss": 0.0653, + "epoch": 1.0994671403197158, + "grad_norm": 0.39365669512280954, + "learning_rate": 9.098167000030041e-06, + "loss": 0.0446, "step": 1238 }, { - "epoch": 0.5502998001332445, - "grad_norm": 0.606098328678092, - "learning_rate": 9.957129368293108e-06, - "loss": 0.0578, + "epoch": 1.1003552397868561, + "grad_norm": 0.5473374177006272, + "learning_rate": 9.095945460815285e-06, + "loss": 0.0591, "step": 1239 }, { - "epoch": 0.5507439484787919, - "grad_norm": 0.5816870040354246, - "learning_rate": 9.95687568647844e-06, - "loss": 0.0543, + "epoch": 1.1012433392539964, + "grad_norm": 0.49635199912421746, + "learning_rate": 9.09372146066248e-06, + "loss": 0.0748, "step": 1240 }, { - "epoch": 0.5511880968243393, - "grad_norm": 0.7853352727315064, - "learning_rate": 9.956621259561152e-06, - "loss": 0.0731, + "epoch": 1.1021314387211367, + "grad_norm": 0.4630743245346236, + "learning_rate": 9.09149500090785e-06, + "loss": 0.0705, "step": 1241 }, { - "epoch": 0.5516322451698867, - "grad_norm": 0.6165368112917523, - "learning_rate": 9.956366087579492e-06, - "loss": 0.0688, + "epoch": 1.103019538188277, + "grad_norm": 0.5551642930392348, + "learning_rate": 9.089266082889113e-06, + "loss": 0.0641, "step": 1242 }, { - "epoch": 0.5520763935154341, - "grad_norm": 0.8107389760561104, - "learning_rate": 9.956110170571816e-06, - "loss": 0.0823, + "epoch": 1.1039076376554173, + "grad_norm": 0.5741357075684433, + "learning_rate": 9.087034707945444e-06, + "loss": 0.0606, "step": 1243 }, { - "epoch": 0.5525205418609815, - "grad_norm": 0.6920576051280234, - "learning_rate": 9.95585350857659e-06, - "loss": 0.0661, + "epoch": 1.1047957371225576, + "grad_norm": 0.5578487348310134, + "learning_rate": 9.084800877417513e-06, + "loss": 0.0605, "step": 1244 }, { - "epoch": 0.5529646902065289, - "grad_norm": 0.6032293972841873, - "learning_rate": 9.9555961016324e-06, - "loss": 0.0629, + "epoch": 1.105683836589698, + "grad_norm": 0.5352395623472251, + "learning_rate": 9.082564592647449e-06, + "loss": 0.051, "step": 1245 }, { - "epoch": 0.5534088385520763, - "grad_norm": 0.7423430185463719, - "learning_rate": 9.955337949777931e-06, - "loss": 0.0781, + "epoch": 1.1065719360568385, + "grad_norm": 0.6366168566998481, + "learning_rate": 9.080325854978869e-06, + "loss": 0.0703, "step": 1246 }, { - "epoch": 0.5538529868976239, - "grad_norm": 0.7004081423242674, - "learning_rate": 9.955079053051992e-06, - "loss": 0.0695, + "epoch": 1.1074600355239788, + "grad_norm": 0.48856849823475945, + "learning_rate": 9.078084665756856e-06, + "loss": 0.0553, "step": 1247 }, { - "epoch": 0.5542971352431713, - "grad_norm": 0.6468930634930644, - "learning_rate": 9.9548194114935e-06, - "loss": 0.0549, + "epoch": 1.108348134991119, + "grad_norm": 0.5363313823992678, + "learning_rate": 9.075841026327967e-06, + "loss": 0.0654, "step": 1248 }, { - "epoch": 0.5547412835887187, - "grad_norm": 0.4810505823476616, - "learning_rate": 9.954559025141484e-06, - "loss": 0.0514, + "epoch": 1.1092362344582594, + "grad_norm": 0.6250870230475393, + "learning_rate": 9.073594938040231e-06, + "loss": 0.0719, "step": 1249 }, { - "epoch": 0.5551854319342661, - "grad_norm": 0.802783907262409, - "learning_rate": 9.95429789403508e-06, - "loss": 0.1085, + "epoch": 1.1101243339253997, + "grad_norm": 0.42805727224477097, + "learning_rate": 9.071346402243155e-06, + "loss": 0.06, "step": 1250 }, { - "epoch": 0.5556295802798135, - "grad_norm": 0.5353100797059109, - "learning_rate": 9.954036018213548e-06, - "loss": 0.0448, + "epoch": 1.11101243339254, + "grad_norm": 0.564525499866512, + "learning_rate": 9.069095420287705e-06, + "loss": 0.0699, "step": 1251 }, { - "epoch": 0.5560737286253609, - "grad_norm": 0.5946904373037035, - "learning_rate": 9.953773397716247e-06, - "loss": 0.0724, + "epoch": 1.1119005328596803, + "grad_norm": 0.42065248951773765, + "learning_rate": 9.066841993526325e-06, + "loss": 0.0596, "step": 1252 }, { - "epoch": 0.5565178769709083, - "grad_norm": 1.0490812743358358, - "learning_rate": 9.953510032582652e-06, - "loss": 0.0675, + "epoch": 1.1127886323268206, + "grad_norm": 0.5328604744333013, + "learning_rate": 9.064586123312926e-06, + "loss": 0.0578, "step": 1253 }, { - "epoch": 0.5569620253164557, - "grad_norm": 0.5087723175679687, - "learning_rate": 9.953245922852355e-06, - "loss": 0.0599, + "epoch": 1.1136767317939609, + "grad_norm": 0.35638540080650655, + "learning_rate": 9.062327811002885e-06, + "loss": 0.0544, "step": 1254 }, { - "epoch": 0.5574061736620031, - "grad_norm": 0.5243033641589454, - "learning_rate": 9.952981068565055e-06, - "loss": 0.0569, + "epoch": 1.1145648312611012, + "grad_norm": 0.4377613323303497, + "learning_rate": 9.060067057953049e-06, + "loss": 0.0567, "step": 1255 }, { - "epoch": 0.5578503220075505, - "grad_norm": 0.7607475927289213, - "learning_rate": 9.952715469760566e-06, - "loss": 0.0775, + "epoch": 1.1154529307282415, + "grad_norm": 0.5594310008177616, + "learning_rate": 9.05780386552173e-06, + "loss": 0.0705, "step": 1256 }, { - "epoch": 0.558294470353098, - "grad_norm": 0.8559494882081514, - "learning_rate": 9.952449126478808e-06, - "loss": 0.0866, + "epoch": 1.116341030195382, + "grad_norm": 0.5130264103556861, + "learning_rate": 9.055538235068706e-06, + "loss": 0.0437, "step": 1257 }, { - "epoch": 0.5587386186986454, - "grad_norm": 0.7656790089546245, - "learning_rate": 9.952182038759818e-06, - "loss": 0.0722, + "epoch": 1.1172291296625223, + "grad_norm": 0.43041305807536706, + "learning_rate": 9.05327016795522e-06, + "loss": 0.0498, "step": 1258 }, { - "epoch": 0.5591827670441928, - "grad_norm": 0.5660284655001593, - "learning_rate": 9.951914206643744e-06, - "loss": 0.0564, + "epoch": 1.1181172291296626, + "grad_norm": 0.45207274454326535, + "learning_rate": 9.05099966554398e-06, + "loss": 0.0462, "step": 1259 }, { - "epoch": 0.5596269153897402, - "grad_norm": 0.5835362013761695, - "learning_rate": 9.95164563017085e-06, - "loss": 0.0788, + "epoch": 1.119005328596803, + "grad_norm": 0.41480525694898, + "learning_rate": 9.048726729199153e-06, + "loss": 0.0612, "step": 1260 }, { - "epoch": 0.5600710637352876, - "grad_norm": 0.6769019916334489, - "learning_rate": 9.951376309381502e-06, - "loss": 0.0594, + "epoch": 1.1198934280639432, + "grad_norm": 0.36469040454064994, + "learning_rate": 9.046451360286372e-06, + "loss": 0.0455, "step": 1261 }, { - "epoch": 0.560515212080835, - "grad_norm": 0.5253934078988655, - "learning_rate": 9.951106244316184e-06, - "loss": 0.053, + "epoch": 1.1207815275310835, + "grad_norm": 0.6199125426861651, + "learning_rate": 9.044173560172734e-06, + "loss": 0.0775, "step": 1262 }, { - "epoch": 0.5609593604263824, - "grad_norm": 0.7300022918221285, - "learning_rate": 9.950835435015495e-06, - "loss": 0.0651, + "epoch": 1.1216696269982238, + "grad_norm": 0.5056571834693232, + "learning_rate": 9.04189333022679e-06, + "loss": 0.0571, "step": 1263 }, { - "epoch": 0.5614035087719298, - "grad_norm": 0.5278089608881616, - "learning_rate": 9.95056388152014e-06, - "loss": 0.0555, + "epoch": 1.1225577264653641, + "grad_norm": 0.4087481618230986, + "learning_rate": 9.039610671818557e-06, + "loss": 0.0568, "step": 1264 }, { - "epoch": 0.5618476571174772, - "grad_norm": 0.6301552291199896, - "learning_rate": 9.950291583870938e-06, - "loss": 0.0862, + "epoch": 1.1234458259325044, + "grad_norm": 0.3487200395719137, + "learning_rate": 9.037325586319507e-06, + "loss": 0.0527, "step": 1265 }, { - "epoch": 0.5622918054630246, - "grad_norm": 0.7781648389786646, - "learning_rate": 9.950018542108818e-06, - "loss": 0.0699, + "epoch": 1.1243339253996447, + "grad_norm": 0.3772716895620028, + "learning_rate": 9.03503807510257e-06, + "loss": 0.0535, "step": 1266 }, { - "epoch": 0.5627359538085721, - "grad_norm": 0.6889192633019509, - "learning_rate": 9.949744756274828e-06, - "loss": 0.059, + "epoch": 1.125222024866785, + "grad_norm": 0.42781524365718604, + "learning_rate": 9.032748139542143e-06, + "loss": 0.0497, "step": 1267 }, { - "epoch": 0.5631801021541195, - "grad_norm": 0.84114983491113, - "learning_rate": 9.94947022641012e-06, - "loss": 0.0855, + "epoch": 1.1261101243339253, + "grad_norm": 0.40021805208371675, + "learning_rate": 9.030455781014062e-06, + "loss": 0.0566, "step": 1268 }, { - "epoch": 0.5636242504996669, - "grad_norm": 0.6070250007384902, - "learning_rate": 9.949194952555958e-06, - "loss": 0.0635, + "epoch": 1.1269982238010656, + "grad_norm": 0.42462869175507223, + "learning_rate": 9.028161000895633e-06, + "loss": 0.0554, "step": 1269 }, { - "epoch": 0.5640683988452143, - "grad_norm": 0.8282717974485503, - "learning_rate": 9.948918934753724e-06, - "loss": 0.0606, + "epoch": 1.1278863232682061, + "grad_norm": 0.7907863161810198, + "learning_rate": 9.025863800565614e-06, + "loss": 0.0632, "step": 1270 }, { - "epoch": 0.5645125471907617, - "grad_norm": 0.8327690010964344, - "learning_rate": 9.948642173044906e-06, - "loss": 0.0617, + "epoch": 1.1287744227353464, + "grad_norm": 0.5600540481669183, + "learning_rate": 9.023564181404214e-06, + "loss": 0.0683, "step": 1271 }, { - "epoch": 0.5649566955363091, - "grad_norm": 0.6842942727397727, - "learning_rate": 9.948364667471106e-06, - "loss": 0.0795, + "epoch": 1.1296625222024868, + "grad_norm": 0.46520643754289215, + "learning_rate": 9.021262144793097e-06, + "loss": 0.0556, "step": 1272 }, { - "epoch": 0.5654008438818565, - "grad_norm": 0.5302443265967214, - "learning_rate": 9.94808641807404e-06, - "loss": 0.0512, + "epoch": 1.130550621669627, + "grad_norm": 0.3557023137760619, + "learning_rate": 9.01895769211538e-06, + "loss": 0.0433, "step": 1273 }, { - "epoch": 0.5658449922274039, - "grad_norm": 0.6110171632056333, - "learning_rate": 9.94780742489553e-06, - "loss": 0.0574, + "epoch": 1.1314387211367674, + "grad_norm": 0.44882962300109963, + "learning_rate": 9.016650824755631e-06, + "loss": 0.0509, "step": 1274 }, { - "epoch": 0.5662891405729513, - "grad_norm": 0.5491078913412093, - "learning_rate": 9.947527687977519e-06, - "loss": 0.0512, + "epoch": 1.1323268206039077, + "grad_norm": 0.4700799341239974, + "learning_rate": 9.014341544099867e-06, + "loss": 0.0548, "step": 1275 }, { - "epoch": 0.5667332889184987, - "grad_norm": 0.6269674055110014, - "learning_rate": 9.94724720736205e-06, - "loss": 0.074, + "epoch": 1.133214920071048, + "grad_norm": 0.6097884107024176, + "learning_rate": 9.01202985153556e-06, + "loss": 0.0604, "step": 1276 }, { - "epoch": 0.5671774372640462, - "grad_norm": 0.7276246324646, - "learning_rate": 9.946965983091286e-06, - "loss": 0.1055, + "epoch": 1.1341030195381883, + "grad_norm": 0.5302403802978701, + "learning_rate": 9.009715748451625e-06, + "loss": 0.0453, "step": 1277 }, { - "epoch": 0.5676215856095936, - "grad_norm": 0.7755784175713893, - "learning_rate": 9.946684015207501e-06, - "loss": 0.0878, + "epoch": 1.1349911190053286, + "grad_norm": 0.4405789861583618, + "learning_rate": 9.007399236238431e-06, + "loss": 0.0526, "step": 1278 }, { - "epoch": 0.568065733955141, - "grad_norm": 0.6682673092927641, - "learning_rate": 9.94640130375308e-06, - "loss": 0.0985, + "epoch": 1.1358792184724689, + "grad_norm": 0.4426044799205729, + "learning_rate": 9.00508031628779e-06, + "loss": 0.0485, "step": 1279 }, { - "epoch": 0.5685098823006884, - "grad_norm": 0.71248575096518, - "learning_rate": 9.946117848770518e-06, - "loss": 0.0546, + "epoch": 1.1367673179396092, + "grad_norm": 0.41768050019952824, + "learning_rate": 9.00275898999296e-06, + "loss": 0.0467, "step": 1280 }, { - "epoch": 0.5689540306462358, - "grad_norm": 0.6304803324875314, - "learning_rate": 9.945833650302423e-06, - "loss": 0.0645, + "epoch": 1.1376554174067495, + "grad_norm": 0.5378318629821901, + "learning_rate": 9.000435258748654e-06, + "loss": 0.0644, "step": 1281 }, { - "epoch": 0.5693981789917832, - "grad_norm": 0.6709593851017193, - "learning_rate": 9.945548708391517e-06, - "loss": 0.0711, + "epoch": 1.1385435168738898, + "grad_norm": 0.40617647077937363, + "learning_rate": 8.998109123951018e-06, + "loss": 0.0518, "step": 1282 }, { - "epoch": 0.5698423273373306, - "grad_norm": 0.6036373550762799, - "learning_rate": 9.94526302308063e-06, - "loss": 0.0766, + "epoch": 1.1394316163410303, + "grad_norm": 0.4721038219531133, + "learning_rate": 8.995780586997647e-06, + "loss": 0.0666, "step": 1283 }, { - "epoch": 0.570286475682878, - "grad_norm": 0.6055441845545156, - "learning_rate": 9.944976594412702e-06, - "loss": 0.0663, + "epoch": 1.1403197158081706, + "grad_norm": 0.5340266166884385, + "learning_rate": 8.99344964928758e-06, + "loss": 0.0523, "step": 1284 }, { - "epoch": 0.5707306240284254, - "grad_norm": 1.1523396950548679, - "learning_rate": 9.944689422430794e-06, - "loss": 0.0876, + "epoch": 1.141207815275311, + "grad_norm": 0.3706596577324069, + "learning_rate": 8.9911163122213e-06, + "loss": 0.0502, "step": 1285 }, { - "epoch": 0.571174772373973, - "grad_norm": 0.8498417793676747, - "learning_rate": 9.94440150717807e-06, - "loss": 0.0764, + "epoch": 1.1420959147424512, + "grad_norm": 0.5716931124223276, + "learning_rate": 8.988780577200725e-06, + "loss": 0.0723, "step": 1286 }, { - "epoch": 0.5716189207195204, - "grad_norm": 0.5983413012513809, - "learning_rate": 9.944112848697809e-06, + "epoch": 1.1429840142095915, + "grad_norm": 0.4432282087167379, + "learning_rate": 8.986442445629223e-06, "loss": 0.0564, "step": 1287 }, { - "epoch": 0.5720630690650678, - "grad_norm": 0.7260849111122668, - "learning_rate": 9.9438234470334e-06, - "loss": 0.0728, + "epoch": 1.1438721136767318, + "grad_norm": 0.5446664959084349, + "learning_rate": 8.984101918911596e-06, + "loss": 0.0709, "step": 1288 }, { - "epoch": 0.5725072174106152, - "grad_norm": 1.0050849305541534, - "learning_rate": 9.943533302228346e-06, - "loss": 0.0711, + "epoch": 1.144760213143872, + "grad_norm": 0.4609866760289082, + "learning_rate": 8.981758998454084e-06, + "loss": 0.068, "step": 1289 }, { - "epoch": 0.5729513657561626, - "grad_norm": 0.800396030085453, - "learning_rate": 9.943242414326263e-06, - "loss": 0.0724, + "epoch": 1.1456483126110124, + "grad_norm": 0.45249331321924663, + "learning_rate": 8.979413685664368e-06, + "loss": 0.0564, "step": 1290 }, { - "epoch": 0.57339551410171, - "grad_norm": 0.9224090719942251, - "learning_rate": 9.94295078337087e-06, - "loss": 0.1002, + "epoch": 1.1465364120781527, + "grad_norm": 0.3810603314693275, + "learning_rate": 8.977065981951567e-06, + "loss": 0.0476, "step": 1291 }, { - "epoch": 0.5738396624472574, - "grad_norm": 0.8382142062568964, - "learning_rate": 9.942658409406012e-06, - "loss": 0.0756, + "epoch": 1.147424511545293, + "grad_norm": 0.44008903615910266, + "learning_rate": 8.974715888726237e-06, + "loss": 0.0671, "step": 1292 }, { - "epoch": 0.5742838107928048, - "grad_norm": 0.5876570577747893, - "learning_rate": 9.942365292475632e-06, - "loss": 0.0605, + "epoch": 1.1483126110124333, + "grad_norm": 0.4057835303831334, + "learning_rate": 8.972363407400363e-06, + "loss": 0.0526, "step": 1293 }, { - "epoch": 0.5747279591383522, - "grad_norm": 0.9775545409759507, - "learning_rate": 9.942071432623794e-06, - "loss": 0.0786, + "epoch": 1.1492007104795736, + "grad_norm": 0.42891192395424976, + "learning_rate": 8.970008539387373e-06, + "loss": 0.0502, "step": 1294 }, { - "epoch": 0.5751721074838996, - "grad_norm": 0.6598504226159471, - "learning_rate": 9.941776829894667e-06, - "loss": 0.0847, + "epoch": 1.150088809946714, + "grad_norm": 0.48816395571984783, + "learning_rate": 8.967651286102125e-06, + "loss": 0.0717, "step": 1295 }, { - "epoch": 0.5756162558294471, - "grad_norm": 0.7984485905609663, - "learning_rate": 9.941481484332537e-06, - "loss": 0.0699, + "epoch": 1.1509769094138544, + "grad_norm": 0.40035060600918576, + "learning_rate": 8.965291648960914e-06, + "loss": 0.0623, "step": 1296 }, { - "epoch": 0.5760604041749945, - "grad_norm": 0.6762263367814108, - "learning_rate": 9.941185395981799e-06, - "loss": 0.0786, + "epoch": 1.1518650088809947, + "grad_norm": 0.3497708657532295, + "learning_rate": 8.962929629381458e-06, + "loss": 0.0395, "step": 1297 }, { - "epoch": 0.5765045525205419, - "grad_norm": 0.7731297634993985, - "learning_rate": 9.940888564886959e-06, - "loss": 0.0673, + "epoch": 1.152753108348135, + "grad_norm": 0.6099565604093532, + "learning_rate": 8.960565228782918e-06, + "loss": 0.059, "step": 1298 }, { - "epoch": 0.5769487008660893, - "grad_norm": 0.919743609676064, - "learning_rate": 9.940590991092639e-06, - "loss": 0.065, + "epoch": 1.1536412078152753, + "grad_norm": 0.38396000043396333, + "learning_rate": 8.958198448585877e-06, + "loss": 0.0494, "step": 1299 }, { - "epoch": 0.5773928492116367, - "grad_norm": 0.6382780908912015, - "learning_rate": 9.940292674643564e-06, - "loss": 0.0578, + "epoch": 1.1545293072824157, + "grad_norm": 0.3689900063182137, + "learning_rate": 8.955829290212352e-06, + "loss": 0.0459, "step": 1300 }, { - "epoch": 0.5778369975571841, - "grad_norm": 0.6620249742612588, - "learning_rate": 9.93999361558458e-06, - "loss": 0.0635, + "epoch": 1.155417406749556, + "grad_norm": 1.096067697838026, + "learning_rate": 8.953457755085788e-06, + "loss": 0.0606, "step": 1301 }, { - "epoch": 0.5782811459027315, - "grad_norm": 0.7452135944776405, - "learning_rate": 9.93969381396064e-06, - "loss": 0.0806, + "epoch": 1.1563055062166963, + "grad_norm": 0.42149161110941696, + "learning_rate": 8.951083844631056e-06, + "loss": 0.0515, "step": 1302 }, { - "epoch": 0.5787252942482789, - "grad_norm": 0.6403437438856001, - "learning_rate": 9.93939326981681e-06, - "loss": 0.0687, + "epoch": 1.1571936056838366, + "grad_norm": 0.4316581327135203, + "learning_rate": 8.948707560274458e-06, + "loss": 0.061, "step": 1303 }, { - "epoch": 0.5791694425938263, - "grad_norm": 1.1014917271306792, - "learning_rate": 9.939091983198266e-06, - "loss": 0.098, + "epoch": 1.1580817051509769, + "grad_norm": 0.6841774110734303, + "learning_rate": 8.946328903443721e-06, + "loss": 0.0646, "step": 1304 }, { - "epoch": 0.5796135909393737, - "grad_norm": 1.0225638964559536, - "learning_rate": 9.938789954150296e-06, - "loss": 0.0644, + "epoch": 1.1589698046181172, + "grad_norm": 0.5965702797799218, + "learning_rate": 8.943947875567993e-06, + "loss": 0.0555, "step": 1305 }, { - "epoch": 0.5800577392849212, - "grad_norm": 0.637383310530645, - "learning_rate": 9.9384871827183e-06, - "loss": 0.06, + "epoch": 1.1598579040852575, + "grad_norm": 1.0044954129750756, + "learning_rate": 8.941564478077853e-06, + "loss": 0.0632, "step": 1306 }, { - "epoch": 0.5805018876304686, - "grad_norm": 0.6546930860338038, - "learning_rate": 9.93818366894779e-06, - "loss": 0.0681, + "epoch": 1.160746003552398, + "grad_norm": 0.6131401211707806, + "learning_rate": 8.9391787124053e-06, + "loss": 0.0677, "step": 1307 }, { - "epoch": 0.580946035976016, - "grad_norm": 0.5341000978856885, - "learning_rate": 9.93787941288439e-06, - "loss": 0.0589, + "epoch": 1.161634103019538, + "grad_norm": 0.48062753599742797, + "learning_rate": 8.936790579983759e-06, + "loss": 0.0529, "step": 1308 }, { - "epoch": 0.5813901843215634, - "grad_norm": 0.5540761577564739, - "learning_rate": 9.937574414573834e-06, - "loss": 0.07, + "epoch": 1.1625222024866786, + "grad_norm": 0.5714788031695348, + "learning_rate": 8.93440008224807e-06, + "loss": 0.054, "step": 1309 }, { - "epoch": 0.5818343326671108, - "grad_norm": 0.6399098162524178, - "learning_rate": 9.937268674061968e-06, - "loss": 0.0681, + "epoch": 1.163410301953819, + "grad_norm": 0.5084421088016741, + "learning_rate": 8.932007220634504e-06, + "loss": 0.0657, "step": 1310 }, { - "epoch": 0.5822784810126582, - "grad_norm": 0.9788827379867293, - "learning_rate": 9.936962191394753e-06, - "loss": 0.0775, + "epoch": 1.1642984014209592, + "grad_norm": 0.43120545860433124, + "learning_rate": 8.929611996580743e-06, + "loss": 0.0649, "step": 1311 }, { - "epoch": 0.5827226293582056, - "grad_norm": 0.5825969221377589, - "learning_rate": 9.936654966618255e-06, - "loss": 0.0662, + "epoch": 1.1651865008880995, + "grad_norm": 0.6763349385926654, + "learning_rate": 8.927214411525895e-06, + "loss": 0.0591, "step": 1312 }, { - "epoch": 0.583166777703753, - "grad_norm": 0.6990166610409554, - "learning_rate": 9.936346999778657e-06, - "loss": 0.0748, + "epoch": 1.1660746003552398, + "grad_norm": 0.46637995940230653, + "learning_rate": 8.924814466910483e-06, + "loss": 0.0536, "step": 1313 }, { - "epoch": 0.5836109260493004, - "grad_norm": 0.7726036624010885, - "learning_rate": 9.93603829092225e-06, - "loss": 0.0806, + "epoch": 1.16696269982238, + "grad_norm": 0.7188377730906739, + "learning_rate": 8.922412164176451e-06, + "loss": 0.062, "step": 1314 }, { - "epoch": 0.5840550743948478, - "grad_norm": 0.5892730393957506, - "learning_rate": 9.93572884009544e-06, - "loss": 0.0687, + "epoch": 1.1678507992895204, + "grad_norm": 0.6445433237920782, + "learning_rate": 8.920007504767154e-06, + "loss": 0.0638, "step": 1315 }, { - "epoch": 0.5844992227403953, - "grad_norm": 0.7324361900970796, - "learning_rate": 9.935418647344741e-06, - "loss": 0.0722, + "epoch": 1.1687388987566607, + "grad_norm": 0.6179906421694086, + "learning_rate": 8.917600490127369e-06, + "loss": 0.0652, "step": 1316 }, { - "epoch": 0.5849433710859427, - "grad_norm": 0.8954678705930711, - "learning_rate": 9.935107712716781e-06, - "loss": 0.0829, + "epoch": 1.169626998223801, + "grad_norm": 0.46166867855276844, + "learning_rate": 8.915191121703286e-06, + "loss": 0.0619, "step": 1317 }, { - "epoch": 0.5853875194314901, - "grad_norm": 0.5986143926701439, - "learning_rate": 9.9347960362583e-06, - "loss": 0.068, + "epoch": 1.1705150976909413, + "grad_norm": 0.6636117139219818, + "learning_rate": 8.912779400942508e-06, + "loss": 0.0751, "step": 1318 }, { - "epoch": 0.5858316677770375, - "grad_norm": 0.6105531585151114, - "learning_rate": 9.934483618016148e-06, - "loss": 0.0719, + "epoch": 1.1714031971580816, + "grad_norm": 0.5545918844554153, + "learning_rate": 8.910365329294053e-06, + "loss": 0.0522, "step": 1319 }, { - "epoch": 0.586275816122585, - "grad_norm": 0.7814579585476528, - "learning_rate": 9.934170458037285e-06, - "loss": 0.0899, + "epoch": 1.1722912966252221, + "grad_norm": 0.5529232213705042, + "learning_rate": 8.907948908208348e-06, + "loss": 0.0542, "step": 1320 }, { - "epoch": 0.5867199644681323, - "grad_norm": 0.6130620918486331, - "learning_rate": 9.933856556368785e-06, - "loss": 0.0663, + "epoch": 1.1731793960923624, + "grad_norm": 0.5572245790921896, + "learning_rate": 8.905530139137237e-06, + "loss": 0.0563, "step": 1321 }, { - "epoch": 0.5871641128136798, - "grad_norm": 0.7911643262947309, - "learning_rate": 9.933541913057833e-06, - "loss": 0.0782, + "epoch": 1.1740674955595027, + "grad_norm": 0.5234606984843095, + "learning_rate": 8.903109023533973e-06, + "loss": 0.0504, "step": 1322 }, { - "epoch": 0.5876082611592272, - "grad_norm": 0.7091228860294103, - "learning_rate": 9.933226528151725e-06, - "loss": 0.0637, + "epoch": 1.174955595026643, + "grad_norm": 0.5065711780095631, + "learning_rate": 8.900685562853214e-06, + "loss": 0.056, "step": 1323 }, { - "epoch": 0.5880524095047746, - "grad_norm": 0.7999422262730185, - "learning_rate": 9.93291040169787e-06, - "loss": 0.0616, + "epoch": 1.1758436944937833, + "grad_norm": 0.5187604950510653, + "learning_rate": 8.898259758551034e-06, + "loss": 0.0613, "step": 1324 }, { - "epoch": 0.588496557850322, - "grad_norm": 0.7189040382281308, - "learning_rate": 9.932593533743786e-06, - "loss": 0.0602, + "epoch": 1.1767317939609236, + "grad_norm": 0.5327828015491809, + "learning_rate": 8.895831612084909e-06, + "loss": 0.0678, "step": 1325 }, { - "epoch": 0.5889407061958695, - "grad_norm": 0.5837515830643734, - "learning_rate": 9.932275924337104e-06, - "loss": 0.0586, + "epoch": 1.177619893428064, + "grad_norm": 0.39890964975590437, + "learning_rate": 8.893401124913727e-06, + "loss": 0.0495, "step": 1326 }, { - "epoch": 0.5893848545414169, - "grad_norm": 0.8281667273984812, - "learning_rate": 9.931957573525566e-06, - "loss": 0.0648, + "epoch": 1.1785079928952042, + "grad_norm": 0.5199797120399963, + "learning_rate": 8.890968298497783e-06, + "loss": 0.0657, "step": 1327 }, { - "epoch": 0.5898290028869643, - "grad_norm": 0.5286485536715172, - "learning_rate": 9.931638481357024e-06, - "loss": 0.0531, + "epoch": 1.1793960923623446, + "grad_norm": 0.5754922883095701, + "learning_rate": 8.88853313429877e-06, + "loss": 0.063, "step": 1328 }, { - "epoch": 0.5902731512325117, - "grad_norm": 0.6275875445640038, - "learning_rate": 9.931318647879445e-06, - "loss": 0.064, + "epoch": 1.1802841918294849, + "grad_norm": 0.4571192744782011, + "learning_rate": 8.886095633779791e-06, + "loss": 0.0461, "step": 1329 }, { - "epoch": 0.5907172995780591, - "grad_norm": 0.8183874857273685, - "learning_rate": 9.930998073140905e-06, - "loss": 0.1023, + "epoch": 1.1811722912966252, + "grad_norm": 0.4329356837295055, + "learning_rate": 8.883655798405358e-06, + "loss": 0.0464, "step": 1330 }, { - "epoch": 0.5911614479236065, - "grad_norm": 0.6557543608908961, - "learning_rate": 9.93067675718959e-06, - "loss": 0.0591, + "epoch": 1.1820603907637655, + "grad_norm": 0.44880379408517723, + "learning_rate": 8.881213629641375e-06, + "loss": 0.0498, "step": 1331 }, { - "epoch": 0.5916055962691539, - "grad_norm": 0.7946472702361359, - "learning_rate": 9.930354700073803e-06, - "loss": 0.0783, + "epoch": 1.1829484902309058, + "grad_norm": 0.6826846866890449, + "learning_rate": 8.878769128955153e-06, + "loss": 0.0674, "step": 1332 }, { - "epoch": 0.5920497446147013, - "grad_norm": 0.4755584849969953, - "learning_rate": 9.930031901841952e-06, - "loss": 0.065, + "epoch": 1.1838365896980463, + "grad_norm": 0.47443992499617, + "learning_rate": 8.876322297815406e-06, + "loss": 0.0581, "step": 1333 }, { - "epoch": 0.5924938929602487, - "grad_norm": 0.6292133058734393, - "learning_rate": 9.929708362542559e-06, - "loss": 0.065, + "epoch": 1.1847246891651866, + "grad_norm": 0.4539546989361826, + "learning_rate": 8.873873137692245e-06, + "loss": 0.0563, "step": 1334 }, { - "epoch": 0.5929380413057961, - "grad_norm": 0.7194753693955848, - "learning_rate": 9.929384082224258e-06, - "loss": 0.0649, + "epoch": 1.1856127886323269, + "grad_norm": 0.5998094035285374, + "learning_rate": 8.871421650057184e-06, + "loss": 0.0517, "step": 1335 }, { - "epoch": 0.5933821896513436, - "grad_norm": 0.8182929882444262, - "learning_rate": 9.929059060935795e-06, - "loss": 0.0735, + "epoch": 1.1865008880994672, + "grad_norm": 0.49449962599266933, + "learning_rate": 8.868967836383128e-06, + "loss": 0.0562, "step": 1336 }, { - "epoch": 0.593826337996891, - "grad_norm": 0.6452166028375979, - "learning_rate": 9.928733298726024e-06, - "loss": 0.0773, + "epoch": 1.1873889875666075, + "grad_norm": 0.5202566548733357, + "learning_rate": 8.866511698144392e-06, + "loss": 0.0589, "step": 1337 }, { - "epoch": 0.5942704863424384, - "grad_norm": 0.6174785647804547, - "learning_rate": 9.928406795643913e-06, - "loss": 0.088, + "epoch": 1.1882770870337478, + "grad_norm": 0.4525532415700337, + "learning_rate": 8.864053236816675e-06, + "loss": 0.0513, "step": 1338 }, { - "epoch": 0.5947146346879858, - "grad_norm": 0.9075257756258466, - "learning_rate": 9.928079551738542e-06, - "loss": 0.0966, + "epoch": 1.189165186500888, + "grad_norm": 0.49480944203176547, + "learning_rate": 8.861592453877077e-06, + "loss": 0.0522, "step": 1339 }, { - "epoch": 0.5951587830335332, - "grad_norm": 0.7930757535133779, - "learning_rate": 9.927751567059103e-06, - "loss": 0.0788, + "epoch": 1.1900532859680284, + "grad_norm": 0.519556670613474, + "learning_rate": 8.859129350804095e-06, + "loss": 0.0628, "step": 1340 }, { - "epoch": 0.5956029313790806, - "grad_norm": 0.7918355971015104, - "learning_rate": 9.927422841654894e-06, - "loss": 0.0732, + "epoch": 1.1909413854351687, + "grad_norm": 0.4581631626495746, + "learning_rate": 8.856663929077615e-06, + "loss": 0.0597, "step": 1341 }, { - "epoch": 0.596047079724628, - "grad_norm": 0.5344795042621896, - "learning_rate": 9.92709337557533e-06, - "loss": 0.0555, + "epoch": 1.191829484902309, + "grad_norm": 0.581342185965847, + "learning_rate": 8.854196190178922e-06, + "loss": 0.0512, "step": 1342 }, { - "epoch": 0.5964912280701754, - "grad_norm": 0.7151020498309698, - "learning_rate": 9.926763168869935e-06, - "loss": 0.0699, + "epoch": 1.1927175843694493, + "grad_norm": 0.45221328804309713, + "learning_rate": 8.85172613559069e-06, + "loss": 0.0581, "step": 1343 }, { - "epoch": 0.5969353764157228, - "grad_norm": 0.8228281453238034, - "learning_rate": 9.926432221588342e-06, - "loss": 0.0723, + "epoch": 1.1936056838365896, + "grad_norm": 0.45561203604976397, + "learning_rate": 8.849253766796982e-06, + "loss": 0.0539, "step": 1344 }, { - "epoch": 0.5973795247612703, - "grad_norm": 0.6976750613205994, - "learning_rate": 9.926100533780304e-06, - "loss": 0.0877, + "epoch": 1.19449378330373, + "grad_norm": 0.3953246995448872, + "learning_rate": 8.846779085283255e-06, + "loss": 0.0554, "step": 1345 }, { - "epoch": 0.5978236731068177, - "grad_norm": 0.8796022908023586, - "learning_rate": 9.925768105495675e-06, - "loss": 0.0867, + "epoch": 1.1953818827708704, + "grad_norm": 0.5479191632194884, + "learning_rate": 8.844302092536357e-06, + "loss": 0.0632, "step": 1346 }, { - "epoch": 0.5982678214523651, - "grad_norm": 0.6214669294617633, - "learning_rate": 9.925434936784426e-06, - "loss": 0.0724, + "epoch": 1.1962699822380107, + "grad_norm": 0.5165362440829403, + "learning_rate": 8.841822790044519e-06, + "loss": 0.0738, "step": 1347 }, { - "epoch": 0.5987119697979125, - "grad_norm": 0.5493814047261378, - "learning_rate": 9.925101027696636e-06, - "loss": 0.0546, + "epoch": 1.197158081705151, + "grad_norm": 0.45246299380103777, + "learning_rate": 8.839341179297363e-06, + "loss": 0.0509, "step": 1348 }, { - "epoch": 0.5991561181434599, - "grad_norm": 0.6738873954295155, - "learning_rate": 9.924766378282499e-06, - "loss": 0.068, + "epoch": 1.1980461811722913, + "grad_norm": 0.4520530861715212, + "learning_rate": 8.8368572617859e-06, + "loss": 0.0604, "step": 1349 }, { - "epoch": 0.5996002664890073, - "grad_norm": 0.662002450028893, - "learning_rate": 9.92443098859232e-06, - "loss": 0.0618, + "epoch": 1.1989342806394316, + "grad_norm": 0.4315426009039334, + "learning_rate": 8.834371039002523e-06, + "loss": 0.0583, "step": 1350 }, { - "epoch": 0.6000444148345547, - "grad_norm": 0.5691844703394149, - "learning_rate": 9.92409485867651e-06, - "loss": 0.0675, + "epoch": 1.199822380106572, + "grad_norm": 0.58482645144901, + "learning_rate": 8.83188251244101e-06, + "loss": 0.061, "step": 1351 }, { - "epoch": 0.6004885631801021, - "grad_norm": 0.5107643154244839, - "learning_rate": 9.923757988585599e-06, - "loss": 0.0582, + "epoch": 1.2007104795737122, + "grad_norm": 0.5362274737959619, + "learning_rate": 8.829391683596528e-06, + "loss": 0.0635, "step": 1352 }, { - "epoch": 0.6009327115256495, - "grad_norm": 0.4892180420795259, - "learning_rate": 9.923420378370221e-06, - "loss": 0.0707, + "epoch": 1.2015985790408525, + "grad_norm": 0.3838263244907273, + "learning_rate": 8.826898553965624e-06, + "loss": 0.0539, "step": 1353 }, { - "epoch": 0.6013768598711969, - "grad_norm": 0.5128501684775312, - "learning_rate": 9.923082028081125e-06, - "loss": 0.0515, + "epoch": 1.2024866785079928, + "grad_norm": 0.6316574808669704, + "learning_rate": 8.824403125046225e-06, + "loss": 0.0652, "step": 1354 }, { - "epoch": 0.6018210082167444, - "grad_norm": 0.6712578771133637, - "learning_rate": 9.922742937769172e-06, - "loss": 0.0668, + "epoch": 1.2033747779751331, + "grad_norm": 0.43602766988187674, + "learning_rate": 8.821905398337645e-06, + "loss": 0.0528, "step": 1355 }, { - "epoch": 0.6022651565622918, - "grad_norm": 0.8489596826538364, - "learning_rate": 9.922403107485335e-06, - "loss": 0.0852, + "epoch": 1.2042628774422734, + "grad_norm": 0.4289281757587253, + "learning_rate": 8.819405375340573e-06, + "loss": 0.0547, "step": 1356 }, { - "epoch": 0.6027093049078392, - "grad_norm": 0.7130581914563381, - "learning_rate": 9.922062537280692e-06, - "loss": 0.0877, + "epoch": 1.205150976909414, + "grad_norm": 0.5202563473870233, + "learning_rate": 8.81690305755708e-06, + "loss": 0.0545, "step": 1357 }, { - "epoch": 0.6031534532533867, - "grad_norm": 0.5999507766881641, - "learning_rate": 9.921721227206438e-06, - "loss": 0.0693, + "epoch": 1.206039076376554, + "grad_norm": 0.6122205843521804, + "learning_rate": 8.814398446490619e-06, + "loss": 0.0736, "step": 1358 }, { - "epoch": 0.603597601598934, - "grad_norm": 0.5747773135899322, - "learning_rate": 9.92137917731388e-06, - "loss": 0.0556, + "epoch": 1.2069271758436946, + "grad_norm": 0.4929556651394607, + "learning_rate": 8.811891543646015e-06, + "loss": 0.0549, "step": 1359 }, { - "epoch": 0.6040417499444815, - "grad_norm": 0.7771993716251702, - "learning_rate": 9.921036387654429e-06, - "loss": 0.0643, + "epoch": 1.2078152753108349, + "grad_norm": 0.39153224898843647, + "learning_rate": 8.809382350529474e-06, + "loss": 0.0538, "step": 1360 }, { - "epoch": 0.6044858982900289, - "grad_norm": 1.307956020844314, - "learning_rate": 9.920692858279616e-06, - "loss": 0.0551, + "epoch": 1.2087033747779752, + "grad_norm": 0.5587191889924773, + "learning_rate": 8.806870868648576e-06, + "loss": 0.0791, "step": 1361 }, { - "epoch": 0.6049300466355763, - "grad_norm": 0.6255379323591398, - "learning_rate": 9.92034858924108e-06, - "loss": 0.0636, + "epoch": 1.2095914742451155, + "grad_norm": 0.506286043805143, + "learning_rate": 8.804357099512279e-06, + "loss": 0.0526, "step": 1362 }, { - "epoch": 0.6053741949811237, - "grad_norm": 0.553962551501975, - "learning_rate": 9.92000358059057e-06, - "loss": 0.0814, + "epoch": 1.2104795737122558, + "grad_norm": 0.4383105018704721, + "learning_rate": 8.80184104463091e-06, + "loss": 0.0578, "step": 1363 }, { - "epoch": 0.6058183433266711, - "grad_norm": 0.7454747633443178, - "learning_rate": 9.919657832379943e-06, - "loss": 0.0603, + "epoch": 1.211367673179396, + "grad_norm": 0.5667322270754526, + "learning_rate": 8.799322705516174e-06, + "loss": 0.0737, "step": 1364 }, { - "epoch": 0.6062624916722186, - "grad_norm": 0.9733260117481642, - "learning_rate": 9.919311344661174e-06, - "loss": 0.1153, + "epoch": 1.2122557726465364, + "grad_norm": 0.5519873093279143, + "learning_rate": 8.796802083681149e-06, + "loss": 0.0542, "step": 1365 }, { - "epoch": 0.606706640017766, - "grad_norm": 0.613902681985122, - "learning_rate": 9.918964117486346e-06, - "loss": 0.0578, + "epoch": 1.2131438721136767, + "grad_norm": 0.40449215131526517, + "learning_rate": 8.79427918064028e-06, + "loss": 0.0579, "step": 1366 }, { - "epoch": 0.6071507883633134, - "grad_norm": 0.7376648326818326, - "learning_rate": 9.918616150907651e-06, - "loss": 0.066, + "epoch": 1.214031971580817, + "grad_norm": 0.35529012811045607, + "learning_rate": 8.791753997909384e-06, + "loss": 0.0558, "step": 1367 }, { - "epoch": 0.6075949367088608, - "grad_norm": 0.6476167687641429, - "learning_rate": 9.918267444977398e-06, - "loss": 0.0467, + "epoch": 1.2149200710479573, + "grad_norm": 0.47815233822568254, + "learning_rate": 8.789226537005651e-06, + "loss": 0.051, "step": 1368 }, { - "epoch": 0.6080390850544082, - "grad_norm": 0.6720043569712078, - "learning_rate": 9.917917999747999e-06, - "loss": 0.0647, + "epoch": 1.2158081705150976, + "grad_norm": 0.4312255381241265, + "learning_rate": 8.786696799447638e-06, + "loss": 0.0486, "step": 1369 }, { - "epoch": 0.6084832333999556, - "grad_norm": 0.6554718376719761, - "learning_rate": 9.917567815271986e-06, - "loss": 0.0652, + "epoch": 1.2166962699822381, + "grad_norm": 0.42906496410040296, + "learning_rate": 8.784164786755268e-06, + "loss": 0.0549, "step": 1370 }, { - "epoch": 0.608927381745503, - "grad_norm": 0.7668531453750762, - "learning_rate": 9.917216891601996e-06, - "loss": 0.07, + "epoch": 1.2175843694493784, + "grad_norm": 0.5772074687235432, + "learning_rate": 8.781630500449833e-06, + "loss": 0.0528, "step": 1371 }, { - "epoch": 0.6093715300910504, - "grad_norm": 0.8088811507082688, - "learning_rate": 9.916865228790776e-06, - "loss": 0.0771, + "epoch": 1.2184724689165187, + "grad_norm": 0.4262348332812471, + "learning_rate": 8.779093942053989e-06, + "loss": 0.0625, "step": 1372 }, { - "epoch": 0.6098156784365978, - "grad_norm": 0.687085496356637, - "learning_rate": 9.91651282689119e-06, - "loss": 0.0758, + "epoch": 1.219360568383659, + "grad_norm": 0.47326752992090304, + "learning_rate": 8.77655511309176e-06, + "loss": 0.0477, "step": 1373 }, { - "epoch": 0.6102598267821452, - "grad_norm": 0.7118263597209087, - "learning_rate": 9.916159685956208e-06, - "loss": 0.0704, + "epoch": 1.2202486678507993, + "grad_norm": 0.6018578470735128, + "learning_rate": 8.774014015088533e-06, + "loss": 0.0688, "step": 1374 }, { - "epoch": 0.6107039751276927, - "grad_norm": 0.5722664877188374, - "learning_rate": 9.915805806038917e-06, - "loss": 0.0568, + "epoch": 1.2211367673179396, + "grad_norm": 0.6063939648321814, + "learning_rate": 8.771470649571056e-06, + "loss": 0.0563, "step": 1375 }, { - "epoch": 0.6111481234732401, - "grad_norm": 0.5809145124340924, - "learning_rate": 9.915451187192507e-06, - "loss": 0.0508, + "epoch": 1.22202486678508, + "grad_norm": 0.5629535965936708, + "learning_rate": 8.768925018067445e-06, + "loss": 0.051, "step": 1376 }, { - "epoch": 0.6115922718187875, - "grad_norm": 0.7936146709815162, - "learning_rate": 9.915095829470284e-06, - "loss": 0.0685, + "epoch": 1.2229129662522202, + "grad_norm": 0.47636128599043764, + "learning_rate": 8.76637712210717e-06, + "loss": 0.0521, "step": 1377 }, { - "epoch": 0.6120364201643349, - "grad_norm": 0.6343789137617302, - "learning_rate": 9.914739732925665e-06, - "loss": 0.0727, + "epoch": 1.2238010657193605, + "grad_norm": 0.5940845743589005, + "learning_rate": 8.763826963221067e-06, + "loss": 0.0544, "step": 1378 }, { - "epoch": 0.6124805685098823, - "grad_norm": 0.6049507318538985, - "learning_rate": 9.914382897612178e-06, - "loss": 0.0613, + "epoch": 1.2246891651865008, + "grad_norm": 0.6114080307110875, + "learning_rate": 8.761274542941329e-06, + "loss": 0.0538, "step": 1379 }, { - "epoch": 0.6129247168554297, - "grad_norm": 0.7820325942346816, - "learning_rate": 9.91402532358346e-06, - "loss": 0.0631, + "epoch": 1.2255772646536411, + "grad_norm": 0.5421878884736326, + "learning_rate": 8.75871986280151e-06, + "loss": 0.0597, "step": 1380 }, { - "epoch": 0.6133688652009771, - "grad_norm": 0.647686549326212, - "learning_rate": 9.913667010893261e-06, - "loss": 0.0593, + "epoch": 1.2264653641207814, + "grad_norm": 0.42808699209934453, + "learning_rate": 8.756162924336522e-06, + "loss": 0.0436, "step": 1381 }, { - "epoch": 0.6138130135465245, - "grad_norm": 0.5699910207599692, - "learning_rate": 9.913307959595443e-06, - "loss": 0.0617, + "epoch": 1.2273534635879217, + "grad_norm": 0.5262320114348207, + "learning_rate": 8.753603729082629e-06, + "loss": 0.0598, "step": 1382 }, { - "epoch": 0.6142571618920719, - "grad_norm": 0.8214987498163691, - "learning_rate": 9.912948169743977e-06, - "loss": 0.1062, + "epoch": 1.2282415630550623, + "grad_norm": 0.4774662835691713, + "learning_rate": 8.751042278577455e-06, + "loss": 0.0545, "step": 1383 }, { - "epoch": 0.6147013102376193, - "grad_norm": 0.7190447874189771, - "learning_rate": 9.912587641392943e-06, - "loss": 0.0816, + "epoch": 1.2291296625222026, + "grad_norm": 0.45697823548933714, + "learning_rate": 8.74847857435998e-06, + "loss": 0.0577, "step": 1384 }, { - "epoch": 0.6151454585831668, - "grad_norm": 0.4801110982287438, - "learning_rate": 9.912226374596536e-06, - "loss": 0.0531, + "epoch": 1.2300177619893429, + "grad_norm": 0.37780797322946186, + "learning_rate": 8.745912617970534e-06, + "loss": 0.043, "step": 1385 }, { - "epoch": 0.6155896069287142, - "grad_norm": 0.5777279849946639, - "learning_rate": 9.911864369409062e-06, - "loss": 0.0593, + "epoch": 1.2309058614564832, + "grad_norm": 0.49860752947976433, + "learning_rate": 8.743344410950804e-06, + "loss": 0.0569, "step": 1386 }, { - "epoch": 0.6160337552742616, - "grad_norm": 0.6445627055234285, - "learning_rate": 9.911501625884934e-06, - "loss": 0.0562, + "epoch": 1.2317939609236235, + "grad_norm": 0.4259825457515346, + "learning_rate": 8.740773954843828e-06, + "loss": 0.0541, "step": 1387 }, { - "epoch": 0.616477903619809, - "grad_norm": 0.6402760221505619, - "learning_rate": 9.911138144078681e-06, - "loss": 0.0628, + "epoch": 1.2326820603907638, + "grad_norm": 0.4399101307272299, + "learning_rate": 8.738201251193993e-06, + "loss": 0.055, "step": 1388 }, { - "epoch": 0.6169220519653564, - "grad_norm": 0.5943075838810645, - "learning_rate": 9.910773924044937e-06, - "loss": 0.0694, + "epoch": 1.233570159857904, + "grad_norm": 0.4591677679252911, + "learning_rate": 8.735626301547042e-06, + "loss": 0.0558, "step": 1389 }, { - "epoch": 0.6173662003109038, - "grad_norm": 0.6887164265976037, - "learning_rate": 9.910408965838455e-06, - "loss": 0.0662, + "epoch": 1.2344582593250444, + "grad_norm": 0.5111308336974323, + "learning_rate": 8.733049107450063e-06, + "loss": 0.0498, "step": 1390 }, { - "epoch": 0.6178103486564512, - "grad_norm": 0.7025269495789147, - "learning_rate": 9.91004326951409e-06, - "loss": 0.0596, + "epoch": 1.2353463587921847, + "grad_norm": 0.5975734851527944, + "learning_rate": 8.73046967045149e-06, + "loss": 0.0576, "step": 1391 }, { - "epoch": 0.6182544970019986, - "grad_norm": 0.6968614560116964, - "learning_rate": 9.909676835126819e-06, - "loss": 0.08, + "epoch": 1.236234458259325, + "grad_norm": 0.45342634735673404, + "learning_rate": 8.727887992101108e-06, + "loss": 0.0489, "step": 1392 }, { - "epoch": 0.618698645347546, - "grad_norm": 0.9599784608542535, - "learning_rate": 9.909309662731713e-06, - "loss": 0.0981, + "epoch": 1.2371225577264653, + "grad_norm": 0.35032485349199205, + "learning_rate": 8.725304073950054e-06, + "loss": 0.0451, "step": 1393 }, { - "epoch": 0.6191427936930934, - "grad_norm": 0.5936127126118025, - "learning_rate": 9.908941752383974e-06, - "loss": 0.0502, + "epoch": 1.2380106571936056, + "grad_norm": 0.4010843131050206, + "learning_rate": 8.7227179175508e-06, + "loss": 0.0466, "step": 1394 }, { - "epoch": 0.619586942038641, - "grad_norm": 0.6281665293752687, - "learning_rate": 9.9085731041389e-06, - "loss": 0.0894, + "epoch": 1.238898756660746, + "grad_norm": 0.4213918581386476, + "learning_rate": 8.72012952445717e-06, + "loss": 0.0541, "step": 1395 }, { - "epoch": 0.6200310903841884, - "grad_norm": 0.7270920690197641, - "learning_rate": 9.908203718051907e-06, - "loss": 0.0772, + "epoch": 1.2397868561278864, + "grad_norm": 0.4706730549027241, + "learning_rate": 8.717538896224333e-06, + "loss": 0.0552, "step": 1396 }, { - "epoch": 0.6204752387297358, - "grad_norm": 0.9689326115460888, - "learning_rate": 9.90783359417852e-06, - "loss": 0.0955, + "epoch": 1.2406749555950267, + "grad_norm": 0.35322390853742186, + "learning_rate": 8.714946034408793e-06, + "loss": 0.0486, "step": 1397 }, { - "epoch": 0.6209193870752832, - "grad_norm": 0.5718494750591436, - "learning_rate": 9.907462732574373e-06, - "loss": 0.0586, + "epoch": 1.241563055062167, + "grad_norm": 0.415940827305024, + "learning_rate": 8.712350940568403e-06, + "loss": 0.0622, "step": 1398 }, { - "epoch": 0.6213635354208306, - "grad_norm": 0.547133748335618, - "learning_rate": 9.907091133295214e-06, - "loss": 0.0546, + "epoch": 1.2424511545293073, + "grad_norm": 0.36378994202820725, + "learning_rate": 8.709753616262355e-06, + "loss": 0.0648, "step": 1399 }, { - "epoch": 0.621807683766378, - "grad_norm": 0.7492139302396027, - "learning_rate": 9.906718796396901e-06, - "loss": 0.0726, + "epoch": 1.2433392539964476, + "grad_norm": 0.4597903087653849, + "learning_rate": 8.70715406305118e-06, + "loss": 0.0516, "step": 1400 }, { - "epoch": 0.6222518321119254, - "grad_norm": 1.1332966961373652, - "learning_rate": 9.906345721935402e-06, - "loss": 0.0837, + "epoch": 1.244227353463588, + "grad_norm": 0.41001654230599194, + "learning_rate": 8.704552282496752e-06, + "loss": 0.0402, "step": 1401 }, { - "epoch": 0.6226959804574728, - "grad_norm": 0.6506799230647055, - "learning_rate": 9.905971909966798e-06, - "loss": 0.0676, + "epoch": 1.2451154529307282, + "grad_norm": 0.43791431676495707, + "learning_rate": 8.701948276162277e-06, + "loss": 0.0523, "step": 1402 }, { - "epoch": 0.6231401288030202, - "grad_norm": 0.7078271615105967, - "learning_rate": 9.905597360547276e-06, - "loss": 0.0763, + "epoch": 1.2460035523978685, + "grad_norm": 0.4875749596099277, + "learning_rate": 8.699342045612304e-06, + "loss": 0.0665, "step": 1403 }, { - "epoch": 0.6235842771485676, - "grad_norm": 0.8061592052669874, - "learning_rate": 9.90522207373314e-06, - "loss": 0.0941, + "epoch": 1.2468916518650088, + "grad_norm": 0.5643094148020755, + "learning_rate": 8.696733592412717e-06, + "loss": 0.0532, "step": 1404 }, { - "epoch": 0.6240284254941151, - "grad_norm": 0.7215188703499849, - "learning_rate": 9.904846049580804e-06, - "loss": 0.0913, + "epoch": 1.2477797513321491, + "grad_norm": 0.41507047223785865, + "learning_rate": 8.694122918130732e-06, + "loss": 0.0465, "step": 1405 }, { - "epoch": 0.6244725738396625, - "grad_norm": 0.7648810068346831, - "learning_rate": 9.904469288146785e-06, - "loss": 0.103, + "epoch": 1.2486678507992894, + "grad_norm": 0.41302290601101155, + "learning_rate": 8.691510024334903e-06, + "loss": 0.0482, "step": 1406 }, { - "epoch": 0.6249167221852099, - "grad_norm": 0.7848759400980109, - "learning_rate": 9.90409178948772e-06, - "loss": 0.0853, + "epoch": 1.24955595026643, + "grad_norm": 0.5133229562261602, + "learning_rate": 8.688894912595116e-06, + "loss": 0.0618, "step": 1407 }, { - "epoch": 0.6253608705307573, - "grad_norm": 0.5249169314876924, - "learning_rate": 9.903713553660352e-06, - "loss": 0.0539, + "epoch": 1.25044404973357, + "grad_norm": 0.3971774806483017, + "learning_rate": 8.686277584482592e-06, + "loss": 0.0417, "step": 1408 }, { - "epoch": 0.6258050188763047, - "grad_norm": 0.651696006803284, - "learning_rate": 9.90333458072154e-06, - "loss": 0.0725, + "epoch": 1.2513321492007106, + "grad_norm": 0.40694314461486464, + "learning_rate": 8.68365804156988e-06, + "loss": 0.0476, "step": 1409 }, { - "epoch": 0.6262491672218521, - "grad_norm": 0.5581372640238701, - "learning_rate": 9.902954870728246e-06, - "loss": 0.0516, + "epoch": 1.2522202486678509, + "grad_norm": 0.5396595665280396, + "learning_rate": 8.681036285430864e-06, + "loss": 0.0603, "step": 1410 }, { - "epoch": 0.6266933155673995, - "grad_norm": 0.46394395658623727, - "learning_rate": 9.902574423737547e-06, - "loss": 0.0543, + "epoch": 1.2531083481349912, + "grad_norm": 0.4952521043170604, + "learning_rate": 8.678412317640753e-06, + "loss": 0.055, "step": 1411 }, { - "epoch": 0.6271374639129469, - "grad_norm": 0.6101555748112776, - "learning_rate": 9.902193239806634e-06, - "loss": 0.0688, + "epoch": 1.2539964476021315, + "grad_norm": 0.40182177205070296, + "learning_rate": 8.675786139776085e-06, + "loss": 0.0617, "step": 1412 }, { - "epoch": 0.6275816122584943, - "grad_norm": 0.6266576234326965, - "learning_rate": 9.901811318992802e-06, - "loss": 0.0619, + "epoch": 1.2548845470692718, + "grad_norm": 0.43728542560992206, + "learning_rate": 8.673157753414733e-06, + "loss": 0.052, "step": 1413 }, { - "epoch": 0.6280257606040418, - "grad_norm": 0.9093069033662159, - "learning_rate": 9.901428661353462e-06, - "loss": 0.1009, + "epoch": 1.255772646536412, + "grad_norm": 0.39273900995788635, + "learning_rate": 8.670527160135888e-06, + "loss": 0.0514, "step": 1414 }, { - "epoch": 0.6284699089495892, - "grad_norm": 0.796133446241698, - "learning_rate": 9.901045266946134e-06, - "loss": 0.0919, + "epoch": 1.2566607460035524, + "grad_norm": 0.42385266693033374, + "learning_rate": 8.667894361520073e-06, + "loss": 0.0621, "step": 1415 }, { - "epoch": 0.6289140572951366, - "grad_norm": 0.75094830010195, - "learning_rate": 9.900661135828448e-06, - "loss": 0.0594, + "epoch": 1.2575488454706927, + "grad_norm": 0.7726227937125522, + "learning_rate": 8.665259359149132e-06, + "loss": 0.0527, "step": 1416 }, { - "epoch": 0.629358205640684, - "grad_norm": 0.8379496784608408, - "learning_rate": 9.900276268058147e-06, - "loss": 0.0659, + "epoch": 1.258436944937833, + "grad_norm": 0.4025470687207859, + "learning_rate": 8.662622154606238e-06, + "loss": 0.0566, "step": 1417 }, { - "epoch": 0.6298023539862314, - "grad_norm": 0.7794466397530133, - "learning_rate": 9.899890663693078e-06, - "loss": 0.086, + "epoch": 1.2593250444049733, + "grad_norm": 0.37050503239021354, + "learning_rate": 8.65998274947588e-06, + "loss": 0.0549, "step": 1418 }, { - "epoch": 0.6302465023317788, - "grad_norm": 1.781154839834256, - "learning_rate": 9.899504322791212e-06, - "loss": 0.1089, + "epoch": 1.2602131438721136, + "grad_norm": 0.4780706131062481, + "learning_rate": 8.657341145343876e-06, + "loss": 0.056, "step": 1419 }, { - "epoch": 0.6306906506773262, - "grad_norm": 0.582797339406938, - "learning_rate": 9.899117245410615e-06, - "loss": 0.0449, + "epoch": 1.261101243339254, + "grad_norm": 0.6251755282173199, + "learning_rate": 8.65469734379736e-06, + "loss": 0.0709, "step": 1420 }, { - "epoch": 0.6311347990228736, - "grad_norm": 0.7463622614388931, - "learning_rate": 9.898729431609477e-06, - "loss": 0.0542, + "epoch": 1.2619893428063942, + "grad_norm": 0.46926370918419985, + "learning_rate": 8.652051346424792e-06, + "loss": 0.0545, "step": 1421 }, { - "epoch": 0.631578947368421, - "grad_norm": 0.7697052470584549, - "learning_rate": 9.89834088144609e-06, - "loss": 0.0635, + "epoch": 1.2628774422735347, + "grad_norm": 0.6219857665197569, + "learning_rate": 8.649403154815946e-06, + "loss": 0.0632, "step": 1422 }, { - "epoch": 0.6320230957139684, - "grad_norm": 0.900347396335065, - "learning_rate": 9.897951594978858e-06, - "loss": 0.0778, + "epoch": 1.263765541740675, + "grad_norm": 0.3997124347103753, + "learning_rate": 8.646752770561917e-06, + "loss": 0.046, "step": 1423 }, { - "epoch": 0.6324672440595159, - "grad_norm": 0.6468712468526486, - "learning_rate": 9.897561572266301e-06, - "loss": 0.066, + "epoch": 1.2646536412078153, + "grad_norm": 0.4623517522205548, + "learning_rate": 8.644100195255114e-06, + "loss": 0.0544, "step": 1424 }, { - "epoch": 0.6329113924050633, - "grad_norm": 0.4986782583243913, - "learning_rate": 9.897170813367045e-06, - "loss": 0.0662, + "epoch": 1.2655417406749556, + "grad_norm": 0.4577135539831789, + "learning_rate": 8.64144543048927e-06, + "loss": 0.0505, "step": 1425 }, { - "epoch": 0.6333555407506107, - "grad_norm": 0.6014710971137512, - "learning_rate": 9.896779318339826e-06, - "loss": 0.0657, + "epoch": 1.266429840142096, + "grad_norm": 0.6400434262376272, + "learning_rate": 8.638788477859424e-06, + "loss": 0.0646, "step": 1426 }, { - "epoch": 0.6337996890961581, - "grad_norm": 0.8579597362562785, - "learning_rate": 9.896387087243496e-06, - "loss": 0.08, + "epoch": 1.2673179396092362, + "grad_norm": 0.3860193622343788, + "learning_rate": 8.636129338961936e-06, + "loss": 0.0426, "step": 1427 }, { - "epoch": 0.6342438374417055, - "grad_norm": 0.9386752220131082, - "learning_rate": 9.89599412013701e-06, - "loss": 0.1233, + "epoch": 1.2682060390763765, + "grad_norm": 0.4060254048456331, + "learning_rate": 8.633468015394479e-06, + "loss": 0.048, "step": 1428 }, { - "epoch": 0.6346879857872529, - "grad_norm": 0.7426460359725069, - "learning_rate": 9.89560041707944e-06, - "loss": 0.0861, + "epoch": 1.2690941385435168, + "grad_norm": 0.49627951706197526, + "learning_rate": 8.630804508756035e-06, + "loss": 0.0501, "step": 1429 }, { - "epoch": 0.6351321341328003, - "grad_norm": 0.5143125228492677, - "learning_rate": 9.895205978129966e-06, - "loss": 0.0446, + "epoch": 1.2699822380106571, + "grad_norm": 0.5434086582577016, + "learning_rate": 8.6281388206469e-06, + "loss": 0.0463, "step": 1430 }, { - "epoch": 0.6355762824783477, - "grad_norm": 1.049177103786211, - "learning_rate": 9.894810803347878e-06, - "loss": 0.0912, + "epoch": 1.2708703374777977, + "grad_norm": 0.6291277946566355, + "learning_rate": 8.625470952668685e-06, + "loss": 0.0665, "step": 1431 }, { - "epoch": 0.6360204308238951, - "grad_norm": 0.5972518436510755, - "learning_rate": 9.894414892792579e-06, - "loss": 0.0657, + "epoch": 1.2717584369449377, + "grad_norm": 0.4254592538836703, + "learning_rate": 8.6228009064243e-06, + "loss": 0.0467, "step": 1432 }, { - "epoch": 0.6364645791694425, - "grad_norm": 0.4154671445638412, - "learning_rate": 9.894018246523577e-06, - "loss": 0.0532, + "epoch": 1.2726465364120783, + "grad_norm": 0.5303595485506196, + "learning_rate": 8.620128683517972e-06, + "loss": 0.0672, "step": 1433 }, { - "epoch": 0.6369087275149901, - "grad_norm": 0.65372867549855, - "learning_rate": 9.893620864600501e-06, - "loss": 0.087, + "epoch": 1.2735346358792183, + "grad_norm": 0.7894791528828095, + "learning_rate": 8.617454285555238e-06, + "loss": 0.0761, "step": 1434 }, { - "epoch": 0.6373528758605375, - "grad_norm": 0.7777909981283909, - "learning_rate": 9.89322274708308e-06, - "loss": 0.0795, + "epoch": 1.2744227353463589, + "grad_norm": 0.49384509382527925, + "learning_rate": 8.614777714142932e-06, + "loss": 0.0545, "step": 1435 }, { - "epoch": 0.6377970242060849, - "grad_norm": 0.6281013351353801, - "learning_rate": 9.892823894031159e-06, - "loss": 0.0525, + "epoch": 1.2753108348134992, + "grad_norm": 0.5218293268530874, + "learning_rate": 8.612098970889203e-06, + "loss": 0.0576, "step": 1436 }, { - "epoch": 0.6382411725516323, - "grad_norm": 0.5274870510804406, - "learning_rate": 9.89242430550469e-06, - "loss": 0.0701, + "epoch": 1.2761989342806395, + "grad_norm": 0.5905674492999149, + "learning_rate": 8.609418057403501e-06, + "loss": 0.0647, "step": 1437 }, { - "epoch": 0.6386853208971797, - "grad_norm": 0.9840882810513909, - "learning_rate": 9.892023981563744e-06, - "loss": 0.0888, + "epoch": 1.2770870337477798, + "grad_norm": 0.4595219577988148, + "learning_rate": 8.606734975296578e-06, + "loss": 0.0523, "step": 1438 }, { - "epoch": 0.6391294692427271, - "grad_norm": 0.7186121306665683, - "learning_rate": 9.89162292226849e-06, - "loss": 0.0666, + "epoch": 1.27797513321492, + "grad_norm": 0.505100325766144, + "learning_rate": 8.604049726180494e-06, + "loss": 0.0593, "step": 1439 }, { - "epoch": 0.6395736175882745, - "grad_norm": 0.7619824705257715, - "learning_rate": 9.891221127679216e-06, - "loss": 0.0654, + "epoch": 1.2788632326820604, + "grad_norm": 0.3912563276111262, + "learning_rate": 8.601362311668609e-06, + "loss": 0.0604, "step": 1440 }, { - "epoch": 0.6400177659338219, - "grad_norm": 0.6418832068583645, - "learning_rate": 9.89081859785632e-06, - "loss": 0.0706, + "epoch": 1.2797513321492007, + "grad_norm": 0.4489114067665813, + "learning_rate": 8.598672733375579e-06, + "loss": 0.0599, "step": 1441 }, { - "epoch": 0.6404619142793693, - "grad_norm": 0.6934692957378497, - "learning_rate": 9.890415332860308e-06, - "loss": 0.0684, + "epoch": 1.280639431616341, + "grad_norm": 0.47885917161477914, + "learning_rate": 8.59598099291737e-06, + "loss": 0.0589, "step": 1442 }, { - "epoch": 0.6409060626249167, - "grad_norm": 0.6769661850243969, - "learning_rate": 9.8900113327518e-06, - "loss": 0.0684, + "epoch": 1.2815275310834813, + "grad_norm": 0.5199443246043078, + "learning_rate": 8.593287091911236e-06, + "loss": 0.062, "step": 1443 }, { - "epoch": 0.6413502109704642, - "grad_norm": 0.6158605594742834, - "learning_rate": 9.88960659759152e-06, - "loss": 0.0652, + "epoch": 1.2824156305506218, + "grad_norm": 0.49746222098434373, + "learning_rate": 8.590591031975738e-06, + "loss": 0.0582, "step": 1444 }, { - "epoch": 0.6417943593160116, - "grad_norm": 0.5847451238116226, - "learning_rate": 9.88920112744031e-06, - "loss": 0.062, + "epoch": 1.2833037300177619, + "grad_norm": 0.35960418251256066, + "learning_rate": 8.58789281473073e-06, + "loss": 0.0444, "step": 1445 }, { - "epoch": 0.642238507661559, - "grad_norm": 0.8117410133012406, - "learning_rate": 9.888794922359116e-06, - "loss": 0.0611, + "epoch": 1.2841918294849024, + "grad_norm": 0.48349444187022905, + "learning_rate": 8.585192441797362e-06, + "loss": 0.0536, "step": 1446 }, { - "epoch": 0.6426826560071064, - "grad_norm": 0.7705515566871968, - "learning_rate": 9.888387982408998e-06, - "loss": 0.0752, + "epoch": 1.2850799289520427, + "grad_norm": 0.5204695091756877, + "learning_rate": 8.582489914798083e-06, + "loss": 0.0483, "step": 1447 }, { - "epoch": 0.6431268043526538, - "grad_norm": 0.777589892103545, - "learning_rate": 9.887980307651128e-06, - "loss": 0.0764, + "epoch": 1.285968028419183, + "grad_norm": 0.4179875305235931, + "learning_rate": 8.579785235356628e-06, + "loss": 0.049, "step": 1448 }, { - "epoch": 0.6435709526982012, - "grad_norm": 0.8491119564862617, - "learning_rate": 9.887571898146787e-06, - "loss": 0.081, + "epoch": 1.2868561278863233, + "grad_norm": 0.4445812605614399, + "learning_rate": 8.577078405098036e-06, + "loss": 0.0584, "step": 1449 }, { - "epoch": 0.6440151010437486, - "grad_norm": 0.7918875216411717, - "learning_rate": 9.887162753957362e-06, - "loss": 0.0626, + "epoch": 1.2877442273534636, + "grad_norm": 0.65983909043929, + "learning_rate": 8.574369425648629e-06, + "loss": 0.0534, "step": 1450 }, { - "epoch": 0.644459249389296, - "grad_norm": 0.5756994501297381, - "learning_rate": 9.886752875144358e-06, - "loss": 0.0635, + "epoch": 1.288632326820604, + "grad_norm": 0.5371606702884453, + "learning_rate": 8.571658298636026e-06, + "loss": 0.0643, "step": 1451 }, { - "epoch": 0.6449033977348434, - "grad_norm": 0.6492149852575843, - "learning_rate": 9.886342261769387e-06, - "loss": 0.0632, + "epoch": 1.2895204262877442, + "grad_norm": 0.47934231287327855, + "learning_rate": 8.568945025689132e-06, + "loss": 0.0597, "step": 1452 }, { - "epoch": 0.6453475460803908, - "grad_norm": 0.7503617012041023, - "learning_rate": 9.885930913894166e-06, - "loss": 0.0627, + "epoch": 1.2904085257548845, + "grad_norm": 0.49682055572492245, + "learning_rate": 8.566229608438146e-06, + "loss": 0.0569, "step": 1453 }, { - "epoch": 0.6457916944259383, - "grad_norm": 0.6405303313329822, - "learning_rate": 9.885518831580533e-06, - "loss": 0.0782, + "epoch": 1.2912966252220248, + "grad_norm": 0.45968426831417825, + "learning_rate": 8.563512048514552e-06, + "loss": 0.0581, "step": 1454 }, { - "epoch": 0.6462358427714857, - "grad_norm": 0.6250298341424497, - "learning_rate": 9.88510601489043e-06, - "loss": 0.0807, + "epoch": 1.2921847246891651, + "grad_norm": 0.42824351622073825, + "learning_rate": 8.560792347551124e-06, + "loss": 0.06, "step": 1455 }, { - "epoch": 0.6466799911170331, - "grad_norm": 0.7939161691732389, - "learning_rate": 9.88469246388591e-06, - "loss": 0.069, + "epoch": 1.2930728241563054, + "grad_norm": 0.6305542843938704, + "learning_rate": 8.558070507181919e-06, + "loss": 0.0646, "step": 1456 }, { - "epoch": 0.6471241394625805, - "grad_norm": 0.7492359887972416, - "learning_rate": 9.884278178629134e-06, - "loss": 0.0854, + "epoch": 1.293960923623446, + "grad_norm": 0.3462779083068886, + "learning_rate": 8.555346529042281e-06, + "loss": 0.0426, "step": 1457 }, { - "epoch": 0.6475682878081279, - "grad_norm": 0.6085287559130933, - "learning_rate": 9.883863159182379e-06, - "loss": 0.0608, + "epoch": 1.294849023090586, + "grad_norm": 0.5127804931629231, + "learning_rate": 8.552620414768839e-06, + "loss": 0.0461, "step": 1458 }, { - "epoch": 0.6480124361536753, - "grad_norm": 0.6361755513609607, - "learning_rate": 9.883447405608032e-06, - "loss": 0.0433, + "epoch": 1.2957371225577266, + "grad_norm": 0.46916166862406744, + "learning_rate": 8.549892165999505e-06, + "loss": 0.0537, "step": 1459 }, { - "epoch": 0.6484565844992227, - "grad_norm": 0.6431691982633025, - "learning_rate": 9.88303091796858e-06, - "loss": 0.0419, + "epoch": 1.2966252220248669, + "grad_norm": 0.40942997066412934, + "learning_rate": 8.547161784373475e-06, + "loss": 0.0483, "step": 1460 }, { - "epoch": 0.6489007328447701, - "grad_norm": 0.8190085662166918, - "learning_rate": 9.882613696326634e-06, - "loss": 0.0767, + "epoch": 1.2975133214920072, + "grad_norm": 0.6235498343590126, + "learning_rate": 8.544429271531224e-06, + "loss": 0.0631, "step": 1461 }, { - "epoch": 0.6493448811903175, - "grad_norm": 0.755548239297633, - "learning_rate": 9.882195740744911e-06, - "loss": 0.0923, + "epoch": 1.2984014209591475, + "grad_norm": 0.5393186646359277, + "learning_rate": 8.541694629114506e-06, + "loss": 0.0523, "step": 1462 }, { - "epoch": 0.6497890295358649, - "grad_norm": 0.5719039887354458, - "learning_rate": 9.881777051286232e-06, - "loss": 0.0723, + "epoch": 1.2992895204262878, + "grad_norm": 0.38281650665148176, + "learning_rate": 8.53895785876636e-06, + "loss": 0.0438, "step": 1463 }, { - "epoch": 0.6502331778814124, - "grad_norm": 0.7825393714474173, - "learning_rate": 9.881357628013535e-06, - "loss": 0.0835, + "epoch": 1.300177619893428, + "grad_norm": 0.4467187838121198, + "learning_rate": 8.536218962131102e-06, + "loss": 0.0532, "step": 1464 }, { - "epoch": 0.6506773262269598, - "grad_norm": 0.6610007380508074, - "learning_rate": 9.880937470989868e-06, - "loss": 0.0656, + "epoch": 1.3010657193605684, + "grad_norm": 0.3937399574367662, + "learning_rate": 8.533477940854317e-06, + "loss": 0.0506, "step": 1465 }, { - "epoch": 0.6511214745725072, - "grad_norm": 0.6218987787310035, - "learning_rate": 9.880516580278386e-06, - "loss": 0.0594, + "epoch": 1.3019538188277087, + "grad_norm": 0.4698156992764309, + "learning_rate": 8.53073479658288e-06, + "loss": 0.0488, "step": 1466 }, { - "epoch": 0.6515656229180546, - "grad_norm": 0.6871813560151103, - "learning_rate": 9.880094955942357e-06, - "loss": 0.0539, + "epoch": 1.302841918294849, + "grad_norm": 0.43459002711500133, + "learning_rate": 8.527989530964934e-06, + "loss": 0.0533, "step": 1467 }, { - "epoch": 0.652009771263602, - "grad_norm": 0.6311698562321532, - "learning_rate": 9.879672598045156e-06, - "loss": 0.0806, + "epoch": 1.3037300177619893, + "grad_norm": 0.36028233828067874, + "learning_rate": 8.525242145649895e-06, + "loss": 0.0575, "step": 1468 }, { - "epoch": 0.6524539196091494, - "grad_norm": 0.6693355850981739, - "learning_rate": 9.879249506650275e-06, - "loss": 0.0856, + "epoch": 1.3046181172291296, + "grad_norm": 0.6517374756236427, + "learning_rate": 8.522492642288458e-06, + "loss": 0.072, "step": 1469 }, { - "epoch": 0.6528980679546968, - "grad_norm": 0.7127562713682332, - "learning_rate": 9.878825681821306e-06, - "loss": 0.0685, + "epoch": 1.30550621669627, + "grad_norm": 0.548678187698675, + "learning_rate": 8.519741022532585e-06, + "loss": 0.0628, "step": 1470 }, { - "epoch": 0.6533422163002442, - "grad_norm": 0.9425811868582009, - "learning_rate": 9.878401123621963e-06, - "loss": 0.0823, + "epoch": 1.3063943161634102, + "grad_norm": 0.4037151468281861, + "learning_rate": 8.516987288035515e-06, + "loss": 0.0509, "step": 1471 }, { - "epoch": 0.6537863646457917, - "grad_norm": 0.6086374723526442, - "learning_rate": 9.87797583211606e-06, - "loss": 0.0614, + "epoch": 1.3072824156305507, + "grad_norm": 0.44596859794092103, + "learning_rate": 8.514231440451753e-06, + "loss": 0.0438, "step": 1472 }, { - "epoch": 0.654230512991339, - "grad_norm": 0.7763257358459474, - "learning_rate": 9.877549807367528e-06, - "loss": 0.0857, + "epoch": 1.308170515097691, + "grad_norm": 0.5465102266648206, + "learning_rate": 8.511473481437079e-06, + "loss": 0.0481, "step": 1473 }, { - "epoch": 0.6546746613368866, - "grad_norm": 0.539995990500211, - "learning_rate": 9.877123049440405e-06, - "loss": 0.0531, + "epoch": 1.3090586145648313, + "grad_norm": 0.480704475035662, + "learning_rate": 8.50871341264853e-06, + "loss": 0.0451, "step": 1474 }, { - "epoch": 0.655118809682434, - "grad_norm": 0.5915202116072419, - "learning_rate": 9.876695558398838e-06, - "loss": 0.0663, + "epoch": 1.3099467140319716, + "grad_norm": 0.36863949736884805, + "learning_rate": 8.505951235744427e-06, + "loss": 0.0466, "step": 1475 }, { - "epoch": 0.6555629580279814, - "grad_norm": 0.6814775856965638, - "learning_rate": 9.876267334307091e-06, - "loss": 0.0536, + "epoch": 1.310834813499112, + "grad_norm": 0.6238227028327276, + "learning_rate": 8.503186952384346e-06, + "loss": 0.0646, "step": 1476 }, { - "epoch": 0.6560071063735288, - "grad_norm": 0.6565534779979346, - "learning_rate": 9.875838377229528e-06, - "loss": 0.0854, + "epoch": 1.3117229129662522, + "grad_norm": 0.8111377642642721, + "learning_rate": 8.500420564229133e-06, + "loss": 0.0526, "step": 1477 }, { - "epoch": 0.6564512547190762, - "grad_norm": 0.6074291178048928, - "learning_rate": 9.875408687230633e-06, - "loss": 0.0643, + "epoch": 1.3126110124333925, + "grad_norm": 0.5640201873462473, + "learning_rate": 8.497652072940896e-06, + "loss": 0.0591, "step": 1478 }, { - "epoch": 0.6568954030646236, - "grad_norm": 0.6683735797478915, - "learning_rate": 9.874978264374991e-06, - "loss": 0.0657, + "epoch": 1.3134991119005328, + "grad_norm": 0.4018870174647557, + "learning_rate": 8.494881480183009e-06, + "loss": 0.0573, "step": 1479 }, { - "epoch": 0.657339551410171, - "grad_norm": 0.6802029779866509, - "learning_rate": 9.874547108727306e-06, - "loss": 0.0571, + "epoch": 1.3143872113676731, + "grad_norm": 0.5311441613629344, + "learning_rate": 8.492108787620106e-06, + "loss": 0.0607, "step": 1480 }, { - "epoch": 0.6577836997557184, - "grad_norm": 0.7709030410876545, - "learning_rate": 9.874115220352386e-06, - "loss": 0.0515, + "epoch": 1.3152753108348134, + "grad_norm": 1.155062480170173, + "learning_rate": 8.489333996918087e-06, + "loss": 0.0662, "step": 1481 }, { - "epoch": 0.6582278481012658, - "grad_norm": 0.6029748648689206, - "learning_rate": 9.873682599315152e-06, - "loss": 0.054, + "epoch": 1.3161634103019537, + "grad_norm": 0.45595005579384046, + "learning_rate": 8.486557109744106e-06, + "loss": 0.0617, "step": 1482 }, { - "epoch": 0.6586719964468133, - "grad_norm": 0.7218151069926805, - "learning_rate": 9.873249245680634e-06, - "loss": 0.0842, + "epoch": 1.3170515097690942, + "grad_norm": 0.47490667013003973, + "learning_rate": 8.483778127766587e-06, + "loss": 0.0553, "step": 1483 }, { - "epoch": 0.6591161447923607, - "grad_norm": 0.7118725638175186, - "learning_rate": 9.872815159513972e-06, - "loss": 0.0783, + "epoch": 1.3179396092362343, + "grad_norm": 0.792155455739816, + "learning_rate": 8.480997052655203e-06, + "loss": 0.0687, "step": 1484 }, { - "epoch": 0.6595602931379081, - "grad_norm": 0.5278035850844903, - "learning_rate": 9.872380340880416e-06, - "loss": 0.0504, + "epoch": 1.3188277087033748, + "grad_norm": 0.4163969332342965, + "learning_rate": 8.478213886080883e-06, + "loss": 0.0541, "step": 1485 }, { - "epoch": 0.6600044414834555, - "grad_norm": 0.7090158968533085, - "learning_rate": 9.87194478984533e-06, - "loss": 0.0518, + "epoch": 1.3197158081705151, + "grad_norm": 0.4473583978385226, + "learning_rate": 8.475428629715825e-06, + "loss": 0.0563, "step": 1486 }, { - "epoch": 0.6604485898290029, - "grad_norm": 0.7140940579751807, - "learning_rate": 9.87150850647418e-06, - "loss": 0.0644, + "epoch": 1.3206039076376554, + "grad_norm": 0.4301317785599763, + "learning_rate": 8.47264128523347e-06, + "loss": 0.0545, "step": 1487 }, { - "epoch": 0.6608927381745503, - "grad_norm": 0.5233702996222387, - "learning_rate": 9.87107149083255e-06, - "loss": 0.0484, + "epoch": 1.3214920071047958, + "grad_norm": 0.4595297952515238, + "learning_rate": 8.469851854308522e-06, + "loss": 0.0434, "step": 1488 }, { - "epoch": 0.6613368865200977, - "grad_norm": 0.5255868844598361, - "learning_rate": 9.870633742986129e-06, - "loss": 0.0486, + "epoch": 1.322380106571936, + "grad_norm": 0.5166301986880361, + "learning_rate": 8.467060338616931e-06, + "loss": 0.0565, "step": 1489 }, { - "epoch": 0.6617810348656451, - "grad_norm": 0.7456554727487221, - "learning_rate": 9.870195263000719e-06, - "loss": 0.0926, + "epoch": 1.3232682060390764, + "grad_norm": 0.4419532686675486, + "learning_rate": 8.464266739835906e-06, + "loss": 0.0503, "step": 1490 }, { - "epoch": 0.6622251832111925, - "grad_norm": 0.5094239000908518, - "learning_rate": 9.869756050942231e-06, - "loss": 0.0609, + "epoch": 1.3241563055062167, + "grad_norm": 0.5410592441277755, + "learning_rate": 8.461471059643907e-06, + "loss": 0.048, "step": 1491 }, { - "epoch": 0.6626693315567399, - "grad_norm": 0.5934076694430587, - "learning_rate": 9.869316106876687e-06, - "loss": 0.0815, + "epoch": 1.325044404973357, + "grad_norm": 0.4449123853152512, + "learning_rate": 8.458673299720639e-06, + "loss": 0.0435, "step": 1492 }, { - "epoch": 0.6631134799022874, - "grad_norm": 0.5800872547088418, - "learning_rate": 9.868875430870217e-06, - "loss": 0.0562, + "epoch": 1.3259325044404973, + "grad_norm": 0.4790481415566278, + "learning_rate": 8.455873461747064e-06, + "loss": 0.0588, "step": 1493 }, { - "epoch": 0.6635576282478348, - "grad_norm": 0.6466806398918721, - "learning_rate": 9.86843402298906e-06, - "loss": 0.0576, + "epoch": 1.3268206039076378, + "grad_norm": 0.5342704537961382, + "learning_rate": 8.453071547405387e-06, + "loss": 0.0634, "step": 1494 }, { - "epoch": 0.6640017765933822, - "grad_norm": 0.49149929770034256, - "learning_rate": 9.86799188329957e-06, - "loss": 0.0552, + "epoch": 1.3277087033747779, + "grad_norm": 0.4170166866754002, + "learning_rate": 8.450267558379063e-06, + "loss": 0.0559, "step": 1495 }, { - "epoch": 0.6644459249389296, - "grad_norm": 0.6869307446643815, - "learning_rate": 9.867549011868208e-06, - "loss": 0.0689, + "epoch": 1.3285968028419184, + "grad_norm": 0.5802552581740366, + "learning_rate": 8.447461496352791e-06, + "loss": 0.0547, "step": 1496 }, { - "epoch": 0.664890073284477, - "grad_norm": 1.0182179632443422, - "learning_rate": 9.867105408761544e-06, - "loss": 0.0658, + "epoch": 1.3294849023090587, + "grad_norm": 0.6286196611464454, + "learning_rate": 8.444653363012521e-06, + "loss": 0.0688, "step": 1497 }, { - "epoch": 0.6653342216300244, - "grad_norm": 0.5094314413955369, - "learning_rate": 9.866661074046258e-06, - "loss": 0.0509, + "epoch": 1.330373001776199, + "grad_norm": 0.36039387770317033, + "learning_rate": 8.441843160045443e-06, + "loss": 0.0514, "step": 1498 }, { - "epoch": 0.6657783699755718, - "grad_norm": 0.6982818166570782, - "learning_rate": 9.866216007789145e-06, - "loss": 0.0728, + "epoch": 1.3312611012433393, + "grad_norm": 0.5411986034980915, + "learning_rate": 8.439030889139987e-06, + "loss": 0.063, "step": 1499 }, { - "epoch": 0.6662225183211192, - "grad_norm": 0.7524936010719828, - "learning_rate": 9.8657702100571e-06, - "loss": 0.0911, + "epoch": 1.3321492007104796, + "grad_norm": 0.49451007108822237, + "learning_rate": 8.436216551985836e-06, + "loss": 0.0603, "step": 1500 }, { - "epoch": 0.6666666666666666, - "grad_norm": 0.7557138142113028, - "learning_rate": 9.86532368091714e-06, - "loss": 0.059, + "epoch": 1.33303730017762, + "grad_norm": 0.6786698927795038, + "learning_rate": 8.433400150273907e-06, + "loss": 0.0584, "step": 1501 }, { - "epoch": 0.667110815012214, - "grad_norm": 0.6888932895497213, - "learning_rate": 9.864876420436383e-06, - "loss": 0.0614, + "epoch": 1.3339253996447602, + "grad_norm": 0.5372460305082452, + "learning_rate": 8.430581685696356e-06, + "loss": 0.0529, "step": 1502 }, { - "epoch": 0.6675549633577615, - "grad_norm": 0.5506269374818886, - "learning_rate": 9.86442842868206e-06, - "loss": 0.0599, + "epoch": 1.3348134991119005, + "grad_norm": 0.44577665924664855, + "learning_rate": 8.427761159946584e-06, + "loss": 0.0467, "step": 1503 }, { - "epoch": 0.667999111703309, - "grad_norm": 0.8018485119174299, - "learning_rate": 9.86397970572151e-06, - "loss": 0.0875, + "epoch": 1.3357015985790408, + "grad_norm": 0.3425848176226401, + "learning_rate": 8.424938574719224e-06, + "loss": 0.0445, "step": 1504 }, { - "epoch": 0.6684432600488563, - "grad_norm": 0.5631462958993229, - "learning_rate": 9.863530251622189e-06, - "loss": 0.0691, + "epoch": 1.336589698046181, + "grad_norm": 0.6350760384909353, + "learning_rate": 8.422113931710156e-06, + "loss": 0.063, "step": 1505 }, { - "epoch": 0.6688874083944037, - "grad_norm": 0.8176186875229494, - "learning_rate": 9.863080066451653e-06, - "loss": 0.072, + "epoch": 1.3374777975133214, + "grad_norm": 0.43821156257968646, + "learning_rate": 8.419287232616484e-06, + "loss": 0.0609, "step": 1506 }, { - "epoch": 0.6693315567399512, - "grad_norm": 0.7128371312290194, - "learning_rate": 9.862629150277574e-06, - "loss": 0.0773, + "epoch": 1.338365896980462, + "grad_norm": 0.762015230863103, + "learning_rate": 8.416458479136558e-06, + "loss": 0.0539, "step": 1507 }, { - "epoch": 0.6697757050854986, - "grad_norm": 0.759379781839776, - "learning_rate": 9.86217750316773e-06, - "loss": 0.0729, + "epoch": 1.339253996447602, + "grad_norm": 0.39381448397625796, + "learning_rate": 8.413627672969958e-06, + "loss": 0.0501, "step": 1508 }, { - "epoch": 0.670219853431046, - "grad_norm": 0.6634457926439065, - "learning_rate": 9.861725125190017e-06, - "loss": 0.0595, + "epoch": 1.3401420959147425, + "grad_norm": 0.5136660363183541, + "learning_rate": 8.410794815817494e-06, + "loss": 0.0497, "step": 1509 }, { - "epoch": 0.6706640017765934, - "grad_norm": 0.7426433953184679, - "learning_rate": 9.861272016412429e-06, - "loss": 0.065, + "epoch": 1.3410301953818828, + "grad_norm": 0.46079688680757713, + "learning_rate": 8.407959909381214e-06, + "loss": 0.0535, "step": 1510 }, { - "epoch": 0.6711081501221408, - "grad_norm": 0.6600032637907064, - "learning_rate": 9.86081817690308e-06, - "loss": 0.0625, + "epoch": 1.3419182948490231, + "grad_norm": 0.6640295890749003, + "learning_rate": 8.405122955364394e-06, + "loss": 0.0597, "step": 1511 }, { - "epoch": 0.6715522984676882, - "grad_norm": 0.703748253196849, - "learning_rate": 9.860363606730185e-06, - "loss": 0.073, + "epoch": 1.3428063943161634, + "grad_norm": 0.4732830529420499, + "learning_rate": 8.402283955471541e-06, + "loss": 0.0441, "step": 1512 }, { - "epoch": 0.6719964468132357, - "grad_norm": 0.8768454660219879, - "learning_rate": 9.85990830596208e-06, - "loss": 0.093, + "epoch": 1.3436944937833037, + "grad_norm": 0.45163064734160446, + "learning_rate": 8.399442911408393e-06, + "loss": 0.0482, "step": 1513 }, { - "epoch": 0.6724405951587831, - "grad_norm": 0.5810307286903331, - "learning_rate": 9.859452274667199e-06, - "loss": 0.0664, + "epoch": 1.344582593250444, + "grad_norm": 0.4004202627674794, + "learning_rate": 8.396599824881913e-06, + "loss": 0.0459, "step": 1514 }, { - "epoch": 0.6728847435043305, - "grad_norm": 0.6467795840989637, - "learning_rate": 9.858995512914096e-06, - "loss": 0.0736, + "epoch": 1.3454706927175843, + "grad_norm": 0.5085992052846169, + "learning_rate": 8.393754697600291e-06, + "loss": 0.0573, "step": 1515 }, { - "epoch": 0.6733288918498779, - "grad_norm": 0.8604239627870944, - "learning_rate": 9.858538020771424e-06, - "loss": 0.0819, + "epoch": 1.3463587921847247, + "grad_norm": 0.4924731869436927, + "learning_rate": 8.390907531272951e-06, + "loss": 0.0659, "step": 1516 }, { - "epoch": 0.6737730401954253, - "grad_norm": 0.5072234228135453, - "learning_rate": 9.858079798307959e-06, - "loss": 0.0764, + "epoch": 1.347246891651865, + "grad_norm": 0.5452365800224888, + "learning_rate": 8.388058327610528e-06, + "loss": 0.0516, "step": 1517 }, { - "epoch": 0.6742171885409727, - "grad_norm": 0.8843203312811015, - "learning_rate": 9.857620845592573e-06, - "loss": 0.0941, + "epoch": 1.3481349911190053, + "grad_norm": 0.5704325365049103, + "learning_rate": 8.385207088324894e-06, + "loss": 0.0484, "step": 1518 }, { - "epoch": 0.6746613368865201, - "grad_norm": 0.7766118997696602, - "learning_rate": 9.85716116269426e-06, - "loss": 0.0697, + "epoch": 1.3490230905861456, + "grad_norm": 0.5016393880689081, + "learning_rate": 8.382353815129136e-06, + "loss": 0.0616, "step": 1519 }, { - "epoch": 0.6751054852320675, - "grad_norm": 0.5371610065691803, - "learning_rate": 9.856700749682114e-06, - "loss": 0.075, + "epoch": 1.349911190053286, + "grad_norm": 0.3981430116519234, + "learning_rate": 8.37949850973757e-06, + "loss": 0.0498, "step": 1520 }, { - "epoch": 0.6755496335776149, - "grad_norm": 0.5048461380926635, - "learning_rate": 9.856239606625345e-06, - "loss": 0.0584, + "epoch": 1.3507992895204262, + "grad_norm": 0.47622194032636755, + "learning_rate": 8.376641173865724e-06, + "loss": 0.0476, "step": 1521 }, { - "epoch": 0.6759937819231623, - "grad_norm": 0.819913205588501, - "learning_rate": 9.855777733593269e-06, - "loss": 0.0748, + "epoch": 1.3516873889875667, + "grad_norm": 0.46749798952082183, + "learning_rate": 8.373781809230355e-06, + "loss": 0.0496, "step": 1522 }, { - "epoch": 0.6764379302687098, - "grad_norm": 0.6492894969300076, - "learning_rate": 9.855315130655315e-06, - "loss": 0.0573, + "epoch": 1.352575488454707, + "grad_norm": 0.404985475436921, + "learning_rate": 8.370920417549433e-06, + "loss": 0.0441, "step": 1523 }, { - "epoch": 0.6768820786142572, - "grad_norm": 1.8883752330861565, - "learning_rate": 9.854851797881018e-06, - "loss": 0.0586, + "epoch": 1.3534635879218473, + "grad_norm": 0.38619516691976297, + "learning_rate": 8.36805700054215e-06, + "loss": 0.0466, "step": 1524 }, { - "epoch": 0.6773262269598046, - "grad_norm": 0.6674563588470771, - "learning_rate": 9.854387735340028e-06, - "loss": 0.0618, + "epoch": 1.3543516873889876, + "grad_norm": 0.3893869282188238, + "learning_rate": 8.365191559928908e-06, + "loss": 0.0474, "step": 1525 }, { - "epoch": 0.677770375305352, - "grad_norm": 0.5745105185416173, - "learning_rate": 9.853922943102099e-06, - "loss": 0.0486, + "epoch": 1.355239786856128, + "grad_norm": 0.4369571034155886, + "learning_rate": 8.362324097431335e-06, + "loss": 0.0503, "step": 1526 }, { - "epoch": 0.6782145236508994, - "grad_norm": 0.8823764363833241, - "learning_rate": 9.853457421237098e-06, - "loss": 0.0655, + "epoch": 1.3561278863232682, + "grad_norm": 0.5264090102795924, + "learning_rate": 8.359454614772264e-06, + "loss": 0.0652, "step": 1527 }, { - "epoch": 0.6786586719964468, - "grad_norm": 0.7502418552024257, - "learning_rate": 9.852991169815002e-06, - "loss": 0.0697, + "epoch": 1.3570159857904085, + "grad_norm": 0.35886809356369237, + "learning_rate": 8.356583113675749e-06, + "loss": 0.0473, "step": 1528 }, { - "epoch": 0.6791028203419942, - "grad_norm": 0.9371750933741902, - "learning_rate": 9.852524188905894e-06, - "loss": 0.0637, + "epoch": 1.3579040852575488, + "grad_norm": 0.3970066798407407, + "learning_rate": 8.353709595867052e-06, + "loss": 0.0456, "step": 1529 }, { - "epoch": 0.6795469686875416, - "grad_norm": 0.4760980667666431, - "learning_rate": 9.85205647857997e-06, - "loss": 0.0507, + "epoch": 1.358792184724689, + "grad_norm": 0.3384342848322592, + "learning_rate": 8.350834063072651e-06, + "loss": 0.0373, "step": 1530 }, { - "epoch": 0.679991117033089, - "grad_norm": 0.5957775022770642, - "learning_rate": 9.851588038907536e-06, - "loss": 0.0725, + "epoch": 1.3596802841918294, + "grad_norm": 0.47453111740475157, + "learning_rate": 8.347956517020227e-06, + "loss": 0.0549, "step": 1531 }, { - "epoch": 0.6804352653786364, - "grad_norm": 0.8583868263839582, - "learning_rate": 9.851118869959006e-06, - "loss": 0.0926, + "epoch": 1.3605683836589697, + "grad_norm": 0.4829163729790234, + "learning_rate": 8.34507695943868e-06, + "loss": 0.0544, "step": 1532 }, { - "epoch": 0.6808794137241839, - "grad_norm": 0.5065773305637178, - "learning_rate": 9.850648971804903e-06, - "loss": 0.0519, + "epoch": 1.3614564831261102, + "grad_norm": 0.47628665800148995, + "learning_rate": 8.342195392058113e-06, + "loss": 0.0533, "step": 1533 }, { - "epoch": 0.6813235620697313, - "grad_norm": 0.6463242249375162, - "learning_rate": 9.850178344515861e-06, - "loss": 0.0495, + "epoch": 1.3623445825932503, + "grad_norm": 0.4453596523266883, + "learning_rate": 8.339311816609838e-06, + "loss": 0.0596, "step": 1534 }, { - "epoch": 0.6817677104152787, - "grad_norm": 0.713200223511551, - "learning_rate": 9.849706988162626e-06, - "loss": 0.0667, + "epoch": 1.3632326820603908, + "grad_norm": 0.4231359352448745, + "learning_rate": 8.336426234826374e-06, + "loss": 0.0542, "step": 1535 }, { - "epoch": 0.6822118587608261, - "grad_norm": 0.8106886906091921, - "learning_rate": 9.849234902816047e-06, - "loss": 0.078, + "epoch": 1.3641207815275311, + "grad_norm": 0.3782868139162873, + "learning_rate": 8.333538648441444e-06, + "loss": 0.0496, "step": 1536 }, { - "epoch": 0.6826560071063735, - "grad_norm": 0.618211559505216, - "learning_rate": 9.848762088547089e-06, - "loss": 0.0499, + "epoch": 1.3650088809946714, + "grad_norm": 0.5542789896302543, + "learning_rate": 8.330649059189975e-06, + "loss": 0.0535, "step": 1537 }, { - "epoch": 0.6831001554519209, - "grad_norm": 0.4683137101850568, - "learning_rate": 9.848288545426821e-06, - "loss": 0.0485, + "epoch": 1.3658969804618117, + "grad_norm": 0.4814896663589956, + "learning_rate": 8.3277574688081e-06, + "loss": 0.0515, "step": 1538 }, { - "epoch": 0.6835443037974683, - "grad_norm": 0.6498884933818719, - "learning_rate": 9.847814273526428e-06, - "loss": 0.0717, + "epoch": 1.366785079928952, + "grad_norm": 0.6046455283468285, + "learning_rate": 8.324863879033153e-06, + "loss": 0.0585, "step": 1539 }, { - "epoch": 0.6839884521430157, - "grad_norm": 0.6112948857459086, - "learning_rate": 9.8473392729172e-06, - "loss": 0.0556, + "epoch": 1.3676731793960923, + "grad_norm": 0.4118867357435279, + "learning_rate": 8.321968291603669e-06, + "loss": 0.0537, "step": 1540 }, { - "epoch": 0.6844326004885631, - "grad_norm": 0.6350627430979878, - "learning_rate": 9.846863543670536e-06, - "loss": 0.067, + "epoch": 1.3685612788632326, + "grad_norm": 0.4478074627392541, + "learning_rate": 8.319070708259383e-06, + "loss": 0.055, "step": 1541 }, { - "epoch": 0.6848767488341105, - "grad_norm": 1.1432261374175676, - "learning_rate": 9.846387085857949e-06, - "loss": 0.1195, + "epoch": 1.369449378330373, + "grad_norm": 0.6756323939696056, + "learning_rate": 8.31617113074123e-06, + "loss": 0.0564, "step": 1542 }, { - "epoch": 0.685320897179658, - "grad_norm": 0.5445218211710267, - "learning_rate": 9.845909899551056e-06, - "loss": 0.0633, + "epoch": 1.3703374777975132, + "grad_norm": 0.3979245077082011, + "learning_rate": 8.313269560791343e-06, + "loss": 0.0549, "step": 1543 }, { - "epoch": 0.6857650455252055, - "grad_norm": 0.7666433728201425, - "learning_rate": 9.845431984821588e-06, - "loss": 0.0652, + "epoch": 1.3712255772646538, + "grad_norm": 0.45049907570969744, + "learning_rate": 8.31036600015305e-06, + "loss": 0.0556, "step": 1544 }, { - "epoch": 0.6862091938707529, - "grad_norm": 0.6445193247682808, - "learning_rate": 9.844953341741383e-06, - "loss": 0.044, + "epoch": 1.3721136767317939, + "grad_norm": 0.6898831561463261, + "learning_rate": 8.307460450570879e-06, + "loss": 0.0613, "step": 1545 }, { - "epoch": 0.6866533422163003, - "grad_norm": 0.6165834169963341, - "learning_rate": 9.844473970382391e-06, - "loss": 0.07, + "epoch": 1.3730017761989344, + "grad_norm": 0.38427443065782585, + "learning_rate": 8.30455291379055e-06, + "loss": 0.0492, "step": 1546 }, { - "epoch": 0.6870974905618477, - "grad_norm": 0.785019896367438, - "learning_rate": 9.843993870816665e-06, - "loss": 0.0692, + "epoch": 1.3738898756660747, + "grad_norm": 0.3904995137122518, + "learning_rate": 8.301643391558977e-06, + "loss": 0.051, "step": 1547 }, { - "epoch": 0.6875416389073951, - "grad_norm": 0.8097989024467772, - "learning_rate": 9.843513043116378e-06, - "loss": 0.0714, + "epoch": 1.374777975133215, + "grad_norm": 0.4675032072811224, + "learning_rate": 8.298731885624272e-06, + "loss": 0.0541, "step": 1548 }, { - "epoch": 0.6879857872529425, - "grad_norm": 0.7549105996829288, - "learning_rate": 9.843031487353803e-06, - "loss": 0.0642, + "epoch": 1.3756660746003553, + "grad_norm": 0.6363678435023608, + "learning_rate": 8.295818397735727e-06, + "loss": 0.0521, "step": 1549 }, { - "epoch": 0.6884299355984899, - "grad_norm": 0.4886439517505401, - "learning_rate": 9.842549203601327e-06, - "loss": 0.0562, + "epoch": 1.3765541740674956, + "grad_norm": 0.46452327793605236, + "learning_rate": 8.29290292964384e-06, + "loss": 0.0497, "step": 1550 }, { - "epoch": 0.6888740839440373, - "grad_norm": 0.7561494998266006, - "learning_rate": 9.842066191931442e-06, - "loss": 0.0643, + "epoch": 1.3774422735346359, + "grad_norm": 0.3869276938537254, + "learning_rate": 8.289985483100284e-06, + "loss": 0.0464, "step": 1551 }, { - "epoch": 0.6893182322895848, - "grad_norm": 0.854720220061704, - "learning_rate": 9.84158245241676e-06, - "loss": 0.0522, + "epoch": 1.3783303730017762, + "grad_norm": 0.5705722937069527, + "learning_rate": 8.287066059857936e-06, + "loss": 0.0609, "step": 1552 }, { - "epoch": 0.6897623806351322, - "grad_norm": 0.5346332465205076, - "learning_rate": 9.84109798512999e-06, - "loss": 0.0518, + "epoch": 1.3792184724689165, + "grad_norm": 0.3749300790504764, + "learning_rate": 8.284144661670845e-06, + "loss": 0.0503, "step": 1553 }, { - "epoch": 0.6902065289806796, - "grad_norm": 0.508165162429757, - "learning_rate": 9.840612790143958e-06, - "loss": 0.0538, + "epoch": 1.3801065719360568, + "grad_norm": 0.5450754778814413, + "learning_rate": 8.28122129029426e-06, + "loss": 0.0824, "step": 1554 }, { - "epoch": 0.690650677326227, - "grad_norm": 0.4808558159762227, - "learning_rate": 9.840126867531594e-06, - "loss": 0.0604, + "epoch": 1.380994671403197, + "grad_norm": 0.49668812306186516, + "learning_rate": 8.278295947484605e-06, + "loss": 0.0627, "step": 1555 }, { - "epoch": 0.6910948256717744, - "grad_norm": 0.6036124527734282, - "learning_rate": 9.839640217365941e-06, - "loss": 0.0636, + "epoch": 1.3818827708703374, + "grad_norm": 0.4744427878395548, + "learning_rate": 8.275368634999495e-06, + "loss": 0.0539, "step": 1556 }, { - "epoch": 0.6915389740173218, - "grad_norm": 0.5365369852245644, - "learning_rate": 9.839152839720157e-06, - "loss": 0.0571, + "epoch": 1.382770870337478, + "grad_norm": 0.583350862165481, + "learning_rate": 8.272439354597728e-06, + "loss": 0.0561, "step": 1557 }, { - "epoch": 0.6919831223628692, - "grad_norm": 0.9675442829718458, - "learning_rate": 9.838664734667496e-06, - "loss": 0.0768, + "epoch": 1.383658969804618, + "grad_norm": 0.36980203867676986, + "learning_rate": 8.269508108039279e-06, + "loss": 0.0521, "step": 1558 }, { - "epoch": 0.6924272707084166, - "grad_norm": 0.6682601437620973, - "learning_rate": 9.83817590228133e-06, - "loss": 0.0682, + "epoch": 1.3845470692717585, + "grad_norm": 0.5120540940254161, + "learning_rate": 8.266574897085312e-06, + "loss": 0.0473, "step": 1559 }, { - "epoch": 0.692871419053964, - "grad_norm": 0.915552570084289, - "learning_rate": 9.83768634263514e-06, - "loss": 0.0689, + "epoch": 1.3854351687388988, + "grad_norm": 0.4827772130451817, + "learning_rate": 8.263639723498167e-06, + "loss": 0.0737, "step": 1560 }, { - "epoch": 0.6933155673995114, - "grad_norm": 0.7294790821869619, - "learning_rate": 9.837196055802514e-06, - "loss": 0.0595, + "epoch": 1.3863232682060391, + "grad_norm": 0.4535918145216589, + "learning_rate": 8.260702589041361e-06, + "loss": 0.0485, "step": 1561 }, { - "epoch": 0.6937597157450589, - "grad_norm": 0.6877843622659181, - "learning_rate": 9.836705041857153e-06, - "loss": 0.0894, + "epoch": 1.3872113676731794, + "grad_norm": 0.5217199430258538, + "learning_rate": 8.257763495479596e-06, + "loss": 0.0545, "step": 1562 }, { - "epoch": 0.6942038640906063, - "grad_norm": 0.5609065701691318, - "learning_rate": 9.836213300872862e-06, - "loss": 0.0523, + "epoch": 1.3880994671403197, + "grad_norm": 0.4027009124705943, + "learning_rate": 8.254822444578742e-06, + "loss": 0.0503, "step": 1563 }, { - "epoch": 0.6946480124361537, - "grad_norm": 0.6750341373878816, - "learning_rate": 9.83572083292356e-06, - "loss": 0.0723, + "epoch": 1.38898756660746, + "grad_norm": 0.4852312091691364, + "learning_rate": 8.251879438105854e-06, + "loss": 0.0438, "step": 1564 }, { - "epoch": 0.6950921607817011, - "grad_norm": 0.8179982672102986, - "learning_rate": 9.835227638083271e-06, - "loss": 0.0741, + "epoch": 1.3898756660746003, + "grad_norm": 0.4268369610359005, + "learning_rate": 8.248934477829154e-06, + "loss": 0.0495, "step": 1565 }, { - "epoch": 0.6955363091272485, - "grad_norm": 0.514148745778486, - "learning_rate": 9.834733716426133e-06, + "epoch": 1.3907637655417406, + "grad_norm": 0.45782613166201763, + "learning_rate": 8.245987565518047e-06, "loss": 0.048, "step": 1566 }, { - "epoch": 0.6959804574727959, - "grad_norm": 0.5288948126890644, - "learning_rate": 9.834239068026388e-06, - "loss": 0.0665, + "epoch": 1.391651865008881, + "grad_norm": 0.4389272824898538, + "learning_rate": 8.2430387029431e-06, + "loss": 0.0513, "step": 1567 }, { - "epoch": 0.6964246058183433, - "grad_norm": 0.5107938326869891, - "learning_rate": 9.833743692958392e-06, - "loss": 0.0482, + "epoch": 1.3925399644760212, + "grad_norm": 0.36951403873805333, + "learning_rate": 8.240087891876061e-06, + "loss": 0.0489, "step": 1568 }, { - "epoch": 0.6968687541638907, - "grad_norm": 1.053299477751095, - "learning_rate": 9.83324759129661e-06, - "loss": 0.0726, + "epoch": 1.3934280639431615, + "grad_norm": 0.45586866631218004, + "learning_rate": 8.237135134089842e-06, + "loss": 0.0476, "step": 1569 }, { - "epoch": 0.6973129025094381, - "grad_norm": 0.6266804348595715, - "learning_rate": 9.832750763115611e-06, - "loss": 0.0738, + "epoch": 1.394316163410302, + "grad_norm": 0.4549045157497352, + "learning_rate": 8.234180431358534e-06, + "loss": 0.0474, "step": 1570 }, { - "epoch": 0.6977570508549855, - "grad_norm": 0.7607794431765756, - "learning_rate": 9.83225320849008e-06, - "loss": 0.0739, + "epoch": 1.3952042628774421, + "grad_norm": 0.4565821267928718, + "learning_rate": 8.23122378545738e-06, + "loss": 0.0559, "step": 1571 }, { - "epoch": 0.698201199200533, - "grad_norm": 0.7778393457616919, - "learning_rate": 9.831754927494803e-06, - "loss": 0.0948, + "epoch": 1.3960923623445827, + "grad_norm": 0.4701002068827714, + "learning_rate": 8.22826519816281e-06, + "loss": 0.0527, "step": 1572 }, { - "epoch": 0.6986453475460804, - "grad_norm": 0.7195786654461274, - "learning_rate": 9.831255920204685e-06, - "loss": 0.0646, + "epoch": 1.396980461811723, + "grad_norm": 0.6596291184863229, + "learning_rate": 8.225304671252406e-06, + "loss": 0.0548, "step": 1573 }, { - "epoch": 0.6990894958916278, - "grad_norm": 0.6366879939595091, - "learning_rate": 9.830756186694734e-06, - "loss": 0.0808, + "epoch": 1.3978685612788633, + "grad_norm": 0.38186387958597295, + "learning_rate": 8.222342206504922e-06, + "loss": 0.0501, "step": 1574 }, { - "epoch": 0.6995336442371752, - "grad_norm": 0.478436966515515, - "learning_rate": 9.830255727040066e-06, - "loss": 0.059, + "epoch": 1.3987566607460036, + "grad_norm": 0.4833881974668152, + "learning_rate": 8.219377805700277e-06, + "loss": 0.0528, "step": 1575 }, { - "epoch": 0.6999777925827226, - "grad_norm": 0.5378537626686002, - "learning_rate": 9.829754541315912e-06, - "loss": 0.0624, + "epoch": 1.3996447602131439, + "grad_norm": 0.4523554837124434, + "learning_rate": 8.216411470619547e-06, + "loss": 0.069, "step": 1576 }, { - "epoch": 0.70042194092827, - "grad_norm": 0.5458661735869976, - "learning_rate": 9.829252629597607e-06, - "loss": 0.056, + "epoch": 1.4005328596802842, + "grad_norm": 0.47654859451639997, + "learning_rate": 8.21344320304498e-06, + "loss": 0.0546, "step": 1577 }, { - "epoch": 0.7008660892738174, - "grad_norm": 0.7598746622748797, - "learning_rate": 9.828749991960598e-06, - "loss": 0.0742, + "epoch": 1.4014209591474245, + "grad_norm": 0.4398119796047219, + "learning_rate": 8.21047300475997e-06, + "loss": 0.0565, "step": 1578 }, { - "epoch": 0.7013102376193648, - "grad_norm": 0.7874301906671837, - "learning_rate": 9.828246628480438e-06, - "loss": 0.1126, + "epoch": 1.4023090586145648, + "grad_norm": 0.5207074172769443, + "learning_rate": 8.207500877549089e-06, + "loss": 0.0587, "step": 1579 }, { - "epoch": 0.7017543859649122, - "grad_norm": 0.5696760012747649, - "learning_rate": 9.827742539232791e-06, - "loss": 0.0622, + "epoch": 1.403197158081705, + "grad_norm": 0.48650562407801845, + "learning_rate": 8.204526823198055e-06, + "loss": 0.0506, "step": 1580 }, { - "epoch": 0.7021985343104596, - "grad_norm": 0.49299605250258527, - "learning_rate": 9.827237724293434e-06, - "loss": 0.0542, + "epoch": 1.4040852575488454, + "grad_norm": 0.48287027505973873, + "learning_rate": 8.201550843493748e-06, + "loss": 0.053, "step": 1581 }, { - "epoch": 0.7026426826560072, - "grad_norm": 0.7950299040696133, - "learning_rate": 9.826732183738246e-06, - "loss": 0.0736, + "epoch": 1.4049733570159857, + "grad_norm": 0.38947995151850845, + "learning_rate": 8.198572940224208e-06, + "loss": 0.0461, "step": 1582 }, { - "epoch": 0.7030868310015546, - "grad_norm": 0.6701858553269311, - "learning_rate": 9.826225917643217e-06, - "loss": 0.0769, + "epoch": 1.4058614564831262, + "grad_norm": 0.6148736212194573, + "learning_rate": 8.195593115178626e-06, + "loss": 0.0663, "step": 1583 }, { - "epoch": 0.703530979347102, - "grad_norm": 0.7198204203508045, - "learning_rate": 9.825718926084451e-06, - "loss": 0.0607, + "epoch": 1.4067495559502663, + "grad_norm": 0.4121009501774294, + "learning_rate": 8.192611370147347e-06, + "loss": 0.046, "step": 1584 }, { - "epoch": 0.7039751276926494, - "grad_norm": 0.7221114160407107, - "learning_rate": 9.825211209138154e-06, - "loss": 0.0662, + "epoch": 1.4076376554174068, + "grad_norm": 0.5049591962876894, + "learning_rate": 8.189627706921876e-06, + "loss": 0.0534, "step": 1585 }, { - "epoch": 0.7044192760381968, - "grad_norm": 0.5718930945068039, - "learning_rate": 9.82470276688065e-06, - "loss": 0.063, + "epoch": 1.4085257548845471, + "grad_norm": 0.43800379560380437, + "learning_rate": 8.186642127294863e-06, + "loss": 0.0598, "step": 1586 }, { - "epoch": 0.7048634243837442, - "grad_norm": 0.7156464008159005, - "learning_rate": 9.824193599388358e-06, - "loss": 0.061, + "epoch": 1.4094138543516874, + "grad_norm": 0.5704135811442727, + "learning_rate": 8.183654633060114e-06, + "loss": 0.072, "step": 1587 }, { - "epoch": 0.7053075727292916, - "grad_norm": 0.4730100411783297, - "learning_rate": 9.823683706737824e-06, - "loss": 0.0538, + "epoch": 1.4103019538188277, + "grad_norm": 0.3892089524421799, + "learning_rate": 8.180665226012585e-06, + "loss": 0.0484, "step": 1588 }, { - "epoch": 0.705751721074839, - "grad_norm": 0.8151288747975431, - "learning_rate": 9.823173089005686e-06, - "loss": 0.0586, + "epoch": 1.411190053285968, + "grad_norm": 0.461051514875073, + "learning_rate": 8.177673907948378e-06, + "loss": 0.066, "step": 1589 }, { - "epoch": 0.7061958694203864, - "grad_norm": 0.6500001880192491, - "learning_rate": 9.822661746268702e-06, - "loss": 0.0558, + "epoch": 1.4120781527531083, + "grad_norm": 0.7247921988831233, + "learning_rate": 8.174680680664748e-06, + "loss": 0.0686, "step": 1590 }, { - "epoch": 0.7066400177659338, - "grad_norm": 0.47387928526278555, - "learning_rate": 9.822149678603733e-06, - "loss": 0.0537, + "epoch": 1.4129662522202486, + "grad_norm": 0.39580557698655033, + "learning_rate": 8.171685545960095e-06, + "loss": 0.0443, "step": 1591 }, { - "epoch": 0.7070841661114813, - "grad_norm": 0.5533123786666888, - "learning_rate": 9.821636886087755e-06, - "loss": 0.049, + "epoch": 1.413854351687389, + "grad_norm": 0.45187941937848386, + "learning_rate": 8.168688505633962e-06, + "loss": 0.056, "step": 1592 }, { - "epoch": 0.7075283144570287, - "grad_norm": 0.5418212216604064, - "learning_rate": 9.82112336879785e-06, - "loss": 0.0443, + "epoch": 1.4147424511545292, + "grad_norm": 0.48617085607247623, + "learning_rate": 8.16568956148704e-06, + "loss": 0.0675, "step": 1593 }, { - "epoch": 0.7079724628025761, - "grad_norm": 0.595384134286072, - "learning_rate": 9.820609126811202e-06, - "loss": 0.0701, + "epoch": 1.4156305506216698, + "grad_norm": 0.6178370439855861, + "learning_rate": 8.162688715321165e-06, + "loss": 0.0609, "step": 1594 }, { - "epoch": 0.7084166111481235, - "grad_norm": 0.5686391716299167, - "learning_rate": 9.820094160205118e-06, - "loss": 0.0612, + "epoch": 1.4165186500888098, + "grad_norm": 0.36070783666679784, + "learning_rate": 8.159685968939315e-06, + "loss": 0.0494, "step": 1595 }, { - "epoch": 0.7088607594936709, - "grad_norm": 0.6501450869897375, - "learning_rate": 9.819578469057e-06, - "loss": 0.0571, + "epoch": 1.4174067495559504, + "grad_norm": 0.33555797226139794, + "learning_rate": 8.156681324145605e-06, + "loss": 0.0455, "step": 1596 }, { - "epoch": 0.7093049078392183, - "grad_norm": 0.8251798873678969, - "learning_rate": 9.819062053444369e-06, - "loss": 0.0903, + "epoch": 1.4182948490230907, + "grad_norm": 0.44810990333980477, + "learning_rate": 8.1536747827453e-06, + "loss": 0.0498, "step": 1597 }, { - "epoch": 0.7097490561847657, - "grad_norm": 0.659869482663948, - "learning_rate": 9.81854491344485e-06, - "loss": 0.077, + "epoch": 1.419182948490231, + "grad_norm": 0.467364659598731, + "learning_rate": 8.150666346544794e-06, + "loss": 0.0418, "step": 1598 }, { - "epoch": 0.7101932045303131, - "grad_norm": 0.5233735880329885, - "learning_rate": 9.818027049136177e-06, - "loss": 0.0756, + "epoch": 1.4200710479573713, + "grad_norm": 0.5506948501799759, + "learning_rate": 8.147656017351629e-06, + "loss": 0.0511, "step": 1599 }, { - "epoch": 0.7106373528758605, - "grad_norm": 0.5435283663423869, - "learning_rate": 9.817508460596195e-06, - "loss": 0.0517, + "epoch": 1.4209591474245116, + "grad_norm": 0.5381193231690307, + "learning_rate": 8.144643796974477e-06, + "loss": 0.0581, "step": 1600 }, { - "epoch": 0.7110815012214079, - "grad_norm": 0.8462175785185146, - "learning_rate": 9.816989147902855e-06, - "loss": 0.0776, + "epoch": 1.4218472468916519, + "grad_norm": 0.37909069381598576, + "learning_rate": 8.141629687223149e-06, + "loss": 0.0459, "step": 1601 }, { - "epoch": 0.7115256495669554, - "grad_norm": 0.5250146868792045, - "learning_rate": 9.816469111134221e-06, - "loss": 0.0636, + "epoch": 1.4227353463587922, + "grad_norm": 0.5968848612394052, + "learning_rate": 8.138613689908592e-06, + "loss": 0.0523, "step": 1602 }, { - "epoch": 0.7119697979125028, - "grad_norm": 0.4561566850818785, - "learning_rate": 9.81594835036846e-06, - "loss": 0.0549, + "epoch": 1.4236234458259325, + "grad_norm": 0.44627052951661905, + "learning_rate": 8.13559580684289e-06, + "loss": 0.06, "step": 1603 }, { - "epoch": 0.7124139462580502, - "grad_norm": 0.639031123731633, - "learning_rate": 9.815426865683858e-06, - "loss": 0.0739, + "epoch": 1.4245115452930728, + "grad_norm": 0.5174906428310386, + "learning_rate": 8.132576039839248e-06, + "loss": 0.0535, "step": 1604 }, { - "epoch": 0.7128580946035976, - "grad_norm": 0.4546814856440393, - "learning_rate": 9.814904657158793e-06, - "loss": 0.0536, + "epoch": 1.425399644760213, + "grad_norm": 0.4570062922280352, + "learning_rate": 8.12955439071202e-06, + "loss": 0.0447, "step": 1605 }, { - "epoch": 0.713302242949145, - "grad_norm": 0.49725993065397267, - "learning_rate": 9.81438172487177e-06, - "loss": 0.0538, + "epoch": 1.4262877442273534, + "grad_norm": 0.4904082398243398, + "learning_rate": 8.126530861276677e-06, + "loss": 0.0532, "step": 1606 }, { - "epoch": 0.7137463912946924, - "grad_norm": 0.7681224223760409, - "learning_rate": 9.813858068901391e-06, - "loss": 0.0738, + "epoch": 1.427175843694494, + "grad_norm": 0.42793939717884144, + "learning_rate": 8.123505453349828e-06, + "loss": 0.051, "step": 1607 }, { - "epoch": 0.7141905396402398, - "grad_norm": 0.5475444111433433, - "learning_rate": 9.813333689326371e-06, - "loss": 0.0532, + "epoch": 1.428063943161634, + "grad_norm": 0.3451762736415589, + "learning_rate": 8.120478168749201e-06, + "loss": 0.0483, "step": 1608 }, { - "epoch": 0.7146346879857872, - "grad_norm": 0.5001622305576355, - "learning_rate": 9.812808586225533e-06, - "loss": 0.0504, + "epoch": 1.4289520426287745, + "grad_norm": 0.4279245108056465, + "learning_rate": 8.117449009293668e-06, + "loss": 0.0595, "step": 1609 }, { - "epoch": 0.7150788363313346, - "grad_norm": 0.5966203326606518, - "learning_rate": 9.812282759677811e-06, - "loss": 0.0632, + "epoch": 1.4298401420959148, + "grad_norm": 0.37575357615088556, + "learning_rate": 8.114417976803212e-06, + "loss": 0.0541, "step": 1610 }, { - "epoch": 0.715522984676882, - "grad_norm": 0.5024269719688089, - "learning_rate": 9.811756209762242e-06, - "loss": 0.054, + "epoch": 1.4307282415630551, + "grad_norm": 0.44406572585728893, + "learning_rate": 8.111385073098946e-06, + "loss": 0.0483, "step": 1611 }, { - "epoch": 0.7159671330224295, - "grad_norm": 0.5953895951996429, - "learning_rate": 9.811228936557977e-06, - "loss": 0.0687, + "epoch": 1.4316163410301954, + "grad_norm": 0.3826166328358062, + "learning_rate": 8.10835030000311e-06, + "loss": 0.0538, "step": 1612 }, { - "epoch": 0.7164112813679769, - "grad_norm": 0.9253768611459666, - "learning_rate": 9.810700940144275e-06, - "loss": 0.0936, + "epoch": 1.4325044404973357, + "grad_norm": 0.35617527077092515, + "learning_rate": 8.105313659339065e-06, + "loss": 0.0406, "step": 1613 }, { - "epoch": 0.7168554297135243, - "grad_norm": 0.40706001374840456, - "learning_rate": 9.810172220600503e-06, - "loss": 0.0501, + "epoch": 1.433392539964476, + "grad_norm": 0.47394559066290226, + "learning_rate": 8.102275152931294e-06, + "loss": 0.0476, "step": 1614 }, { - "epoch": 0.7172995780590717, - "grad_norm": 0.6702001680907567, - "learning_rate": 9.809642778006135e-06, - "loss": 0.0721, + "epoch": 1.4342806394316163, + "grad_norm": 0.42072502125896794, + "learning_rate": 8.099234782605404e-06, + "loss": 0.0462, "step": 1615 }, { - "epoch": 0.7177437264046191, - "grad_norm": 0.5621892036366335, - "learning_rate": 9.809112612440757e-06, - "loss": 0.0624, + "epoch": 1.4351687388987566, + "grad_norm": 0.4124712231895515, + "learning_rate": 8.096192550188113e-06, + "loss": 0.0474, "step": 1616 }, { - "epoch": 0.7181878747501665, - "grad_norm": 0.6550556626236272, - "learning_rate": 9.808581723984059e-06, - "loss": 0.064, + "epoch": 1.436056838365897, + "grad_norm": 0.358379122077467, + "learning_rate": 8.093148457507272e-06, + "loss": 0.0393, "step": 1617 }, { - "epoch": 0.718632023095714, - "grad_norm": 1.171906822226206, - "learning_rate": 9.808050112715845e-06, - "loss": 0.1172, + "epoch": 1.4369449378330372, + "grad_norm": 0.4731175736396712, + "learning_rate": 8.090102506391835e-06, + "loss": 0.0609, "step": 1618 }, { - "epoch": 0.7190761714412613, - "grad_norm": 0.4455710256139167, - "learning_rate": 9.807517778716025e-06, - "loss": 0.045, + "epoch": 1.4378330373001775, + "grad_norm": 0.4612618118512498, + "learning_rate": 8.087054698671884e-06, + "loss": 0.0687, "step": 1619 }, { - "epoch": 0.7195203197868087, - "grad_norm": 0.6254516596662227, - "learning_rate": 9.806984722064616e-06, - "loss": 0.0705, + "epoch": 1.438721136767318, + "grad_norm": 0.38400775436146545, + "learning_rate": 8.084005036178608e-06, + "loss": 0.0587, "step": 1620 }, { - "epoch": 0.7199644681323563, - "grad_norm": 0.5571332804687471, - "learning_rate": 9.806450942841747e-06, - "loss": 0.0502, + "epoch": 1.4396092362344581, + "grad_norm": 0.3864071686719111, + "learning_rate": 8.080953520744319e-06, + "loss": 0.0444, "step": 1621 }, { - "epoch": 0.7204086164779037, - "grad_norm": 1.0191625308451748, - "learning_rate": 9.805916441127657e-06, - "loss": 0.0589, + "epoch": 1.4404973357015987, + "grad_norm": 0.5102453425146569, + "learning_rate": 8.077900154202432e-06, + "loss": 0.0618, "step": 1622 }, { - "epoch": 0.7208527648234511, - "grad_norm": 0.7859502245676739, - "learning_rate": 9.805381217002684e-06, - "loss": 0.0431, + "epoch": 1.441385435168739, + "grad_norm": 0.37693596322117345, + "learning_rate": 8.074844938387485e-06, + "loss": 0.0445, "step": 1623 }, { - "epoch": 0.7212969131689985, - "grad_norm": 0.8065166441211357, - "learning_rate": 9.804845270547288e-06, - "loss": 0.0728, + "epoch": 1.4422735346358793, + "grad_norm": 0.43727362483985777, + "learning_rate": 8.071787875135116e-06, + "loss": 0.0557, "step": 1624 }, { - "epoch": 0.7217410615145459, - "grad_norm": 0.7073921214139905, - "learning_rate": 9.804308601842026e-06, - "loss": 0.0753, + "epoch": 1.4431616341030196, + "grad_norm": 0.4845514139285602, + "learning_rate": 8.068728966282084e-06, + "loss": 0.0642, "step": 1625 }, { - "epoch": 0.7221852098600933, - "grad_norm": 0.7787625206064858, - "learning_rate": 9.80377121096757e-06, - "loss": 0.0854, + "epoch": 1.4440497335701599, + "grad_norm": 0.4033972637944131, + "learning_rate": 8.065668213666252e-06, + "loss": 0.055, "step": 1626 }, { - "epoch": 0.7226293582056407, - "grad_norm": 1.148628838473968, - "learning_rate": 9.8032330980047e-06, - "loss": 0.092, + "epoch": 1.4449378330373002, + "grad_norm": 0.5384839931180158, + "learning_rate": 8.062605619126585e-06, + "loss": 0.0553, "step": 1627 }, { - "epoch": 0.7230735065511881, - "grad_norm": 0.5694973855182914, - "learning_rate": 9.802694263034302e-06, - "loss": 0.0661, + "epoch": 1.4458259325044405, + "grad_norm": 0.5790748920097467, + "learning_rate": 8.059541184503163e-06, + "loss": 0.0561, "step": 1628 }, { - "epoch": 0.7235176548967355, - "grad_norm": 0.6928354319078996, - "learning_rate": 9.802154706137372e-06, - "loss": 0.0588, + "epoch": 1.4467140319715808, + "grad_norm": 0.8618873360176641, + "learning_rate": 8.05647491163717e-06, + "loss": 0.0533, "step": 1629 }, { - "epoch": 0.7239618032422829, - "grad_norm": 0.5747608874251084, - "learning_rate": 9.801614427395018e-06, - "loss": 0.073, + "epoch": 1.447602131438721, + "grad_norm": 0.5534422282504271, + "learning_rate": 8.053406802370892e-06, + "loss": 0.0546, "step": 1630 }, { - "epoch": 0.7244059515878304, - "grad_norm": 0.8245000270563847, - "learning_rate": 9.801073426888447e-06, - "loss": 0.0602, + "epoch": 1.4484902309058614, + "grad_norm": 0.5408854599299867, + "learning_rate": 8.05033685854772e-06, + "loss": 0.0536, "step": 1631 }, { - "epoch": 0.7248500999333778, - "grad_norm": 0.6063953480963461, - "learning_rate": 9.800531704698986e-06, - "loss": 0.0658, + "epoch": 1.4493783303730017, + "grad_norm": 0.35642300584211933, + "learning_rate": 8.047265082012145e-06, + "loss": 0.0407, "step": 1632 }, { - "epoch": 0.7252942482789252, - "grad_norm": 0.5240737295642546, - "learning_rate": 9.799989260908063e-06, - "loss": 0.0564, + "epoch": 1.4502664298401422, + "grad_norm": 0.38358783786879375, + "learning_rate": 8.044191474609762e-06, + "loss": 0.0549, "step": 1633 }, { - "epoch": 0.7257383966244726, - "grad_norm": 0.7016224236011144, - "learning_rate": 9.799446095597216e-06, - "loss": 0.0646, + "epoch": 1.4511545293072823, + "grad_norm": 0.6098586672152925, + "learning_rate": 8.041116038187266e-06, + "loss": 0.0567, "step": 1634 }, { - "epoch": 0.72618254497002, - "grad_norm": 0.6626808866197511, - "learning_rate": 9.798902208848093e-06, - "loss": 0.0818, + "epoch": 1.4520426287744228, + "grad_norm": 0.7964422165476598, + "learning_rate": 8.038038774592449e-06, + "loss": 0.0577, "step": 1635 }, { - "epoch": 0.7266266933155674, - "grad_norm": 0.6141836548174839, - "learning_rate": 9.79835760074245e-06, - "loss": 0.0508, + "epoch": 1.452930728241563, + "grad_norm": 0.39671778431004695, + "learning_rate": 8.034959685674199e-06, + "loss": 0.0479, "step": 1636 }, { - "epoch": 0.7270708416611148, - "grad_norm": 0.6868383771343689, - "learning_rate": 9.797812271362149e-06, - "loss": 0.0552, + "epoch": 1.4538188277087034, + "grad_norm": 0.5714020613486274, + "learning_rate": 8.031878773282507e-06, + "loss": 0.0589, "step": 1637 }, { - "epoch": 0.7275149900066622, - "grad_norm": 0.4532209654763099, - "learning_rate": 9.79726622078916e-06, - "loss": 0.0451, + "epoch": 1.4547069271758437, + "grad_norm": 0.5655019695958018, + "learning_rate": 8.028796039268455e-06, + "loss": 0.0724, "step": 1638 }, { - "epoch": 0.7279591383522096, - "grad_norm": 0.6808218136734143, - "learning_rate": 9.79671944910557e-06, - "loss": 0.0571, + "epoch": 1.455595026642984, + "grad_norm": 0.45811594042660747, + "learning_rate": 8.02571148548422e-06, + "loss": 0.0566, "step": 1639 }, { - "epoch": 0.728403286697757, - "grad_norm": 0.7510893535708428, - "learning_rate": 9.796171956393566e-06, - "loss": 0.0669, + "epoch": 1.4564831261101243, + "grad_norm": 0.4297749875770322, + "learning_rate": 8.022625113783071e-06, + "loss": 0.0605, "step": 1640 }, { - "epoch": 0.7288474350433045, - "grad_norm": 0.41663397739980335, - "learning_rate": 9.79562374273544e-06, - "loss": 0.0441, + "epoch": 1.4573712255772646, + "grad_norm": 0.34519408855532796, + "learning_rate": 8.019536926019373e-06, + "loss": 0.0452, "step": 1641 }, { - "epoch": 0.7292915833888519, - "grad_norm": 0.5592036648884667, - "learning_rate": 9.795074808213604e-06, - "loss": 0.0562, + "epoch": 1.458259325044405, + "grad_norm": 0.3839507109056651, + "learning_rate": 8.016446924048582e-06, + "loss": 0.0455, "step": 1642 }, { - "epoch": 0.7297357317343993, - "grad_norm": 0.495293158677455, - "learning_rate": 9.794525152910573e-06, - "loss": 0.0621, + "epoch": 1.4591474245115452, + "grad_norm": 0.4504413415312184, + "learning_rate": 8.013355109727237e-06, + "loss": 0.0559, "step": 1643 }, { - "epoch": 0.7301798800799467, - "grad_norm": 0.5602886889611556, - "learning_rate": 9.793974776908963e-06, - "loss": 0.0531, + "epoch": 1.4600355239786857, + "grad_norm": 0.41509316012858266, + "learning_rate": 8.010261484912974e-06, + "loss": 0.0528, "step": 1644 }, { - "epoch": 0.7306240284254941, - "grad_norm": 0.5944252332564274, - "learning_rate": 9.79342368029151e-06, - "loss": 0.0563, + "epoch": 1.4609236234458258, + "grad_norm": 0.4419276919161984, + "learning_rate": 8.007166051464515e-06, + "loss": 0.0518, "step": 1645 }, { - "epoch": 0.7310681767710415, - "grad_norm": 0.482240455293522, - "learning_rate": 9.792871863141052e-06, - "loss": 0.0576, + "epoch": 1.4618117229129663, + "grad_norm": 0.47221828061702026, + "learning_rate": 8.004068811241666e-06, + "loss": 0.0528, "step": 1646 }, { - "epoch": 0.7315123251165889, - "grad_norm": 0.5733741929298166, - "learning_rate": 9.792319325540537e-06, - "loss": 0.0684, + "epoch": 1.4626998223801067, + "grad_norm": 0.35945339568820633, + "learning_rate": 8.000969766105317e-06, + "loss": 0.0459, "step": 1647 }, { - "epoch": 0.7319564734621363, - "grad_norm": 0.4985634447799434, - "learning_rate": 9.79176606757302e-06, - "loss": 0.0597, + "epoch": 1.463587921847247, + "grad_norm": 0.45825253444887293, + "learning_rate": 7.997868917917453e-06, + "loss": 0.061, "step": 1648 }, { - "epoch": 0.7324006218076837, - "grad_norm": 0.685246073677029, - "learning_rate": 9.791212089321662e-06, - "loss": 0.0721, + "epoch": 1.4644760213143873, + "grad_norm": 0.4298496576221392, + "learning_rate": 7.994766268541127e-06, + "loss": 0.0516, "step": 1649 }, { - "epoch": 0.7328447701532311, - "grad_norm": 0.7566932790234222, - "learning_rate": 9.790657390869742e-06, - "loss": 0.0665, + "epoch": 1.4653641207815276, + "grad_norm": 0.5922664838557191, + "learning_rate": 7.991661819840486e-06, + "loss": 0.0618, "step": 1650 }, { - "epoch": 0.7332889184987786, - "grad_norm": 0.47289806502397586, - "learning_rate": 9.790101972300635e-06, - "loss": 0.0536, + "epoch": 1.4662522202486679, + "grad_norm": 0.3709128715491371, + "learning_rate": 7.988555573680753e-06, + "loss": 0.0461, "step": 1651 }, { - "epoch": 0.733733066844326, - "grad_norm": 0.9971658246699995, - "learning_rate": 9.789545833697833e-06, - "loss": 0.0959, + "epoch": 1.4671403197158082, + "grad_norm": 0.4102307970961409, + "learning_rate": 7.985447531928234e-06, + "loss": 0.0451, "step": 1652 }, { - "epoch": 0.7341772151898734, - "grad_norm": 0.588355861783506, - "learning_rate": 9.788988975144933e-06, - "loss": 0.0556, + "epoch": 1.4680284191829485, + "grad_norm": 0.45617458628874286, + "learning_rate": 7.982337696450309e-06, + "loss": 0.0504, "step": 1653 }, { - "epoch": 0.7346213635354208, - "grad_norm": 0.5671908470000354, - "learning_rate": 9.788431396725637e-06, - "loss": 0.0669, + "epoch": 1.4689165186500888, + "grad_norm": 0.4619976657842071, + "learning_rate": 7.979226069115438e-06, + "loss": 0.0522, "step": 1654 }, { - "epoch": 0.7350655118809682, - "grad_norm": 0.758993148904425, - "learning_rate": 9.787873098523763e-06, - "loss": 0.0652, + "epoch": 1.469804618117229, + "grad_norm": 0.3989223785652348, + "learning_rate": 7.976112651793162e-06, + "loss": 0.0473, "step": 1655 }, { - "epoch": 0.7355096602265156, - "grad_norm": 0.8048604855196553, - "learning_rate": 9.787314080623229e-06, - "loss": 0.0698, + "epoch": 1.4706927175843694, + "grad_norm": 0.5098062947975681, + "learning_rate": 7.972997446354091e-06, + "loss": 0.05, "step": 1656 }, { - "epoch": 0.735953808572063, - "grad_norm": 0.8623667194773064, - "learning_rate": 9.786754343108066e-06, - "loss": 0.0779, + "epoch": 1.47158081705151, + "grad_norm": 0.3552857864695806, + "learning_rate": 7.969880454669912e-06, + "loss": 0.0437, "step": 1657 }, { - "epoch": 0.7363979569176105, - "grad_norm": 0.61076619104796, - "learning_rate": 9.786193886062415e-06, - "loss": 0.0769, + "epoch": 1.47246891651865, + "grad_norm": 0.4763143967476609, + "learning_rate": 7.966761678613385e-06, + "loss": 0.0415, "step": 1658 }, { - "epoch": 0.7368421052631579, - "grad_norm": 0.36227469684290603, - "learning_rate": 9.785632709570519e-06, - "loss": 0.0414, + "epoch": 1.4733570159857905, + "grad_norm": 0.46591629758296743, + "learning_rate": 7.963641120058341e-06, + "loss": 0.0464, "step": 1659 }, { - "epoch": 0.7372862536087053, - "grad_norm": 4.048112931451507, - "learning_rate": 9.785070813716733e-06, - "loss": 0.0454, + "epoch": 1.4742451154529308, + "grad_norm": 0.5211145953421122, + "learning_rate": 7.960518780879688e-06, + "loss": 0.0539, "step": 1660 }, { - "epoch": 0.7377304019542528, - "grad_norm": 0.6035767336706519, - "learning_rate": 9.784508198585519e-06, - "loss": 0.0633, + "epoch": 1.475133214920071, + "grad_norm": 0.5228037756990918, + "learning_rate": 7.957394662953395e-06, + "loss": 0.0641, "step": 1661 }, { - "epoch": 0.7381745502998002, - "grad_norm": 0.469714457030227, - "learning_rate": 9.783944864261448e-06, - "loss": 0.0472, + "epoch": 1.4760213143872114, + "grad_norm": 0.5671403584205867, + "learning_rate": 7.954268768156504e-06, + "loss": 0.0518, "step": 1662 }, { - "epoch": 0.7386186986453476, - "grad_norm": 0.8129946899965498, - "learning_rate": 9.783380810829198e-06, - "loss": 0.0613, + "epoch": 1.4769094138543517, + "grad_norm": 0.4488867432581175, + "learning_rate": 7.951141098367125e-06, + "loss": 0.0505, "step": 1663 }, { - "epoch": 0.739062846990895, - "grad_norm": 0.6753504015110883, - "learning_rate": 9.782816038373556e-06, - "loss": 0.0902, + "epoch": 1.477797513321492, + "grad_norm": 0.5898047326800395, + "learning_rate": 7.948011655464435e-06, + "loss": 0.0569, "step": 1664 }, { - "epoch": 0.7395069953364424, - "grad_norm": 0.6384325521859789, - "learning_rate": 9.782250546979421e-06, - "loss": 0.074, + "epoch": 1.4786856127886323, + "grad_norm": 0.36299263667907605, + "learning_rate": 7.944880441328673e-06, + "loss": 0.051, "step": 1665 }, { - "epoch": 0.7399511436819898, - "grad_norm": 0.6956365636896437, - "learning_rate": 9.781684336731791e-06, - "loss": 0.0567, + "epoch": 1.4795737122557726, + "grad_norm": 0.7758824677337602, + "learning_rate": 7.941747457841145e-06, + "loss": 0.0633, "step": 1666 }, { - "epoch": 0.7403952920275372, - "grad_norm": 0.8256720537662366, - "learning_rate": 9.781117407715779e-06, - "loss": 0.0651, + "epoch": 1.480461811722913, + "grad_norm": 0.4267977686777056, + "learning_rate": 7.938612706884221e-06, + "loss": 0.0541, "step": 1667 }, { - "epoch": 0.7408394403730846, - "grad_norm": 0.5355855493602046, - "learning_rate": 9.780549760016602e-06, - "loss": 0.0619, + "epoch": 1.4813499111900532, + "grad_norm": 0.3853287094386836, + "learning_rate": 7.935476190341329e-06, + "loss": 0.0373, "step": 1668 }, { - "epoch": 0.741283588718632, - "grad_norm": 0.5253416844707828, - "learning_rate": 9.77998139371959e-06, - "loss": 0.0592, + "epoch": 1.4822380106571935, + "grad_norm": 0.43538914862836475, + "learning_rate": 7.93233791009696e-06, + "loss": 0.0543, "step": 1669 }, { - "epoch": 0.7417277370641794, - "grad_norm": 0.6458381116052228, - "learning_rate": 9.779412308910176e-06, - "loss": 0.0631, + "epoch": 1.483126110124334, + "grad_norm": 0.45485804483725983, + "learning_rate": 7.92919786803667e-06, + "loss": 0.0505, "step": 1670 }, { - "epoch": 0.7421718854097269, - "grad_norm": 0.6458810882415751, - "learning_rate": 9.778842505673906e-06, - "loss": 0.0721, + "epoch": 1.4840142095914741, + "grad_norm": 0.5662436734745963, + "learning_rate": 7.926056066047058e-06, + "loss": 0.047, "step": 1671 }, { - "epoch": 0.7426160337552743, - "grad_norm": 0.578667702719237, - "learning_rate": 9.778271984096427e-06, - "loss": 0.0622, + "epoch": 1.4849023090586146, + "grad_norm": 0.48064756279116727, + "learning_rate": 7.922912506015803e-06, + "loss": 0.0471, "step": 1672 }, { - "epoch": 0.7430601821008217, - "grad_norm": 0.7294244004095107, - "learning_rate": 9.777700744263502e-06, - "loss": 0.0763, + "epoch": 1.485790408525755, + "grad_norm": 0.5239235119594058, + "learning_rate": 7.91976718983162e-06, + "loss": 0.0507, "step": 1673 }, { - "epoch": 0.7435043304463691, - "grad_norm": 0.4949092508929545, - "learning_rate": 9.777128786260995e-06, - "loss": 0.0491, + "epoch": 1.4866785079928952, + "grad_norm": 0.47340647494697957, + "learning_rate": 7.916620119384289e-06, + "loss": 0.0573, "step": 1674 }, { - "epoch": 0.7439484787919165, - "grad_norm": 0.49019523675297216, - "learning_rate": 9.776556110174882e-06, - "loss": 0.0487, + "epoch": 1.4875666074600356, + "grad_norm": 0.4257589822557975, + "learning_rate": 7.913471296564641e-06, + "loss": 0.0455, "step": 1675 }, { - "epoch": 0.7443926271374639, - "grad_norm": 0.6147792916360124, - "learning_rate": 9.775982716091245e-06, - "loss": 0.0468, + "epoch": 1.4884547069271759, + "grad_norm": 0.4007316318150693, + "learning_rate": 7.910320723264563e-06, + "loss": 0.0458, "step": 1676 }, { - "epoch": 0.7448367754830113, - "grad_norm": 0.7728423947263149, - "learning_rate": 9.775408604096276e-06, - "loss": 0.0843, + "epoch": 1.4893428063943162, + "grad_norm": 0.4826000147485177, + "learning_rate": 7.907168401376993e-06, + "loss": 0.0529, "step": 1677 }, { - "epoch": 0.7452809238285587, - "grad_norm": 0.46460994551018925, - "learning_rate": 9.774833774276278e-06, - "loss": 0.0475, + "epoch": 1.4902309058614565, + "grad_norm": 0.37370910532349544, + "learning_rate": 7.904014332795915e-06, + "loss": 0.043, "step": 1678 }, { - "epoch": 0.7457250721741061, - "grad_norm": 0.6020989880539805, - "learning_rate": 9.77425822671765e-06, - "loss": 0.0548, + "epoch": 1.4911190053285968, + "grad_norm": 0.49968928171230986, + "learning_rate": 7.900858519416368e-06, + "loss": 0.0544, "step": 1679 }, { - "epoch": 0.7461692205196535, - "grad_norm": 0.4185492378453488, - "learning_rate": 9.77368196150691e-06, - "loss": 0.0697, + "epoch": 1.492007104795737, + "grad_norm": 0.4528316894124392, + "learning_rate": 7.897700963134436e-06, + "loss": 0.0529, "step": 1680 }, { - "epoch": 0.746613368865201, - "grad_norm": 0.47026038790260477, - "learning_rate": 9.77310497873068e-06, - "loss": 0.0577, + "epoch": 1.4928952042628774, + "grad_norm": 0.4222293956218277, + "learning_rate": 7.894541665847253e-06, + "loss": 0.0441, "step": 1681 }, { - "epoch": 0.7470575172107484, - "grad_norm": 0.7242331312035356, - "learning_rate": 9.772527278475694e-06, - "loss": 0.0646, + "epoch": 1.4937833037300177, + "grad_norm": 0.43844080754157805, + "learning_rate": 7.891380629452994e-06, + "loss": 0.05, "step": 1682 }, { - "epoch": 0.7475016655562958, - "grad_norm": 0.5498639479583922, - "learning_rate": 9.771948860828783e-06, - "loss": 0.0768, + "epoch": 1.4946714031971582, + "grad_norm": 0.41426204357656377, + "learning_rate": 7.888217855850885e-06, + "loss": 0.0505, "step": 1683 }, { - "epoch": 0.7479458139018432, - "grad_norm": 0.5132952000374971, - "learning_rate": 9.7713697258769e-06, - "loss": 0.0749, + "epoch": 1.4955595026642983, + "grad_norm": 0.3721657025423876, + "learning_rate": 7.885053346941193e-06, + "loss": 0.0457, "step": 1684 }, { - "epoch": 0.7483899622473906, - "grad_norm": 0.567848025082148, - "learning_rate": 9.770789873707095e-06, - "loss": 0.0853, + "epoch": 1.4964476021314388, + "grad_norm": 0.5019826083758884, + "learning_rate": 7.881887104625226e-06, + "loss": 0.0743, "step": 1685 }, { - "epoch": 0.748834110592938, - "grad_norm": 0.5686689909367685, - "learning_rate": 9.770209304406531e-06, - "loss": 0.0628, + "epoch": 1.497335701598579, + "grad_norm": 0.42722738957810236, + "learning_rate": 7.878719130805336e-06, + "loss": 0.0541, "step": 1686 }, { - "epoch": 0.7492782589384854, - "grad_norm": 0.6034469126916269, - "learning_rate": 9.769628018062477e-06, - "loss": 0.0479, + "epoch": 1.4982238010657194, + "grad_norm": 0.4124265597348239, + "learning_rate": 7.875549427384916e-06, + "loss": 0.0424, "step": 1687 }, { - "epoch": 0.7497224072840328, - "grad_norm": 0.43723219237091576, - "learning_rate": 9.769046014762307e-06, - "loss": 0.0654, + "epoch": 1.4991119005328597, + "grad_norm": 0.38316319603171595, + "learning_rate": 7.872377996268394e-06, + "loss": 0.0471, "step": 1688 }, { - "epoch": 0.7501665556295802, - "grad_norm": 0.6855260472494596, - "learning_rate": 9.76846329459351e-06, - "loss": 0.0651, + "epoch": 1.5, + "grad_norm": 0.5459557077565556, + "learning_rate": 7.869204839361238e-06, + "loss": 0.0566, "step": 1689 }, { - "epoch": 0.7506107039751277, - "grad_norm": 0.722060394293234, - "learning_rate": 9.767879857643681e-06, - "loss": 0.0552, + "epoch": 1.5008880994671403, + "grad_norm": 0.41423317774594537, + "learning_rate": 7.866029958569956e-06, + "loss": 0.0524, "step": 1690 }, { - "epoch": 0.7510548523206751, - "grad_norm": 0.463106704754472, - "learning_rate": 9.767295704000514e-06, - "loss": 0.0534, + "epoch": 1.5017761989342806, + "grad_norm": 0.37634240216102716, + "learning_rate": 7.862853355802089e-06, + "loss": 0.0489, "step": 1691 }, { - "epoch": 0.7514990006662225, - "grad_norm": 0.6415358905679467, - "learning_rate": 9.766710833751823e-06, - "loss": 0.0806, + "epoch": 1.502664298401421, + "grad_norm": 0.5726844461852207, + "learning_rate": 7.859675032966213e-06, + "loss": 0.0639, "step": 1692 }, { - "epoch": 0.75194314901177, - "grad_norm": 0.5352784756952503, - "learning_rate": 9.76612524698552e-06, - "loss": 0.0558, + "epoch": 1.5035523978685612, + "grad_norm": 0.4360976829462697, + "learning_rate": 7.856494991971933e-06, + "loss": 0.0444, "step": 1693 }, { - "epoch": 0.7523872973573174, - "grad_norm": 1.1814652854922993, - "learning_rate": 9.76553894378963e-06, - "loss": 0.0847, + "epoch": 1.5044404973357017, + "grad_norm": 0.40187458371621043, + "learning_rate": 7.853313234729896e-06, + "loss": 0.0443, "step": 1694 }, { - "epoch": 0.7528314457028648, - "grad_norm": 0.6986298733118028, - "learning_rate": 9.764951924252284e-06, - "loss": 0.05, + "epoch": 1.5053285968028418, + "grad_norm": 0.5477166486760292, + "learning_rate": 7.850129763151773e-06, + "loss": 0.0552, "step": 1695 }, { - "epoch": 0.7532755940484122, - "grad_norm": 0.5447554709895742, - "learning_rate": 9.764364188461723e-06, - "loss": 0.0485, + "epoch": 1.5062166962699823, + "grad_norm": 0.4998916623686671, + "learning_rate": 7.846944579150266e-06, + "loss": 0.0714, "step": 1696 }, { - "epoch": 0.7537197423939596, - "grad_norm": 0.47846902684606657, - "learning_rate": 9.76377573650629e-06, - "loss": 0.0551, + "epoch": 1.5071047957371224, + "grad_norm": 0.5352597644238419, + "learning_rate": 7.843757684639108e-06, + "loss": 0.0601, "step": 1697 }, { - "epoch": 0.754163890739507, - "grad_norm": 0.8115375903439791, - "learning_rate": 9.763186568474443e-06, - "loss": 0.0701, + "epoch": 1.507992895204263, + "grad_norm": 0.45412501834567376, + "learning_rate": 7.840569081533058e-06, + "loss": 0.0574, "step": 1698 }, { - "epoch": 0.7546080390850544, - "grad_norm": 0.5954397107201211, - "learning_rate": 9.762596684454742e-06, - "loss": 0.0474, + "epoch": 1.5088809946714032, + "grad_norm": 0.3785546516205003, + "learning_rate": 7.8373787717479e-06, + "loss": 0.0477, "step": 1699 }, { - "epoch": 0.7550521874306019, - "grad_norm": 0.9043348007985654, - "learning_rate": 9.762006084535857e-06, - "loss": 0.0752, + "epoch": 1.5097690941385435, + "grad_norm": 0.47783613914291184, + "learning_rate": 7.834186757200448e-06, + "loss": 0.0614, "step": 1700 }, { - "epoch": 0.7554963357761493, - "grad_norm": 0.5876955603814114, - "learning_rate": 9.761414768806566e-06, - "loss": 0.058, + "epoch": 1.5106571936056838, + "grad_norm": 0.6507710007364977, + "learning_rate": 7.830993039808537e-06, + "loss": 0.0611, "step": 1701 }, { - "epoch": 0.7559404841216967, - "grad_norm": 0.6215494346347864, - "learning_rate": 9.76082273735575e-06, - "loss": 0.074, + "epoch": 1.5115452930728241, + "grad_norm": 0.5108180337745931, + "learning_rate": 7.827797621491025e-06, + "loss": 0.0499, "step": 1702 }, { - "epoch": 0.7563846324672441, - "grad_norm": 0.6691646984262034, - "learning_rate": 9.760229990272407e-06, - "loss": 0.0752, + "epoch": 1.5124333925399644, + "grad_norm": 0.4578325365843359, + "learning_rate": 7.824600504167793e-06, + "loss": 0.0502, "step": 1703 }, { - "epoch": 0.7568287808127915, - "grad_norm": 0.6530103461059469, - "learning_rate": 9.759636527645633e-06, - "loss": 0.0512, + "epoch": 1.5133214920071048, + "grad_norm": 0.36602192544459716, + "learning_rate": 7.82140168975974e-06, + "loss": 0.0522, "step": 1704 }, { - "epoch": 0.7572729291583389, - "grad_norm": 0.5915232768461202, - "learning_rate": 9.759042349564638e-06, - "loss": 0.0505, + "epoch": 1.5142095914742453, + "grad_norm": 0.40292599196527157, + "learning_rate": 7.818201180188794e-06, + "loss": 0.0542, "step": 1705 }, { - "epoch": 0.7577170775038863, - "grad_norm": 0.6211496225849954, - "learning_rate": 9.758447456118734e-06, - "loss": 0.0527, + "epoch": 1.5150976909413854, + "grad_norm": 0.541855291094372, + "learning_rate": 7.814998977377886e-06, + "loss": 0.0562, "step": 1706 }, { - "epoch": 0.7581612258494337, - "grad_norm": 0.9060279971096812, - "learning_rate": 9.757851847397349e-06, - "loss": 0.0687, + "epoch": 1.5159857904085259, + "grad_norm": 0.42728006868013124, + "learning_rate": 7.811795083250979e-06, + "loss": 0.0524, "step": 1707 }, { - "epoch": 0.7586053741949811, - "grad_norm": 1.0250865769852973, - "learning_rate": 9.757255523490006e-06, - "loss": 0.0935, + "epoch": 1.516873889875666, + "grad_norm": 0.5918224235333536, + "learning_rate": 7.808589499733045e-06, + "loss": 0.0515, "step": 1708 }, { - "epoch": 0.7590495225405285, - "grad_norm": 0.546436812313742, - "learning_rate": 9.756658484486348e-06, - "loss": 0.0667, + "epoch": 1.5177619893428065, + "grad_norm": 0.3453187120825003, + "learning_rate": 7.80538222875007e-06, + "loss": 0.0431, "step": 1709 }, { - "epoch": 0.759493670886076, - "grad_norm": 0.5515990500251661, - "learning_rate": 9.756060730476117e-06, - "loss": 0.0529, + "epoch": 1.5186500888099466, + "grad_norm": 0.434267114063888, + "learning_rate": 7.802173272229058e-06, + "loss": 0.055, "step": 1710 }, { - "epoch": 0.7599378192316234, - "grad_norm": 0.618132276932801, - "learning_rate": 9.755462261549167e-06, - "loss": 0.0506, + "epoch": 1.519538188277087, + "grad_norm": 0.5718702323522821, + "learning_rate": 7.798962632098024e-06, + "loss": 0.0525, "step": 1711 }, { - "epoch": 0.7603819675771708, - "grad_norm": 0.6625277428767956, - "learning_rate": 9.754863077795459e-06, - "loss": 0.0718, + "epoch": 1.5204262877442274, + "grad_norm": 0.3870661493171084, + "learning_rate": 7.795750310285993e-06, + "loss": 0.055, "step": 1712 }, { - "epoch": 0.7608261159227182, - "grad_norm": 0.45483688763929425, - "learning_rate": 9.754263179305058e-06, - "loss": 0.0446, + "epoch": 1.5213143872113677, + "grad_norm": 0.5491931559022314, + "learning_rate": 7.792536308723001e-06, + "loss": 0.0509, "step": 1713 }, { - "epoch": 0.7612702642682656, - "grad_norm": 0.6438763676440094, - "learning_rate": 9.753662566168142e-06, - "loss": 0.0856, + "epoch": 1.522202486678508, + "grad_norm": 0.34261387324707615, + "learning_rate": 7.789320629340096e-06, + "loss": 0.0374, "step": 1714 }, { - "epoch": 0.761714412613813, - "grad_norm": 0.6595899132325235, - "learning_rate": 9.75306123847499e-06, - "loss": 0.0725, + "epoch": 1.5230905861456483, + "grad_norm": 0.5522900715341141, + "learning_rate": 7.786103274069331e-06, + "loss": 0.0437, "step": 1715 }, { - "epoch": 0.7621585609593604, - "grad_norm": 0.6533315196440966, - "learning_rate": 9.752459196315996e-06, - "loss": 0.0607, + "epoch": 1.5239786856127886, + "grad_norm": 0.5101094231820494, + "learning_rate": 7.782884244843766e-06, + "loss": 0.0695, "step": 1716 }, { - "epoch": 0.7626027093049078, - "grad_norm": 0.5435323069050279, - "learning_rate": 9.751856439781653e-06, - "loss": 0.0511, + "epoch": 1.524866785079929, + "grad_norm": 0.4303875587604101, + "learning_rate": 7.779663543597471e-06, + "loss": 0.0519, "step": 1717 }, { - "epoch": 0.7630468576504552, - "grad_norm": 0.5454956836386448, - "learning_rate": 9.751252968962567e-06, - "loss": 0.0828, + "epoch": 1.5257548845470694, + "grad_norm": 0.38600267270958155, + "learning_rate": 7.776441172265511e-06, + "loss": 0.0459, "step": 1718 }, { - "epoch": 0.7634910059960026, - "grad_norm": 0.5715320834454507, - "learning_rate": 9.75064878394945e-06, - "loss": 0.0518, + "epoch": 1.5266429840142095, + "grad_norm": 0.4958890806451229, + "learning_rate": 7.773217132783968e-06, + "loss": 0.0647, "step": 1719 }, { - "epoch": 0.7639351543415501, - "grad_norm": 0.6744811382532548, - "learning_rate": 9.750043884833121e-06, - "loss": 0.0508, + "epoch": 1.52753108348135, + "grad_norm": 0.4257022840174458, + "learning_rate": 7.769991427089915e-06, + "loss": 0.0558, "step": 1720 }, { - "epoch": 0.7643793026870975, - "grad_norm": 0.6108688890165651, - "learning_rate": 9.749438271704508e-06, - "loss": 0.0615, + "epoch": 1.52841918294849, + "grad_norm": 0.549842543803982, + "learning_rate": 7.76676405712143e-06, + "loss": 0.0646, "step": 1721 }, { - "epoch": 0.7648234510326449, - "grad_norm": 0.5674222482544424, - "learning_rate": 9.748831944654643e-06, - "loss": 0.0644, + "epoch": 1.5293072824156306, + "grad_norm": 0.5460234756719656, + "learning_rate": 7.76353502481759e-06, + "loss": 0.051, "step": 1722 }, { - "epoch": 0.7652675993781923, - "grad_norm": 0.7356158997613679, - "learning_rate": 9.74822490377467e-06, - "loss": 0.0733, + "epoch": 1.5301953818827707, + "grad_norm": 0.4107897311322575, + "learning_rate": 7.760304332118475e-06, + "loss": 0.0525, "step": 1723 }, { - "epoch": 0.7657117477237397, - "grad_norm": 0.4849462211495647, - "learning_rate": 9.747617149155834e-06, - "loss": 0.0443, + "epoch": 1.5310834813499112, + "grad_norm": 0.354086275064959, + "learning_rate": 7.757071980965156e-06, + "loss": 0.0441, "step": 1724 }, { - "epoch": 0.7661558960692871, - "grad_norm": 0.831191855315457, - "learning_rate": 9.747008680889493e-06, - "loss": 0.0659, + "epoch": 1.5319715808170515, + "grad_norm": 0.5648261454732384, + "learning_rate": 7.753837973299706e-06, + "loss": 0.0623, "step": 1725 }, { - "epoch": 0.7666000444148345, - "grad_norm": 0.6526473429491929, - "learning_rate": 9.746399499067109e-06, - "loss": 0.064, + "epoch": 1.5328596802841918, + "grad_norm": 0.3421281078802634, + "learning_rate": 7.75060231106519e-06, + "loss": 0.0332, "step": 1726 }, { - "epoch": 0.7670441927603819, - "grad_norm": 0.5803840610292479, - "learning_rate": 9.745789603780254e-06, - "loss": 0.053, + "epoch": 1.5337477797513321, + "grad_norm": 0.4674384959414192, + "learning_rate": 7.747364996205669e-06, + "loss": 0.0542, "step": 1727 }, { - "epoch": 0.7674883411059293, - "grad_norm": 0.6610162442549882, - "learning_rate": 9.745178995120604e-06, - "loss": 0.0626, + "epoch": 1.5346358792184724, + "grad_norm": 0.3837912909909046, + "learning_rate": 7.744126030666193e-06, + "loss": 0.0473, "step": 1728 }, { - "epoch": 0.7679324894514767, - "grad_norm": 0.4876204778498726, - "learning_rate": 9.744567673179946e-06, - "loss": 0.0522, + "epoch": 1.5355239786856127, + "grad_norm": 0.45716040673835834, + "learning_rate": 7.740885416392811e-06, + "loss": 0.0448, "step": 1729 }, { - "epoch": 0.7683766377970243, - "grad_norm": 0.5622038449381063, - "learning_rate": 9.743955638050169e-06, - "loss": 0.0524, + "epoch": 1.536412078152753, + "grad_norm": 0.8260383553950559, + "learning_rate": 7.737643155332559e-06, + "loss": 0.0676, "step": 1730 }, { - "epoch": 0.7688207861425717, - "grad_norm": 0.577658138102478, - "learning_rate": 9.743342889823273e-06, - "loss": 0.0559, + "epoch": 1.5373001776198936, + "grad_norm": 0.5703793064204835, + "learning_rate": 7.734399249433458e-06, + "loss": 0.0583, "step": 1731 }, { - "epoch": 0.7692649344881191, - "grad_norm": 0.6597870381577758, - "learning_rate": 9.742729428591368e-06, - "loss": 0.0603, + "epoch": 1.5381882770870337, + "grad_norm": 0.3251350830631577, + "learning_rate": 7.73115370064452e-06, + "loss": 0.0423, "step": 1732 }, { - "epoch": 0.7697090828336665, - "grad_norm": 0.8344370622440579, - "learning_rate": 9.742115254446665e-06, - "loss": 0.0836, + "epoch": 1.5390763765541742, + "grad_norm": 0.3359864790300054, + "learning_rate": 7.727906510915751e-06, + "loss": 0.0416, "step": 1733 }, { - "epoch": 0.7701532311792139, - "grad_norm": 0.5622150768145531, - "learning_rate": 9.741500367481481e-06, - "loss": 0.0826, + "epoch": 1.5399644760213143, + "grad_norm": 0.46875800830883074, + "learning_rate": 7.724657682198133e-06, + "loss": 0.0572, "step": 1734 }, { - "epoch": 0.7705973795247613, - "grad_norm": 0.6330801870134145, - "learning_rate": 9.740884767788253e-06, - "loss": 0.0569, + "epoch": 1.5408525754884548, + "grad_norm": 0.46807351907670214, + "learning_rate": 7.72140721644364e-06, + "loss": 0.0489, "step": 1735 }, { - "epoch": 0.7710415278703087, - "grad_norm": 0.5484349976426807, - "learning_rate": 9.740268455459507e-06, - "loss": 0.0759, + "epoch": 1.541740674955595, + "grad_norm": 0.5636691387582213, + "learning_rate": 7.718155115605222e-06, + "loss": 0.0682, "step": 1736 }, { - "epoch": 0.7714856762158561, - "grad_norm": 0.6003363970239456, - "learning_rate": 9.739651430587891e-06, - "loss": 0.0559, + "epoch": 1.5426287744227354, + "grad_norm": 0.40135850551698504, + "learning_rate": 7.714901381636819e-06, + "loss": 0.0481, "step": 1737 }, { - "epoch": 0.7719298245614035, - "grad_norm": 0.6514870858876586, - "learning_rate": 9.739033693266152e-06, - "loss": 0.0583, + "epoch": 1.5435168738898757, + "grad_norm": 0.48739636744157866, + "learning_rate": 7.711646016493348e-06, + "loss": 0.0482, "step": 1738 }, { - "epoch": 0.7723739729069509, - "grad_norm": 0.7128176043400777, - "learning_rate": 9.738415243587146e-06, - "loss": 0.0615, + "epoch": 1.544404973357016, + "grad_norm": 0.43889147398324563, + "learning_rate": 7.708389022130706e-06, + "loss": 0.0492, "step": 1739 }, { - "epoch": 0.7728181212524984, - "grad_norm": 0.4997932884849887, - "learning_rate": 9.737796081643838e-06, - "loss": 0.0572, + "epoch": 1.5452930728241563, + "grad_norm": 0.49191135418751325, + "learning_rate": 7.705130400505774e-06, + "loss": 0.0557, "step": 1740 }, { - "epoch": 0.7732622695980458, - "grad_norm": 0.6809941857723402, - "learning_rate": 9.737176207529296e-06, - "loss": 0.0579, + "epoch": 1.5461811722912966, + "grad_norm": 0.34228005697150915, + "learning_rate": 7.701870153576402e-06, + "loss": 0.0453, "step": 1741 }, { - "epoch": 0.7737064179435932, - "grad_norm": 0.579421035714199, - "learning_rate": 9.736555621336701e-06, - "loss": 0.0553, + "epoch": 1.547069271758437, + "grad_norm": 0.4140692891317913, + "learning_rate": 7.698608283301426e-06, + "loss": 0.0505, "step": 1742 }, { - "epoch": 0.7741505662891406, - "grad_norm": 0.49198429179195113, - "learning_rate": 9.735934323159337e-06, - "loss": 0.046, + "epoch": 1.5479573712255772, + "grad_norm": 0.46045025491100045, + "learning_rate": 7.695344791640648e-06, + "loss": 0.0522, "step": 1743 }, { - "epoch": 0.774594714634688, - "grad_norm": 0.6373312482523877, - "learning_rate": 9.735312313090593e-06, - "loss": 0.0645, + "epoch": 1.5488454706927177, + "grad_norm": 0.4438077452461078, + "learning_rate": 7.692079680554849e-06, + "loss": 0.0438, "step": 1744 }, { - "epoch": 0.7750388629802354, - "grad_norm": 0.49553655680906583, - "learning_rate": 9.734689591223971e-06, - "loss": 0.0523, + "epoch": 1.5497335701598578, + "grad_norm": 0.3985589283930428, + "learning_rate": 7.688812952005786e-06, + "loss": 0.046, "step": 1745 }, { - "epoch": 0.7754830113257828, - "grad_norm": 0.7575640777298455, - "learning_rate": 9.734066157653075e-06, - "loss": 0.1155, + "epoch": 1.5506216696269983, + "grad_norm": 0.34141045183330265, + "learning_rate": 7.685544607956182e-06, + "loss": 0.0443, "step": 1746 }, { - "epoch": 0.7759271596713302, - "grad_norm": 0.3941642367752884, - "learning_rate": 9.733442012471617e-06, - "loss": 0.0494, + "epoch": 1.5515097690941384, + "grad_norm": 0.35622382164888206, + "learning_rate": 7.682274650369734e-06, + "loss": 0.0431, "step": 1747 }, { - "epoch": 0.7763713080168776, - "grad_norm": 0.6007166723222428, - "learning_rate": 9.732817155773417e-06, - "loss": 0.0809, + "epoch": 1.552397868561279, + "grad_norm": 0.5399825580211707, + "learning_rate": 7.679003081211107e-06, + "loss": 0.0616, "step": 1748 }, { - "epoch": 0.7768154563624251, - "grad_norm": 0.6048168057227016, - "learning_rate": 9.732191587652402e-06, - "loss": 0.0827, + "epoch": 1.5532859680284192, + "grad_norm": 0.45712644197578145, + "learning_rate": 7.675729902445937e-06, + "loss": 0.0702, "step": 1749 }, { - "epoch": 0.7772596047079725, - "grad_norm": 0.5258636663852201, - "learning_rate": 9.731565308202607e-06, - "loss": 0.0532, + "epoch": 1.5541740674955595, + "grad_norm": 0.674681003807884, + "learning_rate": 7.67245511604082e-06, + "loss": 0.0599, "step": 1750 }, { - "epoch": 0.7777037530535199, - "grad_norm": 0.5183503036370198, - "learning_rate": 9.73093831751817e-06, - "loss": 0.0597, + "epoch": 1.5550621669626998, + "grad_norm": 0.34498352977043173, + "learning_rate": 7.669178723963327e-06, + "loss": 0.0404, "step": 1751 }, { - "epoch": 0.7781479013990673, - "grad_norm": 0.4539057898000694, - "learning_rate": 9.73031061569334e-06, - "loss": 0.0449, + "epoch": 1.5559502664298401, + "grad_norm": 0.4222733940661997, + "learning_rate": 7.665900728181989e-06, + "loss": 0.046, "step": 1752 }, { - "epoch": 0.7785920497446147, - "grad_norm": 0.43734440001807745, - "learning_rate": 9.72968220282247e-06, - "loss": 0.0496, + "epoch": 1.5568383658969804, + "grad_norm": 0.475618676968953, + "learning_rate": 7.6626211306663e-06, + "loss": 0.0604, "step": 1753 }, { - "epoch": 0.7790361980901621, - "grad_norm": 0.4621698388749695, - "learning_rate": 9.729053079000021e-06, - "loss": 0.0446, + "epoch": 1.5577264653641207, + "grad_norm": 0.3770933077341249, + "learning_rate": 7.659339933386718e-06, + "loss": 0.0457, "step": 1754 }, { - "epoch": 0.7794803464357095, - "grad_norm": 0.8795540562598588, - "learning_rate": 9.728423244320561e-06, - "loss": 0.0756, + "epoch": 1.5586145648312613, + "grad_norm": 0.4178934100132517, + "learning_rate": 7.656057138314663e-06, + "loss": 0.0476, "step": 1755 }, { - "epoch": 0.7799244947812569, - "grad_norm": 0.517878545048104, - "learning_rate": 9.727792698878767e-06, - "loss": 0.0577, + "epoch": 1.5595026642984013, + "grad_norm": 0.36640173060430026, + "learning_rate": 7.652772747422512e-06, + "loss": 0.0643, "step": 1756 }, { - "epoch": 0.7803686431268043, - "grad_norm": 0.5755116631603886, - "learning_rate": 9.72716144276942e-06, - "loss": 0.0507, + "epoch": 1.5603907637655419, + "grad_norm": 0.44741988081457607, + "learning_rate": 7.649486762683602e-06, + "loss": 0.0562, "step": 1757 }, { - "epoch": 0.7808127914723517, - "grad_norm": 0.501557195914397, - "learning_rate": 9.726529476087406e-06, - "loss": 0.0695, + "epoch": 1.561278863232682, + "grad_norm": 0.576605656132711, + "learning_rate": 7.646199186072229e-06, + "loss": 0.0817, "step": 1758 }, { - "epoch": 0.7812569398178992, - "grad_norm": 0.6162038871090194, - "learning_rate": 9.725896798927724e-06, - "loss": 0.073, + "epoch": 1.5621669626998225, + "grad_norm": 0.36683552194563107, + "learning_rate": 7.642910019563646e-06, + "loss": 0.0478, "step": 1759 }, { - "epoch": 0.7817010881634466, - "grad_norm": 0.44599102883855124, - "learning_rate": 9.725263411385471e-06, - "loss": 0.046, + "epoch": 1.5630550621669625, + "grad_norm": 0.37934832255837464, + "learning_rate": 7.639619265134056e-06, + "loss": 0.0466, "step": 1760 }, { - "epoch": 0.782145236508994, - "grad_norm": 0.5089144227251173, - "learning_rate": 9.724629313555862e-06, - "loss": 0.0566, + "epoch": 1.563943161634103, + "grad_norm": 0.399945495147686, + "learning_rate": 7.636326924760622e-06, + "loss": 0.0505, "step": 1761 }, { - "epoch": 0.7825893848545414, - "grad_norm": 0.5159558058324487, - "learning_rate": 9.723994505534209e-06, - "loss": 0.062, + "epoch": 1.5648312611012434, + "grad_norm": 0.37470621259966547, + "learning_rate": 7.63303300042146e-06, + "loss": 0.0482, "step": 1762 }, { - "epoch": 0.7830335332000888, - "grad_norm": 0.7445098154181274, - "learning_rate": 9.723358987415933e-06, - "loss": 0.0774, + "epoch": 1.5657193605683837, + "grad_norm": 0.46143179083831476, + "learning_rate": 7.62973749409563e-06, + "loss": 0.0514, "step": 1763 }, { - "epoch": 0.7834776815456362, - "grad_norm": 0.5480997952522682, - "learning_rate": 9.722722759296568e-06, - "loss": 0.0446, + "epoch": 1.566607460035524, + "grad_norm": 0.3549802079949632, + "learning_rate": 7.626440407763151e-06, + "loss": 0.0391, "step": 1764 }, { - "epoch": 0.7839218298911836, - "grad_norm": 0.6171588111915909, - "learning_rate": 9.722085821271747e-06, - "loss": 0.0695, + "epoch": 1.5674955595026643, + "grad_norm": 0.3526063297024799, + "learning_rate": 7.6231417434049896e-06, + "loss": 0.0463, "step": 1765 }, { - "epoch": 0.784365978236731, - "grad_norm": 0.5440136789733718, - "learning_rate": 9.721448173437212e-06, - "loss": 0.0509, + "epoch": 1.5683836589698046, + "grad_norm": 0.3625783448175399, + "learning_rate": 7.619841503003056e-06, + "loss": 0.0435, "step": 1766 }, { - "epoch": 0.7848101265822784, - "grad_norm": 0.5213635915956231, - "learning_rate": 9.720809815888814e-06, - "loss": 0.0543, + "epoch": 1.5692717584369449, + "grad_norm": 0.3535200750509916, + "learning_rate": 7.616539688540212e-06, + "loss": 0.0467, "step": 1767 }, { - "epoch": 0.7852542749278258, - "grad_norm": 0.5190117014644481, - "learning_rate": 9.720170748722507e-06, - "loss": 0.0625, + "epoch": 1.5701598579040854, + "grad_norm": 0.6134778577785767, + "learning_rate": 7.613236302000265e-06, + "loss": 0.0587, "step": 1768 }, { - "epoch": 0.7856984232733734, - "grad_norm": 0.4927607718599878, - "learning_rate": 9.719530972034356e-06, - "loss": 0.0609, + "epoch": 1.5710479573712255, + "grad_norm": 0.47615307783222455, + "learning_rate": 7.609931345367962e-06, + "loss": 0.0467, "step": 1769 }, { - "epoch": 0.7861425716189208, - "grad_norm": 0.48217603174270474, - "learning_rate": 9.718890485920529e-06, - "loss": 0.0496, + "epoch": 1.571936056838366, + "grad_norm": 0.3624745755144589, + "learning_rate": 7.606624820628998e-06, + "loss": 0.0491, "step": 1770 }, { - "epoch": 0.7865867199644682, - "grad_norm": 0.5424722566403071, - "learning_rate": 9.7182492904773e-06, - "loss": 0.0553, + "epoch": 1.572824156305506, + "grad_norm": 0.5421904381130762, + "learning_rate": 7.603316729770012e-06, + "loss": 0.054, "step": 1771 }, { - "epoch": 0.7870308683100156, - "grad_norm": 0.549948589003628, - "learning_rate": 9.717607385801055e-06, - "loss": 0.0673, + "epoch": 1.5737122557726466, + "grad_norm": 0.45513120933546486, + "learning_rate": 7.600007074778577e-06, + "loss": 0.0734, "step": 1772 }, { - "epoch": 0.787475016655563, - "grad_norm": 0.5315442944470616, - "learning_rate": 9.716964771988281e-06, - "loss": 0.0696, + "epoch": 1.5746003552397867, + "grad_norm": 0.6563110917147711, + "learning_rate": 7.596695857643211e-06, + "loss": 0.0628, "step": 1773 }, { - "epoch": 0.7879191650011104, - "grad_norm": 0.6429809896985884, - "learning_rate": 9.716321449135578e-06, - "loss": 0.0789, + "epoch": 1.5754884547069272, + "grad_norm": 0.43269624599939793, + "learning_rate": 7.593383080353369e-06, + "loss": 0.0534, "step": 1774 }, { - "epoch": 0.7883633133466578, - "grad_norm": 0.7545678487097474, - "learning_rate": 9.715677417339641e-06, - "loss": 0.0791, + "epoch": 1.5763765541740675, + "grad_norm": 0.42259017056440773, + "learning_rate": 7.590068744899444e-06, + "loss": 0.058, "step": 1775 }, { - "epoch": 0.7888074616922052, - "grad_norm": 0.5539973797183765, - "learning_rate": 9.715032676697285e-06, - "loss": 0.059, + "epoch": 1.5772646536412078, + "grad_norm": 0.40956595502519033, + "learning_rate": 7.586752853272765e-06, + "loss": 0.0427, "step": 1776 }, { - "epoch": 0.7892516100377526, - "grad_norm": 0.5317979991080277, - "learning_rate": 9.714387227305422e-06, - "loss": 0.0622, + "epoch": 1.5781527531083481, + "grad_norm": 0.3946458429155628, + "learning_rate": 7.583435407465596e-06, + "loss": 0.0407, "step": 1777 }, { - "epoch": 0.7896957583833, - "grad_norm": 0.5252592441778533, - "learning_rate": 9.713741069261076e-06, - "loss": 0.0463, + "epoch": 1.5790408525754884, + "grad_norm": 0.42541576285989563, + "learning_rate": 7.580116409471134e-06, + "loss": 0.0457, "step": 1778 }, { - "epoch": 0.7901399067288475, - "grad_norm": 0.4965411657812668, - "learning_rate": 9.713094202661374e-06, - "loss": 0.0498, + "epoch": 1.5799289520426287, + "grad_norm": 0.4464409900609385, + "learning_rate": 7.576795861283508e-06, + "loss": 0.0626, "step": 1779 }, { - "epoch": 0.7905840550743949, - "grad_norm": 0.7856085130172293, - "learning_rate": 9.712446627603553e-06, - "loss": 0.0732, + "epoch": 1.580817051509769, + "grad_norm": 0.41728552046426953, + "learning_rate": 7.573473764897783e-06, + "loss": 0.0476, "step": 1780 }, { - "epoch": 0.7910282034199423, - "grad_norm": 0.6433472150837076, - "learning_rate": 9.711798344184952e-06, - "loss": 0.0536, + "epoch": 1.5817051509769096, + "grad_norm": 0.51250804492596, + "learning_rate": 7.570150122309947e-06, + "loss": 0.0576, "step": 1781 }, { - "epoch": 0.7914723517654897, - "grad_norm": 0.5926660288471911, - "learning_rate": 9.711149352503022e-06, - "loss": 0.0611, + "epoch": 1.5825932504440496, + "grad_norm": 0.47986207085025157, + "learning_rate": 7.566824935516924e-06, + "loss": 0.0487, "step": 1782 }, { - "epoch": 0.7919165001110371, - "grad_norm": 0.5407209189318974, - "learning_rate": 9.710499652655313e-06, - "loss": 0.0464, + "epoch": 1.5834813499111902, + "grad_norm": 0.5060248238325927, + "learning_rate": 7.563498206516561e-06, + "loss": 0.0598, "step": 1783 }, { - "epoch": 0.7923606484565845, - "grad_norm": 1.2981488377904062, - "learning_rate": 9.709849244739493e-06, - "loss": 0.111, + "epoch": 1.5843694493783302, + "grad_norm": 0.42043139495950965, + "learning_rate": 7.560169937307633e-06, + "loss": 0.0509, "step": 1784 }, { - "epoch": 0.7928047968021319, - "grad_norm": 0.6752401202019049, - "learning_rate": 9.709198128853323e-06, - "loss": 0.0662, + "epoch": 1.5852575488454708, + "grad_norm": 0.5192341057560327, + "learning_rate": 7.55684012988984e-06, + "loss": 0.0579, "step": 1785 }, { - "epoch": 0.7932489451476793, - "grad_norm": 0.5128430129414299, - "learning_rate": 9.708546305094679e-06, - "loss": 0.0892, + "epoch": 1.586145648312611, + "grad_norm": 0.3872785990169641, + "learning_rate": 7.553508786263808e-06, + "loss": 0.0573, "step": 1786 }, { - "epoch": 0.7936930934932267, - "grad_norm": 0.5031597058072875, - "learning_rate": 9.707893773561541e-06, - "loss": 0.0552, + "epoch": 1.5870337477797514, + "grad_norm": 0.428936043137043, + "learning_rate": 7.5501759084310834e-06, + "loss": 0.0587, "step": 1787 }, { - "epoch": 0.7941372418387741, - "grad_norm": 0.7529404051762868, - "learning_rate": 9.707240534351995e-06, - "loss": 0.0829, + "epoch": 1.5879218472468917, + "grad_norm": 0.40158672150687236, + "learning_rate": 7.546841498394137e-06, + "loss": 0.049, "step": 1788 }, { - "epoch": 0.7945813901843216, - "grad_norm": 0.549999308663066, - "learning_rate": 9.706586587564236e-06, - "loss": 0.0456, + "epoch": 1.588809946714032, + "grad_norm": 0.2965004935478777, + "learning_rate": 7.543505558156359e-06, + "loss": 0.0397, "step": 1789 }, { - "epoch": 0.795025538529869, - "grad_norm": 0.5507988655050372, - "learning_rate": 9.705931933296563e-06, - "loss": 0.0565, + "epoch": 1.5896980461811723, + "grad_norm": 0.3348089962247074, + "learning_rate": 7.5401680897220575e-06, + "loss": 0.0414, "step": 1790 }, { - "epoch": 0.7954696868754164, - "grad_norm": 0.5120352810106473, - "learning_rate": 9.705276571647377e-06, - "loss": 0.0529, + "epoch": 1.5905861456483126, + "grad_norm": 0.3579386134392553, + "learning_rate": 7.5368290950964616e-06, + "loss": 0.046, "step": 1791 }, { - "epoch": 0.7959138352209638, - "grad_norm": 0.6321538206444037, - "learning_rate": 9.704620502715196e-06, - "loss": 0.0604, + "epoch": 1.5914742451154529, + "grad_norm": 0.6344177863935818, + "learning_rate": 7.533488576285714e-06, + "loss": 0.0514, "step": 1792 }, { - "epoch": 0.7963579835665112, - "grad_norm": 0.5978551293263742, - "learning_rate": 9.703963726598636e-06, - "loss": 0.0615, + "epoch": 1.5923623445825932, + "grad_norm": 0.4172607942552748, + "learning_rate": 7.5301465352968775e-06, + "loss": 0.0541, "step": 1793 }, { - "epoch": 0.7968021319120586, - "grad_norm": 0.6845514683107613, - "learning_rate": 9.70330624339642e-06, - "loss": 0.0759, + "epoch": 1.5932504440497337, + "grad_norm": 0.4296116049176287, + "learning_rate": 7.526802974137925e-06, + "loss": 0.0469, "step": 1794 }, { - "epoch": 0.797246280257606, - "grad_norm": 0.5162860917319786, - "learning_rate": 9.702648053207381e-06, - "loss": 0.0606, + "epoch": 1.5941385435168738, + "grad_norm": 0.3704480570317141, + "learning_rate": 7.523457894817745e-06, + "loss": 0.0515, "step": 1795 }, { - "epoch": 0.7976904286031534, - "grad_norm": 0.5757276725457067, - "learning_rate": 9.701989156130459e-06, - "loss": 0.0494, + "epoch": 1.5950266429840143, + "grad_norm": 0.4521460965601276, + "learning_rate": 7.52011129934614e-06, + "loss": 0.0443, "step": 1796 }, { - "epoch": 0.7981345769487008, - "grad_norm": 0.7240460732805346, - "learning_rate": 9.70132955226469e-06, - "loss": 0.0681, + "epoch": 1.5959147424511544, + "grad_norm": 0.4394300498345061, + "learning_rate": 7.51676318973382e-06, + "loss": 0.0523, "step": 1797 }, { - "epoch": 0.7985787252942482, - "grad_norm": 0.6852456458554386, - "learning_rate": 9.700669241709229e-06, - "loss": 0.0696, + "epoch": 1.596802841918295, + "grad_norm": 0.4760170297828387, + "learning_rate": 7.513413567992405e-06, + "loss": 0.049, "step": 1798 }, { - "epoch": 0.7990228736397957, - "grad_norm": 0.7728747951665925, - "learning_rate": 9.70000822456333e-06, - "loss": 0.075, + "epoch": 1.5976909413854352, + "grad_norm": 0.43812502523322966, + "learning_rate": 7.510062436134426e-06, + "loss": 0.0451, "step": 1799 }, { - "epoch": 0.7994670219853431, - "grad_norm": 0.44864271076401085, - "learning_rate": 9.699346500926357e-06, - "loss": 0.0396, + "epoch": 1.5985790408525755, + "grad_norm": 0.3881160105648138, + "learning_rate": 7.506709796173319e-06, + "loss": 0.0426, "step": 1800 }, { - "epoch": 0.7999111703308905, - "grad_norm": 0.45736945189860095, - "learning_rate": 9.698684070897774e-06, - "loss": 0.0528, + "epoch": 1.5994671403197158, + "grad_norm": 0.3930153590052019, + "learning_rate": 7.503355650123427e-06, + "loss": 0.05, "step": 1801 }, { - "epoch": 0.8003553186764379, - "grad_norm": 0.7755479737213115, - "learning_rate": 9.69802093457716e-06, - "loss": 0.0604, + "epoch": 1.6003552397868561, + "grad_norm": 0.4945648869122955, + "learning_rate": 7.500000000000001e-06, + "loss": 0.0471, "step": 1802 }, { - "epoch": 0.8007994670219853, - "grad_norm": 0.5300657753961209, - "learning_rate": 9.697357092064196e-06, - "loss": 0.0675, + "epoch": 1.6012433392539964, + "grad_norm": 0.44340722104644353, + "learning_rate": 7.496642847819189e-06, + "loss": 0.048, "step": 1803 }, { - "epoch": 0.8012436153675327, - "grad_norm": 0.5613880914239765, - "learning_rate": 9.696692543458666e-06, + "epoch": 1.6021314387211367, + "grad_norm": 0.4575391302567239, + "learning_rate": 7.493284195598046e-06, "loss": 0.0565, "step": 1804 }, { - "epoch": 0.8016877637130801, - "grad_norm": 1.5622355700759611, - "learning_rate": 9.696027288860463e-06, - "loss": 0.0748, + "epoch": 1.6030195381882772, + "grad_norm": 0.48191658932881076, + "learning_rate": 7.489924045354527e-06, + "loss": 0.0609, "step": 1805 }, { - "epoch": 0.8021319120586275, - "grad_norm": 0.6809476301245401, - "learning_rate": 9.695361328369588e-06, - "loss": 0.077, + "epoch": 1.6039076376554173, + "grad_norm": 0.5562535999796999, + "learning_rate": 7.4865623991074894e-06, + "loss": 0.0573, "step": 1806 }, { - "epoch": 0.802576060404175, - "grad_norm": 0.573004020361602, - "learning_rate": 9.694694662086143e-06, - "loss": 0.0688, + "epoch": 1.6047957371225579, + "grad_norm": 0.3548939944794682, + "learning_rate": 7.483199258876685e-06, + "loss": 0.0395, "step": 1807 }, { - "epoch": 0.8030202087497224, - "grad_norm": 0.48241041676332075, - "learning_rate": 9.694027290110344e-06, - "loss": 0.046, + "epoch": 1.605683836589698, + "grad_norm": 0.37932803121585146, + "learning_rate": 7.479834626682768e-06, + "loss": 0.04, "step": 1808 }, { - "epoch": 0.8034643570952699, - "grad_norm": 0.726848223856282, - "learning_rate": 9.693359212542504e-06, - "loss": 0.0554, + "epoch": 1.6065719360568385, + "grad_norm": 0.38464769545242555, + "learning_rate": 7.476468504547284e-06, + "loss": 0.0468, "step": 1809 }, { - "epoch": 0.8039085054408173, - "grad_norm": 0.5939220725933183, - "learning_rate": 9.692690429483049e-06, - "loss": 0.0599, + "epoch": 1.6074600355239785, + "grad_norm": 0.5426310282126411, + "learning_rate": 7.473100894492679e-06, + "loss": 0.0569, "step": 1810 }, { - "epoch": 0.8043526537863647, - "grad_norm": 0.8703575105729704, - "learning_rate": 9.692020941032508e-06, - "loss": 0.0697, + "epoch": 1.608348134991119, + "grad_norm": 0.33572094350124215, + "learning_rate": 7.469731798542288e-06, + "loss": 0.0439, "step": 1811 }, { - "epoch": 0.8047968021319121, - "grad_norm": 0.6745036941332984, - "learning_rate": 9.691350747291514e-06, - "loss": 0.0622, + "epoch": 1.6092362344582594, + "grad_norm": 0.4676095836985924, + "learning_rate": 7.466361218720344e-06, + "loss": 0.0541, "step": 1812 }, { - "epoch": 0.8052409504774595, - "grad_norm": 0.5924055978232626, - "learning_rate": 9.690679848360811e-06, - "loss": 0.068, + "epoch": 1.6101243339253997, + "grad_norm": 0.47157874619201895, + "learning_rate": 7.462989157051965e-06, + "loss": 0.0559, "step": 1813 }, { - "epoch": 0.8056850988230069, - "grad_norm": 0.6624345407427882, - "learning_rate": 9.690008244341247e-06, - "loss": 0.0671, + "epoch": 1.61101243339254, + "grad_norm": 0.4272596303563746, + "learning_rate": 7.459615615563166e-06, + "loss": 0.051, "step": 1814 }, { - "epoch": 0.8061292471685543, - "grad_norm": 0.760605230819851, - "learning_rate": 9.689335935333775e-06, - "loss": 0.0703, + "epoch": 1.6119005328596803, + "grad_norm": 0.4824238196116946, + "learning_rate": 7.456240596280848e-06, + "loss": 0.0575, "step": 1815 }, { - "epoch": 0.8065733955141017, - "grad_norm": 0.6543633249820602, - "learning_rate": 9.688662921439454e-06, - "loss": 0.0537, + "epoch": 1.6127886323268206, + "grad_norm": 0.9305803356665563, + "learning_rate": 7.452864101232798e-06, + "loss": 0.0529, "step": 1816 }, { - "epoch": 0.8070175438596491, - "grad_norm": 0.6888300517583433, - "learning_rate": 9.687989202759448e-06, - "loss": 0.0495, + "epoch": 1.6136767317939609, + "grad_norm": 0.4136268375325825, + "learning_rate": 7.449486132447694e-06, + "loss": 0.056, "step": 1817 }, { - "epoch": 0.8074616922051966, - "grad_norm": 0.686773842932207, - "learning_rate": 9.68731477939503e-06, - "loss": 0.0659, + "epoch": 1.6145648312611014, + "grad_norm": 0.4565426594135729, + "learning_rate": 7.446106691955097e-06, + "loss": 0.0474, "step": 1818 }, { - "epoch": 0.807905840550744, - "grad_norm": 0.5300847493623394, - "learning_rate": 9.686639651447578e-06, - "loss": 0.0518, + "epoch": 1.6154529307282415, + "grad_norm": 0.5214072414287675, + "learning_rate": 7.442725781785451e-06, + "loss": 0.0674, "step": 1819 }, { - "epoch": 0.8083499888962914, - "grad_norm": 0.5159319768487426, - "learning_rate": 9.685963819018575e-06, - "loss": 0.0515, + "epoch": 1.616341030195382, + "grad_norm": 0.3898726364029089, + "learning_rate": 7.439343403970085e-06, + "loss": 0.0414, "step": 1820 }, { - "epoch": 0.8087941372418388, - "grad_norm": 0.7950101365777946, - "learning_rate": 9.685287282209607e-06, - "loss": 0.0728, + "epoch": 1.617229129662522, + "grad_norm": 0.39409589873524503, + "learning_rate": 7.43595956054121e-06, + "loss": 0.042, "step": 1821 }, { - "epoch": 0.8092382855873862, - "grad_norm": 1.1412965984185115, - "learning_rate": 9.684610041122375e-06, - "loss": 0.0802, + "epoch": 1.6181172291296626, + "grad_norm": 0.5133191051412453, + "learning_rate": 7.432574253531917e-06, + "loss": 0.0491, "step": 1822 }, { - "epoch": 0.8096824339329336, - "grad_norm": 0.45513195688472363, - "learning_rate": 9.683932095858673e-06, - "loss": 0.0615, + "epoch": 1.6190053285968027, + "grad_norm": 0.4201841096539901, + "learning_rate": 7.429187484976172e-06, + "loss": 0.0495, "step": 1823 }, { - "epoch": 0.810126582278481, - "grad_norm": 0.7127474010386736, - "learning_rate": 9.683253446520412e-06, - "loss": 0.0617, + "epoch": 1.6198934280639432, + "grad_norm": 0.4031322720823592, + "learning_rate": 7.42579925690883e-06, + "loss": 0.0407, "step": 1824 }, { - "epoch": 0.8105707306240284, - "grad_norm": 0.5146350672182237, - "learning_rate": 9.682574093209603e-06, - "loss": 0.0821, + "epoch": 1.6207815275310835, + "grad_norm": 0.3666136430256817, + "learning_rate": 7.422409571365612e-06, + "loss": 0.0476, "step": 1825 }, { - "epoch": 0.8110148789695758, - "grad_norm": 0.5927766784074162, - "learning_rate": 9.681894036028365e-06, - "loss": 0.0616, + "epoch": 1.6216696269982238, + "grad_norm": 0.6472614138905696, + "learning_rate": 7.419018430383118e-06, + "loss": 0.0469, "step": 1826 }, { - "epoch": 0.8114590273151232, - "grad_norm": 0.6070773128654839, - "learning_rate": 9.681213275078922e-06, - "loss": 0.0615, + "epoch": 1.6225577264653641, + "grad_norm": 0.7981018598945092, + "learning_rate": 7.415625835998828e-06, + "loss": 0.0623, "step": 1827 }, { - "epoch": 0.8119031756606707, - "grad_norm": 0.532740364018843, - "learning_rate": 9.680531810463606e-06, - "loss": 0.0572, + "epoch": 1.6234458259325044, + "grad_norm": 0.3974535875556267, + "learning_rate": 7.412231790251085e-06, + "loss": 0.0436, "step": 1828 }, { - "epoch": 0.8123473240062181, - "grad_norm": 0.5406785460306237, - "learning_rate": 9.679849642284846e-06, - "loss": 0.0553, + "epoch": 1.6243339253996447, + "grad_norm": 0.3923193715579734, + "learning_rate": 7.408836295179114e-06, + "loss": 0.053, "step": 1829 }, { - "epoch": 0.8127914723517655, - "grad_norm": 0.7113722607782379, - "learning_rate": 9.679166770645193e-06, - "loss": 0.0588, + "epoch": 1.625222024866785, + "grad_norm": 0.5310667299265248, + "learning_rate": 7.4054393528230025e-06, + "loss": 0.0612, "step": 1830 }, { - "epoch": 0.8132356206973129, - "grad_norm": 0.5228564923020365, - "learning_rate": 9.678483195647286e-06, - "loss": 0.0762, + "epoch": 1.6261101243339255, + "grad_norm": 0.5529502932529136, + "learning_rate": 7.4020409652237165e-06, + "loss": 0.0591, "step": 1831 }, { - "epoch": 0.8136797690428603, - "grad_norm": 0.3998194117652634, - "learning_rate": 9.67779891739388e-06, - "loss": 0.045, + "epoch": 1.6269982238010656, + "grad_norm": 0.4441596346548294, + "learning_rate": 7.398641134423081e-06, + "loss": 0.0422, "step": 1832 }, { - "epoch": 0.8141239173884077, - "grad_norm": 0.46502028820157165, - "learning_rate": 9.677113935987839e-06, - "loss": 0.0508, + "epoch": 1.6278863232682061, + "grad_norm": 0.48736618114748104, + "learning_rate": 7.395239862463794e-06, + "loss": 0.0486, "step": 1833 }, { - "epoch": 0.8145680657339551, - "grad_norm": 0.6755084418924148, - "learning_rate": 9.67642825153212e-06, - "loss": 0.0935, + "epoch": 1.6287744227353462, + "grad_norm": 0.5389730081774947, + "learning_rate": 7.391837151389416e-06, + "loss": 0.0555, "step": 1834 }, { - "epoch": 0.8150122140795025, - "grad_norm": 0.6820496339271076, - "learning_rate": 9.675741864129797e-06, - "loss": 0.0648, + "epoch": 1.6296625222024868, + "grad_norm": 0.5114644149008559, + "learning_rate": 7.388433003244377e-06, + "loss": 0.047, "step": 1835 }, { - "epoch": 0.8154563624250499, - "grad_norm": 0.51959592407304, - "learning_rate": 9.675054773884045e-06, - "loss": 0.0804, + "epoch": 1.6305506216696268, + "grad_norm": 0.4288743852943751, + "learning_rate": 7.385027420073965e-06, + "loss": 0.0451, "step": 1836 }, { - "epoch": 0.8159005107705973, - "grad_norm": 0.4703704489029568, - "learning_rate": 9.674366980898145e-06, - "loss": 0.0639, + "epoch": 1.6314387211367674, + "grad_norm": 0.5328497940204218, + "learning_rate": 7.381620403924333e-06, + "loss": 0.0556, "step": 1837 }, { - "epoch": 0.8163446591161448, - "grad_norm": 0.4874780271160304, - "learning_rate": 9.673678485275484e-06, - "loss": 0.0508, + "epoch": 1.6323268206039077, + "grad_norm": 0.4812793644698344, + "learning_rate": 7.378211956842496e-06, + "loss": 0.0518, "step": 1838 }, { - "epoch": 0.8167888074616922, - "grad_norm": 0.44148176077100115, - "learning_rate": 9.672989287119555e-06, - "loss": 0.0588, + "epoch": 1.633214920071048, + "grad_norm": 0.3728723626240057, + "learning_rate": 7.374802080876324e-06, + "loss": 0.0428, "step": 1839 }, { - "epoch": 0.8172329558072396, - "grad_norm": 0.7005816504280763, - "learning_rate": 9.672299386533956e-06, - "loss": 0.074, + "epoch": 1.6341030195381883, + "grad_norm": 0.47107517819965083, + "learning_rate": 7.371390778074552e-06, + "loss": 0.0492, "step": 1840 }, { - "epoch": 0.817677104152787, - "grad_norm": 0.6435780803327446, - "learning_rate": 9.67160878362239e-06, - "loss": 0.064, + "epoch": 1.6349911190053286, + "grad_norm": 0.5033676728454763, + "learning_rate": 7.367978050486768e-06, + "loss": 0.0434, "step": 1841 }, { - "epoch": 0.8181212524983345, - "grad_norm": 0.6068596716742378, - "learning_rate": 9.670917478488669e-06, - "loss": 0.0626, + "epoch": 1.6358792184724689, + "grad_norm": 0.36156424991827696, + "learning_rate": 7.364563900163416e-06, + "loss": 0.0417, "step": 1842 }, { - "epoch": 0.8185654008438819, - "grad_norm": 0.48495659068419156, - "learning_rate": 9.670225471236703e-06, - "loss": 0.0566, + "epoch": 1.6367673179396092, + "grad_norm": 0.472226259653632, + "learning_rate": 7.361148329155798e-06, + "loss": 0.0496, "step": 1843 }, { - "epoch": 0.8190095491894293, - "grad_norm": 0.5732730166801319, - "learning_rate": 9.669532761970518e-06, - "loss": 0.0594, + "epoch": 1.6376554174067497, + "grad_norm": 0.4612065513815727, + "learning_rate": 7.357731339516067e-06, + "loss": 0.0453, "step": 1844 }, { - "epoch": 0.8194536975349767, - "grad_norm": 0.6406816449651614, - "learning_rate": 9.668839350794236e-06, - "loss": 0.0602, + "epoch": 1.6385435168738898, + "grad_norm": 0.37788789100539816, + "learning_rate": 7.354312933297225e-06, + "loss": 0.0437, "step": 1845 }, { - "epoch": 0.8198978458805241, - "grad_norm": 0.7660699462244367, - "learning_rate": 9.66814523781209e-06, - "loss": 0.1004, + "epoch": 1.6394316163410303, + "grad_norm": 0.47442424835246877, + "learning_rate": 7.350893112553134e-06, + "loss": 0.0434, "step": 1846 }, { - "epoch": 0.8203419942260715, - "grad_norm": 0.6805404383061566, - "learning_rate": 9.667450423128417e-06, - "loss": 0.0727, + "epoch": 1.6403197158081704, + "grad_norm": 0.5064539251676949, + "learning_rate": 7.3474718793384995e-06, + "loss": 0.0518, "step": 1847 }, { - "epoch": 0.820786142571619, - "grad_norm": 1.8549554381012934, - "learning_rate": 9.666754906847659e-06, - "loss": 0.0612, + "epoch": 1.641207815275311, + "grad_norm": 0.35894236613374486, + "learning_rate": 7.3440492357088746e-06, + "loss": 0.0401, "step": 1848 }, { - "epoch": 0.8212302909171664, - "grad_norm": 0.9016082075933314, - "learning_rate": 9.666058689074364e-06, - "loss": 0.0792, + "epoch": 1.6420959147424512, + "grad_norm": 0.45759082265965767, + "learning_rate": 7.340625183720664e-06, + "loss": 0.0519, "step": 1849 }, { - "epoch": 0.8216744392627138, - "grad_norm": 0.5403087387935102, - "learning_rate": 9.665361769913187e-06, - "loss": 0.045, + "epoch": 1.6429840142095915, + "grad_norm": 0.39750803011604363, + "learning_rate": 7.3371997254311165e-06, + "loss": 0.0449, "step": 1850 }, { - "epoch": 0.8221185876082612, - "grad_norm": 0.45933630900068073, - "learning_rate": 9.664664149468885e-06, - "loss": 0.046, + "epoch": 1.6438721136767318, + "grad_norm": 0.3392691207859647, + "learning_rate": 7.333772862898328e-06, + "loss": 0.0403, "step": 1851 }, { - "epoch": 0.8225627359538086, - "grad_norm": 0.6057928280686538, - "learning_rate": 9.663965827846321e-06, - "loss": 0.053, + "epoch": 1.644760213143872, + "grad_norm": 0.4128996936237093, + "learning_rate": 7.330344598181231e-06, + "loss": 0.05, "step": 1852 }, { - "epoch": 0.823006884299356, - "grad_norm": 0.7862585104580586, - "learning_rate": 9.663266805150468e-06, - "loss": 0.0706, + "epoch": 1.6456483126110124, + "grad_norm": 0.5078054091289931, + "learning_rate": 7.326914933339612e-06, + "loss": 0.0518, "step": 1853 }, { - "epoch": 0.8234510326449034, - "grad_norm": 0.59770705811337, - "learning_rate": 9.662567081486398e-06, - "loss": 0.0568, + "epoch": 1.6465364120781527, + "grad_norm": 0.42840596236871603, + "learning_rate": 7.323483870434087e-06, + "loss": 0.0518, "step": 1854 }, { - "epoch": 0.8238951809904508, - "grad_norm": 0.5457681124884074, - "learning_rate": 9.661866656959293e-06, - "loss": 0.0534, + "epoch": 1.6474245115452932, + "grad_norm": 0.5155458763080173, + "learning_rate": 7.32005141152612e-06, + "loss": 0.0517, "step": 1855 }, { - "epoch": 0.8243393293359982, - "grad_norm": 0.7281386032100584, - "learning_rate": 9.661165531674438e-06, - "loss": 0.081, + "epoch": 1.6483126110124333, + "grad_norm": 0.47345843257098125, + "learning_rate": 7.316617558678011e-06, + "loss": 0.0606, "step": 1856 }, { - "epoch": 0.8247834776815456, - "grad_norm": 0.5330397841966769, - "learning_rate": 9.660463705737224e-06, - "loss": 0.0657, + "epoch": 1.6492007104795738, + "grad_norm": 0.3448701612007873, + "learning_rate": 7.313182313952897e-06, + "loss": 0.0471, "step": 1857 }, { - "epoch": 0.8252276260270931, - "grad_norm": 0.5773185928330131, - "learning_rate": 9.65976117925315e-06, - "loss": 0.0657, + "epoch": 1.650088809946714, + "grad_norm": 0.6912772116733018, + "learning_rate": 7.309745679414751e-06, + "loss": 0.0598, "step": 1858 }, { - "epoch": 0.8256717743726405, - "grad_norm": 0.6769338412517576, - "learning_rate": 9.659057952327812e-06, - "loss": 0.0713, + "epoch": 1.6509769094138544, + "grad_norm": 0.4514055160584369, + "learning_rate": 7.3063076571283864e-06, + "loss": 0.0511, "step": 1859 }, { - "epoch": 0.8261159227181879, - "grad_norm": 0.5999087119449273, - "learning_rate": 9.65835402506692e-06, - "loss": 0.0776, + "epoch": 1.6518650088809945, + "grad_norm": 0.3931950881071841, + "learning_rate": 7.302868249159441e-06, + "loss": 0.0372, "step": 1860 }, { - "epoch": 0.8265600710637353, - "grad_norm": 0.46186334132300766, - "learning_rate": 9.657649397576289e-06, - "loss": 0.0435, + "epoch": 1.652753108348135, + "grad_norm": 0.43739040998352097, + "learning_rate": 7.29942745757439e-06, + "loss": 0.0475, "step": 1861 }, { - "epoch": 0.8270042194092827, - "grad_norm": 0.6132380100477, - "learning_rate": 9.656944069961832e-06, - "loss": 0.0503, + "epoch": 1.6536412078152753, + "grad_norm": 0.4663761588583565, + "learning_rate": 7.295985284440543e-06, + "loss": 0.0521, "step": 1862 }, { - "epoch": 0.8274483677548301, - "grad_norm": 0.511934760130446, - "learning_rate": 9.656238042329575e-06, - "loss": 0.047, + "epoch": 1.6545293072824157, + "grad_norm": 0.4626929162880625, + "learning_rate": 7.292541731826035e-06, + "loss": 0.0533, "step": 1863 }, { - "epoch": 0.8278925161003775, - "grad_norm": 0.9622008544679613, - "learning_rate": 9.655531314785643e-06, - "loss": 0.0727, + "epoch": 1.655417406749556, + "grad_norm": 0.4813664691991621, + "learning_rate": 7.289096801799831e-06, + "loss": 0.0576, "step": 1864 }, { - "epoch": 0.8283366644459249, - "grad_norm": 0.41625404724105025, - "learning_rate": 9.654823887436272e-06, + "epoch": 1.6563055062166963, + "grad_norm": 0.3835863890156243, + "learning_rate": 7.285650496431726e-06, "loss": 0.0452, "step": 1865 }, { - "epoch": 0.8287808127914723, - "grad_norm": 0.6534224883169892, - "learning_rate": 9.6541157603878e-06, - "loss": 0.0812, + "epoch": 1.6571936056838366, + "grad_norm": 0.3982495988142279, + "learning_rate": 7.282202817792337e-06, + "loss": 0.0439, "step": 1866 }, { - "epoch": 0.8292249611370197, - "grad_norm": 0.3831024637275559, - "learning_rate": 9.653406933746667e-06, - "loss": 0.0406, + "epoch": 1.6580817051509769, + "grad_norm": 0.584980684226663, + "learning_rate": 7.2787537679531105e-06, + "loss": 0.0456, "step": 1867 }, { - "epoch": 0.8296691094825672, - "grad_norm": 0.5750420337273457, - "learning_rate": 9.652697407619425e-06, - "loss": 0.0655, + "epoch": 1.6589698046181174, + "grad_norm": 0.5324889054540743, + "learning_rate": 7.2753033489863134e-06, + "loss": 0.0487, "step": 1868 }, { - "epoch": 0.8301132578281146, - "grad_norm": 0.5340996381251997, - "learning_rate": 9.651987182112727e-06, - "loss": 0.0684, + "epoch": 1.6598579040852575, + "grad_norm": 0.42218813341259764, + "learning_rate": 7.271851562965037e-06, + "loss": 0.0556, "step": 1869 }, { - "epoch": 0.830557406173662, - "grad_norm": 0.5216822693020573, - "learning_rate": 9.651276257333334e-06, - "loss": 0.0506, + "epoch": 1.660746003552398, + "grad_norm": 0.355553460224428, + "learning_rate": 7.268398411963193e-06, + "loss": 0.0439, "step": 1870 }, { - "epoch": 0.8310015545192094, - "grad_norm": 0.6549627952655241, - "learning_rate": 9.650564633388106e-06, - "loss": 0.0746, + "epoch": 1.661634103019538, + "grad_norm": 0.3771293907326771, + "learning_rate": 7.2649438980555165e-06, + "loss": 0.0387, "step": 1871 }, { - "epoch": 0.8314457028647568, - "grad_norm": 0.41543419779685914, - "learning_rate": 9.649852310384017e-06, - "loss": 0.0418, + "epoch": 1.6625222024866786, + "grad_norm": 0.448586691919345, + "learning_rate": 7.261488023317555e-06, + "loss": 0.0486, "step": 1872 }, { - "epoch": 0.8318898512103042, - "grad_norm": 0.8710457342050398, - "learning_rate": 9.649139288428136e-06, - "loss": 0.0725, + "epoch": 1.6634103019538187, + "grad_norm": 0.4634017416752472, + "learning_rate": 7.2580307898256805e-06, + "loss": 0.047, "step": 1873 }, { - "epoch": 0.8323339995558516, - "grad_norm": 0.7280421909189481, - "learning_rate": 9.648425567627646e-06, - "loss": 0.0834, + "epoch": 1.6642984014209592, + "grad_norm": 0.43490416915633406, + "learning_rate": 7.254572199657075e-06, + "loss": 0.0451, "step": 1874 }, { - "epoch": 0.832778147901399, - "grad_norm": 0.48421338065941566, - "learning_rate": 9.647711148089829e-06, - "loss": 0.051, + "epoch": 1.6651865008880995, + "grad_norm": 0.4248897081866145, + "learning_rate": 7.2511122548897426e-06, + "loss": 0.0535, "step": 1875 }, { - "epoch": 0.8332222962469464, - "grad_norm": 0.8228707840085564, - "learning_rate": 9.646996029922078e-06, - "loss": 0.0899, + "epoch": 1.6660746003552398, + "grad_norm": 0.34537012276279605, + "learning_rate": 7.2476509576024945e-06, + "loss": 0.052, "step": 1876 }, { - "epoch": 0.8336664445924938, - "grad_norm": 0.44548368481732253, - "learning_rate": 9.646280213231882e-06, - "loss": 0.0459, + "epoch": 1.66696269982238, + "grad_norm": 0.42620719932561724, + "learning_rate": 7.244188309874959e-06, + "loss": 0.053, "step": 1877 }, { - "epoch": 0.8341105929380414, - "grad_norm": 0.77874935532614, - "learning_rate": 9.645563698126846e-06, - "loss": 0.0874, + "epoch": 1.6678507992895204, + "grad_norm": 0.3788018868784469, + "learning_rate": 7.2407243137875725e-06, + "loss": 0.0528, "step": 1878 }, { - "epoch": 0.8345547412835888, - "grad_norm": 0.6153818855236423, - "learning_rate": 9.64484648471467e-06, - "loss": 0.0538, + "epoch": 1.6687388987566607, + "grad_norm": 0.4205270076037411, + "learning_rate": 7.237258971421587e-06, + "loss": 0.0442, "step": 1879 }, { - "epoch": 0.8349988896291362, - "grad_norm": 0.6341254163576385, - "learning_rate": 9.644128573103166e-06, - "loss": 0.0794, + "epoch": 1.669626998223801, + "grad_norm": 0.3725818744304128, + "learning_rate": 7.233792284859056e-06, + "loss": 0.0477, "step": 1880 }, { - "epoch": 0.8354430379746836, - "grad_norm": 1.0916751108937903, - "learning_rate": 9.643409963400247e-06, - "loss": 0.0775, + "epoch": 1.6705150976909415, + "grad_norm": 0.44835567030216367, + "learning_rate": 7.230324256182848e-06, + "loss": 0.0579, "step": 1881 }, { - "epoch": 0.835887186320231, - "grad_norm": 0.5090370659080116, - "learning_rate": 9.642690655713935e-06, - "loss": 0.0515, + "epoch": 1.6714031971580816, + "grad_norm": 0.4402031076371474, + "learning_rate": 7.226854887476629e-06, + "loss": 0.049, "step": 1882 }, { - "epoch": 0.8363313346657784, - "grad_norm": 0.8125756753691618, - "learning_rate": 9.641970650152351e-06, - "loss": 0.0856, + "epoch": 1.6722912966252221, + "grad_norm": 0.46233583500915776, + "learning_rate": 7.223384180824881e-06, + "loss": 0.0594, "step": 1883 }, { - "epoch": 0.8367754830113258, - "grad_norm": 0.7917848803419996, - "learning_rate": 9.641249946823722e-06, - "loss": 0.0789, + "epoch": 1.6731793960923622, + "grad_norm": 0.3974887552165766, + "learning_rate": 7.219912138312881e-06, + "loss": 0.0421, "step": 1884 }, { - "epoch": 0.8372196313568732, - "grad_norm": 0.6341635191576055, - "learning_rate": 9.640528545836388e-06, - "loss": 0.0689, + "epoch": 1.6740674955595027, + "grad_norm": 0.36712835030146823, + "learning_rate": 7.216438762026714e-06, + "loss": 0.0452, "step": 1885 }, { - "epoch": 0.8376637797024206, - "grad_norm": 0.45285744978069686, - "learning_rate": 9.639806447298786e-06, - "loss": 0.0502, + "epoch": 1.6749555950266428, + "grad_norm": 0.3917454645087735, + "learning_rate": 7.212964054053259e-06, + "loss": 0.0463, "step": 1886 }, { - "epoch": 0.8381079280479681, - "grad_norm": 0.5918006498840634, - "learning_rate": 9.639083651319455e-06, - "loss": 0.077, + "epoch": 1.6758436944937833, + "grad_norm": 0.32488984173426766, + "learning_rate": 7.2094880164802074e-06, + "loss": 0.0474, "step": 1887 }, { - "epoch": 0.8385520763935155, - "grad_norm": 0.5566719058184162, - "learning_rate": 9.638360158007049e-06, - "loss": 0.0518, + "epoch": 1.6767317939609236, + "grad_norm": 0.4632793391754857, + "learning_rate": 7.2060106513960395e-06, + "loss": 0.0523, "step": 1888 }, { - "epoch": 0.8389962247390629, - "grad_norm": 0.5775755465954959, - "learning_rate": 9.637635967470317e-06, - "loss": 0.0583, + "epoch": 1.677619893428064, + "grad_norm": 0.5034328004717088, + "learning_rate": 7.202531960890033e-06, + "loss": 0.0447, "step": 1889 }, { - "epoch": 0.8394403730846103, - "grad_norm": 0.5105748469342627, - "learning_rate": 9.636911079818121e-06, - "loss": 0.0547, + "epoch": 1.6785079928952042, + "grad_norm": 0.42110845571449806, + "learning_rate": 7.19905194705227e-06, + "loss": 0.0406, "step": 1890 }, { - "epoch": 0.8398845214301577, - "grad_norm": 0.7153133794510262, - "learning_rate": 9.636185495159423e-06, - "loss": 0.0598, + "epoch": 1.6793960923623446, + "grad_norm": 0.4171462159066183, + "learning_rate": 7.195570611973619e-06, + "loss": 0.0446, "step": 1891 }, { - "epoch": 0.8403286697757051, - "grad_norm": 0.580672148124565, - "learning_rate": 9.63545921360329e-06, - "loss": 0.0601, + "epoch": 1.6802841918294849, + "grad_norm": 0.4111064851477334, + "learning_rate": 7.192087957745748e-06, + "loss": 0.0527, "step": 1892 }, { - "epoch": 0.8407728181212525, - "grad_norm": 0.43212755100892075, - "learning_rate": 9.634732235258895e-06, - "loss": 0.0501, + "epoch": 1.6811722912966252, + "grad_norm": 0.36207107670250294, + "learning_rate": 7.1886039864611145e-06, + "loss": 0.0359, "step": 1893 }, { - "epoch": 0.8412169664667999, - "grad_norm": 0.6336655954017367, - "learning_rate": 9.634004560235513e-06, - "loss": 0.0742, + "epoch": 1.6820603907637657, + "grad_norm": 0.5772595367079205, + "learning_rate": 7.18511870021297e-06, + "loss": 0.0499, "step": 1894 }, { - "epoch": 0.8416611148123473, - "grad_norm": 0.6609701387334967, - "learning_rate": 9.633276188642529e-06, - "loss": 0.0579, + "epoch": 1.6829484902309058, + "grad_norm": 0.4735776014638674, + "learning_rate": 7.1816321010953525e-06, + "loss": 0.0501, "step": 1895 }, { - "epoch": 0.8421052631578947, - "grad_norm": 0.6932382794686757, - "learning_rate": 9.632547120589426e-06, - "loss": 0.0581, + "epoch": 1.6838365896980463, + "grad_norm": 0.547770694294487, + "learning_rate": 7.178144191203091e-06, + "loss": 0.0473, "step": 1896 }, { - "epoch": 0.8425494115034422, - "grad_norm": 0.5148690171452629, - "learning_rate": 9.631817356185799e-06, - "loss": 0.0507, + "epoch": 1.6847246891651864, + "grad_norm": 0.46026886801118827, + "learning_rate": 7.1746549726318025e-06, + "loss": 0.058, "step": 1897 }, { - "epoch": 0.8429935598489896, - "grad_norm": 0.5764411558916347, - "learning_rate": 9.631086895541343e-06, - "loss": 0.0485, + "epoch": 1.6856127886323269, + "grad_norm": 0.4277151288632358, + "learning_rate": 7.17116444747789e-06, + "loss": 0.0402, "step": 1898 }, { - "epoch": 0.843437708194537, - "grad_norm": 0.713479181836388, - "learning_rate": 9.630355738765859e-06, - "loss": 0.0811, + "epoch": 1.6865008880994672, + "grad_norm": 0.3679088895246344, + "learning_rate": 7.1676726178385415e-06, + "loss": 0.0437, "step": 1899 }, { - "epoch": 0.8438818565400844, - "grad_norm": 0.6095181273242816, - "learning_rate": 9.62962388596925e-06, - "loss": 0.051, + "epoch": 1.6873889875666075, + "grad_norm": 0.5011965656365193, + "learning_rate": 7.164179485811728e-06, + "loss": 0.0457, "step": 1900 }, { - "epoch": 0.8443260048856318, - "grad_norm": 0.6746333568364197, - "learning_rate": 9.628891337261527e-06, - "loss": 0.051, + "epoch": 1.6882770870337478, + "grad_norm": 0.42516927463117626, + "learning_rate": 7.160685053496201e-06, + "loss": 0.045, "step": 1901 }, { - "epoch": 0.8447701532311792, - "grad_norm": 0.5956567675202917, - "learning_rate": 9.628158092752807e-06, - "loss": 0.0544, + "epoch": 1.689165186500888, + "grad_norm": 0.5231918792756166, + "learning_rate": 7.157189322991499e-06, + "loss": 0.053, "step": 1902 }, { - "epoch": 0.8452143015767266, - "grad_norm": 0.9239154351205555, - "learning_rate": 9.627424152553305e-06, - "loss": 0.0801, + "epoch": 1.6900532859680284, + "grad_norm": 0.485304561864495, + "learning_rate": 7.153692296397936e-06, + "loss": 0.061, "step": 1903 }, { - "epoch": 0.845658449922274, - "grad_norm": 0.8434016325954864, - "learning_rate": 9.626689516773348e-06, - "loss": 0.0597, + "epoch": 1.6909413854351687, + "grad_norm": 0.45690268485356994, + "learning_rate": 7.150193975816606e-06, + "loss": 0.0476, "step": 1904 }, { - "epoch": 0.8461025982678214, - "grad_norm": 0.6102688127848747, - "learning_rate": 9.625954185523361e-06, - "loss": 0.0751, + "epoch": 1.6918294849023092, + "grad_norm": 0.40781644672009093, + "learning_rate": 7.146694363349378e-06, + "loss": 0.0573, "step": 1905 }, { - "epoch": 0.8465467466133688, - "grad_norm": 0.7017970233762413, - "learning_rate": 9.62521815891388e-06, - "loss": 0.0665, + "epoch": 1.6927175843694493, + "grad_norm": 0.8996341745938609, + "learning_rate": 7.1431934610989025e-06, + "loss": 0.0757, "step": 1906 }, { - "epoch": 0.8469908949589163, - "grad_norm": 0.6370603868487351, - "learning_rate": 9.624481437055542e-06, - "loss": 0.0584, + "epoch": 1.6936056838365898, + "grad_norm": 0.33465511049130725, + "learning_rate": 7.139691271168601e-06, + "loss": 0.0412, "step": 1907 }, { - "epoch": 0.8474350433044637, - "grad_norm": 0.8073786270061732, - "learning_rate": 9.623744020059086e-06, - "loss": 0.0739, + "epoch": 1.69449378330373, + "grad_norm": 0.4006689443792775, + "learning_rate": 7.136187795662669e-06, + "loss": 0.0554, "step": 1908 }, { - "epoch": 0.8478791916500111, - "grad_norm": 0.6765473850641361, - "learning_rate": 9.623005908035362e-06, - "loss": 0.0578, + "epoch": 1.6953818827708704, + "grad_norm": 0.7051429296225967, + "learning_rate": 7.132683036686076e-06, + "loss": 0.0507, "step": 1909 }, { - "epoch": 0.8483233399955585, - "grad_norm": 0.7723618661951789, - "learning_rate": 9.622267101095318e-06, - "loss": 0.0477, + "epoch": 1.6962699822380105, + "grad_norm": 0.4855643688496919, + "learning_rate": 7.129176996344561e-06, + "loss": 0.0574, "step": 1910 }, { - "epoch": 0.8487674883411059, - "grad_norm": 0.5605923323935563, - "learning_rate": 9.621527599350008e-06, - "loss": 0.0603, + "epoch": 1.697158081705151, + "grad_norm": 0.39594997967435885, + "learning_rate": 7.125669676744633e-06, + "loss": 0.0492, "step": 1911 }, { - "epoch": 0.8492116366866533, - "grad_norm": 0.7755772246757074, - "learning_rate": 9.620787402910597e-06, - "loss": 0.0983, + "epoch": 1.6980461811722913, + "grad_norm": 0.4612011985894259, + "learning_rate": 7.122161079993572e-06, + "loss": 0.0581, "step": 1912 }, { - "epoch": 0.8496557850322007, - "grad_norm": 0.7813757821925909, - "learning_rate": 9.620046511888343e-06, - "loss": 0.0499, + "epoch": 1.6989342806394316, + "grad_norm": 0.4641660554767729, + "learning_rate": 7.1186512081994205e-06, + "loss": 0.052, "step": 1913 }, { - "epoch": 0.8500999333777481, - "grad_norm": 0.8027788261683163, - "learning_rate": 9.619304926394619e-06, - "loss": 0.0588, + "epoch": 1.699822380106572, + "grad_norm": 0.41522639372046943, + "learning_rate": 7.1151400634709935e-06, + "loss": 0.0536, "step": 1914 }, { - "epoch": 0.8505440817232955, - "grad_norm": 0.8601044017181758, - "learning_rate": 9.618562646540897e-06, - "loss": 0.0638, + "epoch": 1.7007104795737122, + "grad_norm": 0.4658818662135742, + "learning_rate": 7.111627647917865e-06, + "loss": 0.0511, "step": 1915 }, { - "epoch": 0.8509882300688429, - "grad_norm": 0.5716901663276188, - "learning_rate": 9.617819672438754e-06, - "loss": 0.056, + "epoch": 1.7015985790408525, + "grad_norm": 0.39128896391754786, + "learning_rate": 7.108113963650376e-06, + "loss": 0.048, "step": 1916 }, { - "epoch": 0.8514323784143905, - "grad_norm": 0.7888611302795855, - "learning_rate": 9.617076004199868e-06, - "loss": 0.08, + "epoch": 1.7024866785079928, + "grad_norm": 0.4359605153766341, + "learning_rate": 7.104599012779627e-06, + "loss": 0.0559, "step": 1917 }, { - "epoch": 0.8518765267599379, - "grad_norm": 0.5937752892253898, - "learning_rate": 9.616331641936031e-06, - "loss": 0.0545, + "epoch": 1.7033747779751334, + "grad_norm": 0.493215315257364, + "learning_rate": 7.101082797417482e-06, + "loss": 0.0576, "step": 1918 }, { - "epoch": 0.8523206751054853, - "grad_norm": 0.6861875565077891, - "learning_rate": 9.61558658575913e-06, - "loss": 0.055, + "epoch": 1.7042628774422734, + "grad_norm": 0.4323465077548411, + "learning_rate": 7.0975653196765656e-06, + "loss": 0.0524, "step": 1919 }, { - "epoch": 0.8527648234510327, - "grad_norm": 0.8458011501127822, - "learning_rate": 9.614840835781159e-06, - "loss": 0.1023, + "epoch": 1.705150976909414, + "grad_norm": 0.5104717566429638, + "learning_rate": 7.094046581670258e-06, + "loss": 0.0583, "step": 1920 }, { - "epoch": 0.8532089717965801, - "grad_norm": 0.5027378978279742, - "learning_rate": 9.614094392114218e-06, - "loss": 0.0518, + "epoch": 1.706039076376554, + "grad_norm": 0.4148091979839278, + "learning_rate": 7.090526585512696e-06, + "loss": 0.0395, "step": 1921 }, { - "epoch": 0.8536531201421275, - "grad_norm": 0.5758297033096836, - "learning_rate": 9.613347254870511e-06, - "loss": 0.0476, + "epoch": 1.7069271758436946, + "grad_norm": 0.40257516478991223, + "learning_rate": 7.087005333318777e-06, + "loss": 0.0425, "step": 1922 }, { - "epoch": 0.8540972684876749, - "grad_norm": 0.8773520125115069, - "learning_rate": 9.612599424162344e-06, - "loss": 0.0858, + "epoch": 1.7078152753108347, + "grad_norm": 0.4167751143461822, + "learning_rate": 7.083482827204148e-06, + "loss": 0.0517, "step": 1923 }, { - "epoch": 0.8545414168332223, - "grad_norm": 0.5342112118627843, - "learning_rate": 9.61185090010213e-06, - "loss": 0.0532, + "epoch": 1.7087033747779752, + "grad_norm": 0.45737278550364424, + "learning_rate": 7.0799590692852126e-06, + "loss": 0.0479, "step": 1924 }, { - "epoch": 0.8549855651787697, - "grad_norm": 0.617753551302806, - "learning_rate": 9.611101682802383e-06, - "loss": 0.0664, + "epoch": 1.7095914742451155, + "grad_norm": 0.717527412678889, + "learning_rate": 7.076434061679125e-06, + "loss": 0.0587, "step": 1925 }, { - "epoch": 0.8554297135243171, - "grad_norm": 0.6816206410848374, - "learning_rate": 9.610351772375724e-06, - "loss": 0.0919, + "epoch": 1.7104795737122558, + "grad_norm": 0.40188478381671133, + "learning_rate": 7.072907806503792e-06, + "loss": 0.0464, "step": 1926 }, { - "epoch": 0.8558738618698646, - "grad_norm": 0.636349806575952, - "learning_rate": 9.609601168934878e-06, - "loss": 0.0544, + "epoch": 1.711367673179396, + "grad_norm": 0.353119606059219, + "learning_rate": 7.0693803058778646e-06, + "loss": 0.0429, "step": 1927 }, { - "epoch": 0.856318010215412, - "grad_norm": 0.5095901638429818, - "learning_rate": 9.608849872592674e-06, - "loss": 0.0584, + "epoch": 1.7122557726465364, + "grad_norm": 0.461018127057418, + "learning_rate": 7.065851561920751e-06, + "loss": 0.0444, "step": 1928 }, { - "epoch": 0.8567621585609594, - "grad_norm": 0.5240679834795968, - "learning_rate": 9.608097883462043e-06, - "loss": 0.0553, + "epoch": 1.7131438721136767, + "grad_norm": 0.4038407291080364, + "learning_rate": 7.0623215767526e-06, + "loss": 0.0433, "step": 1929 }, { - "epoch": 0.8572063069065068, - "grad_norm": 0.6099517865434585, - "learning_rate": 9.60734520165602e-06, - "loss": 0.0657, + "epoch": 1.714031971580817, + "grad_norm": 0.4481265980646673, + "learning_rate": 7.058790352494307e-06, + "loss": 0.0525, "step": 1930 }, { - "epoch": 0.8576504552520542, - "grad_norm": 0.555945227169772, - "learning_rate": 9.60659182728775e-06, - "loss": 0.0737, + "epoch": 1.7149200710479575, + "grad_norm": 0.4927074025855776, + "learning_rate": 7.0552578912675115e-06, + "loss": 0.0645, "step": 1931 }, { - "epoch": 0.8580946035976016, - "grad_norm": 0.5341529189989662, - "learning_rate": 9.605837760470476e-06, - "loss": 0.0494, + "epoch": 1.7158081705150976, + "grad_norm": 0.3051738955531093, + "learning_rate": 7.0517241951946e-06, + "loss": 0.0425, "step": 1932 }, { - "epoch": 0.858538751943149, - "grad_norm": 0.5439610693418829, - "learning_rate": 9.605083001317547e-06, - "loss": 0.0566, + "epoch": 1.7166962699822381, + "grad_norm": 0.4909899725260639, + "learning_rate": 7.048189266398696e-06, + "loss": 0.0543, "step": 1933 }, { - "epoch": 0.8589829002886964, - "grad_norm": 0.6077814464368465, - "learning_rate": 9.604327549942415e-06, - "loss": 0.0615, + "epoch": 1.7175843694493782, + "grad_norm": 0.4121445195542596, + "learning_rate": 7.044653107003668e-06, + "loss": 0.054, "step": 1934 }, { - "epoch": 0.8594270486342438, - "grad_norm": 0.7793330895236799, - "learning_rate": 9.603571406458641e-06, - "loss": 0.0669, + "epoch": 1.7184724689165187, + "grad_norm": 0.7287238660340769, + "learning_rate": 7.04111571913412e-06, + "loss": 0.0518, "step": 1935 }, { - "epoch": 0.8598711969797912, - "grad_norm": 0.5859164596399333, - "learning_rate": 9.60281457097988e-06, - "loss": 0.0554, + "epoch": 1.7193605683836588, + "grad_norm": 0.37118408041840495, + "learning_rate": 7.037577104915396e-06, + "loss": 0.0498, "step": 1936 }, { - "epoch": 0.8603153453253387, - "grad_norm": 0.5913578923102047, - "learning_rate": 9.602057043619903e-06, - "loss": 0.0819, + "epoch": 1.7202486678507993, + "grad_norm": 0.3625408648841396, + "learning_rate": 7.034037266473578e-06, + "loss": 0.0441, "step": 1937 }, { - "epoch": 0.8607594936708861, - "grad_norm": 0.3937188804578706, - "learning_rate": 9.601298824492577e-06, - "loss": 0.0455, + "epoch": 1.7211367673179396, + "grad_norm": 0.430633704859871, + "learning_rate": 7.0304962059354805e-06, + "loss": 0.0469, "step": 1938 }, { - "epoch": 0.8612036420164335, - "grad_norm": 0.5687600158251599, - "learning_rate": 9.600539913711876e-06, - "loss": 0.0703, + "epoch": 1.72202486678508, + "grad_norm": 0.3885989041574132, + "learning_rate": 7.026953925428655e-06, + "loss": 0.0524, "step": 1939 }, { - "epoch": 0.8616477903619809, - "grad_norm": 0.7038982027083199, - "learning_rate": 9.599780311391876e-06, - "loss": 0.0559, + "epoch": 1.7229129662522202, + "grad_norm": 0.5726371868643075, + "learning_rate": 7.023410427081384e-06, + "loss": 0.0509, "step": 1940 }, { - "epoch": 0.8620919387075283, - "grad_norm": 0.5434923734263241, - "learning_rate": 9.599020017646758e-06, - "loss": 0.059, + "epoch": 1.7238010657193605, + "grad_norm": 0.45585758969960544, + "learning_rate": 7.0198657130226835e-06, + "loss": 0.0584, "step": 1941 }, { - "epoch": 0.8625360870530757, - "grad_norm": 0.6288012353236909, - "learning_rate": 9.59825903259081e-06, - "loss": 0.0664, + "epoch": 1.7246891651865008, + "grad_norm": 0.4167656428047505, + "learning_rate": 7.0163197853822975e-06, + "loss": 0.0628, "step": 1942 }, { - "epoch": 0.8629802353986231, - "grad_norm": 0.7037145137062264, - "learning_rate": 9.597497356338415e-06, - "loss": 0.069, + "epoch": 1.7255772646536411, + "grad_norm": 0.4024300064639876, + "learning_rate": 7.0127726462907035e-06, + "loss": 0.0479, "step": 1943 }, { - "epoch": 0.8634243837441705, - "grad_norm": 0.5095647439957363, - "learning_rate": 9.59673498900407e-06, - "loss": 0.0618, + "epoch": 1.7264653641207817, + "grad_norm": 0.44392765991101496, + "learning_rate": 7.0092242978791026e-06, + "loss": 0.0487, "step": 1944 }, { - "epoch": 0.8638685320897179, - "grad_norm": 0.6494185255511562, - "learning_rate": 9.595971930702372e-06, - "loss": 0.0658, + "epoch": 1.7273534635879217, + "grad_norm": 0.4668187295820353, + "learning_rate": 7.005674742279423e-06, + "loss": 0.0463, "step": 1945 }, { - "epoch": 0.8643126804352653, - "grad_norm": 0.5454132982093647, - "learning_rate": 9.595208181548022e-06, - "loss": 0.0591, + "epoch": 1.7282415630550623, + "grad_norm": 0.7327420324761663, + "learning_rate": 7.00212398162432e-06, + "loss": 0.0518, "step": 1946 }, { - "epoch": 0.8647568287808128, - "grad_norm": 0.7893076144618603, - "learning_rate": 9.594443741655823e-06, - "loss": 0.0582, + "epoch": 1.7291296625222023, + "grad_norm": 0.443942234501367, + "learning_rate": 6.998572018047174e-06, + "loss": 0.044, "step": 1947 }, { - "epoch": 0.8652009771263602, - "grad_norm": 0.7430564668455467, - "learning_rate": 9.593678611140683e-06, - "loss": 0.0836, + "epoch": 1.7300177619893429, + "grad_norm": 0.4038559181234273, + "learning_rate": 6.995018853682083e-06, + "loss": 0.0439, "step": 1948 }, { - "epoch": 0.8656451254719076, - "grad_norm": 0.6526222088648067, - "learning_rate": 9.592912790117614e-06, - "loss": 0.0612, + "epoch": 1.7309058614564832, + "grad_norm": 0.6384867272348951, + "learning_rate": 6.991464490663871e-06, + "loss": 0.0636, "step": 1949 }, { - "epoch": 0.866089273817455, - "grad_norm": 0.5060704004253027, - "learning_rate": 9.592146278701734e-06, - "loss": 0.0528, + "epoch": 1.7317939609236235, + "grad_norm": 0.4065243718832035, + "learning_rate": 6.9879089311280815e-06, + "loss": 0.0527, "step": 1950 }, { - "epoch": 0.8665334221630024, - "grad_norm": 0.7447569328611817, - "learning_rate": 9.591379077008263e-06, - "loss": 0.0657, + "epoch": 1.7326820603907638, + "grad_norm": 0.45005679129742787, + "learning_rate": 6.9843521772109765e-06, + "loss": 0.0516, "step": 1951 }, { - "epoch": 0.8669775705085498, - "grad_norm": 0.6783080197900007, - "learning_rate": 9.590611185152521e-06, - "loss": 0.0748, + "epoch": 1.733570159857904, + "grad_norm": 0.4468306498195739, + "learning_rate": 6.980794231049534e-06, + "loss": 0.0591, "step": 1952 }, { - "epoch": 0.8674217188540972, - "grad_norm": 0.7292228470690477, - "learning_rate": 9.589842603249935e-06, - "loss": 0.0626, + "epoch": 1.7344582593250444, + "grad_norm": 0.3841813900716927, + "learning_rate": 6.977235094781452e-06, + "loss": 0.0385, "step": 1953 }, { - "epoch": 0.8678658671996446, - "grad_norm": 0.5752723662054593, - "learning_rate": 9.58907333141604e-06, - "loss": 0.0562, + "epoch": 1.7353463587921847, + "grad_norm": 0.5905425816395963, + "learning_rate": 6.973674770545138e-06, + "loss": 0.0571, "step": 1954 }, { - "epoch": 0.868310015545192, - "grad_norm": 0.5789129306578911, - "learning_rate": 9.588303369766469e-06, - "loss": 0.0523, + "epoch": 1.7362344582593252, + "grad_norm": 0.41403831422730447, + "learning_rate": 6.970113260479719e-06, + "loss": 0.0619, "step": 1955 }, { - "epoch": 0.8687541638907396, - "grad_norm": 0.5972967849185621, - "learning_rate": 9.58753271841696e-06, - "loss": 0.0638, + "epoch": 1.7371225577264653, + "grad_norm": 0.6090544014088589, + "learning_rate": 6.96655056672503e-06, + "loss": 0.048, "step": 1956 }, { - "epoch": 0.869198312236287, - "grad_norm": 0.9160241760382546, - "learning_rate": 9.586761377483355e-06, - "loss": 0.083, + "epoch": 1.7380106571936058, + "grad_norm": 0.5193654110281936, + "learning_rate": 6.962986691421623e-06, + "loss": 0.0549, "step": 1957 }, { - "epoch": 0.8696424605818344, - "grad_norm": 0.6452629891186096, - "learning_rate": 9.585989347081599e-06, - "loss": 0.0765, + "epoch": 1.738898756660746, + "grad_norm": 0.44306594429871254, + "learning_rate": 6.959421636710751e-06, + "loss": 0.0449, "step": 1958 }, { - "epoch": 0.8700866089273818, - "grad_norm": 0.5181341158162395, - "learning_rate": 9.58521662732774e-06, - "loss": 0.063, + "epoch": 1.7397868561278864, + "grad_norm": 0.3545538930856097, + "learning_rate": 6.955855404734384e-06, + "loss": 0.0411, "step": 1959 }, { - "epoch": 0.8705307572729292, - "grad_norm": 0.5208504155693663, - "learning_rate": 9.584443218337935e-06, - "loss": 0.0609, + "epoch": 1.7406749555950265, + "grad_norm": 0.41983040586963427, + "learning_rate": 6.952287997635195e-06, + "loss": 0.0437, "step": 1960 }, { - "epoch": 0.8709749056184766, - "grad_norm": 0.5883552043198282, - "learning_rate": 9.583669120228439e-06, - "loss": 0.0644, + "epoch": 1.741563055062167, + "grad_norm": 0.43214311364759633, + "learning_rate": 6.9487194175565655e-06, + "loss": 0.0405, "step": 1961 }, { - "epoch": 0.871419053964024, - "grad_norm": 0.5457449002927042, - "learning_rate": 9.582894333115608e-06, - "loss": 0.0567, + "epoch": 1.7424511545293073, + "grad_norm": 0.4423198943447773, + "learning_rate": 6.945149666642579e-06, + "loss": 0.0564, "step": 1962 }, { - "epoch": 0.8718632023095714, - "grad_norm": 0.6679359172890208, - "learning_rate": 9.58211885711591e-06, - "loss": 0.0652, + "epoch": 1.7433392539964476, + "grad_norm": 0.5594861180326316, + "learning_rate": 6.941578747038024e-06, + "loss": 0.0661, "step": 1963 }, { - "epoch": 0.8723073506551188, - "grad_norm": 0.5085227180546535, - "learning_rate": 9.581342692345913e-06, - "loss": 0.0734, + "epoch": 1.744227353463588, + "grad_norm": 0.6452081661120449, + "learning_rate": 6.93800666088839e-06, + "loss": 0.0732, "step": 1964 }, { - "epoch": 0.8727514990006662, - "grad_norm": 0.63235227685218, - "learning_rate": 9.580565838922285e-06, - "loss": 0.0501, + "epoch": 1.7451154529307282, + "grad_norm": 0.3920723014733711, + "learning_rate": 6.934433410339869e-06, + "loss": 0.0445, "step": 1965 }, { - "epoch": 0.8731956473462137, - "grad_norm": 0.5297922964410929, - "learning_rate": 9.579788296961801e-06, - "loss": 0.0525, + "epoch": 1.7460035523978685, + "grad_norm": 0.40025988359791975, + "learning_rate": 6.9308589975393535e-06, + "loss": 0.0482, "step": 1966 }, { - "epoch": 0.8736397956917611, - "grad_norm": 0.5775438947377388, - "learning_rate": 9.57901006658134e-06, - "loss": 0.0544, + "epoch": 1.7468916518650088, + "grad_norm": 0.3779755023052511, + "learning_rate": 6.9272834246344325e-06, + "loss": 0.0521, "step": 1967 }, { - "epoch": 0.8740839440373085, - "grad_norm": 0.708123483636742, - "learning_rate": 9.57823114789788e-06, - "loss": 0.0617, + "epoch": 1.7477797513321494, + "grad_norm": 0.4762875832461135, + "learning_rate": 6.923706693773388e-06, + "loss": 0.0579, "step": 1968 }, { - "epoch": 0.8745280923828559, - "grad_norm": 0.5577152090781191, - "learning_rate": 9.577451541028509e-06, - "loss": 0.0583, + "epoch": 1.7486678507992894, + "grad_norm": 0.41862535081175484, + "learning_rate": 6.9201288071052074e-06, + "loss": 0.0538, "step": 1969 }, { - "epoch": 0.8749722407284033, - "grad_norm": 0.6520169618466134, - "learning_rate": 9.576671246090415e-06, - "loss": 0.0566, + "epoch": 1.74955595026643, + "grad_norm": 0.4359239583319757, + "learning_rate": 6.916549766779564e-06, + "loss": 0.048, "step": 1970 }, { - "epoch": 0.8754163890739507, - "grad_norm": 0.49841809419135175, - "learning_rate": 9.575890263200887e-06, - "loss": 0.0497, + "epoch": 1.75044404973357, + "grad_norm": 0.47230583650019586, + "learning_rate": 6.912969574946829e-06, + "loss": 0.0457, "step": 1971 }, { - "epoch": 0.8758605374194981, - "grad_norm": 0.5197540662913069, - "learning_rate": 9.575108592477322e-06, - "loss": 0.0555, + "epoch": 1.7513321492007106, + "grad_norm": 0.5066145503687762, + "learning_rate": 6.909388233758063e-06, + "loss": 0.0569, "step": 1972 }, { - "epoch": 0.8763046857650455, - "grad_norm": 0.5124464300264364, - "learning_rate": 9.57432623403722e-06, - "loss": 0.0477, + "epoch": 1.7522202486678506, + "grad_norm": 0.5125342759701539, + "learning_rate": 6.9058057453650195e-06, + "loss": 0.0518, "step": 1973 }, { - "epoch": 0.8767488341105929, - "grad_norm": 0.5779510703301827, - "learning_rate": 9.57354318799818e-06, - "loss": 0.0685, + "epoch": 1.7531083481349912, + "grad_norm": 0.34865490084429174, + "learning_rate": 6.902222111920136e-06, + "loss": 0.0385, "step": 1974 }, { - "epoch": 0.8771929824561403, - "grad_norm": 0.5097058321667625, - "learning_rate": 9.572759454477907e-06, - "loss": 0.0488, + "epoch": 1.7539964476021315, + "grad_norm": 0.37974407165365953, + "learning_rate": 6.8986373355765464e-06, + "loss": 0.0613, "step": 1975 }, { - "epoch": 0.8776371308016878, - "grad_norm": 0.8063636142109883, - "learning_rate": 9.57197503359421e-06, - "loss": 0.0544, + "epoch": 1.7548845470692718, + "grad_norm": 0.4271229747862285, + "learning_rate": 6.895051418488064e-06, + "loss": 0.058, "step": 1976 }, { - "epoch": 0.8780812791472352, - "grad_norm": 0.4428063296481966, - "learning_rate": 9.571189925465002e-06, - "loss": 0.0533, + "epoch": 1.755772646536412, + "grad_norm": 0.8497766043435947, + "learning_rate": 6.89146436280919e-06, + "loss": 0.0662, "step": 1977 }, { - "epoch": 0.8785254274927826, - "grad_norm": 0.4839122950764424, - "learning_rate": 9.570404130208297e-06, - "loss": 0.0479, + "epoch": 1.7566607460035524, + "grad_norm": 0.3620990172114698, + "learning_rate": 6.88787617069511e-06, + "loss": 0.0422, "step": 1978 }, { - "epoch": 0.87896957583833, - "grad_norm": 0.7120182407032511, - "learning_rate": 9.569617647942214e-06, - "loss": 0.0611, + "epoch": 1.7575488454706927, + "grad_norm": 0.39493366973231053, + "learning_rate": 6.884286844301693e-06, + "loss": 0.0424, "step": 1979 }, { - "epoch": 0.8794137241838774, - "grad_norm": 0.792336377588857, - "learning_rate": 9.568830478784975e-06, - "loss": 0.0618, + "epoch": 1.758436944937833, + "grad_norm": 0.4773129835930439, + "learning_rate": 6.880696385785488e-06, + "loss": 0.0665, "step": 1980 }, { - "epoch": 0.8798578725294248, - "grad_norm": 0.7252084523924553, - "learning_rate": 9.568042622854902e-06, - "loss": 0.075, + "epoch": 1.7593250444049735, + "grad_norm": 0.5921073490200456, + "learning_rate": 6.877104797303725e-06, + "loss": 0.0469, "step": 1981 }, { - "epoch": 0.8803020208749722, - "grad_norm": 0.5594674149617587, - "learning_rate": 9.567254080270427e-06, - "loss": 0.0619, + "epoch": 1.7602131438721136, + "grad_norm": 0.5994443271449283, + "learning_rate": 6.873512081014313e-06, + "loss": 0.0561, "step": 1982 }, { - "epoch": 0.8807461692205196, - "grad_norm": 0.7969483073047114, - "learning_rate": 9.566464851150078e-06, - "loss": 0.0626, + "epoch": 1.761101243339254, + "grad_norm": 0.3903562920098094, + "learning_rate": 6.869918239075838e-06, + "loss": 0.046, "step": 1983 }, { - "epoch": 0.881190317566067, - "grad_norm": 0.760520710752294, - "learning_rate": 9.565674935612495e-06, - "loss": 0.0584, + "epoch": 1.7619893428063942, + "grad_norm": 0.4998592198797589, + "learning_rate": 6.866323273647564e-06, + "loss": 0.0463, "step": 1984 }, { - "epoch": 0.8816344659116144, - "grad_norm": 0.626615448212967, - "learning_rate": 9.564884333776408e-06, - "loss": 0.0596, + "epoch": 1.7628774422735347, + "grad_norm": 0.5220993731117153, + "learning_rate": 6.8627271868894275e-06, + "loss": 0.0499, "step": 1985 }, { - "epoch": 0.8820786142571619, - "grad_norm": 0.6020936472164464, - "learning_rate": 9.564093045760663e-06, - "loss": 0.0623, + "epoch": 1.7637655417406748, + "grad_norm": 0.3641182003004395, + "learning_rate": 6.85912998096204e-06, + "loss": 0.0472, "step": 1986 }, { - "epoch": 0.8825227626027093, - "grad_norm": 0.7034061959408842, - "learning_rate": 9.563301071684203e-06, - "loss": 0.0549, + "epoch": 1.7646536412078153, + "grad_norm": 0.40271526098467064, + "learning_rate": 6.855531658026684e-06, + "loss": 0.0461, "step": 1987 }, { - "epoch": 0.8829669109482567, - "grad_norm": 0.5695284311311956, - "learning_rate": 9.562508411666077e-06, - "loss": 0.0633, + "epoch": 1.7655417406749556, + "grad_norm": 0.4682792796060245, + "learning_rate": 6.851932220245318e-06, + "loss": 0.0503, "step": 1988 }, { - "epoch": 0.8834110592938041, - "grad_norm": 0.9573516672168828, - "learning_rate": 9.56171506582543e-06, - "loss": 0.0653, + "epoch": 1.766429840142096, + "grad_norm": 0.5012466280614905, + "learning_rate": 6.848331669780564e-06, + "loss": 0.0527, "step": 1989 }, { - "epoch": 0.8838552076393515, - "grad_norm": 0.5805029234381518, - "learning_rate": 9.56092103428152e-06, - "loss": 0.0607, + "epoch": 1.7673179396092362, + "grad_norm": 0.3614697970514256, + "learning_rate": 6.844730008795716e-06, + "loss": 0.0438, "step": 1990 }, { - "epoch": 0.884299355984899, - "grad_norm": 0.9854040215753008, - "learning_rate": 9.560126317153702e-06, - "loss": 0.0792, + "epoch": 1.7682060390763765, + "grad_norm": 0.3710877973383125, + "learning_rate": 6.841127239454737e-06, + "loss": 0.0499, "step": 1991 }, { - "epoch": 0.8847435043304464, - "grad_norm": 0.7570653217798166, - "learning_rate": 9.559330914561435e-06, - "loss": 0.0574, + "epoch": 1.7690941385435168, + "grad_norm": 0.3799847731257121, + "learning_rate": 6.837523363922249e-06, + "loss": 0.0411, "step": 1992 }, { - "epoch": 0.8851876526759938, - "grad_norm": 0.6760651246150844, - "learning_rate": 9.558534826624281e-06, - "loss": 0.0586, + "epoch": 1.7699822380106571, + "grad_norm": 0.4581509832635446, + "learning_rate": 6.833918384363547e-06, + "loss": 0.0485, "step": 1993 }, { - "epoch": 0.8856318010215412, - "grad_norm": 0.7446222818408107, - "learning_rate": 9.55773805346191e-06, - "loss": 0.0903, + "epoch": 1.7708703374777977, + "grad_norm": 0.3645272434565814, + "learning_rate": 6.830312302944584e-06, + "loss": 0.0416, "step": 1994 }, { - "epoch": 0.8860759493670886, - "grad_norm": 0.4971467452542264, - "learning_rate": 9.556940595194085e-06, - "loss": 0.0495, + "epoch": 1.7717584369449377, + "grad_norm": 0.4095467817591859, + "learning_rate": 6.8267051218319766e-06, + "loss": 0.0484, "step": 1995 }, { - "epoch": 0.8865200977126361, - "grad_norm": 0.48127766478143535, - "learning_rate": 9.55614245194068e-06, - "loss": 0.0476, + "epoch": 1.7726465364120783, + "grad_norm": 0.4758110810972315, + "learning_rate": 6.823096843193e-06, + "loss": 0.0494, "step": 1996 }, { - "epoch": 0.8869642460581835, - "grad_norm": 0.5264335771790009, - "learning_rate": 9.555343623821669e-06, - "loss": 0.0656, + "epoch": 1.7735346358792183, + "grad_norm": 0.44174361579169513, + "learning_rate": 6.819487469195596e-06, + "loss": 0.0528, "step": 1997 }, { - "epoch": 0.8874083944037309, - "grad_norm": 1.0687153694098346, - "learning_rate": 9.554544110957128e-06, - "loss": 0.1082, + "epoch": 1.7744227353463589, + "grad_norm": 0.3729174949891858, + "learning_rate": 6.815877002008354e-06, + "loss": 0.0471, "step": 1998 }, { - "epoch": 0.8878525427492783, - "grad_norm": 0.6068726497280238, - "learning_rate": 9.553743913467241e-06, - "loss": 0.0613, + "epoch": 1.7753108348134992, + "grad_norm": 0.33549672535848557, + "learning_rate": 6.8122654438005275e-06, + "loss": 0.0453, "step": 1999 }, { - "epoch": 0.8882966910948257, - "grad_norm": 0.7498237482418018, - "learning_rate": 9.552943031472289e-06, - "loss": 0.0771, + "epoch": 1.7761989342806395, + "grad_norm": 0.46990029943424805, + "learning_rate": 6.808652796742025e-06, + "loss": 0.0597, "step": 2000 }, { - "epoch": 0.8887408394403731, - "grad_norm": 0.6023014910820245, - "learning_rate": 9.552141465092659e-06, - "loss": 0.0602, + "epoch": 1.7770870337477798, + "grad_norm": 0.35477533288410923, + "learning_rate": 6.805039063003407e-06, + "loss": 0.0435, "step": 2001 }, { - "epoch": 0.8891849877859205, - "grad_norm": 0.6380409827625847, - "learning_rate": 9.551339214448838e-06, - "loss": 0.0619, + "epoch": 1.77797513321492, + "grad_norm": 0.38951162771881537, + "learning_rate": 6.8014242447558855e-06, + "loss": 0.0489, "step": 2002 }, { - "epoch": 0.8896291361314679, - "grad_norm": 0.9507144923151234, - "learning_rate": 9.55053627966142e-06, - "loss": 0.0869, + "epoch": 1.7788632326820604, + "grad_norm": 0.3582600205481822, + "learning_rate": 6.797808344171329e-06, + "loss": 0.0509, "step": 2003 }, { - "epoch": 0.8900732844770153, - "grad_norm": 0.5426047915412493, - "learning_rate": 9.5497326608511e-06, - "loss": 0.0571, + "epoch": 1.7797513321492007, + "grad_norm": 0.5422647873950133, + "learning_rate": 6.794191363422251e-06, + "loss": 0.0505, "step": 2004 }, { - "epoch": 0.8905174328225627, - "grad_norm": 0.44148111117293815, - "learning_rate": 9.548928358138672e-06, - "loss": 0.0508, + "epoch": 1.7806394316163412, + "grad_norm": 0.4918493546856528, + "learning_rate": 6.79057330468182e-06, + "loss": 0.058, "step": 2005 }, { - "epoch": 0.8909615811681102, - "grad_norm": 0.6815953588544817, - "learning_rate": 9.548123371645042e-06, - "loss": 0.0648, + "epoch": 1.7815275310834813, + "grad_norm": 0.40404782312769294, + "learning_rate": 6.786954170123846e-06, + "loss": 0.0503, "step": 2006 }, { - "epoch": 0.8914057295136576, - "grad_norm": 0.7228672896480205, - "learning_rate": 9.547317701491207e-06, - "loss": 0.0751, + "epoch": 1.7824156305506218, + "grad_norm": 0.38173411664949003, + "learning_rate": 6.78333396192279e-06, + "loss": 0.0455, "step": 2007 }, { - "epoch": 0.891849877859205, - "grad_norm": 0.5288810936079249, - "learning_rate": 9.546511347798278e-06, - "loss": 0.0718, + "epoch": 1.7833037300177619, + "grad_norm": 0.42275908947777346, + "learning_rate": 6.779712682253754e-06, + "loss": 0.0559, "step": 2008 }, { - "epoch": 0.8922940262047524, - "grad_norm": 0.6326908645656705, - "learning_rate": 9.545704310687462e-06, - "loss": 0.0643, + "epoch": 1.7841918294849024, + "grad_norm": 0.5017305533981338, + "learning_rate": 6.776090333292488e-06, + "loss": 0.055, "step": 2009 }, { - "epoch": 0.8927381745502998, - "grad_norm": 0.715169955221305, - "learning_rate": 9.54489659028007e-06, - "loss": 0.0551, + "epoch": 1.7850799289520425, + "grad_norm": 0.4385039129993016, + "learning_rate": 6.77246691721538e-06, + "loss": 0.0474, "step": 2010 }, { - "epoch": 0.8931823228958472, - "grad_norm": 0.5887866106129909, - "learning_rate": 9.544088186697515e-06, - "loss": 0.0537, + "epoch": 1.785968028419183, + "grad_norm": 0.39754305034029586, + "learning_rate": 6.768842436199464e-06, + "loss": 0.0445, "step": 2011 }, { - "epoch": 0.8936264712413946, - "grad_norm": 0.9378252587331583, - "learning_rate": 9.543279100061316e-06, - "loss": 0.0823, + "epoch": 1.7868561278863233, + "grad_norm": 0.5363354231611518, + "learning_rate": 6.76521689242241e-06, + "loss": 0.0507, "step": 2012 }, { - "epoch": 0.894070619586942, - "grad_norm": 0.5493178853844625, - "learning_rate": 9.542469330493092e-06, - "loss": 0.0518, + "epoch": 1.7877442273534636, + "grad_norm": 0.4426707834739782, + "learning_rate": 6.76159028806253e-06, + "loss": 0.0372, "step": 2013 }, { - "epoch": 0.8945147679324894, - "grad_norm": 1.0489392256223762, - "learning_rate": 9.541658878114564e-06, - "loss": 0.0951, + "epoch": 1.788632326820604, + "grad_norm": 0.46176746277760417, + "learning_rate": 6.757962625298769e-06, + "loss": 0.0493, "step": 2014 }, { - "epoch": 0.8949589162780368, - "grad_norm": 0.9003525371835955, - "learning_rate": 9.540847743047556e-06, - "loss": 0.0823, + "epoch": 1.7895204262877442, + "grad_norm": 0.38219017080049245, + "learning_rate": 6.7543339063107084e-06, + "loss": 0.0443, "step": 2015 }, { - "epoch": 0.8954030646235843, - "grad_norm": 0.6349100893476417, - "learning_rate": 9.540035925413997e-06, - "loss": 0.0687, + "epoch": 1.7904085257548845, + "grad_norm": 0.39577290688408745, + "learning_rate": 6.750704133278571e-06, + "loss": 0.0413, "step": 2016 }, { - "epoch": 0.8958472129691317, - "grad_norm": 0.6722685205867144, - "learning_rate": 9.539223425335919e-06, - "loss": 0.0584, + "epoch": 1.7912966252220248, + "grad_norm": 0.4745739444420137, + "learning_rate": 6.747073308383203e-06, + "loss": 0.0511, "step": 2017 }, { - "epoch": 0.8962913613146791, - "grad_norm": 0.5752288899218686, - "learning_rate": 9.53841024293545e-06, - "loss": 0.0615, + "epoch": 1.7921847246891653, + "grad_norm": 0.374979497473119, + "learning_rate": 6.7434414338060894e-06, + "loss": 0.0425, "step": 2018 }, { - "epoch": 0.8967355096602265, - "grad_norm": 0.6641589129330268, - "learning_rate": 9.537596378334827e-06, - "loss": 0.0599, + "epoch": 1.7930728241563054, + "grad_norm": 0.549508948054823, + "learning_rate": 6.739808511729343e-06, + "loss": 0.056, "step": 2019 }, { - "epoch": 0.8971796580057739, - "grad_norm": 0.6171698716517034, - "learning_rate": 9.53678183165639e-06, - "loss": 0.0614, + "epoch": 1.793960923623446, + "grad_norm": 0.4856860394946529, + "learning_rate": 6.73617454433571e-06, + "loss": 0.0502, "step": 2020 }, { - "epoch": 0.8976238063513213, - "grad_norm": 0.46805323991408604, - "learning_rate": 9.535966603022578e-06, - "loss": 0.0497, + "epoch": 1.794849023090586, + "grad_norm": 0.4700483210304012, + "learning_rate": 6.732539533808556e-06, + "loss": 0.0551, "step": 2021 }, { - "epoch": 0.8980679546968687, - "grad_norm": 0.5561153083603769, - "learning_rate": 9.53515069255593e-06, - "loss": 0.0549, + "epoch": 1.7957371225577266, + "grad_norm": 0.45411948001388186, + "learning_rate": 6.7289034823318825e-06, + "loss": 0.0509, "step": 2022 }, { - "epoch": 0.8985121030424161, - "grad_norm": 0.8567619540393755, - "learning_rate": 9.534334100379095e-06, - "loss": 0.087, + "epoch": 1.7966252220248666, + "grad_norm": 0.4323490848451854, + "learning_rate": 6.7252663920903105e-06, + "loss": 0.0545, "step": 2023 }, { - "epoch": 0.8989562513879635, - "grad_norm": 0.7529977371586785, - "learning_rate": 9.533516826614822e-06, - "loss": 0.071, + "epoch": 1.7975133214920072, + "grad_norm": 0.6001627010374895, + "learning_rate": 6.72162826526909e-06, + "loss": 0.0507, "step": 2024 }, { - "epoch": 0.899400399733511, - "grad_norm": 0.47166342806955874, - "learning_rate": 9.532698871385957e-06, - "loss": 0.0598, + "epoch": 1.7984014209591475, + "grad_norm": 0.4422196101485241, + "learning_rate": 6.717989104054089e-06, + "loss": 0.0465, "step": 2025 }, { - "epoch": 0.8998445480790584, - "grad_norm": 0.5143678735436241, - "learning_rate": 9.531880234815454e-06, - "loss": 0.0564, + "epoch": 1.7992895204262878, + "grad_norm": 0.39566100815958044, + "learning_rate": 6.7143489106318e-06, + "loss": 0.0439, "step": 2026 }, { - "epoch": 0.9002886964246058, - "grad_norm": 0.7879319949686112, - "learning_rate": 9.53106091702637e-06, - "loss": 0.0721, + "epoch": 1.800177619893428, + "grad_norm": 0.39363120698285337, + "learning_rate": 6.710707687189335e-06, + "loss": 0.0402, "step": 2027 }, { - "epoch": 0.9007328447701533, - "grad_norm": 0.9434208137805631, - "learning_rate": 9.53024091814186e-06, - "loss": 0.0777, + "epoch": 1.8010657193605684, + "grad_norm": 0.5793439239998182, + "learning_rate": 6.707065435914423e-06, + "loss": 0.0725, "step": 2028 }, { - "epoch": 0.9011769931157007, - "grad_norm": 0.5393835319630947, - "learning_rate": 9.529420238285185e-06, - "loss": 0.0729, + "epoch": 1.8019538188277087, + "grad_norm": 0.4782131052607772, + "learning_rate": 6.703422158995413e-06, + "loss": 0.0584, "step": 2029 }, { - "epoch": 0.901621141461248, - "grad_norm": 1.1442231822471376, - "learning_rate": 9.528598877579707e-06, - "loss": 0.0577, + "epoch": 1.802841918294849, + "grad_norm": 0.5628758094136977, + "learning_rate": 6.699777858621271e-06, + "loss": 0.0567, "step": 2030 }, { - "epoch": 0.9020652898067955, - "grad_norm": 0.6771073887283316, - "learning_rate": 9.52777683614889e-06, - "loss": 0.0524, + "epoch": 1.8037300177619895, + "grad_norm": 0.36581802683215386, + "learning_rate": 6.6961325369815736e-06, + "loss": 0.0436, "step": 2031 }, { - "epoch": 0.9025094381523429, - "grad_norm": 0.4601735486943438, - "learning_rate": 9.5269541141163e-06, - "loss": 0.049, + "epoch": 1.8046181172291296, + "grad_norm": 0.45615475074720174, + "learning_rate": 6.6924861962665166e-06, + "loss": 0.043, "step": 2032 }, { - "epoch": 0.9029535864978903, - "grad_norm": 0.857950337442236, - "learning_rate": 9.526130711605609e-06, - "loss": 0.0729, + "epoch": 1.80550621669627, + "grad_norm": 0.49028151822505167, + "learning_rate": 6.688838838666902e-06, + "loss": 0.0422, "step": 2033 }, { - "epoch": 0.9033977348434377, - "grad_norm": 0.6334716311616548, - "learning_rate": 9.525306628740585e-06, - "loss": 0.0557, + "epoch": 1.8063943161634102, + "grad_norm": 0.41650683109921516, + "learning_rate": 6.685190466374149e-06, + "loss": 0.042, "step": 2034 }, { - "epoch": 0.9038418831889852, - "grad_norm": 0.5606743700857357, - "learning_rate": 9.524481865645105e-06, - "loss": 0.0529, + "epoch": 1.8072824156305507, + "grad_norm": 0.4214028572855726, + "learning_rate": 6.681541081580281e-06, + "loss": 0.0429, "step": 2035 }, { - "epoch": 0.9042860315345326, - "grad_norm": 0.7009413752452551, - "learning_rate": 9.523656422443142e-06, - "loss": 0.0705, + "epoch": 1.8081705150976908, + "grad_norm": 0.30651081747774706, + "learning_rate": 6.6778906864779345e-06, + "loss": 0.037, "step": 2036 }, { - "epoch": 0.90473017988008, - "grad_norm": 0.5676919166524141, - "learning_rate": 9.522830299258773e-06, - "loss": 0.0642, + "epoch": 1.8090586145648313, + "grad_norm": 0.6446685900959485, + "learning_rate": 6.674239283260347e-06, + "loss": 0.0553, "step": 2037 }, { - "epoch": 0.9051743282256274, - "grad_norm": 0.5909379090037243, - "learning_rate": 9.522003496216184e-06, - "loss": 0.0611, + "epoch": 1.8099467140319716, + "grad_norm": 0.3543614113946352, + "learning_rate": 6.670586874121369e-06, + "loss": 0.0394, "step": 2038 }, { - "epoch": 0.9056184765711748, - "grad_norm": 0.6412971030112977, - "learning_rate": 9.521176013439652e-06, - "loss": 0.0668, + "epoch": 1.810834813499112, + "grad_norm": 0.3618617037372615, + "learning_rate": 6.666933461255451e-06, + "loss": 0.0444, "step": 2039 }, { - "epoch": 0.9060626249167222, - "grad_norm": 0.6697892166854362, - "learning_rate": 9.520347851053567e-06, - "loss": 0.0589, + "epoch": 1.8117229129662522, + "grad_norm": 0.4245914746715078, + "learning_rate": 6.663279046857647e-06, + "loss": 0.0481, "step": 2040 }, { - "epoch": 0.9065067732622696, - "grad_norm": 0.4472682187482963, - "learning_rate": 9.51951900918241e-06, - "loss": 0.0468, + "epoch": 1.8126110124333925, + "grad_norm": 0.5403697850119982, + "learning_rate": 6.659623633123613e-06, + "loss": 0.0483, "step": 2041 }, { - "epoch": 0.906950921607817, - "grad_norm": 0.6790472985515921, - "learning_rate": 9.518689487950772e-06, - "loss": 0.064, + "epoch": 1.8134991119005328, + "grad_norm": 0.5928311112067389, + "learning_rate": 6.655967222249606e-06, + "loss": 0.0547, "step": 2042 }, { - "epoch": 0.9073950699533644, - "grad_norm": 0.4982007942752425, - "learning_rate": 9.517859287483347e-06, - "loss": 0.0555, + "epoch": 1.8143872113676731, + "grad_norm": 0.36020724361911344, + "learning_rate": 6.65230981643248e-06, + "loss": 0.043, "step": 2043 }, { - "epoch": 0.9078392182989118, - "grad_norm": 0.4637933782350014, - "learning_rate": 9.517028407904925e-06, - "loss": 0.0565, + "epoch": 1.8152753108348136, + "grad_norm": 0.4390963443853013, + "learning_rate": 6.648651417869688e-06, + "loss": 0.0478, "step": 2044 }, { - "epoch": 0.9082833666444593, - "grad_norm": 0.5832414481867781, - "learning_rate": 9.516196849340402e-06, - "loss": 0.0742, + "epoch": 1.8161634103019537, + "grad_norm": 0.4233236483400129, + "learning_rate": 6.644992028759283e-06, + "loss": 0.0523, "step": 2045 }, { - "epoch": 0.9087275149900067, - "grad_norm": 0.5576423922621532, - "learning_rate": 9.515364611914777e-06, - "loss": 0.0628, + "epoch": 1.8170515097690942, + "grad_norm": 0.4450804724945297, + "learning_rate": 6.641331651299906e-06, + "loss": 0.053, "step": 2046 }, { - "epoch": 0.9091716633355541, - "grad_norm": 0.5229540500090215, - "learning_rate": 9.514531695753146e-06, - "loss": 0.0517, + "epoch": 1.8179396092362343, + "grad_norm": 0.36181807383807113, + "learning_rate": 6.6376702876908e-06, + "loss": 0.0464, "step": 2047 }, { - "epoch": 0.9096158116811015, - "grad_norm": 0.6260810656444592, - "learning_rate": 9.513698100980715e-06, - "loss": 0.0828, + "epoch": 1.8188277087033748, + "grad_norm": 0.33676684376312366, + "learning_rate": 6.634007940131794e-06, + "loss": 0.0458, "step": 2048 }, { - "epoch": 0.9100599600266489, - "grad_norm": 0.4707579422417085, - "learning_rate": 9.512863827722785e-06, - "loss": 0.0593, + "epoch": 1.8197158081705151, + "grad_norm": 0.3737934284503229, + "learning_rate": 6.63034461082331e-06, + "loss": 0.0375, "step": 2049 }, { - "epoch": 0.9105041083721963, - "grad_norm": 0.5601794044808186, - "learning_rate": 9.51202887610476e-06, - "loss": 0.063, + "epoch": 1.8206039076376554, + "grad_norm": 0.3832541594396424, + "learning_rate": 6.6266803019663604e-06, + "loss": 0.0459, "step": 2050 }, { - "epoch": 0.9109482567177437, - "grad_norm": 0.6023400251451128, - "learning_rate": 9.51119324625215e-06, - "loss": 0.0686, + "epoch": 1.8214920071047958, + "grad_norm": 0.5256262091397433, + "learning_rate": 6.623015015762547e-06, + "loss": 0.0504, "step": 2051 }, { - "epoch": 0.9113924050632911, - "grad_norm": 0.5886804844960863, - "learning_rate": 9.510356938290562e-06, - "loss": 0.0618, + "epoch": 1.822380106571936, + "grad_norm": 0.33840402093820304, + "learning_rate": 6.619348754414059e-06, + "loss": 0.0371, "step": 2052 }, { - "epoch": 0.9118365534088385, - "grad_norm": 0.5729230838893967, - "learning_rate": 9.509519952345709e-06, - "loss": 0.0594, + "epoch": 1.8232682060390764, + "grad_norm": 0.39633904955040034, + "learning_rate": 6.615681520123667e-06, + "loss": 0.0472, "step": 2053 }, { - "epoch": 0.9122807017543859, - "grad_norm": 0.5913444178469247, - "learning_rate": 9.508682288543405e-06, - "loss": 0.0544, + "epoch": 1.8241563055062167, + "grad_norm": 0.4642800671979631, + "learning_rate": 6.612013315094733e-06, + "loss": 0.0492, "step": 2054 }, { - "epoch": 0.9127248500999334, - "grad_norm": 0.5581180816842339, - "learning_rate": 9.507843947009562e-06, - "loss": 0.0489, + "epoch": 1.8250444049733572, + "grad_norm": 0.4105829155304549, + "learning_rate": 6.608344141531196e-06, + "loss": 0.041, "step": 2055 }, { - "epoch": 0.9131689984454808, - "grad_norm": 0.9592080607831761, - "learning_rate": 9.507004927870202e-06, - "loss": 0.0674, + "epoch": 1.8259325044404973, + "grad_norm": 0.37517066733437515, + "learning_rate": 6.604674001637582e-06, + "loss": 0.0466, "step": 2056 }, { - "epoch": 0.9136131467910282, - "grad_norm": 0.48343586169312186, - "learning_rate": 9.506165231251438e-06, - "loss": 0.0536, + "epoch": 1.8268206039076378, + "grad_norm": 0.3971195649011647, + "learning_rate": 6.601002897618993e-06, + "loss": 0.0402, "step": 2057 }, { - "epoch": 0.9140572951365756, - "grad_norm": 0.5238333120956294, - "learning_rate": 9.505324857279494e-06, - "loss": 0.0521, + "epoch": 1.8277087033747779, + "grad_norm": 0.4211811563422759, + "learning_rate": 6.597330831681115e-06, + "loss": 0.0527, "step": 2058 }, { - "epoch": 0.914501443482123, - "grad_norm": 0.4166519007433999, - "learning_rate": 9.504483806080694e-06, - "loss": 0.0586, + "epoch": 1.8285968028419184, + "grad_norm": 0.4177644253074288, + "learning_rate": 6.593657806030208e-06, + "loss": 0.0524, "step": 2059 }, { - "epoch": 0.9149455918276704, - "grad_norm": 0.6437419685785489, - "learning_rate": 9.503642077781457e-06, - "loss": 0.0767, + "epoch": 1.8294849023090585, + "grad_norm": 0.5205344156293021, + "learning_rate": 6.589983822873112e-06, + "loss": 0.0421, "step": 2060 }, { - "epoch": 0.9153897401732178, - "grad_norm": 0.6570037258452504, - "learning_rate": 9.502799672508314e-06, - "loss": 0.0587, + "epoch": 1.830373001776199, + "grad_norm": 0.3836569190869212, + "learning_rate": 6.586308884417236e-06, + "loss": 0.0439, "step": 2061 }, { - "epoch": 0.9158338885187652, - "grad_norm": 0.6138821258144063, - "learning_rate": 9.501956590387891e-06, - "loss": 0.0736, + "epoch": 1.8312611012433393, + "grad_norm": 0.4430535990838388, + "learning_rate": 6.582632992870571e-06, + "loss": 0.0481, "step": 2062 }, { - "epoch": 0.9162780368643126, - "grad_norm": 0.6641641647032757, - "learning_rate": 9.501112831546917e-06, - "loss": 0.0606, + "epoch": 1.8321492007104796, + "grad_norm": 0.39208662605247335, + "learning_rate": 6.5789561504416764e-06, + "loss": 0.0463, "step": 2063 }, { - "epoch": 0.91672218520986, - "grad_norm": 0.4693758498407545, - "learning_rate": 9.500268396112224e-06, - "loss": 0.056, + "epoch": 1.83303730017762, + "grad_norm": 0.3879886155905086, + "learning_rate": 6.575278359339685e-06, + "loss": 0.0416, "step": 2064 }, { - "epoch": 0.9171663335554076, - "grad_norm": 0.7149305856179645, - "learning_rate": 9.499423284210745e-06, - "loss": 0.0627, + "epoch": 1.8339253996447602, + "grad_norm": 0.46572152087202695, + "learning_rate": 6.571599621774294e-06, + "loss": 0.0522, "step": 2065 }, { - "epoch": 0.917610481900955, - "grad_norm": 0.522841296737736, - "learning_rate": 9.498577495969515e-06, - "loss": 0.0722, + "epoch": 1.8348134991119005, + "grad_norm": 0.45785826870440977, + "learning_rate": 6.567919939955778e-06, + "loss": 0.0525, "step": 2066 }, { - "epoch": 0.9180546302465024, - "grad_norm": 0.568329647769687, - "learning_rate": 9.497731031515669e-06, - "loss": 0.0546, + "epoch": 1.8357015985790408, + "grad_norm": 0.37121700029217813, + "learning_rate": 6.5642393160949715e-06, + "loss": 0.046, "step": 2067 }, { - "epoch": 0.9184987785920498, - "grad_norm": 0.7376883712368818, - "learning_rate": 9.496883890976445e-06, - "loss": 0.0743, + "epoch": 1.8365896980461813, + "grad_norm": 0.527652281455598, + "learning_rate": 6.560557752403277e-06, + "loss": 0.0389, "step": 2068 }, { - "epoch": 0.9189429269375972, - "grad_norm": 0.8322448418666403, - "learning_rate": 9.496036074479184e-06, - "loss": 0.0505, + "epoch": 1.8374777975133214, + "grad_norm": 0.5310334008842542, + "learning_rate": 6.5568752510926645e-06, + "loss": 0.0489, "step": 2069 }, { - "epoch": 0.9193870752831446, - "grad_norm": 0.47540090764910864, - "learning_rate": 9.495187582151328e-06, - "loss": 0.0503, + "epoch": 1.838365896980462, + "grad_norm": 0.4960897779274944, + "learning_rate": 6.553191814375667e-06, + "loss": 0.0501, "step": 2070 }, { - "epoch": 0.919831223628692, - "grad_norm": 0.5694137714424392, - "learning_rate": 9.494338414120419e-06, - "loss": 0.0552, + "epoch": 1.839253996447602, + "grad_norm": 0.3867255698905068, + "learning_rate": 6.5495074444653735e-06, + "loss": 0.0436, "step": 2071 }, { - "epoch": 0.9202753719742394, - "grad_norm": 0.4737570623495173, - "learning_rate": 9.493488570514099e-06, - "loss": 0.0546, + "epoch": 1.8401420959147425, + "grad_norm": 0.6419876218650102, + "learning_rate": 6.545822143575441e-06, + "loss": 0.0548, "step": 2072 }, { - "epoch": 0.9207195203197868, - "grad_norm": 0.5080926402077233, - "learning_rate": 9.492638051460116e-06, - "loss": 0.0634, + "epoch": 1.8410301953818826, + "grad_norm": 0.501660018948477, + "learning_rate": 6.54213591392008e-06, + "loss": 0.0537, "step": 2073 }, { - "epoch": 0.9211636686653342, - "grad_norm": 0.5930821340624887, - "learning_rate": 9.491786857086318e-06, - "loss": 0.0558, + "epoch": 1.8419182948490231, + "grad_norm": 0.514176055189614, + "learning_rate": 6.538448757714066e-06, + "loss": 0.0496, "step": 2074 }, { - "epoch": 0.9216078170108817, - "grad_norm": 0.6489494784499551, - "learning_rate": 9.490934987520653e-06, - "loss": 0.0574, + "epoch": 1.8428063943161634, + "grad_norm": 0.40759921456099524, + "learning_rate": 6.5347606771727245e-06, + "loss": 0.0485, "step": 2075 }, { - "epoch": 0.9220519653564291, - "grad_norm": 0.4274889745604907, - "learning_rate": 9.490082442891171e-06, - "loss": 0.0484, + "epoch": 1.8436944937833037, + "grad_norm": 0.48667752765541283, + "learning_rate": 6.531071674511939e-06, + "loss": 0.0529, "step": 2076 }, { - "epoch": 0.9224961137019765, - "grad_norm": 0.7132092664683398, - "learning_rate": 9.489229223326027e-06, - "loss": 0.0506, + "epoch": 1.844582593250444, + "grad_norm": 0.6444952542424409, + "learning_rate": 6.5273817519481474e-06, + "loss": 0.0404, "step": 2077 }, { - "epoch": 0.9229402620475239, - "grad_norm": 0.7098655395611968, - "learning_rate": 9.48837532895347e-06, - "loss": 0.0693, + "epoch": 1.8454706927175843, + "grad_norm": 0.36145831986300553, + "learning_rate": 6.523690911698339e-06, + "loss": 0.0455, "step": 2078 }, { - "epoch": 0.9233844103930713, - "grad_norm": 0.6125346051926611, - "learning_rate": 9.487520759901858e-06, - "loss": 0.0665, + "epoch": 1.8463587921847247, + "grad_norm": 0.4402720883989673, + "learning_rate": 6.519999155980058e-06, + "loss": 0.0456, "step": 2079 }, { - "epoch": 0.9238285587386187, - "grad_norm": 0.5007229228967727, - "learning_rate": 9.486665516299646e-06, - "loss": 0.0763, + "epoch": 1.847246891651865, + "grad_norm": 0.47197416916409135, + "learning_rate": 6.516306487011393e-06, + "loss": 0.0504, "step": 2080 }, { - "epoch": 0.9242727070841661, - "grad_norm": 0.6228392832149312, - "learning_rate": 9.485809598275391e-06, - "loss": 0.0602, + "epoch": 1.8481349911190055, + "grad_norm": 0.43927904744559854, + "learning_rate": 6.512612907010986e-06, + "loss": 0.0438, "step": 2081 }, { - "epoch": 0.9247168554297135, - "grad_norm": 0.5392941744359442, - "learning_rate": 9.484953005957753e-06, - "loss": 0.0621, + "epoch": 1.8490230905861456, + "grad_norm": 0.6033732389148113, + "learning_rate": 6.508918418198023e-06, + "loss": 0.0551, "step": 2082 }, { - "epoch": 0.9251610037752609, - "grad_norm": 0.6769648255970269, - "learning_rate": 9.484095739475492e-06, - "loss": 0.0829, + "epoch": 1.849911190053286, + "grad_norm": 0.4731524764323916, + "learning_rate": 6.50522302279224e-06, + "loss": 0.0395, "step": 2083 }, { - "epoch": 0.9256051521208084, - "grad_norm": 1.0001370312264637, - "learning_rate": 9.48323779895747e-06, - "loss": 0.0616, + "epoch": 1.8507992895204262, + "grad_norm": 0.3886459176710567, + "learning_rate": 6.501526723013913e-06, + "loss": 0.052, "step": 2084 }, { - "epoch": 0.9260493004663558, - "grad_norm": 0.6960955983895617, - "learning_rate": 9.482379184532652e-06, - "loss": 0.0701, + "epoch": 1.8516873889875667, + "grad_norm": 0.42995434792805065, + "learning_rate": 6.497829521083866e-06, + "loss": 0.0534, "step": 2085 }, { - "epoch": 0.9264934488119032, - "grad_norm": 0.5402499843217174, - "learning_rate": 9.481519896330098e-06, - "loss": 0.0557, + "epoch": 1.8525754884547068, + "grad_norm": 0.3770685946384062, + "learning_rate": 6.494131419223461e-06, + "loss": 0.037, "step": 2086 }, { - "epoch": 0.9269375971574506, - "grad_norm": 0.5511208018996919, - "learning_rate": 9.480659934478975e-06, - "loss": 0.0613, + "epoch": 1.8534635879218473, + "grad_norm": 0.43816930145050054, + "learning_rate": 6.490432419654602e-06, + "loss": 0.0554, "step": 2087 }, { - "epoch": 0.927381745502998, - "grad_norm": 0.5938957138905275, - "learning_rate": 9.479799299108553e-06, - "loss": 0.0808, + "epoch": 1.8543516873889876, + "grad_norm": 0.37123944678402515, + "learning_rate": 6.486732524599736e-06, + "loss": 0.0415, "step": 2088 }, { - "epoch": 0.9278258938485454, - "grad_norm": 0.7161437453790136, - "learning_rate": 9.478937990348196e-06, - "loss": 0.0606, + "epoch": 1.855239786856128, + "grad_norm": 0.37410095953505745, + "learning_rate": 6.483031736281843e-06, + "loss": 0.0476, "step": 2089 }, { - "epoch": 0.9282700421940928, - "grad_norm": 0.5468536840129006, - "learning_rate": 9.478076008327377e-06, - "loss": 0.0513, + "epoch": 1.8561278863232682, + "grad_norm": 0.36333640435943326, + "learning_rate": 6.47933005692444e-06, + "loss": 0.049, "step": 2090 }, { - "epoch": 0.9287141905396402, - "grad_norm": 0.9707762113595154, - "learning_rate": 9.477213353175663e-06, - "loss": 0.0869, + "epoch": 1.8570159857904085, + "grad_norm": 0.42352096688025875, + "learning_rate": 6.475627488751583e-06, + "loss": 0.0415, "step": 2091 }, { - "epoch": 0.9291583388851876, - "grad_norm": 0.7985679050710678, - "learning_rate": 9.476350025022728e-06, - "loss": 0.0794, + "epoch": 1.8579040852575488, + "grad_norm": 0.34066396339303506, + "learning_rate": 6.471924033987859e-06, + "loss": 0.0441, "step": 2092 }, { - "epoch": 0.929602487230735, - "grad_norm": 0.6127936826464601, - "learning_rate": 9.475486023998345e-06, - "loss": 0.055, + "epoch": 1.858792184724689, + "grad_norm": 0.361185901194101, + "learning_rate": 6.46821969485839e-06, + "loss": 0.0493, "step": 2093 }, { - "epoch": 0.9300466355762825, - "grad_norm": 0.6334292089301367, - "learning_rate": 9.474621350232387e-06, - "loss": 0.0801, + "epoch": 1.8596802841918296, + "grad_norm": 0.5659850613591552, + "learning_rate": 6.464514473588825e-06, + "loss": 0.0547, "step": 2094 }, { - "epoch": 0.9304907839218299, - "grad_norm": 0.5293672478661852, - "learning_rate": 9.47375600385483e-06, - "loss": 0.0463, + "epoch": 1.8605683836589697, + "grad_norm": 0.3700538488433121, + "learning_rate": 6.46080837240535e-06, + "loss": 0.0544, "step": 2095 }, { - "epoch": 0.9309349322673773, - "grad_norm": 0.6942808305366424, - "learning_rate": 9.47288998499575e-06, - "loss": 0.0772, + "epoch": 1.8614564831261102, + "grad_norm": 0.37391369608280134, + "learning_rate": 6.4571013935346724e-06, + "loss": 0.0552, "step": 2096 }, { - "epoch": 0.9313790806129247, - "grad_norm": 0.801229643208364, - "learning_rate": 9.472023293785322e-06, - "loss": 0.0838, + "epoch": 1.8623445825932503, + "grad_norm": 0.4506848177748878, + "learning_rate": 6.453393539204031e-06, + "loss": 0.0467, "step": 2097 }, { - "epoch": 0.9318232289584721, - "grad_norm": 0.5539233738084255, - "learning_rate": 9.471155930353829e-06, - "loss": 0.0477, + "epoch": 1.8632326820603908, + "grad_norm": 0.4760160455310682, + "learning_rate": 6.449684811641191e-06, + "loss": 0.0501, "step": 2098 }, { - "epoch": 0.9322673773040195, - "grad_norm": 0.5083333963598577, - "learning_rate": 9.470287894831648e-06, - "loss": 0.0493, + "epoch": 1.8641207815275311, + "grad_norm": 0.630914626311758, + "learning_rate": 6.44597521307444e-06, + "loss": 0.0537, "step": 2099 }, { - "epoch": 0.9327115256495669, - "grad_norm": 0.5714592576450407, - "learning_rate": 9.469419187349258e-06, - "loss": 0.0518, + "epoch": 1.8650088809946714, + "grad_norm": 0.4886407075177583, + "learning_rate": 6.442264745732589e-06, + "loss": 0.0467, "step": 2100 }, { - "epoch": 0.9331556739951143, - "grad_norm": 0.5821855293618007, - "learning_rate": 9.468549808037241e-06, - "loss": 0.0584, + "epoch": 1.8658969804618117, + "grad_norm": 0.4667539894576772, + "learning_rate": 6.438553411844976e-06, + "loss": 0.0471, "step": 2101 }, { - "epoch": 0.9335998223406617, - "grad_norm": 0.5808549016034106, - "learning_rate": 9.467679757026283e-06, - "loss": 0.0641, + "epoch": 1.866785079928952, + "grad_norm": 0.5111039869480317, + "learning_rate": 6.4348412136414505e-06, + "loss": 0.062, "step": 2102 }, { - "epoch": 0.9340439706862091, - "grad_norm": 0.5185841811549224, - "learning_rate": 9.466809034447165e-06, - "loss": 0.0484, + "epoch": 1.8676731793960923, + "grad_norm": 0.331091498971716, + "learning_rate": 6.431128153352389e-06, + "loss": 0.0426, "step": 2103 }, { - "epoch": 0.9344881190317567, - "grad_norm": 0.46447107395363035, - "learning_rate": 9.46593764043077e-06, - "loss": 0.0538, + "epoch": 1.8685612788632326, + "grad_norm": 0.5559235201845065, + "learning_rate": 6.427414233208682e-06, + "loss": 0.0527, "step": 2104 }, { - "epoch": 0.9349322673773041, - "grad_norm": 0.5957468835643678, - "learning_rate": 9.465065575108084e-06, - "loss": 0.0687, + "epoch": 1.8694493783303732, + "grad_norm": 0.4727738717236463, + "learning_rate": 6.423699455441741e-06, + "loss": 0.0525, "step": 2105 }, { - "epoch": 0.9353764157228515, - "grad_norm": 0.6722403187627196, - "learning_rate": 9.464192838610195e-06, - "loss": 0.0633, + "epoch": 1.8703374777975132, + "grad_norm": 0.5007264649411204, + "learning_rate": 6.419983822283486e-06, + "loss": 0.0494, "step": 2106 }, { - "epoch": 0.9358205640683989, - "grad_norm": 0.7367314225600079, - "learning_rate": 9.463319431068289e-06, - "loss": 0.0688, + "epoch": 1.8712255772646538, + "grad_norm": 0.43477312242283966, + "learning_rate": 6.4162673359663575e-06, + "loss": 0.0459, "step": 2107 }, { - "epoch": 0.9362647124139463, - "grad_norm": 0.5272343499456363, - "learning_rate": 9.462445352613654e-06, - "loss": 0.0598, + "epoch": 1.8721136767317939, + "grad_norm": 0.337940601269951, + "learning_rate": 6.412549998723304e-06, + "loss": 0.0468, "step": 2108 }, { - "epoch": 0.9367088607594937, - "grad_norm": 0.5251390084394981, - "learning_rate": 9.461570603377678e-06, - "loss": 0.0461, + "epoch": 1.8730017761989344, + "grad_norm": 0.36885606967588846, + "learning_rate": 6.408831812787788e-06, + "loss": 0.0457, "step": 2109 }, { - "epoch": 0.9371530091050411, - "grad_norm": 0.45228924863036196, - "learning_rate": 9.460695183491852e-06, - "loss": 0.0551, + "epoch": 1.8738898756660745, + "grad_norm": 0.42546400924864336, + "learning_rate": 6.405112780393781e-06, + "loss": 0.0445, "step": 2110 }, { - "epoch": 0.9375971574505885, - "grad_norm": 0.5609307631212084, - "learning_rate": 9.459819093087765e-06, - "loss": 0.0582, + "epoch": 1.874777975133215, + "grad_norm": 0.4619266591193366, + "learning_rate": 6.401392903775762e-06, + "loss": 0.0459, "step": 2111 }, { - "epoch": 0.9380413057961359, - "grad_norm": 0.6083990162855326, - "learning_rate": 9.45894233229711e-06, - "loss": 0.0727, + "epoch": 1.8756660746003553, + "grad_norm": 0.3894712290108777, + "learning_rate": 6.39767218516872e-06, + "loss": 0.0439, "step": 2112 }, { - "epoch": 0.9384854541416833, - "grad_norm": 0.6031459810485845, - "learning_rate": 9.458064901251679e-06, - "loss": 0.055, + "epoch": 1.8765541740674956, + "grad_norm": 0.485064306096109, + "learning_rate": 6.393950626808145e-06, + "loss": 0.0455, "step": 2113 }, { - "epoch": 0.9389296024872308, - "grad_norm": 0.5698111663947624, - "learning_rate": 9.457186800083363e-06, - "loss": 0.0542, + "epoch": 1.8774422735346359, + "grad_norm": 0.3403998697098418, + "learning_rate": 6.3902282309300375e-06, + "loss": 0.0421, "step": 2114 }, { - "epoch": 0.9393737508327782, - "grad_norm": 0.5620094721834581, - "learning_rate": 9.456308028924157e-06, - "loss": 0.0647, + "epoch": 1.8783303730017762, + "grad_norm": 0.3640300029190646, + "learning_rate": 6.3865049997709e-06, + "loss": 0.0428, "step": 2115 }, { - "epoch": 0.9398178991783256, - "grad_norm": 0.47201186226610237, - "learning_rate": 9.455428587906154e-06, - "loss": 0.0587, + "epoch": 1.8792184724689165, + "grad_norm": 0.4462567654607275, + "learning_rate": 6.38278093556773e-06, + "loss": 0.0569, "step": 2116 }, { - "epoch": 0.940262047523873, - "grad_norm": 0.5090071386100868, - "learning_rate": 9.45454847716155e-06, - "loss": 0.0485, + "epoch": 1.8801065719360568, + "grad_norm": 0.5788823336040582, + "learning_rate": 6.379056040558036e-06, + "loss": 0.0481, "step": 2117 }, { - "epoch": 0.9407061958694204, - "grad_norm": 0.6100904698718481, - "learning_rate": 9.453667696822644e-06, - "loss": 0.055, + "epoch": 1.8809946714031973, + "grad_norm": 0.339464299182698, + "learning_rate": 6.375330316979816e-06, + "loss": 0.0365, "step": 2118 }, { - "epoch": 0.9411503442149678, - "grad_norm": 0.5793242183604661, - "learning_rate": 9.452786247021825e-06, - "loss": 0.0633, + "epoch": 1.8818827708703374, + "grad_norm": 0.4054052815074723, + "learning_rate": 6.3716037670715725e-06, + "loss": 0.0423, "step": 2119 }, { - "epoch": 0.9415944925605152, - "grad_norm": 0.6603367019430398, - "learning_rate": 9.451904127891593e-06, - "loss": 0.062, + "epoch": 1.882770870337478, + "grad_norm": 0.4202419168351307, + "learning_rate": 6.3678763930723034e-06, + "loss": 0.0419, "step": 2120 }, { - "epoch": 0.9420386409060626, - "grad_norm": 0.48144950864097125, - "learning_rate": 9.451021339564549e-06, - "loss": 0.0541, + "epoch": 1.883658969804618, + "grad_norm": 0.36138740873727293, + "learning_rate": 6.364148197221499e-06, + "loss": 0.0466, "step": 2121 }, { - "epoch": 0.94248278925161, - "grad_norm": 0.8619120091253565, - "learning_rate": 9.450137882173385e-06, - "loss": 0.0622, + "epoch": 1.8845470692717585, + "grad_norm": 0.39202449801589895, + "learning_rate": 6.360419181759143e-06, + "loss": 0.0448, "step": 2122 }, { - "epoch": 0.9429269375971574, - "grad_norm": 0.574402948879313, - "learning_rate": 9.449253755850902e-06, - "loss": 0.0579, + "epoch": 1.8854351687388986, + "grad_norm": 0.5250988754303926, + "learning_rate": 6.3566893489257185e-06, + "loss": 0.0499, "step": 2123 }, { - "epoch": 0.9433710859427049, - "grad_norm": 0.5040572133776431, - "learning_rate": 9.448368960730002e-06, - "loss": 0.0471, + "epoch": 1.8863232682060391, + "grad_norm": 0.6469008346649571, + "learning_rate": 6.352958700962191e-06, + "loss": 0.061, "step": 2124 }, { - "epoch": 0.9438152342882523, - "grad_norm": 0.8130523767663201, - "learning_rate": 9.447483496943682e-06, - "loss": 0.0607, + "epoch": 1.8872113676731794, + "grad_norm": 0.4581135569733313, + "learning_rate": 6.349227240110019e-06, + "loss": 0.0461, "step": 2125 }, { - "epoch": 0.9442593826337997, - "grad_norm": 0.6488075902611455, - "learning_rate": 9.446597364625043e-06, - "loss": 0.0495, + "epoch": 1.8880994671403197, + "grad_norm": 0.8025399454101909, + "learning_rate": 6.345494968611152e-06, + "loss": 0.0512, "step": 2126 }, { - "epoch": 0.9447035309793471, - "grad_norm": 0.516081567488049, - "learning_rate": 9.445710563907286e-06, - "loss": 0.0597, + "epoch": 1.88898756660746, + "grad_norm": 0.44397943494301245, + "learning_rate": 6.3417618887080245e-06, + "loss": 0.0494, "step": 2127 }, { - "epoch": 0.9451476793248945, - "grad_norm": 0.6477844164341606, - "learning_rate": 9.444823094923712e-06, - "loss": 0.0581, + "epoch": 1.8898756660746003, + "grad_norm": 0.6018102541173735, + "learning_rate": 6.3380280026435535e-06, + "loss": 0.0452, "step": 2128 }, { - "epoch": 0.9455918276704419, - "grad_norm": 0.5960225920848716, - "learning_rate": 9.44393495780772e-06, - "loss": 0.0693, + "epoch": 1.8907637655417406, + "grad_norm": 0.4030835604751413, + "learning_rate": 6.334293312661147e-06, + "loss": 0.0353, "step": 2129 }, { - "epoch": 0.9460359760159893, - "grad_norm": 0.6610698243004944, - "learning_rate": 9.443046152692818e-06, - "loss": 0.0602, + "epoch": 1.891651865008881, + "grad_norm": 0.39190175083569617, + "learning_rate": 6.33055782100469e-06, + "loss": 0.0473, "step": 2130 }, { - "epoch": 0.9464801243615367, - "grad_norm": 0.501365231071439, - "learning_rate": 9.442156679712604e-06, - "loss": 0.0507, + "epoch": 1.8925399644760215, + "grad_norm": 0.46273519259461704, + "learning_rate": 6.3268215299185545e-06, + "loss": 0.0404, "step": 2131 }, { - "epoch": 0.9469242727070841, - "grad_norm": 0.5333891976903705, - "learning_rate": 9.441266539000782e-06, - "loss": 0.0551, + "epoch": 1.8934280639431615, + "grad_norm": 0.3962674432212313, + "learning_rate": 6.323084441647586e-06, + "loss": 0.0442, "step": 2132 }, { - "epoch": 0.9473684210526315, - "grad_norm": 0.7140797441741807, - "learning_rate": 9.440375730691154e-06, - "loss": 0.0559, + "epoch": 1.894316163410302, + "grad_norm": 0.7545203804080743, + "learning_rate": 6.319346558437116e-06, + "loss": 0.055, "step": 2133 }, { - "epoch": 0.947812569398179, - "grad_norm": 1.0212073230522318, - "learning_rate": 9.439484254917626e-06, - "loss": 0.0601, + "epoch": 1.8952042628774421, + "grad_norm": 0.5390726237497478, + "learning_rate": 6.3156078825329495e-06, + "loss": 0.0473, "step": 2134 }, { - "epoch": 0.9482567177437264, - "grad_norm": 0.6547086006006103, - "learning_rate": 9.4385921118142e-06, - "loss": 0.0678, + "epoch": 1.8960923623445827, + "grad_norm": 0.45025381579412616, + "learning_rate": 6.311868416181367e-06, + "loss": 0.0443, "step": 2135 }, { - "epoch": 0.9487008660892738, - "grad_norm": 0.5668847306270909, - "learning_rate": 9.437699301514983e-06, - "loss": 0.0562, + "epoch": 1.8969804618117228, + "grad_norm": 0.4467246091737874, + "learning_rate": 6.30812816162913e-06, + "loss": 0.0386, "step": 2136 }, { - "epoch": 0.9491450144348212, - "grad_norm": 0.535851688994489, - "learning_rate": 9.436805824154175e-06, - "loss": 0.0536, + "epoch": 1.8978685612788633, + "grad_norm": 0.3908841207514594, + "learning_rate": 6.3043871211234656e-06, + "loss": 0.0384, "step": 2137 }, { - "epoch": 0.9495891627803686, - "grad_norm": 0.6725155861850945, - "learning_rate": 9.435911679866085e-06, - "loss": 0.0636, + "epoch": 1.8987566607460036, + "grad_norm": 0.3987975718435107, + "learning_rate": 6.300645296912078e-06, + "loss": 0.0414, "step": 2138 }, { - "epoch": 0.950033311125916, - "grad_norm": 0.472128745022765, - "learning_rate": 9.435016868785117e-06, - "loss": 0.0514, + "epoch": 1.8996447602131439, + "grad_norm": 0.5002406973767204, + "learning_rate": 6.296902691243141e-06, + "loss": 0.0588, "step": 2139 }, { - "epoch": 0.9504774594714634, - "grad_norm": 0.5611219746408826, - "learning_rate": 9.434121391045775e-06, - "loss": 0.057, + "epoch": 1.9005328596802842, + "grad_norm": 0.48860923921704486, + "learning_rate": 6.2931593063653e-06, + "loss": 0.0612, "step": 2140 }, { - "epoch": 0.9509216078170108, - "grad_norm": 0.5197846459934121, - "learning_rate": 9.433225246782664e-06, - "loss": 0.0497, + "epoch": 1.9014209591474245, + "grad_norm": 0.3947407088728515, + "learning_rate": 6.289415144527662e-06, + "loss": 0.0454, "step": 2141 }, { - "epoch": 0.9513657561625583, - "grad_norm": 0.6901510307382841, - "learning_rate": 9.432328436130493e-06, - "loss": 0.0588, + "epoch": 1.9023090586145648, + "grad_norm": 0.41230602007856854, + "learning_rate": 6.2856702079798094e-06, + "loss": 0.0401, "step": 2142 }, { - "epoch": 0.9518099045081057, - "grad_norm": 0.6595671077813987, - "learning_rate": 9.431430959224067e-06, - "loss": 0.0852, + "epoch": 1.903197158081705, + "grad_norm": 0.33116293695159454, + "learning_rate": 6.2819244989717845e-06, + "loss": 0.0359, "step": 2143 }, { - "epoch": 0.9522540528536532, - "grad_norm": 0.5676059741434758, - "learning_rate": 9.43053281619829e-06, - "loss": 0.0507, + "epoch": 1.9040852575488456, + "grad_norm": 0.4000104975546426, + "learning_rate": 6.278178019754095e-06, + "loss": 0.0486, "step": 2144 }, { - "epoch": 0.9526982011992006, - "grad_norm": 0.4855036199663289, - "learning_rate": 9.429634007188169e-06, - "loss": 0.0474, + "epoch": 1.9049733570159857, + "grad_norm": 0.3731738103643593, + "learning_rate": 6.274430772577712e-06, + "loss": 0.049, "step": 2145 }, { - "epoch": 0.953142349544748, - "grad_norm": 0.732709155194895, - "learning_rate": 9.42873453232881e-06, - "loss": 0.0619, + "epoch": 1.9058614564831262, + "grad_norm": 0.3572735108537658, + "learning_rate": 6.270682759694069e-06, + "loss": 0.0424, "step": 2146 }, { - "epoch": 0.9535864978902954, - "grad_norm": 0.6306866348324036, - "learning_rate": 9.42783439175542e-06, - "loss": 0.0772, + "epoch": 1.9067495559502663, + "grad_norm": 0.42934308169002866, + "learning_rate": 6.266933983355056e-06, + "loss": 0.0436, "step": 2147 }, { - "epoch": 0.9540306462358428, - "grad_norm": 0.6444304070837995, - "learning_rate": 9.426933585603304e-06, - "loss": 0.0565, + "epoch": 1.9076376554174068, + "grad_norm": 0.3632847206577638, + "learning_rate": 6.263184445813024e-06, + "loss": 0.0486, "step": 2148 }, { - "epoch": 0.9544747945813902, - "grad_norm": 0.5421189880885259, - "learning_rate": 9.42603211400787e-06, - "loss": 0.0655, + "epoch": 1.9085257548845471, + "grad_norm": 0.3413940971429979, + "learning_rate": 6.259434149320782e-06, + "loss": 0.0385, "step": 2149 }, { - "epoch": 0.9549189429269376, - "grad_norm": 0.5350689023537523, - "learning_rate": 9.425129977104626e-06, - "loss": 0.0598, + "epoch": 1.9094138543516874, + "grad_norm": 0.48469701880453253, + "learning_rate": 6.255683096131595e-06, + "loss": 0.0426, "step": 2150 }, { - "epoch": 0.955363091272485, - "grad_norm": 0.3832349941062708, - "learning_rate": 9.424227175029175e-06, - "loss": 0.0474, + "epoch": 1.9103019538188277, + "grad_norm": 0.3662388409090857, + "learning_rate": 6.2519312884991805e-06, + "loss": 0.0461, "step": 2151 }, { - "epoch": 0.9558072396180324, - "grad_norm": 0.5764500588860708, - "learning_rate": 9.423323707917226e-06, - "loss": 0.0673, + "epoch": 1.911190053285968, + "grad_norm": 0.4539062927110461, + "learning_rate": 6.2481787286777116e-06, + "loss": 0.051, "step": 2152 }, { - "epoch": 0.9562513879635799, - "grad_norm": 0.41134986778955357, - "learning_rate": 9.422419575904584e-06, - "loss": 0.0449, + "epoch": 1.9120781527531083, + "grad_norm": 0.35755853642933133, + "learning_rate": 6.244425418921811e-06, + "loss": 0.0435, "step": 2153 }, { - "epoch": 0.9566955363091273, - "grad_norm": 0.40411844058020524, - "learning_rate": 9.421514779127156e-06, - "loss": 0.0457, + "epoch": 1.9129662522202486, + "grad_norm": 0.42878467561117417, + "learning_rate": 6.240671361486555e-06, + "loss": 0.0417, "step": 2154 }, { - "epoch": 0.9571396846546747, - "grad_norm": 0.5724570461924485, - "learning_rate": 9.420609317720948e-06, - "loss": 0.064, + "epoch": 1.913854351687389, + "grad_norm": 0.49409777074507605, + "learning_rate": 6.2369165586274664e-06, + "loss": 0.0487, "step": 2155 }, { - "epoch": 0.9575838330002221, - "grad_norm": 0.5481138081401853, - "learning_rate": 9.419703191822067e-06, - "loss": 0.0512, + "epoch": 1.9147424511545292, + "grad_norm": 0.41199253087363025, + "learning_rate": 6.233161012600518e-06, + "loss": 0.0415, "step": 2156 }, { - "epoch": 0.9580279813457695, - "grad_norm": 0.5977006719684248, - "learning_rate": 9.418796401566719e-06, - "loss": 0.0452, + "epoch": 1.9156305506216698, + "grad_norm": 0.405928965572566, + "learning_rate": 6.229404725662126e-06, + "loss": 0.046, "step": 2157 }, { - "epoch": 0.9584721296913169, - "grad_norm": 0.5349085172455206, - "learning_rate": 9.417888947091208e-06, - "loss": 0.065, + "epoch": 1.9165186500888098, + "grad_norm": 0.39740350515985995, + "learning_rate": 6.225647700069158e-06, + "loss": 0.0443, "step": 2158 }, { - "epoch": 0.9589162780368643, - "grad_norm": 0.6787928669434861, - "learning_rate": 9.416980828531944e-06, - "loss": 0.0677, + "epoch": 1.9174067495559504, + "grad_norm": 0.4834426843479704, + "learning_rate": 6.221889938078916e-06, + "loss": 0.0529, "step": 2159 }, { - "epoch": 0.9593604263824117, - "grad_norm": 0.709282953075048, - "learning_rate": 9.416072046025429e-06, - "loss": 0.0729, + "epoch": 1.9182948490230904, + "grad_norm": 0.49548825776613825, + "learning_rate": 6.218131441949151e-06, + "loss": 0.0617, "step": 2160 }, { - "epoch": 0.9598045747279591, - "grad_norm": 0.6790718166354139, - "learning_rate": 9.415162599708268e-06, - "loss": 0.0699, + "epoch": 1.919182948490231, + "grad_norm": 0.3498393151031275, + "learning_rate": 6.214372213938056e-06, + "loss": 0.0344, "step": 2161 }, { - "epoch": 0.9602487230735065, - "grad_norm": 0.545039186094067, - "learning_rate": 9.414252489717168e-06, - "loss": 0.0594, + "epoch": 1.9200710479573713, + "grad_norm": 0.33989027423396234, + "learning_rate": 6.210612256304262e-06, + "loss": 0.0378, "step": 2162 }, { - "epoch": 0.960692871419054, - "grad_norm": 0.7438799552703795, - "learning_rate": 9.413341716188934e-06, - "loss": 0.0653, + "epoch": 1.9209591474245116, + "grad_norm": 0.36163423760475416, + "learning_rate": 6.206851571306832e-06, + "loss": 0.0463, "step": 2163 }, { - "epoch": 0.9611370197646014, - "grad_norm": 0.5419774566213018, - "learning_rate": 9.412430279260473e-06, - "loss": 0.0452, + "epoch": 1.9218472468916519, + "grad_norm": 0.656676665341038, + "learning_rate": 6.2030901612052794e-06, + "loss": 0.0504, "step": 2164 }, { - "epoch": 0.9615811681101488, - "grad_norm": 0.5417611334038369, - "learning_rate": 9.411518179068785e-06, - "loss": 0.0695, + "epoch": 1.9227353463587922, + "grad_norm": 0.36680522804055904, + "learning_rate": 6.199328028259542e-06, + "loss": 0.0423, "step": 2165 }, { - "epoch": 0.9620253164556962, - "grad_norm": 0.556851360606536, - "learning_rate": 9.410605415750977e-06, - "loss": 0.0612, + "epoch": 1.9236234458259325, + "grad_norm": 0.38262720764696834, + "learning_rate": 6.195565174729995e-06, + "loss": 0.0399, "step": 2166 }, { - "epoch": 0.9624694648012436, - "grad_norm": 0.8789606927293152, - "learning_rate": 9.40969198944425e-06, - "loss": 0.0604, + "epoch": 1.9245115452930728, + "grad_norm": 0.37453073228605865, + "learning_rate": 6.191801602877448e-06, + "loss": 0.0385, "step": 2167 }, { - "epoch": 0.962913613146791, - "grad_norm": 0.5933413334023052, - "learning_rate": 9.40877790028591e-06, - "loss": 0.0623, + "epoch": 1.9253996447602133, + "grad_norm": 0.4683233973955515, + "learning_rate": 6.188037314963142e-06, + "loss": 0.0475, "step": 2168 }, { - "epoch": 0.9633577614923384, - "grad_norm": 0.5946398594923591, - "learning_rate": 9.407863148413361e-06, - "loss": 0.0419, + "epoch": 1.9262877442273534, + "grad_norm": 0.4439790066925214, + "learning_rate": 6.184272313248747e-06, + "loss": 0.0458, "step": 2169 }, { - "epoch": 0.9638019098378858, - "grad_norm": 0.48221233179240436, - "learning_rate": 9.406947733964103e-06, - "loss": 0.0546, + "epoch": 1.927175843694494, + "grad_norm": 0.37081278036155074, + "learning_rate": 6.1805065999963634e-06, + "loss": 0.0386, "step": 2170 }, { - "epoch": 0.9642460581834332, - "grad_norm": 0.6525276948819981, - "learning_rate": 9.40603165707574e-06, - "loss": 0.0526, + "epoch": 1.928063943161634, + "grad_norm": 0.47575224522252724, + "learning_rate": 6.176740177468515e-06, + "loss": 0.0443, "step": 2171 }, { - "epoch": 0.9646902065289806, - "grad_norm": 0.7205786693472772, - "learning_rate": 9.405114917885973e-06, - "loss": 0.0684, + "epoch": 1.9289520426287745, + "grad_norm": 0.40954337004055674, + "learning_rate": 6.172973047928159e-06, + "loss": 0.0436, "step": 2172 }, { - "epoch": 0.9651343548745281, - "grad_norm": 0.5044988796095775, - "learning_rate": 9.404197516532605e-06, - "loss": 0.0478, + "epoch": 1.9298401420959146, + "grad_norm": 0.3795071993422605, + "learning_rate": 6.169205213638671e-06, + "loss": 0.0392, "step": 2173 }, { - "epoch": 0.9655785032200755, - "grad_norm": 0.44346462918671525, - "learning_rate": 9.403279453153536e-06, - "loss": 0.0446, + "epoch": 1.9307282415630551, + "grad_norm": 0.5057545520526439, + "learning_rate": 6.165436676863853e-06, + "loss": 0.0528, "step": 2174 }, { - "epoch": 0.966022651565623, - "grad_norm": 0.5173823021640699, - "learning_rate": 9.402360727886766e-06, - "loss": 0.0623, + "epoch": 1.9316163410301954, + "grad_norm": 0.3815045784778289, + "learning_rate": 6.1616674398679275e-06, + "loss": 0.0418, "step": 2175 }, { - "epoch": 0.9664667999111703, - "grad_norm": 0.5687155650294274, - "learning_rate": 9.401441340870397e-06, - "loss": 0.0611, + "epoch": 1.9325044404973357, + "grad_norm": 0.4536496129499768, + "learning_rate": 6.1578975049155395e-06, + "loss": 0.0566, "step": 2176 }, { - "epoch": 0.9669109482567178, - "grad_norm": 0.6467987726672418, - "learning_rate": 9.400521292242626e-06, - "loss": 0.0529, + "epoch": 1.933392539964476, + "grad_norm": 0.43901000822965575, + "learning_rate": 6.154126874271753e-06, + "loss": 0.0591, "step": 2177 }, { - "epoch": 0.9673550966022652, - "grad_norm": 0.5130913991837206, - "learning_rate": 9.399600582141752e-06, - "loss": 0.0599, + "epoch": 1.9342806394316163, + "grad_norm": 0.35441921879257254, + "learning_rate": 6.150355550202048e-06, + "loss": 0.039, "step": 2178 }, { - "epoch": 0.9677992449478126, - "grad_norm": 0.6670998717556929, - "learning_rate": 9.398679210706176e-06, - "loss": 0.0682, + "epoch": 1.9351687388987566, + "grad_norm": 0.34742959296044007, + "learning_rate": 6.146583534972322e-06, + "loss": 0.0336, "step": 2179 }, { - "epoch": 0.96824339329336, - "grad_norm": 0.7694218770583813, - "learning_rate": 9.397757178074392e-06, - "loss": 0.0686, + "epoch": 1.936056838365897, + "grad_norm": 0.3082263743464388, + "learning_rate": 6.1428108308488934e-06, + "loss": 0.0407, "step": 2180 }, { - "epoch": 0.9686875416389074, - "grad_norm": 0.7755428807155127, - "learning_rate": 9.396834484385e-06, - "loss": 0.0574, + "epoch": 1.9369449378330375, + "grad_norm": 0.39803484202659967, + "learning_rate": 6.139037440098482e-06, + "loss": 0.0518, "step": 2181 }, { - "epoch": 0.9691316899844548, - "grad_norm": 0.44478529742795087, - "learning_rate": 9.395911129776699e-06, - "loss": 0.0429, + "epoch": 1.9378330373001775, + "grad_norm": 0.45351142859117755, + "learning_rate": 6.135263364988233e-06, + "loss": 0.0432, "step": 2182 }, { - "epoch": 0.9695758383300023, - "grad_norm": 0.7140201406983687, - "learning_rate": 9.394987114388278e-06, - "loss": 0.0711, + "epoch": 1.938721136767318, + "grad_norm": 0.46724683801450034, + "learning_rate": 6.1314886077856946e-06, + "loss": 0.0605, "step": 2183 }, { - "epoch": 0.9700199866755497, - "grad_norm": 0.8530149060800487, - "learning_rate": 9.394062438358637e-06, - "loss": 0.0529, + "epoch": 1.9396092362344581, + "grad_norm": 0.38730485735302783, + "learning_rate": 6.12771317075883e-06, + "loss": 0.0416, "step": 2184 }, { - "epoch": 0.9704641350210971, - "grad_norm": 0.6524072835443322, - "learning_rate": 9.39313710182677e-06, - "loss": 0.0624, + "epoch": 1.9404973357015987, + "grad_norm": 0.42992171692362197, + "learning_rate": 6.123937056176005e-06, + "loss": 0.0389, "step": 2185 }, { - "epoch": 0.9709082833666445, - "grad_norm": 0.4290712533966452, - "learning_rate": 9.39221110493177e-06, - "loss": 0.041, + "epoch": 1.9413854351687387, + "grad_norm": 0.46672428369803093, + "learning_rate": 6.120160266305999e-06, + "loss": 0.0512, "step": 2186 }, { - "epoch": 0.9713524317121919, - "grad_norm": 0.5324670553025179, - "learning_rate": 9.39128444781283e-06, - "loss": 0.0477, + "epoch": 1.9422735346358793, + "grad_norm": 0.4706018893641143, + "learning_rate": 6.116382803417993e-06, + "loss": 0.0444, "step": 2187 }, { - "epoch": 0.9717965800577393, - "grad_norm": 0.5901470773440277, - "learning_rate": 9.390357130609243e-06, - "loss": 0.0568, + "epoch": 1.9431616341030196, + "grad_norm": 0.31767870316165925, + "learning_rate": 6.112604669781572e-06, + "loss": 0.0371, "step": 2188 }, { - "epoch": 0.9722407284032867, - "grad_norm": 0.7720115526671916, - "learning_rate": 9.3894291534604e-06, - "loss": 0.0559, + "epoch": 1.9440497335701599, + "grad_norm": 0.3714468432315274, + "learning_rate": 6.108825867666729e-06, + "loss": 0.0491, "step": 2189 }, { - "epoch": 0.9726848767488341, - "grad_norm": 0.6618879002055548, - "learning_rate": 9.38850051650579e-06, - "loss": 0.0789, + "epoch": 1.9449378330373002, + "grad_norm": 0.39023993760606873, + "learning_rate": 6.1050463993438525e-06, + "loss": 0.0566, "step": 2190 }, { - "epoch": 0.9731290250943815, - "grad_norm": 0.6242983609779144, - "learning_rate": 9.387571219885008e-06, - "loss": 0.0709, + "epoch": 1.9458259325044405, + "grad_norm": 0.3731005928135524, + "learning_rate": 6.101266267083732e-06, + "loss": 0.0494, "step": 2191 }, { - "epoch": 0.9735731734399289, - "grad_norm": 0.5488182994392276, - "learning_rate": 9.386641263737736e-06, - "loss": 0.0483, + "epoch": 1.9467140319715808, + "grad_norm": 0.3415655238659524, + "learning_rate": 6.0974854731575615e-06, + "loss": 0.037, "step": 2192 }, { - "epoch": 0.9740173217854764, - "grad_norm": 0.5678755050400883, - "learning_rate": 9.38571064820377e-06, - "loss": 0.0639, + "epoch": 1.947602131438721, + "grad_norm": 0.334588217445017, + "learning_rate": 6.093704019836927e-06, + "loss": 0.0444, "step": 2193 }, { - "epoch": 0.9744614701310238, - "grad_norm": 0.7972177120971958, - "learning_rate": 9.384779373422992e-06, - "loss": 0.0688, + "epoch": 1.9484902309058616, + "grad_norm": 0.42727978732015626, + "learning_rate": 6.089921909393812e-06, + "loss": 0.0448, "step": 2194 }, { - "epoch": 0.9749056184765712, - "grad_norm": 0.4181144019615441, - "learning_rate": 9.38384743953539e-06, - "loss": 0.0434, + "epoch": 1.9493783303730017, + "grad_norm": 0.405971400722109, + "learning_rate": 6.086139144100596e-06, + "loss": 0.0445, "step": 2195 }, { - "epoch": 0.9753497668221186, - "grad_norm": 1.0045462377103442, - "learning_rate": 9.382914846681049e-06, - "loss": 0.0626, + "epoch": 1.9502664298401422, + "grad_norm": 0.46345149540794633, + "learning_rate": 6.082355726230052e-06, + "loss": 0.0471, "step": 2196 }, { - "epoch": 0.975793915167666, - "grad_norm": 0.5663797823359984, - "learning_rate": 9.381981595000153e-06, - "loss": 0.0536, + "epoch": 1.9511545293072823, + "grad_norm": 0.3588792619562609, + "learning_rate": 6.078571658055343e-06, + "loss": 0.0473, "step": 2197 }, { - "epoch": 0.9762380635132134, - "grad_norm": 0.5230033029520286, - "learning_rate": 9.381047684632986e-06, - "loss": 0.0431, + "epoch": 1.9520426287744228, + "grad_norm": 0.3619948040344296, + "learning_rate": 6.074786941850022e-06, + "loss": 0.0443, "step": 2198 }, { - "epoch": 0.9766822118587608, - "grad_norm": 0.5941306490458375, - "learning_rate": 9.380113115719933e-06, - "loss": 0.0514, + "epoch": 1.952930728241563, + "grad_norm": 0.46003516648541753, + "learning_rate": 6.0710015798880385e-06, + "loss": 0.0505, "step": 2199 }, { - "epoch": 0.9771263602043082, - "grad_norm": 0.7074167144721983, - "learning_rate": 9.379177888401473e-06, - "loss": 0.0516, + "epoch": 1.9538188277087034, + "grad_norm": 0.4616610153615785, + "learning_rate": 6.067215574443721e-06, + "loss": 0.042, "step": 2200 }, { - "epoch": 0.9775705085498556, - "grad_norm": 0.8936942623677783, - "learning_rate": 9.378242002818186e-06, - "loss": 0.0634, + "epoch": 1.9547069271758437, + "grad_norm": 0.3263572711987352, + "learning_rate": 6.06342892779179e-06, + "loss": 0.0425, "step": 2201 }, { - "epoch": 0.978014656895403, - "grad_norm": 0.8015956656924698, - "learning_rate": 9.377305459110754e-06, - "loss": 0.0701, + "epoch": 1.955595026642984, + "grad_norm": 0.48978190553900625, + "learning_rate": 6.059641642207349e-06, + "loss": 0.0469, "step": 2202 }, { - "epoch": 0.9784588052409505, - "grad_norm": 0.5888281751564788, - "learning_rate": 9.376368257419955e-06, - "loss": 0.0735, + "epoch": 1.9564831261101243, + "grad_norm": 0.36095080394233947, + "learning_rate": 6.055853719965888e-06, + "loss": 0.0468, "step": 2203 }, { - "epoch": 0.9789029535864979, - "grad_norm": 0.6871960760484549, - "learning_rate": 9.375430397886661e-06, - "loss": 0.0528, + "epoch": 1.9573712255772646, + "grad_norm": 0.4383836524400736, + "learning_rate": 6.052065163343274e-06, + "loss": 0.0471, "step": 2204 }, { - "epoch": 0.9793471019320453, - "grad_norm": 0.5915896411171675, - "learning_rate": 9.374491880651856e-06, - "loss": 0.0577, + "epoch": 1.958259325044405, + "grad_norm": 0.3432346045341955, + "learning_rate": 6.048275974615763e-06, + "loss": 0.0445, "step": 2205 }, { - "epoch": 0.9797912502775927, - "grad_norm": 0.6268683607318559, - "learning_rate": 9.373552705856612e-06, - "loss": 0.0511, + "epoch": 1.9591474245115452, + "grad_norm": 0.4474572470530067, + "learning_rate": 6.044486156059982e-06, + "loss": 0.0483, "step": 2206 }, { - "epoch": 0.9802353986231401, - "grad_norm": 0.5959527967947893, - "learning_rate": 9.372612873642101e-06, - "loss": 0.0577, + "epoch": 1.9600355239786857, + "grad_norm": 0.445184440401943, + "learning_rate": 6.040695709952944e-06, + "loss": 0.0427, "step": 2207 }, { - "epoch": 0.9806795469686875, - "grad_norm": 0.6376605781281672, - "learning_rate": 9.3716723841496e-06, - "loss": 0.056, + "epoch": 1.9609236234458258, + "grad_norm": 0.3769395422201313, + "learning_rate": 6.036904638572035e-06, + "loss": 0.0512, "step": 2208 }, { - "epoch": 0.9811236953142349, - "grad_norm": 0.6655725615122635, - "learning_rate": 9.370731237520476e-06, - "loss": 0.0495, + "epoch": 1.9618117229129663, + "grad_norm": 0.42956204509434803, + "learning_rate": 6.033112944195021e-06, + "loss": 0.0422, "step": 2209 }, { - "epoch": 0.9815678436597823, - "grad_norm": 0.6178758449973094, - "learning_rate": 9.369789433896201e-06, - "loss": 0.0785, + "epoch": 1.9626998223801064, + "grad_norm": 0.37218715861148377, + "learning_rate": 6.029320629100034e-06, + "loss": 0.0467, "step": 2210 }, { - "epoch": 0.9820119920053297, - "grad_norm": 0.6203954478751535, - "learning_rate": 9.368846973418347e-06, - "loss": 0.0541, + "epoch": 1.963587921847247, + "grad_norm": 0.34080116710205716, + "learning_rate": 6.0255276955655854e-06, + "loss": 0.0376, "step": 2211 }, { - "epoch": 0.9824561403508771, - "grad_norm": 0.5844524216400101, - "learning_rate": 9.367903856228575e-06, - "loss": 0.0484, + "epoch": 1.9644760213143873, + "grad_norm": 0.3214402294267105, + "learning_rate": 6.021734145870558e-06, + "loss": 0.0365, "step": 2212 }, { - "epoch": 0.9829002886964247, - "grad_norm": 1.3376432627550732, - "learning_rate": 9.366960082468658e-06, - "loss": 0.0523, + "epoch": 1.9653641207815276, + "grad_norm": 0.3525755964550656, + "learning_rate": 6.017939982294203e-06, + "loss": 0.0423, "step": 2213 }, { - "epoch": 0.983344437041972, - "grad_norm": 0.7650415999284177, - "learning_rate": 9.36601565228046e-06, - "loss": 0.0758, + "epoch": 1.9662522202486679, + "grad_norm": 0.3905564210946439, + "learning_rate": 6.014145207116141e-06, + "loss": 0.0453, "step": 2214 }, { - "epoch": 0.9837885853875195, - "grad_norm": 0.6031770466123566, - "learning_rate": 9.365070565805941e-06, - "loss": 0.0552, + "epoch": 1.9671403197158082, + "grad_norm": 0.4156069438990325, + "learning_rate": 6.01034982261636e-06, + "loss": 0.0492, "step": 2215 }, { - "epoch": 0.9842327337330669, - "grad_norm": 0.5317856643301976, - "learning_rate": 9.364124823187169e-06, - "loss": 0.0495, + "epoch": 1.9680284191829485, + "grad_norm": 0.4916076670695481, + "learning_rate": 6.006553831075215e-06, + "loss": 0.0531, "step": 2216 }, { - "epoch": 0.9846768820786143, - "grad_norm": 0.697207175537678, - "learning_rate": 9.363178424566302e-06, - "loss": 0.0575, + "epoch": 1.9689165186500888, + "grad_norm": 0.4292406230475399, + "learning_rate": 6.0027572347734216e-06, + "loss": 0.0449, "step": 2217 }, { - "epoch": 0.9851210304241617, - "grad_norm": 0.6901112062939687, - "learning_rate": 9.3622313700856e-06, - "loss": 0.0558, + "epoch": 1.9698046181172293, + "grad_norm": 0.4839747395829105, + "learning_rate": 5.998960035992067e-06, + "loss": 0.037, "step": 2218 }, { - "epoch": 0.9855651787697091, - "grad_norm": 0.7725648574354754, - "learning_rate": 9.361283659887421e-06, - "loss": 0.0526, + "epoch": 1.9706927175843694, + "grad_norm": 0.39556897595819734, + "learning_rate": 5.995162237012592e-06, + "loss": 0.04, "step": 2219 }, { - "epoch": 0.9860093271152565, - "grad_norm": 0.4919891771117441, - "learning_rate": 9.360335294114222e-06, - "loss": 0.0517, + "epoch": 1.97158081705151, + "grad_norm": 0.36822460637932836, + "learning_rate": 5.991363840116799e-06, + "loss": 0.0469, "step": 2220 }, { - "epoch": 0.9864534754608039, - "grad_norm": 0.604042614778997, - "learning_rate": 9.359386272908561e-06, - "loss": 0.0654, + "epoch": 1.97246891651865, + "grad_norm": 0.3956747417750124, + "learning_rate": 5.98756484758686e-06, + "loss": 0.0386, "step": 2221 }, { - "epoch": 0.9868976238063514, - "grad_norm": 0.533434108631547, - "learning_rate": 9.35843659641309e-06, - "loss": 0.0587, + "epoch": 1.9733570159857905, + "grad_norm": 0.3532515499098325, + "learning_rate": 5.983765261705289e-06, + "loss": 0.0414, "step": 2222 }, { - "epoch": 0.9873417721518988, - "grad_norm": 0.627831211955756, - "learning_rate": 9.35748626477056e-06, - "loss": 0.0577, + "epoch": 1.9742451154529306, + "grad_norm": 0.5035466196336941, + "learning_rate": 5.979965084754968e-06, + "loss": 0.0419, "step": 2223 }, { - "epoch": 0.9877859204974462, - "grad_norm": 0.48609069405336236, - "learning_rate": 9.356535278123826e-06, - "loss": 0.0625, + "epoch": 1.975133214920071, + "grad_norm": 0.40702905054749317, + "learning_rate": 5.9761643190191306e-06, + "loss": 0.0466, "step": 2224 }, { - "epoch": 0.9882300688429936, - "grad_norm": 0.5441221898111536, - "learning_rate": 9.355583636615832e-06, - "loss": 0.0671, + "epoch": 1.9760213143872114, + "grad_norm": 0.44802745275927625, + "learning_rate": 5.9723629667813645e-06, + "loss": 0.0393, "step": 2225 }, { - "epoch": 0.988674217188541, - "grad_norm": 0.6653694627323021, - "learning_rate": 9.354631340389633e-06, - "loss": 0.0678, + "epoch": 1.9769094138543517, + "grad_norm": 0.35762549891057027, + "learning_rate": 5.968561030325606e-06, + "loss": 0.0487, "step": 2226 }, { - "epoch": 0.9891183655340884, - "grad_norm": 0.49687550533635927, - "learning_rate": 9.353678389588367e-06, - "loss": 0.048, + "epoch": 1.977797513321492, + "grad_norm": 0.35013910891561983, + "learning_rate": 5.964758511936149e-06, + "loss": 0.0435, "step": 2227 }, { - "epoch": 0.9895625138796358, - "grad_norm": 0.5456376905073873, - "learning_rate": 9.352724784355286e-06, - "loss": 0.0763, + "epoch": 1.9786856127886323, + "grad_norm": 0.34922191729065305, + "learning_rate": 5.960955413897635e-06, + "loss": 0.0466, "step": 2228 }, { - "epoch": 0.9900066622251832, - "grad_norm": 0.5234550844694897, - "learning_rate": 9.35177052483373e-06, - "loss": 0.0573, + "epoch": 1.9795737122557726, + "grad_norm": 0.467416272653791, + "learning_rate": 5.957151738495048e-06, + "loss": 0.0485, "step": 2229 }, { - "epoch": 0.9904508105707306, - "grad_norm": 0.3992910106511535, - "learning_rate": 9.35081561116714e-06, - "loss": 0.0422, + "epoch": 1.980461811722913, + "grad_norm": 0.5291197544552343, + "learning_rate": 5.9533474880137276e-06, + "loss": 0.0564, "step": 2230 }, { - "epoch": 0.990894958916278, - "grad_norm": 0.4091987827719479, - "learning_rate": 9.349860043499056e-06, - "loss": 0.0474, + "epoch": 1.9813499111900534, + "grad_norm": 0.43472102473020946, + "learning_rate": 5.949542664739353e-06, + "loss": 0.0516, "step": 2231 }, { - "epoch": 0.9913391072618255, - "grad_norm": 0.5342639072522524, - "learning_rate": 9.348903821973114e-06, - "loss": 0.0663, + "epoch": 1.9822380106571935, + "grad_norm": 0.3577863252018631, + "learning_rate": 5.94573727095795e-06, + "loss": 0.0542, "step": 2232 }, { - "epoch": 0.9917832556073729, - "grad_norm": 0.4467060129332395, - "learning_rate": 9.347946946733055e-06, - "loss": 0.0443, + "epoch": 1.983126110124334, + "grad_norm": 0.35555273165445755, + "learning_rate": 5.941931308955885e-06, + "loss": 0.0467, "step": 2233 }, { - "epoch": 0.9922274039529203, - "grad_norm": 0.4523315857358909, - "learning_rate": 9.346989417922712e-06, - "loss": 0.0415, + "epoch": 1.9840142095914741, + "grad_norm": 0.4280711372451535, + "learning_rate": 5.93812478101987e-06, + "loss": 0.0505, "step": 2234 }, { - "epoch": 0.9926715522984677, - "grad_norm": 0.525064951465678, - "learning_rate": 9.346031235686014e-06, - "loss": 0.0663, + "epoch": 1.9849023090586146, + "grad_norm": 0.39287928024451396, + "learning_rate": 5.9343176894369515e-06, + "loss": 0.0423, "step": 2235 }, { - "epoch": 0.9931157006440151, - "grad_norm": 0.5615837318533068, - "learning_rate": 9.345072400166999e-06, - "loss": 0.0558, + "epoch": 1.9857904085257547, + "grad_norm": 0.4262044489424622, + "learning_rate": 5.93051003649452e-06, + "loss": 0.0446, "step": 2236 }, { - "epoch": 0.9935598489895625, - "grad_norm": 0.5550096636418779, - "learning_rate": 9.34411291150979e-06, - "loss": 0.0699, + "epoch": 1.9866785079928952, + "grad_norm": 0.42635191575274267, + "learning_rate": 5.9267018244803e-06, + "loss": 0.0448, "step": 2237 }, { - "epoch": 0.9940039973351099, - "grad_norm": 0.9242044346261947, - "learning_rate": 9.343152769858616e-06, - "loss": 0.0623, + "epoch": 1.9875666074600356, + "grad_norm": 0.39101569632425154, + "learning_rate": 5.9228930556823515e-06, + "loss": 0.0449, "step": 2238 }, { - "epoch": 0.9944481456806573, - "grad_norm": 0.6358712068894604, - "learning_rate": 9.342191975357806e-06, - "loss": 0.0675, + "epoch": 1.9884547069271759, + "grad_norm": 0.4917474281311356, + "learning_rate": 5.9190837323890715e-06, + "loss": 0.0505, "step": 2239 }, { - "epoch": 0.9948922940262047, - "grad_norm": 0.5189758212655892, - "learning_rate": 9.34123052815178e-06, - "loss": 0.0446, + "epoch": 1.9893428063943162, + "grad_norm": 0.3954654184821005, + "learning_rate": 5.915273856889189e-06, + "loss": 0.0493, "step": 2240 }, { - "epoch": 0.9953364423717521, - "grad_norm": 0.4493321450297416, - "learning_rate": 9.340268428385062e-06, - "loss": 0.0416, + "epoch": 1.9902309058614565, + "grad_norm": 0.3390129303412325, + "learning_rate": 5.911463431471764e-06, + "loss": 0.0404, "step": 2241 }, { - "epoch": 0.9957805907172996, - "grad_norm": 0.4052216095743849, - "learning_rate": 9.339305676202268e-06, - "loss": 0.0524, + "epoch": 1.9911190053285968, + "grad_norm": 0.32144578820830605, + "learning_rate": 5.907652458426187e-06, + "loss": 0.0426, "step": 2242 }, { - "epoch": 0.996224739062847, - "grad_norm": 0.4257914645053657, - "learning_rate": 9.338342271748122e-06, - "loss": 0.0439, + "epoch": 1.992007104795737, + "grad_norm": 0.43165150207886394, + "learning_rate": 5.903840940042179e-06, + "loss": 0.0359, "step": 2243 }, { - "epoch": 0.9966688874083944, - "grad_norm": 0.5443368549896095, - "learning_rate": 9.337378215167436e-06, - "loss": 0.0494, + "epoch": 1.9928952042628776, + "grad_norm": 0.4641684247626532, + "learning_rate": 5.9000288786097885e-06, + "loss": 0.0528, "step": 2244 }, { - "epoch": 0.9971130357539418, - "grad_norm": 0.48744281644400594, - "learning_rate": 9.336413506605123e-06, - "loss": 0.0692, + "epoch": 1.9937833037300177, + "grad_norm": 0.41190342167954275, + "learning_rate": 5.896216276419386e-06, + "loss": 0.0486, "step": 2245 }, { - "epoch": 0.9975571840994892, - "grad_norm": 0.730043582195284, - "learning_rate": 9.335448146206201e-06, - "loss": 0.0613, + "epoch": 1.9946714031971582, + "grad_norm": 0.33937679320158426, + "learning_rate": 5.8924031357616755e-06, + "loss": 0.0381, "step": 2246 }, { - "epoch": 0.9980013324450366, - "grad_norm": 0.6666928317513323, - "learning_rate": 9.334482134115774e-06, - "loss": 0.0626, + "epoch": 1.9955595026642983, + "grad_norm": 0.3670267910671915, + "learning_rate": 5.888589458927677e-06, + "loss": 0.0422, "step": 2247 }, { - "epoch": 0.998445480790584, - "grad_norm": 1.1582138795192063, - "learning_rate": 9.333515470479052e-06, - "loss": 0.0706, + "epoch": 1.9964476021314388, + "grad_norm": 0.3431648871652662, + "learning_rate": 5.884775248208733e-06, + "loss": 0.0378, "step": 2248 }, { - "epoch": 0.9988896291361314, - "grad_norm": 0.41845565011499725, - "learning_rate": 9.332548155441341e-06, - "loss": 0.0428, + "epoch": 1.997335701598579, + "grad_norm": 0.6145972650758212, + "learning_rate": 5.880960505896513e-06, + "loss": 0.0664, "step": 2249 }, { - "epoch": 0.9993337774816788, - "grad_norm": 0.49853102857677845, - "learning_rate": 9.331580189148047e-06, - "loss": 0.0498, + "epoch": 1.9982238010657194, + "grad_norm": 0.3733321388555608, + "learning_rate": 5.8771452342829975e-06, + "loss": 0.0439, "step": 2250 }, { - "epoch": 0.9997779258272262, - "grad_norm": 0.469460122533607, - "learning_rate": 9.330611571744668e-06, - "loss": 0.0584, + "epoch": 1.9991119005328597, + "grad_norm": 0.4197717737729959, + "learning_rate": 5.873329435660492e-06, + "loss": 0.0422, "step": 2251 }, { - "epoch": 0.9997779258272262, - "eval_loss": 0.06305240094661713, - "eval_runtime": 420.7122, - "eval_samples_per_second": 36.048, - "eval_steps_per_second": 1.127, - "step": 2251 + "epoch": 2.0, + "grad_norm": 0.5081836328682018, + "learning_rate": 5.8695131123216155e-06, + "loss": 0.0504, + "step": 2252 }, { - "epoch": 1.0002220741727736, - "grad_norm": 0.8383200378207877, - "learning_rate": 9.329642303376806e-06, - "loss": 0.0866, + "epoch": 2.0, + "eval_loss": 0.04935871437191963, + "eval_runtime": 78.0736, + "eval_samples_per_second": 194.252, + "eval_steps_per_second": 3.036, "step": 2252 }, { - "epoch": 1.0006662225183212, - "grad_norm": 0.4162547630354714, - "learning_rate": 9.328672384190158e-06, - "loss": 0.044, + "epoch": 2.0008880994671405, + "grad_norm": 0.7952094344327605, + "learning_rate": 5.865696266559303e-06, + "loss": 0.0427, "step": 2253 }, { - "epoch": 1.0011103708638684, - "grad_norm": 0.5811219466400265, - "learning_rate": 9.327701814330521e-06, - "loss": 0.0489, + "epoch": 2.0017761989342806, + "grad_norm": 0.4823228734323676, + "learning_rate": 5.861878900666801e-06, + "loss": 0.0488, "step": 2254 }, { - "epoch": 1.001554519209416, - "grad_norm": 0.4943074740960733, - "learning_rate": 9.326730593943784e-06, - "loss": 0.0532, + "epoch": 2.002664298401421, + "grad_norm": 0.3501005673920316, + "learning_rate": 5.858061016937673e-06, + "loss": 0.048, "step": 2255 }, { - "epoch": 1.0019986675549633, - "grad_norm": 0.5854940362247197, - "learning_rate": 9.325758723175942e-06, - "loss": 0.0447, + "epoch": 2.003552397868561, + "grad_norm": 0.4824957705288789, + "learning_rate": 5.85424261766579e-06, + "loss": 0.0533, "step": 2256 }, { - "epoch": 1.0024428159005108, - "grad_norm": 0.6734583854523404, - "learning_rate": 9.324786202173082e-06, - "loss": 0.0565, + "epoch": 2.0044404973357017, + "grad_norm": 0.3562463152160258, + "learning_rate": 5.850423705145334e-06, + "loss": 0.0365, "step": 2257 }, { - "epoch": 1.0028869642460583, - "grad_norm": 0.626997176192022, - "learning_rate": 9.32381303108139e-06, - "loss": 0.0554, + "epoch": 2.005328596802842, + "grad_norm": 0.4361043063883756, + "learning_rate": 5.846604281670795e-06, + "loss": 0.0397, "step": 2258 }, { - "epoch": 1.0033311125916056, - "grad_norm": 0.7899011567544635, - "learning_rate": 9.322839210047152e-06, - "loss": 0.0541, + "epoch": 2.0062166962699823, + "grad_norm": 0.4735692130528875, + "learning_rate": 5.8427843495369715e-06, + "loss": 0.0603, "step": 2259 }, { - "epoch": 1.003775260937153, - "grad_norm": 0.44777931003199795, - "learning_rate": 9.321864739216747e-06, - "loss": 0.0475, + "epoch": 2.0071047957371224, + "grad_norm": 0.3910962549697052, + "learning_rate": 5.838963911038965e-06, + "loss": 0.0343, "step": 2260 }, { - "epoch": 1.0042194092827004, - "grad_norm": 0.7176524264984108, - "learning_rate": 9.320889618736657e-06, - "loss": 0.0657, + "epoch": 2.007992895204263, + "grad_norm": 0.4929446695376269, + "learning_rate": 5.835142968472181e-06, + "loss": 0.0426, "step": 2261 }, { - "epoch": 1.0046635576282479, - "grad_norm": 0.6297899790013282, - "learning_rate": 9.319913848753457e-06, - "loss": 0.0548, + "epoch": 2.008880994671403, + "grad_norm": 0.40181116823087554, + "learning_rate": 5.831321524132336e-06, + "loss": 0.0388, "step": 2262 }, { - "epoch": 1.0051077059737952, - "grad_norm": 0.6829050036731806, - "learning_rate": 9.318937429413823e-06, - "loss": 0.0677, + "epoch": 2.0097690941385435, + "grad_norm": 0.44171569498479984, + "learning_rate": 5.827499580315435e-06, + "loss": 0.0418, "step": 2263 }, { - "epoch": 1.0055518543193427, - "grad_norm": 0.6099066772979853, - "learning_rate": 9.31796036086453e-06, - "loss": 0.0674, + "epoch": 2.0106571936056836, + "grad_norm": 0.5181303751598587, + "learning_rate": 5.8236771393177915e-06, + "loss": 0.0546, "step": 2264 }, { - "epoch": 1.00599600266489, - "grad_norm": 0.5220221509404842, - "learning_rate": 9.316982643252444e-06, - "loss": 0.0489, + "epoch": 2.011545293072824, + "grad_norm": 0.3997725294058655, + "learning_rate": 5.819854203436017e-06, + "loss": 0.0466, "step": 2265 }, { - "epoch": 1.0064401510104375, - "grad_norm": 0.6319552346310404, - "learning_rate": 9.316004276724533e-06, - "loss": 0.0566, + "epoch": 2.0124333925399647, + "grad_norm": 0.321014524920309, + "learning_rate": 5.8160307749670185e-06, + "loss": 0.0322, "step": 2266 }, { - "epoch": 1.006884299355985, - "grad_norm": 0.75308545956684, - "learning_rate": 9.315025261427864e-06, - "loss": 0.0611, + "epoch": 2.0133214920071048, + "grad_norm": 0.3503755061990151, + "learning_rate": 5.812206856207998e-06, + "loss": 0.0345, "step": 2267 }, { - "epoch": 1.0073284477015323, - "grad_norm": 0.5854190329106237, - "learning_rate": 9.314045597509598e-06, - "loss": 0.0589, + "epoch": 2.0142095914742453, + "grad_norm": 0.3934928239419917, + "learning_rate": 5.808382449456455e-06, + "loss": 0.0369, "step": 2268 }, { - "epoch": 1.0077725960470798, - "grad_norm": 0.4592027406322817, - "learning_rate": 9.313065285116997e-06, - "loss": 0.0363, + "epoch": 2.0150976909413854, + "grad_norm": 0.3357771609218698, + "learning_rate": 5.804557557010182e-06, + "loss": 0.0335, "step": 2269 }, { - "epoch": 1.008216744392627, - "grad_norm": 0.7355227136248891, - "learning_rate": 9.312084324397416e-06, - "loss": 0.0666, + "epoch": 2.015985790408526, + "grad_norm": 0.5573701881162235, + "learning_rate": 5.80073218116726e-06, + "loss": 0.0422, "step": 2270 }, { - "epoch": 1.0086608927381746, - "grad_norm": 0.6811364248406817, - "learning_rate": 9.311102715498312e-06, - "loss": 0.0664, + "epoch": 2.016873889875666, + "grad_norm": 0.39087568754014085, + "learning_rate": 5.796906324226064e-06, + "loss": 0.0376, "step": 2271 }, { - "epoch": 1.009105041083722, - "grad_norm": 0.4859582181876178, - "learning_rate": 9.310120458567238e-06, - "loss": 0.0455, + "epoch": 2.0177619893428065, + "grad_norm": 0.4522705113909331, + "learning_rate": 5.793079988485257e-06, + "loss": 0.0407, "step": 2272 }, { - "epoch": 1.0095491894292694, - "grad_norm": 0.6126182761710498, - "learning_rate": 9.309137553751843e-06, - "loss": 0.0568, + "epoch": 2.0186500888099466, + "grad_norm": 0.5408591361233624, + "learning_rate": 5.789253176243787e-06, + "loss": 0.0346, "step": 2273 }, { - "epoch": 1.0099933377748167, - "grad_norm": 0.6903438432741064, - "learning_rate": 9.308154001199874e-06, - "loss": 0.0546, + "epoch": 2.019538188277087, + "grad_norm": 0.43540521811593075, + "learning_rate": 5.785425889800894e-06, + "loss": 0.036, "step": 2274 }, { - "epoch": 1.0104374861203642, - "grad_norm": 0.5254671203092518, - "learning_rate": 9.307169801059175e-06, - "loss": 0.0518, + "epoch": 2.020426287744227, + "grad_norm": 0.34588690097994523, + "learning_rate": 5.781598131456097e-06, + "loss": 0.0347, "step": 2275 }, { - "epoch": 1.0108816344659115, - "grad_norm": 0.502322384644144, - "learning_rate": 9.30618495347769e-06, - "loss": 0.0528, + "epoch": 2.0213143872113677, + "grad_norm": 0.39823378357341, + "learning_rate": 5.777769903509201e-06, + "loss": 0.0397, "step": 2276 }, { - "epoch": 1.011325782811459, - "grad_norm": 0.4570165148396037, - "learning_rate": 9.305199458603456e-06, - "loss": 0.0423, + "epoch": 2.022202486678508, + "grad_norm": 0.3526815627280943, + "learning_rate": 5.773941208260295e-06, + "loss": 0.0382, "step": 2277 }, { - "epoch": 1.0117699311570065, - "grad_norm": 0.6475436488464652, - "learning_rate": 9.304213316584612e-06, - "loss": 0.0539, + "epoch": 2.0230905861456483, + "grad_norm": 0.34085681644037663, + "learning_rate": 5.770112048009747e-06, + "loss": 0.0355, "step": 2278 }, { - "epoch": 1.0122140795025538, - "grad_norm": 0.5830527566752789, - "learning_rate": 9.30322652756939e-06, - "loss": 0.0533, + "epoch": 2.023978685612789, + "grad_norm": 0.5025947220291987, + "learning_rate": 5.7662824250582024e-06, + "loss": 0.0437, "step": 2279 }, { - "epoch": 1.0126582278481013, - "grad_norm": 0.5218861392382439, - "learning_rate": 9.302239091706121e-06, - "loss": 0.0478, + "epoch": 2.024866785079929, + "grad_norm": 0.3393835832024569, + "learning_rate": 5.762452341706588e-06, + "loss": 0.0369, "step": 2280 }, { - "epoch": 1.0131023761936486, - "grad_norm": 0.7938697599205536, - "learning_rate": 9.301251009143236e-06, - "loss": 0.0913, + "epoch": 2.0257548845470694, + "grad_norm": 0.3667445086990257, + "learning_rate": 5.758621800256104e-06, + "loss": 0.0326, "step": 2281 }, { - "epoch": 1.0135465245391961, - "grad_norm": 0.5550504645736817, - "learning_rate": 9.300262280029257e-06, - "loss": 0.0644, + "epoch": 2.0266429840142095, + "grad_norm": 0.3703001088427121, + "learning_rate": 5.7547908030082325e-06, + "loss": 0.0393, "step": 2282 }, { - "epoch": 1.0139906728847434, - "grad_norm": 0.594954378218846, - "learning_rate": 9.29927290451281e-06, - "loss": 0.0541, + "epoch": 2.02753108348135, + "grad_norm": 0.3698404547191662, + "learning_rate": 5.750959352264717e-06, + "loss": 0.0348, "step": 2283 }, { - "epoch": 1.014434821230291, - "grad_norm": 0.9298600097346652, - "learning_rate": 9.298282882742612e-06, - "loss": 0.0518, + "epoch": 2.02841918294849, + "grad_norm": 0.43462692559074184, + "learning_rate": 5.747127450327587e-06, + "loss": 0.0401, "step": 2284 }, { - "epoch": 1.0148789695758382, - "grad_norm": 0.46811932472048295, - "learning_rate": 9.297292214867484e-06, - "loss": 0.0455, + "epoch": 2.0293072824156306, + "grad_norm": 0.6715523556953165, + "learning_rate": 5.743295099499135e-06, + "loss": 0.0529, "step": 2285 }, { - "epoch": 1.0153231179213857, - "grad_norm": 0.5871427629727887, - "learning_rate": 9.296300901036337e-06, - "loss": 0.0572, + "epoch": 2.0301953818827707, + "grad_norm": 0.43120066175573857, + "learning_rate": 5.739462302081927e-06, + "loss": 0.0457, "step": 2286 }, { - "epoch": 1.0157672662669333, - "grad_norm": 0.6945899846686857, - "learning_rate": 9.295308941398183e-06, - "loss": 0.061, + "epoch": 2.0310834813499112, + "grad_norm": 0.35974781406853623, + "learning_rate": 5.735629060378794e-06, + "loss": 0.0374, "step": 2287 }, { - "epoch": 1.0162114146124805, - "grad_norm": 0.640279460698154, - "learning_rate": 9.294316336102132e-06, - "loss": 0.0599, + "epoch": 2.0319715808170513, + "grad_norm": 0.41685416813432485, + "learning_rate": 5.731795376692836e-06, + "loss": 0.0416, "step": 2288 }, { - "epoch": 1.016655562958028, - "grad_norm": 0.31136909660088496, - "learning_rate": 9.293323085297386e-06, - "loss": 0.026, + "epoch": 2.032859680284192, + "grad_norm": 0.467495635455071, + "learning_rate": 5.7279612533274155e-06, + "loss": 0.0431, "step": 2289 }, { - "epoch": 1.0170997113035753, - "grad_norm": 0.4114152449104369, - "learning_rate": 9.29232918913325e-06, - "loss": 0.0379, + "epoch": 2.0337477797513324, + "grad_norm": 0.3463393769904308, + "learning_rate": 5.724126692586167e-06, + "loss": 0.0347, "step": 2290 }, { - "epoch": 1.0175438596491229, - "grad_norm": 0.41732022602175256, - "learning_rate": 9.291334647759122e-06, - "loss": 0.0422, + "epoch": 2.0346358792184724, + "grad_norm": 0.3883881517285845, + "learning_rate": 5.720291696772979e-06, + "loss": 0.0465, "step": 2291 }, { - "epoch": 1.0179880079946702, - "grad_norm": 0.762539369842014, - "learning_rate": 9.2903394613245e-06, - "loss": 0.0597, + "epoch": 2.035523978685613, + "grad_norm": 0.3759559738117336, + "learning_rate": 5.716456268192007e-06, + "loss": 0.0314, "step": 2292 }, { - "epoch": 1.0184321563402177, - "grad_norm": 0.9415453759336739, - "learning_rate": 9.289343629978978e-06, - "loss": 0.0739, + "epoch": 2.036412078152753, + "grad_norm": 0.389950661760715, + "learning_rate": 5.712620409147663e-06, + "loss": 0.0393, "step": 2293 }, { - "epoch": 1.018876304685765, - "grad_norm": 0.5123579743591409, - "learning_rate": 9.288347153872245e-06, - "loss": 0.0571, + "epoch": 2.0373001776198936, + "grad_norm": 0.3887518217522792, + "learning_rate": 5.70878412194462e-06, + "loss": 0.0357, "step": 2294 }, { - "epoch": 1.0193204530313125, - "grad_norm": 0.458862217726016, - "learning_rate": 9.287350033154088e-06, - "loss": 0.0468, + "epoch": 2.0381882770870337, + "grad_norm": 0.45164570387220965, + "learning_rate": 5.704947408887807e-06, + "loss": 0.0375, "step": 2295 }, { - "epoch": 1.01976460137686, - "grad_norm": 1.3873099747217557, - "learning_rate": 9.28635226797439e-06, - "loss": 0.0745, + "epoch": 2.039076376554174, + "grad_norm": 0.39126726554737956, + "learning_rate": 5.70111027228241e-06, + "loss": 0.043, "step": 2296 }, { - "epoch": 1.0202087497224073, - "grad_norm": 0.5997981500021724, - "learning_rate": 9.285353858483138e-06, - "loss": 0.0506, + "epoch": 2.0399644760213143, + "grad_norm": 0.44466939780687276, + "learning_rate": 5.697272714433867e-06, + "loss": 0.0378, "step": 2297 }, { - "epoch": 1.0206528980679548, - "grad_norm": 0.6396330710132598, - "learning_rate": 9.284354804830403e-06, - "loss": 0.0518, + "epoch": 2.040852575488455, + "grad_norm": 0.36494223514375723, + "learning_rate": 5.693434737647872e-06, + "loss": 0.0399, "step": 2298 }, { - "epoch": 1.021097046413502, - "grad_norm": 0.8119329100233045, - "learning_rate": 9.283355107166361e-06, - "loss": 0.088, + "epoch": 2.041740674955595, + "grad_norm": 0.3791244667049865, + "learning_rate": 5.68959634423037e-06, + "loss": 0.0385, "step": 2299 }, { - "epoch": 1.0215411947590496, - "grad_norm": 0.6001947150070646, - "learning_rate": 9.282354765641286e-06, - "loss": 0.0503, + "epoch": 2.0426287744227354, + "grad_norm": 0.3680804033230417, + "learning_rate": 5.685757536487557e-06, + "loss": 0.0333, "step": 2300 }, { - "epoch": 1.0219853431045969, - "grad_norm": 0.5257172181736586, - "learning_rate": 9.281353780405546e-06, - "loss": 0.0632, + "epoch": 2.0435168738898755, + "grad_norm": 0.40232797194085973, + "learning_rate": 5.681918316725874e-06, + "loss": 0.0391, "step": 2301 }, { - "epoch": 1.0224294914501444, - "grad_norm": 0.4904406999367498, - "learning_rate": 9.280352151609604e-06, - "loss": 0.0412, + "epoch": 2.044404973357016, + "grad_norm": 0.5488807941421024, + "learning_rate": 5.678078687252014e-06, + "loss": 0.041, "step": 2302 }, { - "epoch": 1.0228736397956917, - "grad_norm": 0.6023825108536248, - "learning_rate": 9.279349879404024e-06, - "loss": 0.0501, + "epoch": 2.0452930728241565, + "grad_norm": 0.3533105248855852, + "learning_rate": 5.674238650372913e-06, + "loss": 0.0462, "step": 2303 }, { - "epoch": 1.0233177881412392, - "grad_norm": 0.5713607373831051, - "learning_rate": 9.278346963939464e-06, - "loss": 0.0648, + "epoch": 2.0461811722912966, + "grad_norm": 0.36624694854114026, + "learning_rate": 5.670398208395755e-06, + "loss": 0.0327, "step": 2304 }, { - "epoch": 1.0237619364867865, - "grad_norm": 0.5117889653709818, - "learning_rate": 9.27734340536668e-06, - "loss": 0.0595, + "epoch": 2.047069271758437, + "grad_norm": 0.44984451832680755, + "learning_rate": 5.6665573636279645e-06, + "loss": 0.0481, "step": 2305 }, { - "epoch": 1.024206084832334, - "grad_norm": 0.5957895920503053, - "learning_rate": 9.27633920383652e-06, - "loss": 0.0588, + "epoch": 2.047957371225577, + "grad_norm": 0.4424528698739734, + "learning_rate": 5.662716118377212e-06, + "loss": 0.0313, "step": 2306 }, { - "epoch": 1.0246502331778815, - "grad_norm": 0.6119278145978992, - "learning_rate": 9.275334359499936e-06, - "loss": 0.0615, + "epoch": 2.0488454706927177, + "grad_norm": 0.6895484026383346, + "learning_rate": 5.6588744749514015e-06, + "loss": 0.0469, "step": 2307 }, { - "epoch": 1.0250943815234288, - "grad_norm": 0.5747304102651929, - "learning_rate": 9.274328872507973e-06, - "loss": 0.0609, + "epoch": 2.049733570159858, + "grad_norm": 0.4513378893263943, + "learning_rate": 5.6550324356586826e-06, + "loss": 0.0394, "step": 2308 }, { - "epoch": 1.0255385298689763, - "grad_norm": 0.5890004950126706, - "learning_rate": 9.273322743011775e-06, - "loss": 0.054, + "epoch": 2.0506216696269983, + "grad_norm": 0.464556274747316, + "learning_rate": 5.651190002807442e-06, + "loss": 0.0499, "step": 2309 }, { - "epoch": 1.0259826782145236, - "grad_norm": 0.46856707487309196, - "learning_rate": 9.272315971162573e-06, - "loss": 0.0438, + "epoch": 2.0515097690941384, + "grad_norm": 0.3924616519590994, + "learning_rate": 5.647347178706301e-06, + "loss": 0.0455, "step": 2310 }, { - "epoch": 1.0264268265600711, - "grad_norm": 0.5705380710124494, - "learning_rate": 9.27130855711171e-06, - "loss": 0.0477, + "epoch": 2.052397868561279, + "grad_norm": 0.43472551535337545, + "learning_rate": 5.643503965664113e-06, + "loss": 0.0435, "step": 2311 }, { - "epoch": 1.0268709749056184, - "grad_norm": 0.7064165326935903, - "learning_rate": 9.270300501010612e-06, - "loss": 0.0568, + "epoch": 2.053285968028419, + "grad_norm": 0.33532355528532265, + "learning_rate": 5.639660365989974e-06, + "loss": 0.0398, "step": 2312 }, { - "epoch": 1.027315123251166, - "grad_norm": 0.5416989623481593, - "learning_rate": 9.26929180301081e-06, - "loss": 0.0682, + "epoch": 2.0541740674955595, + "grad_norm": 0.4111267915605023, + "learning_rate": 5.635816381993204e-06, + "loss": 0.0504, "step": 2313 }, { - "epoch": 1.0277592715967132, - "grad_norm": 0.49073246688028643, - "learning_rate": 9.268282463263928e-06, - "loss": 0.0629, + "epoch": 2.0550621669626996, + "grad_norm": 0.30689373893709365, + "learning_rate": 5.631972015983357e-06, + "loss": 0.0404, "step": 2314 }, { - "epoch": 1.0282034199422607, - "grad_norm": 0.47098174668190423, - "learning_rate": 9.267272481921686e-06, - "loss": 0.0492, + "epoch": 2.05595026642984, + "grad_norm": 0.39067768284945187, + "learning_rate": 5.628127270270216e-06, + "loss": 0.0362, "step": 2315 }, { - "epoch": 1.0286475682878082, - "grad_norm": 0.3813955943581627, - "learning_rate": 9.266261859135901e-06, - "loss": 0.0308, + "epoch": 2.0568383658969807, + "grad_norm": 0.42380897626283953, + "learning_rate": 5.624282147163794e-06, + "loss": 0.0516, "step": 2316 }, { - "epoch": 1.0290917166333555, - "grad_norm": 0.6582567910792767, - "learning_rate": 9.265250595058486e-06, - "loss": 0.0573, + "epoch": 2.0577264653641207, + "grad_norm": 0.3401662750534891, + "learning_rate": 5.620436648974327e-06, + "loss": 0.0452, "step": 2317 }, { - "epoch": 1.029535864978903, - "grad_norm": 0.5953836977171739, - "learning_rate": 9.264238689841456e-06, - "loss": 0.0631, + "epoch": 2.0586145648312613, + "grad_norm": 0.35534646363805705, + "learning_rate": 5.616590778012281e-06, + "loss": 0.0397, "step": 2318 }, { - "epoch": 1.0299800133244503, - "grad_norm": 0.6292012725822909, - "learning_rate": 9.263226143636912e-06, - "loss": 0.0691, + "epoch": 2.0595026642984013, + "grad_norm": 0.4362074819535085, + "learning_rate": 5.612744536588342e-06, + "loss": 0.0458, "step": 2319 }, { - "epoch": 1.0304241616699978, - "grad_norm": 0.47225699541890964, - "learning_rate": 9.262212956597059e-06, - "loss": 0.0563, + "epoch": 2.060390763765542, + "grad_norm": 0.4322794319184328, + "learning_rate": 5.60889792701342e-06, + "loss": 0.0409, "step": 2320 }, { - "epoch": 1.0308683100155451, - "grad_norm": 0.48390119987242786, - "learning_rate": 9.261199128874197e-06, - "loss": 0.054, + "epoch": 2.061278863232682, + "grad_norm": 0.3883459633322041, + "learning_rate": 5.605050951598648e-06, + "loss": 0.0463, "step": 2321 }, { - "epoch": 1.0313124583610926, - "grad_norm": 0.5985733647635482, - "learning_rate": 9.26018466062072e-06, - "loss": 0.0582, + "epoch": 2.0621669626998225, + "grad_norm": 0.40505188925287783, + "learning_rate": 5.601203612655375e-06, + "loss": 0.0471, "step": 2322 }, { - "epoch": 1.03175660670664, - "grad_norm": 0.5869595699898266, - "learning_rate": 9.259169551989121e-06, - "loss": 0.0562, + "epoch": 2.0630550621669625, + "grad_norm": 0.36108421805009805, + "learning_rate": 5.597355912495172e-06, + "loss": 0.0364, "step": 2323 }, { - "epoch": 1.0322007550521874, - "grad_norm": 0.5529638354373985, - "learning_rate": 9.258153803131989e-06, - "loss": 0.055, + "epoch": 2.063943161634103, + "grad_norm": 0.4234595419373533, + "learning_rate": 5.593507853429824e-06, + "loss": 0.0335, "step": 2324 }, { - "epoch": 1.0326449033977347, - "grad_norm": 0.39012786867402377, - "learning_rate": 9.257137414202006e-06, - "loss": 0.0389, + "epoch": 2.064831261101243, + "grad_norm": 0.4297831836771365, + "learning_rate": 5.589659437771336e-06, + "loss": 0.0373, "step": 2325 }, { - "epoch": 1.0330890517432822, - "grad_norm": 0.5320314663484899, - "learning_rate": 9.256120385351953e-06, - "loss": 0.0593, + "epoch": 2.0657193605683837, + "grad_norm": 0.48330484467582835, + "learning_rate": 5.5858106678319225e-06, + "loss": 0.0342, "step": 2326 }, { - "epoch": 1.0335332000888298, - "grad_norm": 0.47550720714560507, - "learning_rate": 9.255102716734709e-06, - "loss": 0.0383, + "epoch": 2.0666074600355238, + "grad_norm": 0.3886759862387088, + "learning_rate": 5.581961545924013e-06, + "loss": 0.0363, "step": 2327 }, { - "epoch": 1.033977348434377, - "grad_norm": 0.6123732206520373, - "learning_rate": 9.254084408503243e-06, - "loss": 0.0733, + "epoch": 2.0674955595026643, + "grad_norm": 0.41718976870321706, + "learning_rate": 5.578112074360247e-06, + "loss": 0.0358, "step": 2328 }, { - "epoch": 1.0344214967799246, - "grad_norm": 0.6340880346930579, - "learning_rate": 9.253065460810627e-06, - "loss": 0.0703, + "epoch": 2.068383658969805, + "grad_norm": 0.39392661421508224, + "learning_rate": 5.574262255453479e-06, + "loss": 0.0426, "step": 2329 }, { - "epoch": 1.0348656451254719, - "grad_norm": 0.4558646564237513, - "learning_rate": 9.252045873810026e-06, - "loss": 0.0389, + "epoch": 2.069271758436945, + "grad_norm": 0.42149495836553186, + "learning_rate": 5.570412091516764e-06, + "loss": 0.0395, "step": 2330 }, { - "epoch": 1.0353097934710194, - "grad_norm": 0.573613972583976, - "learning_rate": 9.251025647654698e-06, - "loss": 0.0578, + "epoch": 2.0701598579040854, + "grad_norm": 0.4237706837990429, + "learning_rate": 5.566561584863374e-06, + "loss": 0.047, "step": 2331 }, { - "epoch": 1.0357539418165667, - "grad_norm": 0.45869687127989117, - "learning_rate": 9.250004782498006e-06, - "loss": 0.0486, + "epoch": 2.0710479573712255, + "grad_norm": 0.4034668610493672, + "learning_rate": 5.5627107378067764e-06, + "loss": 0.041, "step": 2332 }, { - "epoch": 1.0361980901621142, - "grad_norm": 0.700597406034261, - "learning_rate": 9.248983278493399e-06, - "loss": 0.0486, + "epoch": 2.071936056838366, + "grad_norm": 0.36742348344596093, + "learning_rate": 5.558859552660653e-06, + "loss": 0.0316, "step": 2333 }, { - "epoch": 1.0366422385076615, - "grad_norm": 0.5940754007816903, - "learning_rate": 9.247961135794428e-06, - "loss": 0.0487, + "epoch": 2.072824156305506, + "grad_norm": 0.3587954633601638, + "learning_rate": 5.5550080317388814e-06, + "loss": 0.0371, "step": 2334 }, { - "epoch": 1.037086386853209, - "grad_norm": 0.7212210545701735, - "learning_rate": 9.246938354554737e-06, - "loss": 0.051, + "epoch": 2.0737122557726466, + "grad_norm": 0.40199268106153924, + "learning_rate": 5.5511561773555455e-06, + "loss": 0.0487, "step": 2335 }, { - "epoch": 1.0375305351987565, - "grad_norm": 0.41449723705228086, - "learning_rate": 9.245914934928068e-06, - "loss": 0.0441, + "epoch": 2.0746003552397867, + "grad_norm": 0.407905082384333, + "learning_rate": 5.547303991824926e-06, + "loss": 0.0448, "step": 2336 }, { - "epoch": 1.0379746835443038, - "grad_norm": 0.9446091827668867, - "learning_rate": 9.24489087706826e-06, - "loss": 0.0941, + "epoch": 2.075488454706927, + "grad_norm": 0.5170026832974067, + "learning_rate": 5.5434514774615055e-06, + "loss": 0.0467, "step": 2337 }, { - "epoch": 1.0384188318898513, - "grad_norm": 0.43472127189546134, - "learning_rate": 9.243866181129246e-06, - "loss": 0.0444, + "epoch": 2.0763765541740673, + "grad_norm": 0.4119277516723528, + "learning_rate": 5.539598636579963e-06, + "loss": 0.0448, "step": 2338 }, { - "epoch": 1.0388629802353986, - "grad_norm": 0.8234990673910981, - "learning_rate": 9.242840847265053e-06, - "loss": 0.0449, + "epoch": 2.077264653641208, + "grad_norm": 0.3260123660764787, + "learning_rate": 5.535745471495174e-06, + "loss": 0.0368, "step": 2339 }, { - "epoch": 1.039307128580946, - "grad_norm": 0.5137093258500119, - "learning_rate": 9.241814875629806e-06, - "loss": 0.047, + "epoch": 2.0781527531083483, + "grad_norm": 0.3414650001047392, + "learning_rate": 5.531891984522209e-06, + "loss": 0.0368, "step": 2340 }, { - "epoch": 1.0397512769264934, - "grad_norm": 0.46164431011916773, - "learning_rate": 9.24078826637773e-06, - "loss": 0.0567, + "epoch": 2.0790408525754884, + "grad_norm": 0.4056721485870448, + "learning_rate": 5.52803817797633e-06, + "loss": 0.0438, "step": 2341 }, { - "epoch": 1.040195425272041, - "grad_norm": 0.5917045565013841, - "learning_rate": 9.239761019663139e-06, - "loss": 0.0529, + "epoch": 2.079928952042629, + "grad_norm": 0.3678665436664055, + "learning_rate": 5.524184054172993e-06, + "loss": 0.0531, "step": 2342 }, { - "epoch": 1.0406395736175882, - "grad_norm": 0.49372320773080497, - "learning_rate": 9.238733135640445e-06, - "loss": 0.0503, + "epoch": 2.080817051509769, + "grad_norm": 0.3163723687826652, + "learning_rate": 5.520329615427844e-06, + "loss": 0.0341, "step": 2343 }, { - "epoch": 1.0410837219631357, - "grad_norm": 0.47687223129773526, - "learning_rate": 9.237704614464157e-06, - "loss": 0.0538, + "epoch": 2.0817051509769096, + "grad_norm": 0.4452843633721888, + "learning_rate": 5.516474864056719e-06, + "loss": 0.0508, "step": 2344 }, { - "epoch": 1.0415278703086832, - "grad_norm": 0.5267358173749689, - "learning_rate": 9.236675456288879e-06, - "loss": 0.0533, + "epoch": 2.0825932504440496, + "grad_norm": 0.5166340185471159, + "learning_rate": 5.5126198023756405e-06, + "loss": 0.0414, "step": 2345 }, { - "epoch": 1.0419720186542305, - "grad_norm": 0.5562866454702318, - "learning_rate": 9.235645661269313e-06, - "loss": 0.0617, + "epoch": 2.08348134991119, + "grad_norm": 0.3184361504906298, + "learning_rate": 5.5087644327008175e-06, + "loss": 0.0282, "step": 2346 }, { - "epoch": 1.042416166999778, - "grad_norm": 0.48761824525349823, - "learning_rate": 9.234615229560251e-06, - "loss": 0.0646, + "epoch": 2.0843694493783302, + "grad_norm": 0.31126797004876566, + "learning_rate": 5.504908757348646e-06, + "loss": 0.0336, "step": 2347 }, { - "epoch": 1.0428603153453253, - "grad_norm": 0.5413755626896984, - "learning_rate": 9.233584161316588e-06, - "loss": 0.0485, + "epoch": 2.0852575488454708, + "grad_norm": 0.4467615849229334, + "learning_rate": 5.501052778635703e-06, + "loss": 0.0433, "step": 2348 }, { - "epoch": 1.0433044636908728, - "grad_norm": 0.48592846139974233, - "learning_rate": 9.232552456693308e-06, - "loss": 0.0466, + "epoch": 2.086145648312611, + "grad_norm": 0.48231834106863347, + "learning_rate": 5.4971964988787495e-06, + "loss": 0.0505, "step": 2349 }, { - "epoch": 1.04374861203642, - "grad_norm": 0.3666523977412306, - "learning_rate": 9.231520115845495e-06, - "loss": 0.0415, + "epoch": 2.0870337477797514, + "grad_norm": 0.46344451177017526, + "learning_rate": 5.493339920394725e-06, + "loss": 0.0416, "step": 2350 }, { - "epoch": 1.0441927603819676, - "grad_norm": 0.5041685160042538, - "learning_rate": 9.23048713892833e-06, - "loss": 0.0461, + "epoch": 2.0879218472468914, + "grad_norm": 0.4546314540078828, + "learning_rate": 5.4894830455007565e-06, + "loss": 0.0434, "step": 2351 }, { - "epoch": 1.044636908727515, - "grad_norm": 0.4557622908913518, - "learning_rate": 9.229453526097085e-06, - "loss": 0.0444, + "epoch": 2.088809946714032, + "grad_norm": 0.4698531978955114, + "learning_rate": 5.4856258765141345e-06, + "loss": 0.0499, "step": 2352 }, { - "epoch": 1.0450810570730624, - "grad_norm": 0.7840502309472853, - "learning_rate": 9.228419277507126e-06, - "loss": 0.0628, + "epoch": 2.0896980461811725, + "grad_norm": 0.3739618895122828, + "learning_rate": 5.481768415752342e-06, + "loss": 0.0458, "step": 2353 }, { - "epoch": 1.0455252054186097, - "grad_norm": 0.5291018953826038, - "learning_rate": 9.227384393313924e-06, - "loss": 0.0496, + "epoch": 2.0905861456483126, + "grad_norm": 0.47055253717642037, + "learning_rate": 5.477910665533025e-06, + "loss": 0.0366, "step": 2354 }, { - "epoch": 1.0459693537641572, - "grad_norm": 0.49003402431309595, - "learning_rate": 9.226348873673036e-06, - "loss": 0.0549, + "epoch": 2.091474245115453, + "grad_norm": 0.514735803166204, + "learning_rate": 5.4740526281740106e-06, + "loss": 0.0397, "step": 2355 }, { - "epoch": 1.0464135021097047, - "grad_norm": 0.5208623353433574, - "learning_rate": 9.22531271874012e-06, - "loss": 0.0473, + "epoch": 2.092362344582593, + "grad_norm": 0.7233544526900478, + "learning_rate": 5.470194305993296e-06, + "loss": 0.0525, "step": 2356 }, { - "epoch": 1.046857650455252, - "grad_norm": 0.6890800835128176, - "learning_rate": 9.224275928670925e-06, - "loss": 0.0554, + "epoch": 2.0932504440497337, + "grad_norm": 0.3705429016840043, + "learning_rate": 5.4663357013090504e-06, + "loss": 0.0396, "step": 2357 }, { - "epoch": 1.0473017988007995, - "grad_norm": 0.6619670076578327, - "learning_rate": 9.223238503621302e-06, - "loss": 0.0577, + "epoch": 2.094138543516874, + "grad_norm": 0.3236082840599957, + "learning_rate": 5.462476816439609e-06, + "loss": 0.0354, "step": 2358 }, { - "epoch": 1.0477459471463468, - "grad_norm": 0.5128511282981787, - "learning_rate": 9.22220044374719e-06, - "loss": 0.0406, + "epoch": 2.0950266429840143, + "grad_norm": 0.45769917506165464, + "learning_rate": 5.458617653703483e-06, + "loss": 0.0421, "step": 2359 }, { - "epoch": 1.0481900954918943, - "grad_norm": 0.521752374455369, - "learning_rate": 9.221161749204629e-06, - "loss": 0.05, + "epoch": 2.0959147424511544, + "grad_norm": 0.4548383020284334, + "learning_rate": 5.454758215419343e-06, + "loss": 0.0504, "step": 2360 }, { - "epoch": 1.0486342438374416, - "grad_norm": 0.6524832106347492, - "learning_rate": 9.220122420149753e-06, - "loss": 0.0548, + "epoch": 2.096802841918295, + "grad_norm": 0.3942923384474443, + "learning_rate": 5.450898503906027e-06, + "loss": 0.0391, "step": 2361 }, { - "epoch": 1.0490783921829892, - "grad_norm": 0.49101446033388607, - "learning_rate": 9.219082456738788e-06, - "loss": 0.0392, + "epoch": 2.097690941385435, + "grad_norm": 0.40259282724092404, + "learning_rate": 5.447038521482542e-06, + "loss": 0.0394, "step": 2362 }, { - "epoch": 1.0495225405285364, - "grad_norm": 0.40291059778432986, - "learning_rate": 9.218041859128062e-06, - "loss": 0.0403, + "epoch": 2.0985790408525755, + "grad_norm": 0.48116526775975643, + "learning_rate": 5.443178270468052e-06, + "loss": 0.0412, "step": 2363 }, { - "epoch": 1.049966688874084, - "grad_norm": 0.52475244536887, - "learning_rate": 9.217000627473993e-06, - "loss": 0.0518, + "epoch": 2.0994671403197156, + "grad_norm": 0.3684661252311372, + "learning_rate": 5.439317753181883e-06, + "loss": 0.0387, "step": 2364 }, { - "epoch": 1.0504108372196312, - "grad_norm": 0.6932318744101922, - "learning_rate": 9.215958761933093e-06, - "loss": 0.0586, + "epoch": 2.100355239786856, + "grad_norm": 0.3798127256992596, + "learning_rate": 5.435456971943525e-06, + "loss": 0.0421, "step": 2365 }, { - "epoch": 1.0508549855651788, - "grad_norm": 0.4220631830774893, - "learning_rate": 9.214916262661977e-06, - "loss": 0.045, + "epoch": 2.1012433392539966, + "grad_norm": 0.6152548648899345, + "learning_rate": 5.431595929072622e-06, + "loss": 0.0538, "step": 2366 }, { - "epoch": 1.0512991339107263, - "grad_norm": 0.4512747724107325, - "learning_rate": 9.213873129817346e-06, - "loss": 0.0543, + "epoch": 2.1021314387211367, + "grad_norm": 0.4109862071182783, + "learning_rate": 5.427734626888979e-06, + "loss": 0.0368, "step": 2367 }, { - "epoch": 1.0517432822562736, - "grad_norm": 0.4570731167347105, - "learning_rate": 9.212829363556003e-06, - "loss": 0.0594, + "epoch": 2.1030195381882772, + "grad_norm": 0.362726515994217, + "learning_rate": 5.423873067712552e-06, + "loss": 0.0367, "step": 2368 }, { - "epoch": 1.052187430601821, - "grad_norm": 0.508942846260648, - "learning_rate": 9.211784964034842e-06, - "loss": 0.0685, + "epoch": 2.1039076376554173, + "grad_norm": 0.5872429390180516, + "learning_rate": 5.420011253863458e-06, + "loss": 0.0453, "step": 2369 }, { - "epoch": 1.0526315789473684, - "grad_norm": 0.5865147734769778, - "learning_rate": 9.210739931410857e-06, - "loss": 0.064, + "epoch": 2.104795737122558, + "grad_norm": 0.3504058387005002, + "learning_rate": 5.416149187661961e-06, + "loss": 0.0324, "step": 2370 }, { - "epoch": 1.0530757272929159, - "grad_norm": 0.6404273704289678, - "learning_rate": 9.209694265841132e-06, - "loss": 0.0449, + "epoch": 2.105683836589698, + "grad_norm": 0.4239830737147979, + "learning_rate": 5.41228687142848e-06, + "loss": 0.0396, "step": 2371 }, { - "epoch": 1.0535198756384632, - "grad_norm": 0.5024824890374885, - "learning_rate": 9.208647967482849e-06, - "loss": 0.053, + "epoch": 2.1065719360568385, + "grad_norm": 0.4668127971727929, + "learning_rate": 5.408424307483583e-06, + "loss": 0.0427, "step": 2372 }, { - "epoch": 1.0539640239840107, - "grad_norm": 0.5079871293280772, - "learning_rate": 9.207601036493284e-06, - "loss": 0.0504, + "epoch": 2.1074600355239785, + "grad_norm": 0.40697925373166755, + "learning_rate": 5.404561498147989e-06, + "loss": 0.035, "step": 2373 }, { - "epoch": 1.054408172329558, - "grad_norm": 0.5005289716599005, - "learning_rate": 9.206553473029807e-06, - "loss": 0.0469, + "epoch": 2.108348134991119, + "grad_norm": 0.4978113689735989, + "learning_rate": 5.400698445742557e-06, + "loss": 0.0492, "step": 2374 }, { - "epoch": 1.0548523206751055, - "grad_norm": 0.6348636511274667, - "learning_rate": 9.205505277249888e-06, - "loss": 0.0664, + "epoch": 2.109236234458259, + "grad_norm": 0.323997207827145, + "learning_rate": 5.396835152588304e-06, + "loss": 0.0366, "step": 2375 }, { - "epoch": 1.055296469020653, - "grad_norm": 0.7124285353701557, - "learning_rate": 9.204456449311086e-06, - "loss": 0.0811, + "epoch": 2.1101243339253997, + "grad_norm": 0.47966324809157906, + "learning_rate": 5.392971621006382e-06, + "loss": 0.0416, "step": 2376 }, { - "epoch": 1.0557406173662003, - "grad_norm": 0.7155216157670515, - "learning_rate": 9.203406989371058e-06, - "loss": 0.068, + "epoch": 2.11101243339254, + "grad_norm": 0.3550930916403458, + "learning_rate": 5.389107853318088e-06, + "loss": 0.0376, "step": 2377 }, { - "epoch": 1.0561847657117478, - "grad_norm": 0.6903207059209383, - "learning_rate": 9.202356897587556e-06, - "loss": 0.0625, + "epoch": 2.1119005328596803, + "grad_norm": 0.37221503133895334, + "learning_rate": 5.385243851844866e-06, + "loss": 0.0365, "step": 2378 }, { - "epoch": 1.056628914057295, - "grad_norm": 0.4483522509553745, - "learning_rate": 9.201306174118428e-06, - "loss": 0.0461, + "epoch": 2.112788632326821, + "grad_norm": 0.33210107248881343, + "learning_rate": 5.381379618908296e-06, + "loss": 0.0403, "step": 2379 }, { - "epoch": 1.0570730624028426, - "grad_norm": 0.6352875132111704, - "learning_rate": 9.200254819121612e-06, - "loss": 0.0838, + "epoch": 2.113676731793961, + "grad_norm": 0.3391506479993458, + "learning_rate": 5.377515156830094e-06, + "loss": 0.0297, "step": 2380 }, { - "epoch": 1.05751721074839, - "grad_norm": 0.600462738349843, - "learning_rate": 9.19920283275515e-06, - "loss": 0.0498, + "epoch": 2.1145648312611014, + "grad_norm": 0.4238328579655558, + "learning_rate": 5.373650467932122e-06, + "loss": 0.0367, "step": 2381 }, { - "epoch": 1.0579613590939374, - "grad_norm": 0.513630635347009, - "learning_rate": 9.198150215177168e-06, - "loss": 0.0557, + "epoch": 2.1154529307282415, + "grad_norm": 0.41731344402610704, + "learning_rate": 5.369785554536372e-06, + "loss": 0.0379, "step": 2382 }, { - "epoch": 1.0584055074394847, - "grad_norm": 0.674636182160706, - "learning_rate": 9.197096966545896e-06, - "loss": 0.0608, + "epoch": 2.116341030195382, + "grad_norm": 0.49826717221477446, + "learning_rate": 5.365920418964973e-06, + "loss": 0.0406, "step": 2383 }, { - "epoch": 1.0588496557850322, - "grad_norm": 0.7211986590126924, - "learning_rate": 9.196043087019651e-06, - "loss": 0.0675, + "epoch": 2.117229129662522, + "grad_norm": 0.46991340393479397, + "learning_rate": 5.362055063540186e-06, + "loss": 0.0405, "step": 2384 }, { - "epoch": 1.0592938041305797, - "grad_norm": 0.48469937945694536, - "learning_rate": 9.194988576756855e-06, - "loss": 0.0533, + "epoch": 2.1181172291296626, + "grad_norm": 0.3551564687160109, + "learning_rate": 5.3581894905844066e-06, + "loss": 0.0373, "step": 2385 }, { - "epoch": 1.059737952476127, - "grad_norm": 0.6223548159808464, - "learning_rate": 9.193933435916013e-06, - "loss": 0.0609, + "epoch": 2.1190053285968027, + "grad_norm": 0.38328144224858046, + "learning_rate": 5.35432370242016e-06, + "loss": 0.0398, "step": 2386 }, { - "epoch": 1.0601821008216745, - "grad_norm": 0.4702625375198161, - "learning_rate": 9.192877664655736e-06, - "loss": 0.0629, + "epoch": 2.119893428063943, + "grad_norm": 0.3514847195641634, + "learning_rate": 5.350457701370099e-06, + "loss": 0.0389, "step": 2387 }, { - "epoch": 1.0606262491672218, - "grad_norm": 0.6156516266579465, - "learning_rate": 9.191821263134718e-06, - "loss": 0.0519, + "epoch": 2.1207815275310833, + "grad_norm": 0.36488905651696707, + "learning_rate": 5.346591489757008e-06, + "loss": 0.0399, "step": 2388 }, { - "epoch": 1.0610703975127693, - "grad_norm": 0.41634294802260347, - "learning_rate": 9.19076423151176e-06, - "loss": 0.0339, + "epoch": 2.121669626998224, + "grad_norm": 0.3875304275323844, + "learning_rate": 5.3427250699037925e-06, + "loss": 0.042, "step": 2389 }, { - "epoch": 1.0615145458583166, - "grad_norm": 0.5676744907352943, - "learning_rate": 9.189706569945749e-06, - "loss": 0.0672, + "epoch": 2.122557726465364, + "grad_norm": 0.45757383320267986, + "learning_rate": 5.338858444133489e-06, + "loss": 0.0558, "step": 2390 }, { - "epoch": 1.0619586942038641, - "grad_norm": 0.5506701658376884, - "learning_rate": 9.188648278595669e-06, - "loss": 0.0549, + "epoch": 2.1234458259325044, + "grad_norm": 0.3533317774363015, + "learning_rate": 5.334991614769254e-06, + "loss": 0.0421, "step": 2391 }, { - "epoch": 1.0624028425494114, - "grad_norm": 0.5548831925430109, - "learning_rate": 9.187589357620602e-06, - "loss": 0.0495, + "epoch": 2.124333925399645, + "grad_norm": 0.4164393587446632, + "learning_rate": 5.331124584134368e-06, + "loss": 0.0445, "step": 2392 }, { - "epoch": 1.062846990894959, - "grad_norm": 0.5733351694159566, - "learning_rate": 9.186529807179715e-06, - "loss": 0.0413, + "epoch": 2.125222024866785, + "grad_norm": 0.44989228202393267, + "learning_rate": 5.327257354552231e-06, + "loss": 0.042, "step": 2393 }, { - "epoch": 1.0632911392405062, - "grad_norm": 0.4263224419709196, - "learning_rate": 9.185469627432287e-06, - "loss": 0.0376, + "epoch": 2.1261101243339255, + "grad_norm": 0.3137401366512473, + "learning_rate": 5.323389928346364e-06, + "loss": 0.0304, "step": 2394 }, { - "epoch": 1.0637352875860537, - "grad_norm": 0.5107765087622207, - "learning_rate": 9.184408818537673e-06, - "loss": 0.0441, + "epoch": 2.1269982238010656, + "grad_norm": 0.38091384172821674, + "learning_rate": 5.319522307840404e-06, + "loss": 0.0338, "step": 2395 }, { - "epoch": 1.0641794359316012, - "grad_norm": 0.4468370290354222, - "learning_rate": 9.183347380655332e-06, - "loss": 0.0393, + "epoch": 2.127886323268206, + "grad_norm": 0.5376459504056712, + "learning_rate": 5.315654495358108e-06, + "loss": 0.045, "step": 2396 }, { - "epoch": 1.0646235842771485, - "grad_norm": 0.4890359715306359, - "learning_rate": 9.182285313944818e-06, - "loss": 0.0481, + "epoch": 2.1287744227353462, + "grad_norm": 0.36917158695970886, + "learning_rate": 5.3117864932233445e-06, + "loss": 0.0355, "step": 2397 }, { - "epoch": 1.065067732622696, - "grad_norm": 0.45195434165164133, - "learning_rate": 9.181222618565777e-06, - "loss": 0.0509, + "epoch": 2.1296625222024868, + "grad_norm": 0.3223307555476067, + "learning_rate": 5.3079183037601016e-06, + "loss": 0.0392, "step": 2398 }, { - "epoch": 1.0655118809682433, - "grad_norm": 0.49289142398585184, - "learning_rate": 9.180159294677948e-06, - "loss": 0.0515, + "epoch": 2.130550621669627, + "grad_norm": 0.38856369673299446, + "learning_rate": 5.304049929292472e-06, + "loss": 0.0401, "step": 2399 }, { - "epoch": 1.0659560293137909, - "grad_norm": 0.42385971921718624, - "learning_rate": 9.179095342441171e-06, - "loss": 0.0448, + "epoch": 2.1314387211367674, + "grad_norm": 0.4974670677937293, + "learning_rate": 5.300181372144665e-06, + "loss": 0.0452, "step": 2400 }, { - "epoch": 1.0664001776593381, - "grad_norm": 0.6172728890244491, - "learning_rate": 9.178030762015372e-06, - "loss": 0.0401, + "epoch": 2.1323268206039074, + "grad_norm": 0.4576734124078994, + "learning_rate": 5.296312634641e-06, + "loss": 0.046, "step": 2401 }, { - "epoch": 1.0668443260048857, - "grad_norm": 0.5217625747110314, - "learning_rate": 9.176965553560578e-06, - "loss": 0.0578, + "epoch": 2.133214920071048, + "grad_norm": 0.36265008372983243, + "learning_rate": 5.292443719105903e-06, + "loss": 0.0363, "step": 2402 }, { - "epoch": 1.067288474350433, - "grad_norm": 0.4881611280755445, - "learning_rate": 9.175899717236907e-06, - "loss": 0.0437, + "epoch": 2.1341030195381885, + "grad_norm": 0.402050964517044, + "learning_rate": 5.288574627863906e-06, + "loss": 0.0401, "step": 2403 }, { - "epoch": 1.0677326226959805, - "grad_norm": 0.5243251412540129, - "learning_rate": 9.174833253204571e-06, - "loss": 0.0408, + "epoch": 2.1349911190053286, + "grad_norm": 0.4020128427856151, + "learning_rate": 5.284705363239651e-06, + "loss": 0.0417, "step": 2404 }, { - "epoch": 1.068176771041528, - "grad_norm": 0.4299956266707952, - "learning_rate": 9.17376616162388e-06, - "loss": 0.0416, + "epoch": 2.135879218472469, + "grad_norm": 0.39612827186709665, + "learning_rate": 5.280835927557876e-06, + "loss": 0.04, "step": 2405 }, { - "epoch": 1.0686209193870753, - "grad_norm": 0.5888767684513153, - "learning_rate": 9.172698442655236e-06, - "loss": 0.064, + "epoch": 2.136767317939609, + "grad_norm": 0.44749739918923315, + "learning_rate": 5.27696632314343e-06, + "loss": 0.0421, "step": 2406 }, { - "epoch": 1.0690650677326228, - "grad_norm": 0.4022150666774181, - "learning_rate": 9.171630096459134e-06, - "loss": 0.0416, + "epoch": 2.1376554174067497, + "grad_norm": 0.4011125419385018, + "learning_rate": 5.27309655232126e-06, + "loss": 0.0329, "step": 2407 }, { - "epoch": 1.06950921607817, - "grad_norm": 0.9093917263815293, - "learning_rate": 9.170561123196165e-06, - "loss": 0.0573, + "epoch": 2.1385435168738898, + "grad_norm": 0.39472381503967285, + "learning_rate": 5.269226617416414e-06, + "loss": 0.0392, "step": 2408 }, { - "epoch": 1.0699533644237176, - "grad_norm": 0.5641484333920734, - "learning_rate": 9.169491523027012e-06, - "loss": 0.0527, + "epoch": 2.1394316163410303, + "grad_norm": 0.3719757758537787, + "learning_rate": 5.265356520754033e-06, + "loss": 0.0331, "step": 2409 }, { - "epoch": 1.0703975127692649, - "grad_norm": 0.5614858136344199, - "learning_rate": 9.168421296112457e-06, - "loss": 0.0525, + "epoch": 2.1403197158081704, + "grad_norm": 0.450768938323179, + "learning_rate": 5.2614862646593665e-06, + "loss": 0.0411, "step": 2410 }, { - "epoch": 1.0708416611148124, - "grad_norm": 0.5361742414415347, - "learning_rate": 9.167350442613371e-06, - "loss": 0.0512, + "epoch": 2.141207815275311, + "grad_norm": 0.3439741035367346, + "learning_rate": 5.257615851457749e-06, + "loss": 0.041, "step": 2411 }, { - "epoch": 1.0712858094603597, - "grad_norm": 0.4065744899599026, - "learning_rate": 9.166278962690724e-06, - "loss": 0.0352, + "epoch": 2.142095914742451, + "grad_norm": 0.3722561847725203, + "learning_rate": 5.253745283474615e-06, + "loss": 0.0374, "step": 2412 }, { - "epoch": 1.0717299578059072, - "grad_norm": 0.7190511616030503, - "learning_rate": 9.165206856505577e-06, - "loss": 0.0711, + "epoch": 2.1429840142095915, + "grad_norm": 0.34655027755283196, + "learning_rate": 5.2498745630354895e-06, + "loss": 0.0337, "step": 2413 }, { - "epoch": 1.0721741061514547, - "grad_norm": 0.5266458384013354, - "learning_rate": 9.164134124219085e-06, - "loss": 0.0436, + "epoch": 2.143872113676732, + "grad_norm": 0.37294456519210245, + "learning_rate": 5.246003692465993e-06, + "loss": 0.0412, "step": 2414 }, { - "epoch": 1.072618254497002, - "grad_norm": 0.5599669781296026, - "learning_rate": 9.163060765992495e-06, - "loss": 0.0759, + "epoch": 2.144760213143872, + "grad_norm": 0.3286926436051891, + "learning_rate": 5.242132674091828e-06, + "loss": 0.0386, "step": 2415 }, { - "epoch": 1.0730624028425495, - "grad_norm": 0.5128109774041522, - "learning_rate": 9.161986781987156e-06, - "loss": 0.0455, + "epoch": 2.1456483126110126, + "grad_norm": 0.3958233459670188, + "learning_rate": 5.238261510238797e-06, + "loss": 0.0463, "step": 2416 }, { - "epoch": 1.0735065511880968, - "grad_norm": 0.4078193083238796, - "learning_rate": 9.160912172364503e-06, - "loss": 0.0403, + "epoch": 2.1465364120781527, + "grad_norm": 0.50878401519936, + "learning_rate": 5.234390203232781e-06, + "loss": 0.0508, "step": 2417 }, { - "epoch": 1.0739506995336443, - "grad_norm": 0.6961390181093496, - "learning_rate": 9.15983693728607e-06, - "loss": 0.0757, + "epoch": 2.1474245115452932, + "grad_norm": 0.3528926521205191, + "learning_rate": 5.230518755399749e-06, + "loss": 0.04, "step": 2418 }, { - "epoch": 1.0743948478791916, - "grad_norm": 0.518134789703249, - "learning_rate": 9.158761076913481e-06, - "loss": 0.0458, + "epoch": 2.1483126110124333, + "grad_norm": 0.3876254327536883, + "learning_rate": 5.22664716906576e-06, + "loss": 0.0334, "step": 2419 }, { - "epoch": 1.074838996224739, - "grad_norm": 0.6466366059180623, - "learning_rate": 9.157684591408458e-06, - "loss": 0.0639, + "epoch": 2.149200710479574, + "grad_norm": 0.4164873782339424, + "learning_rate": 5.222775446556949e-06, + "loss": 0.0478, "step": 2420 }, { - "epoch": 1.0752831445702864, - "grad_norm": 0.45726102190092227, - "learning_rate": 9.156607480932813e-06, - "loss": 0.0485, + "epoch": 2.150088809946714, + "grad_norm": 0.48165592626531356, + "learning_rate": 5.2189035901995345e-06, + "loss": 0.0464, "step": 2421 }, { - "epoch": 1.075727292915834, - "grad_norm": 0.47111677703640187, - "learning_rate": 9.155529745648457e-06, - "loss": 0.0576, + "epoch": 2.1509769094138544, + "grad_norm": 0.2926323123110968, + "learning_rate": 5.21503160231982e-06, + "loss": 0.0286, "step": 2422 }, { - "epoch": 1.0761714412613812, - "grad_norm": 0.4424777927187194, - "learning_rate": 9.154451385717387e-06, - "loss": 0.0415, + "epoch": 2.1518650088809945, + "grad_norm": 0.3862857577544898, + "learning_rate": 5.2111594852441815e-06, + "loss": 0.0401, "step": 2423 }, { - "epoch": 1.0766155896069287, - "grad_norm": 0.5637865585741092, - "learning_rate": 9.153372401301706e-06, - "loss": 0.0569, + "epoch": 2.152753108348135, + "grad_norm": 0.45724493121170556, + "learning_rate": 5.207287241299078e-06, + "loss": 0.0438, "step": 2424 }, { - "epoch": 1.0770597379524762, - "grad_norm": 0.6193090479661898, - "learning_rate": 9.152292792563596e-06, - "loss": 0.0541, + "epoch": 2.153641207815275, + "grad_norm": 0.46963850875746427, + "learning_rate": 5.203414872811042e-06, + "loss": 0.0367, "step": 2425 }, { - "epoch": 1.0775038862980235, - "grad_norm": 0.5604767145835976, - "learning_rate": 9.151212559665345e-06, - "loss": 0.0487, + "epoch": 2.1545293072824157, + "grad_norm": 0.3874755323565219, + "learning_rate": 5.199542382106683e-06, + "loss": 0.0377, "step": 2426 }, { - "epoch": 1.077948034643571, - "grad_norm": 0.614420431496054, - "learning_rate": 9.150131702769332e-06, - "loss": 0.0543, + "epoch": 2.1554174067495557, + "grad_norm": 0.42098283646840423, + "learning_rate": 5.195669771512678e-06, + "loss": 0.04, "step": 2427 }, { - "epoch": 1.0783921829891183, - "grad_norm": 0.500155979890897, - "learning_rate": 9.149050222038024e-06, - "loss": 0.0543, + "epoch": 2.1563055062166963, + "grad_norm": 0.4038852165924546, + "learning_rate": 5.191797043355784e-06, + "loss": 0.0344, "step": 2428 }, { - "epoch": 1.0788363313346658, - "grad_norm": 0.6046519445448296, - "learning_rate": 9.147968117633988e-06, - "loss": 0.0669, + "epoch": 2.157193605683837, + "grad_norm": 0.46781236227378614, + "learning_rate": 5.1879241999628225e-06, + "loss": 0.0509, "step": 2429 }, { - "epoch": 1.0792804796802131, - "grad_norm": 0.5730375661279963, - "learning_rate": 9.14688538971988e-06, - "loss": 0.0495, + "epoch": 2.158081705150977, + "grad_norm": 0.475627220185539, + "learning_rate": 5.18405124366069e-06, + "loss": 0.0553, "step": 2430 }, { - "epoch": 1.0797246280257606, - "grad_norm": 0.482430110901872, - "learning_rate": 9.145802038458457e-06, - "loss": 0.0484, + "epoch": 2.1589698046181174, + "grad_norm": 0.4750349019531961, + "learning_rate": 5.180178176776343e-06, + "loss": 0.0397, "step": 2431 }, { - "epoch": 1.080168776371308, - "grad_norm": 0.49413095926471534, - "learning_rate": 9.144718064012562e-06, - "loss": 0.0482, + "epoch": 2.1598579040852575, + "grad_norm": 0.41955932914949795, + "learning_rate": 5.176305001636815e-06, + "loss": 0.0407, "step": 2432 }, { - "epoch": 1.0806129247168554, - "grad_norm": 0.6274129207366034, - "learning_rate": 9.143633466545136e-06, - "loss": 0.0609, + "epoch": 2.160746003552398, + "grad_norm": 0.5094174279352747, + "learning_rate": 5.172431720569193e-06, + "loss": 0.051, "step": 2433 }, { - "epoch": 1.0810570730624027, - "grad_norm": 0.4754419676708797, - "learning_rate": 9.142548246219212e-06, - "loss": 0.0539, + "epoch": 2.161634103019538, + "grad_norm": 0.6499009036247565, + "learning_rate": 5.168558335900637e-06, + "loss": 0.0589, "step": 2434 }, { - "epoch": 1.0815012214079502, - "grad_norm": 0.5129065641839461, - "learning_rate": 9.141462403197917e-06, - "loss": 0.0502, + "epoch": 2.1625222024866786, + "grad_norm": 0.928135622496828, + "learning_rate": 5.164684849958361e-06, + "loss": 0.0471, "step": 2435 }, { - "epoch": 1.0819453697534978, - "grad_norm": 0.5492083841021022, - "learning_rate": 9.14037593764447e-06, - "loss": 0.0547, + "epoch": 2.1634103019538187, + "grad_norm": 0.4229532799999624, + "learning_rate": 5.160811265069649e-06, + "loss": 0.0493, "step": 2436 }, { - "epoch": 1.082389518099045, - "grad_norm": 0.5638343322048809, - "learning_rate": 9.139288849722188e-06, - "loss": 0.0552, + "epoch": 2.164298401420959, + "grad_norm": 0.6163607840284632, + "learning_rate": 5.156937583561836e-06, + "loss": 0.0524, "step": 2437 }, { - "epoch": 1.0828336664445926, - "grad_norm": 0.48234420470469497, - "learning_rate": 9.138201139594478e-06, - "loss": 0.0509, + "epoch": 2.1651865008880993, + "grad_norm": 0.49230292894102246, + "learning_rate": 5.153063807762323e-06, + "loss": 0.0365, "step": 2438 }, { - "epoch": 1.0832778147901398, - "grad_norm": 0.48233545504362385, - "learning_rate": 9.137112807424842e-06, - "loss": 0.0618, + "epoch": 2.16607460035524, + "grad_norm": 0.4943115148059102, + "learning_rate": 5.149189939998559e-06, + "loss": 0.0452, "step": 2439 }, { - "epoch": 1.0837219631356874, - "grad_norm": 0.6597245369594125, - "learning_rate": 9.136023853376872e-06, - "loss": 0.0505, + "epoch": 2.1669626998223803, + "grad_norm": 0.467686666274734, + "learning_rate": 5.145315982598055e-06, + "loss": 0.0464, "step": 2440 }, { - "epoch": 1.0841661114812347, - "grad_norm": 0.8640144400788339, - "learning_rate": 9.134934277614258e-06, - "loss": 0.0552, + "epoch": 2.1678507992895204, + "grad_norm": 0.47532599194431346, + "learning_rate": 5.141441937888373e-06, + "loss": 0.0468, "step": 2441 }, { - "epoch": 1.0846102598267822, - "grad_norm": 0.5151117640182895, - "learning_rate": 9.133844080300783e-06, - "loss": 0.0552, + "epoch": 2.168738898756661, + "grad_norm": 0.3892145559248581, + "learning_rate": 5.137567808197129e-06, + "loss": 0.0411, "step": 2442 }, { - "epoch": 1.0850544081723295, - "grad_norm": 0.5553941913181785, - "learning_rate": 9.13275326160032e-06, - "loss": 0.068, + "epoch": 2.169626998223801, + "grad_norm": 0.36256033283499056, + "learning_rate": 5.133693595851987e-06, + "loss": 0.0339, "step": 2443 }, { - "epoch": 1.085498556517877, - "grad_norm": 1.2548272109304157, - "learning_rate": 9.131661821676839e-06, - "loss": 0.0615, + "epoch": 2.1705150976909415, + "grad_norm": 0.49318274840470505, + "learning_rate": 5.129819303180664e-06, + "loss": 0.0318, "step": 2444 }, { - "epoch": 1.0859427048634245, - "grad_norm": 0.7349991566989792, - "learning_rate": 9.130569760694402e-06, - "loss": 0.0592, + "epoch": 2.1714031971580816, + "grad_norm": 0.4068342729967645, + "learning_rate": 5.125944932510924e-06, + "loss": 0.045, "step": 2445 }, { - "epoch": 1.0863868532089718, - "grad_norm": 0.4235071999153545, - "learning_rate": 9.129477078817165e-06, - "loss": 0.0399, + "epoch": 2.172291296625222, + "grad_norm": 0.40835516360053953, + "learning_rate": 5.1220704861705775e-06, + "loss": 0.0308, "step": 2446 }, { - "epoch": 1.0868310015545193, - "grad_norm": 0.5132669145700011, - "learning_rate": 9.128383776209372e-06, - "loss": 0.051, + "epoch": 2.173179396092362, + "grad_norm": 0.4293983663737367, + "learning_rate": 5.1181959664874805e-06, + "loss": 0.0396, "step": 2447 }, { - "epoch": 1.0872751499000666, - "grad_norm": 1.6678578417052072, - "learning_rate": 9.127289853035371e-06, - "loss": 0.068, + "epoch": 2.1740674955595027, + "grad_norm": 0.3551141328057159, + "learning_rate": 5.114321375789533e-06, + "loss": 0.0315, "step": 2448 }, { - "epoch": 1.087719298245614, - "grad_norm": 0.3444388637101336, - "learning_rate": 9.126195309459593e-06, - "loss": 0.0326, + "epoch": 2.174955595026643, + "grad_norm": 0.39060087437266106, + "learning_rate": 5.1104467164046826e-06, + "loss": 0.0389, "step": 2449 }, { - "epoch": 1.0881634465911614, - "grad_norm": 0.38355662180503935, - "learning_rate": 9.12510014564657e-06, - "loss": 0.0552, + "epoch": 2.1758436944937833, + "grad_norm": 0.4575507982502618, + "learning_rate": 5.106571990660906e-06, + "loss": 0.0437, "step": 2450 }, { - "epoch": 1.0886075949367089, - "grad_norm": 0.5490923753617978, - "learning_rate": 9.124004361760921e-06, - "loss": 0.0598, + "epoch": 2.1767317939609234, + "grad_norm": 0.6223067818026029, + "learning_rate": 5.102697200886233e-06, + "loss": 0.0445, "step": 2451 }, { - "epoch": 1.0890517432822562, - "grad_norm": 0.49443123554529045, - "learning_rate": 9.122907957967363e-06, - "loss": 0.0545, + "epoch": 2.177619893428064, + "grad_norm": 0.4578559816803396, + "learning_rate": 5.098822349408723e-06, + "loss": 0.0408, "step": 2452 }, { - "epoch": 1.0894958916278037, - "grad_norm": 0.47265541042225623, - "learning_rate": 9.121810934430702e-06, - "loss": 0.0523, + "epoch": 2.1785079928952045, + "grad_norm": 0.4007856222752702, + "learning_rate": 5.094947438556478e-06, + "loss": 0.0426, "step": 2453 }, { - "epoch": 1.0899400399733512, - "grad_norm": 0.6134212234858121, - "learning_rate": 9.12071329131584e-06, - "loss": 0.0669, + "epoch": 2.1793960923623446, + "grad_norm": 0.4543726905784699, + "learning_rate": 5.091072470657632e-06, + "loss": 0.0477, "step": 2454 }, { - "epoch": 1.0903841883188985, - "grad_norm": 0.5102377020582352, - "learning_rate": 9.119615028787771e-06, - "loss": 0.0488, + "epoch": 2.180284191829485, + "grad_norm": 0.4401945881834879, + "learning_rate": 5.087197448040356e-06, + "loss": 0.0495, "step": 2455 }, { - "epoch": 1.090828336664446, - "grad_norm": 0.5477789256953114, - "learning_rate": 9.118516147011585e-06, - "loss": 0.0609, + "epoch": 2.181172291296625, + "grad_norm": 0.37683087318402914, + "learning_rate": 5.08332237303285e-06, + "loss": 0.0407, "step": 2456 }, { - "epoch": 1.0912724850099933, - "grad_norm": 0.5014262210362811, - "learning_rate": 9.117416646152459e-06, - "loss": 0.0455, + "epoch": 2.1820603907637657, + "grad_norm": 0.4454427822161007, + "learning_rate": 5.079447247963353e-06, + "loss": 0.0409, "step": 2457 }, { - "epoch": 1.0917166333555408, - "grad_norm": 0.5068151485473731, - "learning_rate": 9.11631652637567e-06, - "loss": 0.0558, + "epoch": 2.1829484902309058, + "grad_norm": 0.3944633204708997, + "learning_rate": 5.075572075160124e-06, + "loss": 0.0457, "step": 2458 }, { - "epoch": 1.092160781701088, - "grad_norm": 0.5967678649273153, - "learning_rate": 9.115215787846583e-06, - "loss": 0.0576, + "epoch": 2.1838365896980463, + "grad_norm": 0.457263937853391, + "learning_rate": 5.071696856951456e-06, + "loss": 0.0407, "step": 2459 }, { - "epoch": 1.0926049300466356, - "grad_norm": 0.5382239114996434, - "learning_rate": 9.114114430730656e-06, - "loss": 0.0568, + "epoch": 2.1847246891651864, + "grad_norm": 0.46945903448285464, + "learning_rate": 5.067821595665672e-06, + "loss": 0.0409, "step": 2460 }, { - "epoch": 1.093049078392183, - "grad_norm": 0.46675820455299727, - "learning_rate": 9.113012455193444e-06, - "loss": 0.0437, + "epoch": 2.185612788632327, + "grad_norm": 0.4454713636140774, + "learning_rate": 5.063946293631117e-06, + "loss": 0.0399, "step": 2461 }, { - "epoch": 1.0934932267377304, - "grad_norm": 0.6129912443014283, - "learning_rate": 9.111909861400594e-06, - "loss": 0.0584, + "epoch": 2.186500888099467, + "grad_norm": 0.3639475258217265, + "learning_rate": 5.060070953176161e-06, + "loss": 0.0346, "step": 2462 }, { - "epoch": 1.0939373750832777, - "grad_norm": 0.49745724599799956, - "learning_rate": 9.110806649517841e-06, - "loss": 0.0541, + "epoch": 2.1873889875666075, + "grad_norm": 0.3692077335640843, + "learning_rate": 5.0561955766291956e-06, + "loss": 0.0357, "step": 2463 }, { - "epoch": 1.0943815234288252, - "grad_norm": 0.5223020387615885, - "learning_rate": 9.109702819711018e-06, - "loss": 0.0579, + "epoch": 2.1882770870337476, + "grad_norm": 0.33478174781272485, + "learning_rate": 5.05232016631864e-06, + "loss": 0.0444, "step": 2464 }, { - "epoch": 1.0948256717743727, - "grad_norm": 0.522897777305782, - "learning_rate": 9.108598372146052e-06, - "loss": 0.065, + "epoch": 2.189165186500888, + "grad_norm": 0.659648827807359, + "learning_rate": 5.048444724572926e-06, + "loss": 0.0403, "step": 2465 }, { - "epoch": 1.09526982011992, - "grad_norm": 0.4978718424633595, - "learning_rate": 9.107493306988955e-06, - "loss": 0.057, + "epoch": 2.1900532859680286, + "grad_norm": 0.5913352691181757, + "learning_rate": 5.04456925372051e-06, + "loss": 0.0448, "step": 2466 }, { - "epoch": 1.0957139684654675, - "grad_norm": 0.41889427668506446, - "learning_rate": 9.10638762440584e-06, - "loss": 0.0546, + "epoch": 2.1909413854351687, + "grad_norm": 0.3353058086495773, + "learning_rate": 5.040693756089865e-06, + "loss": 0.0359, "step": 2467 }, { - "epoch": 1.0961581168110148, - "grad_norm": 0.4889531194168305, - "learning_rate": 9.10528132456291e-06, - "loss": 0.0475, + "epoch": 2.191829484902309, + "grad_norm": 0.7156363476313247, + "learning_rate": 5.036818234009475e-06, + "loss": 0.0479, "step": 2468 }, { - "epoch": 1.0966022651565623, - "grad_norm": 0.49561727802236016, - "learning_rate": 9.10417440762646e-06, - "loss": 0.0624, + "epoch": 2.1927175843694493, + "grad_norm": 0.3959098125472889, + "learning_rate": 5.032942689807846e-06, + "loss": 0.038, "step": 2469 }, { - "epoch": 1.0970464135021096, - "grad_norm": 0.5035282177847037, - "learning_rate": 9.10306687376288e-06, - "loss": 0.0613, + "epoch": 2.19360568383659, + "grad_norm": 0.3429627851552565, + "learning_rate": 5.029067125813491e-06, + "loss": 0.0331, "step": 2470 }, { - "epoch": 1.0974905618476571, - "grad_norm": 0.44693886713635184, - "learning_rate": 9.101958723138651e-06, - "loss": 0.0505, + "epoch": 2.19449378330373, + "grad_norm": 0.3924177849475489, + "learning_rate": 5.025191544354943e-06, + "loss": 0.035, "step": 2471 }, { - "epoch": 1.0979347101932044, - "grad_norm": 0.6676580453833789, - "learning_rate": 9.100849955920344e-06, - "loss": 0.0637, + "epoch": 2.1953818827708704, + "grad_norm": 0.4869526520943761, + "learning_rate": 5.021315947760733e-06, + "loss": 0.049, "step": 2472 }, { - "epoch": 1.098378858538752, - "grad_norm": 0.47501330783440915, - "learning_rate": 9.099740572274627e-06, - "loss": 0.0391, + "epoch": 2.1962699822380105, + "grad_norm": 0.3662791431581093, + "learning_rate": 5.0174403383594164e-06, + "loss": 0.0411, "step": 2473 }, { - "epoch": 1.0988230068842995, - "grad_norm": 0.6923283723141113, - "learning_rate": 9.098630572368262e-06, - "loss": 0.0518, + "epoch": 2.197158081705151, + "grad_norm": 0.42488731888157805, + "learning_rate": 5.013564718479541e-06, + "loss": 0.0404, "step": 2474 }, { - "epoch": 1.0992671552298467, - "grad_norm": 0.47645749570109275, - "learning_rate": 9.097519956368096e-06, - "loss": 0.0273, + "epoch": 2.198046181172291, + "grad_norm": 0.43766610179518745, + "learning_rate": 5.009689090449672e-06, + "loss": 0.0427, "step": 2475 }, { - "epoch": 1.0997113035753943, - "grad_norm": 0.8125207380279094, - "learning_rate": 9.096408724441078e-06, - "loss": 0.062, + "epoch": 2.1989342806394316, + "grad_norm": 0.559354053145988, + "learning_rate": 5.0058134565983755e-06, + "loss": 0.0497, "step": 2476 }, { - "epoch": 1.1001554519209416, - "grad_norm": 0.6208955890687268, - "learning_rate": 9.09529687675424e-06, - "loss": 0.0399, + "epoch": 2.199822380106572, + "grad_norm": 0.43829322947353366, + "learning_rate": 5.001937819254222e-06, + "loss": 0.0453, "step": 2477 }, { - "epoch": 1.100599600266489, - "grad_norm": 0.6086645282708443, - "learning_rate": 9.094184413474716e-06, - "loss": 0.0538, + "epoch": 2.2007104795737122, + "grad_norm": 0.33712304531982884, + "learning_rate": 4.99806218074578e-06, + "loss": 0.0365, "step": 2478 }, { - "epoch": 1.1010437486120364, - "grad_norm": 0.5638622958089239, - "learning_rate": 9.093071334769727e-06, - "loss": 0.087, + "epoch": 2.2015985790408528, + "grad_norm": 0.3471908460157216, + "learning_rate": 4.994186543401625e-06, + "loss": 0.0406, "step": 2479 }, { - "epoch": 1.1014878969575839, - "grad_norm": 0.5860122134432946, - "learning_rate": 9.091957640806585e-06, - "loss": 0.0514, + "epoch": 2.202486678507993, + "grad_norm": 0.3395579542540958, + "learning_rate": 4.990310909550329e-06, + "loss": 0.0384, "step": 2480 }, { - "epoch": 1.1019320453031312, - "grad_norm": 0.5483193797542968, - "learning_rate": 9.090843331752704e-06, - "loss": 0.0837, + "epoch": 2.2033747779751334, + "grad_norm": 0.4184935348825603, + "learning_rate": 4.98643528152046e-06, + "loss": 0.045, "step": 2481 }, { - "epoch": 1.1023761936486787, - "grad_norm": 0.671106831086069, - "learning_rate": 9.089728407775576e-06, - "loss": 0.0533, + "epoch": 2.2042628774422734, + "grad_norm": 0.45204372784847996, + "learning_rate": 4.982559661640587e-06, + "loss": 0.0406, "step": 2482 }, { - "epoch": 1.1028203419942262, - "grad_norm": 0.471025449998095, - "learning_rate": 9.088612869042794e-06, - "loss": 0.0694, + "epoch": 2.205150976909414, + "grad_norm": 0.3896104210146149, + "learning_rate": 4.978684052239268e-06, + "loss": 0.0402, "step": 2483 }, { - "epoch": 1.1032644903397735, - "grad_norm": 0.46180112116306377, - "learning_rate": 9.087496715722049e-06, - "loss": 0.0426, + "epoch": 2.206039076376554, + "grad_norm": 0.35630909986035014, + "learning_rate": 4.974808455645059e-06, + "loss": 0.0336, "step": 2484 }, { - "epoch": 1.103708638685321, - "grad_norm": 0.49860403863116065, - "learning_rate": 9.08637994798111e-06, - "loss": 0.0583, + "epoch": 2.2069271758436946, + "grad_norm": 0.34952584839653356, + "learning_rate": 4.970932874186509e-06, + "loss": 0.0316, "step": 2485 }, { - "epoch": 1.1041527870308683, - "grad_norm": 0.5897260124865925, - "learning_rate": 9.08526256598785e-06, - "loss": 0.0648, + "epoch": 2.2078152753108347, + "grad_norm": 0.4138651550318472, + "learning_rate": 4.967057310192157e-06, + "loss": 0.0442, "step": 2486 }, { - "epoch": 1.1045969353764158, - "grad_norm": 0.453592540271068, - "learning_rate": 9.084144569910229e-06, - "loss": 0.0455, + "epoch": 2.208703374777975, + "grad_norm": 0.3472660999027293, + "learning_rate": 4.963181765990526e-06, + "loss": 0.0344, "step": 2487 }, { - "epoch": 1.105041083721963, - "grad_norm": 0.6113711566277047, - "learning_rate": 9.083025959916302e-06, - "loss": 0.0556, + "epoch": 2.2095914742451153, + "grad_norm": 0.4229061559000631, + "learning_rate": 4.959306243910137e-06, + "loss": 0.0454, "step": 2488 }, { - "epoch": 1.1054852320675106, - "grad_norm": 0.4026753684401193, - "learning_rate": 9.081906736174217e-06, - "loss": 0.0347, + "epoch": 2.210479573712256, + "grad_norm": 0.46172989835250633, + "learning_rate": 4.955430746279491e-06, + "loss": 0.0472, "step": 2489 }, { - "epoch": 1.1059293804130579, - "grad_norm": 0.7535472922225546, - "learning_rate": 9.080786898852207e-06, - "loss": 0.0561, + "epoch": 2.211367673179396, + "grad_norm": 0.4036318673922448, + "learning_rate": 4.9515552754270755e-06, + "loss": 0.0427, "step": 2490 }, { - "epoch": 1.1063735287586054, - "grad_norm": 0.7712189521196268, - "learning_rate": 9.079666448118607e-06, - "loss": 0.0815, + "epoch": 2.2122557726465364, + "grad_norm": 0.541114161640207, + "learning_rate": 4.947679833681362e-06, + "loss": 0.0447, "step": 2491 }, { - "epoch": 1.1068176771041527, - "grad_norm": 0.549393105917364, - "learning_rate": 9.07854538414184e-06, - "loss": 0.0411, + "epoch": 2.213143872113677, + "grad_norm": 0.6612485939486665, + "learning_rate": 4.943804423370805e-06, + "loss": 0.0375, "step": 2492 }, { - "epoch": 1.1072618254497002, - "grad_norm": 0.8270414561268318, - "learning_rate": 9.077423707090418e-06, - "loss": 0.0697, + "epoch": 2.214031971580817, + "grad_norm": 0.38519461605628913, + "learning_rate": 4.939929046823841e-06, + "loss": 0.0373, "step": 2493 }, { - "epoch": 1.1077059737952477, - "grad_norm": 0.4463853554583335, - "learning_rate": 9.07630141713295e-06, - "loss": 0.0426, + "epoch": 2.2149200710479575, + "grad_norm": 0.43596239859118924, + "learning_rate": 4.936053706368885e-06, + "loss": 0.0369, "step": 2494 }, { - "epoch": 1.108150122140795, - "grad_norm": 0.6269526052059413, - "learning_rate": 9.075178514438133e-06, - "loss": 0.059, + "epoch": 2.2158081705150976, + "grad_norm": 0.5214142272669903, + "learning_rate": 4.93217840433433e-06, + "loss": 0.0487, "step": 2495 }, { - "epoch": 1.1085942704863425, - "grad_norm": 0.7332597224037389, - "learning_rate": 9.074054999174762e-06, - "loss": 0.0647, + "epoch": 2.216696269982238, + "grad_norm": 0.44909038495860126, + "learning_rate": 4.928303143048546e-06, + "loss": 0.0463, "step": 2496 }, { - "epoch": 1.1090384188318898, - "grad_norm": 0.6649774981712752, - "learning_rate": 9.072930871511718e-06, - "loss": 0.0751, + "epoch": 2.217584369449378, + "grad_norm": 0.32007862995712666, + "learning_rate": 4.924427924839877e-06, + "loss": 0.0272, "step": 2497 }, { - "epoch": 1.1094825671774373, - "grad_norm": 0.8146175851807935, - "learning_rate": 9.071806131617976e-06, - "loss": 0.0591, + "epoch": 2.2184724689165187, + "grad_norm": 0.39234455389527295, + "learning_rate": 4.92055275203665e-06, + "loss": 0.0449, "step": 2498 }, { - "epoch": 1.1099267155229846, - "grad_norm": 0.6708407619412956, - "learning_rate": 9.070680779662606e-06, - "loss": 0.0552, + "epoch": 2.219360568383659, + "grad_norm": 0.3507787456257942, + "learning_rate": 4.916677626967151e-06, + "loss": 0.0407, "step": 2499 }, { - "epoch": 1.1103708638685321, - "grad_norm": 0.6510598528132346, - "learning_rate": 9.069554815814765e-06, - "loss": 0.0547, + "epoch": 2.2202486678507993, + "grad_norm": 0.5166786079477914, + "learning_rate": 4.912802551959645e-06, + "loss": 0.048, "step": 2500 }, { - "epoch": 1.1108150122140794, - "grad_norm": 0.7337961607233396, - "learning_rate": 9.068428240243705e-06, - "loss": 0.08, + "epoch": 2.2211367673179394, + "grad_norm": 0.36191579299062243, + "learning_rate": 4.908927529342368e-06, + "loss": 0.0426, "step": 2501 }, { - "epoch": 1.111259160559627, - "grad_norm": 0.9471208974140737, - "learning_rate": 9.067301053118773e-06, - "loss": 0.0575, + "epoch": 2.22202486678508, + "grad_norm": 0.41294413343315517, + "learning_rate": 4.905052561443524e-06, + "loss": 0.0483, "step": 2502 }, { - "epoch": 1.1117033089051742, - "grad_norm": 0.6247228777438872, - "learning_rate": 9.066173254609399e-06, - "loss": 0.0593, + "epoch": 2.2229129662522205, + "grad_norm": 0.44858654491957084, + "learning_rate": 4.901177650591279e-06, + "loss": 0.0423, "step": 2503 }, { - "epoch": 1.1121474572507217, - "grad_norm": 0.4119422798089505, - "learning_rate": 9.065044844885111e-06, - "loss": 0.0369, + "epoch": 2.2238010657193605, + "grad_norm": 0.6103950236671091, + "learning_rate": 4.897302799113769e-06, + "loss": 0.044, "step": 2504 }, { - "epoch": 1.1125916055962692, - "grad_norm": 0.5707033305872198, - "learning_rate": 9.063915824115531e-06, - "loss": 0.0586, + "epoch": 2.224689165186501, + "grad_norm": 0.5347523795636051, + "learning_rate": 4.893428009339095e-06, + "loss": 0.0452, "step": 2505 }, { - "epoch": 1.1130357539418165, - "grad_norm": 0.5864819346852046, - "learning_rate": 9.062786192470372e-06, - "loss": 0.0541, + "epoch": 2.225577264653641, + "grad_norm": 0.41891851271819086, + "learning_rate": 4.889553283595321e-06, + "loss": 0.0368, "step": 2506 }, { - "epoch": 1.113479902287364, - "grad_norm": 0.4941216006043351, - "learning_rate": 9.06165595011943e-06, - "loss": 0.0467, + "epoch": 2.2264653641207817, + "grad_norm": 0.6514687549744711, + "learning_rate": 4.885678624210467e-06, + "loss": 0.044, "step": 2507 }, { - "epoch": 1.1139240506329113, - "grad_norm": 0.7179600112901418, - "learning_rate": 9.060525097232603e-06, - "loss": 0.061, + "epoch": 2.2273534635879217, + "grad_norm": 0.4499692226864143, + "learning_rate": 4.881804033512521e-06, + "loss": 0.0426, "step": 2508 }, { - "epoch": 1.1143681989784588, - "grad_norm": 0.536437166711899, - "learning_rate": 9.059393633979881e-06, - "loss": 0.0514, + "epoch": 2.2282415630550623, + "grad_norm": 0.32550795932680926, + "learning_rate": 4.877929513829424e-06, + "loss": 0.0353, "step": 2509 }, { - "epoch": 1.1148123473240061, - "grad_norm": 0.5200228688873401, - "learning_rate": 9.058261560531337e-06, - "loss": 0.0489, + "epoch": 2.2291296625222023, + "grad_norm": 0.3533849860086317, + "learning_rate": 4.874055067489076e-06, + "loss": 0.0403, "step": 2510 }, { - "epoch": 1.1152564956695536, - "grad_norm": 0.8639507676709893, - "learning_rate": 9.057128877057141e-06, - "loss": 0.0777, + "epoch": 2.230017761989343, + "grad_norm": 0.5868618759230922, + "learning_rate": 4.870180696819338e-06, + "loss": 0.0508, "step": 2511 }, { - "epoch": 1.1157006440151012, - "grad_norm": 0.7459642047335504, - "learning_rate": 9.055995583727559e-06, - "loss": 0.0462, + "epoch": 2.230905861456483, + "grad_norm": 0.4739367414083903, + "learning_rate": 4.866306404148015e-06, + "loss": 0.0465, "step": 2512 }, { - "epoch": 1.1161447923606485, - "grad_norm": 0.43557336969374605, - "learning_rate": 9.05486168071294e-06, - "loss": 0.0411, + "epoch": 2.2317939609236235, + "grad_norm": 0.5686742423392736, + "learning_rate": 4.862432191802872e-06, + "loss": 0.0488, "step": 2513 }, { - "epoch": 1.116588940706196, - "grad_norm": 0.4693246881167572, - "learning_rate": 9.05372716818373e-06, - "loss": 0.0317, + "epoch": 2.232682060390764, + "grad_norm": 0.6190975724239128, + "learning_rate": 4.858558062111627e-06, + "loss": 0.0544, "step": 2514 }, { - "epoch": 1.1170330890517433, - "grad_norm": 0.5801314945806954, - "learning_rate": 9.052592046310466e-06, - "loss": 0.0504, + "epoch": 2.233570159857904, + "grad_norm": 0.35318577146410396, + "learning_rate": 4.854684017401946e-06, + "loss": 0.0313, "step": 2515 }, { - "epoch": 1.1174772373972908, - "grad_norm": 0.5303403405055431, - "learning_rate": 9.051456315263775e-06, - "loss": 0.0417, + "epoch": 2.2344582593250446, + "grad_norm": 0.4053766693688942, + "learning_rate": 4.850810060001442e-06, + "loss": 0.0405, "step": 2516 }, { - "epoch": 1.117921385742838, - "grad_norm": 0.4682906045403869, - "learning_rate": 9.05031997521438e-06, - "loss": 0.0441, + "epoch": 2.2353463587921847, + "grad_norm": 0.43843182979747325, + "learning_rate": 4.846936192237678e-06, + "loss": 0.0394, "step": 2517 }, { - "epoch": 1.1183655340883856, - "grad_norm": 0.5610438227567353, - "learning_rate": 9.049183026333089e-06, - "loss": 0.054, + "epoch": 2.236234458259325, + "grad_norm": 0.36834885738507367, + "learning_rate": 4.843062416438164e-06, + "loss": 0.0321, "step": 2518 }, { - "epoch": 1.1188096824339329, - "grad_norm": 0.5309141601127093, - "learning_rate": 9.048045468790805e-06, - "loss": 0.0599, + "epoch": 2.2371225577264653, + "grad_norm": 0.44165322336636986, + "learning_rate": 4.839188734930353e-06, + "loss": 0.0426, "step": 2519 }, { - "epoch": 1.1192538307794804, - "grad_norm": 0.4715253267557383, - "learning_rate": 9.04690730275852e-06, - "loss": 0.0501, + "epoch": 2.238010657193606, + "grad_norm": 0.48788470844480375, + "learning_rate": 4.83531515004164e-06, + "loss": 0.0445, "step": 2520 }, { - "epoch": 1.1196979791250277, - "grad_norm": 0.42642161615167634, - "learning_rate": 9.045768528407326e-06, - "loss": 0.0352, + "epoch": 2.238898756660746, + "grad_norm": 0.4193260877184814, + "learning_rate": 4.831441664099366e-06, + "loss": 0.0455, "step": 2521 }, { - "epoch": 1.1201421274705752, - "grad_norm": 0.8452679234650169, - "learning_rate": 9.044629145908397e-06, - "loss": 0.0792, + "epoch": 2.2397868561278864, + "grad_norm": 0.43055837743335906, + "learning_rate": 4.8275682794308086e-06, + "loss": 0.0459, "step": 2522 }, { - "epoch": 1.1205862758161227, - "grad_norm": 0.5024078744040577, - "learning_rate": 9.043489155433e-06, - "loss": 0.0578, + "epoch": 2.2406749555950265, + "grad_norm": 0.4831699768851515, + "learning_rate": 4.823694998363187e-06, + "loss": 0.0554, "step": 2523 }, { - "epoch": 1.12103042416167, - "grad_norm": 0.6039439853816904, - "learning_rate": 9.042348557152495e-06, - "loss": 0.0591, + "epoch": 2.241563055062167, + "grad_norm": 0.3282028064215389, + "learning_rate": 4.8198218232236574e-06, + "loss": 0.0353, "step": 2524 }, { - "epoch": 1.1214745725072175, - "grad_norm": 0.5200721258931258, - "learning_rate": 9.041207351238336e-06, - "loss": 0.0589, + "epoch": 2.242451154529307, + "grad_norm": 0.319811130083667, + "learning_rate": 4.8159487563393106e-06, + "loss": 0.043, "step": 2525 }, { - "epoch": 1.1219187208527648, - "grad_norm": 0.6201687763530818, - "learning_rate": 9.040065537862063e-06, - "loss": 0.0564, + "epoch": 2.2433392539964476, + "grad_norm": 0.36193086810401764, + "learning_rate": 4.8120758000371775e-06, + "loss": 0.0442, "step": 2526 }, { - "epoch": 1.1223628691983123, - "grad_norm": 0.4816887693066457, - "learning_rate": 9.038923117195313e-06, - "loss": 0.047, + "epoch": 2.2442273534635877, + "grad_norm": 0.33164621134940664, + "learning_rate": 4.808202956644219e-06, + "loss": 0.0339, "step": 2527 }, { - "epoch": 1.1228070175438596, - "grad_norm": 0.4930745832447622, - "learning_rate": 9.037780089409807e-06, - "loss": 0.0437, + "epoch": 2.2451154529307282, + "grad_norm": 1.0731647467839553, + "learning_rate": 4.804330228487323e-06, + "loss": 0.0459, "step": 2528 }, { - "epoch": 1.123251165889407, - "grad_norm": 0.4493292408106485, - "learning_rate": 9.036636454677363e-06, - "loss": 0.0587, + "epoch": 2.2460035523978688, + "grad_norm": 0.41240977356633574, + "learning_rate": 4.800457617893319e-06, + "loss": 0.037, "step": 2529 }, { - "epoch": 1.1236953142349544, - "grad_norm": 0.5080635531655728, - "learning_rate": 9.035492213169892e-06, - "loss": 0.0536, + "epoch": 2.246891651865009, + "grad_norm": 0.42211434934630127, + "learning_rate": 4.796585127188958e-06, + "loss": 0.0402, "step": 2530 }, { - "epoch": 1.124139462580502, - "grad_norm": 0.3142735249050316, - "learning_rate": 9.034347365059389e-06, - "loss": 0.0324, + "epoch": 2.2477797513321494, + "grad_norm": 0.721098937515704, + "learning_rate": 4.792712758700923e-06, + "loss": 0.0417, "step": 2531 }, { - "epoch": 1.1245836109260492, - "grad_norm": 0.4572661512872438, - "learning_rate": 9.033201910517944e-06, - "loss": 0.0484, + "epoch": 2.2486678507992894, + "grad_norm": 0.42751787066261426, + "learning_rate": 4.788840514755819e-06, + "loss": 0.0371, "step": 2532 }, { - "epoch": 1.1250277592715967, - "grad_norm": 0.5858908999181529, - "learning_rate": 9.032055849717743e-06, - "loss": 0.041, + "epoch": 2.24955595026643, + "grad_norm": 0.5856360344875098, + "learning_rate": 4.784968397680181e-06, + "loss": 0.0354, "step": 2533 }, { - "epoch": 1.1254719076171442, - "grad_norm": 0.5641689055994421, - "learning_rate": 9.030909182831052e-06, - "loss": 0.062, + "epoch": 2.25044404973357, + "grad_norm": 0.4131978300410885, + "learning_rate": 4.781096409800466e-06, + "loss": 0.0441, "step": 2534 }, { - "epoch": 1.1259160559626915, - "grad_norm": 0.48230195194298414, - "learning_rate": 9.02976191003024e-06, - "loss": 0.055, + "epoch": 2.2513321492007106, + "grad_norm": 0.4156576374906733, + "learning_rate": 4.7772245534430535e-06, + "loss": 0.0417, "step": 2535 }, { - "epoch": 1.126360204308239, - "grad_norm": 0.45378491026345646, - "learning_rate": 9.028614031487757e-06, - "loss": 0.0485, + "epoch": 2.2522202486678506, + "grad_norm": 0.4015180530083811, + "learning_rate": 4.773352830934242e-06, + "loss": 0.0387, "step": 2536 }, { - "epoch": 1.1268043526537863, - "grad_norm": 0.5351056809427763, - "learning_rate": 9.027465547376154e-06, - "loss": 0.0554, + "epoch": 2.253108348134991, + "grad_norm": 0.3873023967813091, + "learning_rate": 4.7694812446002516e-06, + "loss": 0.0405, "step": 2537 }, { - "epoch": 1.1272485009993338, - "grad_norm": 0.44870767793480254, - "learning_rate": 9.02631645786806e-06, - "loss": 0.0443, + "epoch": 2.2539964476021312, + "grad_norm": 0.37819588112493085, + "learning_rate": 4.76560979676722e-06, + "loss": 0.0375, "step": 2538 }, { - "epoch": 1.1276926493448811, - "grad_norm": 1.1371656771905172, - "learning_rate": 9.02516676313621e-06, - "loss": 0.0572, + "epoch": 2.2548845470692718, + "grad_norm": 0.3405180786556365, + "learning_rate": 4.7617384897612055e-06, + "loss": 0.0426, "step": 2539 }, { - "epoch": 1.1281367976904286, - "grad_norm": 0.5747790388905519, - "learning_rate": 9.02401646335342e-06, - "loss": 0.0662, + "epoch": 2.2557726465364123, + "grad_norm": 0.4616456053902133, + "learning_rate": 4.757867325908174e-06, + "loss": 0.0425, "step": 2540 }, { - "epoch": 1.1285809460359761, - "grad_norm": 0.5477714109841172, - "learning_rate": 9.022865558692599e-06, - "loss": 0.0629, + "epoch": 2.2566607460035524, + "grad_norm": 0.37888562507067025, + "learning_rate": 4.753996307534009e-06, + "loss": 0.0387, "step": 2541 }, { - "epoch": 1.1290250943815234, - "grad_norm": 0.7201777798009189, - "learning_rate": 9.021714049326749e-06, - "loss": 0.0642, + "epoch": 2.257548845470693, + "grad_norm": 0.37789579527129913, + "learning_rate": 4.7501254369645105e-06, + "loss": 0.0384, "step": 2542 }, { - "epoch": 1.1294692427270707, - "grad_norm": 0.5084161768229669, - "learning_rate": 9.02056193542896e-06, - "loss": 0.0413, + "epoch": 2.258436944937833, + "grad_norm": 0.4358472658585724, + "learning_rate": 4.746254716525388e-06, + "loss": 0.0383, "step": 2543 }, { - "epoch": 1.1299133910726182, - "grad_norm": 0.5818759215205692, - "learning_rate": 9.019409217172414e-06, - "loss": 0.0458, + "epoch": 2.2593250444049735, + "grad_norm": 0.40128540158761594, + "learning_rate": 4.742384148542252e-06, + "loss": 0.0477, "step": 2544 }, { - "epoch": 1.1303575394181657, - "grad_norm": 0.4060737061879425, - "learning_rate": 9.018255894730384e-06, - "loss": 0.0417, + "epoch": 2.2602131438721136, + "grad_norm": 0.3799235401621402, + "learning_rate": 4.738513735340634e-06, + "loss": 0.0366, "step": 2545 }, { - "epoch": 1.130801687763713, - "grad_norm": 0.37654168507528063, - "learning_rate": 9.017101968276237e-06, - "loss": 0.0356, + "epoch": 2.261101243339254, + "grad_norm": 0.44371140206874726, + "learning_rate": 4.734643479245967e-06, + "loss": 0.0444, "step": 2546 }, { - "epoch": 1.1312458361092605, - "grad_norm": 0.5269591862707016, - "learning_rate": 9.015947437983423e-06, - "loss": 0.0502, + "epoch": 2.261989342806394, + "grad_norm": 0.45476470201441677, + "learning_rate": 4.730773382583589e-06, + "loss": 0.0401, "step": 2547 }, { - "epoch": 1.1316899844548078, - "grad_norm": 0.6087429306966435, - "learning_rate": 9.014792304025492e-06, - "loss": 0.0549, + "epoch": 2.2628774422735347, + "grad_norm": 0.41522269427289815, + "learning_rate": 4.726903447678741e-06, + "loss": 0.0421, "step": 2548 }, { - "epoch": 1.1321341328003554, - "grad_norm": 0.47161508183894374, - "learning_rate": 9.013636566576078e-06, - "loss": 0.046, + "epoch": 2.263765541740675, + "grad_norm": 0.45973427614502205, + "learning_rate": 4.723033676856571e-06, + "loss": 0.0419, "step": 2549 }, { - "epoch": 1.1325782811459026, - "grad_norm": 0.5566854251234638, - "learning_rate": 9.012480225808908e-06, - "loss": 0.0582, + "epoch": 2.2646536412078153, + "grad_norm": 0.34047523807710073, + "learning_rate": 4.719164072442125e-06, + "loss": 0.0365, "step": 2550 }, { - "epoch": 1.1330224294914502, - "grad_norm": 0.7226965671519505, - "learning_rate": 9.0113232818978e-06, - "loss": 0.0535, + "epoch": 2.265541740674956, + "grad_norm": 0.5434438568961559, + "learning_rate": 4.715294636760352e-06, + "loss": 0.0556, "step": 2551 }, { - "epoch": 1.1334665778369977, - "grad_norm": 0.528548310886907, - "learning_rate": 9.010165735016663e-06, - "loss": 0.0471, + "epoch": 2.266429840142096, + "grad_norm": 0.3987002813829804, + "learning_rate": 4.711425372136095e-06, + "loss": 0.0367, "step": 2552 }, { - "epoch": 1.133910726182545, - "grad_norm": 0.6224718792548548, - "learning_rate": 9.009007585339493e-06, - "loss": 0.0411, + "epoch": 2.267317939609236, + "grad_norm": 0.5178962986501356, + "learning_rate": 4.707556280894099e-06, + "loss": 0.0397, "step": 2553 }, { - "epoch": 1.1343548745280925, - "grad_norm": 0.46637655730340244, - "learning_rate": 9.007848833040385e-06, - "loss": 0.0382, + "epoch": 2.2682060390763765, + "grad_norm": 0.4294164720104162, + "learning_rate": 4.703687365359e-06, + "loss": 0.0417, "step": 2554 }, { - "epoch": 1.1347990228736398, - "grad_norm": 0.6524984653497812, - "learning_rate": 9.006689478293513e-06, - "loss": 0.0649, + "epoch": 2.269094138543517, + "grad_norm": 0.4796438938735421, + "learning_rate": 4.6998186278553375e-06, + "loss": 0.0472, "step": 2555 }, { - "epoch": 1.1352431712191873, - "grad_norm": 0.42573749158130464, - "learning_rate": 9.005529521273152e-06, - "loss": 0.0333, + "epoch": 2.269982238010657, + "grad_norm": 0.4884580178535705, + "learning_rate": 4.69595007070753e-06, + "loss": 0.0331, "step": 2556 }, { - "epoch": 1.1356873195647346, - "grad_norm": 0.4720058401308709, - "learning_rate": 9.004368962153662e-06, - "loss": 0.0454, + "epoch": 2.2708703374777977, + "grad_norm": 0.3881108467602305, + "learning_rate": 4.6920816962399e-06, + "loss": 0.0347, "step": 2557 }, { - "epoch": 1.136131467910282, - "grad_norm": 0.4623215710338066, - "learning_rate": 9.003207801109495e-06, - "loss": 0.0413, + "epoch": 2.2717584369449377, + "grad_norm": 0.39596463725067144, + "learning_rate": 4.6882135067766555e-06, + "loss": 0.0421, "step": 2558 }, { - "epoch": 1.1365756162558294, - "grad_norm": 0.6101080407770559, - "learning_rate": 9.002046038315192e-06, - "loss": 0.0534, + "epoch": 2.2726465364120783, + "grad_norm": 0.4294002256652185, + "learning_rate": 4.684345504641894e-06, + "loss": 0.0401, "step": 2559 }, { - "epoch": 1.1370197646013769, - "grad_norm": 0.5040799125891366, - "learning_rate": 9.000883673945387e-06, - "loss": 0.0392, + "epoch": 2.2735346358792183, + "grad_norm": 0.42363391918012416, + "learning_rate": 4.680477692159597e-06, + "loss": 0.0462, "step": 2560 }, { - "epoch": 1.1374639129469242, - "grad_norm": 0.6363733147612941, - "learning_rate": 8.999720708174802e-06, - "loss": 0.0628, + "epoch": 2.274422735346359, + "grad_norm": 0.3582869755447265, + "learning_rate": 4.676610071653638e-06, + "loss": 0.0372, "step": 2561 }, { - "epoch": 1.1379080612924717, - "grad_norm": 0.5620665765918448, - "learning_rate": 8.998557141178252e-06, - "loss": 0.0524, + "epoch": 2.275310834813499, + "grad_norm": 0.3916175494250579, + "learning_rate": 4.67274264544777e-06, + "loss": 0.0339, "step": 2562 }, { - "epoch": 1.1383522096380192, - "grad_norm": 0.48743987280774526, - "learning_rate": 8.99739297313064e-06, - "loss": 0.0502, + "epoch": 2.2761989342806395, + "grad_norm": 0.5189670677965533, + "learning_rate": 4.6688754158656335e-06, + "loss": 0.0451, "step": 2563 }, { - "epoch": 1.1387963579835665, - "grad_norm": 0.4714981818194369, - "learning_rate": 8.99622820420696e-06, - "loss": 0.0477, + "epoch": 2.2770870337477795, + "grad_norm": 0.41871587575728325, + "learning_rate": 4.665008385230747e-06, + "loss": 0.0398, "step": 2564 }, { - "epoch": 1.139240506329114, - "grad_norm": 0.466559878027, - "learning_rate": 8.995062834582297e-06, - "loss": 0.0585, + "epoch": 2.27797513321492, + "grad_norm": 0.39671935469464326, + "learning_rate": 4.6611415558665115e-06, + "loss": 0.0443, "step": 2565 }, { - "epoch": 1.1396846546746613, - "grad_norm": 0.5806057329454937, - "learning_rate": 8.993896864431825e-06, - "loss": 0.0653, + "epoch": 2.2788632326820606, + "grad_norm": 0.41792405006136724, + "learning_rate": 4.657274930096208e-06, + "loss": 0.039, "step": 2566 }, { - "epoch": 1.1401288030202088, - "grad_norm": 0.565404505122367, - "learning_rate": 8.992730293930812e-06, - "loss": 0.0375, + "epoch": 2.2797513321492007, + "grad_norm": 0.34331716917412153, + "learning_rate": 4.653408510242995e-06, + "loss": 0.0327, "step": 2567 }, { - "epoch": 1.140572951365756, - "grad_norm": 0.49869252300833783, - "learning_rate": 8.99156312325461e-06, - "loss": 0.0412, + "epoch": 2.280639431616341, + "grad_norm": 0.39629268819743063, + "learning_rate": 4.649542298629903e-06, + "loss": 0.0487, "step": 2568 }, { - "epoch": 1.1410170997113036, - "grad_norm": 0.4533506243280891, - "learning_rate": 8.990395352578665e-06, - "loss": 0.0423, + "epoch": 2.2815275310834813, + "grad_norm": 0.49315951881137177, + "learning_rate": 4.645676297579841e-06, + "loss": 0.0475, "step": 2569 }, { - "epoch": 1.141461248056851, - "grad_norm": 0.5186554476274421, - "learning_rate": 8.989226982078513e-06, - "loss": 0.0486, + "epoch": 2.282415630550622, + "grad_norm": 0.35594959843398993, + "learning_rate": 4.641810509415594e-06, + "loss": 0.0383, "step": 2570 }, { - "epoch": 1.1419053964023984, - "grad_norm": 0.5971608007231468, - "learning_rate": 8.988058011929781e-06, - "loss": 0.063, + "epoch": 2.283303730017762, + "grad_norm": 0.39177546065555535, + "learning_rate": 4.6379449364598165e-06, + "loss": 0.0366, "step": 2571 }, { - "epoch": 1.1423495447479457, - "grad_norm": 0.6848430211876612, - "learning_rate": 8.986888442308187e-06, - "loss": 0.0815, + "epoch": 2.2841918294849024, + "grad_norm": 0.3850619559079712, + "learning_rate": 4.634079581035029e-06, + "loss": 0.0328, "step": 2572 }, { - "epoch": 1.1427936930934932, - "grad_norm": 0.5213622343694703, - "learning_rate": 8.985718273389532e-06, - "loss": 0.0426, + "epoch": 2.2850799289520425, + "grad_norm": 0.4756070589111131, + "learning_rate": 4.630214445463629e-06, + "loss": 0.0428, "step": 2573 }, { - "epoch": 1.1432378414390407, - "grad_norm": 0.4678564391636386, - "learning_rate": 8.984547505349714e-06, - "loss": 0.0461, + "epoch": 2.285968028419183, + "grad_norm": 0.43955577293826825, + "learning_rate": 4.626349532067879e-06, + "loss": 0.0431, "step": 2574 }, { - "epoch": 1.143681989784588, - "grad_norm": 0.6562307201524248, - "learning_rate": 8.983376138364723e-06, - "loss": 0.0666, + "epoch": 2.286856127886323, + "grad_norm": 0.34848039746895904, + "learning_rate": 4.622484843169907e-06, + "loss": 0.0388, "step": 2575 }, { - "epoch": 1.1441261381301355, - "grad_norm": 0.7770057155172067, - "learning_rate": 8.982204172610632e-06, - "loss": 0.0761, + "epoch": 2.2877442273534636, + "grad_norm": 0.3472130105952495, + "learning_rate": 4.618620381091707e-06, + "loss": 0.0367, "step": 2576 }, { - "epoch": 1.1445702864756828, - "grad_norm": 0.5346372112341888, - "learning_rate": 8.981031608263608e-06, - "loss": 0.0742, + "epoch": 2.288632326820604, + "grad_norm": 0.3634314214099599, + "learning_rate": 4.614756148155135e-06, + "loss": 0.0357, "step": 2577 }, { - "epoch": 1.1450144348212303, - "grad_norm": 0.549185701186776, - "learning_rate": 8.979858445499908e-06, - "loss": 0.0583, + "epoch": 2.289520426287744, + "grad_norm": 0.3563163781719383, + "learning_rate": 4.610892146681913e-06, + "loss": 0.0385, "step": 2578 }, { - "epoch": 1.1454585831667776, - "grad_norm": 0.44014613661158175, - "learning_rate": 8.978684684495875e-06, - "loss": 0.0422, + "epoch": 2.2904085257548847, + "grad_norm": 0.36444852000028927, + "learning_rate": 4.607028378993619e-06, + "loss": 0.0418, "step": 2579 }, { - "epoch": 1.1459027315123251, - "grad_norm": 0.43518841056839336, - "learning_rate": 8.97751032542795e-06, - "loss": 0.0513, + "epoch": 2.291296625222025, + "grad_norm": 0.3520780957388402, + "learning_rate": 4.603164847411698e-06, + "loss": 0.0346, "step": 2580 }, { - "epoch": 1.1463468798578726, - "grad_norm": 0.3840746634100509, - "learning_rate": 8.976335368472657e-06, - "loss": 0.0295, + "epoch": 2.2921847246891653, + "grad_norm": 0.40450817499551517, + "learning_rate": 4.599301554257444e-06, + "loss": 0.0388, "step": 2581 }, { - "epoch": 1.14679102820342, - "grad_norm": 0.7385893846304249, - "learning_rate": 8.97515981380661e-06, - "loss": 0.0794, + "epoch": 2.2930728241563054, + "grad_norm": 0.4632521174305848, + "learning_rate": 4.595438501852013e-06, + "loss": 0.0466, "step": 2582 }, { - "epoch": 1.1472351765489675, - "grad_norm": 0.38857897583910317, - "learning_rate": 8.97398366160652e-06, - "loss": 0.0361, + "epoch": 2.293960923623446, + "grad_norm": 0.5782949167988353, + "learning_rate": 4.591575692516417e-06, + "loss": 0.0419, "step": 2583 }, { - "epoch": 1.1476793248945147, - "grad_norm": 0.6486029298679971, - "learning_rate": 8.972806912049178e-06, - "loss": 0.0732, + "epoch": 2.294849023090586, + "grad_norm": 0.3621161895075694, + "learning_rate": 4.587713128571522e-06, + "loss": 0.0342, "step": 2584 }, { - "epoch": 1.1481234732400623, - "grad_norm": 0.4395809321084259, - "learning_rate": 8.971629565311471e-06, - "loss": 0.0426, + "epoch": 2.2957371225577266, + "grad_norm": 0.3729789315196646, + "learning_rate": 4.583850812338041e-06, + "loss": 0.0401, "step": 2585 }, { - "epoch": 1.1485676215856095, - "grad_norm": 0.42320652941064185, - "learning_rate": 8.970451621570376e-06, - "loss": 0.0476, + "epoch": 2.2966252220248666, + "grad_norm": 0.3687046514181492, + "learning_rate": 4.579988746136543e-06, + "loss": 0.0413, "step": 2586 }, { - "epoch": 1.149011769931157, - "grad_norm": 0.3926981271234761, - "learning_rate": 8.969273081002954e-06, - "loss": 0.0408, + "epoch": 2.297513321492007, + "grad_norm": 0.4461284341597481, + "learning_rate": 4.576126932287449e-06, + "loss": 0.0442, "step": 2587 }, { - "epoch": 1.1494559182767043, - "grad_norm": 0.6068945137716437, - "learning_rate": 8.96809394378636e-06, - "loss": 0.0448, + "epoch": 2.2984014209591472, + "grad_norm": 0.5508823297299353, + "learning_rate": 4.572265373111024e-06, + "loss": 0.0396, "step": 2588 }, { - "epoch": 1.1499000666222519, - "grad_norm": 0.46200186631768236, - "learning_rate": 8.966914210097843e-06, - "loss": 0.0587, + "epoch": 2.2992895204262878, + "grad_norm": 0.49580490300154184, + "learning_rate": 4.56840407092738e-06, + "loss": 0.047, "step": 2589 }, { - "epoch": 1.1503442149677992, - "grad_norm": 0.8019976203155184, - "learning_rate": 8.965733880114734e-06, - "loss": 0.0768, + "epoch": 2.300177619893428, + "grad_norm": 0.3805186811895452, + "learning_rate": 4.5645430280564775e-06, + "loss": 0.041, "step": 2590 }, { - "epoch": 1.1507883633133467, - "grad_norm": 0.4272911771923715, - "learning_rate": 8.964552954014455e-06, - "loss": 0.0555, + "epoch": 2.3010657193605684, + "grad_norm": 0.3961612376062459, + "learning_rate": 4.560682246818118e-06, + "loss": 0.0391, "step": 2591 }, { - "epoch": 1.1512325116588942, - "grad_norm": 0.46591985962506655, - "learning_rate": 8.963371431974521e-06, - "loss": 0.0491, + "epoch": 2.301953818827709, + "grad_norm": 0.369603894824233, + "learning_rate": 4.556821729531951e-06, + "loss": 0.0389, "step": 2592 }, { - "epoch": 1.1516766600044415, - "grad_norm": 0.34427825623736114, - "learning_rate": 8.962189314172537e-06, - "loss": 0.0291, + "epoch": 2.302841918294849, + "grad_norm": 0.3388520145689632, + "learning_rate": 4.5529614785174606e-06, + "loss": 0.0353, "step": 2593 }, { - "epoch": 1.152120808349989, - "grad_norm": 0.3941777289275942, - "learning_rate": 8.961006600786191e-06, - "loss": 0.0386, + "epoch": 2.3037300177619895, + "grad_norm": 0.3970081601406595, + "learning_rate": 4.549101496093974e-06, + "loss": 0.0412, "step": 2594 }, { - "epoch": 1.1525649566955363, - "grad_norm": 0.7828627909758169, - "learning_rate": 8.959823291993268e-06, - "loss": 0.0644, + "epoch": 2.3046181172291296, + "grad_norm": 0.5662660049321084, + "learning_rate": 4.545241784580658e-06, + "loss": 0.0472, "step": 2595 }, { - "epoch": 1.1530091050410838, - "grad_norm": 0.44448667498264577, - "learning_rate": 8.95863938797164e-06, - "loss": 0.0437, + "epoch": 2.30550621669627, + "grad_norm": 0.37673003155222734, + "learning_rate": 4.5413823462965195e-06, + "loss": 0.0278, "step": 2596 }, { - "epoch": 1.153453253386631, - "grad_norm": 0.44346487742364565, - "learning_rate": 8.957454888899264e-06, - "loss": 0.0462, + "epoch": 2.30639431616341, + "grad_norm": 0.3526715067187626, + "learning_rate": 4.537523183560392e-06, + "loss": 0.0352, "step": 2597 }, { - "epoch": 1.1538974017321786, - "grad_norm": 0.4503855208067358, - "learning_rate": 8.956269794954195e-06, - "loss": 0.0467, + "epoch": 2.3072824156305507, + "grad_norm": 0.3913933260665068, + "learning_rate": 4.533664298690951e-06, + "loss": 0.0363, "step": 2598 }, { - "epoch": 1.1543415500777259, - "grad_norm": 0.50075395685955, - "learning_rate": 8.95508410631457e-06, - "loss": 0.0513, + "epoch": 2.308170515097691, + "grad_norm": 0.3266126022029271, + "learning_rate": 4.529805694006704e-06, + "loss": 0.0348, "step": 2599 }, { - "epoch": 1.1547856984232734, - "grad_norm": 1.5111182491065092, - "learning_rate": 8.953897823158618e-06, - "loss": 0.0422, + "epoch": 2.3090586145648313, + "grad_norm": 0.4409332935117264, + "learning_rate": 4.52594737182599e-06, + "loss": 0.0382, "step": 2600 }, { - "epoch": 1.1552298467688207, - "grad_norm": 0.4837059174435265, - "learning_rate": 8.95271094566466e-06, - "loss": 0.0484, + "epoch": 2.3099467140319714, + "grad_norm": 0.38668057959744595, + "learning_rate": 4.522089334466977e-06, + "loss": 0.0358, "step": 2601 }, { - "epoch": 1.1556739951143682, - "grad_norm": 0.9602960350821099, - "learning_rate": 8.9515234740111e-06, - "loss": 0.0596, + "epoch": 2.310834813499112, + "grad_norm": 0.4167138364334915, + "learning_rate": 4.51823158424766e-06, + "loss": 0.0347, "step": 2602 }, { - "epoch": 1.1561181434599157, - "grad_norm": 0.4674309097106074, - "learning_rate": 8.950335408376438e-06, - "loss": 0.0388, + "epoch": 2.3117229129662524, + "grad_norm": 0.3971986715978222, + "learning_rate": 4.5143741234858655e-06, + "loss": 0.0347, "step": 2603 }, { - "epoch": 1.156562291805463, - "grad_norm": 0.5494197343743635, - "learning_rate": 8.949146748939259e-06, - "loss": 0.0516, + "epoch": 2.3126110124333925, + "grad_norm": 0.33372030175031625, + "learning_rate": 4.510516954499246e-06, + "loss": 0.0381, "step": 2604 }, { - "epoch": 1.1570064401510105, - "grad_norm": 0.603296345877419, - "learning_rate": 8.94795749587824e-06, - "loss": 0.0648, + "epoch": 2.313499111900533, + "grad_norm": 0.3933343502736182, + "learning_rate": 4.5066600796052755e-06, + "loss": 0.0449, "step": 2605 }, { - "epoch": 1.1574505884965578, - "grad_norm": 0.6840688549848888, - "learning_rate": 8.946767649372144e-06, - "loss": 0.0633, + "epoch": 2.314387211367673, + "grad_norm": 0.45756384072037526, + "learning_rate": 4.502803501121252e-06, + "loss": 0.036, "step": 2606 }, { - "epoch": 1.1578947368421053, - "grad_norm": 0.7710851984691011, - "learning_rate": 8.945577209599829e-06, - "loss": 0.0557, + "epoch": 2.3152753108348136, + "grad_norm": 0.3987698602657196, + "learning_rate": 4.498947221364299e-06, + "loss": 0.0425, "step": 2607 }, { - "epoch": 1.1583388851876526, - "grad_norm": 0.5783757044901674, - "learning_rate": 8.944386176740233e-06, - "loss": 0.0552, + "epoch": 2.3161634103019537, + "grad_norm": 0.4512693637599679, + "learning_rate": 4.495091242651356e-06, + "loss": 0.0389, "step": 2608 }, { - "epoch": 1.1587830335332001, - "grad_norm": 0.4854270839314596, - "learning_rate": 8.943194550972392e-06, - "loss": 0.0531, + "epoch": 2.3170515097690942, + "grad_norm": 0.3974871510688192, + "learning_rate": 4.491235567299185e-06, + "loss": 0.0428, "step": 2609 }, { - "epoch": 1.1592271818787476, - "grad_norm": 0.5109780619188811, - "learning_rate": 8.942002332475428e-06, - "loss": 0.0512, + "epoch": 2.3179396092362343, + "grad_norm": 0.540763951633457, + "learning_rate": 4.487380197624361e-06, + "loss": 0.0448, "step": 2610 }, { - "epoch": 1.159671330224295, - "grad_norm": 0.541497267825657, - "learning_rate": 8.940809521428551e-06, - "loss": 0.0622, + "epoch": 2.318827708703375, + "grad_norm": 0.5609297515451566, + "learning_rate": 4.483525135943282e-06, + "loss": 0.0472, "step": 2611 }, { - "epoch": 1.1601154785698422, - "grad_norm": 0.6325619808955779, - "learning_rate": 8.939616118011058e-06, - "loss": 0.0577, + "epoch": 2.319715808170515, + "grad_norm": 0.561920406048222, + "learning_rate": 4.479670384572159e-06, + "loss": 0.0446, "step": 2612 }, { - "epoch": 1.1605596269153897, - "grad_norm": 0.5836195560024343, - "learning_rate": 8.938422122402342e-06, - "loss": 0.0608, + "epoch": 2.3206039076376554, + "grad_norm": 0.3662670350130841, + "learning_rate": 4.475815945827008e-06, + "loss": 0.0372, "step": 2613 }, { - "epoch": 1.1610037752609372, - "grad_norm": 0.5121529339351947, - "learning_rate": 8.937227534781878e-06, - "loss": 0.0496, + "epoch": 2.321492007104796, + "grad_norm": 0.4336635507906872, + "learning_rate": 4.4719618220236715e-06, + "loss": 0.0375, "step": 2614 }, { - "epoch": 1.1614479236064845, - "grad_norm": 0.5247406850098297, - "learning_rate": 8.936032355329233e-06, - "loss": 0.0509, + "epoch": 2.322380106571936, + "grad_norm": 0.31236302035819924, + "learning_rate": 4.468108015477792e-06, + "loss": 0.0314, "step": 2615 }, { - "epoch": 1.161892071952032, - "grad_norm": 0.43915296270490234, - "learning_rate": 8.934836584224065e-06, - "loss": 0.0483, + "epoch": 2.323268206039076, + "grad_norm": 0.3779332333237963, + "learning_rate": 4.464254528504827e-06, + "loss": 0.0307, "step": 2616 }, { - "epoch": 1.1623362202975793, - "grad_norm": 0.4849661298577939, - "learning_rate": 8.933640221646116e-06, - "loss": 0.0534, + "epoch": 2.3241563055062167, + "grad_norm": 0.36485662834350213, + "learning_rate": 4.460401363420038e-06, + "loss": 0.0376, "step": 2617 }, { - "epoch": 1.1627803686431268, - "grad_norm": 0.5053186137211374, - "learning_rate": 8.932443267775221e-06, - "loss": 0.0508, + "epoch": 2.325044404973357, + "grad_norm": 0.3360456767434795, + "learning_rate": 4.456548522538496e-06, + "loss": 0.0329, "step": 2618 }, { - "epoch": 1.1632245169886741, - "grad_norm": 0.5361080174409868, - "learning_rate": 8.931245722791305e-06, - "loss": 0.0609, + "epoch": 2.3259325044404973, + "grad_norm": 0.3792834149891537, + "learning_rate": 4.4526960081750754e-06, + "loss": 0.0299, "step": 2619 }, { - "epoch": 1.1636686653342216, - "grad_norm": 0.40985225869641795, - "learning_rate": 8.930047586874373e-06, - "loss": 0.0458, + "epoch": 2.326820603907638, + "grad_norm": 0.4468078495755882, + "learning_rate": 4.448843822644458e-06, + "loss": 0.0358, "step": 2620 }, { - "epoch": 1.1641128136797692, - "grad_norm": 0.48841596818152816, - "learning_rate": 8.928848860204531e-06, - "loss": 0.0782, + "epoch": 2.327708703374778, + "grad_norm": 0.5255619436694257, + "learning_rate": 4.444991968261121e-06, + "loss": 0.0498, "step": 2621 }, { - "epoch": 1.1645569620253164, - "grad_norm": 0.5219141696813542, - "learning_rate": 8.927649542961965e-06, - "loss": 0.0498, + "epoch": 2.3285968028419184, + "grad_norm": 0.3463107043548755, + "learning_rate": 4.441140447339349e-06, + "loss": 0.0336, "step": 2622 }, { - "epoch": 1.165001110370864, - "grad_norm": 0.6061661462461002, - "learning_rate": 8.926449635326954e-06, - "loss": 0.0586, + "epoch": 2.3294849023090585, + "grad_norm": 0.46177759076054403, + "learning_rate": 4.4372892621932235e-06, + "loss": 0.0371, "step": 2623 }, { - "epoch": 1.1654452587164112, - "grad_norm": 0.4500746397097711, - "learning_rate": 8.925249137479864e-06, - "loss": 0.0399, + "epoch": 2.330373001776199, + "grad_norm": 0.44754932979274265, + "learning_rate": 4.433438415136629e-06, + "loss": 0.0465, "step": 2624 }, { - "epoch": 1.1658894070619588, - "grad_norm": 0.5211157612826388, - "learning_rate": 8.92404804960115e-06, - "loss": 0.05, + "epoch": 2.331261101243339, + "grad_norm": 0.3744449625787688, + "learning_rate": 4.429587908483237e-06, + "loss": 0.0451, "step": 2625 }, { - "epoch": 1.166333555407506, - "grad_norm": 0.4903699164468352, - "learning_rate": 8.922846371871355e-06, - "loss": 0.0496, + "epoch": 2.3321492007104796, + "grad_norm": 0.38720151614565995, + "learning_rate": 4.425737744546522e-06, + "loss": 0.0341, "step": 2626 }, { - "epoch": 1.1667777037530536, - "grad_norm": 0.7464483529673437, - "learning_rate": 8.921644104471114e-06, - "loss": 0.0601, + "epoch": 2.3330373001776197, + "grad_norm": 0.37858368183699215, + "learning_rate": 4.421887925639753e-06, + "loss": 0.042, "step": 2627 }, { - "epoch": 1.1672218520986009, - "grad_norm": 0.4872521353158615, - "learning_rate": 8.920441247581148e-06, - "loss": 0.0525, + "epoch": 2.33392539964476, + "grad_norm": 0.3532938714042838, + "learning_rate": 4.4180384540759905e-06, + "loss": 0.0303, "step": 2628 }, { - "epoch": 1.1676660004441484, - "grad_norm": 0.5674275180118361, - "learning_rate": 8.919237801382265e-06, - "loss": 0.0552, + "epoch": 2.3348134991119007, + "grad_norm": 0.5221524768696196, + "learning_rate": 4.414189332168079e-06, + "loss": 0.0399, "step": 2629 }, { - "epoch": 1.1681101487896957, - "grad_norm": 0.8950605367456311, - "learning_rate": 8.918033766055364e-06, - "loss": 0.0602, + "epoch": 2.335701598579041, + "grad_norm": 0.40731896962907893, + "learning_rate": 4.4103405622286645e-06, + "loss": 0.036, "step": 2630 }, { - "epoch": 1.1685542971352432, - "grad_norm": 0.6732326138636984, - "learning_rate": 8.916829141781432e-06, - "loss": 0.0558, + "epoch": 2.3365896980461813, + "grad_norm": 0.4795094424305429, + "learning_rate": 4.406492146570176e-06, + "loss": 0.0445, "step": 2631 }, { - "epoch": 1.1689984454807907, - "grad_norm": 0.48994316207093613, - "learning_rate": 8.915623928741546e-06, - "loss": 0.0628, + "epoch": 2.3374777975133214, + "grad_norm": 0.3875058274637457, + "learning_rate": 4.4026440875048295e-06, + "loss": 0.0383, "step": 2632 }, { - "epoch": 1.169442593826338, - "grad_norm": 0.4212795487662145, - "learning_rate": 8.914418127116867e-06, - "loss": 0.0508, + "epoch": 2.338365896980462, + "grad_norm": 0.4483653823017389, + "learning_rate": 4.398796387344626e-06, + "loss": 0.0476, "step": 2633 }, { - "epoch": 1.1698867421718855, - "grad_norm": 0.49361391712276814, - "learning_rate": 8.91321173708865e-06, - "loss": 0.0638, + "epoch": 2.339253996447602, + "grad_norm": 0.4718006742914674, + "learning_rate": 4.394949048401354e-06, + "loss": 0.0502, "step": 2634 }, { - "epoch": 1.1703308905174328, - "grad_norm": 0.7512476603132378, - "learning_rate": 8.912004758838235e-06, - "loss": 0.0839, + "epoch": 2.3401420959147425, + "grad_norm": 0.49651496653938887, + "learning_rate": 4.391102072986581e-06, + "loss": 0.046, "step": 2635 }, { - "epoch": 1.1707750388629803, - "grad_norm": 0.505025169980782, - "learning_rate": 8.910797192547051e-06, - "loss": 0.0457, + "epoch": 2.3410301953818826, + "grad_norm": 0.3709334633768202, + "learning_rate": 4.387255463411658e-06, + "loss": 0.0369, "step": 2636 }, { - "epoch": 1.1712191872085276, - "grad_norm": 0.5671039324048706, - "learning_rate": 8.909589038396617e-06, - "loss": 0.044, + "epoch": 2.341918294849023, + "grad_norm": 0.4068319491201398, + "learning_rate": 4.3834092219877214e-06, + "loss": 0.0461, "step": 2637 }, { - "epoch": 1.171663335554075, - "grad_norm": 0.4745487457505175, - "learning_rate": 8.908380296568537e-06, - "loss": 0.0479, + "epoch": 2.342806394316163, + "grad_norm": 0.5168445023408296, + "learning_rate": 4.3795633510256745e-06, + "loss": 0.0418, "step": 2638 }, { - "epoch": 1.1721074838996224, - "grad_norm": 0.8949118984931155, - "learning_rate": 8.907170967244508e-06, - "loss": 0.0576, + "epoch": 2.3436944937833037, + "grad_norm": 0.36258257240961933, + "learning_rate": 4.375717852836207e-06, + "loss": 0.0382, "step": 2639 }, { - "epoch": 1.17255163224517, - "grad_norm": 0.4520757913913456, - "learning_rate": 8.905961050606311e-06, - "loss": 0.0467, + "epoch": 2.3445825932504443, + "grad_norm": 0.3772719221437402, + "learning_rate": 4.371872729729784e-06, + "loss": 0.0352, "step": 2640 }, { - "epoch": 1.1729957805907172, - "grad_norm": 0.4749545091509921, - "learning_rate": 8.904750546835817e-06, - "loss": 0.0446, + "epoch": 2.3454706927175843, + "grad_norm": 0.4179596388727361, + "learning_rate": 4.3680279840166444e-06, + "loss": 0.0403, "step": 2641 }, { - "epoch": 1.1734399289362647, - "grad_norm": 0.522016267548402, - "learning_rate": 8.903539456114988e-06, - "loss": 0.053, + "epoch": 2.346358792184725, + "grad_norm": 0.4114879162383246, + "learning_rate": 4.364183618006798e-06, + "loss": 0.0379, "step": 2642 }, { - "epoch": 1.1738840772818122, - "grad_norm": 0.530579514310406, - "learning_rate": 8.902327778625865e-06, - "loss": 0.0437, + "epoch": 2.347246891651865, + "grad_norm": 0.3749628004987579, + "learning_rate": 4.360339634010027e-06, + "loss": 0.0357, "step": 2643 }, { - "epoch": 1.1743282256273595, - "grad_norm": 0.6048157018439171, - "learning_rate": 8.90111551455059e-06, - "loss": 0.0435, + "epoch": 2.3481349911190055, + "grad_norm": 0.400806823046906, + "learning_rate": 4.356496034335887e-06, + "loss": 0.0467, "step": 2644 }, { - "epoch": 1.174772373972907, - "grad_norm": 0.5906854069033922, - "learning_rate": 8.899902664071384e-06, - "loss": 0.0626, + "epoch": 2.3490230905861456, + "grad_norm": 0.37358836479591945, + "learning_rate": 4.352652821293702e-06, + "loss": 0.0435, "step": 2645 }, { - "epoch": 1.1752165223184543, - "grad_norm": 0.47030730215576705, - "learning_rate": 8.898689227370563e-06, - "loss": 0.0386, + "epoch": 2.349911190053286, + "grad_norm": 0.3382574641349636, + "learning_rate": 4.34880999719256e-06, + "loss": 0.0401, "step": 2646 }, { - "epoch": 1.1756606706640018, - "grad_norm": 0.6351029367838139, - "learning_rate": 8.897475204630521e-06, - "loss": 0.0591, + "epoch": 2.350799289520426, + "grad_norm": 0.3725024069218942, + "learning_rate": 4.344967564341318e-06, + "loss": 0.0404, "step": 2647 }, { - "epoch": 1.176104819009549, - "grad_norm": 0.6410082214075851, - "learning_rate": 8.89626059603375e-06, - "loss": 0.0579, + "epoch": 2.3516873889875667, + "grad_norm": 0.3095419968941293, + "learning_rate": 4.341125525048599e-06, + "loss": 0.0371, "step": 2648 }, { - "epoch": 1.1765489673550966, - "grad_norm": 0.5406985095669288, - "learning_rate": 8.895045401762825e-06, - "loss": 0.0568, + "epoch": 2.3525754884547068, + "grad_norm": 0.5392748938131307, + "learning_rate": 4.3372838816227905e-06, + "loss": 0.0611, "step": 2649 }, { - "epoch": 1.1769931157006441, - "grad_norm": 0.7480895085443751, - "learning_rate": 8.893829622000412e-06, - "loss": 0.0707, + "epoch": 2.3534635879218473, + "grad_norm": 0.4635937921067071, + "learning_rate": 4.333442636372036e-06, + "loss": 0.0417, "step": 2650 }, { - "epoch": 1.1774372640461914, - "grad_norm": 0.45890411967998634, - "learning_rate": 8.892613256929261e-06, - "loss": 0.0416, + "epoch": 2.354351687388988, + "grad_norm": 0.3110201674381657, + "learning_rate": 4.329601791604246e-06, + "loss": 0.0351, "step": 2651 }, { - "epoch": 1.177881412391739, - "grad_norm": 0.396661981002553, - "learning_rate": 8.891396306732214e-06, - "loss": 0.0423, + "epoch": 2.355239786856128, + "grad_norm": 0.34607897834966267, + "learning_rate": 4.325761349627088e-06, + "loss": 0.0424, "step": 2652 }, { - "epoch": 1.1783255607372862, - "grad_norm": 0.5069251937753831, - "learning_rate": 8.890178771592198e-06, - "loss": 0.0487, + "epoch": 2.356127886323268, + "grad_norm": 0.3449482506256782, + "learning_rate": 4.321921312747989e-06, + "loss": 0.0382, "step": 2653 }, { - "epoch": 1.1787697090828337, - "grad_norm": 0.7785700220445251, - "learning_rate": 8.888960651692231e-06, - "loss": 0.0719, + "epoch": 2.3570159857904085, + "grad_norm": 0.45013139098831834, + "learning_rate": 4.318081683274128e-06, + "loss": 0.0358, "step": 2654 }, { - "epoch": 1.179213857428381, - "grad_norm": 0.5673896965611613, - "learning_rate": 8.887741947215415e-06, - "loss": 0.0556, + "epoch": 2.357904085257549, + "grad_norm": 0.3780648972123435, + "learning_rate": 4.3142424635124445e-06, + "loss": 0.0352, "step": 2655 }, { - "epoch": 1.1796580057739285, - "grad_norm": 0.542725413232058, - "learning_rate": 8.886522658344944e-06, - "loss": 0.0493, + "epoch": 2.358792184724689, + "grad_norm": 0.5219853618866549, + "learning_rate": 4.310403655769629e-06, + "loss": 0.0521, "step": 2656 }, { - "epoch": 1.1801021541194758, - "grad_norm": 0.6266382123491776, - "learning_rate": 8.885302785264098e-06, - "loss": 0.0383, + "epoch": 2.3596802841918296, + "grad_norm": 0.3383305725899369, + "learning_rate": 4.3065652623521285e-06, + "loss": 0.0384, "step": 2657 }, { - "epoch": 1.1805463024650233, - "grad_norm": 0.46581618376933115, - "learning_rate": 8.884082328156243e-06, - "loss": 0.0419, + "epoch": 2.3605683836589697, + "grad_norm": 0.40145436774201537, + "learning_rate": 4.302727285566134e-06, + "loss": 0.044, "step": 2658 }, { - "epoch": 1.1809904508105706, - "grad_norm": 0.5581446013530325, - "learning_rate": 8.882861287204836e-06, - "loss": 0.0498, + "epoch": 2.3614564831261102, + "grad_norm": 0.39845663541383897, + "learning_rate": 4.298889727717592e-06, + "loss": 0.0433, "step": 2659 }, { - "epoch": 1.1814345991561181, - "grad_norm": 0.42956852547953145, - "learning_rate": 8.881639662593417e-06, - "loss": 0.0381, + "epoch": 2.3623445825932503, + "grad_norm": 0.3778807856315741, + "learning_rate": 4.295052591112194e-06, + "loss": 0.0366, "step": 2660 }, { - "epoch": 1.1818787475016657, - "grad_norm": 0.45803495008537815, - "learning_rate": 8.880417454505622e-06, - "loss": 0.0492, + "epoch": 2.363232682060391, + "grad_norm": 0.38902474709123536, + "learning_rate": 4.291215878055382e-06, + "loss": 0.031, "step": 2661 }, { - "epoch": 1.182322895847213, - "grad_norm": 0.6914169208326956, - "learning_rate": 8.879194663125164e-06, - "loss": 0.0517, + "epoch": 2.364120781527531, + "grad_norm": 0.4161428012640726, + "learning_rate": 4.2873795908523384e-06, + "loss": 0.0429, "step": 2662 }, { - "epoch": 1.1827670441927605, - "grad_norm": 0.6395374873974822, - "learning_rate": 8.877971288635853e-06, - "loss": 0.0631, + "epoch": 2.3650088809946714, + "grad_norm": 0.38191207983459374, + "learning_rate": 4.283543731807994e-06, + "loss": 0.0445, "step": 2663 }, { - "epoch": 1.1832111925383078, - "grad_norm": 0.558846270469075, - "learning_rate": 8.876747331221583e-06, - "loss": 0.058, + "epoch": 2.3658969804618115, + "grad_norm": 0.42078712896487014, + "learning_rate": 4.27970830322702e-06, + "loss": 0.0375, "step": 2664 }, { - "epoch": 1.1836553408838553, - "grad_norm": 0.49754542881845054, - "learning_rate": 8.875522791066333e-06, - "loss": 0.0616, + "epoch": 2.366785079928952, + "grad_norm": 0.8424960126890609, + "learning_rate": 4.275873307413835e-06, + "loss": 0.0496, "step": 2665 }, { - "epoch": 1.1840994892294026, - "grad_norm": 0.641582498045218, - "learning_rate": 8.874297668354175e-06, - "loss": 0.0432, + "epoch": 2.3676731793960926, + "grad_norm": 0.4396625943845716, + "learning_rate": 4.272038746672586e-06, + "loss": 0.0467, "step": 2666 }, { - "epoch": 1.18454363757495, - "grad_norm": 0.5357146614905643, - "learning_rate": 8.873071963269265e-06, - "loss": 0.0581, + "epoch": 2.3685612788632326, + "grad_norm": 0.536416973392681, + "learning_rate": 4.268204623307166e-06, + "loss": 0.0471, "step": 2667 }, { - "epoch": 1.1849877859204974, - "grad_norm": 0.3864891516746441, - "learning_rate": 8.871845675995847e-06, - "loss": 0.0397, + "epoch": 2.369449378330373, + "grad_norm": 0.35724190190476407, + "learning_rate": 4.264370939621207e-06, + "loss": 0.0331, "step": 2668 }, { - "epoch": 1.1854319342660449, - "grad_norm": 0.5817776113032109, - "learning_rate": 8.870618806718252e-06, - "loss": 0.0548, + "epoch": 2.3703374777975132, + "grad_norm": 0.3474386013978734, + "learning_rate": 4.260537697918076e-06, + "loss": 0.0301, "step": 2669 }, { - "epoch": 1.1858760826115922, - "grad_norm": 0.6451637896241947, - "learning_rate": 8.8693913556209e-06, - "loss": 0.0555, + "epoch": 2.3712255772646538, + "grad_norm": 0.4181100995540747, + "learning_rate": 4.256704900500866e-06, + "loss": 0.0468, "step": 2670 }, { - "epoch": 1.1863202309571397, - "grad_norm": 0.522938279572616, - "learning_rate": 8.868163322888298e-06, - "loss": 0.0429, + "epoch": 2.372113676731794, + "grad_norm": 1.172328043611854, + "learning_rate": 4.2528725496724135e-06, + "loss": 0.052, "step": 2671 }, { - "epoch": 1.1867643793026872, - "grad_norm": 0.6059080038510308, - "learning_rate": 8.86693470870504e-06, - "loss": 0.056, + "epoch": 2.3730017761989344, + "grad_norm": 0.4189048331830737, + "learning_rate": 4.249040647735284e-06, + "loss": 0.0414, "step": 2672 }, { - "epoch": 1.1872085276482345, - "grad_norm": 0.5024058690626159, - "learning_rate": 8.865705513255807e-06, - "loss": 0.0371, + "epoch": 2.3738898756660745, + "grad_norm": 0.3545069090635621, + "learning_rate": 4.245209196991771e-06, + "loss": 0.0352, "step": 2673 }, { - "epoch": 1.187652675993782, - "grad_norm": 0.5033221412559168, - "learning_rate": 8.864475736725369e-06, - "loss": 0.06, + "epoch": 2.374777975133215, + "grad_norm": 0.6105691217865397, + "learning_rate": 4.2413781997438965e-06, + "loss": 0.0508, "step": 2674 }, { - "epoch": 1.1880968243393293, - "grad_norm": 0.4156977033942008, - "learning_rate": 8.863245379298582e-06, - "loss": 0.0378, + "epoch": 2.375666074600355, + "grad_norm": 0.3603993506401413, + "learning_rate": 4.2375476582934134e-06, + "loss": 0.0371, "step": 2675 }, { - "epoch": 1.1885409726848768, - "grad_norm": 0.8144903668245238, - "learning_rate": 8.86201444116039e-06, - "loss": 0.0614, + "epoch": 2.3765541740674956, + "grad_norm": 0.3793291502154685, + "learning_rate": 4.233717574941799e-06, + "loss": 0.0451, "step": 2676 }, { - "epoch": 1.188985121030424, - "grad_norm": 0.4226975303790829, - "learning_rate": 8.860782922495821e-06, - "loss": 0.046, + "epoch": 2.377442273534636, + "grad_norm": 0.4999983500982288, + "learning_rate": 4.229887951990255e-06, + "loss": 0.0352, "step": 2677 }, { - "epoch": 1.1894292693759716, - "grad_norm": 0.52956763181697, - "learning_rate": 8.859550823489997e-06, - "loss": 0.0384, + "epoch": 2.378330373001776, + "grad_norm": 0.3801942526748397, + "learning_rate": 4.2260587917397064e-06, + "loss": 0.0381, "step": 2678 }, { - "epoch": 1.189873417721519, - "grad_norm": 0.5489791027942442, - "learning_rate": 8.858318144328123e-06, - "loss": 0.0742, + "epoch": 2.3792184724689167, + "grad_norm": 0.4072780423292999, + "learning_rate": 4.2222300964908e-06, + "loss": 0.0405, "step": 2679 }, { - "epoch": 1.1903175660670664, - "grad_norm": 1.0552083022753025, - "learning_rate": 8.85708488519549e-06, - "loss": 0.0446, + "epoch": 2.380106571936057, + "grad_norm": 0.5037803829193044, + "learning_rate": 4.218401868543903e-06, + "loss": 0.0365, "step": 2680 }, { - "epoch": 1.1907617144126137, - "grad_norm": 0.5078924313476717, - "learning_rate": 8.855851046277478e-06, - "loss": 0.054, + "epoch": 2.3809946714031973, + "grad_norm": 0.44276942538337943, + "learning_rate": 4.214574110199108e-06, + "loss": 0.0417, "step": 2681 }, { - "epoch": 1.1912058627581612, - "grad_norm": 0.5148608895352339, - "learning_rate": 8.854616627759553e-06, - "loss": 0.0523, + "epoch": 2.3818827708703374, + "grad_norm": 0.45279219712738855, + "learning_rate": 4.2107468237562135e-06, + "loss": 0.0424, "step": 2682 }, { - "epoch": 1.1916500111037087, - "grad_norm": 0.7282958984855765, - "learning_rate": 8.853381629827272e-06, - "loss": 0.0506, + "epoch": 2.382770870337478, + "grad_norm": 0.2778588750278949, + "learning_rate": 4.206920011514744e-06, + "loss": 0.0288, "step": 2683 }, { - "epoch": 1.192094159449256, - "grad_norm": 0.5448500153068538, - "learning_rate": 8.852146052666275e-06, - "loss": 0.0404, + "epoch": 2.383658969804618, + "grad_norm": 0.3847579877166627, + "learning_rate": 4.203093675773936e-06, + "loss": 0.0418, "step": 2684 }, { - "epoch": 1.1925383077948035, - "grad_norm": 0.40034874918632524, - "learning_rate": 8.850909896462288e-06, - "loss": 0.0351, + "epoch": 2.3845470692717585, + "grad_norm": 0.3662947708633666, + "learning_rate": 4.199267818832741e-06, + "loss": 0.0353, "step": 2685 }, { - "epoch": 1.1929824561403508, - "grad_norm": 0.6365377232810542, - "learning_rate": 8.849673161401129e-06, - "loss": 0.0616, + "epoch": 2.3854351687388986, + "grad_norm": 0.3462681279029546, + "learning_rate": 4.195442442989819e-06, + "loss": 0.0405, "step": 2686 }, { - "epoch": 1.1934266044858983, - "grad_norm": 0.5874533737153114, - "learning_rate": 8.848435847668699e-06, - "loss": 0.06, + "epoch": 2.386323268206039, + "grad_norm": 0.2827109752420791, + "learning_rate": 4.1916175505435454e-06, + "loss": 0.0339, "step": 2687 }, { - "epoch": 1.1938707528314456, - "grad_norm": 0.4075740080944545, - "learning_rate": 8.847197955450988e-06, - "loss": 0.0399, + "epoch": 2.387211367673179, + "grad_norm": 0.38974521298192905, + "learning_rate": 4.187793143792003e-06, + "loss": 0.0358, "step": 2688 }, { - "epoch": 1.1943149011769931, - "grad_norm": 0.4560997072221143, - "learning_rate": 8.845959484934073e-06, - "loss": 0.0482, + "epoch": 2.3880994671403197, + "grad_norm": 0.46842705755348635, + "learning_rate": 4.183969225032984e-06, + "loss": 0.0484, "step": 2689 }, { - "epoch": 1.1947590495225406, - "grad_norm": 0.7271393081297982, - "learning_rate": 8.844720436304113e-06, - "loss": 0.0694, + "epoch": 2.38898756660746, + "grad_norm": 0.4501375814597474, + "learning_rate": 4.180145796563985e-06, + "loss": 0.0467, "step": 2690 }, { - "epoch": 1.195203197868088, - "grad_norm": 0.5135039124857559, - "learning_rate": 8.843480809747363e-06, - "loss": 0.064, + "epoch": 2.3898756660746003, + "grad_norm": 0.3869689008612881, + "learning_rate": 4.176322860682209e-06, + "loss": 0.0407, "step": 2691 }, { - "epoch": 1.1956473462136354, - "grad_norm": 0.5417817389406259, - "learning_rate": 8.842240605450158e-06, - "loss": 0.0816, + "epoch": 2.390763765541741, + "grad_norm": 0.39653085599217086, + "learning_rate": 4.172500419684566e-06, + "loss": 0.0426, "step": 2692 }, { - "epoch": 1.1960914945591827, - "grad_norm": 0.40641657577367263, - "learning_rate": 8.840999823598921e-06, - "loss": 0.0378, + "epoch": 2.391651865008881, + "grad_norm": 0.4559469979477046, + "learning_rate": 4.168678475867667e-06, + "loss": 0.0518, "step": 2693 }, { - "epoch": 1.1965356429047302, - "grad_norm": 0.7137594073963416, - "learning_rate": 8.839758464380163e-06, - "loss": 0.063, + "epoch": 2.3925399644760215, + "grad_norm": 0.4844137660745055, + "learning_rate": 4.1648570315278195e-06, + "loss": 0.041, "step": 2694 }, { - "epoch": 1.1969797912502775, - "grad_norm": 0.6152412711374633, - "learning_rate": 8.838516527980483e-06, - "loss": 0.0403, + "epoch": 2.3934280639431615, + "grad_norm": 0.5687974521056933, + "learning_rate": 4.161036088961037e-06, + "loss": 0.0376, "step": 2695 }, { - "epoch": 1.197423939595825, - "grad_norm": 0.5995872223176858, - "learning_rate": 8.837274014586564e-06, - "loss": 0.0461, + "epoch": 2.394316163410302, + "grad_norm": 0.3966335434023539, + "learning_rate": 4.15721565046303e-06, + "loss": 0.044, "step": 2696 }, { - "epoch": 1.1978680879413723, - "grad_norm": 0.5251664478854555, - "learning_rate": 8.836030924385175e-06, - "loss": 0.0558, + "epoch": 2.395204262877442, + "grad_norm": 0.3828405283624342, + "learning_rate": 4.153395718329206e-06, + "loss": 0.039, "step": 2697 }, { - "epoch": 1.1983122362869199, - "grad_norm": 0.444869598867083, - "learning_rate": 8.834787257563178e-06, - "loss": 0.0583, + "epoch": 2.3960923623445827, + "grad_norm": 0.3982785775450126, + "learning_rate": 4.149576294854668e-06, + "loss": 0.0398, "step": 2698 }, { - "epoch": 1.1987563846324671, - "grad_norm": 0.5140725378841854, - "learning_rate": 8.833543014307513e-06, - "loss": 0.0513, + "epoch": 2.3969804618117228, + "grad_norm": 0.3918824184327849, + "learning_rate": 4.145757382334212e-06, + "loss": 0.0355, "step": 2699 }, { - "epoch": 1.1992005329780147, - "grad_norm": 0.5779472328853184, - "learning_rate": 8.83229819480521e-06, - "loss": 0.0653, + "epoch": 2.3978685612788633, + "grad_norm": 0.4785647829306533, + "learning_rate": 4.141938983062329e-06, + "loss": 0.0539, "step": 2700 }, { - "epoch": 1.1996446813235622, - "grad_norm": 0.5908242045279332, - "learning_rate": 8.831052799243394e-06, - "loss": 0.0565, + "epoch": 2.3987566607460034, + "grad_norm": 0.5174157755413326, + "learning_rate": 4.1381210993332e-06, + "loss": 0.0398, "step": 2701 }, { - "epoch": 1.2000888296691095, - "grad_norm": 0.9242559999496844, - "learning_rate": 8.82980682780926e-06, - "loss": 0.0628, + "epoch": 2.399644760213144, + "grad_norm": 0.37733907417145135, + "learning_rate": 4.1343037334407e-06, + "loss": 0.051, "step": 2702 }, { - "epoch": 1.200532978014657, - "grad_norm": 0.4108528589278202, - "learning_rate": 8.828560280690104e-06, - "loss": 0.0438, + "epoch": 2.4005328596802844, + "grad_norm": 0.39113603647838174, + "learning_rate": 4.130486887678386e-06, + "loss": 0.0449, "step": 2703 }, { - "epoch": 1.2009771263602043, - "grad_norm": 0.4411744291440838, - "learning_rate": 8.827313158073304e-06, - "loss": 0.0567, + "epoch": 2.4014209591474245, + "grad_norm": 0.4295292054426644, + "learning_rate": 4.12667056433951e-06, + "loss": 0.0595, "step": 2704 }, { - "epoch": 1.2014212747057518, - "grad_norm": 0.5260335018456415, - "learning_rate": 8.826065460146318e-06, - "loss": 0.0579, + "epoch": 2.402309058614565, + "grad_norm": 0.3390281146932373, + "learning_rate": 4.1228547657170025e-06, + "loss": 0.0344, "step": 2705 }, { - "epoch": 1.201865423051299, - "grad_norm": 0.6891291032685649, - "learning_rate": 8.824817187096702e-06, - "loss": 0.056, + "epoch": 2.403197158081705, + "grad_norm": 0.30059656981610866, + "learning_rate": 4.11903949410349e-06, + "loss": 0.0296, "step": 2706 }, { - "epoch": 1.2023095713968466, - "grad_norm": 0.7678244223744226, - "learning_rate": 8.823568339112089e-06, - "loss": 0.0577, + "epoch": 2.4040852575488456, + "grad_norm": 0.38955577602654734, + "learning_rate": 4.115224751791269e-06, + "loss": 0.0509, "step": 2707 }, { - "epoch": 1.2027537197423939, - "grad_norm": 0.605196561971164, - "learning_rate": 8.822318916380207e-06, - "loss": 0.0511, + "epoch": 2.4049733570159857, + "grad_norm": 0.5506825358029819, + "learning_rate": 4.111410541072325e-06, + "loss": 0.0431, "step": 2708 }, { - "epoch": 1.2031978680879414, - "grad_norm": 0.5123811082634186, - "learning_rate": 8.821068919088858e-06, - "loss": 0.0578, + "epoch": 2.405861456483126, + "grad_norm": 0.3805251173156188, + "learning_rate": 4.107596864238325e-06, + "loss": 0.035, "step": 2709 }, { - "epoch": 1.2036420164334887, - "grad_norm": 0.4168805885973522, - "learning_rate": 8.819818347425943e-06, - "loss": 0.0402, + "epoch": 2.4067495559502663, + "grad_norm": 0.42385169806694045, + "learning_rate": 4.103783723580616e-06, + "loss": 0.0493, "step": 2710 }, { - "epoch": 1.2040861647790362, - "grad_norm": 0.4322765343829467, - "learning_rate": 8.818567201579444e-06, - "loss": 0.0404, + "epoch": 2.407637655417407, + "grad_norm": 0.3754169360294611, + "learning_rate": 4.099971121390214e-06, + "loss": 0.0386, "step": 2711 }, { - "epoch": 1.2045303131245837, - "grad_norm": 0.6086948606935972, - "learning_rate": 8.817315481737428e-06, - "loss": 0.0574, + "epoch": 2.408525754884547, + "grad_norm": 0.5621179327428543, + "learning_rate": 4.096159059957822e-06, + "loss": 0.04, "step": 2712 }, { - "epoch": 1.204974461470131, - "grad_norm": 0.5388644202630685, - "learning_rate": 8.816063188088049e-06, - "loss": 0.0526, + "epoch": 2.4094138543516874, + "grad_norm": 0.44413035112259003, + "learning_rate": 4.092347541573814e-06, + "loss": 0.0359, "step": 2713 }, { - "epoch": 1.2054186098156785, - "grad_norm": 0.4833033812042681, - "learning_rate": 8.814810320819551e-06, - "loss": 0.0522, + "epoch": 2.410301953818828, + "grad_norm": 0.6688079934738826, + "learning_rate": 4.088536568528239e-06, + "loss": 0.0336, "step": 2714 }, { - "epoch": 1.2058627581612258, - "grad_norm": 0.9567007955598438, - "learning_rate": 8.81355688012026e-06, - "loss": 0.0926, + "epoch": 2.411190053285968, + "grad_norm": 0.43700648671433046, + "learning_rate": 4.084726143110813e-06, + "loss": 0.0437, "step": 2715 }, { - "epoch": 1.2063069065067733, - "grad_norm": 0.6610578357230338, - "learning_rate": 8.812302866178586e-06, - "loss": 0.0508, + "epoch": 2.412078152753108, + "grad_norm": 0.3094797733618257, + "learning_rate": 4.08091626761093e-06, + "loss": 0.0316, "step": 2716 }, { - "epoch": 1.2067510548523206, - "grad_norm": 0.5170485323761623, - "learning_rate": 8.811048279183034e-06, - "loss": 0.0482, + "epoch": 2.4129662522202486, + "grad_norm": 0.48276689637977, + "learning_rate": 4.077106944317649e-06, + "loss": 0.0456, "step": 2717 }, { - "epoch": 1.207195203197868, - "grad_norm": 0.49170885647162377, - "learning_rate": 8.809793119322188e-06, - "loss": 0.0452, + "epoch": 2.413854351687389, + "grad_norm": 0.447972694388429, + "learning_rate": 4.0732981755197024e-06, + "loss": 0.0336, "step": 2718 }, { - "epoch": 1.2076393515434156, - "grad_norm": 0.6569021320261341, - "learning_rate": 8.808537386784717e-06, - "loss": 0.0602, + "epoch": 2.4147424511545292, + "grad_norm": 0.33650977191476394, + "learning_rate": 4.069489963505482e-06, + "loss": 0.0311, "step": 2719 }, { - "epoch": 1.208083499888963, - "grad_norm": 0.5861436832219081, - "learning_rate": 8.807281081759382e-06, - "loss": 0.0503, + "epoch": 2.4156305506216698, + "grad_norm": 0.4843850307505809, + "learning_rate": 4.065682310563049e-06, + "loss": 0.0434, "step": 2720 }, { - "epoch": 1.2085276482345104, - "grad_norm": 0.6362413163945917, - "learning_rate": 8.806024204435024e-06, - "loss": 0.0619, + "epoch": 2.41651865008881, + "grad_norm": 0.3218363169067914, + "learning_rate": 4.061875218980131e-06, + "loss": 0.0337, "step": 2721 }, { - "epoch": 1.2089717965800577, - "grad_norm": 0.8112427507399114, - "learning_rate": 8.804766755000577e-06, - "loss": 0.0878, + "epoch": 2.4174067495559504, + "grad_norm": 0.38755539137071665, + "learning_rate": 4.058068691044117e-06, + "loss": 0.0356, "step": 2722 }, { - "epoch": 1.2094159449256052, - "grad_norm": 0.6213886119436266, - "learning_rate": 8.803508733645056e-06, - "loss": 0.0391, + "epoch": 2.4182948490230904, + "grad_norm": 0.29684929361619666, + "learning_rate": 4.054262729042052e-06, + "loss": 0.0324, "step": 2723 }, { - "epoch": 1.2098600932711525, - "grad_norm": 0.4701764036600453, - "learning_rate": 8.80225014055756e-06, - "loss": 0.044, + "epoch": 2.419182948490231, + "grad_norm": 0.49865711099936944, + "learning_rate": 4.050457335260648e-06, + "loss": 0.0493, "step": 2724 }, { - "epoch": 1.2103042416167, - "grad_norm": 0.6176542835674168, - "learning_rate": 8.80099097592728e-06, - "loss": 0.0717, + "epoch": 2.420071047957371, + "grad_norm": 0.33211617378906494, + "learning_rate": 4.046652511986273e-06, + "loss": 0.0344, "step": 2725 }, { - "epoch": 1.2107483899622473, - "grad_norm": 0.5676221760627296, - "learning_rate": 8.799731239943488e-06, - "loss": 0.0485, + "epoch": 2.4209591474245116, + "grad_norm": 0.42675436849450327, + "learning_rate": 4.042848261504954e-06, + "loss": 0.0468, "step": 2726 }, { - "epoch": 1.2111925383077948, - "grad_norm": 0.7416139133479585, - "learning_rate": 8.798470932795545e-06, - "loss": 0.0848, + "epoch": 2.4218472468916517, + "grad_norm": 0.5191041505079167, + "learning_rate": 4.039044586102368e-06, + "loss": 0.0453, "step": 2727 }, { - "epoch": 1.2116366866533421, - "grad_norm": 0.8518948114813076, - "learning_rate": 8.797210054672897e-06, - "loss": 0.0603, + "epoch": 2.422735346358792, + "grad_norm": 0.34489502732281585, + "learning_rate": 4.0352414880638515e-06, + "loss": 0.0367, "step": 2728 }, { - "epoch": 1.2120808349988896, - "grad_norm": 0.5447845702100781, - "learning_rate": 8.795948605765071e-06, - "loss": 0.0503, + "epoch": 2.4236234458259327, + "grad_norm": 0.36686567825122324, + "learning_rate": 4.031438969674395e-06, + "loss": 0.0345, "step": 2729 }, { - "epoch": 1.2125249833444371, - "grad_norm": 0.6041323757362492, - "learning_rate": 8.794686586261692e-06, - "loss": 0.0606, + "epoch": 2.424511545293073, + "grad_norm": 0.6001448010579975, + "learning_rate": 4.027637033218638e-06, + "loss": 0.0633, "step": 2730 }, { - "epoch": 1.2129691316899844, - "grad_norm": 0.3720241663572847, - "learning_rate": 8.793423996352458e-06, - "loss": 0.0396, + "epoch": 2.4253996447602133, + "grad_norm": 0.5891148472698688, + "learning_rate": 4.023835680980871e-06, + "loss": 0.0454, "step": 2731 }, { - "epoch": 1.213413280035532, - "grad_norm": 0.6614032276078902, - "learning_rate": 8.792160836227156e-06, - "loss": 0.0778, + "epoch": 2.4262877442273534, + "grad_norm": 0.36258957798737657, + "learning_rate": 4.020034915245033e-06, + "loss": 0.0382, "step": 2732 }, { - "epoch": 1.2138574283810792, - "grad_norm": 0.4371895131814461, - "learning_rate": 8.790897106075665e-06, - "loss": 0.0464, + "epoch": 2.427175843694494, + "grad_norm": 0.33420820115356176, + "learning_rate": 4.016234738294712e-06, + "loss": 0.0345, "step": 2733 }, { - "epoch": 1.2143015767266268, - "grad_norm": 0.42112583525881137, - "learning_rate": 8.78963280608794e-06, - "loss": 0.0283, + "epoch": 2.428063943161634, + "grad_norm": 0.3920069199046036, + "learning_rate": 4.012435152413142e-06, + "loss": 0.0379, "step": 2734 }, { - "epoch": 1.214745725072174, - "grad_norm": 0.8563223231466682, - "learning_rate": 8.788367936454033e-06, - "loss": 0.0588, + "epoch": 2.4289520426287745, + "grad_norm": 0.41356769396612303, + "learning_rate": 4.008636159883202e-06, + "loss": 0.0352, "step": 2735 }, { - "epoch": 1.2151898734177216, - "grad_norm": 0.5558136427910825, - "learning_rate": 8.78710249736407e-06, - "loss": 0.0438, + "epoch": 2.4298401420959146, + "grad_norm": 0.4455515310459408, + "learning_rate": 4.00483776298741e-06, + "loss": 0.0399, "step": 2736 }, { - "epoch": 1.2156340217632688, - "grad_norm": 0.5340710064178852, - "learning_rate": 8.78583648900827e-06, - "loss": 0.0517, + "epoch": 2.430728241563055, + "grad_norm": 0.44120035915825245, + "learning_rate": 4.001039964007934e-06, + "loss": 0.0489, "step": 2737 }, { - "epoch": 1.2160781701088164, - "grad_norm": 0.4855046400709726, - "learning_rate": 8.784569911576937e-06, - "loss": 0.0429, + "epoch": 2.431616341030195, + "grad_norm": 0.5120214650549834, + "learning_rate": 3.99724276522658e-06, + "loss": 0.039, "step": 2738 }, { - "epoch": 1.2165223184543636, - "grad_norm": 0.5862019061765111, - "learning_rate": 8.783302765260456e-06, - "loss": 0.0549, + "epoch": 2.4325044404973357, + "grad_norm": 0.38981652065605005, + "learning_rate": 3.9934461689247875e-06, + "loss": 0.0421, "step": 2739 }, { - "epoch": 1.2169664667999112, - "grad_norm": 0.754950173363004, - "learning_rate": 8.782035050249302e-06, - "loss": 0.051, + "epoch": 2.4333925399644762, + "grad_norm": 0.568470672972159, + "learning_rate": 3.989650177383641e-06, + "loss": 0.0407, "step": 2740 }, { - "epoch": 1.2174106151454587, - "grad_norm": 0.5310823633824483, - "learning_rate": 8.780766766734037e-06, - "loss": 0.0535, + "epoch": 2.4342806394316163, + "grad_norm": 0.40355470611435923, + "learning_rate": 3.98585479288386e-06, + "loss": 0.0356, "step": 2741 }, { - "epoch": 1.217854763491006, - "grad_norm": 0.515619945566938, - "learning_rate": 8.779497914905302e-06, - "loss": 0.0518, + "epoch": 2.435168738898757, + "grad_norm": 0.4116171241446696, + "learning_rate": 3.982060017705798e-06, + "loss": 0.0382, "step": 2742 }, { - "epoch": 1.2182989118365535, - "grad_norm": 0.5419625287890412, - "learning_rate": 8.778228494953826e-06, - "loss": 0.0629, + "epoch": 2.436056838365897, + "grad_norm": 0.40057688779101597, + "learning_rate": 3.978265854129443e-06, + "loss": 0.0328, "step": 2743 }, { - "epoch": 1.2187430601821008, - "grad_norm": 0.41887351869316514, - "learning_rate": 8.776958507070427e-06, - "loss": 0.0436, + "epoch": 2.4369449378330375, + "grad_norm": 0.3094163402024519, + "learning_rate": 3.974472304434415e-06, + "loss": 0.0313, "step": 2744 }, { - "epoch": 1.2191872085276483, - "grad_norm": 0.5475369085076727, - "learning_rate": 8.775687951446007e-06, - "loss": 0.043, + "epoch": 2.4378330373001775, + "grad_norm": 0.4049276533515741, + "learning_rate": 3.970679370899968e-06, + "loss": 0.036, "step": 2745 }, { - "epoch": 1.2196313568731956, - "grad_norm": 0.6765114475250111, - "learning_rate": 8.774416828271548e-06, - "loss": 0.0542, + "epoch": 2.438721136767318, + "grad_norm": 0.47667367548802525, + "learning_rate": 3.966887055804982e-06, + "loss": 0.0476, "step": 2746 }, { - "epoch": 1.220075505218743, - "grad_norm": 0.678214320662665, - "learning_rate": 8.773145137738125e-06, - "loss": 0.0815, + "epoch": 2.439609236234458, + "grad_norm": 0.3766780299327582, + "learning_rate": 3.963095361427966e-06, + "loss": 0.0325, "step": 2747 }, { - "epoch": 1.2205196535642906, - "grad_norm": 0.49324022002296203, - "learning_rate": 8.771872880036893e-06, - "loss": 0.0524, + "epoch": 2.4404973357015987, + "grad_norm": 0.4859547568778557, + "learning_rate": 3.959304290047057e-06, + "loss": 0.0363, "step": 2748 }, { - "epoch": 1.2209638019098379, - "grad_norm": 0.5958708788497837, - "learning_rate": 8.770600055359094e-06, - "loss": 0.0456, + "epoch": 2.4413854351687387, + "grad_norm": 0.4869308975398042, + "learning_rate": 3.9555138439400185e-06, + "loss": 0.0428, "step": 2749 }, { - "epoch": 1.2214079502553852, - "grad_norm": 0.4999935592497019, - "learning_rate": 8.769326663896056e-06, - "loss": 0.0409, + "epoch": 2.4422735346358793, + "grad_norm": 0.30245289236148404, + "learning_rate": 3.95172402538424e-06, + "loss": 0.0309, "step": 2750 }, { - "epoch": 1.2218520986009327, - "grad_norm": 0.43642259806336103, - "learning_rate": 8.76805270583919e-06, - "loss": 0.05, + "epoch": 2.44316163410302, + "grad_norm": 0.5199088586403358, + "learning_rate": 3.947934836656728e-06, + "loss": 0.0422, "step": 2751 }, { - "epoch": 1.2222962469464802, - "grad_norm": 0.6350373365457326, - "learning_rate": 8.766778181379993e-06, - "loss": 0.046, + "epoch": 2.44404973357016, + "grad_norm": 0.5265924905344541, + "learning_rate": 3.944146280034114e-06, + "loss": 0.0486, "step": 2752 }, { - "epoch": 1.2227403952920275, - "grad_norm": 0.49370518082751286, - "learning_rate": 8.765503090710052e-06, - "loss": 0.0504, + "epoch": 2.4449378330373, + "grad_norm": 0.4834636258748378, + "learning_rate": 3.940358357792651e-06, + "loss": 0.0568, "step": 2753 }, { - "epoch": 1.223184543637575, - "grad_norm": 0.6617812231173426, - "learning_rate": 8.76422743402103e-06, - "loss": 0.0524, + "epoch": 2.4458259325044405, + "grad_norm": 0.44039544087123245, + "learning_rate": 3.9365710722082115e-06, + "loss": 0.0406, "step": 2754 }, { - "epoch": 1.2236286919831223, - "grad_norm": 0.46946114196024546, - "learning_rate": 8.762951211504682e-06, - "loss": 0.0459, + "epoch": 2.446714031971581, + "grad_norm": 0.6374575947435365, + "learning_rate": 3.93278442555628e-06, + "loss": 0.0487, "step": 2755 }, { - "epoch": 1.2240728403286698, - "grad_norm": 0.49081430568764717, - "learning_rate": 8.761674423352844e-06, - "loss": 0.0438, + "epoch": 2.447602131438721, + "grad_norm": 0.3804706890254963, + "learning_rate": 3.928998420111962e-06, + "loss": 0.0357, "step": 2756 }, { - "epoch": 1.224516988674217, - "grad_norm": 0.7508698890744291, - "learning_rate": 8.760397069757443e-06, - "loss": 0.0535, + "epoch": 2.4484902309058616, + "grad_norm": 0.4251012836402806, + "learning_rate": 3.925213058149978e-06, + "loss": 0.0367, "step": 2757 }, { - "epoch": 1.2249611370197646, - "grad_norm": 0.7095498367244547, - "learning_rate": 8.759119150910482e-06, - "loss": 0.0439, + "epoch": 2.4493783303730017, + "grad_norm": 0.3798698706885474, + "learning_rate": 3.92142834194466e-06, + "loss": 0.0476, "step": 2758 }, { - "epoch": 1.2254052853653121, - "grad_norm": 0.5423777599181153, - "learning_rate": 8.757840667004059e-06, - "loss": 0.0584, + "epoch": 2.450266429840142, + "grad_norm": 0.37773651740704356, + "learning_rate": 3.917644273769951e-06, + "loss": 0.0333, "step": 2759 }, { - "epoch": 1.2258494337108594, - "grad_norm": 0.38826805829970257, - "learning_rate": 8.756561618230348e-06, - "loss": 0.0372, + "epoch": 2.4511545293072823, + "grad_norm": 0.4482599796707847, + "learning_rate": 3.913860855899406e-06, + "loss": 0.0365, "step": 2760 }, { - "epoch": 1.226293582056407, - "grad_norm": 0.4214243171446597, - "learning_rate": 8.755282004781613e-06, - "loss": 0.0387, + "epoch": 2.452042628774423, + "grad_norm": 0.2763609593786991, + "learning_rate": 3.91007809060619e-06, + "loss": 0.0267, "step": 2761 }, { - "epoch": 1.2267377304019542, - "grad_norm": 0.6110973962790421, - "learning_rate": 8.754001826850201e-06, - "loss": 0.0504, + "epoch": 2.452930728241563, + "grad_norm": 0.4978321758468063, + "learning_rate": 3.906295980163074e-06, + "loss": 0.0416, "step": 2762 }, { - "epoch": 1.2271818787475017, - "grad_norm": 0.5706371024485902, - "learning_rate": 8.752721084628545e-06, - "loss": 0.0447, + "epoch": 2.4538188277087034, + "grad_norm": 0.31397778808375126, + "learning_rate": 3.902514526842441e-06, + "loss": 0.033, "step": 2763 }, { - "epoch": 1.227626027093049, - "grad_norm": 0.7667925876568604, - "learning_rate": 8.751439778309162e-06, - "loss": 0.0661, + "epoch": 2.4547069271758435, + "grad_norm": 0.3851362092370547, + "learning_rate": 3.89873373291627e-06, + "loss": 0.0401, "step": 2764 }, { - "epoch": 1.2280701754385965, - "grad_norm": 0.4947295202945595, - "learning_rate": 8.750157908084655e-06, - "loss": 0.0481, + "epoch": 2.455595026642984, + "grad_norm": 0.438629903479192, + "learning_rate": 3.894953600656149e-06, + "loss": 0.037, "step": 2765 }, { - "epoch": 1.2285143237841438, - "grad_norm": 0.4844603657944363, - "learning_rate": 8.74887547414771e-06, - "loss": 0.0428, + "epoch": 2.4564831261101245, + "grad_norm": 0.46741530520208524, + "learning_rate": 3.891174132333272e-06, + "loss": 0.0438, "step": 2766 }, { - "epoch": 1.2289584721296913, - "grad_norm": 0.5262436629587928, - "learning_rate": 8.747592476691102e-06, - "loss": 0.0572, + "epoch": 2.4573712255772646, + "grad_norm": 0.3797562200330637, + "learning_rate": 3.887395330218429e-06, + "loss": 0.0342, "step": 2767 }, { - "epoch": 1.2294026204752386, - "grad_norm": 0.48653491041855407, - "learning_rate": 8.746308915907681e-06, - "loss": 0.0466, + "epoch": 2.458259325044405, + "grad_norm": 0.3857543496406655, + "learning_rate": 3.883617196582009e-06, + "loss": 0.0424, "step": 2768 }, { - "epoch": 1.2298467688207861, - "grad_norm": 0.3598264709368313, - "learning_rate": 8.745024791990392e-06, - "loss": 0.036, + "epoch": 2.459147424511545, + "grad_norm": 0.4812522645092898, + "learning_rate": 3.879839733694002e-06, + "loss": 0.0532, "step": 2769 }, { - "epoch": 1.2302909171663337, - "grad_norm": 0.4281231861639907, - "learning_rate": 8.74374010513226e-06, - "loss": 0.0375, + "epoch": 2.4600355239786857, + "grad_norm": 0.3485268002136728, + "learning_rate": 3.876062943823996e-06, + "loss": 0.0347, "step": 2770 }, { - "epoch": 1.230735065511881, - "grad_norm": 0.6227157372551906, - "learning_rate": 8.742454855526396e-06, - "loss": 0.06, + "epoch": 2.460923623445826, + "grad_norm": 0.4411614487930039, + "learning_rate": 3.872286829241173e-06, + "loss": 0.0421, "step": 2771 }, { - "epoch": 1.2311792138574285, - "grad_norm": 0.5721086967364898, - "learning_rate": 8.741169043365994e-06, - "loss": 0.0517, + "epoch": 2.4618117229129663, + "grad_norm": 0.48810326046545194, + "learning_rate": 3.868511392214307e-06, + "loss": 0.0422, "step": 2772 }, { - "epoch": 1.2316233622029757, - "grad_norm": 0.4118993047204808, - "learning_rate": 8.739882668844332e-06, - "loss": 0.0433, + "epoch": 2.4626998223801064, + "grad_norm": 0.37294810337952194, + "learning_rate": 3.864736635011769e-06, + "loss": 0.037, "step": 2773 }, { - "epoch": 1.2320675105485233, - "grad_norm": 0.7343040841582446, - "learning_rate": 8.738595732154776e-06, - "loss": 0.0525, + "epoch": 2.463587921847247, + "grad_norm": 0.3728646324306839, + "learning_rate": 3.8609625599015185e-06, + "loss": 0.0366, "step": 2774 }, { - "epoch": 1.2325116588940705, - "grad_norm": 0.4304569080551511, - "learning_rate": 8.737308233490775e-06, - "loss": 0.0548, + "epoch": 2.464476021314387, + "grad_norm": 0.38711426996452747, + "learning_rate": 3.85718916915111e-06, + "loss": 0.0444, "step": 2775 }, { - "epoch": 1.232955807239618, - "grad_norm": 0.5483425130829543, - "learning_rate": 8.736020173045858e-06, - "loss": 0.0547, + "epoch": 2.4653641207815276, + "grad_norm": 0.5261969291466544, + "learning_rate": 3.853416465027679e-06, + "loss": 0.0416, "step": 2776 }, { - "epoch": 1.2333999555851654, - "grad_norm": 0.45127669817592087, - "learning_rate": 8.734731551013648e-06, - "loss": 0.0458, + "epoch": 2.466252220248668, + "grad_norm": 0.2970242562064065, + "learning_rate": 3.849644449797953e-06, + "loss": 0.0361, "step": 2777 }, { - "epoch": 1.2338441039307129, - "grad_norm": 0.5076179219488122, - "learning_rate": 8.733442367587842e-06, - "loss": 0.0495, + "epoch": 2.467140319715808, + "grad_norm": 0.32613001679980447, + "learning_rate": 3.845873125728248e-06, + "loss": 0.0371, "step": 2778 }, { - "epoch": 1.2342882522762602, - "grad_norm": 0.4061357633325624, - "learning_rate": 8.732152622962229e-06, - "loss": 0.038, + "epoch": 2.4680284191829482, + "grad_norm": 0.42788023619013316, + "learning_rate": 3.842102495084463e-06, + "loss": 0.0343, "step": 2779 }, { - "epoch": 1.2347324006218077, - "grad_norm": 0.6061025558148905, - "learning_rate": 8.730862317330678e-06, - "loss": 0.0552, + "epoch": 2.4689165186500888, + "grad_norm": 0.4399667867015698, + "learning_rate": 3.838332560132073e-06, + "loss": 0.0401, "step": 2780 }, { - "epoch": 1.2351765489673552, - "grad_norm": 0.7935186621762401, - "learning_rate": 8.729571450887145e-06, - "loss": 0.0497, + "epoch": 2.4698046181172293, + "grad_norm": 0.7905770446460482, + "learning_rate": 3.834563323136148e-06, + "loss": 0.0412, "step": 2781 }, { - "epoch": 1.2356206973129025, - "grad_norm": 0.4698985875136965, - "learning_rate": 8.728280023825667e-06, - "loss": 0.0449, + "epoch": 2.4706927175843694, + "grad_norm": 0.3454084982147548, + "learning_rate": 3.83079478636133e-06, + "loss": 0.0335, "step": 2782 }, { - "epoch": 1.23606484565845, - "grad_norm": 0.5845370985288897, - "learning_rate": 8.726988036340372e-06, - "loss": 0.0558, + "epoch": 2.47158081705151, + "grad_norm": 0.41736050082198234, + "learning_rate": 3.827026952071843e-06, + "loss": 0.0398, "step": 2783 }, { - "epoch": 1.2365089940039973, - "grad_norm": 0.40081844727764104, - "learning_rate": 8.725695488625463e-06, - "loss": 0.0309, + "epoch": 2.47246891651865, + "grad_norm": 0.414440722711596, + "learning_rate": 3.823259822531486e-06, + "loss": 0.039, "step": 2784 }, { - "epoch": 1.2369531423495448, - "grad_norm": 0.565816281763518, - "learning_rate": 8.724402380875234e-06, - "loss": 0.0527, + "epoch": 2.4733570159857905, + "grad_norm": 0.4383871393630624, + "learning_rate": 3.819493400003639e-06, + "loss": 0.0497, "step": 2785 }, { - "epoch": 1.237397290695092, - "grad_norm": 0.4908483433280354, - "learning_rate": 8.72310871328406e-06, - "loss": 0.0406, + "epoch": 2.4742451154529306, + "grad_norm": 0.38402241311475643, + "learning_rate": 3.8157276867512534e-06, + "loss": 0.0444, "step": 2786 }, { - "epoch": 1.2378414390406396, - "grad_norm": 0.5334199926157142, - "learning_rate": 8.7218144860464e-06, - "loss": 0.0432, + "epoch": 2.475133214920071, + "grad_norm": 0.3591487235416993, + "learning_rate": 3.8119626850368596e-06, + "loss": 0.0336, "step": 2787 }, { - "epoch": 1.238285587386187, - "grad_norm": 0.43486952273189144, - "learning_rate": 8.720519699356804e-06, - "loss": 0.0449, + "epoch": 2.476021314387211, + "grad_norm": 0.4649138044371827, + "learning_rate": 3.808198397122553e-06, + "loss": 0.0411, "step": 2788 }, { - "epoch": 1.2387297357317344, - "grad_norm": 0.5049916355349909, - "learning_rate": 8.719224353409895e-06, - "loss": 0.056, + "epoch": 2.4769094138543517, + "grad_norm": 0.30891025330418503, + "learning_rate": 3.804434825270007e-06, + "loss": 0.029, "step": 2789 }, { - "epoch": 1.239173884077282, - "grad_norm": 0.44547159070564635, - "learning_rate": 8.717928448400387e-06, - "loss": 0.0444, + "epoch": 2.477797513321492, + "grad_norm": 0.4057066028505945, + "learning_rate": 3.800671971740459e-06, + "loss": 0.0373, "step": 2790 }, { - "epoch": 1.2396180324228292, - "grad_norm": 0.4328408997299911, - "learning_rate": 8.716631984523076e-06, - "loss": 0.0461, + "epoch": 2.4786856127886323, + "grad_norm": 0.4485134118853294, + "learning_rate": 3.7969098387947227e-06, + "loss": 0.046, "step": 2791 }, { - "epoch": 1.2400621807683767, - "grad_norm": 0.42920188220498545, - "learning_rate": 8.715334961972844e-06, - "loss": 0.0477, + "epoch": 2.479573712255773, + "grad_norm": 0.3741271494789935, + "learning_rate": 3.793148428693169e-06, + "loss": 0.0373, "step": 2792 }, { - "epoch": 1.240506329113924, - "grad_norm": 0.45226945207712443, - "learning_rate": 8.714037380944655e-06, - "loss": 0.0445, + "epoch": 2.480461811722913, + "grad_norm": 0.3351458516011296, + "learning_rate": 3.7893877436957404e-06, + "loss": 0.0402, "step": 2793 }, { - "epoch": 1.2409504774594715, - "grad_norm": 0.6142043461351961, - "learning_rate": 8.712739241633557e-06, - "loss": 0.0526, + "epoch": 2.4813499111900534, + "grad_norm": 0.3170907636503459, + "learning_rate": 3.7856277860619444e-06, + "loss": 0.0352, "step": 2794 }, { - "epoch": 1.2413946258050188, - "grad_norm": 0.49524623051926364, - "learning_rate": 8.711440544234681e-06, - "loss": 0.0448, + "epoch": 2.4822380106571935, + "grad_norm": 0.3753494565551761, + "learning_rate": 3.7818685580508514e-06, + "loss": 0.0302, "step": 2795 }, { - "epoch": 1.2418387741505663, - "grad_norm": 0.527589137144962, - "learning_rate": 8.710141288943247e-06, - "loss": 0.0681, + "epoch": 2.483126110124334, + "grad_norm": 0.4120030499457759, + "learning_rate": 3.7781100619210863e-06, + "loss": 0.0515, "step": 2796 }, { - "epoch": 1.2422829224961136, - "grad_norm": 0.45949156992557444, - "learning_rate": 8.708841475954551e-06, - "loss": 0.0597, + "epoch": 2.484014209591474, + "grad_norm": 0.3368195602403163, + "learning_rate": 3.7743522999308444e-06, + "loss": 0.034, "step": 2797 }, { - "epoch": 1.2427270708416611, - "grad_norm": 0.5807053135952894, - "learning_rate": 8.707541105463982e-06, - "loss": 0.0602, + "epoch": 2.4849023090586146, + "grad_norm": 0.41438216257087224, + "learning_rate": 3.770595274337874e-06, + "loss": 0.0334, "step": 2798 }, { - "epoch": 1.2431712191872086, - "grad_norm": 0.47202242550842133, - "learning_rate": 8.706240177667003e-06, - "loss": 0.043, + "epoch": 2.4857904085257547, + "grad_norm": 0.33862474385802527, + "learning_rate": 3.7668389873994838e-06, + "loss": 0.0321, "step": 2799 }, { - "epoch": 1.243615367532756, - "grad_norm": 0.5195621264387825, - "learning_rate": 8.704938692759166e-06, - "loss": 0.0384, + "epoch": 2.4866785079928952, + "grad_norm": 0.3582555091253264, + "learning_rate": 3.763083441372535e-06, + "loss": 0.0349, "step": 2800 }, { - "epoch": 1.2440595158783034, - "grad_norm": 0.40308041507113895, - "learning_rate": 8.703636650936108e-06, - "loss": 0.0352, + "epoch": 2.4875666074600353, + "grad_norm": 0.40475396015514215, + "learning_rate": 3.7593286385134465e-06, + "loss": 0.0494, "step": 2801 }, { - "epoch": 1.2445036642238507, - "grad_norm": 0.5663764923556683, - "learning_rate": 8.70233405239355e-06, - "loss": 0.0487, + "epoch": 2.488454706927176, + "grad_norm": 0.38091393825789654, + "learning_rate": 3.7555745810781897e-06, + "loss": 0.0344, "step": 2802 }, { - "epoch": 1.2449478125693982, - "grad_norm": 0.6290221419877656, - "learning_rate": 8.70103089732729e-06, - "loss": 0.0518, + "epoch": 2.4893428063943164, + "grad_norm": 0.339786667559056, + "learning_rate": 3.7518212713222905e-06, + "loss": 0.0345, "step": 2803 }, { - "epoch": 1.2453919609149455, - "grad_norm": 0.4173665256358516, - "learning_rate": 8.699727185933215e-06, - "loss": 0.0352, + "epoch": 2.4902309058614565, + "grad_norm": 0.4273878372700921, + "learning_rate": 3.7480687115008208e-06, + "loss": 0.0443, "step": 2804 }, { - "epoch": 1.245836109260493, - "grad_norm": 0.8204810767617772, - "learning_rate": 8.698422918407299e-06, - "loss": 0.0487, + "epoch": 2.491119005328597, + "grad_norm": 0.500913255920978, + "learning_rate": 3.744316903868407e-06, + "loss": 0.0458, "step": 2805 }, { - "epoch": 1.2462802576060403, - "grad_norm": 0.9850006429206544, - "learning_rate": 8.697118094945593e-06, - "loss": 0.0865, + "epoch": 2.492007104795737, + "grad_norm": 0.321788951593254, + "learning_rate": 3.7405658506792173e-06, + "loss": 0.0318, "step": 2806 }, { - "epoch": 1.2467244059515878, - "grad_norm": 0.48735159482648766, - "learning_rate": 8.695812715744235e-06, - "loss": 0.0358, + "epoch": 2.4928952042628776, + "grad_norm": 0.3168269059365531, + "learning_rate": 3.736815554186978e-06, + "loss": 0.0343, "step": 2807 }, { - "epoch": 1.2471685542971351, - "grad_norm": 0.5010917630261117, - "learning_rate": 8.694506780999444e-06, - "loss": 0.0477, + "epoch": 2.4937833037300177, + "grad_norm": 0.34346931724268553, + "learning_rate": 3.7330660166449456e-06, + "loss": 0.0379, "step": 2808 }, { - "epoch": 1.2476127026426826, - "grad_norm": 0.4869736837475569, - "learning_rate": 8.693200290907525e-06, - "loss": 0.0484, + "epoch": 2.494671403197158, + "grad_norm": 0.3804179544568355, + "learning_rate": 3.729317240305932e-06, + "loss": 0.0374, "step": 2809 }, { - "epoch": 1.2480568509882302, - "grad_norm": 0.3824351180128345, - "learning_rate": 8.691893245664867e-06, - "loss": 0.0352, + "epoch": 2.4955595026642983, + "grad_norm": 0.3949637629728905, + "learning_rate": 3.7255692274222877e-06, + "loss": 0.0347, "step": 2810 }, { - "epoch": 1.2485009993337775, - "grad_norm": 0.5417070708808928, - "learning_rate": 8.690585645467937e-06, - "loss": 0.0447, + "epoch": 2.496447602131439, + "grad_norm": 0.3392338453183855, + "learning_rate": 3.7218219802459066e-06, + "loss": 0.0364, "step": 2811 }, { - "epoch": 1.248945147679325, - "grad_norm": 0.5086804337705053, - "learning_rate": 8.689277490513295e-06, - "loss": 0.05, + "epoch": 2.497335701598579, + "grad_norm": 0.3686146599576966, + "learning_rate": 3.7180755010282168e-06, + "loss": 0.0441, "step": 2812 }, { - "epoch": 1.2493892960248723, - "grad_norm": 0.4431550852814911, - "learning_rate": 8.687968780997576e-06, - "loss": 0.0635, + "epoch": 2.4982238010657194, + "grad_norm": 0.34774061639894743, + "learning_rate": 3.7143297920201914e-06, + "loss": 0.0366, "step": 2813 }, { - "epoch": 1.2498334443704198, - "grad_norm": 0.3783726536150246, - "learning_rate": 8.686659517117501e-06, - "loss": 0.0384, + "epoch": 2.49911190053286, + "grad_norm": 0.49083038717662253, + "learning_rate": 3.710584855472339e-06, + "loss": 0.0393, "step": 2814 }, { - "epoch": 1.250277592715967, - "grad_norm": 0.5331217000478606, - "learning_rate": 8.685349699069875e-06, - "loss": 0.0409, + "epoch": 2.5, + "grad_norm": 0.3572366891807521, + "learning_rate": 3.706840693634703e-06, + "loss": 0.0367, "step": 2815 }, { - "epoch": 1.2507217410615146, - "grad_norm": 0.38074858661235217, - "learning_rate": 8.684039327051586e-06, - "loss": 0.038, + "epoch": 2.50088809946714, + "grad_norm": 0.4455845738306417, + "learning_rate": 3.70309730875686e-06, + "loss": 0.0461, "step": 2816 }, { - "epoch": 1.251165889407062, - "grad_norm": 0.6121662418691846, - "learning_rate": 8.682728401259606e-06, - "loss": 0.0556, + "epoch": 2.5017761989342806, + "grad_norm": 0.5198819104211502, + "learning_rate": 3.699354703087923e-06, + "loss": 0.0376, "step": 2817 }, { - "epoch": 1.2516100377526094, - "grad_norm": 0.8655438293508221, - "learning_rate": 8.681416921890988e-06, - "loss": 0.0559, + "epoch": 2.502664298401421, + "grad_norm": 0.4422211669001438, + "learning_rate": 3.6956128788765344e-06, + "loss": 0.0403, "step": 2818 }, { - "epoch": 1.2520541860981567, - "grad_norm": 0.5646419752199546, - "learning_rate": 8.680104889142871e-06, - "loss": 0.0546, + "epoch": 2.503552397868561, + "grad_norm": 0.46793559864748135, + "learning_rate": 3.6918718383708724e-06, + "loss": 0.0417, "step": 2819 }, { - "epoch": 1.2524983344437042, - "grad_norm": 0.47189997209382956, - "learning_rate": 8.678792303212474e-06, - "loss": 0.038, + "epoch": 2.5044404973357017, + "grad_norm": 0.34059852627396947, + "learning_rate": 3.6881315838186343e-06, + "loss": 0.0338, "step": 2820 }, { - "epoch": 1.2529424827892517, - "grad_norm": 0.5347473900252707, - "learning_rate": 8.677479164297102e-06, - "loss": 0.0506, + "epoch": 2.505328596802842, + "grad_norm": 0.4051312569334247, + "learning_rate": 3.684392117467052e-06, + "loss": 0.0388, "step": 2821 }, { - "epoch": 1.253386631134799, - "grad_norm": 0.6752833223029766, - "learning_rate": 8.676165472594145e-06, - "loss": 0.0553, + "epoch": 2.5062166962699823, + "grad_norm": 0.42607681600785546, + "learning_rate": 3.680653441562885e-06, + "loss": 0.0377, "step": 2822 }, { - "epoch": 1.2538307794803465, - "grad_norm": 0.6290436949126601, - "learning_rate": 8.674851228301066e-06, - "loss": 0.0628, + "epoch": 2.5071047957371224, + "grad_norm": 0.355778634899678, + "learning_rate": 3.6769155583524146e-06, + "loss": 0.0322, "step": 2823 }, { - "epoch": 1.2542749278258938, - "grad_norm": 0.5190239487706246, - "learning_rate": 8.673536431615426e-06, - "loss": 0.053, + "epoch": 2.507992895204263, + "grad_norm": 0.3925274860510811, + "learning_rate": 3.6731784700814476e-06, + "loss": 0.0345, "step": 2824 }, { - "epoch": 1.2547190761714413, - "grad_norm": 0.5039759155119382, - "learning_rate": 8.672221082734857e-06, - "loss": 0.0538, + "epoch": 2.5088809946714035, + "grad_norm": 0.3848164029146997, + "learning_rate": 3.6694421789953106e-06, + "loss": 0.0403, "step": 2825 }, { - "epoch": 1.2551632245169886, - "grad_norm": 0.5007524426018746, - "learning_rate": 8.670905181857078e-06, - "loss": 0.0379, + "epoch": 2.5097690941385435, + "grad_norm": 0.3576375902415348, + "learning_rate": 3.6657066873388535e-06, + "loss": 0.0396, "step": 2826 }, { - "epoch": 1.255607372862536, - "grad_norm": 0.4471624712255033, - "learning_rate": 8.669588729179895e-06, - "loss": 0.0425, + "epoch": 2.5106571936056836, + "grad_norm": 0.40225398181238375, + "learning_rate": 3.6619719973564474e-06, + "loss": 0.0464, "step": 2827 }, { - "epoch": 1.2560515212080836, - "grad_norm": 0.6483988178781206, - "learning_rate": 8.668271724901188e-06, - "loss": 0.0667, + "epoch": 2.511545293072824, + "grad_norm": 0.43150829076536945, + "learning_rate": 3.658238111291977e-06, + "loss": 0.0485, "step": 2828 }, { - "epoch": 1.256495669553631, - "grad_norm": 0.7074590212609329, - "learning_rate": 8.666954169218929e-06, - "loss": 0.0644, + "epoch": 2.5124333925399647, + "grad_norm": 0.5158332048639124, + "learning_rate": 3.654505031388849e-06, + "loss": 0.0435, "step": 2829 }, { - "epoch": 1.2569398178991782, - "grad_norm": 0.4799067719368653, - "learning_rate": 8.665636062331166e-06, - "loss": 0.0516, + "epoch": 2.5133214920071048, + "grad_norm": 0.5704487254885319, + "learning_rate": 3.6507727598899824e-06, + "loss": 0.0308, "step": 2830 }, { - "epoch": 1.2573839662447257, - "grad_norm": 0.4519642992082907, - "learning_rate": 8.664317404436036e-06, - "loss": 0.0388, + "epoch": 2.5142095914742453, + "grad_norm": 0.37261650251003003, + "learning_rate": 3.6470412990378094e-06, + "loss": 0.0401, "step": 2831 }, { - "epoch": 1.2578281145902732, - "grad_norm": 1.9317339882104076, - "learning_rate": 8.662998195731755e-06, - "loss": 0.0445, + "epoch": 2.5150976909413854, + "grad_norm": 0.4524888623403166, + "learning_rate": 3.643310651074283e-06, + "loss": 0.0365, "step": 2832 }, { - "epoch": 1.2582722629358205, - "grad_norm": 0.9801917673057263, - "learning_rate": 8.661678436416621e-06, - "loss": 0.0609, + "epoch": 2.515985790408526, + "grad_norm": 0.36724890281451805, + "learning_rate": 3.6395808182408576e-06, + "loss": 0.0335, "step": 2833 }, { - "epoch": 1.258716411281368, - "grad_norm": 0.4629508087617577, - "learning_rate": 8.660358126689015e-06, - "loss": 0.046, + "epoch": 2.516873889875666, + "grad_norm": 0.3470388730002607, + "learning_rate": 3.635851802778502e-06, + "loss": 0.0312, "step": 2834 }, { - "epoch": 1.2591605596269153, - "grad_norm": 0.55871143511484, - "learning_rate": 8.659037266747405e-06, - "loss": 0.0669, + "epoch": 2.5177619893428065, + "grad_norm": 0.36925857817379404, + "learning_rate": 3.6321236069276974e-06, + "loss": 0.041, "step": 2835 }, { - "epoch": 1.2596047079724628, - "grad_norm": 0.4373848923476879, - "learning_rate": 8.65771585679034e-06, - "loss": 0.0365, + "epoch": 2.5186500888099466, + "grad_norm": 0.42091887871820133, + "learning_rate": 3.6283962329284296e-06, + "loss": 0.0385, "step": 2836 }, { - "epoch": 1.2600488563180101, - "grad_norm": 0.4620726535649706, - "learning_rate": 8.656393897016446e-06, - "loss": 0.0508, + "epoch": 2.519538188277087, + "grad_norm": 0.39060575848952295, + "learning_rate": 3.6246696830201857e-06, + "loss": 0.0346, "step": 2837 }, { - "epoch": 1.2604930046635576, - "grad_norm": 0.7076034863902437, - "learning_rate": 8.655071387624439e-06, - "loss": 0.066, + "epoch": 2.520426287744227, + "grad_norm": 0.41001251654868726, + "learning_rate": 3.620943959441966e-06, + "loss": 0.0407, "step": 2838 }, { - "epoch": 1.2609371530091051, - "grad_norm": 0.7170929705949399, - "learning_rate": 8.653748328813112e-06, - "loss": 0.0663, + "epoch": 2.5213143872113677, + "grad_norm": 0.6384879319000327, + "learning_rate": 3.61721906443227e-06, + "loss": 0.0399, "step": 2839 }, { - "epoch": 1.2613813013546524, - "grad_norm": 0.5278820360047365, - "learning_rate": 8.652424720781346e-06, - "loss": 0.0703, + "epoch": 2.522202486678508, + "grad_norm": 0.6358890048725068, + "learning_rate": 3.6134950002291024e-06, + "loss": 0.0458, "step": 2840 }, { - "epoch": 1.2618254497002, - "grad_norm": 0.6581330490442558, - "learning_rate": 8.6511005637281e-06, - "loss": 0.0475, + "epoch": 2.5230905861456483, + "grad_norm": 0.41654454959931, + "learning_rate": 3.609771769069963e-06, + "loss": 0.0484, "step": 2841 }, { - "epoch": 1.2622695980457472, - "grad_norm": 0.5785660781607422, - "learning_rate": 8.649775857852419e-06, - "loss": 0.0539, + "epoch": 2.5239786856127884, + "grad_norm": 0.41026870628096135, + "learning_rate": 3.6060493731918556e-06, + "loss": 0.0529, "step": 2842 }, { - "epoch": 1.2627137463912947, - "grad_norm": 0.7193486412721348, - "learning_rate": 8.648450603353427e-06, - "loss": 0.0527, + "epoch": 2.524866785079929, + "grad_norm": 0.36026343844245623, + "learning_rate": 3.6023278148312825e-06, + "loss": 0.0277, "step": 2843 }, { - "epoch": 1.263157894736842, - "grad_norm": 0.4879073594292654, - "learning_rate": 8.647124800430332e-06, - "loss": 0.0462, + "epoch": 2.5257548845470694, + "grad_norm": 0.6081041401548583, + "learning_rate": 3.59860709622424e-06, + "loss": 0.0378, "step": 2844 }, { - "epoch": 1.2636020430823895, - "grad_norm": 0.6283549482710774, - "learning_rate": 8.645798449282427e-06, - "loss": 0.047, + "epoch": 2.5266429840142095, + "grad_norm": 0.4201293121477265, + "learning_rate": 3.594887219606221e-06, + "loss": 0.0381, "step": 2845 }, { - "epoch": 1.264046191427937, - "grad_norm": 0.590026590546146, - "learning_rate": 8.644471550109084e-06, - "loss": 0.0417, + "epoch": 2.52753108348135, + "grad_norm": 0.32886891777410415, + "learning_rate": 3.5911681872122134e-06, + "loss": 0.0338, "step": 2846 }, { - "epoch": 1.2644903397734844, - "grad_norm": 0.5727280121631517, - "learning_rate": 8.643144103109757e-06, - "loss": 0.0461, + "epoch": 2.52841918294849, + "grad_norm": 0.37576580295724116, + "learning_rate": 3.587450001276696e-06, + "loss": 0.0382, "step": 2847 }, { - "epoch": 1.2649344881190316, - "grad_norm": 0.5188962100480214, - "learning_rate": 8.641816108483987e-06, - "loss": 0.0528, + "epoch": 2.5293072824156306, + "grad_norm": 0.5649131229696506, + "learning_rate": 3.5837326640336446e-06, + "loss": 0.038, "step": 2848 }, { - "epoch": 1.2653786364645792, - "grad_norm": 0.6762837917534186, - "learning_rate": 8.64048756643139e-06, - "loss": 0.0551, + "epoch": 2.5301953818827707, + "grad_norm": 0.5380441758868038, + "learning_rate": 3.5800161777165142e-06, + "loss": 0.0505, "step": 2849 }, { - "epoch": 1.2658227848101267, - "grad_norm": 0.6063186060719946, - "learning_rate": 8.639158477151673e-06, - "loss": 0.0529, + "epoch": 2.5310834813499112, + "grad_norm": 0.3520682042354716, + "learning_rate": 3.5763005445582598e-06, + "loss": 0.0327, "step": 2850 }, { - "epoch": 1.266266933155674, - "grad_norm": 0.8416294217857067, - "learning_rate": 8.637828840844615e-06, - "loss": 0.0605, + "epoch": 2.5319715808170518, + "grad_norm": 0.46086011318357123, + "learning_rate": 3.572585766791318e-06, + "loss": 0.0503, "step": 2851 }, { - "epoch": 1.2667110815012215, - "grad_norm": 0.42460216144695934, - "learning_rate": 8.636498657710091e-06, - "loss": 0.0406, + "epoch": 2.532859680284192, + "grad_norm": 0.333696984351875, + "learning_rate": 3.5688718466476126e-06, + "loss": 0.0321, "step": 2852 }, { - "epoch": 1.2671552298467688, - "grad_norm": 0.45348809902081694, - "learning_rate": 8.635167927948041e-06, - "loss": 0.0423, + "epoch": 2.533747779751332, + "grad_norm": 0.37443283983859443, + "learning_rate": 3.565158786358551e-06, + "loss": 0.0324, "step": 2853 }, { - "epoch": 1.2675993781923163, - "grad_norm": 0.5925945869143667, - "learning_rate": 8.633836651758502e-06, - "loss": 0.0422, + "epoch": 2.5346358792184724, + "grad_norm": 0.387891504117701, + "learning_rate": 3.561446588155026e-06, + "loss": 0.0368, "step": 2854 }, { - "epoch": 1.2680435265378636, - "grad_norm": 0.474702929089884, - "learning_rate": 8.632504829341588e-06, - "loss": 0.0365, + "epoch": 2.535523978685613, + "grad_norm": 0.35003652913451, + "learning_rate": 3.557735254267411e-06, + "loss": 0.0374, "step": 2855 }, { - "epoch": 1.268487674883411, - "grad_norm": 0.8276698250673459, - "learning_rate": 8.63117246089749e-06, - "loss": 0.0477, + "epoch": 2.536412078152753, + "grad_norm": 0.3848763312813055, + "learning_rate": 3.554024786925562e-06, + "loss": 0.0373, "step": 2856 }, { - "epoch": 1.2689318232289586, - "grad_norm": 0.4891929607601022, - "learning_rate": 8.62983954662649e-06, - "loss": 0.044, + "epoch": 2.5373001776198936, + "grad_norm": 0.5812958824030889, + "learning_rate": 3.5503151883588105e-06, + "loss": 0.0407, "step": 2857 }, { - "epoch": 1.2693759715745059, - "grad_norm": 0.6525774912230711, - "learning_rate": 8.628506086728947e-06, - "loss": 0.0651, + "epoch": 2.5381882770870337, + "grad_norm": 0.3621047327198333, + "learning_rate": 3.5466064607959706e-06, + "loss": 0.042, "step": 2858 }, { - "epoch": 1.2698201199200532, - "grad_norm": 0.6594678233990844, - "learning_rate": 8.6271720814053e-06, - "loss": 0.0381, + "epoch": 2.539076376554174, + "grad_norm": 0.37045340152949985, + "learning_rate": 3.5428986064653292e-06, + "loss": 0.0388, "step": 2859 }, { - "epoch": 1.2702642682656007, - "grad_norm": 0.3241646392485162, - "learning_rate": 8.625837530856074e-06, - "loss": 0.0309, + "epoch": 2.5399644760213143, + "grad_norm": 0.32207390348490944, + "learning_rate": 3.5391916275946524e-06, + "loss": 0.0305, "step": 2860 }, { - "epoch": 1.2707084166111482, - "grad_norm": 0.7122663241070569, - "learning_rate": 8.624502435281875e-06, - "loss": 0.0617, + "epoch": 2.540852575488455, + "grad_norm": 0.394066356311656, + "learning_rate": 3.5354855264111766e-06, + "loss": 0.0358, "step": 2861 }, { - "epoch": 1.2711525649566955, - "grad_norm": 0.7005320306757822, - "learning_rate": 8.623166794883393e-06, - "loss": 0.0581, + "epoch": 2.5417406749555953, + "grad_norm": 0.34432415195908195, + "learning_rate": 3.531780305141611e-06, + "loss": 0.0397, "step": 2862 }, { - "epoch": 1.271596713302243, - "grad_norm": 0.5328754180731429, - "learning_rate": 8.621830609861392e-06, - "loss": 0.0493, + "epoch": 2.5426287744227354, + "grad_norm": 0.36581966863870385, + "learning_rate": 3.528075966012141e-06, + "loss": 0.038, "step": 2863 }, { - "epoch": 1.2720408616477903, - "grad_norm": 0.6269919214433372, - "learning_rate": 8.620493880416727e-06, - "loss": 0.0516, + "epoch": 2.5435168738898755, + "grad_norm": 0.5032688099131521, + "learning_rate": 3.5243725112484195e-06, + "loss": 0.0379, "step": 2864 }, { - "epoch": 1.2724850099933378, - "grad_norm": 0.5844430878482636, - "learning_rate": 8.619156606750329e-06, - "loss": 0.0725, + "epoch": 2.544404973357016, + "grad_norm": 0.38488727674835654, + "learning_rate": 3.5206699430755608e-06, + "loss": 0.0409, "step": 2865 }, { - "epoch": 1.272929158338885, - "grad_norm": 0.4737556741912441, - "learning_rate": 8.617818789063217e-06, - "loss": 0.0421, + "epoch": 2.5452930728241565, + "grad_norm": 0.37304866547132043, + "learning_rate": 3.516968263718159e-06, + "loss": 0.0339, "step": 2866 }, { - "epoch": 1.2733733066844326, - "grad_norm": 0.7524484026464038, - "learning_rate": 8.616480427556484e-06, - "loss": 0.0708, + "epoch": 2.5461811722912966, + "grad_norm": 0.4307250413119771, + "learning_rate": 3.5132674754002647e-06, + "loss": 0.0397, "step": 2867 }, { - "epoch": 1.2738174550299801, - "grad_norm": 0.8150405412731492, - "learning_rate": 8.61514152243131e-06, - "loss": 0.0704, + "epoch": 2.5470692717584367, + "grad_norm": 0.34368311854340644, + "learning_rate": 3.509567580345399e-06, + "loss": 0.0375, "step": 2868 }, { - "epoch": 1.2742616033755274, - "grad_norm": 0.6289008429543912, - "learning_rate": 8.613802073888953e-06, - "loss": 0.0544, + "epoch": 2.547957371225577, + "grad_norm": 0.37156540513181685, + "learning_rate": 3.505868580776541e-06, + "loss": 0.035, "step": 2869 }, { - "epoch": 1.274705751721075, - "grad_norm": 0.49267955313551814, - "learning_rate": 8.612462082130758e-06, - "loss": 0.0423, + "epoch": 2.5488454706927177, + "grad_norm": 0.36206664437516267, + "learning_rate": 3.502170478916136e-06, + "loss": 0.035, "step": 2870 }, { - "epoch": 1.2751499000666222, - "grad_norm": 0.8023016461317273, - "learning_rate": 8.611121547358146e-06, - "loss": 0.0581, + "epoch": 2.549733570159858, + "grad_norm": 0.343392725047896, + "learning_rate": 3.498473276986088e-06, + "loss": 0.0329, "step": 2871 }, { - "epoch": 1.2755940484121697, - "grad_norm": 0.5971368904341509, - "learning_rate": 8.609780469772623e-06, - "loss": 0.0555, + "epoch": 2.5506216696269983, + "grad_norm": 0.3900332030331758, + "learning_rate": 3.494776977207762e-06, + "loss": 0.0336, "step": 2872 }, { - "epoch": 1.276038196757717, - "grad_norm": 0.497140537686175, - "learning_rate": 8.608438849575777e-06, - "loss": 0.0511, + "epoch": 2.5515097690941384, + "grad_norm": 0.3745245008953631, + "learning_rate": 3.4910815818019783e-06, + "loss": 0.0394, "step": 2873 }, { - "epoch": 1.2764823451032645, - "grad_norm": 0.5696597646442083, - "learning_rate": 8.607096686969274e-06, - "loss": 0.0592, + "epoch": 2.552397868561279, + "grad_norm": 0.43092854439295397, + "learning_rate": 3.4873870929890165e-06, + "loss": 0.0519, "step": 2874 }, { - "epoch": 1.276926493448812, - "grad_norm": 0.7494228951484337, - "learning_rate": 8.605753982154865e-06, - "loss": 0.0486, + "epoch": 2.553285968028419, + "grad_norm": 0.33144216286454115, + "learning_rate": 3.4836935129886073e-06, + "loss": 0.0403, "step": 2875 }, { - "epoch": 1.2773706417943593, - "grad_norm": 0.5764143358267766, - "learning_rate": 8.604410735334383e-06, - "loss": 0.0551, + "epoch": 2.5541740674955595, + "grad_norm": 0.44095855076671014, + "learning_rate": 3.4800008440199445e-06, + "loss": 0.0363, "step": 2876 }, { - "epoch": 1.2778147901399066, - "grad_norm": 0.6011073566193142, - "learning_rate": 8.603066946709739e-06, - "loss": 0.0567, + "epoch": 2.5550621669627, + "grad_norm": 0.36678200335324146, + "learning_rate": 3.4763090883016627e-06, + "loss": 0.042, "step": 2877 }, { - "epoch": 1.2782589384854541, - "grad_norm": 0.44537454001664567, - "learning_rate": 8.601722616482927e-06, - "loss": 0.0473, + "epoch": 2.55595026642984, + "grad_norm": 0.4160810683582419, + "learning_rate": 3.4726182480518534e-06, + "loss": 0.0417, "step": 2878 }, { - "epoch": 1.2787030868310016, - "grad_norm": 0.45558471841390763, - "learning_rate": 8.600377744856024e-06, - "loss": 0.0577, + "epoch": 2.55683836589698, + "grad_norm": 0.2806849700019478, + "learning_rate": 3.4689283254880617e-06, + "loss": 0.0321, "step": 2879 }, { - "epoch": 1.279147235176549, - "grad_norm": 0.5192384143571988, - "learning_rate": 8.599032332031185e-06, - "loss": 0.0549, + "epoch": 2.5577264653641207, + "grad_norm": 0.35270937093838695, + "learning_rate": 3.465239322827277e-06, + "loss": 0.0495, "step": 2880 }, { - "epoch": 1.2795913835220964, - "grad_norm": 0.641251450966757, - "learning_rate": 8.59768637821065e-06, - "loss": 0.0599, + "epoch": 2.5586145648312613, + "grad_norm": 0.3625520593586307, + "learning_rate": 3.4615512422859354e-06, + "loss": 0.042, "step": 2881 }, { - "epoch": 1.2800355318676437, - "grad_norm": 0.5747913363539975, - "learning_rate": 8.596339883596738e-06, - "loss": 0.0546, + "epoch": 2.5595026642984013, + "grad_norm": 0.32298762004616643, + "learning_rate": 3.4578640860799204e-06, + "loss": 0.0317, "step": 2882 }, { - "epoch": 1.2804796802131913, - "grad_norm": 0.5416056259419014, - "learning_rate": 8.594992848391852e-06, - "loss": 0.0536, + "epoch": 2.560390763765542, + "grad_norm": 0.36807535325436513, + "learning_rate": 3.454177856424561e-06, + "loss": 0.0376, "step": 2883 }, { - "epoch": 1.2809238285587385, - "grad_norm": 0.6028308551659004, - "learning_rate": 8.59364527279847e-06, - "loss": 0.0699, + "epoch": 2.561278863232682, + "grad_norm": 0.3473177487556965, + "learning_rate": 3.450492555534628e-06, + "loss": 0.0383, "step": 2884 }, { - "epoch": 1.281367976904286, - "grad_norm": 0.4368044045668833, - "learning_rate": 8.59229715701916e-06, - "loss": 0.0396, + "epoch": 2.5621669626998225, + "grad_norm": 0.36229498204501787, + "learning_rate": 3.446808185624335e-06, + "loss": 0.0411, "step": 2885 }, { - "epoch": 1.2818121252498336, - "grad_norm": 0.5104164604694876, - "learning_rate": 8.590948501256564e-06, - "loss": 0.04, + "epoch": 2.5630550621669625, + "grad_norm": 0.37695665100036135, + "learning_rate": 3.443124748907336e-06, + "loss": 0.0407, "step": 2886 }, { - "epoch": 1.2822562735953809, - "grad_norm": 0.7612641229651903, - "learning_rate": 8.58959930571341e-06, - "loss": 0.0624, + "epoch": 2.563943161634103, + "grad_norm": 0.4413943400042454, + "learning_rate": 3.439442247596724e-06, + "loss": 0.042, "step": 2887 }, { - "epoch": 1.2827004219409281, - "grad_norm": 0.5035114097597291, - "learning_rate": 8.588249570592502e-06, - "loss": 0.0437, + "epoch": 2.5648312611012436, + "grad_norm": 0.28336163559055605, + "learning_rate": 3.4357606839050293e-06, + "loss": 0.0332, "step": 2888 }, { - "epoch": 1.2831445702864757, - "grad_norm": 0.499768070561634, - "learning_rate": 8.586899296096731e-06, - "loss": 0.0378, + "epoch": 2.5657193605683837, + "grad_norm": 0.366834742739253, + "learning_rate": 3.4320800600442242e-06, + "loss": 0.0298, "step": 2889 }, { - "epoch": 1.2835887186320232, - "grad_norm": 0.4925272487243439, - "learning_rate": 8.585548482429064e-06, - "loss": 0.0406, + "epoch": 2.5666074600355238, + "grad_norm": 0.4107521682052408, + "learning_rate": 3.4284003782257076e-06, + "loss": 0.0436, "step": 2890 }, { - "epoch": 1.2840328669775705, - "grad_norm": 0.5079455242566149, - "learning_rate": 8.584197129792553e-06, - "loss": 0.049, + "epoch": 2.5674955595026643, + "grad_norm": 0.36825952705836196, + "learning_rate": 3.424721640660316e-06, + "loss": 0.0436, "step": 2891 }, { - "epoch": 1.284477015323118, - "grad_norm": 0.5999702910432165, - "learning_rate": 8.58284523839033e-06, - "loss": 0.0525, + "epoch": 2.568383658969805, + "grad_norm": 0.4261051183471473, + "learning_rate": 3.421043849558323e-06, + "loss": 0.0418, "step": 2892 }, { - "epoch": 1.2849211636686653, - "grad_norm": 0.5762429387843919, - "learning_rate": 8.581492808425604e-06, - "loss": 0.0412, + "epoch": 2.569271758436945, + "grad_norm": 0.34718143598343243, + "learning_rate": 3.41736700712943e-06, + "loss": 0.0455, "step": 2893 }, { - "epoch": 1.2853653120142128, - "grad_norm": 0.5170549640705353, - "learning_rate": 8.58013984010167e-06, - "loss": 0.0426, + "epoch": 2.5701598579040854, + "grad_norm": 0.34991386925439155, + "learning_rate": 3.4136911155827655e-06, + "loss": 0.0398, "step": 2894 }, { - "epoch": 1.28580946035976, - "grad_norm": 0.44644738551782165, - "learning_rate": 8.578786333621902e-06, - "loss": 0.0449, + "epoch": 2.5710479573712255, + "grad_norm": 0.35068336409204465, + "learning_rate": 3.4100161771268903e-06, + "loss": 0.0342, "step": 2895 }, { - "epoch": 1.2862536087053076, - "grad_norm": 0.49787403784527023, - "learning_rate": 8.577432289189755e-06, - "loss": 0.053, + "epoch": 2.571936056838366, + "grad_norm": 0.37072320692567096, + "learning_rate": 3.4063421939697925e-06, + "loss": 0.0284, "step": 2896 }, { - "epoch": 1.286697757050855, - "grad_norm": 0.4201343576902461, - "learning_rate": 8.576077707008766e-06, - "loss": 0.0548, + "epoch": 2.572824156305506, + "grad_norm": 0.4521511118608954, + "learning_rate": 3.4026691683188863e-06, + "loss": 0.0435, "step": 2897 }, { - "epoch": 1.2871419053964024, - "grad_norm": 0.43223595023539274, - "learning_rate": 8.57472258728255e-06, - "loss": 0.0414, + "epoch": 2.5737122557726466, + "grad_norm": 0.374071820358087, + "learning_rate": 3.3989971023810075e-06, + "loss": 0.0372, "step": 2898 }, { - "epoch": 1.2875860537419497, - "grad_norm": 0.45140599595731223, - "learning_rate": 8.573366930214807e-06, - "loss": 0.0512, + "epoch": 2.5746003552397867, + "grad_norm": 0.40806820394640164, + "learning_rate": 3.39532599836242e-06, + "loss": 0.0361, "step": 2899 }, { - "epoch": 1.2880302020874972, - "grad_norm": 0.7864322313017508, - "learning_rate": 8.57201073600931e-06, - "loss": 0.0611, + "epoch": 2.575488454706927, + "grad_norm": 0.45178010575771294, + "learning_rate": 3.3916558584688055e-06, + "loss": 0.0383, "step": 2900 }, { - "epoch": 1.2884743504330447, - "grad_norm": 0.4533939072001686, - "learning_rate": 8.570654004869924e-06, - "loss": 0.0455, + "epoch": 2.5763765541740673, + "grad_norm": 0.3505220127517853, + "learning_rate": 3.3879866849052694e-06, + "loss": 0.0361, "step": 2901 }, { - "epoch": 1.288918498778592, - "grad_norm": 0.5822538472138225, - "learning_rate": 8.569296737000586e-06, - "loss": 0.0689, + "epoch": 2.577264653641208, + "grad_norm": 0.30114347233551425, + "learning_rate": 3.3843184798763345e-06, + "loss": 0.0295, "step": 2902 }, { - "epoch": 1.2893626471241395, - "grad_norm": 0.6558805640817998, - "learning_rate": 8.567938932605315e-06, - "loss": 0.0716, + "epoch": 2.5781527531083483, + "grad_norm": 0.36488150352891885, + "learning_rate": 3.380651245585942e-06, + "loss": 0.0451, "step": 2903 }, { - "epoch": 1.2898067954696868, - "grad_norm": 0.6962929137245736, - "learning_rate": 8.566580591888216e-06, - "loss": 0.0456, + "epoch": 2.5790408525754884, + "grad_norm": 0.3484295377210476, + "learning_rate": 3.376984984237453e-06, + "loss": 0.0359, "step": 2904 }, { - "epoch": 1.2902509438152343, - "grad_norm": 0.5885420821356683, - "learning_rate": 8.565221715053467e-06, - "loss": 0.0519, + "epoch": 2.5799289520426285, + "grad_norm": 0.3194184868830932, + "learning_rate": 3.373319698033642e-06, + "loss": 0.0364, "step": 2905 }, { - "epoch": 1.2906950921607816, - "grad_norm": 0.5367742959382944, - "learning_rate": 8.563862302305333e-06, - "loss": 0.0385, + "epoch": 2.580817051509769, + "grad_norm": 0.3669837511743523, + "learning_rate": 3.3696553891766915e-06, + "loss": 0.0347, "step": 2906 }, { - "epoch": 1.2911392405063291, - "grad_norm": 0.7323581959894143, - "learning_rate": 8.562502353848155e-06, - "loss": 0.0659, + "epoch": 2.5817051509769096, + "grad_norm": 0.3339126721912081, + "learning_rate": 3.3659920598682076e-06, + "loss": 0.0363, "step": 2907 }, { - "epoch": 1.2915833888518766, - "grad_norm": 0.5051887556869058, - "learning_rate": 8.561141869886356e-06, - "loss": 0.0559, + "epoch": 2.5825932504440496, + "grad_norm": 0.34543586647078134, + "learning_rate": 3.3623297123092007e-06, + "loss": 0.0371, "step": 2908 }, { - "epoch": 1.292027537197424, - "grad_norm": 0.5491304399843213, - "learning_rate": 8.55978085062444e-06, - "loss": 0.0639, + "epoch": 2.58348134991119, + "grad_norm": 0.3732936486347056, + "learning_rate": 3.358668348700095e-06, + "loss": 0.0296, "step": 2909 }, { - "epoch": 1.2924716855429714, - "grad_norm": 0.8492250674519668, - "learning_rate": 8.558419296266995e-06, - "loss": 0.0636, + "epoch": 2.5843694493783302, + "grad_norm": 0.298841796117749, + "learning_rate": 3.355007971240719e-06, + "loss": 0.0311, "step": 2910 }, { - "epoch": 1.2929158338885187, - "grad_norm": 0.3882329610931263, - "learning_rate": 8.557057207018681e-06, - "loss": 0.0487, + "epoch": 2.5852575488454708, + "grad_norm": 0.4702763133687615, + "learning_rate": 3.3513485821303133e-06, + "loss": 0.0484, "step": 2911 }, { - "epoch": 1.2933599822340662, - "grad_norm": 0.625233318674613, - "learning_rate": 8.555694583084244e-06, - "loss": 0.0465, + "epoch": 2.586145648312611, + "grad_norm": 0.3860602131308114, + "learning_rate": 3.3476901835675225e-06, + "loss": 0.0317, "step": 2912 }, { - "epoch": 1.2938041305796135, - "grad_norm": 0.4026058166802596, - "learning_rate": 8.554331424668511e-06, - "loss": 0.0354, + "epoch": 2.5870337477797514, + "grad_norm": 0.384854003859277, + "learning_rate": 3.3440327777503965e-06, + "loss": 0.0357, "step": 2913 }, { - "epoch": 1.294248278925161, - "grad_norm": 0.5634837852049973, - "learning_rate": 8.552967731976388e-06, - "loss": 0.0541, + "epoch": 2.587921847246892, + "grad_norm": 0.4383993297060523, + "learning_rate": 3.340376366876389e-06, + "loss": 0.0336, "step": 2914 }, { - "epoch": 1.2946924272707085, - "grad_norm": 0.48751917705852116, - "learning_rate": 8.551603505212862e-06, - "loss": 0.0322, + "epoch": 2.588809946714032, + "grad_norm": 0.356911280912349, + "learning_rate": 3.336720953142354e-06, + "loss": 0.0306, "step": 2915 }, { - "epoch": 1.2951365756162558, - "grad_norm": 0.5156212240116493, - "learning_rate": 8.550238744582997e-06, - "loss": 0.0489, + "epoch": 2.589698046181172, + "grad_norm": 0.33852240382143295, + "learning_rate": 3.333066538744548e-06, + "loss": 0.0346, "step": 2916 }, { - "epoch": 1.2955807239618031, - "grad_norm": 0.6223211171347353, - "learning_rate": 8.548873450291939e-06, - "loss": 0.0457, + "epoch": 2.5905861456483126, + "grad_norm": 0.38118336262773833, + "learning_rate": 3.3294131258786323e-06, + "loss": 0.0386, "step": 2917 }, { - "epoch": 1.2960248723073506, - "grad_norm": 0.47672895894969763, - "learning_rate": 8.547507622544916e-06, - "loss": 0.0463, + "epoch": 2.591474245115453, + "grad_norm": 0.4241710970512753, + "learning_rate": 3.3257607167396544e-06, + "loss": 0.0424, "step": 2918 }, { - "epoch": 1.2964690206528982, - "grad_norm": 0.4421458887458943, - "learning_rate": 8.546141261547238e-06, - "loss": 0.045, + "epoch": 2.592362344582593, + "grad_norm": 0.3569774734083075, + "learning_rate": 3.322109313522067e-06, + "loss": 0.0368, "step": 2919 }, { - "epoch": 1.2969131689984454, - "grad_norm": 0.848623824582431, - "learning_rate": 8.544774367504291e-06, - "loss": 0.0605, + "epoch": 2.5932504440497337, + "grad_norm": 0.5444549033396232, + "learning_rate": 3.3184589184197196e-06, + "loss": 0.0493, "step": 2920 }, { - "epoch": 1.297357317343993, - "grad_norm": 0.554513822402812, - "learning_rate": 8.54340694062154e-06, - "loss": 0.0518, + "epoch": 2.594138543516874, + "grad_norm": 0.2617625106184296, + "learning_rate": 3.314809533625853e-06, + "loss": 0.025, "step": 2921 }, { - "epoch": 1.2978014656895402, - "grad_norm": 0.5649747315400963, - "learning_rate": 8.542038981104532e-06, - "loss": 0.0565, + "epoch": 2.5950266429840143, + "grad_norm": 0.363478572714634, + "learning_rate": 3.3111611613330997e-06, + "loss": 0.0362, "step": 2922 }, { - "epoch": 1.2982456140350878, - "grad_norm": 0.42021656791448186, - "learning_rate": 8.540670489158899e-06, - "loss": 0.0426, + "epoch": 2.5959147424511544, + "grad_norm": 0.3203471913208762, + "learning_rate": 3.307513803733485e-06, + "loss": 0.031, "step": 2923 }, { - "epoch": 1.298689762380635, - "grad_norm": 0.6402654468272049, - "learning_rate": 8.539301464990345e-06, - "loss": 0.0543, + "epoch": 2.596802841918295, + "grad_norm": 0.594434496755036, + "learning_rate": 3.3038674630184277e-06, + "loss": 0.0484, "step": 2924 }, { - "epoch": 1.2991339107261826, - "grad_norm": 0.3618130143994902, - "learning_rate": 8.53793190880466e-06, - "loss": 0.038, + "epoch": 2.5976909413854354, + "grad_norm": 0.5404762678979643, + "learning_rate": 3.3002221413787312e-06, + "loss": 0.0488, "step": 2925 }, { - "epoch": 1.29957805907173, - "grad_norm": 0.3384505689514398, - "learning_rate": 8.536561820807707e-06, - "loss": 0.0346, + "epoch": 2.5985790408525755, + "grad_norm": 0.389023684590373, + "learning_rate": 3.296577841004588e-06, + "loss": 0.0396, "step": 2926 }, { - "epoch": 1.3000222074172774, - "grad_norm": 0.46188012553966973, - "learning_rate": 8.535191201205439e-06, - "loss": 0.0514, + "epoch": 2.5994671403197156, + "grad_norm": 0.3645946034336746, + "learning_rate": 3.2929345640855786e-06, + "loss": 0.0407, "step": 2927 }, { - "epoch": 1.3004663557628247, - "grad_norm": 0.5300445785864025, - "learning_rate": 8.533820050203881e-06, - "loss": 0.0483, + "epoch": 2.600355239786856, + "grad_norm": 0.3698995047619411, + "learning_rate": 3.289292312810667e-06, + "loss": 0.0313, "step": 2928 }, { - "epoch": 1.3009105041083722, - "grad_norm": 0.5536409349327417, - "learning_rate": 8.532448368009139e-06, - "loss": 0.0489, + "epoch": 2.6012433392539966, + "grad_norm": 0.3785277559545529, + "learning_rate": 3.285651089368202e-06, + "loss": 0.0308, "step": 2929 }, { - "epoch": 1.3013546524539197, - "grad_norm": 0.371658656980648, - "learning_rate": 8.531076154827402e-06, - "loss": 0.0445, + "epoch": 2.6021314387211367, + "grad_norm": 0.42401408231598636, + "learning_rate": 3.282010895945913e-06, + "loss": 0.0399, "step": 2930 }, { - "epoch": 1.301798800799467, - "grad_norm": 0.5129545567634131, - "learning_rate": 8.529703410864938e-06, - "loss": 0.0341, + "epoch": 2.6030195381882772, + "grad_norm": 0.39439939566153936, + "learning_rate": 3.278371734730912e-06, + "loss": 0.0317, "step": 2931 }, { - "epoch": 1.3022429491450145, - "grad_norm": 0.6790917670823785, - "learning_rate": 8.52833013632809e-06, - "loss": 0.0604, + "epoch": 2.6039076376554173, + "grad_norm": 0.31028852178930894, + "learning_rate": 3.274733607909689e-06, + "loss": 0.0369, "step": 2932 }, { - "epoch": 1.3026870974905618, - "grad_norm": 0.4758905378740349, - "learning_rate": 8.526956331423289e-06, - "loss": 0.0458, + "epoch": 2.604795737122558, + "grad_norm": 0.37252961773648, + "learning_rate": 3.2710965176681204e-06, + "loss": 0.035, "step": 2933 }, { - "epoch": 1.3031312458361093, - "grad_norm": 0.5428940197749453, - "learning_rate": 8.525581996357036e-06, - "loss": 0.047, + "epoch": 2.605683836589698, + "grad_norm": 0.4535072794437051, + "learning_rate": 3.2674604661914455e-06, + "loss": 0.0371, "step": 2934 }, { - "epoch": 1.3035753941816566, - "grad_norm": 0.5546360540394489, - "learning_rate": 8.52420713133592e-06, - "loss": 0.0585, + "epoch": 2.6065719360568385, + "grad_norm": 0.3969832457752023, + "learning_rate": 3.2638254556642925e-06, + "loss": 0.0325, "step": 2935 }, { - "epoch": 1.304019542527204, - "grad_norm": 0.8256368560390267, - "learning_rate": 8.522831736566607e-06, - "loss": 0.0592, + "epoch": 2.6074600355239785, + "grad_norm": 0.4283737347708285, + "learning_rate": 3.2601914882706564e-06, + "loss": 0.0374, "step": 2936 }, { - "epoch": 1.3044636908727516, - "grad_norm": 0.6807172008845478, - "learning_rate": 8.521455812255843e-06, - "loss": 0.0727, + "epoch": 2.608348134991119, + "grad_norm": 0.4130381818412859, + "learning_rate": 3.256558566193912e-06, + "loss": 0.0376, "step": 2937 }, { - "epoch": 1.304907839218299, - "grad_norm": 0.5972239265610148, - "learning_rate": 8.52007935861045e-06, - "loss": 0.0531, + "epoch": 2.609236234458259, + "grad_norm": 0.42132721189588773, + "learning_rate": 3.2529266916167986e-06, + "loss": 0.0425, "step": 2938 }, { - "epoch": 1.3053519875638464, - "grad_norm": 0.5756047785651232, - "learning_rate": 8.518702375837335e-06, - "loss": 0.0484, + "epoch": 2.6101243339253997, + "grad_norm": 0.43400390122551263, + "learning_rate": 3.2492958667214307e-06, + "loss": 0.0446, "step": 2939 }, { - "epoch": 1.3057961359093937, - "grad_norm": 0.5254079002689842, - "learning_rate": 8.51732486414348e-06, - "loss": 0.0551, + "epoch": 2.61101243339254, + "grad_norm": 0.3466476212168075, + "learning_rate": 3.2456660936892915e-06, + "loss": 0.0325, "step": 2940 }, { - "epoch": 1.3062402842549412, - "grad_norm": 0.5057686654115676, - "learning_rate": 8.515946823735948e-06, - "loss": 0.0555, + "epoch": 2.6119005328596803, + "grad_norm": 0.34737023769869063, + "learning_rate": 3.2420373747012335e-06, + "loss": 0.0364, "step": 2941 }, { - "epoch": 1.3066844326004885, - "grad_norm": 0.5425819759814885, - "learning_rate": 8.514568254821884e-06, - "loss": 0.0461, + "epoch": 2.6127886323268203, + "grad_norm": 0.4005533408704453, + "learning_rate": 3.238409711937472e-06, + "loss": 0.0413, "step": 2942 }, { - "epoch": 1.307128580946036, - "grad_norm": 0.4919842279881122, - "learning_rate": 8.51318915760851e-06, - "loss": 0.0351, + "epoch": 2.613676731793961, + "grad_norm": 0.3947731548238819, + "learning_rate": 3.2347831075775902e-06, + "loss": 0.04, "step": 2943 }, { - "epoch": 1.3075727292915835, - "grad_norm": 0.5651189893610779, - "learning_rate": 8.511809532303126e-06, - "loss": 0.0485, + "epoch": 2.6145648312611014, + "grad_norm": 0.3995688788988381, + "learning_rate": 3.231157563800536e-06, + "loss": 0.0391, "step": 2944 }, { - "epoch": 1.3080168776371308, - "grad_norm": 0.5135664399251805, - "learning_rate": 8.510429379113114e-06, - "loss": 0.0345, + "epoch": 2.6154529307282415, + "grad_norm": 0.4067894054610786, + "learning_rate": 3.227533082784622e-06, + "loss": 0.0352, "step": 2945 }, { - "epoch": 1.308461025982678, - "grad_norm": 0.698576525158577, - "learning_rate": 8.509048698245934e-06, - "loss": 0.0541, + "epoch": 2.616341030195382, + "grad_norm": 0.3548331867833077, + "learning_rate": 3.2239096667075146e-06, + "loss": 0.0339, "step": 2946 }, { - "epoch": 1.3089051743282256, - "grad_norm": 0.5914910850001974, - "learning_rate": 8.507667489909126e-06, - "loss": 0.0422, + "epoch": 2.617229129662522, + "grad_norm": 0.36989131123859503, + "learning_rate": 3.220287317746247e-06, + "loss": 0.0372, "step": 2947 }, { - "epoch": 1.3093493226737731, - "grad_norm": 0.39464116666974663, - "learning_rate": 8.506285754310311e-06, - "loss": 0.0395, + "epoch": 2.6181172291296626, + "grad_norm": 0.37035928762402337, + "learning_rate": 3.216666038077211e-06, + "loss": 0.033, "step": 2948 }, { - "epoch": 1.3097934710193204, - "grad_norm": 0.41501372577846396, - "learning_rate": 8.504903491657185e-06, - "loss": 0.0392, + "epoch": 2.6190053285968027, + "grad_norm": 0.4617474395181904, + "learning_rate": 3.2130458298761545e-06, + "loss": 0.0346, "step": 2949 }, { - "epoch": 1.310237619364868, - "grad_norm": 0.5710008833310989, - "learning_rate": 8.503520702157527e-06, - "loss": 0.0486, + "epoch": 2.619893428063943, + "grad_norm": 0.29683093274114897, + "learning_rate": 3.2094266953181817e-06, + "loss": 0.039, "step": 2950 }, { - "epoch": 1.3106817677104152, - "grad_norm": 0.912615051335769, - "learning_rate": 8.502137386019191e-06, - "loss": 0.0636, + "epoch": 2.6207815275310837, + "grad_norm": 0.3736082637973777, + "learning_rate": 3.2058086365777496e-06, + "loss": 0.043, "step": 2951 }, { - "epoch": 1.3111259160559627, - "grad_norm": 0.543540441011416, - "learning_rate": 8.500753543450118e-06, - "loss": 0.062, + "epoch": 2.621669626998224, + "grad_norm": 0.33112148916076817, + "learning_rate": 3.2021916558286726e-06, + "loss": 0.0411, "step": 2952 }, { - "epoch": 1.31157006440151, - "grad_norm": 0.39754298101965646, - "learning_rate": 8.499369174658318e-06, - "loss": 0.0383, + "epoch": 2.622557726465364, + "grad_norm": 0.3930293105597273, + "learning_rate": 3.1985757552441153e-06, + "loss": 0.0359, "step": 2953 }, { - "epoch": 1.3120142127470575, - "grad_norm": 0.5539227325166909, - "learning_rate": 8.497984279851888e-06, - "loss": 0.0499, + "epoch": 2.6234458259325044, + "grad_norm": 0.3862605851823903, + "learning_rate": 3.194960936996596e-06, + "loss": 0.0289, "step": 2954 }, { - "epoch": 1.312458361092605, - "grad_norm": 0.599023615011808, - "learning_rate": 8.496598859238997e-06, - "loss": 0.0472, + "epoch": 2.624333925399645, + "grad_norm": 0.3151600324706895, + "learning_rate": 3.1913472032579767e-06, + "loss": 0.0329, "step": 2955 }, { - "epoch": 1.3129025094381523, - "grad_norm": 0.4220558992968304, - "learning_rate": 8.495212913027906e-06, - "loss": 0.0446, + "epoch": 2.625222024866785, + "grad_norm": 0.3524541625651139, + "learning_rate": 3.1877345561994733e-06, + "loss": 0.0352, "step": 2956 }, { - "epoch": 1.3133466577836996, - "grad_norm": 0.49593231969327095, - "learning_rate": 8.493826441426937e-06, - "loss": 0.0636, + "epoch": 2.6261101243339255, + "grad_norm": 0.337047036097464, + "learning_rate": 3.1841229979916465e-06, + "loss": 0.0339, "step": 2957 }, { - "epoch": 1.3137908061292471, - "grad_norm": 0.5382980257808322, - "learning_rate": 8.492439444644506e-06, - "loss": 0.0462, + "epoch": 2.6269982238010656, + "grad_norm": 0.35556623893984984, + "learning_rate": 3.180512530804407e-06, + "loss": 0.0344, "step": 2958 }, { - "epoch": 1.3142349544747947, - "grad_norm": 0.4925896725925621, - "learning_rate": 8.4910519228891e-06, - "loss": 0.0602, + "epoch": 2.627886323268206, + "grad_norm": 0.3193329513631799, + "learning_rate": 3.176903156807001e-06, + "loss": 0.0317, "step": 2959 }, { - "epoch": 1.314679102820342, - "grad_norm": 1.1877590532157762, - "learning_rate": 8.489663876369288e-06, - "loss": 0.0723, + "epoch": 2.6287744227353462, + "grad_norm": 0.3298905491531614, + "learning_rate": 3.173294878168025e-06, + "loss": 0.0376, "step": 2960 }, { - "epoch": 1.3151232511658895, - "grad_norm": 0.6920312294903902, - "learning_rate": 8.488275305293715e-06, - "loss": 0.0531, + "epoch": 2.6296625222024868, + "grad_norm": 0.39272454144962626, + "learning_rate": 3.1696876970554168e-06, + "loss": 0.0394, "step": 2961 }, { - "epoch": 1.3155673995114368, - "grad_norm": 0.36761567736761985, - "learning_rate": 8.486886209871108e-06, - "loss": 0.0397, + "epoch": 2.630550621669627, + "grad_norm": 0.3627808441763758, + "learning_rate": 3.1660816156364554e-06, + "loss": 0.0324, "step": 2962 }, { - "epoch": 1.3160115478569843, - "grad_norm": 0.55057942024177, - "learning_rate": 8.485496590310274e-06, - "loss": 0.0591, + "epoch": 2.6314387211367674, + "grad_norm": 0.43259092133880245, + "learning_rate": 3.162476636077752e-06, + "loss": 0.0501, "step": 2963 }, { - "epoch": 1.3164556962025316, - "grad_norm": 0.5710852175463684, - "learning_rate": 8.484106446820094e-06, - "loss": 0.0471, + "epoch": 2.6323268206039074, + "grad_norm": 0.3483421661518904, + "learning_rate": 3.158872760545265e-06, + "loss": 0.0394, "step": 2964 }, { - "epoch": 1.316899844548079, - "grad_norm": 0.5828688111433763, - "learning_rate": 8.482715779609526e-06, - "loss": 0.0551, + "epoch": 2.633214920071048, + "grad_norm": 0.4716050015458488, + "learning_rate": 3.1552699912042838e-06, + "loss": 0.0482, "step": 2965 }, { - "epoch": 1.3173439928936266, - "grad_norm": 0.531963458189058, - "learning_rate": 8.481324588887619e-06, - "loss": 0.0504, + "epoch": 2.6341030195381885, + "grad_norm": 0.4481975800780285, + "learning_rate": 3.151668330219438e-06, + "loss": 0.045, "step": 2966 }, { - "epoch": 1.3177881412391739, - "grad_norm": 0.9382025539874739, - "learning_rate": 8.47993287486349e-06, - "loss": 0.0778, + "epoch": 2.6349911190053286, + "grad_norm": 0.3278300926385035, + "learning_rate": 3.1480677797546844e-06, + "loss": 0.032, "step": 2967 }, { - "epoch": 1.3182322895847212, - "grad_norm": 0.5014993856992925, - "learning_rate": 8.478540637746334e-06, - "loss": 0.0635, + "epoch": 2.6358792184724686, + "grad_norm": 0.33690863420780326, + "learning_rate": 3.144468341973317e-06, + "loss": 0.0336, "step": 2968 }, { - "epoch": 1.3186764379302687, - "grad_norm": 0.396172698958811, - "learning_rate": 8.477147877745431e-06, - "loss": 0.0378, + "epoch": 2.636767317939609, + "grad_norm": 0.3783747932133116, + "learning_rate": 3.140870019037963e-06, + "loss": 0.0359, "step": 2969 }, { - "epoch": 1.3191205862758162, - "grad_norm": 0.5285271119946755, - "learning_rate": 8.475754595070134e-06, - "loss": 0.0544, + "epoch": 2.6376554174067497, + "grad_norm": 0.425392831471685, + "learning_rate": 3.1372728131105755e-06, + "loss": 0.0446, "step": 2970 }, { - "epoch": 1.3195647346213635, - "grad_norm": 0.4032933677589254, - "learning_rate": 8.474360789929881e-06, - "loss": 0.0431, + "epoch": 2.6385435168738898, + "grad_norm": 0.7784890754673194, + "learning_rate": 3.133676726352438e-06, + "loss": 0.0479, "step": 2971 }, { - "epoch": 1.320008882966911, - "grad_norm": 0.45143103762565806, - "learning_rate": 8.47296646253418e-06, - "loss": 0.0509, + "epoch": 2.6394316163410303, + "grad_norm": 0.5111252232332689, + "learning_rate": 3.130081760924163e-06, + "loss": 0.0384, "step": 2972 }, { - "epoch": 1.3204530313124583, - "grad_norm": 0.5252154450521924, - "learning_rate": 8.471571613092626e-06, - "loss": 0.0594, + "epoch": 2.6403197158081704, + "grad_norm": 0.3171858484190841, + "learning_rate": 3.1264879189856872e-06, + "loss": 0.0276, "step": 2973 }, { - "epoch": 1.3208971796580058, - "grad_norm": 0.34642633471428613, - "learning_rate": 8.470176241814886e-06, - "loss": 0.027, + "epoch": 2.641207815275311, + "grad_norm": 0.33107180168551675, + "learning_rate": 3.1228952026962767e-06, + "loss": 0.0355, "step": 2974 }, { - "epoch": 1.321341328003553, - "grad_norm": 0.4784318360661084, - "learning_rate": 8.46878034891071e-06, - "loss": 0.0444, + "epoch": 2.642095914742451, + "grad_norm": 0.4442599606911684, + "learning_rate": 3.119303614214513e-06, + "loss": 0.0385, "step": 2975 }, { - "epoch": 1.3217854763491006, - "grad_norm": 0.580781905109618, - "learning_rate": 8.467383934589923e-06, - "loss": 0.043, + "epoch": 2.6429840142095915, + "grad_norm": 0.3316176091595877, + "learning_rate": 3.115713155698308e-06, + "loss": 0.0327, "step": 2976 }, { - "epoch": 1.322229624694648, - "grad_norm": 0.46709485088977415, - "learning_rate": 8.465986999062427e-06, - "loss": 0.0485, + "epoch": 2.643872113676732, + "grad_norm": 0.3363556403586757, + "learning_rate": 3.1121238293048905e-06, + "loss": 0.0387, "step": 2977 }, { - "epoch": 1.3226737730401954, - "grad_norm": 0.7439128060207424, - "learning_rate": 8.464589542538213e-06, - "loss": 0.0566, + "epoch": 2.644760213143872, + "grad_norm": 0.4465577388620344, + "learning_rate": 3.1085356371908115e-06, + "loss": 0.043, "step": 2978 }, { - "epoch": 1.323117921385743, - "grad_norm": 0.5532502253043584, - "learning_rate": 8.463191565227336e-06, - "loss": 0.0486, + "epoch": 2.645648312611012, + "grad_norm": 0.3669758155338096, + "learning_rate": 3.1049485815119375e-06, + "loss": 0.0392, "step": 2979 }, { - "epoch": 1.3235620697312902, - "grad_norm": 0.46900654723685586, - "learning_rate": 8.461793067339936e-06, - "loss": 0.0384, + "epoch": 2.6465364120781527, + "grad_norm": 0.46008973055391744, + "learning_rate": 3.1013626644234544e-06, + "loss": 0.0554, "step": 2980 }, { - "epoch": 1.3240062180768377, - "grad_norm": 0.4715987465734639, - "learning_rate": 8.460394049086232e-06, - "loss": 0.0467, + "epoch": 2.6474245115452932, + "grad_norm": 0.4244444704806108, + "learning_rate": 3.097777888079864e-06, + "loss": 0.0342, "step": 2981 }, { - "epoch": 1.324450366422385, - "grad_norm": 0.4464995773383881, - "learning_rate": 8.458994510676523e-06, - "loss": 0.0392, + "epoch": 2.6483126110124333, + "grad_norm": 0.367731556869747, + "learning_rate": 3.0941942546349826e-06, + "loss": 0.0439, "step": 2982 }, { - "epoch": 1.3248945147679325, - "grad_norm": 0.5120257592364733, - "learning_rate": 8.457594452321178e-06, - "loss": 0.0468, + "epoch": 2.649200710479574, + "grad_norm": 0.4656929380593179, + "learning_rate": 3.090611766241938e-06, + "loss": 0.0472, "step": 2983 }, { - "epoch": 1.32533866311348, - "grad_norm": 0.5011205376177407, - "learning_rate": 8.456193874230656e-06, - "loss": 0.0479, + "epoch": 2.650088809946714, + "grad_norm": 0.6473793402715186, + "learning_rate": 3.087030425053172e-06, + "loss": 0.0414, "step": 2984 }, { - "epoch": 1.3257828114590273, - "grad_norm": 0.4942109449168538, - "learning_rate": 8.454792776615482e-06, - "loss": 0.0456, + "epoch": 2.6509769094138544, + "grad_norm": 0.3793462821056831, + "learning_rate": 3.0834502332204357e-06, + "loss": 0.0453, "step": 2985 }, { - "epoch": 1.3262269598045746, - "grad_norm": 0.5223005309985502, - "learning_rate": 8.453391159686268e-06, - "loss": 0.0578, + "epoch": 2.6518650088809945, + "grad_norm": 0.37597507486313714, + "learning_rate": 3.0798711928947942e-06, + "loss": 0.0352, "step": 2986 }, { - "epoch": 1.3266711081501221, - "grad_norm": 0.4751851347230112, - "learning_rate": 8.4519890236537e-06, - "loss": 0.0496, + "epoch": 2.652753108348135, + "grad_norm": 0.4146752423449461, + "learning_rate": 3.0762933062266137e-06, + "loss": 0.0367, "step": 2987 }, { - "epoch": 1.3271152564956696, - "grad_norm": 0.46876537416517666, - "learning_rate": 8.450586368728541e-06, - "loss": 0.0556, + "epoch": 2.6536412078152756, + "grad_norm": 0.36351325144612573, + "learning_rate": 3.0727165753655696e-06, + "loss": 0.0405, "step": 2988 }, { - "epoch": 1.327559404841217, - "grad_norm": 0.5342196778194671, - "learning_rate": 8.449183195121638e-06, - "loss": 0.0462, + "epoch": 2.6545293072824157, + "grad_norm": 0.35421094882711285, + "learning_rate": 3.0691410024606473e-06, + "loss": 0.0354, "step": 2989 }, { - "epoch": 1.3280035531867644, - "grad_norm": 0.7891053866526658, - "learning_rate": 8.447779503043907e-06, - "loss": 0.0695, + "epoch": 2.6554174067495557, + "grad_norm": 0.4496461727993196, + "learning_rate": 3.0655665896601328e-06, + "loss": 0.0415, "step": 2990 }, { - "epoch": 1.3284477015323117, - "grad_norm": 0.4837197404803745, - "learning_rate": 8.44637529270635e-06, - "loss": 0.0416, + "epoch": 2.6563055062166963, + "grad_norm": 0.36727983724418684, + "learning_rate": 3.061993339111611e-06, + "loss": 0.0407, "step": 2991 }, { - "epoch": 1.3288918498778592, - "grad_norm": 0.4822831093408252, - "learning_rate": 8.444970564320044e-06, - "loss": 0.0492, + "epoch": 2.657193605683837, + "grad_norm": 0.41815552831259034, + "learning_rate": 3.0584212529619777e-06, + "loss": 0.0339, "step": 2992 }, { - "epoch": 1.3293359982234065, - "grad_norm": 1.1468745158193232, - "learning_rate": 8.443565318096141e-06, - "loss": 0.0829, + "epoch": 2.658081705150977, + "grad_norm": 0.32399233087319707, + "learning_rate": 3.054850333357422e-06, + "loss": 0.0286, "step": 2993 }, { - "epoch": 1.329780146568954, - "grad_norm": 0.3846877899730632, - "learning_rate": 8.442159554245875e-06, - "loss": 0.0383, + "epoch": 2.6589698046181174, + "grad_norm": 0.4371919155295159, + "learning_rate": 3.051280582443436e-06, + "loss": 0.0389, "step": 2994 }, { - "epoch": 1.3302242949145016, - "grad_norm": 0.48249463397520415, - "learning_rate": 8.440753272980555e-06, - "loss": 0.0487, + "epoch": 2.6598579040852575, + "grad_norm": 0.4733948146254441, + "learning_rate": 3.0477120023648054e-06, + "loss": 0.038, "step": 2995 }, { - "epoch": 1.3306684432600488, - "grad_norm": 0.5300221622862935, - "learning_rate": 8.439346474511572e-06, - "loss": 0.0506, + "epoch": 2.660746003552398, + "grad_norm": 0.4172994785595082, + "learning_rate": 3.044144595265617e-06, + "loss": 0.0408, "step": 2996 }, { - "epoch": 1.3311125916055961, - "grad_norm": 0.6183171953601485, - "learning_rate": 8.437939159050388e-06, - "loss": 0.063, + "epoch": 2.661634103019538, + "grad_norm": 0.4185737348870064, + "learning_rate": 3.0405783632892504e-06, + "loss": 0.0382, "step": 2997 }, { - "epoch": 1.3315567399511437, - "grad_norm": 0.5988476945915722, - "learning_rate": 8.43653132680855e-06, - "loss": 0.0625, + "epoch": 2.6625222024866786, + "grad_norm": 0.43850163979209644, + "learning_rate": 3.0370133085783802e-06, + "loss": 0.0342, "step": 2998 }, { - "epoch": 1.3320008882966912, - "grad_norm": 0.5556515413344068, - "learning_rate": 8.435122977997675e-06, - "loss": 0.0491, + "epoch": 2.6634103019538187, + "grad_norm": 0.36394452920851955, + "learning_rate": 3.0334494332749716e-06, + "loss": 0.038, "step": 2999 }, { - "epoch": 1.3324450366422385, - "grad_norm": 0.4154893093213718, - "learning_rate": 8.433714112829464e-06, - "loss": 0.0431, + "epoch": 2.664298401420959, + "grad_norm": 0.8676387559738967, + "learning_rate": 3.0298867395202835e-06, + "loss": 0.0381, "step": 3000 }, { - "epoch": 1.332889184987786, - "grad_norm": 0.6020512275698489, - "learning_rate": 8.432304731515695e-06, - "loss": 0.0596, + "epoch": 2.6651865008880993, + "grad_norm": 0.3211973265446368, + "learning_rate": 3.0263252294548627e-06, + "loss": 0.0379, "step": 3001 }, { - "epoch": 1.3333333333333333, - "grad_norm": 0.41304989339620046, - "learning_rate": 8.430894834268218e-06, - "loss": 0.0436, + "epoch": 2.66607460035524, + "grad_norm": 0.3287240144142483, + "learning_rate": 3.0227649052185516e-06, + "loss": 0.0333, "step": 3002 }, { - "epoch": 1.3337774816788808, - "grad_norm": 0.5204666086414845, - "learning_rate": 8.429484421298968e-06, - "loss": 0.0581, + "epoch": 2.6669626998223803, + "grad_norm": 0.4447839208547026, + "learning_rate": 3.019205768950468e-06, + "loss": 0.036, "step": 3003 }, { - "epoch": 1.334221630024428, - "grad_norm": 0.5429684889982698, - "learning_rate": 8.428073492819953e-06, - "loss": 0.0404, + "epoch": 2.6678507992895204, + "grad_norm": 0.5902655962428176, + "learning_rate": 3.0156478227890248e-06, + "loss": 0.0375, "step": 3004 }, { - "epoch": 1.3346657783699756, - "grad_norm": 0.4188357634818928, - "learning_rate": 8.426662049043258e-06, - "loss": 0.0422, + "epoch": 2.6687388987566605, + "grad_norm": 0.47690906179300235, + "learning_rate": 3.0120910688719185e-06, + "loss": 0.0415, "step": 3005 }, { - "epoch": 1.335109926715523, - "grad_norm": 0.40403921511813046, - "learning_rate": 8.42525009018105e-06, - "loss": 0.0376, + "epoch": 2.669626998223801, + "grad_norm": 0.3396342251266381, + "learning_rate": 3.0085355093361302e-06, + "loss": 0.0379, "step": 3006 }, { - "epoch": 1.3355540750610704, - "grad_norm": 0.40629767747303225, - "learning_rate": 8.423837616445568e-06, - "loss": 0.0404, + "epoch": 2.6705150976909415, + "grad_norm": 0.5088779799265526, + "learning_rate": 3.004981146317919e-06, + "loss": 0.0367, "step": 3007 }, { - "epoch": 1.335998223406618, - "grad_norm": 0.4159409808633564, - "learning_rate": 8.42242462804913e-06, - "loss": 0.0394, + "epoch": 2.6714031971580816, + "grad_norm": 0.36418986487674176, + "learning_rate": 3.001427981952828e-06, + "loss": 0.0342, "step": 3008 }, { - "epoch": 1.3364423717521652, - "grad_norm": 0.7041981820609958, - "learning_rate": 8.421011125204134e-06, - "loss": 0.0693, + "epoch": 2.672291296625222, + "grad_norm": 0.3954559374557829, + "learning_rate": 2.9978760183756805e-06, + "loss": 0.0354, "step": 3009 }, { - "epoch": 1.3368865200977127, - "grad_norm": 0.7951485702237924, - "learning_rate": 8.419597108123054e-06, - "loss": 0.0612, + "epoch": 2.673179396092362, + "grad_norm": 0.3641799103563655, + "learning_rate": 2.9943252577205784e-06, + "loss": 0.0347, "step": 3010 }, { - "epoch": 1.33733066844326, - "grad_norm": 0.3990758518161739, - "learning_rate": 8.418182577018438e-06, - "loss": 0.0511, + "epoch": 2.6740674955595027, + "grad_norm": 0.3308059502230103, + "learning_rate": 2.9907757021208995e-06, + "loss": 0.0352, "step": 3011 }, { - "epoch": 1.3377748167888075, - "grad_norm": 0.9209585349240309, - "learning_rate": 8.416767532102918e-06, - "loss": 0.0416, + "epoch": 2.674955595026643, + "grad_norm": 0.33698903752999726, + "learning_rate": 2.987227353709298e-06, + "loss": 0.0325, "step": 3012 }, { - "epoch": 1.338218965134355, - "grad_norm": 0.5459049872521075, - "learning_rate": 8.415351973589197e-06, - "loss": 0.0434, + "epoch": 2.6758436944937833, + "grad_norm": 0.36608490742160205, + "learning_rate": 2.9836802146177034e-06, + "loss": 0.037, "step": 3013 }, { - "epoch": 1.3386631134799023, - "grad_norm": 0.5057701190286418, - "learning_rate": 8.413935901690057e-06, - "loss": 0.0434, + "epoch": 2.676731793960924, + "grad_norm": 0.3829778651837029, + "learning_rate": 2.9801342869773178e-06, + "loss": 0.0328, "step": 3014 }, { - "epoch": 1.3391072618254496, - "grad_norm": 0.48056478669043734, - "learning_rate": 8.412519316618359e-06, - "loss": 0.0502, + "epoch": 2.677619893428064, + "grad_norm": 0.4413181391001877, + "learning_rate": 2.976589572918619e-06, + "loss": 0.0372, "step": 3015 }, { - "epoch": 1.339551410170997, - "grad_norm": 0.45090544113428555, - "learning_rate": 8.411102218587039e-06, - "loss": 0.0419, + "epoch": 2.678507992895204, + "grad_norm": 0.4096440798592964, + "learning_rate": 2.9730460745713464e-06, + "loss": 0.0351, "step": 3016 }, { - "epoch": 1.3399955585165446, - "grad_norm": 0.4066569281524885, - "learning_rate": 8.40968460780911e-06, - "loss": 0.0368, + "epoch": 2.6793960923623446, + "grad_norm": 0.34925480702046746, + "learning_rate": 2.9695037940645203e-06, + "loss": 0.0361, "step": 3017 }, { - "epoch": 1.340439706862092, - "grad_norm": 0.48568103858702, - "learning_rate": 8.408266484497664e-06, - "loss": 0.0416, + "epoch": 2.680284191829485, + "grad_norm": 0.3937293274808995, + "learning_rate": 2.965962733526423e-06, + "loss": 0.0362, "step": 3018 }, { - "epoch": 1.3408838552076394, - "grad_norm": 0.5224793245797686, - "learning_rate": 8.406847848865871e-06, - "loss": 0.0573, + "epoch": 2.681172291296625, + "grad_norm": 0.4847093542004826, + "learning_rate": 2.9624228950846046e-06, + "loss": 0.0429, "step": 3019 }, { - "epoch": 1.3413280035531867, - "grad_norm": 0.5080673094422519, - "learning_rate": 8.405428701126973e-06, - "loss": 0.0496, + "epoch": 2.6820603907637657, + "grad_norm": 0.43458423290311116, + "learning_rate": 2.9588842808658814e-06, + "loss": 0.0406, "step": 3020 }, { - "epoch": 1.3417721518987342, - "grad_norm": 0.5515622788542163, - "learning_rate": 8.404009041494292e-06, - "loss": 0.054, + "epoch": 2.6829484902309058, + "grad_norm": 0.3815783977598914, + "learning_rate": 2.9553468929963324e-06, + "loss": 0.0446, "step": 3021 }, { - "epoch": 1.3422163002442815, - "grad_norm": 0.4992441781647522, - "learning_rate": 8.40258887018123e-06, - "loss": 0.0383, + "epoch": 2.6838365896980463, + "grad_norm": 0.4052260100848497, + "learning_rate": 2.951810733601304e-06, + "loss": 0.0342, "step": 3022 }, { - "epoch": 1.342660448589829, - "grad_norm": 0.5853495583783108, - "learning_rate": 8.40116818740126e-06, - "loss": 0.0419, + "epoch": 2.6847246891651864, + "grad_norm": 0.3753104263653454, + "learning_rate": 2.948275804805402e-06, + "loss": 0.0352, "step": 3023 }, { - "epoch": 1.3431045969353765, - "grad_norm": 0.47598229779455054, - "learning_rate": 8.399746993367936e-06, - "loss": 0.0342, + "epoch": 2.685612788632327, + "grad_norm": 0.35865529778040856, + "learning_rate": 2.94474210873249e-06, + "loss": 0.0374, "step": 3024 }, { - "epoch": 1.3435487452809238, - "grad_norm": 0.46492013802658844, - "learning_rate": 8.398325288294886e-06, - "loss": 0.043, + "epoch": 2.6865008880994674, + "grad_norm": 0.2891758746772565, + "learning_rate": 2.941209647505695e-06, + "loss": 0.0318, "step": 3025 }, { - "epoch": 1.3439928936264711, - "grad_norm": 0.4287825450323698, - "learning_rate": 8.396903072395819e-06, - "loss": 0.0401, + "epoch": 2.6873889875666075, + "grad_norm": 0.3522820379304216, + "learning_rate": 2.937678423247402e-06, + "loss": 0.0302, "step": 3026 }, { - "epoch": 1.3444370419720186, - "grad_norm": 0.4586017747854982, - "learning_rate": 8.395480345884516e-06, - "loss": 0.0431, + "epoch": 2.6882770870337476, + "grad_norm": 0.4449943279570721, + "learning_rate": 2.9341484380792507e-06, + "loss": 0.0441, "step": 3027 }, { - "epoch": 1.3448811903175661, - "grad_norm": 0.494703530250107, - "learning_rate": 8.39405710897484e-06, - "loss": 0.0462, + "epoch": 2.689165186500888, + "grad_norm": 0.3424612494984106, + "learning_rate": 2.9306196941221367e-06, + "loss": 0.045, "step": 3028 }, { - "epoch": 1.3453253386631134, - "grad_norm": 0.5340552133978543, - "learning_rate": 8.392633361880724e-06, - "loss": 0.0567, + "epoch": 2.6900532859680286, + "grad_norm": 0.3676593409513878, + "learning_rate": 2.9270921934962104e-06, + "loss": 0.038, "step": 3029 }, { - "epoch": 1.345769487008661, - "grad_norm": 0.49912393023299867, - "learning_rate": 8.391209104816183e-06, - "loss": 0.0501, + "epoch": 2.6909413854351687, + "grad_norm": 0.358705160343916, + "learning_rate": 2.9235659383208757e-06, + "loss": 0.0283, "step": 3030 }, { - "epoch": 1.3462136353542082, - "grad_norm": 0.6779311071781129, - "learning_rate": 8.389784337995306e-06, - "loss": 0.0682, + "epoch": 2.691829484902309, + "grad_norm": 0.4925229944074189, + "learning_rate": 2.9200409307147904e-06, + "loss": 0.0494, "step": 3031 }, { - "epoch": 1.3466577836997558, - "grad_norm": 0.47717232493661027, - "learning_rate": 8.388359061632262e-06, - "loss": 0.043, + "epoch": 2.6927175843694493, + "grad_norm": 0.4111014949629559, + "learning_rate": 2.916517172795854e-06, + "loss": 0.0443, "step": 3032 }, { - "epoch": 1.347101932045303, - "grad_norm": 0.47326694390163015, - "learning_rate": 8.386933275941294e-06, - "loss": 0.0456, + "epoch": 2.69360568383659, + "grad_norm": 0.36681988928787235, + "learning_rate": 2.912994666681225e-06, + "loss": 0.0326, "step": 3033 }, { - "epoch": 1.3475460803908506, - "grad_norm": 0.6662990359563103, - "learning_rate": 8.385506981136717e-06, - "loss": 0.0487, + "epoch": 2.69449378330373, + "grad_norm": 0.34229723815761903, + "learning_rate": 2.9094734144873037e-06, + "loss": 0.0355, "step": 3034 }, { - "epoch": 1.347990228736398, - "grad_norm": 0.718683780503916, - "learning_rate": 8.384080177432933e-06, - "loss": 0.0394, + "epoch": 2.6953818827708704, + "grad_norm": 0.36229111471339054, + "learning_rate": 2.9059534183297457e-06, + "loss": 0.0325, "step": 3035 }, { - "epoch": 1.3484343770819454, - "grad_norm": 0.5289766475406907, - "learning_rate": 8.382652865044414e-06, - "loss": 0.0553, + "epoch": 2.6962699822380105, + "grad_norm": 0.35222247668977646, + "learning_rate": 2.902434680323436e-06, + "loss": 0.0384, "step": 3036 }, { - "epoch": 1.3488785254274926, - "grad_norm": 0.6441199621383914, - "learning_rate": 8.381225044185708e-06, - "loss": 0.0522, + "epoch": 2.697158081705151, + "grad_norm": 0.42622225834403915, + "learning_rate": 2.8989172025825175e-06, + "loss": 0.0428, "step": 3037 }, { - "epoch": 1.3493226737730402, - "grad_norm": 0.9166796511578611, - "learning_rate": 8.37979671507144e-06, - "loss": 0.0458, + "epoch": 2.698046181172291, + "grad_norm": 0.41324685295948094, + "learning_rate": 2.8954009872203744e-06, + "loss": 0.0312, "step": 3038 }, { - "epoch": 1.3497668221185877, - "grad_norm": 0.531028146563615, - "learning_rate": 8.378367877916313e-06, - "loss": 0.0522, + "epoch": 2.6989342806394316, + "grad_norm": 0.3637683571924108, + "learning_rate": 2.8918860363496264e-06, + "loss": 0.0436, "step": 3039 }, { - "epoch": 1.350210970464135, - "grad_norm": 0.46742487240112696, - "learning_rate": 8.376938532935106e-06, - "loss": 0.0379, + "epoch": 2.699822380106572, + "grad_norm": 0.3824596679258426, + "learning_rate": 2.888372352082136e-06, + "loss": 0.0386, "step": 3040 }, { - "epoch": 1.3506551188096825, - "grad_norm": 0.5443821001104316, - "learning_rate": 8.375508680342674e-06, - "loss": 0.0439, + "epoch": 2.7007104795737122, + "grad_norm": 0.4166251517678143, + "learning_rate": 2.8848599365290077e-06, + "loss": 0.0389, "step": 3041 }, { - "epoch": 1.3510992671552298, - "grad_norm": 0.5727922217841197, - "learning_rate": 8.374078320353944e-06, - "loss": 0.0424, + "epoch": 2.7015985790408523, + "grad_norm": 0.33828199610338894, + "learning_rate": 2.881348791800579e-06, + "loss": 0.0362, "step": 3042 }, { - "epoch": 1.3515434155007773, - "grad_norm": 0.49458279067850347, - "learning_rate": 8.37264745318393e-06, - "loss": 0.0506, + "epoch": 2.702486678507993, + "grad_norm": 0.4600832400912325, + "learning_rate": 2.8778389200064293e-06, + "loss": 0.0435, "step": 3043 }, { - "epoch": 1.3519875638463246, - "grad_norm": 1.3831663223083857, - "learning_rate": 8.371216079047713e-06, - "loss": 0.0392, + "epoch": 2.7033747779751334, + "grad_norm": 0.43137721874031665, + "learning_rate": 2.8743303232553687e-06, + "loss": 0.0323, "step": 3044 }, { - "epoch": 1.352431712191872, - "grad_norm": 0.4619345954223979, - "learning_rate": 8.369784198160451e-06, - "loss": 0.0413, + "epoch": 2.7042628774422734, + "grad_norm": 0.3809669708160857, + "learning_rate": 2.87082300365544e-06, + "loss": 0.0428, "step": 3045 }, { - "epoch": 1.3528758605374196, - "grad_norm": 0.4847101460239981, - "learning_rate": 8.368351810737383e-06, - "loss": 0.0389, + "epoch": 2.705150976909414, + "grad_norm": 0.43923888562920393, + "learning_rate": 2.867316963313925e-06, + "loss": 0.0366, "step": 3046 }, { - "epoch": 1.3533200088829669, - "grad_norm": 0.5592056103162423, - "learning_rate": 8.366918916993817e-06, - "loss": 0.0527, + "epoch": 2.706039076376554, + "grad_norm": 0.4322001321322257, + "learning_rate": 2.8638122043373332e-06, + "loss": 0.049, "step": 3047 }, { - "epoch": 1.3537641572285144, - "grad_norm": 0.3872649705998668, - "learning_rate": 8.365485517145145e-06, - "loss": 0.0324, + "epoch": 2.7069271758436946, + "grad_norm": 0.4024055032038764, + "learning_rate": 2.8603087288314004e-06, + "loss": 0.0503, "step": 3048 }, { - "epoch": 1.3542083055740617, - "grad_norm": 0.4866737656887245, - "learning_rate": 8.364051611406829e-06, - "loss": 0.0438, + "epoch": 2.7078152753108347, + "grad_norm": 0.4738050977664571, + "learning_rate": 2.8568065389010996e-06, + "loss": 0.0464, "step": 3049 }, { - "epoch": 1.3546524539196092, - "grad_norm": 0.969360380725499, - "learning_rate": 8.362617199994413e-06, - "loss": 0.0471, + "epoch": 2.708703374777975, + "grad_norm": 0.32485361335492613, + "learning_rate": 2.853305636650623e-06, + "loss": 0.0356, "step": 3050 }, { - "epoch": 1.3550966022651565, - "grad_norm": 0.582815842887394, - "learning_rate": 8.36118228312351e-06, - "loss": 0.042, + "epoch": 2.7095914742451157, + "grad_norm": 0.35248987030964635, + "learning_rate": 2.849806024183397e-06, + "loss": 0.0376, "step": 3051 }, { - "epoch": 1.355540750610704, - "grad_norm": 0.721376890360811, - "learning_rate": 8.359746861009812e-06, - "loss": 0.0625, + "epoch": 2.710479573712256, + "grad_norm": 0.4176054705775532, + "learning_rate": 2.846307703602065e-06, + "loss": 0.0486, "step": 3052 }, { - "epoch": 1.3559848989562515, - "grad_norm": 0.6319029678683107, - "learning_rate": 8.358310933869091e-06, - "loss": 0.0645, + "epoch": 2.711367673179396, + "grad_norm": 0.34139511690655033, + "learning_rate": 2.842810677008502e-06, + "loss": 0.0324, "step": 3053 }, { - "epoch": 1.3564290473017988, - "grad_norm": 2.7049677243291828, - "learning_rate": 8.356874501917188e-06, - "loss": 0.0531, + "epoch": 2.7122557726465364, + "grad_norm": 0.31741424501326176, + "learning_rate": 2.8393149465037985e-06, + "loss": 0.0321, "step": 3054 }, { - "epoch": 1.356873195647346, - "grad_norm": 0.3779085536381152, - "learning_rate": 8.355437565370022e-06, - "loss": 0.0356, + "epoch": 2.713143872113677, + "grad_norm": 0.41725709223720847, + "learning_rate": 2.8358205141882735e-06, + "loss": 0.0315, "step": 3055 }, { - "epoch": 1.3573173439928936, - "grad_norm": 0.7424758490934648, - "learning_rate": 8.354000124443594e-06, - "loss": 0.0524, + "epoch": 2.714031971580817, + "grad_norm": 0.4440463575338334, + "learning_rate": 2.83232738216146e-06, + "loss": 0.0391, "step": 3056 }, { - "epoch": 1.3577614923384411, - "grad_norm": 0.6071475225743952, - "learning_rate": 8.352562179353971e-06, - "loss": 0.0476, + "epoch": 2.7149200710479575, + "grad_norm": 0.3400860185121632, + "learning_rate": 2.8288355525221095e-06, + "loss": 0.0344, "step": 3057 }, { - "epoch": 1.3582056406839884, - "grad_norm": 0.4823292349978002, - "learning_rate": 8.351123730317303e-06, - "loss": 0.037, + "epoch": 2.7158081705150976, + "grad_norm": 0.39726409592181433, + "learning_rate": 2.8253450273681983e-06, + "loss": 0.0337, "step": 3058 }, { - "epoch": 1.358649789029536, - "grad_norm": 0.42940088227364875, - "learning_rate": 8.349684777549813e-06, - "loss": 0.032, + "epoch": 2.716696269982238, + "grad_norm": 0.3542869938190121, + "learning_rate": 2.8218558087969113e-06, + "loss": 0.0385, "step": 3059 }, { - "epoch": 1.3590939373750832, - "grad_norm": 0.6291618703403149, - "learning_rate": 8.348245321267798e-06, - "loss": 0.0542, + "epoch": 2.717584369449378, + "grad_norm": 0.42471033370910666, + "learning_rate": 2.818367898904649e-06, + "loss": 0.0455, "step": 3060 }, { - "epoch": 1.3595380857206307, - "grad_norm": 0.46443218939790437, - "learning_rate": 8.346805361687637e-06, - "loss": 0.0482, + "epoch": 2.7184724689165187, + "grad_norm": 0.38325242452874897, + "learning_rate": 2.8148812997870325e-06, + "loss": 0.0425, "step": 3061 }, { - "epoch": 1.359982234066178, - "grad_norm": 0.44266144811788105, - "learning_rate": 8.345364899025776e-06, - "loss": 0.0508, + "epoch": 2.719360568383659, + "grad_norm": 0.35984571631548623, + "learning_rate": 2.8113960135388863e-06, + "loss": 0.0387, "step": 3062 }, { - "epoch": 1.3604263824117255, - "grad_norm": 0.5931758192805282, - "learning_rate": 8.343923933498742e-06, - "loss": 0.052, + "epoch": 2.7202486678507993, + "grad_norm": 0.34441000717761105, + "learning_rate": 2.807912042254254e-06, + "loss": 0.0383, "step": 3063 }, { - "epoch": 1.360870530757273, - "grad_norm": 0.4228622965921645, - "learning_rate": 8.342482465323141e-06, - "loss": 0.0342, + "epoch": 2.7211367673179394, + "grad_norm": 0.3205683697649063, + "learning_rate": 2.804429388026383e-06, + "loss": 0.0334, "step": 3064 }, { - "epoch": 1.3613146791028203, - "grad_norm": 0.6573207468033229, - "learning_rate": 8.341040494715644e-06, - "loss": 0.054, + "epoch": 2.72202486678508, + "grad_norm": 0.34943096582410743, + "learning_rate": 2.8009480529477316e-06, + "loss": 0.0384, "step": 3065 }, { - "epoch": 1.3617588274483676, - "grad_norm": 0.694757132531818, - "learning_rate": 8.339598021893007e-06, - "loss": 0.0573, + "epoch": 2.7229129662522205, + "grad_norm": 0.3463969882660535, + "learning_rate": 2.797468039109966e-06, + "loss": 0.0381, "step": 3066 }, { - "epoch": 1.3622029757939151, - "grad_norm": 0.5168331449919007, - "learning_rate": 8.338155047072058e-06, - "loss": 0.0476, + "epoch": 2.7238010657193605, + "grad_norm": 0.3969513229946938, + "learning_rate": 2.793989348603964e-06, + "loss": 0.0357, "step": 3067 }, { - "epoch": 1.3626471241394627, - "grad_norm": 0.5294718006665416, - "learning_rate": 8.336711570469698e-06, - "loss": 0.0536, + "epoch": 2.7246891651865006, + "grad_norm": 0.4109948891742574, + "learning_rate": 2.7905119835197934e-06, + "loss": 0.0362, "step": 3068 }, { - "epoch": 1.36309127248501, - "grad_norm": 0.5902958008882144, - "learning_rate": 8.33526759230291e-06, - "loss": 0.0543, + "epoch": 2.725577264653641, + "grad_norm": 0.37030197164004486, + "learning_rate": 2.78703594594674e-06, + "loss": 0.0438, "step": 3069 }, { - "epoch": 1.3635354208305575, - "grad_norm": 0.5123768514504031, - "learning_rate": 8.333823112788747e-06, - "loss": 0.055, + "epoch": 2.7264653641207817, + "grad_norm": 0.3286524903853804, + "learning_rate": 2.783561237973289e-06, + "loss": 0.0337, "step": 3070 }, { - "epoch": 1.3639795691761047, - "grad_norm": 0.4051444968388966, - "learning_rate": 8.332378132144336e-06, - "loss": 0.0408, + "epoch": 2.7273534635879217, + "grad_norm": 0.34347142029845, + "learning_rate": 2.7800878616871197e-06, + "loss": 0.0317, "step": 3071 }, { - "epoch": 1.3644237175216523, - "grad_norm": 0.45045135771966455, - "learning_rate": 8.330932650586887e-06, - "loss": 0.0449, + "epoch": 2.7282415630550623, + "grad_norm": 0.4061876988817035, + "learning_rate": 2.7766158191751204e-06, + "loss": 0.0406, "step": 3072 }, { - "epoch": 1.3648678658671995, - "grad_norm": 0.6922855447849937, - "learning_rate": 8.329486668333677e-06, - "loss": 0.052, + "epoch": 2.7291296625222023, + "grad_norm": 0.35728304118602083, + "learning_rate": 2.7731451125233723e-06, + "loss": 0.0372, "step": 3073 }, { - "epoch": 1.365312014212747, - "grad_norm": 0.3651988449178566, - "learning_rate": 8.328040185602063e-06, - "loss": 0.0383, + "epoch": 2.730017761989343, + "grad_norm": 0.49859679294884096, + "learning_rate": 2.769675743817154e-06, + "loss": 0.0369, "step": 3074 }, { - "epoch": 1.3657561625582946, - "grad_norm": 0.6637690048102727, - "learning_rate": 8.326593202609475e-06, - "loss": 0.0571, + "epoch": 2.730905861456483, + "grad_norm": 0.3541473907240671, + "learning_rate": 2.7662077151409433e-06, + "loss": 0.0368, "step": 3075 }, { - "epoch": 1.3662003109038419, - "grad_norm": 0.5428561517455449, - "learning_rate": 8.325145719573419e-06, - "loss": 0.0384, + "epoch": 2.7317939609236235, + "grad_norm": 0.33061854975846117, + "learning_rate": 2.7627410285784164e-06, + "loss": 0.0292, "step": 3076 }, { - "epoch": 1.3666444592493894, - "grad_norm": 0.7398928249310872, - "learning_rate": 8.323697736711478e-06, - "loss": 0.0535, + "epoch": 2.732682060390764, + "grad_norm": 0.3590679714754361, + "learning_rate": 2.7592756862124283e-06, + "loss": 0.0365, "step": 3077 }, { - "epoch": 1.3670886075949367, - "grad_norm": 0.6968067508458905, - "learning_rate": 8.322249254241309e-06, - "loss": 0.0624, + "epoch": 2.733570159857904, + "grad_norm": 0.3032153657146553, + "learning_rate": 2.755811690125042e-06, + "loss": 0.0281, "step": 3078 }, { - "epoch": 1.3675327559404842, - "grad_norm": 0.42153823690397146, - "learning_rate": 8.320800272380639e-06, - "loss": 0.036, + "epoch": 2.734458259325044, + "grad_norm": 0.3483171659209983, + "learning_rate": 2.7523490423975068e-06, + "loss": 0.0343, "step": 3079 }, { - "epoch": 1.3679769042860315, - "grad_norm": 0.7070448290401131, - "learning_rate": 8.319350791347279e-06, - "loss": 0.0682, + "epoch": 2.7353463587921847, + "grad_norm": 0.4175432531434774, + "learning_rate": 2.7488877451102595e-06, + "loss": 0.0422, "step": 3080 }, { - "epoch": 1.368421052631579, - "grad_norm": 0.5028115971086855, - "learning_rate": 8.31790081135911e-06, - "loss": 0.0543, + "epoch": 2.736234458259325, + "grad_norm": 0.38632308931942116, + "learning_rate": 2.7454278003429247e-06, + "loss": 0.031, "step": 3081 }, { - "epoch": 1.3688652009771265, - "grad_norm": 0.5307360164451863, - "learning_rate": 8.316450332634084e-06, - "loss": 0.0535, + "epoch": 2.7371225577264653, + "grad_norm": 0.4132379943308144, + "learning_rate": 2.741969210174321e-06, + "loss": 0.0389, "step": 3082 }, { - "epoch": 1.3693093493226738, - "grad_norm": 0.6501199496749419, - "learning_rate": 8.31499935539024e-06, - "loss": 0.0461, + "epoch": 2.738010657193606, + "grad_norm": 0.3592539348863347, + "learning_rate": 2.7385119766824442e-06, + "loss": 0.0354, "step": 3083 }, { - "epoch": 1.369753497668221, - "grad_norm": 0.5797446911962002, - "learning_rate": 8.313547879845682e-06, - "loss": 0.0472, + "epoch": 2.738898756660746, + "grad_norm": 0.3517297203952675, + "learning_rate": 2.7350561019444843e-06, + "loss": 0.0299, "step": 3084 }, { - "epoch": 1.3701976460137686, - "grad_norm": 0.4268924633424178, - "learning_rate": 8.312095906218588e-06, - "loss": 0.0447, + "epoch": 2.7397868561278864, + "grad_norm": 0.3338412132014452, + "learning_rate": 2.7316015880368075e-06, + "loss": 0.0364, "step": 3085 }, { - "epoch": 1.370641794359316, - "grad_norm": 0.6304839984425172, - "learning_rate": 8.310643434727216e-06, - "loss": 0.0625, + "epoch": 2.7406749555950265, + "grad_norm": 0.33894518891668773, + "learning_rate": 2.728148437034963e-06, + "loss": 0.0278, "step": 3086 }, { - "epoch": 1.3710859427048634, - "grad_norm": 0.5665653851714997, - "learning_rate": 8.3091904655899e-06, - "loss": 0.0539, + "epoch": 2.741563055062167, + "grad_norm": 0.37497061261448794, + "learning_rate": 2.7246966510136874e-06, + "loss": 0.0361, "step": 3087 }, { - "epoch": 1.371530091050411, - "grad_norm": 0.40002359226927725, - "learning_rate": 8.307736999025043e-06, - "loss": 0.0329, + "epoch": 2.7424511545293075, + "grad_norm": 0.3731636362628687, + "learning_rate": 2.721246232046891e-06, + "loss": 0.0362, "step": 3088 }, { - "epoch": 1.3719742393959582, - "grad_norm": 0.7513234692327438, - "learning_rate": 8.306283035251125e-06, - "loss": 0.0648, + "epoch": 2.7433392539964476, + "grad_norm": 0.3752080198911008, + "learning_rate": 2.717797182207663e-06, + "loss": 0.0401, "step": 3089 }, { - "epoch": 1.3724183877415057, - "grad_norm": 0.5469597650300881, - "learning_rate": 8.304828574486704e-06, - "loss": 0.0432, + "epoch": 2.7442273534635877, + "grad_norm": 0.31885751006488333, + "learning_rate": 2.7143495035682758e-06, + "loss": 0.0313, "step": 3090 }, { - "epoch": 1.372862536087053, - "grad_norm": 0.48401783163541084, - "learning_rate": 8.303373616950408e-06, - "loss": 0.0457, + "epoch": 2.7451154529307282, + "grad_norm": 0.4366593177861865, + "learning_rate": 2.7109031982001693e-06, + "loss": 0.0401, "step": 3091 }, { - "epoch": 1.3733066844326005, - "grad_norm": 0.4692359706390245, - "learning_rate": 8.301918162860944e-06, - "loss": 0.0422, + "epoch": 2.7460035523978688, + "grad_norm": 0.36538923873440216, + "learning_rate": 2.707458268173967e-06, + "loss": 0.038, "step": 3092 }, { - "epoch": 1.373750832778148, - "grad_norm": 0.5378714608611251, - "learning_rate": 8.30046221243709e-06, - "loss": 0.0513, + "epoch": 2.746891651865009, + "grad_norm": 0.43090700112460756, + "learning_rate": 2.7040147155594597e-06, + "loss": 0.0353, "step": 3093 }, { - "epoch": 1.3741949811236953, - "grad_norm": 0.5625724873607021, - "learning_rate": 8.2990057658977e-06, - "loss": 0.0532, + "epoch": 2.7477797513321494, + "grad_norm": 0.3470842529256806, + "learning_rate": 2.7005725424256113e-06, + "loss": 0.0409, "step": 3094 }, { - "epoch": 1.3746391294692426, - "grad_norm": 0.4899279935109507, - "learning_rate": 8.297548823461704e-06, - "loss": 0.0454, + "epoch": 2.7486678507992894, + "grad_norm": 0.4966612143178959, + "learning_rate": 2.6971317508405605e-06, + "loss": 0.0443, "step": 3095 }, { - "epoch": 1.3750832778147901, - "grad_norm": 0.7726706942428412, - "learning_rate": 8.296091385348104e-06, - "loss": 0.0697, + "epoch": 2.74955595026643, + "grad_norm": 0.401111051805152, + "learning_rate": 2.6936923428716156e-06, + "loss": 0.0407, "step": 3096 }, { - "epoch": 1.3755274261603376, - "grad_norm": 0.5012946731170276, - "learning_rate": 8.294633451775977e-06, - "loss": 0.0378, + "epoch": 2.75044404973357, + "grad_norm": 0.3436742483620814, + "learning_rate": 2.6902543205852496e-06, + "loss": 0.037, "step": 3097 }, { - "epoch": 1.375971574505885, - "grad_norm": 0.48151498654702213, - "learning_rate": 8.293175022964476e-06, - "loss": 0.0434, + "epoch": 2.7513321492007106, + "grad_norm": 0.42055982354584304, + "learning_rate": 2.686817686047104e-06, + "loss": 0.0432, "step": 3098 }, { - "epoch": 1.3764157228514324, - "grad_norm": 0.4130737267146749, - "learning_rate": 8.291716099132829e-06, - "loss": 0.0359, + "epoch": 2.7522202486678506, + "grad_norm": 0.32998544501435473, + "learning_rate": 2.6833824413219913e-06, + "loss": 0.0344, "step": 3099 }, { - "epoch": 1.3768598711969797, - "grad_norm": 0.4681903495603353, - "learning_rate": 8.290256680500336e-06, - "loss": 0.0398, + "epoch": 2.753108348134991, + "grad_norm": 0.33682246330946336, + "learning_rate": 2.6799485884738828e-06, + "loss": 0.0315, "step": 3100 }, { - "epoch": 1.3773040195425272, - "grad_norm": 0.6272554974390412, - "learning_rate": 8.28879676728637e-06, - "loss": 0.0627, + "epoch": 2.7539964476021312, + "grad_norm": 0.3852709983882135, + "learning_rate": 2.6765161295659147e-06, + "loss": 0.0363, "step": 3101 }, { - "epoch": 1.3777481678880745, - "grad_norm": 0.5051341999455976, - "learning_rate": 8.287336359710386e-06, - "loss": 0.047, + "epoch": 2.7548845470692718, + "grad_norm": 0.41192177696509297, + "learning_rate": 2.6730850666603915e-06, + "loss": 0.0394, "step": 3102 }, { - "epoch": 1.378192316233622, - "grad_norm": 0.5262863424826024, - "learning_rate": 8.285875457991903e-06, - "loss": 0.047, + "epoch": 2.7557726465364123, + "grad_norm": 0.35081987490990374, + "learning_rate": 2.669655401818769e-06, + "loss": 0.0343, "step": 3103 }, { - "epoch": 1.3786364645791696, - "grad_norm": 0.5493114786417674, - "learning_rate": 8.284414062350524e-06, - "loss": 0.0477, + "epoch": 2.7566607460035524, + "grad_norm": 0.3161120571571943, + "learning_rate": 2.6662271371016745e-06, + "loss": 0.0318, "step": 3104 }, { - "epoch": 1.3790806129247168, - "grad_norm": 0.529779249166117, - "learning_rate": 8.282952173005916e-06, - "loss": 0.0489, + "epoch": 2.7575488454706925, + "grad_norm": 0.4278308516005874, + "learning_rate": 2.662800274568885e-06, + "loss": 0.0356, "step": 3105 }, { - "epoch": 1.3795247612702641, - "grad_norm": 0.5169327523467377, - "learning_rate": 8.28148979017783e-06, - "loss": 0.0568, + "epoch": 2.758436944937833, + "grad_norm": 0.3136397724341847, + "learning_rate": 2.659374816279337e-06, + "loss": 0.0278, "step": 3106 }, { - "epoch": 1.3799689096158116, - "grad_norm": 0.6639124677133846, - "learning_rate": 8.280026914086086e-06, - "loss": 0.0831, + "epoch": 2.7593250444049735, + "grad_norm": 0.4531262771201052, + "learning_rate": 2.6559507642911254e-06, + "loss": 0.0388, "step": 3107 }, { - "epoch": 1.3804130579613592, - "grad_norm": 0.6604146215653776, - "learning_rate": 8.278563544950579e-06, - "loss": 0.0688, + "epoch": 2.7602131438721136, + "grad_norm": 0.3630650763261708, + "learning_rate": 2.652528120661504e-06, + "loss": 0.0364, "step": 3108 }, { - "epoch": 1.3808572063069064, - "grad_norm": 0.499500034008347, - "learning_rate": 8.277099682991276e-06, - "loss": 0.0466, + "epoch": 2.761101243339254, + "grad_norm": 0.29369108790662546, + "learning_rate": 2.6491068874468673e-06, + "loss": 0.0306, "step": 3109 }, { - "epoch": 1.381301354652454, - "grad_norm": 0.5463437639449784, - "learning_rate": 8.275635328428226e-06, - "loss": 0.0576, + "epoch": 2.761989342806394, + "grad_norm": 0.4405601872703178, + "learning_rate": 2.6456870667027746e-06, + "loss": 0.0378, "step": 3110 }, { - "epoch": 1.3817455029980013, - "grad_norm": 0.43757681367045054, - "learning_rate": 8.274170481481541e-06, - "loss": 0.0348, + "epoch": 2.7628774422735347, + "grad_norm": 0.3741620119668413, + "learning_rate": 2.6422686604839352e-06, + "loss": 0.0353, "step": 3111 }, { - "epoch": 1.3821896513435488, - "grad_norm": 0.5431219597419497, - "learning_rate": 8.272705142371414e-06, - "loss": 0.0592, + "epoch": 2.763765541740675, + "grad_norm": 0.3505583318842318, + "learning_rate": 2.6388516708442035e-06, + "loss": 0.036, "step": 3112 }, { - "epoch": 1.382633799689096, - "grad_norm": 0.6091624737010345, - "learning_rate": 8.271239311318111e-06, - "loss": 0.0645, + "epoch": 2.7646536412078153, + "grad_norm": 0.38289287641724995, + "learning_rate": 2.635436099836584e-06, + "loss": 0.0322, "step": 3113 }, { - "epoch": 1.3830779480346436, - "grad_norm": 0.46664330067036014, - "learning_rate": 8.269772988541971e-06, - "loss": 0.0293, + "epoch": 2.765541740674956, + "grad_norm": 0.3203790920698382, + "learning_rate": 2.6320219495132336e-06, + "loss": 0.0308, "step": 3114 }, { - "epoch": 1.383522096380191, - "grad_norm": 0.39018350334607316, - "learning_rate": 8.268306174263407e-06, - "loss": 0.0488, + "epoch": 2.766429840142096, + "grad_norm": 0.4995950897896086, + "learning_rate": 2.628609221925448e-06, + "loss": 0.0541, "step": 3115 }, { - "epoch": 1.3839662447257384, - "grad_norm": 0.7360437552444796, - "learning_rate": 8.266838868702904e-06, - "loss": 0.0539, + "epoch": 2.767317939609236, + "grad_norm": 0.39833917217254344, + "learning_rate": 2.625197919123677e-06, + "loss": 0.0357, "step": 3116 }, { - "epoch": 1.3844103930712859, - "grad_norm": 0.3523843914894874, - "learning_rate": 8.265371072081028e-06, - "loss": 0.033, + "epoch": 2.7682060390763765, + "grad_norm": 0.40304209922427403, + "learning_rate": 2.621788043157507e-06, + "loss": 0.0389, "step": 3117 }, { - "epoch": 1.3848545414168332, - "grad_norm": 0.5841378028235703, - "learning_rate": 8.263902784618409e-06, - "loss": 0.0512, + "epoch": 2.769094138543517, + "grad_norm": 0.4903203936952132, + "learning_rate": 2.618379596075668e-06, + "loss": 0.0336, "step": 3118 }, { - "epoch": 1.3852986897623807, - "grad_norm": 0.6604716728640524, - "learning_rate": 8.262434006535759e-06, - "loss": 0.0841, + "epoch": 2.769982238010657, + "grad_norm": 0.3312631123935787, + "learning_rate": 2.614972579926035e-06, + "loss": 0.0315, "step": 3119 }, { - "epoch": 1.385742838107928, - "grad_norm": 0.43203367132780174, - "learning_rate": 8.260964738053859e-06, - "loss": 0.0474, + "epoch": 2.7708703374777977, + "grad_norm": 0.6283735428100229, + "learning_rate": 2.611566996755626e-06, + "loss": 0.0306, "step": 3120 }, { - "epoch": 1.3861869864534755, - "grad_norm": 0.48844306169229995, - "learning_rate": 8.259494979393563e-06, - "loss": 0.0481, + "epoch": 2.7717584369449377, + "grad_norm": 0.5439363621147506, + "learning_rate": 2.608162848610586e-06, + "loss": 0.0418, "step": 3121 }, { - "epoch": 1.386631134799023, - "grad_norm": 0.3906618714847538, - "learning_rate": 8.258024730775805e-06, - "loss": 0.0398, + "epoch": 2.7726465364120783, + "grad_norm": 0.2961699441768386, + "learning_rate": 2.6047601375362076e-06, + "loss": 0.0318, "step": 3122 }, { - "epoch": 1.3870752831445703, - "grad_norm": 0.47997636522094667, - "learning_rate": 8.256553992421583e-06, - "loss": 0.0554, + "epoch": 2.7735346358792183, + "grad_norm": 0.39078320127905963, + "learning_rate": 2.6013588655769204e-06, + "loss": 0.0419, "step": 3123 }, { - "epoch": 1.3875194314901176, - "grad_norm": 0.6434789607226818, - "learning_rate": 8.255082764551978e-06, - "loss": 0.0484, + "epoch": 2.774422735346359, + "grad_norm": 0.3529278737597956, + "learning_rate": 2.597959034776286e-06, + "loss": 0.0264, "step": 3124 }, { - "epoch": 1.387963579835665, - "grad_norm": 0.3887987489990569, - "learning_rate": 8.25361104738814e-06, - "loss": 0.0464, + "epoch": 2.7753108348134994, + "grad_norm": 0.3599449620069869, + "learning_rate": 2.594560647176997e-06, + "loss": 0.0335, "step": 3125 }, { - "epoch": 1.3884077281812126, - "grad_norm": 0.487796373914711, - "learning_rate": 8.252138841151292e-06, - "loss": 0.0441, + "epoch": 2.7761989342806395, + "grad_norm": 0.3800935534712962, + "learning_rate": 2.5911637048208883e-06, + "loss": 0.0339, "step": 3126 }, { - "epoch": 1.38885187652676, - "grad_norm": 0.6398270077190703, - "learning_rate": 8.250666146062732e-06, - "loss": 0.0419, + "epoch": 2.7770870337477795, + "grad_norm": 0.4045657631427908, + "learning_rate": 2.5877682097489154e-06, + "loss": 0.0427, "step": 3127 }, { - "epoch": 1.3892960248723074, - "grad_norm": 0.4422350214805325, - "learning_rate": 8.249192962343829e-06, - "loss": 0.041, + "epoch": 2.77797513321492, + "grad_norm": 0.3408945249183258, + "learning_rate": 2.5843741640011736e-06, + "loss": 0.0327, "step": 3128 }, { - "epoch": 1.3897401732178547, - "grad_norm": 0.5252093775770235, - "learning_rate": 8.247719290216032e-06, - "loss": 0.0479, + "epoch": 2.7788632326820606, + "grad_norm": 0.321873706472909, + "learning_rate": 2.5809815696168834e-06, + "loss": 0.0346, "step": 3129 }, { - "epoch": 1.3901843215634022, - "grad_norm": 0.6735609599734196, - "learning_rate": 8.246245129900856e-06, - "loss": 0.038, + "epoch": 2.7797513321492007, + "grad_norm": 0.34804314898483796, + "learning_rate": 2.5775904286343897e-06, + "loss": 0.0401, "step": 3130 }, { - "epoch": 1.3906284699089495, - "grad_norm": 0.5147679810299035, - "learning_rate": 8.244770481619892e-06, - "loss": 0.05, + "epoch": 2.780639431616341, + "grad_norm": 0.3084675333042666, + "learning_rate": 2.5742007430911718e-06, + "loss": 0.0289, "step": 3131 }, { - "epoch": 1.391072618254497, - "grad_norm": 0.4260669615511085, - "learning_rate": 8.243295345594807e-06, - "loss": 0.0351, + "epoch": 2.7815275310834813, + "grad_norm": 0.39465989111686417, + "learning_rate": 2.5708125150238294e-06, + "loss": 0.0415, "step": 3132 }, { - "epoch": 1.3915167666000445, - "grad_norm": 0.5948196620222996, - "learning_rate": 8.241819722047337e-06, - "loss": 0.0532, + "epoch": 2.782415630550622, + "grad_norm": 0.3386636046446292, + "learning_rate": 2.567425746468086e-06, + "loss": 0.034, "step": 3133 }, { - "epoch": 1.3919609149455918, - "grad_norm": 0.4954498969782017, - "learning_rate": 8.240343611199294e-06, - "loss": 0.0395, + "epoch": 2.783303730017762, + "grad_norm": 0.6566379417707975, + "learning_rate": 2.5640404394587926e-06, + "loss": 0.0507, "step": 3134 }, { - "epoch": 1.3924050632911391, - "grad_norm": 0.5748384966095534, - "learning_rate": 8.238867013272562e-06, - "loss": 0.051, + "epoch": 2.7841918294849024, + "grad_norm": 0.394947389510546, + "learning_rate": 2.5606565960299155e-06, + "loss": 0.0459, "step": 3135 }, { - "epoch": 1.3928492116366866, - "grad_norm": 0.47868756581223826, - "learning_rate": 8.237389928489099e-06, - "loss": 0.0406, + "epoch": 2.7850799289520425, + "grad_norm": 0.5038306712498116, + "learning_rate": 2.5572742182145487e-06, + "loss": 0.0332, "step": 3136 }, { - "epoch": 1.3932933599822341, - "grad_norm": 0.42581407271928307, - "learning_rate": 8.235912357070938e-06, - "loss": 0.0337, + "epoch": 2.785968028419183, + "grad_norm": 0.7023618024773217, + "learning_rate": 2.5538933080449034e-06, + "loss": 0.0399, "step": 3137 }, { - "epoch": 1.3937375083277814, - "grad_norm": 0.6781537407713069, - "learning_rate": 8.234434299240179e-06, - "loss": 0.0483, + "epoch": 2.786856127886323, + "grad_norm": 0.4351684729605337, + "learning_rate": 2.5505138675523065e-06, + "loss": 0.0433, "step": 3138 }, { - "epoch": 1.394181656673329, - "grad_norm": 0.598623626498893, - "learning_rate": 8.232955755219002e-06, - "loss": 0.0498, + "epoch": 2.7877442273534636, + "grad_norm": 0.3612418971693695, + "learning_rate": 2.547135898767202e-06, + "loss": 0.0321, "step": 3139 }, { - "epoch": 1.3946258050188762, - "grad_norm": 0.6505669269254222, - "learning_rate": 8.231476725229659e-06, - "loss": 0.0391, + "epoch": 2.788632326820604, + "grad_norm": 0.34691162957898175, + "learning_rate": 2.5437594037191537e-06, + "loss": 0.037, "step": 3140 }, { - "epoch": 1.3950699533644237, - "grad_norm": 0.5150042095978116, - "learning_rate": 8.229997209494468e-06, - "loss": 0.0538, + "epoch": 2.789520426287744, + "grad_norm": 0.33204491770084243, + "learning_rate": 2.540384384436836e-06, + "loss": 0.0358, "step": 3141 }, { - "epoch": 1.395514101709971, - "grad_norm": 0.48445621459488397, - "learning_rate": 8.228517208235829e-06, - "loss": 0.0485, + "epoch": 2.7904085257548843, + "grad_norm": 0.3800840497245024, + "learning_rate": 2.5370108429480363e-06, + "loss": 0.0373, "step": 3142 }, { - "epoch": 1.3959582500555185, - "grad_norm": 0.42479311306640655, - "learning_rate": 8.22703672167621e-06, - "loss": 0.0454, + "epoch": 2.791296625222025, + "grad_norm": 0.39018940775699407, + "learning_rate": 2.533638781279659e-06, + "loss": 0.0345, "step": 3143 }, { - "epoch": 1.396402398401066, - "grad_norm": 0.4968133842399203, - "learning_rate": 8.225555750038157e-06, - "loss": 0.0548, + "epoch": 2.7921847246891653, + "grad_norm": 0.5326300879857159, + "learning_rate": 2.5302682014577128e-06, + "loss": 0.0413, "step": 3144 }, { - "epoch": 1.3968465467466133, - "grad_norm": 0.839408513781402, - "learning_rate": 8.22407429354428e-06, - "loss": 0.0508, + "epoch": 2.7930728241563054, + "grad_norm": 0.3926874549576116, + "learning_rate": 2.526899105507323e-06, + "loss": 0.0345, "step": 3145 }, { - "epoch": 1.3972906950921609, - "grad_norm": 0.4593510375216177, - "learning_rate": 8.222592352417268e-06, - "loss": 0.0484, + "epoch": 2.793960923623446, + "grad_norm": 0.5331303675935396, + "learning_rate": 2.523531495452718e-06, + "loss": 0.0366, "step": 3146 }, { - "epoch": 1.3977348434377082, - "grad_norm": 0.41788586732862426, - "learning_rate": 8.221109926879885e-06, - "loss": 0.0394, + "epoch": 2.794849023090586, + "grad_norm": 0.5328948201512393, + "learning_rate": 2.520165373317234e-06, + "loss": 0.0472, "step": 3147 }, { - "epoch": 1.3981789917832557, - "grad_norm": 0.6011495923589089, - "learning_rate": 8.219627017154962e-06, - "loss": 0.0538, + "epoch": 2.7957371225577266, + "grad_norm": 0.6570392083128864, + "learning_rate": 2.5168007411233143e-06, + "loss": 0.046, "step": 3148 }, { - "epoch": 1.398623140128803, - "grad_norm": 0.483716139104255, - "learning_rate": 8.218143623465407e-06, - "loss": 0.0479, + "epoch": 2.7966252220248666, + "grad_norm": 0.5039413424872988, + "learning_rate": 2.513437600892513e-06, + "loss": 0.0524, "step": 3149 }, { - "epoch": 1.3990672884743505, - "grad_norm": 0.7567570451553496, - "learning_rate": 8.216659746034199e-06, - "loss": 0.059, + "epoch": 2.797513321492007, + "grad_norm": 0.4504170060756764, + "learning_rate": 2.510075954645474e-06, + "loss": 0.0406, "step": 3150 }, { - "epoch": 1.399511436819898, - "grad_norm": 0.48553150567848463, - "learning_rate": 8.215175385084389e-06, - "loss": 0.0617, + "epoch": 2.7984014209591477, + "grad_norm": 0.47097439585522943, + "learning_rate": 2.5067158044019546e-06, + "loss": 0.0493, "step": 3151 }, { - "epoch": 1.3999555851654453, - "grad_norm": 0.47459878094993846, - "learning_rate": 8.2136905408391e-06, - "loss": 0.0527, + "epoch": 2.7992895204262878, + "grad_norm": 0.2949652998777908, + "learning_rate": 2.5033571521808123e-06, + "loss": 0.0345, "step": 3152 }, { - "epoch": 1.4003997335109926, - "grad_norm": 0.5833223937440162, - "learning_rate": 8.212205213521535e-06, - "loss": 0.0626, + "epoch": 2.800177619893428, + "grad_norm": 0.44072704377014293, + "learning_rate": 2.5000000000000015e-06, + "loss": 0.0355, "step": 3153 }, { - "epoch": 1.40084388185654, - "grad_norm": 0.35626979864365443, - "learning_rate": 8.210719403354961e-06, - "loss": 0.033, + "epoch": 2.8010657193605684, + "grad_norm": 0.4206901717156175, + "learning_rate": 2.496644349876573e-06, + "loss": 0.0343, "step": 3154 }, { - "epoch": 1.4012880302020876, - "grad_norm": 0.7430849859689593, - "learning_rate": 8.209233110562719e-06, - "loss": 0.0645, + "epoch": 2.801953818827709, + "grad_norm": 0.37458478856790284, + "learning_rate": 2.4932902038266826e-06, + "loss": 0.0371, "step": 3155 }, { - "epoch": 1.4017321785476349, - "grad_norm": 0.45738357167910687, - "learning_rate": 8.207746335368223e-06, - "loss": 0.0378, + "epoch": 2.802841918294849, + "grad_norm": 0.33662647621758907, + "learning_rate": 2.489937563865575e-06, + "loss": 0.0316, "step": 3156 }, { - "epoch": 1.4021763268931824, - "grad_norm": 0.5265228817791134, - "learning_rate": 8.206259077994966e-06, - "loss": 0.0653, + "epoch": 2.8037300177619895, + "grad_norm": 0.5403540396925937, + "learning_rate": 2.4865864320075966e-06, + "loss": 0.0529, "step": 3157 }, { - "epoch": 1.4026204752387297, - "grad_norm": 0.5945877517368485, - "learning_rate": 8.204771338666504e-06, - "loss": 0.0517, + "epoch": 2.8046181172291296, + "grad_norm": 0.3307046888382853, + "learning_rate": 2.4832368102661823e-06, + "loss": 0.0275, "step": 3158 }, { - "epoch": 1.4030646235842772, - "grad_norm": 0.48633704182422527, - "learning_rate": 8.20328311760647e-06, - "loss": 0.0467, + "epoch": 2.80550621669627, + "grad_norm": 0.3849223780314751, + "learning_rate": 2.4798887006538606e-06, + "loss": 0.0349, "step": 3159 }, { - "epoch": 1.4035087719298245, - "grad_norm": 0.45125218858977967, - "learning_rate": 8.201794415038569e-06, - "loss": 0.0436, + "epoch": 2.80639431616341, + "grad_norm": 0.48250628245999927, + "learning_rate": 2.476542105182254e-06, + "loss": 0.0428, "step": 3160 }, { - "epoch": 1.403952920275372, - "grad_norm": 0.454560780546378, - "learning_rate": 8.200305231186578e-06, - "loss": 0.0566, + "epoch": 2.8072824156305507, + "grad_norm": 0.37306916992082356, + "learning_rate": 2.4731970258620775e-06, + "loss": 0.0378, "step": 3161 }, { - "epoch": 1.4043970686209195, - "grad_norm": 0.4442970232995022, - "learning_rate": 8.198815566274346e-06, - "loss": 0.0343, + "epoch": 2.808170515097691, + "grad_norm": 0.41142639692118227, + "learning_rate": 2.469853464703124e-06, + "loss": 0.0433, "step": 3162 }, { - "epoch": 1.4048412169664668, - "grad_norm": 0.4600734220829904, - "learning_rate": 8.197325420525797e-06, - "loss": 0.0471, + "epoch": 2.8090586145648313, + "grad_norm": 0.35651442833603836, + "learning_rate": 2.4665114237142856e-06, + "loss": 0.0303, "step": 3163 }, { - "epoch": 1.405285365312014, - "grad_norm": 0.46601308549255893, - "learning_rate": 8.195834794164925e-06, - "loss": 0.0427, + "epoch": 2.8099467140319714, + "grad_norm": 0.40861213770415533, + "learning_rate": 2.4631709049035397e-06, + "loss": 0.0335, "step": 3164 }, { - "epoch": 1.4057295136575616, - "grad_norm": 0.5996621159738204, - "learning_rate": 8.194343687415795e-06, - "loss": 0.0457, + "epoch": 2.810834813499112, + "grad_norm": 0.8840489724240785, + "learning_rate": 2.4598319102779438e-06, + "loss": 0.0377, "step": 3165 }, { - "epoch": 1.4061736620031091, - "grad_norm": 0.9069132714321548, - "learning_rate": 8.192852100502547e-06, - "loss": 0.0678, + "epoch": 2.8117229129662524, + "grad_norm": 0.4532714630908367, + "learning_rate": 2.4564944418436416e-06, + "loss": 0.044, "step": 3166 }, { - "epoch": 1.4066178103486564, - "grad_norm": 0.43166301548877434, - "learning_rate": 8.191360033649392e-06, - "loss": 0.0352, + "epoch": 2.8126110124333925, + "grad_norm": 0.3614593805074122, + "learning_rate": 2.453158501605864e-06, + "loss": 0.0425, "step": 3167 }, { - "epoch": 1.407061958694204, - "grad_norm": 0.5216023574393313, - "learning_rate": 8.18986748708061e-06, - "loss": 0.0475, + "epoch": 2.8134991119005326, + "grad_norm": 0.31680155247348535, + "learning_rate": 2.4498240915689165e-06, + "loss": 0.0358, "step": 3168 }, { - "epoch": 1.4075061070397512, - "grad_norm": 0.5468879202276986, - "learning_rate": 8.18837446102056e-06, - "loss": 0.0547, + "epoch": 2.814387211367673, + "grad_norm": 0.4318446130593219, + "learning_rate": 2.4464912137361936e-06, + "loss": 0.0368, "step": 3169 }, { - "epoch": 1.4079502553852987, - "grad_norm": 0.4122751122216154, - "learning_rate": 8.186880955693667e-06, - "loss": 0.0357, + "epoch": 2.8152753108348136, + "grad_norm": 0.4250897496319355, + "learning_rate": 2.443159870110162e-06, + "loss": 0.0441, "step": 3170 }, { - "epoch": 1.408394403730846, - "grad_norm": 0.49605981026080986, - "learning_rate": 8.18538697132443e-06, - "loss": 0.0593, + "epoch": 2.8161634103019537, + "grad_norm": 0.3643571527710475, + "learning_rate": 2.439830062692368e-06, + "loss": 0.0388, "step": 3171 }, { - "epoch": 1.4088385520763935, - "grad_norm": 0.7328755570855009, - "learning_rate": 8.183892508137423e-06, - "loss": 0.0691, + "epoch": 2.8170515097690942, + "grad_norm": 0.3505699957865153, + "learning_rate": 2.4365017934834406e-06, + "loss": 0.0313, "step": 3172 }, { - "epoch": 1.409282700421941, - "grad_norm": 0.5835998191367716, - "learning_rate": 8.182397566357286e-06, - "loss": 0.0621, + "epoch": 2.8179396092362343, + "grad_norm": 0.4029893161185415, + "learning_rate": 2.433175064483078e-06, + "loss": 0.037, "step": 3173 }, { - "epoch": 1.4097268487674883, - "grad_norm": 0.45687160374129326, - "learning_rate": 8.180902146208734e-06, - "loss": 0.0529, + "epoch": 2.818827708703375, + "grad_norm": 0.34238881678764294, + "learning_rate": 2.429849877690053e-06, + "loss": 0.0307, "step": 3174 }, { - "epoch": 1.4101709971130356, - "grad_norm": 0.4464006299057523, - "learning_rate": 8.179406247916555e-06, - "loss": 0.0462, + "epoch": 2.819715808170515, + "grad_norm": 0.3872472148779724, + "learning_rate": 2.426526235102219e-06, + "loss": 0.034, "step": 3175 }, { - "epoch": 1.4106151454585831, - "grad_norm": 0.5104992313269215, - "learning_rate": 8.17790987170561e-06, - "loss": 0.0397, + "epoch": 2.8206039076376554, + "grad_norm": 0.37087670496994585, + "learning_rate": 2.4232041387164924e-06, + "loss": 0.0364, "step": 3176 }, { - "epoch": 1.4110592938041306, - "grad_norm": 0.4500241032324194, - "learning_rate": 8.176413017800828e-06, - "loss": 0.0588, + "epoch": 2.821492007104796, + "grad_norm": 0.3979646727542978, + "learning_rate": 2.4198835905288686e-06, + "loss": 0.0366, "step": 3177 }, { - "epoch": 1.411503442149678, - "grad_norm": 0.9698569462217295, - "learning_rate": 8.174915686427211e-06, - "loss": 0.0593, + "epoch": 2.822380106571936, + "grad_norm": 0.3653071700008589, + "learning_rate": 2.416564592534405e-06, + "loss": 0.0355, "step": 3178 }, { - "epoch": 1.4119475904952254, - "grad_norm": 1.9467722797227018, - "learning_rate": 8.173417877809835e-06, - "loss": 0.0657, + "epoch": 2.823268206039076, + "grad_norm": 0.32659833966423785, + "learning_rate": 2.413247146727237e-06, + "loss": 0.0349, "step": 3179 }, { - "epoch": 1.4123917388407727, - "grad_norm": 0.8619854581857347, - "learning_rate": 8.171919592173843e-06, - "loss": 0.0616, + "epoch": 2.8241563055062167, + "grad_norm": 0.44083296008742695, + "learning_rate": 2.4099312551005568e-06, + "loss": 0.04, "step": 3180 }, { - "epoch": 1.4128358871863202, - "grad_norm": 2.1957744086497324, - "learning_rate": 8.170420829744458e-06, - "loss": 0.0441, + "epoch": 2.825044404973357, + "grad_norm": 0.40793836680442663, + "learning_rate": 2.4066169196466326e-06, + "loss": 0.036, "step": 3181 }, { - "epoch": 1.4132800355318675, - "grad_norm": 0.437679909069633, - "learning_rate": 8.168921590746964e-06, - "loss": 0.0448, + "epoch": 2.8259325044404973, + "grad_norm": 0.3582256078871051, + "learning_rate": 2.4033041423567914e-06, + "loss": 0.0351, "step": 3182 }, { - "epoch": 1.413724183877415, - "grad_norm": 0.44545740554831986, - "learning_rate": 8.167421875406725e-06, - "loss": 0.0534, + "epoch": 2.826820603907638, + "grad_norm": 0.36582260805964584, + "learning_rate": 2.3999929252214245e-06, + "loss": 0.036, "step": 3183 }, { - "epoch": 1.4141683322229626, - "grad_norm": 0.6661022181292509, - "learning_rate": 8.165921683949172e-06, - "loss": 0.0752, + "epoch": 2.827708703374778, + "grad_norm": 0.38339914278379317, + "learning_rate": 2.39668327022999e-06, + "loss": 0.0384, "step": 3184 }, { - "epoch": 1.4146124805685099, - "grad_norm": 0.5379954758436954, - "learning_rate": 8.164421016599811e-06, - "loss": 0.0465, + "epoch": 2.8285968028419184, + "grad_norm": 0.8048974461847316, + "learning_rate": 2.393375179371003e-06, + "loss": 0.0558, "step": 3185 }, { - "epoch": 1.4150566289140574, - "grad_norm": 0.7649578135185098, - "learning_rate": 8.162919873584216e-06, - "loss": 0.0655, + "epoch": 2.8294849023090585, + "grad_norm": 0.5919715371103896, + "learning_rate": 2.390068654632039e-06, + "loss": 0.034, "step": 3186 }, { - "epoch": 1.4155007772596047, - "grad_norm": 0.6155469523069211, - "learning_rate": 8.161418255128037e-06, - "loss": 0.058, + "epoch": 2.830373001776199, + "grad_norm": 0.5241335446677382, + "learning_rate": 2.386763697999737e-06, + "loss": 0.0574, "step": 3187 }, { - "epoch": 1.4159449256051522, - "grad_norm": 0.43980966134894045, - "learning_rate": 8.15991616145699e-06, - "loss": 0.046, + "epoch": 2.8312611012433395, + "grad_norm": 0.4277430815376803, + "learning_rate": 2.3834603114597875e-06, + "loss": 0.034, "step": 3188 }, { - "epoch": 1.4163890739506995, - "grad_norm": 0.3772085957077231, - "learning_rate": 8.158413592796867e-06, - "loss": 0.0407, + "epoch": 2.8321492007104796, + "grad_norm": 0.3525031394477271, + "learning_rate": 2.380158496996945e-06, + "loss": 0.0373, "step": 3189 }, { - "epoch": 1.416833222296247, - "grad_norm": 0.4916035633372075, - "learning_rate": 8.156910549373529e-06, - "loss": 0.0509, + "epoch": 2.8330373001776197, + "grad_norm": 0.3858913624846048, + "learning_rate": 2.376858256595012e-06, + "loss": 0.0363, "step": 3190 }, { - "epoch": 1.4172773706417945, - "grad_norm": 0.44586244165360156, - "learning_rate": 8.15540703141291e-06, - "loss": 0.0421, + "epoch": 2.83392539964476, + "grad_norm": 0.33604324286970155, + "learning_rate": 2.3735595922368495e-06, + "loss": 0.0389, "step": 3191 }, { - "epoch": 1.4177215189873418, - "grad_norm": 0.6752971156056389, - "learning_rate": 8.153903039141011e-06, - "loss": 0.0394, + "epoch": 2.8348134991119007, + "grad_norm": 0.39935929326742753, + "learning_rate": 2.3702625059043697e-06, + "loss": 0.0401, "step": 3192 }, { - "epoch": 1.418165667332889, - "grad_norm": 0.5021744951340577, - "learning_rate": 8.15239857278391e-06, - "loss": 0.0446, + "epoch": 2.835701598579041, + "grad_norm": 0.4730886258127644, + "learning_rate": 2.3669669995785436e-06, + "loss": 0.0353, "step": 3193 }, { - "epoch": 1.4186098156784366, - "grad_norm": 0.4057752198360003, - "learning_rate": 8.150893632567755e-06, - "loss": 0.0413, + "epoch": 2.8365896980461813, + "grad_norm": 0.42154361099056825, + "learning_rate": 2.363673075239379e-06, + "loss": 0.0381, "step": 3194 }, { - "epoch": 1.419053964023984, - "grad_norm": 0.44646591292416776, - "learning_rate": 8.149388218718763e-06, - "loss": 0.033, + "epoch": 2.8374777975133214, + "grad_norm": 0.3934859872948006, + "learning_rate": 2.3603807348659447e-06, + "loss": 0.0346, "step": 3195 }, { - "epoch": 1.4194981123695314, - "grad_norm": 0.5430849460817241, - "learning_rate": 8.147882331463221e-06, - "loss": 0.0476, + "epoch": 2.838365896980462, + "grad_norm": 0.34488289545283485, + "learning_rate": 2.357089980436356e-06, + "loss": 0.0324, "step": 3196 }, { - "epoch": 1.419942260715079, - "grad_norm": 0.5748975413489712, - "learning_rate": 8.146375971027492e-06, - "loss": 0.0422, + "epoch": 2.839253996447602, + "grad_norm": 0.48559060269746396, + "learning_rate": 2.3538008139277705e-06, + "loss": 0.0386, "step": 3197 }, { - "epoch": 1.4203864090606262, - "grad_norm": 0.8569726405019049, - "learning_rate": 8.144869137638008e-06, - "loss": 0.0586, + "epoch": 2.8401420959147425, + "grad_norm": 0.4909439166088392, + "learning_rate": 2.3505132373163984e-06, + "loss": 0.0377, "step": 3198 }, { - "epoch": 1.4208305574061737, - "grad_norm": 0.48111798749968265, - "learning_rate": 8.14336183152127e-06, - "loss": 0.0511, + "epoch": 2.8410301953818826, + "grad_norm": 0.40415548474459967, + "learning_rate": 2.3472272525774905e-06, + "loss": 0.0421, "step": 3199 }, { - "epoch": 1.421274705751721, - "grad_norm": 0.4447988259403195, - "learning_rate": 8.141854052903853e-06, - "loss": 0.0409, + "epoch": 2.841918294849023, + "grad_norm": 0.31081432267044307, + "learning_rate": 2.343942861685338e-06, + "loss": 0.0337, "step": 3200 }, { - "epoch": 1.4217188540972685, - "grad_norm": 0.42318494315184574, - "learning_rate": 8.1403458020124e-06, - "loss": 0.0393, + "epoch": 2.842806394316163, + "grad_norm": 0.32784077186237176, + "learning_rate": 2.340660066613281e-06, + "loss": 0.0237, "step": 3201 }, { - "epoch": 1.422163002442816, - "grad_norm": 0.527946330855177, - "learning_rate": 8.138837079073628e-06, - "loss": 0.0544, + "epoch": 2.8436944937833037, + "grad_norm": 0.5013646416173178, + "learning_rate": 2.3373788693337024e-06, + "loss": 0.0455, "step": 3202 }, { - "epoch": 1.4226071507883633, - "grad_norm": 0.5168740647207917, - "learning_rate": 8.137327884314323e-06, - "loss": 0.042, + "epoch": 2.8445825932504443, + "grad_norm": 0.45898845240520664, + "learning_rate": 2.3340992718180126e-06, + "loss": 0.044, "step": 3203 }, { - "epoch": 1.4230512991339106, - "grad_norm": 0.44556932550096595, - "learning_rate": 8.135818217961344e-06, - "loss": 0.043, + "epoch": 2.8454706927175843, + "grad_norm": 0.38690156844100004, + "learning_rate": 2.3308212760366734e-06, + "loss": 0.0428, "step": 3204 }, { - "epoch": 1.423495447479458, - "grad_norm": 0.4521168782601511, - "learning_rate": 8.13430808024162e-06, - "loss": 0.0616, + "epoch": 2.8463587921847244, + "grad_norm": 0.38094134759029563, + "learning_rate": 2.3275448839591814e-06, + "loss": 0.0356, "step": 3205 }, { - "epoch": 1.4239395958250056, - "grad_norm": 0.998651913471337, - "learning_rate": 8.132797471382148e-06, - "loss": 0.0857, + "epoch": 2.847246891651865, + "grad_norm": 0.5811129908265699, + "learning_rate": 2.3242700975540665e-06, + "loss": 0.0362, "step": 3206 }, { - "epoch": 1.424383744170553, - "grad_norm": 0.4845757133637773, - "learning_rate": 8.131286391609996e-06, - "loss": 0.0316, + "epoch": 2.8481349911190055, + "grad_norm": 0.40116093066446995, + "learning_rate": 2.320996918788894e-06, + "loss": 0.045, "step": 3207 }, { - "epoch": 1.4248278925161004, - "grad_norm": 0.5115534012315782, - "learning_rate": 8.129774841152311e-06, - "loss": 0.0442, + "epoch": 2.8490230905861456, + "grad_norm": 0.3362104213982332, + "learning_rate": 2.3177253496302677e-06, + "loss": 0.0355, "step": 3208 }, { - "epoch": 1.4252720408616477, - "grad_norm": 0.4658784277879884, - "learning_rate": 8.128262820236302e-06, - "loss": 0.0392, + "epoch": 2.849911190053286, + "grad_norm": 0.3805397457710735, + "learning_rate": 2.3144553920438183e-06, + "loss": 0.0364, "step": 3209 }, { - "epoch": 1.4257161892071952, - "grad_norm": 0.7323138806429754, - "learning_rate": 8.12675032908925e-06, - "loss": 0.0476, + "epoch": 2.850799289520426, + "grad_norm": 0.35351437634179955, + "learning_rate": 2.3111870479942156e-06, + "loss": 0.0351, "step": 3210 }, { - "epoch": 1.4261603375527425, - "grad_norm": 0.7544916173168683, - "learning_rate": 8.125237367938511e-06, - "loss": 0.0539, + "epoch": 2.8516873889875667, + "grad_norm": 0.32550272153517773, + "learning_rate": 2.3079203194451527e-06, + "loss": 0.0343, "step": 3211 }, { - "epoch": 1.42660448589829, - "grad_norm": 0.62197595498055, - "learning_rate": 8.123723937011507e-06, - "loss": 0.0526, + "epoch": 2.8525754884547068, + "grad_norm": 0.3303439701522621, + "learning_rate": 2.3046552083593536e-06, + "loss": 0.035, "step": 3212 }, { - "epoch": 1.4270486342438375, - "grad_norm": 0.411592343530701, - "learning_rate": 8.12221003653573e-06, - "loss": 0.0422, + "epoch": 2.8534635879218473, + "grad_norm": 0.367616218813943, + "learning_rate": 2.3013917166985766e-06, + "loss": 0.0399, "step": 3213 }, { - "epoch": 1.4274927825893848, - "grad_norm": 0.5467878759617523, - "learning_rate": 8.12069566673875e-06, - "loss": 0.0486, + "epoch": 2.854351687388988, + "grad_norm": 0.33662839356963586, + "learning_rate": 2.298129846423599e-06, + "loss": 0.0404, "step": 3214 }, { - "epoch": 1.4279369309349323, - "grad_norm": 0.3951319611407401, - "learning_rate": 8.119180827848199e-06, - "loss": 0.0372, + "epoch": 2.855239786856128, + "grad_norm": 0.5840277130030126, + "learning_rate": 2.294869599494227e-06, + "loss": 0.049, "step": 3215 }, { - "epoch": 1.4283810792804796, - "grad_norm": 0.5355923646902133, - "learning_rate": 8.117665520091783e-06, - "loss": 0.0583, + "epoch": 2.856127886323268, + "grad_norm": 0.4443593809675, + "learning_rate": 2.2916109778692948e-06, + "loss": 0.0466, "step": 3216 }, { - "epoch": 1.4288252276260272, - "grad_norm": 0.49870365746589324, - "learning_rate": 8.11614974369728e-06, - "loss": 0.0521, + "epoch": 2.8570159857904085, + "grad_norm": 0.3367573564426318, + "learning_rate": 2.288353983506653e-06, + "loss": 0.0383, "step": 3217 }, { - "epoch": 1.4292693759715744, - "grad_norm": 0.5463670187827259, - "learning_rate": 8.114633498892537e-06, - "loss": 0.0567, + "epoch": 2.857904085257549, + "grad_norm": 0.5190183025532943, + "learning_rate": 2.285098618363183e-06, + "loss": 0.0377, "step": 3218 }, { - "epoch": 1.429713524317122, - "grad_norm": 0.5699638456777965, - "learning_rate": 8.11311678590547e-06, - "loss": 0.05, + "epoch": 2.858792184724689, + "grad_norm": 0.2958611938020286, + "learning_rate": 2.281844884394781e-06, + "loss": 0.0306, "step": 3219 }, { - "epoch": 1.4301576726626695, - "grad_norm": 0.3789965811347938, - "learning_rate": 8.11159960496407e-06, - "loss": 0.0405, + "epoch": 2.8596802841918296, + "grad_norm": 0.3452665952641175, + "learning_rate": 2.2785927835563624e-06, + "loss": 0.0343, "step": 3220 }, { - "epoch": 1.4306018210082168, - "grad_norm": 0.4496180621274656, - "learning_rate": 8.11008195629639e-06, - "loss": 0.0414, + "epoch": 2.8605683836589697, + "grad_norm": 0.37239810186921896, + "learning_rate": 2.2753423178018667e-06, + "loss": 0.0327, "step": 3221 }, { - "epoch": 1.431045969353764, - "grad_norm": 0.6660820533473273, - "learning_rate": 8.10856384013056e-06, - "loss": 0.0519, + "epoch": 2.8614564831261102, + "grad_norm": 0.3540942291981282, + "learning_rate": 2.27209348908425e-06, + "loss": 0.0384, "step": 3222 }, { - "epoch": 1.4314901176993116, - "grad_norm": 0.42475615332155653, - "learning_rate": 8.107045256694782e-06, - "loss": 0.0507, + "epoch": 2.8623445825932503, + "grad_norm": 0.36505716497548985, + "learning_rate": 2.268846299355481e-06, + "loss": 0.0367, "step": 3223 }, { - "epoch": 1.431934266044859, - "grad_norm": 0.531367471068279, - "learning_rate": 8.105526206217322e-06, - "loss": 0.0438, + "epoch": 2.863232682060391, + "grad_norm": 0.408787810640581, + "learning_rate": 2.265600750566544e-06, + "loss": 0.0399, "step": 3224 }, { - "epoch": 1.4323784143904064, - "grad_norm": 0.6173452877900699, - "learning_rate": 8.104006688926518e-06, - "loss": 0.044, + "epoch": 2.8641207815275314, + "grad_norm": 0.3638252630312671, + "learning_rate": 2.262356844667443e-06, + "loss": 0.0362, "step": 3225 }, { - "epoch": 1.4328225627359539, - "grad_norm": 0.5047091534195701, - "learning_rate": 8.102486705050782e-06, - "loss": 0.0448, + "epoch": 2.8650088809946714, + "grad_norm": 0.4133926248591744, + "learning_rate": 2.25911458360719e-06, + "loss": 0.0366, "step": 3226 }, { - "epoch": 1.4332667110815012, - "grad_norm": 0.756671059089655, - "learning_rate": 8.100966254818591e-06, - "loss": 0.0472, + "epoch": 2.8658969804618115, + "grad_norm": 0.3062802727335299, + "learning_rate": 2.255873969333807e-06, + "loss": 0.0277, "step": 3227 }, { - "epoch": 1.4337108594270487, - "grad_norm": 0.6679241833390342, - "learning_rate": 8.099445338458496e-06, - "loss": 0.0428, + "epoch": 2.866785079928952, + "grad_norm": 0.43524843682150793, + "learning_rate": 2.252635003794334e-06, + "loss": 0.0351, "step": 3228 }, { - "epoch": 1.434155007772596, - "grad_norm": 0.40517368896131173, - "learning_rate": 8.097923956199118e-06, - "loss": 0.0366, + "epoch": 2.8676731793960926, + "grad_norm": 0.3426621209283039, + "learning_rate": 2.249397688934811e-06, + "loss": 0.0389, "step": 3229 }, { - "epoch": 1.4345991561181435, - "grad_norm": 0.4490587246876595, - "learning_rate": 8.096402108269144e-06, - "loss": 0.0356, + "epoch": 2.8685612788632326, + "grad_norm": 0.3370006175503084, + "learning_rate": 2.246162026700296e-06, + "loss": 0.0346, "step": 3230 }, { - "epoch": 1.435043304463691, - "grad_norm": 0.5508426411635136, - "learning_rate": 8.094879794897333e-06, - "loss": 0.0499, + "epoch": 2.869449378330373, + "grad_norm": 0.36902644678834845, + "learning_rate": 2.2429280190348463e-06, + "loss": 0.0367, "step": 3231 }, { - "epoch": 1.4354874528092383, - "grad_norm": 0.39803990157367924, - "learning_rate": 8.093357016312518e-06, - "loss": 0.039, + "epoch": 2.8703374777975132, + "grad_norm": 0.3372975556473261, + "learning_rate": 2.2396956678815266e-06, + "loss": 0.0327, "step": 3232 }, { - "epoch": 1.4359316011547856, - "grad_norm": 0.3911481714996281, - "learning_rate": 8.091833772743595e-06, - "loss": 0.0408, + "epoch": 2.8712255772646538, + "grad_norm": 0.4067092763332751, + "learning_rate": 2.2364649751824096e-06, + "loss": 0.0336, "step": 3233 }, { - "epoch": 1.436375749500333, - "grad_norm": 0.43202710834535546, - "learning_rate": 8.090310064419536e-06, - "loss": 0.0517, + "epoch": 2.872113676731794, + "grad_norm": 0.41264789259217366, + "learning_rate": 2.2332359428785733e-06, + "loss": 0.0453, "step": 3234 }, { - "epoch": 1.4368198978458806, - "grad_norm": 0.63394524089963, - "learning_rate": 8.088785891569379e-06, - "loss": 0.0498, + "epoch": 2.8730017761989344, + "grad_norm": 0.38972966777230966, + "learning_rate": 2.230008572910087e-06, + "loss": 0.0382, "step": 3235 }, { - "epoch": 1.437264046191428, - "grad_norm": 0.6014431551871622, - "learning_rate": 8.087261254422232e-06, - "loss": 0.0553, + "epoch": 2.8738898756660745, + "grad_norm": 0.3445557467587843, + "learning_rate": 2.2267828672160317e-06, + "loss": 0.0348, "step": 3236 }, { - "epoch": 1.4377081945369754, - "grad_norm": 0.6136207381555616, - "learning_rate": 8.085736153207277e-06, - "loss": 0.0594, + "epoch": 2.874777975133215, + "grad_norm": 0.34872288625593595, + "learning_rate": 2.223558827734489e-06, + "loss": 0.0328, "step": 3237 }, { - "epoch": 1.4381523428825227, - "grad_norm": 0.5952470946539382, - "learning_rate": 8.08421058815376e-06, - "loss": 0.0769, + "epoch": 2.875666074600355, + "grad_norm": 0.3655919876619049, + "learning_rate": 2.2203364564025315e-06, + "loss": 0.0378, "step": 3238 }, { - "epoch": 1.4385964912280702, - "grad_norm": 0.5189933118580958, - "learning_rate": 8.082684559490999e-06, - "loss": 0.0554, + "epoch": 2.8765541740674956, + "grad_norm": 0.40558629003143404, + "learning_rate": 2.217115755156234e-06, + "loss": 0.0422, "step": 3239 }, { - "epoch": 1.4390406395736175, - "grad_norm": 0.4135426057188439, - "learning_rate": 8.081158067448385e-06, - "loss": 0.0396, + "epoch": 2.877442273534636, + "grad_norm": 0.364095436499667, + "learning_rate": 2.2138967259306702e-06, + "loss": 0.0365, "step": 3240 }, { - "epoch": 1.439484787919165, - "grad_norm": 0.5443217663228829, - "learning_rate": 8.079631112255372e-06, - "loss": 0.044, + "epoch": 2.878330373001776, + "grad_norm": 0.4287254451004397, + "learning_rate": 2.2106793706599045e-06, + "loss": 0.046, "step": 3241 }, { - "epoch": 1.4399289362647125, - "grad_norm": 0.5826766711918354, - "learning_rate": 8.078103694141487e-06, - "loss": 0.0446, + "epoch": 2.8792184724689163, + "grad_norm": 0.37982949088249746, + "learning_rate": 2.2074636912770003e-06, + "loss": 0.0408, "step": 3242 }, { - "epoch": 1.4403730846102598, - "grad_norm": 0.648859138960752, - "learning_rate": 8.076575813336333e-06, - "loss": 0.0609, + "epoch": 2.880106571936057, + "grad_norm": 0.33271622421670694, + "learning_rate": 2.20424968971401e-06, + "loss": 0.0306, "step": 3243 }, { - "epoch": 1.440817232955807, - "grad_norm": 0.4409431909915432, - "learning_rate": 8.07504747006957e-06, - "loss": 0.0531, + "epoch": 2.8809946714031973, + "grad_norm": 0.42343377317989356, + "learning_rate": 2.2010373679019773e-06, + "loss": 0.0378, "step": 3244 }, { - "epoch": 1.4412613813013546, - "grad_norm": 0.3721919303810988, - "learning_rate": 8.073518664570938e-06, - "loss": 0.0386, + "epoch": 2.8818827708703374, + "grad_norm": 0.4123469336539692, + "learning_rate": 2.1978267277709415e-06, + "loss": 0.0415, "step": 3245 }, { - "epoch": 1.4417055296469021, - "grad_norm": 0.43841622793791346, - "learning_rate": 8.07198939707024e-06, - "loss": 0.0493, + "epoch": 2.882770870337478, + "grad_norm": 0.32210500457700036, + "learning_rate": 2.1946177712499322e-06, + "loss": 0.0297, "step": 3246 }, { - "epoch": 1.4421496779924494, - "grad_norm": 0.5931350944063895, - "learning_rate": 8.070459667797351e-06, - "loss": 0.0634, + "epoch": 2.883658969804618, + "grad_norm": 0.358708910741767, + "learning_rate": 2.1914105002669567e-06, + "loss": 0.04, "step": 3247 }, { - "epoch": 1.442593826337997, - "grad_norm": 0.41937120746821677, - "learning_rate": 8.068929476982217e-06, - "loss": 0.0423, + "epoch": 2.8845470692717585, + "grad_norm": 0.5864879669784532, + "learning_rate": 2.188204916749021e-06, + "loss": 0.0579, "step": 3248 }, { - "epoch": 1.4430379746835442, - "grad_norm": 0.6050623457059973, - "learning_rate": 8.067398824854851e-06, - "loss": 0.07, + "epoch": 2.8854351687388986, + "grad_norm": 0.3731123103237032, + "learning_rate": 2.1850010226221147e-06, + "loss": 0.0376, "step": 3249 }, { - "epoch": 1.4434821230290917, - "grad_norm": 0.36263255781004144, - "learning_rate": 8.065867711645334e-06, - "loss": 0.0488, + "epoch": 2.886323268206039, + "grad_norm": 0.32710070054568563, + "learning_rate": 2.1817988198112095e-06, + "loss": 0.0303, "step": 3250 }, { - "epoch": 1.443926271374639, - "grad_norm": 0.5452720169189981, - "learning_rate": 8.064336137583821e-06, - "loss": 0.0592, + "epoch": 2.8872113676731797, + "grad_norm": 0.5116015572070288, + "learning_rate": 2.17859831024026e-06, + "loss": 0.0497, "step": 3251 }, { - "epoch": 1.4443704197201865, - "grad_norm": 0.5648377578907434, - "learning_rate": 8.062804102900532e-06, - "loss": 0.045, + "epoch": 2.8880994671403197, + "grad_norm": 0.536659787763374, + "learning_rate": 2.17539949583221e-06, + "loss": 0.0405, "step": 3252 }, { - "epoch": 1.444814568065734, - "grad_norm": 0.5203009129121725, - "learning_rate": 8.061271607825758e-06, - "loss": 0.0455, + "epoch": 2.88898756660746, + "grad_norm": 0.5229345175348173, + "learning_rate": 2.1722023785089763e-06, + "loss": 0.0361, "step": 3253 }, { - "epoch": 1.4452587164112813, - "grad_norm": 0.5388011169643779, - "learning_rate": 8.059738652589862e-06, - "loss": 0.0563, + "epoch": 2.8898756660746003, + "grad_norm": 0.364097366496522, + "learning_rate": 2.1690069601914648e-06, + "loss": 0.0336, "step": 3254 }, { - "epoch": 1.4457028647568289, - "grad_norm": 0.41394935328254073, - "learning_rate": 8.058205237423266e-06, - "loss": 0.0523, + "epoch": 2.890763765541741, + "grad_norm": 0.6525345476050085, + "learning_rate": 2.1658132427995543e-06, + "loss": 0.0456, "step": 3255 }, { - "epoch": 1.4461470131023761, - "grad_norm": 0.921759574953431, - "learning_rate": 8.056671362556476e-06, - "loss": 0.0356, + "epoch": 2.891651865008881, + "grad_norm": 0.4305618530941698, + "learning_rate": 2.162621228252101e-06, + "loss": 0.0403, "step": 3256 }, { - "epoch": 1.4465911614479237, - "grad_norm": 0.8754840871870988, - "learning_rate": 8.055137028220058e-06, - "loss": 0.0615, + "epoch": 2.8925399644760215, + "grad_norm": 0.311600428359317, + "learning_rate": 2.159430918466945e-06, + "loss": 0.0345, "step": 3257 }, { - "epoch": 1.447035309793471, - "grad_norm": 0.4878410786722894, - "learning_rate": 8.053602234644644e-06, - "loss": 0.0415, + "epoch": 2.8934280639431615, + "grad_norm": 0.3132058591814133, + "learning_rate": 2.156242315360893e-06, + "loss": 0.0294, "step": 3258 }, { - "epoch": 1.4474794581390185, - "grad_norm": 0.6534435017529012, - "learning_rate": 8.052066982060945e-06, - "loss": 0.0579, + "epoch": 2.894316163410302, + "grad_norm": 0.4910046166944337, + "learning_rate": 2.1530554208497357e-06, + "loss": 0.0394, "step": 3259 }, { - "epoch": 1.447923606484566, - "grad_norm": 0.9319839406156598, - "learning_rate": 8.050531270699731e-06, - "loss": 0.0484, + "epoch": 2.895204262877442, + "grad_norm": 0.3085842334505055, + "learning_rate": 2.1498702368482294e-06, + "loss": 0.0291, "step": 3260 }, { - "epoch": 1.4483677548301133, - "grad_norm": 0.4011114975003397, - "learning_rate": 8.048995100791847e-06, - "loss": 0.0372, + "epoch": 2.8960923623445827, + "grad_norm": 0.36125305999687723, + "learning_rate": 2.146686765270105e-06, + "loss": 0.0304, "step": 3261 }, { - "epoch": 1.4488119031756606, - "grad_norm": 0.5604426931227581, - "learning_rate": 8.047458472568208e-06, - "loss": 0.0496, + "epoch": 2.8969804618117228, + "grad_norm": 0.5352727077255195, + "learning_rate": 2.143505008028067e-06, + "loss": 0.0407, "step": 3262 }, { - "epoch": 1.449256051521208, - "grad_norm": 0.3856602729544308, - "learning_rate": 8.045921386259792e-06, - "loss": 0.0368, + "epoch": 2.8978685612788633, + "grad_norm": 0.533765241330171, + "learning_rate": 2.1403249670337895e-06, + "loss": 0.04, "step": 3263 }, { - "epoch": 1.4497001998667556, - "grad_norm": 0.37413170967076825, - "learning_rate": 8.044383842097651e-06, - "loss": 0.0426, + "epoch": 2.8987566607460034, + "grad_norm": 0.3154911523836893, + "learning_rate": 2.1371466441979127e-06, + "loss": 0.0347, "step": 3264 }, { - "epoch": 1.4501443482123029, - "grad_norm": 0.4129802274607094, - "learning_rate": 8.042845840312903e-06, - "loss": 0.0398, + "epoch": 2.899644760213144, + "grad_norm": 0.3210808049942224, + "learning_rate": 2.133970041430044e-06, + "loss": 0.0329, "step": 3265 }, { - "epoch": 1.4505884965578504, - "grad_norm": 0.8418363464980803, - "learning_rate": 8.041307381136738e-06, - "loss": 0.0656, + "epoch": 2.9005328596802844, + "grad_norm": 0.3309739089232218, + "learning_rate": 2.1307951606387626e-06, + "loss": 0.0303, "step": 3266 }, { - "epoch": 1.4510326449033977, - "grad_norm": 0.5365298547106184, - "learning_rate": 8.039768464800408e-06, - "loss": 0.048, + "epoch": 2.9014209591474245, + "grad_norm": 0.458161854847797, + "learning_rate": 2.1276220037316087e-06, + "loss": 0.0402, "step": 3267 }, { - "epoch": 1.4514767932489452, - "grad_norm": 1.0687462882240213, - "learning_rate": 8.038229091535244e-06, - "loss": 0.0503, + "epoch": 2.9023090586145646, + "grad_norm": 0.4046184614183439, + "learning_rate": 2.1244505726150853e-06, + "loss": 0.0352, "step": 3268 }, { - "epoch": 1.4519209415944925, - "grad_norm": 0.49239171264959003, - "learning_rate": 8.036689261572636e-06, - "loss": 0.0563, + "epoch": 2.903197158081705, + "grad_norm": 0.3900637322521146, + "learning_rate": 2.1212808691946646e-06, + "loss": 0.0374, "step": 3269 }, { - "epoch": 1.45236508994004, - "grad_norm": 0.5011792630285886, - "learning_rate": 8.035148975144046e-06, - "loss": 0.0396, + "epoch": 2.9040852575488456, + "grad_norm": 0.3843284507800547, + "learning_rate": 2.1181128953747735e-06, + "loss": 0.044, "step": 3270 }, { - "epoch": 1.4528092382855875, - "grad_norm": 0.49603707035563677, - "learning_rate": 8.033608232481009e-06, - "loss": 0.0501, + "epoch": 2.9049733570159857, + "grad_norm": 0.33483239462559267, + "learning_rate": 2.1149466530588082e-06, + "loss": 0.0306, "step": 3271 }, { - "epoch": 1.4532533866311348, - "grad_norm": 0.5939987053954954, - "learning_rate": 8.032067033815123e-06, - "loss": 0.0458, + "epoch": 2.905861456483126, + "grad_norm": 0.44035760639827976, + "learning_rate": 2.1117821441491166e-06, + "loss": 0.0439, "step": 3272 }, { - "epoch": 1.453697534976682, - "grad_norm": 0.5812600520648006, - "learning_rate": 8.030525379378053e-06, - "loss": 0.0447, + "epoch": 2.9067495559502663, + "grad_norm": 0.5724245552044548, + "learning_rate": 2.1086193705470064e-06, + "loss": 0.0352, "step": 3273 }, { - "epoch": 1.4541416833222296, - "grad_norm": 0.6480809819716409, - "learning_rate": 8.028983269401542e-06, - "loss": 0.0608, + "epoch": 2.907637655417407, + "grad_norm": 0.4610758244552123, + "learning_rate": 2.105458334152748e-06, + "loss": 0.0373, "step": 3274 }, { - "epoch": 1.454585831667777, - "grad_norm": 0.5090480104564535, - "learning_rate": 8.027440704117391e-06, - "loss": 0.0559, + "epoch": 2.908525754884547, + "grad_norm": 0.3746823307953264, + "learning_rate": 2.102299036865566e-06, + "loss": 0.0371, "step": 3275 }, { - "epoch": 1.4550299800133244, - "grad_norm": 0.6602628376919341, - "learning_rate": 8.025897683757473e-06, - "loss": 0.0587, + "epoch": 2.9094138543516874, + "grad_norm": 0.573966111334757, + "learning_rate": 2.0991414805836337e-06, + "loss": 0.044, "step": 3276 }, { - "epoch": 1.455474128358872, - "grad_norm": 0.5252379759239632, - "learning_rate": 8.024354208553735e-06, - "loss": 0.061, + "epoch": 2.910301953818828, + "grad_norm": 0.30787883438799263, + "learning_rate": 2.095985667204085e-06, + "loss": 0.0349, "step": 3277 }, { - "epoch": 1.4559182767044192, - "grad_norm": 0.4725375990607396, - "learning_rate": 8.022810278738185e-06, - "loss": 0.0408, + "epoch": 2.911190053285968, + "grad_norm": 0.340274332990767, + "learning_rate": 2.092831598623008e-06, + "loss": 0.0315, "step": 3278 }, { - "epoch": 1.4563624250499667, - "grad_norm": 0.5401711443633535, - "learning_rate": 8.021265894542898e-06, - "loss": 0.0667, + "epoch": 2.912078152753108, + "grad_norm": 0.3513766217297932, + "learning_rate": 2.089679276735438e-06, + "loss": 0.0301, "step": 3279 }, { - "epoch": 1.456806573395514, - "grad_norm": 0.5352342030628493, - "learning_rate": 8.019721056200027e-06, - "loss": 0.0458, + "epoch": 2.9129662522202486, + "grad_norm": 0.42793403329507357, + "learning_rate": 2.08652870343536e-06, + "loss": 0.0376, "step": 3280 }, { - "epoch": 1.4572507217410615, - "grad_norm": 0.4210073842896914, - "learning_rate": 8.018175763941784e-06, - "loss": 0.0437, + "epoch": 2.913854351687389, + "grad_norm": 0.45718656349728554, + "learning_rate": 2.0833798806157134e-06, + "loss": 0.0356, "step": 3281 }, { - "epoch": 1.457694870086609, - "grad_norm": 0.3904193074228819, - "learning_rate": 8.016630018000457e-06, - "loss": 0.0456, + "epoch": 2.9147424511545292, + "grad_norm": 0.3311931727511675, + "learning_rate": 2.0802328101683815e-06, + "loss": 0.0312, "step": 3282 }, { - "epoch": 1.4581390184321563, - "grad_norm": 0.4133159146388692, - "learning_rate": 8.015083818608393e-06, - "loss": 0.0385, + "epoch": 2.9156305506216698, + "grad_norm": 0.3506580566673122, + "learning_rate": 2.0770874939841987e-06, + "loss": 0.0412, "step": 3283 }, { - "epoch": 1.4585831667777038, - "grad_norm": 0.5275288891932939, - "learning_rate": 8.013537165998014e-06, - "loss": 0.0477, + "epoch": 2.91651865008881, + "grad_norm": 0.3534652802800643, + "learning_rate": 2.0739439339529423e-06, + "loss": 0.0338, "step": 3284 }, { - "epoch": 1.4590273151232511, - "grad_norm": 0.43540224877298483, - "learning_rate": 8.011990060401806e-06, - "loss": 0.0433, + "epoch": 2.9174067495559504, + "grad_norm": 0.36758672924618585, + "learning_rate": 2.0708021319633326e-06, + "loss": 0.0409, "step": 3285 }, { - "epoch": 1.4594714634687986, - "grad_norm": 0.476963740475049, - "learning_rate": 8.010442502052329e-06, - "loss": 0.0549, + "epoch": 2.9182948490230904, + "grad_norm": 0.37238723422318215, + "learning_rate": 2.0676620899030393e-06, + "loss": 0.0322, "step": 3286 }, { - "epoch": 1.459915611814346, - "grad_norm": 0.4927774183701049, - "learning_rate": 8.008894491182205e-06, - "loss": 0.0464, + "epoch": 2.919182948490231, + "grad_norm": 0.35946999846969785, + "learning_rate": 2.0645238096586737e-06, + "loss": 0.032, "step": 3287 }, { - "epoch": 1.4603597601598934, - "grad_norm": 0.4716288282663322, - "learning_rate": 8.007346028024125e-06, - "loss": 0.0437, + "epoch": 2.9200710479573715, + "grad_norm": 0.5037500981505074, + "learning_rate": 2.0613872931157808e-06, + "loss": 0.0348, "step": 3288 }, { - "epoch": 1.460803908505441, - "grad_norm": 0.5277979981560662, - "learning_rate": 8.005797112810854e-06, - "loss": 0.0452, + "epoch": 2.9209591474245116, + "grad_norm": 0.41678628046767463, + "learning_rate": 2.058252542158855e-06, + "loss": 0.04, "step": 3289 }, { - "epoch": 1.4612480568509882, - "grad_norm": 0.6486682818717127, - "learning_rate": 8.004247745775216e-06, - "loss": 0.0636, + "epoch": 2.9218472468916517, + "grad_norm": 0.34845412270064835, + "learning_rate": 2.055119558671328e-06, + "loss": 0.0367, "step": 3290 }, { - "epoch": 1.4616922051965355, - "grad_norm": 0.693590181694553, - "learning_rate": 8.00269792715011e-06, - "loss": 0.0392, + "epoch": 2.922735346358792, + "grad_norm": 0.33363190061050635, + "learning_rate": 2.051988344535567e-06, + "loss": 0.0334, "step": 3291 }, { - "epoch": 1.462136353542083, - "grad_norm": 0.4973813779816927, - "learning_rate": 8.001147657168497e-06, - "loss": 0.0523, + "epoch": 2.9236234458259327, + "grad_norm": 0.41667074837663, + "learning_rate": 2.0488589016328754e-06, + "loss": 0.0383, "step": 3292 }, { - "epoch": 1.4625805018876306, - "grad_norm": 0.42880946574568146, - "learning_rate": 7.99959693606341e-06, - "loss": 0.0365, + "epoch": 2.924511545293073, + "grad_norm": 0.34657318515762536, + "learning_rate": 2.0457312318434975e-06, + "loss": 0.0359, "step": 3293 }, { - "epoch": 1.4630246502331778, - "grad_norm": 0.4647071982633707, - "learning_rate": 7.99804576406795e-06, - "loss": 0.0473, + "epoch": 2.9253996447602133, + "grad_norm": 0.45150283166425603, + "learning_rate": 2.0426053370466058e-06, + "loss": 0.042, "step": 3294 }, { - "epoch": 1.4634687985787254, - "grad_norm": 0.5564296867063627, - "learning_rate": 7.996494141415284e-06, - "loss": 0.0654, + "epoch": 2.9262877442273534, + "grad_norm": 0.4025553185092821, + "learning_rate": 2.0394812191203133e-06, + "loss": 0.043, "step": 3295 }, { - "epoch": 1.4639129469242727, - "grad_norm": 0.4602454467326126, - "learning_rate": 7.994942068338647e-06, - "loss": 0.0375, + "epoch": 2.927175843694494, + "grad_norm": 0.3517187659910425, + "learning_rate": 2.0363588799416594e-06, + "loss": 0.0354, "step": 3296 }, { - "epoch": 1.4643570952698202, - "grad_norm": 0.44240678049891874, - "learning_rate": 7.993389545071341e-06, - "loss": 0.0538, + "epoch": 2.928063943161634, + "grad_norm": 0.31683729402917576, + "learning_rate": 2.033238321386616e-06, + "loss": 0.0289, "step": 3297 }, { - "epoch": 1.4648012436153675, - "grad_norm": 0.5956552998865925, - "learning_rate": 7.991836571846739e-06, - "loss": 0.0483, + "epoch": 2.9289520426287745, + "grad_norm": 0.3530534662804317, + "learning_rate": 2.0301195453300903e-06, + "loss": 0.0337, "step": 3298 }, { - "epoch": 1.465245391960915, - "grad_norm": 0.8272941029925692, - "learning_rate": 7.990283148898277e-06, - "loss": 0.0621, + "epoch": 2.9298401420959146, + "grad_norm": 0.33259384039497863, + "learning_rate": 2.027002553645912e-06, + "loss": 0.0351, "step": 3299 }, { - "epoch": 1.4656895403064625, - "grad_norm": 0.521288053624164, - "learning_rate": 7.988729276459463e-06, - "loss": 0.0389, + "epoch": 2.930728241563055, + "grad_norm": 0.33132222524162663, + "learning_rate": 2.0238873482068396e-06, + "loss": 0.0361, "step": 3300 }, { - "epoch": 1.4661336886520098, - "grad_norm": 0.3918246593118355, - "learning_rate": 7.987174954763867e-06, - "loss": 0.0351, + "epoch": 2.931616341030195, + "grad_norm": 0.2932215710891699, + "learning_rate": 2.020773930884563e-06, + "loss": 0.033, "step": 3301 }, { - "epoch": 1.466577836997557, - "grad_norm": 0.5695857738579163, - "learning_rate": 7.985620184045133e-06, - "loss": 0.051, + "epoch": 2.9325044404973357, + "grad_norm": 0.40884860043118654, + "learning_rate": 2.017662303549693e-06, + "loss": 0.0357, "step": 3302 }, { - "epoch": 1.4670219853431046, - "grad_norm": 0.5461168059678839, - "learning_rate": 7.98406496453697e-06, - "loss": 0.045, + "epoch": 2.9333925399644762, + "grad_norm": 0.3631064484842973, + "learning_rate": 2.0145524680717684e-06, + "loss": 0.0389, "step": 3303 }, { - "epoch": 1.467466133688652, - "grad_norm": 0.45316446756220935, - "learning_rate": 7.982509296473151e-06, - "loss": 0.0376, + "epoch": 2.9342806394316163, + "grad_norm": 0.40038919515485394, + "learning_rate": 2.0114444263192466e-06, + "loss": 0.0298, "step": 3304 }, { - "epoch": 1.4679102820341994, - "grad_norm": 0.5116686497553603, - "learning_rate": 7.98095318008752e-06, - "loss": 0.0523, + "epoch": 2.9351687388987564, + "grad_norm": 0.3944735488852371, + "learning_rate": 2.0083381801595153e-06, + "loss": 0.039, "step": 3305 }, { - "epoch": 1.4683544303797469, - "grad_norm": 0.5042517448525269, - "learning_rate": 7.97939661561399e-06, - "loss": 0.0473, + "epoch": 2.936056838365897, + "grad_norm": 0.3749654214627426, + "learning_rate": 2.0052337314588733e-06, + "loss": 0.0323, "step": 3306 }, { - "epoch": 1.4687985787252942, - "grad_norm": 0.5555029036352702, - "learning_rate": 7.977839603286537e-06, - "loss": 0.053, + "epoch": 2.9369449378330375, + "grad_norm": 0.36119220854664474, + "learning_rate": 2.002131082082549e-06, + "loss": 0.035, "step": 3307 }, { - "epoch": 1.4692427270708417, - "grad_norm": 0.3410097103369562, - "learning_rate": 7.976282143339207e-06, - "loss": 0.0293, + "epoch": 2.9378330373001775, + "grad_norm": 0.41339812862832803, + "learning_rate": 1.9990302338946838e-06, + "loss": 0.0338, "step": 3308 }, { - "epoch": 1.469686875416389, - "grad_norm": 0.6134381443649801, - "learning_rate": 7.974724236006113e-06, - "loss": 0.0528, + "epoch": 2.938721136767318, + "grad_norm": 0.433266835637927, + "learning_rate": 1.995931188758336e-06, + "loss": 0.0372, "step": 3309 }, { - "epoch": 1.4701310237619365, - "grad_norm": 0.4165723941603629, - "learning_rate": 7.973165881521435e-06, - "loss": 0.041, + "epoch": 2.939609236234458, + "grad_norm": 0.36692614273163887, + "learning_rate": 1.992833948535487e-06, + "loss": 0.0374, "step": 3310 }, { - "epoch": 1.470575172107484, - "grad_norm": 0.4357045233722827, - "learning_rate": 7.971607080119418e-06, - "loss": 0.0499, + "epoch": 2.9404973357015987, + "grad_norm": 0.40590300122226025, + "learning_rate": 1.9897385150870274e-06, + "loss": 0.0408, "step": 3311 }, { - "epoch": 1.4710193204530313, - "grad_norm": 0.5469760548909597, - "learning_rate": 7.97004783203438e-06, - "loss": 0.0436, + "epoch": 2.9413854351687387, + "grad_norm": 0.4029544927871777, + "learning_rate": 1.9866448902727643e-06, + "loss": 0.0398, "step": 3312 }, { - "epoch": 1.4714634687985786, - "grad_norm": 0.4196612097876158, - "learning_rate": 7.968488137500699e-06, - "loss": 0.0474, + "epoch": 2.9422735346358793, + "grad_norm": 0.33721157266618157, + "learning_rate": 1.983553075951421e-06, + "loss": 0.0256, "step": 3313 }, { - "epoch": 1.471907617144126, - "grad_norm": 0.4878242528103949, - "learning_rate": 7.966927996752824e-06, - "loss": 0.0407, + "epoch": 2.94316163410302, + "grad_norm": 0.32779352352741326, + "learning_rate": 1.9804630739806267e-06, + "loss": 0.0342, "step": 3314 }, { - "epoch": 1.4723517654896736, - "grad_norm": 0.4654212754221821, - "learning_rate": 7.965367410025275e-06, - "loss": 0.0276, + "epoch": 2.94404973357016, + "grad_norm": 0.3129523937087835, + "learning_rate": 1.97737488621693e-06, + "loss": 0.0322, "step": 3315 }, { - "epoch": 1.472795913835221, - "grad_norm": 0.4496002039864948, - "learning_rate": 7.96380637755263e-06, - "loss": 0.043, + "epoch": 2.9449378330373, + "grad_norm": 0.494246292301225, + "learning_rate": 1.974288514515783e-06, + "loss": 0.044, "step": 3316 }, { - "epoch": 1.4732400621807684, - "grad_norm": 0.843662279100629, - "learning_rate": 7.96224489956954e-06, - "loss": 0.0507, + "epoch": 2.9458259325044405, + "grad_norm": 0.3109120361380824, + "learning_rate": 1.9712039607315466e-06, + "loss": 0.0326, "step": 3317 }, { - "epoch": 1.4736842105263157, - "grad_norm": 0.32395601492358206, - "learning_rate": 7.960682976310721e-06, - "loss": 0.0356, + "epoch": 2.946714031971581, + "grad_norm": 0.47054441810154746, + "learning_rate": 1.968121226717493e-06, + "loss": 0.0489, "step": 3318 }, { - "epoch": 1.4741283588718632, - "grad_norm": 0.6598994798652713, - "learning_rate": 7.959120608010959e-06, - "loss": 0.0532, + "epoch": 2.947602131438721, + "grad_norm": 0.431348841788879, + "learning_rate": 1.9650403143258033e-06, + "loss": 0.0445, "step": 3319 }, { - "epoch": 1.4745725072174105, - "grad_norm": 0.6135947839100228, - "learning_rate": 7.957557794905104e-06, - "loss": 0.0568, + "epoch": 2.9484902309058616, + "grad_norm": 0.42005226880470004, + "learning_rate": 1.9619612254075536e-06, + "loss": 0.0424, "step": 3320 }, { - "epoch": 1.475016655562958, - "grad_norm": 0.5386416254084523, - "learning_rate": 7.955994537228068e-06, - "loss": 0.0524, + "epoch": 2.9493783303730017, + "grad_norm": 0.3859104877206694, + "learning_rate": 1.9588839618127355e-06, + "loss": 0.0406, "step": 3321 }, { - "epoch": 1.4754608039085055, - "grad_norm": 0.41178054802832886, - "learning_rate": 7.954430835214844e-06, - "loss": 0.0478, + "epoch": 2.950266429840142, + "grad_norm": 0.3768473207600592, + "learning_rate": 1.9558085253902394e-06, + "loss": 0.033, "step": 3322 }, { - "epoch": 1.4759049522540528, - "grad_norm": 0.553965746901051, - "learning_rate": 7.952866689100476e-06, - "loss": 0.0497, + "epoch": 2.9511545293072823, + "grad_norm": 0.326175984617022, + "learning_rate": 1.952734917987856e-06, + "loss": 0.0324, "step": 3323 }, { - "epoch": 1.4763491005996003, - "grad_norm": 0.5002040941699343, - "learning_rate": 7.951302099120087e-06, - "loss": 0.0479, + "epoch": 2.952042628774423, + "grad_norm": 0.40927173186547333, + "learning_rate": 1.949663141452282e-06, + "loss": 0.0346, "step": 3324 }, { - "epoch": 1.4767932489451476, - "grad_norm": 0.5774527696932492, - "learning_rate": 7.949737065508856e-06, - "loss": 0.0447, + "epoch": 2.9529307282415633, + "grad_norm": 0.48013759818574125, + "learning_rate": 1.9465931976291103e-06, + "loss": 0.0342, "step": 3325 }, { - "epoch": 1.4772373972906951, - "grad_norm": 0.5834169178186834, - "learning_rate": 7.948171588502036e-06, - "loss": 0.0442, + "epoch": 2.9538188277087034, + "grad_norm": 0.3345920652259473, + "learning_rate": 1.943525088362831e-06, + "loss": 0.0319, "step": 3326 }, { - "epoch": 1.4776815456362424, - "grad_norm": 0.48069558429150877, - "learning_rate": 7.946605668334947e-06, - "loss": 0.0509, + "epoch": 2.9547069271758435, + "grad_norm": 0.400664709623364, + "learning_rate": 1.9404588154968366e-06, + "loss": 0.0357, "step": 3327 }, { - "epoch": 1.47812569398179, - "grad_norm": 0.6916569805639394, - "learning_rate": 7.945039305242972e-06, - "loss": 0.0543, + "epoch": 2.955595026642984, + "grad_norm": 0.3141920159671145, + "learning_rate": 1.937394380873418e-06, + "loss": 0.0269, "step": 3328 }, { - "epoch": 1.4785698423273375, - "grad_norm": 0.45596286134877817, - "learning_rate": 7.943472499461562e-06, - "loss": 0.0463, + "epoch": 2.9564831261101245, + "grad_norm": 0.39910849610319554, + "learning_rate": 1.9343317863337506e-06, + "loss": 0.0346, "step": 3329 }, { - "epoch": 1.4790139906728847, - "grad_norm": 0.7085391238735931, - "learning_rate": 7.941905251226235e-06, - "loss": 0.0643, + "epoch": 2.9573712255772646, + "grad_norm": 0.3900489384340357, + "learning_rate": 1.931271033717916e-06, + "loss": 0.0325, "step": 3330 }, { - "epoch": 1.479458139018432, - "grad_norm": 0.878574496425455, - "learning_rate": 7.940337560772573e-06, - "loss": 0.0623, + "epoch": 2.9582593250444047, + "grad_norm": 0.3642120034133262, + "learning_rate": 1.9282121248648845e-06, + "loss": 0.0358, "step": 3331 }, { - "epoch": 1.4799022873639796, - "grad_norm": 0.3602452580945399, - "learning_rate": 7.93876942833623e-06, - "loss": 0.0325, + "epoch": 2.959147424511545, + "grad_norm": 0.5146655274099231, + "learning_rate": 1.925155061612518e-06, + "loss": 0.0329, "step": 3332 }, { - "epoch": 1.480346435709527, - "grad_norm": 0.5694019432619376, - "learning_rate": 7.937200854152917e-06, - "loss": 0.0527, + "epoch": 2.9600355239786857, + "grad_norm": 0.33719231204509664, + "learning_rate": 1.9220998457975687e-06, + "loss": 0.0321, "step": 3333 }, { - "epoch": 1.4807905840550744, - "grad_norm": 0.46397062738751477, - "learning_rate": 7.935631838458426e-06, - "loss": 0.0435, + "epoch": 2.960923623445826, + "grad_norm": 0.3771256970179299, + "learning_rate": 1.9190464792556836e-06, + "loss": 0.0313, "step": 3334 }, { - "epoch": 1.4812347324006219, - "grad_norm": 0.38977994271678623, - "learning_rate": 7.9340623814886e-06, - "loss": 0.0346, + "epoch": 2.9618117229129663, + "grad_norm": 0.36201294188026895, + "learning_rate": 1.915994963821392e-06, + "loss": 0.04, "step": 3335 }, { - "epoch": 1.4816788807461692, - "grad_norm": 0.5991214620269069, - "learning_rate": 7.932492483479358e-06, - "loss": 0.0519, + "epoch": 2.9626998223801064, + "grad_norm": 0.42619272931162727, + "learning_rate": 1.912945301328118e-06, + "loss": 0.0348, "step": 3336 }, { - "epoch": 1.4821230290917167, - "grad_norm": 0.42521594572783633, - "learning_rate": 7.930922144666679e-06, - "loss": 0.0407, + "epoch": 2.963587921847247, + "grad_norm": 0.3649419226641657, + "learning_rate": 1.9098974936081665e-06, + "loss": 0.0359, "step": 3337 }, { - "epoch": 1.482567177437264, - "grad_norm": 0.6068474747842975, - "learning_rate": 7.929351365286614e-06, - "loss": 0.0569, + "epoch": 2.964476021314387, + "grad_norm": 0.7621193429331412, + "learning_rate": 1.9068515424927297e-06, + "loss": 0.0402, "step": 3338 }, { - "epoch": 1.4830113257828115, - "grad_norm": 0.3673998638748444, - "learning_rate": 7.927780145575281e-06, - "loss": 0.0398, + "epoch": 2.9653641207815276, + "grad_norm": 0.4018314333188861, + "learning_rate": 1.9038074498118875e-06, + "loss": 0.0288, "step": 3339 }, { - "epoch": 1.483455474128359, - "grad_norm": 0.6378602388998965, - "learning_rate": 7.926208485768856e-06, - "loss": 0.0445, + "epoch": 2.966252220248668, + "grad_norm": 0.35306495582431857, + "learning_rate": 1.9007652173945994e-06, + "loss": 0.0324, "step": 3340 }, { - "epoch": 1.4838996224739063, - "grad_norm": 0.4635352438588869, - "learning_rate": 7.924636386103588e-06, - "loss": 0.0472, + "epoch": 2.967140319715808, + "grad_norm": 0.3183837603978152, + "learning_rate": 1.8977248470687065e-06, + "loss": 0.0328, "step": 3341 }, { - "epoch": 1.4843437708194536, - "grad_norm": 0.43741344875712923, - "learning_rate": 7.923063846815791e-06, - "loss": 0.0365, + "epoch": 2.9680284191829482, + "grad_norm": 0.38075989944376254, + "learning_rate": 1.8946863406609367e-06, + "loss": 0.0376, "step": 3342 }, { - "epoch": 1.484787919165001, - "grad_norm": 0.409274641921781, - "learning_rate": 7.921490868141843e-06, - "loss": 0.0437, + "epoch": 2.9689165186500888, + "grad_norm": 0.3235529230598592, + "learning_rate": 1.8916496999968909e-06, + "loss": 0.036, "step": 3343 }, { - "epoch": 1.4852320675105486, - "grad_norm": 0.44864312363339953, - "learning_rate": 7.91991745031819e-06, - "loss": 0.0365, + "epoch": 2.9698046181172293, + "grad_norm": 0.39967918964708427, + "learning_rate": 1.8886149269010556e-06, + "loss": 0.0323, "step": 3344 }, { - "epoch": 1.4856762158560959, - "grad_norm": 0.7767506869882325, - "learning_rate": 7.918343593581344e-06, - "loss": 0.055, + "epoch": 2.9706927175843694, + "grad_norm": 0.39920867111551245, + "learning_rate": 1.8855820231967892e-06, + "loss": 0.0333, "step": 3345 }, { - "epoch": 1.4861203642016434, - "grad_norm": 0.663893547353565, - "learning_rate": 7.916769298167881e-06, - "loss": 0.0539, + "epoch": 2.97158081705151, + "grad_norm": 0.3668464971121366, + "learning_rate": 1.8825509907063328e-06, + "loss": 0.0338, "step": 3346 }, { - "epoch": 1.4865645125471907, - "grad_norm": 0.44134070690444965, - "learning_rate": 7.915194564314446e-06, - "loss": 0.0451, + "epoch": 2.97246891651865, + "grad_norm": 0.36580422811700214, + "learning_rate": 1.8795218312507974e-06, + "loss": 0.0374, "step": 3347 }, { - "epoch": 1.4870086608927382, - "grad_norm": 0.5644812574628542, - "learning_rate": 7.913619392257748e-06, - "loss": 0.052, + "epoch": 2.9733570159857905, + "grad_norm": 0.2954628970559159, + "learning_rate": 1.876494546650175e-06, + "loss": 0.0401, "step": 3348 }, { - "epoch": 1.4874528092382855, - "grad_norm": 0.4293480800359804, - "learning_rate": 7.912043782234562e-06, - "loss": 0.0375, + "epoch": 2.9742451154529306, + "grad_norm": 0.3273296166549773, + "learning_rate": 1.873469138723325e-06, + "loss": 0.0354, "step": 3349 }, { - "epoch": 1.487896957583833, - "grad_norm": 0.4208900641552744, - "learning_rate": 7.910467734481726e-06, - "loss": 0.0263, + "epoch": 2.975133214920071, + "grad_norm": 0.4107714869843828, + "learning_rate": 1.8704456092879813e-06, + "loss": 0.0451, "step": 3350 }, { - "epoch": 1.4883411059293805, - "grad_norm": 0.5686191171590155, - "learning_rate": 7.90889124923615e-06, - "loss": 0.0552, + "epoch": 2.9760213143872116, + "grad_norm": 0.3882844503989167, + "learning_rate": 1.867423960160753e-06, + "loss": 0.0306, "step": 3351 }, { - "epoch": 1.4887852542749278, - "grad_norm": 0.45995768523750874, - "learning_rate": 7.907314326734807e-06, - "loss": 0.0392, + "epoch": 2.9769094138543517, + "grad_norm": 0.31073438444954216, + "learning_rate": 1.8644041931571138e-06, + "loss": 0.0332, "step": 3352 }, { - "epoch": 1.4892294026204753, - "grad_norm": 0.6197595746880575, - "learning_rate": 7.905736967214735e-06, - "loss": 0.055, + "epoch": 2.977797513321492, + "grad_norm": 0.37944811166255143, + "learning_rate": 1.8613863100914082e-06, + "loss": 0.0429, "step": 3353 }, { - "epoch": 1.4896735509660226, - "grad_norm": 0.6356786842165147, - "learning_rate": 7.904159170913035e-06, - "loss": 0.046, + "epoch": 2.9786856127886323, + "grad_norm": 0.5957741696978557, + "learning_rate": 1.8583703127768526e-06, + "loss": 0.0327, "step": 3354 }, { - "epoch": 1.4901176993115701, - "grad_norm": 0.4601859054406351, - "learning_rate": 7.902580938066878e-06, - "loss": 0.0414, + "epoch": 2.979573712255773, + "grad_norm": 0.37335510195578836, + "learning_rate": 1.8553562030255246e-06, + "loss": 0.038, "step": 3355 }, { - "epoch": 1.4905618476571174, - "grad_norm": 0.36553812505176564, - "learning_rate": 7.901002268913501e-06, - "loss": 0.0322, + "epoch": 2.980461811722913, + "grad_norm": 0.28581665267498463, + "learning_rate": 1.852343982648373e-06, + "loss": 0.0352, "step": 3356 }, { - "epoch": 1.491005996002665, - "grad_norm": 0.615226798089253, - "learning_rate": 7.899423163690204e-06, - "loss": 0.0549, + "epoch": 2.9813499111900534, + "grad_norm": 0.34624317000878835, + "learning_rate": 1.8493336534552076e-06, + "loss": 0.0384, "step": 3357 }, { - "epoch": 1.4914501443482124, - "grad_norm": 0.4095361485944277, - "learning_rate": 7.897843622634352e-06, - "loss": 0.0387, + "epoch": 2.9822380106571935, + "grad_norm": 0.3182975122449055, + "learning_rate": 1.8463252172547019e-06, + "loss": 0.0315, "step": 3358 }, { - "epoch": 1.4918942926937597, - "grad_norm": 0.5006702378564764, - "learning_rate": 7.896263645983378e-06, - "loss": 0.0517, + "epoch": 2.983126110124334, + "grad_norm": 0.6737271139227827, + "learning_rate": 1.8433186758543948e-06, + "loss": 0.048, "step": 3359 }, { - "epoch": 1.492338441039307, - "grad_norm": 0.6244366583571412, - "learning_rate": 7.89468323397478e-06, - "loss": 0.0496, + "epoch": 2.984014209591474, + "grad_norm": 0.36804671209761464, + "learning_rate": 1.8403140310606888e-06, + "loss": 0.037, "step": 3360 }, { - "epoch": 1.4927825893848545, - "grad_norm": 0.5367025221002972, - "learning_rate": 7.893102386846118e-06, - "loss": 0.0436, + "epoch": 2.9849023090586146, + "grad_norm": 0.36278004185633717, + "learning_rate": 1.8373112846788366e-06, + "loss": 0.0364, "step": 3361 }, { - "epoch": 1.493226737730402, - "grad_norm": 0.4508013926132327, - "learning_rate": 7.891521104835023e-06, - "loss": 0.0344, + "epoch": 2.9857904085257547, + "grad_norm": 0.34080469269804775, + "learning_rate": 1.8343104385129612e-06, + "loss": 0.0378, "step": 3362 }, { - "epoch": 1.4936708860759493, - "grad_norm": 0.6393470972778723, - "learning_rate": 7.889939388179188e-06, - "loss": 0.0526, + "epoch": 2.9866785079928952, + "grad_norm": 0.2905327881702447, + "learning_rate": 1.8313114943660403e-06, + "loss": 0.0338, "step": 3363 }, { - "epoch": 1.4941150344214968, - "grad_norm": 0.472940776234741, - "learning_rate": 7.888357237116372e-06, - "loss": 0.0431, + "epoch": 2.9875666074600353, + "grad_norm": 0.5281539360329269, + "learning_rate": 1.8283144540399078e-06, + "loss": 0.0458, "step": 3364 }, { - "epoch": 1.4945591827670441, - "grad_norm": 0.4993044626375099, - "learning_rate": 7.886774651884397e-06, - "loss": 0.0541, + "epoch": 2.988454706927176, + "grad_norm": 0.29317350057512703, + "learning_rate": 1.8253193193352525e-06, + "loss": 0.0358, "step": 3365 }, { - "epoch": 1.4950033311125916, - "grad_norm": 0.4991379094630535, - "learning_rate": 7.885191632721156e-06, - "loss": 0.0423, + "epoch": 2.9893428063943164, + "grad_norm": 0.3757051365198419, + "learning_rate": 1.8223260920516227e-06, + "loss": 0.0456, "step": 3366 }, { - "epoch": 1.495447479458139, - "grad_norm": 0.6104766035319223, - "learning_rate": 7.8836081798646e-06, - "loss": 0.0489, + "epoch": 2.9902309058614565, + "grad_norm": 0.35032849686712814, + "learning_rate": 1.8193347739874157e-06, + "loss": 0.0342, "step": 3367 }, { - "epoch": 1.4958916278036865, - "grad_norm": 0.582209332395513, - "learning_rate": 7.882024293552752e-06, - "loss": 0.0481, + "epoch": 2.9911190053285965, + "grad_norm": 0.43634666739329175, + "learning_rate": 1.8163453669398867e-06, + "loss": 0.0327, "step": 3368 }, { - "epoch": 1.496335776149234, - "grad_norm": 1.2878711444761508, - "learning_rate": 7.880439974023694e-06, - "loss": 0.0895, + "epoch": 2.992007104795737, + "grad_norm": 0.35950605478419084, + "learning_rate": 1.8133578727051388e-06, + "loss": 0.0411, "step": 3369 }, { - "epoch": 1.4967799244947813, - "grad_norm": 0.47752757971222215, - "learning_rate": 7.87885522151558e-06, - "loss": 0.0556, + "epoch": 2.9928952042628776, + "grad_norm": 0.3609794901562061, + "learning_rate": 1.8103722930781249e-06, + "loss": 0.0345, "step": 3370 }, { - "epoch": 1.4972240728403285, - "grad_norm": 0.34960537720365853, - "learning_rate": 7.877270036266622e-06, - "loss": 0.0361, + "epoch": 2.9937833037300177, + "grad_norm": 0.35013950214744116, + "learning_rate": 1.8073886298526522e-06, + "loss": 0.0317, "step": 3371 }, { - "epoch": 1.497668221185876, - "grad_norm": 0.6171844191638863, - "learning_rate": 7.875684418515101e-06, - "loss": 0.0413, + "epoch": 2.994671403197158, + "grad_norm": 0.6651815939906269, + "learning_rate": 1.8044068848213763e-06, + "loss": 0.0493, "step": 3372 }, { - "epoch": 1.4981123695314236, - "grad_norm": 0.5888605884776164, - "learning_rate": 7.874098368499362e-06, - "loss": 0.0433, + "epoch": 2.9955595026642983, + "grad_norm": 0.5234108356175042, + "learning_rate": 1.8014270597757926e-06, + "loss": 0.0441, "step": 3373 }, { - "epoch": 1.4985565178769709, - "grad_norm": 0.32860735680481784, - "learning_rate": 7.872511886457816e-06, - "loss": 0.029, + "epoch": 2.996447602131439, + "grad_norm": 0.4496691461596499, + "learning_rate": 1.7984491565062513e-06, + "loss": 0.0401, "step": 3374 }, { - "epoch": 1.4990006662225184, - "grad_norm": 0.4838256059081808, - "learning_rate": 7.87092497262894e-06, - "loss": 0.0428, + "epoch": 2.997335701598579, + "grad_norm": 0.37057822260084683, + "learning_rate": 1.7954731768019461e-06, + "loss": 0.039, "step": 3375 }, { - "epoch": 1.4994448145680657, - "grad_norm": 0.5961233345859986, - "learning_rate": 7.86933762725127e-06, - "loss": 0.0568, + "epoch": 2.9982238010657194, + "grad_norm": 0.2938240171494723, + "learning_rate": 1.792499122450913e-06, + "loss": 0.0282, "step": 3376 }, { - "epoch": 1.4998889629136132, - "grad_norm": 0.4985166006472107, - "learning_rate": 7.867749850563414e-06, - "loss": 0.0539, + "epoch": 2.99911190053286, + "grad_norm": 0.3108398912701676, + "learning_rate": 1.7895269952400303e-06, + "loss": 0.0297, "step": 3377 }, { - "epoch": 1.5003331112591605, - "grad_norm": 0.8288419788268757, - "learning_rate": 7.86616164280404e-06, - "loss": 0.0376, + "epoch": 3.0, + "grad_norm": 0.533545404175948, + "learning_rate": 1.7865567969550235e-06, + "loss": 0.0363, "step": 3378 }, { - "epoch": 1.500777259604708, - "grad_norm": 0.5437911475680088, - "learning_rate": 7.864573004211884e-06, - "loss": 0.0629, + "epoch": 3.0, + "eval_loss": 0.041787039488554, + "eval_runtime": 78.1532, + "eval_samples_per_second": 194.055, + "eval_steps_per_second": 3.033, + "step": 3378 + }, + { + "epoch": 3.0008880994671405, + "grad_norm": 0.32020596950233093, + "learning_rate": 1.7835885293804522e-06, + "loss": 0.0302, "step": 3379 }, { - "epoch": 1.5012214079502555, - "grad_norm": 0.5173272400012955, - "learning_rate": 7.862983935025745e-06, - "loss": 0.0496, + "epoch": 3.0017761989342806, + "grad_norm": 0.3088566077675897, + "learning_rate": 1.7806221942997237e-06, + "loss": 0.0273, "step": 3380 }, { - "epoch": 1.5016655562958028, - "grad_norm": 0.41368737678867235, - "learning_rate": 7.861394435484488e-06, - "loss": 0.0428, + "epoch": 3.002664298401421, + "grad_norm": 0.3730031254504042, + "learning_rate": 1.7776577934950783e-06, + "loss": 0.0324, "step": 3381 }, { - "epoch": 1.50210970464135, - "grad_norm": 0.382287625598364, - "learning_rate": 7.85980450582704e-06, - "loss": 0.0362, + "epoch": 3.003552397868561, + "grad_norm": 0.5123057571119825, + "learning_rate": 1.7746953287475944e-06, + "loss": 0.0434, "step": 3382 }, { - "epoch": 1.5025538529868976, - "grad_norm": 0.7914574257051636, - "learning_rate": 7.858214146292394e-06, - "loss": 0.0723, + "epoch": 3.0044404973357017, + "grad_norm": 0.36113545525063273, + "learning_rate": 1.7717348018371915e-06, + "loss": 0.0268, "step": 3383 }, { - "epoch": 1.502998001332445, - "grad_norm": 0.600368806153385, - "learning_rate": 7.85662335711961e-06, - "loss": 0.0426, + "epoch": 3.005328596802842, + "grad_norm": 0.37460870996734363, + "learning_rate": 1.7687762145426197e-06, + "loss": 0.032, "step": 3384 }, { - "epoch": 1.5034421496779924, - "grad_norm": 0.516289015563682, - "learning_rate": 7.855032138547811e-06, - "loss": 0.0401, + "epoch": 3.0062166962699823, + "grad_norm": 0.3876240880290442, + "learning_rate": 1.7658195686414692e-06, + "loss": 0.0297, "step": 3385 }, { - "epoch": 1.50388629802354, - "grad_norm": 0.4579999933485943, - "learning_rate": 7.853440490816182e-06, - "loss": 0.0372, + "epoch": 3.0071047957371224, + "grad_norm": 0.3267199322576471, + "learning_rate": 1.7628648659101587e-06, + "loss": 0.0251, "step": 3386 }, { - "epoch": 1.5043304463690874, - "grad_norm": 0.4924686770621345, - "learning_rate": 7.851848414163976e-06, - "loss": 0.0457, + "epoch": 3.007992895204263, + "grad_norm": 0.32689525299773015, + "learning_rate": 1.7599121081239406e-06, + "loss": 0.0286, "step": 3387 }, { - "epoch": 1.5047745947146347, - "grad_norm": 0.5624519083004446, - "learning_rate": 7.850255908830508e-06, - "loss": 0.0431, + "epoch": 3.008880994671403, + "grad_norm": 0.4240848754876425, + "learning_rate": 1.7569612970569e-06, + "loss": 0.0334, "step": 3388 }, { - "epoch": 1.505218743060182, - "grad_norm": 0.46856624335860086, - "learning_rate": 7.848662975055161e-06, - "loss": 0.0374, + "epoch": 3.0097690941385435, + "grad_norm": 0.36396117051931987, + "learning_rate": 1.7540124344819548e-06, + "loss": 0.0347, "step": 3389 }, { - "epoch": 1.5056628914057295, - "grad_norm": 0.6866568127731671, - "learning_rate": 7.847069613077377e-06, - "loss": 0.088, + "epoch": 3.0106571936056836, + "grad_norm": 0.35951825126530074, + "learning_rate": 1.7510655221708467e-06, + "loss": 0.0289, "step": 3390 }, { - "epoch": 1.506107039751277, - "grad_norm": 0.5263852322647139, - "learning_rate": 7.845475823136669e-06, - "loss": 0.0475, + "epoch": 3.011545293072824, + "grad_norm": 0.6340363117228983, + "learning_rate": 1.7481205618941472e-06, + "loss": 0.0355, "step": 3391 }, { - "epoch": 1.5065511880968243, - "grad_norm": 0.6378963066155895, - "learning_rate": 7.843881605472606e-06, - "loss": 0.0579, + "epoch": 3.0124333925399647, + "grad_norm": 0.35804834224058507, + "learning_rate": 1.7451775554212592e-06, + "loss": 0.0347, "step": 3392 }, { - "epoch": 1.5069953364423716, - "grad_norm": 0.8470563699341165, - "learning_rate": 7.84228696032483e-06, - "loss": 0.0685, + "epoch": 3.0133214920071048, + "grad_norm": 0.38625825431415106, + "learning_rate": 1.742236504520407e-06, + "loss": 0.0307, "step": 3393 }, { - "epoch": 1.5074394847879191, - "grad_norm": 0.5121438548331829, - "learning_rate": 7.840691887933042e-06, - "loss": 0.0482, + "epoch": 3.0142095914742453, + "grad_norm": 0.3860254175150108, + "learning_rate": 1.7392974109586396e-06, + "loss": 0.0262, "step": 3394 }, { - "epoch": 1.5078836331334666, - "grad_norm": 0.46761061686619343, - "learning_rate": 7.839096388537008e-06, - "loss": 0.053, + "epoch": 3.0150976909413854, + "grad_norm": 0.30432603098065447, + "learning_rate": 1.736360276501835e-06, + "loss": 0.0278, "step": 3395 }, { - "epoch": 1.508327781479014, - "grad_norm": 0.568716582017773, - "learning_rate": 7.837500462376559e-06, - "loss": 0.0446, + "epoch": 3.015985790408526, + "grad_norm": 0.38427685779628756, + "learning_rate": 1.7334251029146882e-06, + "loss": 0.042, "step": 3396 }, { - "epoch": 1.5087719298245614, - "grad_norm": 0.427488840303415, - "learning_rate": 7.83590410969159e-06, - "loss": 0.045, + "epoch": 3.016873889875666, + "grad_norm": 0.453636926549333, + "learning_rate": 1.7304918919607216e-06, + "loss": 0.0319, "step": 3397 }, { - "epoch": 1.509216078170109, - "grad_norm": 0.5336130953887138, - "learning_rate": 7.834307330722059e-06, - "loss": 0.0501, + "epoch": 3.0177619893428065, + "grad_norm": 0.34657786188265227, + "learning_rate": 1.7275606454022748e-06, + "loss": 0.037, "step": 3398 }, { - "epoch": 1.5096602265156562, - "grad_norm": 0.6643085395902935, - "learning_rate": 7.832710125707991e-06, - "loss": 0.0714, + "epoch": 3.0186500888099466, + "grad_norm": 0.35445950107686525, + "learning_rate": 1.7246313650005058e-06, + "loss": 0.0324, "step": 3399 }, { - "epoch": 1.5101043748612035, - "grad_norm": 0.7517073070810807, - "learning_rate": 7.831112494889472e-06, - "loss": 0.0574, + "epoch": 3.019538188277087, + "grad_norm": 0.3822732277179491, + "learning_rate": 1.721704052515395e-06, + "loss": 0.0292, "step": 3400 }, { - "epoch": 1.510548523206751, - "grad_norm": 0.6793225641094393, - "learning_rate": 7.829514438506651e-06, - "loss": 0.054, + "epoch": 3.020426287744227, + "grad_norm": 0.33562751195986795, + "learning_rate": 1.7187787097057429e-06, + "loss": 0.0316, "step": 3401 }, { - "epoch": 1.5109926715522985, - "grad_norm": 0.4505747895513854, - "learning_rate": 7.827915956799745e-06, - "loss": 0.0417, + "epoch": 3.0213143872113677, + "grad_norm": 0.3115479805509103, + "learning_rate": 1.7158553383291555e-06, + "loss": 0.0317, "step": 3402 }, { - "epoch": 1.5114368198978458, - "grad_norm": 0.7701556350457059, - "learning_rate": 7.826317050009035e-06, - "loss": 0.046, + "epoch": 3.022202486678508, + "grad_norm": 0.3635008070986166, + "learning_rate": 1.7129339401420648e-06, + "loss": 0.0335, "step": 3403 }, { - "epoch": 1.5118809682433931, - "grad_norm": 0.4711452448196391, - "learning_rate": 7.82471771837486e-06, - "loss": 0.0393, + "epoch": 3.0230905861456483, + "grad_norm": 0.36851852787930095, + "learning_rate": 1.7100145168997157e-06, + "loss": 0.0339, "step": 3404 }, { - "epoch": 1.5123251165889409, - "grad_norm": 0.5955356460688755, - "learning_rate": 7.823117962137628e-06, - "loss": 0.0475, + "epoch": 3.023978685612789, + "grad_norm": 0.35430945415155546, + "learning_rate": 1.7070970703561624e-06, + "loss": 0.0313, "step": 3405 }, { - "epoch": 1.5127692649344882, - "grad_norm": 0.47383670753602875, - "learning_rate": 7.821517781537811e-06, - "loss": 0.0471, + "epoch": 3.024866785079929, + "grad_norm": 0.3615631365194653, + "learning_rate": 1.7041816022642731e-06, + "loss": 0.0356, "step": 3406 }, { - "epoch": 1.5132134132800354, - "grad_norm": 0.36906733260563584, - "learning_rate": 7.819917176815942e-06, - "loss": 0.0424, + "epoch": 3.0257548845470694, + "grad_norm": 0.3749744762401782, + "learning_rate": 1.701268114375731e-06, + "loss": 0.036, "step": 3407 }, { - "epoch": 1.513657561625583, - "grad_norm": 0.5026639235608937, - "learning_rate": 7.818316148212619e-06, - "loss": 0.0478, + "epoch": 3.0266429840142095, + "grad_norm": 0.32980877647347795, + "learning_rate": 1.6983566084410224e-06, + "loss": 0.0369, "step": 3408 }, { - "epoch": 1.5141017099711305, - "grad_norm": 0.5632301347650163, - "learning_rate": 7.816714695968503e-06, - "loss": 0.0582, + "epoch": 3.02753108348135, + "grad_norm": 0.34045371592944046, + "learning_rate": 1.6954470862094514e-06, + "loss": 0.0314, "step": 3409 }, { - "epoch": 1.5145458583166778, - "grad_norm": 0.5601350342899486, - "learning_rate": 7.815112820324322e-06, - "loss": 0.052, + "epoch": 3.02841918294849, + "grad_norm": 0.44703484677785676, + "learning_rate": 1.692539549429123e-06, + "loss": 0.0386, "step": 3410 }, { - "epoch": 1.514990006662225, - "grad_norm": 0.44545554824483924, - "learning_rate": 7.813510521520864e-06, - "loss": 0.0437, + "epoch": 3.0293072824156306, + "grad_norm": 0.3865599181591045, + "learning_rate": 1.6896339998469509e-06, + "loss": 0.0356, "step": 3411 }, { - "epoch": 1.5154341550077726, - "grad_norm": 0.5321000678999631, - "learning_rate": 7.811907799798981e-06, - "loss": 0.0446, + "epoch": 3.0301953818827707, + "grad_norm": 0.32527492227949584, + "learning_rate": 1.6867304392086575e-06, + "loss": 0.034, "step": 3412 }, { - "epoch": 1.51587830335332, - "grad_norm": 0.6123306795917254, - "learning_rate": 7.81030465539959e-06, - "loss": 0.0472, + "epoch": 3.0310834813499112, + "grad_norm": 0.416752950219048, + "learning_rate": 1.6838288692587723e-06, + "loss": 0.0368, "step": 3413 }, { - "epoch": 1.5163224516988674, - "grad_norm": 0.7182097530608341, - "learning_rate": 7.808701088563669e-06, - "loss": 0.0652, + "epoch": 3.0319715808170513, + "grad_norm": 0.3452232934606978, + "learning_rate": 1.6809292917406178e-06, + "loss": 0.035, "step": 3414 }, { - "epoch": 1.5167666000444149, - "grad_norm": 0.6740358195253988, - "learning_rate": 7.807097099532264e-06, - "loss": 0.0446, + "epoch": 3.032859680284192, + "grad_norm": 0.3489591850168152, + "learning_rate": 1.678031708396331e-06, + "loss": 0.0272, "step": 3415 }, { - "epoch": 1.5172107483899624, - "grad_norm": 0.43545100676950116, - "learning_rate": 7.805492688546481e-06, - "loss": 0.0327, + "epoch": 3.0337477797513324, + "grad_norm": 0.40648659249767033, + "learning_rate": 1.6751361209668477e-06, + "loss": 0.0413, "step": 3416 }, { - "epoch": 1.5176548967355097, - "grad_norm": 0.4493644872097168, - "learning_rate": 7.80388785584749e-06, - "loss": 0.0401, + "epoch": 3.0346358792184724, + "grad_norm": 0.39593533001990056, + "learning_rate": 1.6722425311919015e-06, + "loss": 0.0321, "step": 3417 }, { - "epoch": 1.518099045081057, - "grad_norm": 0.4270665812232271, - "learning_rate": 7.802282601676522e-06, - "loss": 0.0517, + "epoch": 3.035523978685613, + "grad_norm": 0.363604393933297, + "learning_rate": 1.669350940810026e-06, + "loss": 0.0369, "step": 3418 }, { - "epoch": 1.5185431934266045, - "grad_norm": 0.48691036016457884, - "learning_rate": 7.800676926274881e-06, - "loss": 0.0398, + "epoch": 3.036412078152753, + "grad_norm": 0.3925221458198163, + "learning_rate": 1.6664613515585582e-06, + "loss": 0.0309, "step": 3419 }, { - "epoch": 1.518987341772152, - "grad_norm": 0.5623140589479151, - "learning_rate": 7.79907082988392e-06, - "loss": 0.0482, + "epoch": 3.0373001776198936, + "grad_norm": 0.36561598577530785, + "learning_rate": 1.6635737651736266e-06, + "loss": 0.0279, "step": 3420 }, { - "epoch": 1.5194314901176993, - "grad_norm": 0.6102149116754874, - "learning_rate": 7.797464312745067e-06, - "loss": 0.0541, + "epoch": 3.0381882770870337, + "grad_norm": 0.43074556289531746, + "learning_rate": 1.6606881833901627e-06, + "loss": 0.038, "step": 3421 }, { - "epoch": 1.5198756384632466, - "grad_norm": 0.5623605927044153, - "learning_rate": 7.795857375099806e-06, - "loss": 0.05, + "epoch": 3.039076376554174, + "grad_norm": 0.3268413196577665, + "learning_rate": 1.6578046079418885e-06, + "loss": 0.0309, "step": 3422 }, { - "epoch": 1.520319786808794, - "grad_norm": 0.4402109113654038, - "learning_rate": 7.794250017189689e-06, - "loss": 0.046, + "epoch": 3.0399644760213143, + "grad_norm": 0.38874874112487934, + "learning_rate": 1.6549230405613209e-06, + "loss": 0.0406, "step": 3423 }, { - "epoch": 1.5207639351543416, - "grad_norm": 0.48101120541164943, - "learning_rate": 7.792642239256327e-06, - "loss": 0.0461, + "epoch": 3.040852575488455, + "grad_norm": 0.31768216944866806, + "learning_rate": 1.6520434829797744e-06, + "loss": 0.0343, "step": 3424 }, { - "epoch": 1.521208083499889, - "grad_norm": 0.5296531840354222, - "learning_rate": 7.791034041541398e-06, - "loss": 0.0473, + "epoch": 3.041740674955595, + "grad_norm": 0.3260546316515089, + "learning_rate": 1.6491659369273521e-06, + "loss": 0.0311, "step": 3425 }, { - "epoch": 1.5216522318454364, - "grad_norm": 0.37685452582544965, - "learning_rate": 7.78942542428664e-06, - "loss": 0.0317, + "epoch": 3.0426287744227354, + "grad_norm": 0.3319666895264348, + "learning_rate": 1.6462904041329485e-06, + "loss": 0.0283, "step": 3426 }, { - "epoch": 1.522096380190984, - "grad_norm": 0.3991510986249673, - "learning_rate": 7.78781638773386e-06, - "loss": 0.0347, + "epoch": 3.0435168738898755, + "grad_norm": 0.3430621413156639, + "learning_rate": 1.6434168863242523e-06, + "loss": 0.0335, "step": 3427 }, { - "epoch": 1.5225405285365312, - "grad_norm": 0.7554838302963066, - "learning_rate": 7.786206932124918e-06, - "loss": 0.0586, + "epoch": 3.044404973357016, + "grad_norm": 0.3868401764722692, + "learning_rate": 1.6405453852277358e-06, + "loss": 0.0346, "step": 3428 }, { - "epoch": 1.5229846768820785, - "grad_norm": 0.42032863329599834, - "learning_rate": 7.784597057701745e-06, - "loss": 0.0252, + "epoch": 3.0452930728241565, + "grad_norm": 0.3061954731198063, + "learning_rate": 1.6376759025686662e-06, + "loss": 0.0267, "step": 3429 }, { - "epoch": 1.523428825227626, - "grad_norm": 0.5551436730522737, - "learning_rate": 7.782986764706334e-06, - "loss": 0.0563, + "epoch": 3.0461811722912966, + "grad_norm": 0.40069176783599647, + "learning_rate": 1.6348084400710912e-06, + "loss": 0.0316, "step": 3430 }, { - "epoch": 1.5238729735731735, - "grad_norm": 0.6187970153795604, - "learning_rate": 7.781376053380735e-06, - "loss": 0.0592, + "epoch": 3.047069271758437, + "grad_norm": 0.36681793584149164, + "learning_rate": 1.6319429994578518e-06, + "loss": 0.026, "step": 3431 }, { - "epoch": 1.5243171219187208, - "grad_norm": 0.5501189193555737, - "learning_rate": 7.779764923967069e-06, - "loss": 0.0556, + "epoch": 3.047957371225577, + "grad_norm": 0.39810557057369755, + "learning_rate": 1.629079582450566e-06, + "loss": 0.0291, "step": 3432 }, { - "epoch": 1.524761270264268, - "grad_norm": 0.44533955935715447, - "learning_rate": 7.778153376707513e-06, - "loss": 0.0507, + "epoch": 3.0488454706927177, + "grad_norm": 0.3300323779556455, + "learning_rate": 1.6262181907696456e-06, + "loss": 0.0347, "step": 3433 }, { - "epoch": 1.5252054186098158, - "grad_norm": 0.4340430082400607, - "learning_rate": 7.776541411844315e-06, - "loss": 0.0385, + "epoch": 3.049733570159858, + "grad_norm": 0.37920489565568494, + "learning_rate": 1.6233588261342769e-06, + "loss": 0.0273, "step": 3434 }, { - "epoch": 1.5256495669553631, - "grad_norm": 0.5599425444294093, - "learning_rate": 7.774929029619775e-06, - "loss": 0.0454, + "epoch": 3.0506216696269983, + "grad_norm": 0.3149149943652191, + "learning_rate": 1.620501490262431e-06, + "loss": 0.026, "step": 3435 }, { - "epoch": 1.5260937153009104, - "grad_norm": 0.3908201446163151, - "learning_rate": 7.773316230276267e-06, - "loss": 0.041, + "epoch": 3.0515097690941384, + "grad_norm": 0.4071584210435631, + "learning_rate": 1.6176461848708647e-06, + "loss": 0.0481, "step": 3436 }, { - "epoch": 1.526537863646458, - "grad_norm": 0.6432918655836172, - "learning_rate": 7.771703014056217e-06, - "loss": 0.0664, + "epoch": 3.052397868561279, + "grad_norm": 0.5098304758250216, + "learning_rate": 1.6147929116751081e-06, + "loss": 0.0358, "step": 3437 }, { - "epoch": 1.5269820119920055, - "grad_norm": 0.4341973490141778, - "learning_rate": 7.770089381202121e-06, - "loss": 0.0536, + "epoch": 3.053285968028419, + "grad_norm": 0.3488799001580875, + "learning_rate": 1.611941672389473e-06, + "loss": 0.03, "step": 3438 }, { - "epoch": 1.5274261603375527, - "grad_norm": 0.5232821809072614, - "learning_rate": 7.768475331956537e-06, - "loss": 0.0506, + "epoch": 3.0541740674955595, + "grad_norm": 0.35323693058259226, + "learning_rate": 1.6090924687270515e-06, + "loss": 0.0298, "step": 3439 }, { - "epoch": 1.5278703086831, - "grad_norm": 0.49831548658076835, - "learning_rate": 7.76686086656208e-06, - "loss": 0.0497, + "epoch": 3.0550621669626996, + "grad_norm": 0.347601327837638, + "learning_rate": 1.6062453023997083e-06, + "loss": 0.0316, "step": 3440 }, { - "epoch": 1.5283144570286475, - "grad_norm": 0.6350239638046895, - "learning_rate": 7.765245985261436e-06, - "loss": 0.0603, + "epoch": 3.05595026642984, + "grad_norm": 0.4492026480839529, + "learning_rate": 1.603400175118089e-06, + "loss": 0.0424, "step": 3441 }, { - "epoch": 1.528758605374195, - "grad_norm": 0.8205513486577842, - "learning_rate": 7.763630688297347e-06, - "loss": 0.0555, + "epoch": 3.0568383658969807, + "grad_norm": 0.3047539539857411, + "learning_rate": 1.6005570885916095e-06, + "loss": 0.0333, "step": 3442 }, { - "epoch": 1.5292027537197423, - "grad_norm": 0.5773711065726657, - "learning_rate": 7.76201497591262e-06, - "loss": 0.0564, + "epoch": 3.0577264653641207, + "grad_norm": 0.46885412379655417, + "learning_rate": 1.59771604452846e-06, + "loss": 0.0366, "step": 3443 }, { - "epoch": 1.5296469020652899, - "grad_norm": 0.42024558526878264, - "learning_rate": 7.760398848350121e-06, - "loss": 0.0327, + "epoch": 3.0586145648312613, + "grad_norm": 0.44731834331586895, + "learning_rate": 1.5948770446356065e-06, + "loss": 0.0357, "step": 3444 }, { - "epoch": 1.5300910504108374, - "grad_norm": 0.5673807060962988, - "learning_rate": 7.758782305852787e-06, - "loss": 0.0551, + "epoch": 3.0595026642984013, + "grad_norm": 0.3054708287316336, + "learning_rate": 1.5920400906187878e-06, + "loss": 0.0244, "step": 3445 }, { - "epoch": 1.5305351987563847, - "grad_norm": 0.4401980723100094, - "learning_rate": 7.757165348663606e-06, - "loss": 0.0341, + "epoch": 3.060390763765542, + "grad_norm": 0.44178108992771253, + "learning_rate": 1.5892051841825084e-06, + "loss": 0.0407, "step": 3446 }, { - "epoch": 1.530979347101932, - "grad_norm": 0.4134655160444097, - "learning_rate": 7.755547977025641e-06, - "loss": 0.0396, + "epoch": 3.061278863232682, + "grad_norm": 0.4012261005684536, + "learning_rate": 1.5863723270300441e-06, + "loss": 0.0394, "step": 3447 }, { - "epoch": 1.5314234954474795, - "grad_norm": 0.7907504517928898, - "learning_rate": 7.753930191182005e-06, - "loss": 0.072, + "epoch": 3.0621669626998225, + "grad_norm": 0.3769259339107391, + "learning_rate": 1.583541520863443e-06, + "loss": 0.0318, "step": 3448 }, { - "epoch": 1.531867643793027, - "grad_norm": 0.7425759733481122, - "learning_rate": 7.752311991375878e-06, - "loss": 0.0584, + "epoch": 3.0630550621669625, + "grad_norm": 0.37362646028931695, + "learning_rate": 1.580712767383516e-06, + "loss": 0.0319, "step": 3449 }, { - "epoch": 1.5323117921385743, - "grad_norm": 0.3701818263547308, - "learning_rate": 7.750693377850506e-06, - "loss": 0.0311, + "epoch": 3.063943161634103, + "grad_norm": 0.30727083447610515, + "learning_rate": 1.5778860682898456e-06, + "loss": 0.0277, "step": 3450 }, { - "epoch": 1.5327559404841216, - "grad_norm": 0.4194874739178355, - "learning_rate": 7.749074350849196e-06, - "loss": 0.0299, + "epoch": 3.064831261101243, + "grad_norm": 0.29004399809092374, + "learning_rate": 1.5750614252807767e-06, + "loss": 0.0285, "step": 3451 }, { - "epoch": 1.533200088829669, - "grad_norm": 0.399280041395756, - "learning_rate": 7.747454910615309e-06, - "loss": 0.0333, + "epoch": 3.0657193605683837, + "grad_norm": 0.3830849061399326, + "learning_rate": 1.5722388400534177e-06, + "loss": 0.035, "step": 3452 }, { - "epoch": 1.5336442371752166, - "grad_norm": 0.5116821227256184, - "learning_rate": 7.74583505739228e-06, - "loss": 0.053, + "epoch": 3.0666074600355238, + "grad_norm": 0.39043040622784064, + "learning_rate": 1.569418314303644e-06, + "loss": 0.0325, "step": 3453 }, { - "epoch": 1.5340883855207639, - "grad_norm": 0.6704114941806725, - "learning_rate": 7.744214791423597e-06, - "loss": 0.0559, + "epoch": 3.0674955595026643, + "grad_norm": 0.3514013874663258, + "learning_rate": 1.5665998497260959e-06, + "loss": 0.034, "step": 3454 }, { - "epoch": 1.5345325338663114, - "grad_norm": 0.4031097936225357, - "learning_rate": 7.742594112952816e-06, + "epoch": 3.068383658969805, + "grad_norm": 0.3894652781072553, + "learning_rate": 1.5637834480141646e-06, "loss": 0.04, "step": 3455 }, { - "epoch": 1.534976682211859, - "grad_norm": 0.39875784908896506, - "learning_rate": 7.74097302222355e-06, - "loss": 0.0359, + "epoch": 3.069271758436945, + "grad_norm": 0.5953228076292884, + "learning_rate": 1.5609691108600128e-06, + "loss": 0.0404, "step": 3456 }, { - "epoch": 1.5354208305574062, - "grad_norm": 0.49710136867375654, - "learning_rate": 7.739351519479479e-06, - "loss": 0.0433, + "epoch": 3.0701598579040854, + "grad_norm": 0.3809853596944762, + "learning_rate": 1.5581568399545593e-06, + "loss": 0.0311, "step": 3457 }, { - "epoch": 1.5358649789029535, - "grad_norm": 0.587479752061736, - "learning_rate": 7.73772960496434e-06, - "loss": 0.0482, + "epoch": 3.0710479573712255, + "grad_norm": 0.3513474082125203, + "learning_rate": 1.555346636987481e-06, + "loss": 0.0323, "step": 3458 }, { - "epoch": 1.536309127248501, - "grad_norm": 0.9958134431642607, - "learning_rate": 7.736107278921937e-06, - "loss": 0.0604, + "epoch": 3.071936056838366, + "grad_norm": 0.4450240937216318, + "learning_rate": 1.552538503647209e-06, + "loss": 0.0313, "step": 3459 }, { - "epoch": 1.5367532755940485, - "grad_norm": 0.5924386484163253, - "learning_rate": 7.73448454159613e-06, - "loss": 0.0643, + "epoch": 3.072824156305506, + "grad_norm": 0.3584246496100684, + "learning_rate": 1.5497324416209391e-06, + "loss": 0.033, "step": 3460 }, { - "epoch": 1.5371974239395958, - "grad_norm": 0.4469041409309243, - "learning_rate": 7.732861393230845e-06, - "loss": 0.0377, + "epoch": 3.0737122557726466, + "grad_norm": 0.43791254251751843, + "learning_rate": 1.5469284525946138e-06, + "loss": 0.0375, "step": 3461 }, { - "epoch": 1.537641572285143, - "grad_norm": 0.6971338285035604, - "learning_rate": 7.731237834070071e-06, - "loss": 0.0525, + "epoch": 3.0746003552397867, + "grad_norm": 0.34501952519306195, + "learning_rate": 1.5441265382529368e-06, + "loss": 0.029, "step": 3462 }, { - "epoch": 1.5380857206306906, - "grad_norm": 0.377836838682399, - "learning_rate": 7.729613864357854e-06, - "loss": 0.037, + "epoch": 3.075488454706927, + "grad_norm": 0.33819998442252974, + "learning_rate": 1.5413267002793619e-06, + "loss": 0.0311, "step": 3463 }, { - "epoch": 1.5385298689762381, - "grad_norm": 0.3909300894809345, - "learning_rate": 7.727989484338306e-06, - "loss": 0.0327, + "epoch": 3.0763765541740673, + "grad_norm": 0.364941288060177, + "learning_rate": 1.5385289403560938e-06, + "loss": 0.0308, "step": 3464 }, { - "epoch": 1.5389740173217854, - "grad_norm": 0.44535118870735724, - "learning_rate": 7.726364694255598e-06, - "loss": 0.0451, + "epoch": 3.077264653641208, + "grad_norm": 0.37049804392147895, + "learning_rate": 1.5357332601640945e-06, + "loss": 0.0279, "step": 3465 }, { - "epoch": 1.539418165667333, - "grad_norm": 0.4310182821670584, - "learning_rate": 7.724739494353963e-06, - "loss": 0.043, + "epoch": 3.0781527531083483, + "grad_norm": 0.35224684811311013, + "learning_rate": 1.532939661383071e-06, + "loss": 0.0274, "step": 3466 }, { - "epoch": 1.5398623140128804, - "grad_norm": 0.5072110735404695, - "learning_rate": 7.723113884877698e-06, - "loss": 0.0409, + "epoch": 3.0790408525754884, + "grad_norm": 0.33960419330735814, + "learning_rate": 1.5301481456914802e-06, + "loss": 0.0322, "step": 3467 }, { - "epoch": 1.5403064623584277, - "grad_norm": 0.4243921084057989, - "learning_rate": 7.721487866071158e-06, - "loss": 0.0577, + "epoch": 3.079928952042629, + "grad_norm": 0.3514171333499501, + "learning_rate": 1.5273587147665315e-06, + "loss": 0.0313, "step": 3468 }, { - "epoch": 1.540750610703975, - "grad_norm": 0.6225210766153572, - "learning_rate": 7.71986143817876e-06, - "loss": 0.041, + "epoch": 3.080817051509769, + "grad_norm": 0.38806742277357753, + "learning_rate": 1.5245713702841758e-06, + "loss": 0.0345, "step": 3469 }, { - "epoch": 1.5411947590495225, - "grad_norm": 0.7207947800360287, - "learning_rate": 7.718234601444987e-06, - "loss": 0.0525, + "epoch": 3.0817051509769096, + "grad_norm": 0.3434592806521134, + "learning_rate": 1.5217861139191175e-06, + "loss": 0.029, "step": 3470 }, { - "epoch": 1.54163890739507, - "grad_norm": 0.6448910865817588, - "learning_rate": 7.716607356114378e-06, - "loss": 0.0691, + "epoch": 3.0825932504440496, + "grad_norm": 0.3581274216121454, + "learning_rate": 1.5190029473447987e-06, + "loss": 0.0322, "step": 3471 }, { - "epoch": 1.5420830557406173, - "grad_norm": 0.4192180148347075, - "learning_rate": 7.714979702431537e-06, - "loss": 0.0401, + "epoch": 3.08348134991119, + "grad_norm": 0.324637577500593, + "learning_rate": 1.5162218722334138e-06, + "loss": 0.0273, "step": 3472 }, { - "epoch": 1.5425272040861646, - "grad_norm": 0.5353520298031071, - "learning_rate": 7.713351640641127e-06, - "loss": 0.047, + "epoch": 3.0843694493783302, + "grad_norm": 0.47742319524449367, + "learning_rate": 1.5134428902558924e-06, + "loss": 0.0348, "step": 3473 }, { - "epoch": 1.5429713524317124, - "grad_norm": 0.4566196176860585, - "learning_rate": 7.711723170987875e-06, - "loss": 0.0377, + "epoch": 3.0852575488454708, + "grad_norm": 0.3498761689207883, + "learning_rate": 1.5106660030819149e-06, + "loss": 0.0271, "step": 3474 }, { - "epoch": 1.5434155007772596, - "grad_norm": 0.8744543565589462, - "learning_rate": 7.710094293716563e-06, - "loss": 0.0421, + "epoch": 3.086145648312611, + "grad_norm": 0.3624470718730505, + "learning_rate": 1.507891212379896e-06, + "loss": 0.0268, "step": 3475 }, { - "epoch": 1.543859649122807, - "grad_norm": 0.4450101049065203, - "learning_rate": 7.708465009072046e-06, - "loss": 0.0368, + "epoch": 3.0870337477797514, + "grad_norm": 0.4346417954949858, + "learning_rate": 1.5051185198169933e-06, + "loss": 0.0377, "step": 3476 }, { - "epoch": 1.5443037974683544, - "grad_norm": 0.3880510280013894, - "learning_rate": 7.706835317299228e-06, - "loss": 0.042, + "epoch": 3.0879218472468914, + "grad_norm": 0.33645009045652774, + "learning_rate": 1.5023479270591063e-06, + "loss": 0.0275, "step": 3477 }, { - "epoch": 1.544747945813902, - "grad_norm": 0.5627812490127366, - "learning_rate": 7.705205218643079e-06, - "loss": 0.0485, + "epoch": 3.088809946714032, + "grad_norm": 0.40173607645775294, + "learning_rate": 1.4995794357708693e-06, + "loss": 0.0409, "step": 3478 }, { - "epoch": 1.5451920941594492, - "grad_norm": 0.5226084645577708, - "learning_rate": 7.703574713348633e-06, - "loss": 0.0392, + "epoch": 3.0896980461811725, + "grad_norm": 0.40010418491350547, + "learning_rate": 1.4968130476156545e-06, + "loss": 0.0325, "step": 3479 }, { - "epoch": 1.5456362425049965, - "grad_norm": 0.646405817519741, - "learning_rate": 7.701943801660983e-06, - "loss": 0.0679, + "epoch": 3.0905861456483126, + "grad_norm": 0.35042283762753534, + "learning_rate": 1.494048764255574e-06, + "loss": 0.0314, "step": 3480 }, { - "epoch": 1.546080390850544, - "grad_norm": 0.4165107020464423, - "learning_rate": 7.700312483825281e-06, - "loss": 0.0398, + "epoch": 3.091474245115453, + "grad_norm": 0.4682559608994265, + "learning_rate": 1.4912865873514697e-06, + "loss": 0.0366, "step": 3481 }, { - "epoch": 1.5465245391960916, - "grad_norm": 0.4015766878565599, - "learning_rate": 7.698680760086743e-06, - "loss": 0.0397, + "epoch": 3.092362344582593, + "grad_norm": 0.3374770521113918, + "learning_rate": 1.4885265185629244e-06, + "loss": 0.0271, "step": 3482 }, { - "epoch": 1.5469686875416389, - "grad_norm": 0.5170446946635605, - "learning_rate": 7.697048630690642e-06, - "loss": 0.0437, + "epoch": 3.0932504440497337, + "grad_norm": 0.37484643713753174, + "learning_rate": 1.4857685595482484e-06, + "loss": 0.0323, "step": 3483 }, { - "epoch": 1.5474128358871864, - "grad_norm": 0.6516919138739381, - "learning_rate": 7.69541609588232e-06, - "loss": 0.0487, + "epoch": 3.094138543516874, + "grad_norm": 0.34481804532192073, + "learning_rate": 1.483012711964486e-06, + "loss": 0.0287, "step": 3484 }, { - "epoch": 1.5478569842327339, - "grad_norm": 0.3866667271027062, - "learning_rate": 7.69378315590717e-06, - "loss": 0.0455, + "epoch": 3.0950266429840143, + "grad_norm": 0.4734565209167152, + "learning_rate": 1.4802589774674148e-06, + "loss": 0.0352, "step": 3485 }, { - "epoch": 1.5483011325782812, - "grad_norm": 0.4947363891869916, - "learning_rate": 7.692149811010651e-06, - "loss": 0.0455, + "epoch": 3.0959147424511544, + "grad_norm": 0.37127966682290336, + "learning_rate": 1.477507357711545e-06, + "loss": 0.0332, "step": 3486 }, { - "epoch": 1.5487452809238285, - "grad_norm": 0.3642222145788099, - "learning_rate": 7.690516061438287e-06, - "loss": 0.0302, + "epoch": 3.096802841918295, + "grad_norm": 0.3543911235539375, + "learning_rate": 1.4747578543501063e-06, + "loss": 0.0313, "step": 3487 }, { - "epoch": 1.549189429269376, - "grad_norm": 0.49548313003009337, - "learning_rate": 7.688881907435653e-06, - "loss": 0.0491, + "epoch": 3.097690941385435, + "grad_norm": 0.38039943600968423, + "learning_rate": 1.4720104690350672e-06, + "loss": 0.0349, "step": 3488 }, { - "epoch": 1.5496335776149235, - "grad_norm": 0.5051267185591125, - "learning_rate": 7.687247349248393e-06, - "loss": 0.037, + "epoch": 3.0985790408525755, + "grad_norm": 0.3245828792851648, + "learning_rate": 1.4692652034171207e-06, + "loss": 0.0287, "step": 3489 }, { - "epoch": 1.5500777259604708, - "grad_norm": 0.35479785899347316, - "learning_rate": 7.685612387122206e-06, - "loss": 0.0393, + "epoch": 3.0994671403197156, + "grad_norm": 0.42861540701989975, + "learning_rate": 1.4665220591456847e-06, + "loss": 0.0331, "step": 3490 }, { - "epoch": 1.550521874306018, - "grad_norm": 0.3491505040065738, - "learning_rate": 7.68397702130286e-06, - "loss": 0.0391, + "epoch": 3.100355239786856, + "grad_norm": 0.32183958469817114, + "learning_rate": 1.4637810378689004e-06, + "loss": 0.0273, "step": 3491 }, { - "epoch": 1.5509660226515656, - "grad_norm": 0.4064600432185325, - "learning_rate": 7.682341252036171e-06, - "loss": 0.0369, + "epoch": 3.1012433392539966, + "grad_norm": 0.45972692428559464, + "learning_rate": 1.4610421412336413e-06, + "loss": 0.0302, "step": 3492 }, { - "epoch": 1.551410170997113, - "grad_norm": 0.5118602299189453, - "learning_rate": 7.68070507956803e-06, - "loss": 0.0455, + "epoch": 3.1021314387211367, + "grad_norm": 0.3759598952389504, + "learning_rate": 1.4583053708854945e-06, + "loss": 0.0329, "step": 3493 }, { - "epoch": 1.5518543193426604, - "grad_norm": 0.4565926867283545, - "learning_rate": 7.679068504144378e-06, - "loss": 0.038, + "epoch": 3.1030195381882772, + "grad_norm": 0.36353279121847576, + "learning_rate": 1.455570728468778e-06, + "loss": 0.0335, "step": 3494 }, { - "epoch": 1.552298467688208, - "grad_norm": 0.7287584197582313, - "learning_rate": 7.677431526011218e-06, - "loss": 0.0569, + "epoch": 3.1039076376554173, + "grad_norm": 0.3290160590761642, + "learning_rate": 1.4528382156265269e-06, + "loss": 0.0299, "step": 3495 }, { - "epoch": 1.5527426160337554, - "grad_norm": 0.5994194003053249, - "learning_rate": 7.67579414541462e-06, - "loss": 0.0652, + "epoch": 3.104795737122558, + "grad_norm": 0.3595669023727179, + "learning_rate": 1.4501078340004954e-06, + "loss": 0.0325, "step": 3496 }, { - "epoch": 1.5531867643793027, - "grad_norm": 0.826708798968826, - "learning_rate": 7.674156362600708e-06, - "loss": 0.0788, + "epoch": 3.105683836589698, + "grad_norm": 0.35434918469827165, + "learning_rate": 1.4473795852311606e-06, + "loss": 0.0316, "step": 3497 }, { - "epoch": 1.55363091272485, - "grad_norm": 0.6304316452414537, - "learning_rate": 7.672518177815669e-06, - "loss": 0.0447, + "epoch": 3.1065719360568385, + "grad_norm": 0.4031990945442707, + "learning_rate": 1.4446534709577214e-06, + "loss": 0.0344, "step": 3498 }, { - "epoch": 1.5540750610703975, - "grad_norm": 0.5354755353841119, - "learning_rate": 7.67087959130575e-06, - "loss": 0.0398, + "epoch": 3.1074600355239785, + "grad_norm": 0.5073170920433319, + "learning_rate": 1.4419294928180828e-06, + "loss": 0.0337, "step": 3499 }, { - "epoch": 1.554519209415945, - "grad_norm": 0.5926105062715095, - "learning_rate": 7.669240603317257e-06, - "loss": 0.0495, + "epoch": 3.108348134991119, + "grad_norm": 0.33208651903203795, + "learning_rate": 1.4392076524488764e-06, + "loss": 0.0262, "step": 3500 }, { - "epoch": 1.5549633577614923, - "grad_norm": 0.4138530773015362, - "learning_rate": 7.66760121409656e-06, - "loss": 0.0354, + "epoch": 3.109236234458259, + "grad_norm": 0.3717968372001129, + "learning_rate": 1.4364879514854485e-06, + "loss": 0.0344, "step": 3501 }, { - "epoch": 1.5554075061070396, - "grad_norm": 0.44256364969603434, - "learning_rate": 7.665961423890085e-06, - "loss": 0.045, + "epoch": 3.1101243339253997, + "grad_norm": 0.3808171070996276, + "learning_rate": 1.4337703915618556e-06, + "loss": 0.0317, "step": 3502 }, { - "epoch": 1.5558516544525873, - "grad_norm": 0.47643325623563626, - "learning_rate": 7.664321232944321e-06, - "loss": 0.0403, + "epoch": 3.11101243339254, + "grad_norm": 0.3570466028704032, + "learning_rate": 1.4310549743108688e-06, + "loss": 0.0268, "step": 3503 }, { - "epoch": 1.5562958027981346, - "grad_norm": 0.3731544040854046, - "learning_rate": 7.662680641505817e-06, - "loss": 0.0412, + "epoch": 3.1119005328596803, + "grad_norm": 0.3570749694866279, + "learning_rate": 1.4283417013639767e-06, + "loss": 0.0324, "step": 3504 }, { - "epoch": 1.556739951143682, - "grad_norm": 0.8553804228205281, - "learning_rate": 7.661039649821183e-06, - "loss": 0.0688, + "epoch": 3.112788632326821, + "grad_norm": 0.362176667644257, + "learning_rate": 1.425630574351372e-06, + "loss": 0.038, "step": 3505 }, { - "epoch": 1.5571840994892294, - "grad_norm": 0.5134256859909045, - "learning_rate": 7.659398258137085e-06, - "loss": 0.044, + "epoch": 3.113676731793961, + "grad_norm": 0.3251101417555494, + "learning_rate": 1.422921594901966e-06, + "loss": 0.0362, "step": 3506 }, { - "epoch": 1.557628247834777, - "grad_norm": 0.5647254301702445, - "learning_rate": 7.657756466700252e-06, - "loss": 0.0437, + "epoch": 3.1145648312611014, + "grad_norm": 0.3388937962327327, + "learning_rate": 1.4202147646433734e-06, + "loss": 0.0267, "step": 3507 }, { - "epoch": 1.5580723961803242, - "grad_norm": 0.44604429848268956, - "learning_rate": 7.656114275757477e-06, - "loss": 0.0399, + "epoch": 3.1154529307282415, + "grad_norm": 0.36826712170779247, + "learning_rate": 1.4175100852019185e-06, + "loss": 0.0398, "step": 3508 }, { - "epoch": 1.5585165445258715, - "grad_norm": 0.45991000971025053, - "learning_rate": 7.654471685555606e-06, - "loss": 0.0484, + "epoch": 3.116341030195382, + "grad_norm": 0.3517639196196876, + "learning_rate": 1.4148075582026387e-06, + "loss": 0.028, "step": 3509 }, { - "epoch": 1.558960692871419, - "grad_norm": 0.5298662733782437, - "learning_rate": 7.65282869634155e-06, - "loss": 0.061, + "epoch": 3.117229129662522, + "grad_norm": 0.36309715329027414, + "learning_rate": 1.4121071852692708e-06, + "loss": 0.0254, "step": 3510 }, { - "epoch": 1.5594048412169665, - "grad_norm": 0.49816414515287183, - "learning_rate": 7.651185308362276e-06, - "loss": 0.0499, + "epoch": 3.1181172291296626, + "grad_norm": 0.34317416863813455, + "learning_rate": 1.4094089680242634e-06, + "loss": 0.0277, "step": 3511 }, { - "epoch": 1.5598489895625138, - "grad_norm": 0.4052222432440109, - "learning_rate": 7.649541521864816e-06, - "loss": 0.0451, + "epoch": 3.1190053285968027, + "grad_norm": 0.39704784916876024, + "learning_rate": 1.4067129080887642e-06, + "loss": 0.034, "step": 3512 }, { - "epoch": 1.5602931379080613, - "grad_norm": 0.5042113116702226, - "learning_rate": 7.647897337096257e-06, - "loss": 0.0457, + "epoch": 3.119893428063943, + "grad_norm": 0.3447554710079465, + "learning_rate": 1.4040190070826321e-06, + "loss": 0.0311, "step": 3513 }, { - "epoch": 1.5607372862536089, - "grad_norm": 0.7798340643811736, - "learning_rate": 7.646252754303746e-06, - "loss": 0.1019, + "epoch": 3.1207815275310833, + "grad_norm": 0.46479627750960195, + "learning_rate": 1.4013272666244209e-06, + "loss": 0.0345, "step": 3514 }, { - "epoch": 1.5611814345991561, - "grad_norm": 0.4972991596269239, - "learning_rate": 7.644607773734496e-06, - "loss": 0.0578, + "epoch": 3.121669626998224, + "grad_norm": 0.3254104020571842, + "learning_rate": 1.398637688331393e-06, + "loss": 0.0295, "step": 3515 }, { - "epoch": 1.5616255829447034, - "grad_norm": 0.5179606694497955, - "learning_rate": 7.642962395635773e-06, - "loss": 0.0484, + "epoch": 3.122557726465364, + "grad_norm": 0.40157747673258, + "learning_rate": 1.395950273819507e-06, + "loss": 0.0329, "step": 3516 }, { - "epoch": 1.562069731290251, - "grad_norm": 0.4239628046426599, - "learning_rate": 7.641316620254907e-06, - "loss": 0.0401, + "epoch": 3.1234458259325044, + "grad_norm": 0.3852837069169171, + "learning_rate": 1.393265024703422e-06, + "loss": 0.0308, "step": 3517 }, { - "epoch": 1.5625138796357985, - "grad_norm": 0.41910897244128137, - "learning_rate": 7.639670447839284e-06, - "loss": 0.0431, + "epoch": 3.124333925399645, + "grad_norm": 0.3276630375695141, + "learning_rate": 1.3905819425965005e-06, + "loss": 0.0309, "step": 3518 }, { - "epoch": 1.5629580279813458, - "grad_norm": 0.6150053023433038, - "learning_rate": 7.638023878636353e-06, - "loss": 0.0401, + "epoch": 3.125222024866785, + "grad_norm": 0.5304847611809405, + "learning_rate": 1.3879010291107986e-06, + "loss": 0.0532, "step": 3519 }, { - "epoch": 1.563402176326893, - "grad_norm": 0.4401882181013756, - "learning_rate": 7.63637691289362e-06, - "loss": 0.0557, + "epoch": 3.1261101243339255, + "grad_norm": 0.41905821351311295, + "learning_rate": 1.3852222858570685e-06, + "loss": 0.0309, "step": 3520 }, { - "epoch": 1.5638463246724406, - "grad_norm": 0.4753196454260942, - "learning_rate": 7.634729550858652e-06, - "loss": 0.0393, + "epoch": 3.1269982238010656, + "grad_norm": 0.39103923686669484, + "learning_rate": 1.382545714444764e-06, + "loss": 0.029, "step": 3521 }, { - "epoch": 1.564290473017988, - "grad_norm": 0.36301949275943124, - "learning_rate": 7.633081792779079e-06, - "loss": 0.0352, + "epoch": 3.127886323268206, + "grad_norm": 0.3350588699618402, + "learning_rate": 1.379871316482027e-06, + "loss": 0.0271, "step": 3522 }, { - "epoch": 1.5647346213635354, - "grad_norm": 0.6312670336926534, - "learning_rate": 7.631433638902583e-06, - "loss": 0.0458, + "epoch": 3.1287744227353462, + "grad_norm": 0.32239838791621245, + "learning_rate": 1.3771990935757013e-06, + "loss": 0.0279, "step": 3523 }, { - "epoch": 1.5651787697090829, - "grad_norm": 0.4853790667151454, - "learning_rate": 7.629785089476912e-06, - "loss": 0.0465, + "epoch": 3.1296625222024868, + "grad_norm": 0.4123712401960722, + "learning_rate": 1.3745290473313177e-06, + "loss": 0.0314, "step": 3524 }, { - "epoch": 1.5656229180546304, - "grad_norm": 0.5377885843949731, - "learning_rate": 7.628136144749867e-06, - "loss": 0.0432, + "epoch": 3.130550621669627, + "grad_norm": 0.38723740273270757, + "learning_rate": 1.3718611793530995e-06, + "loss": 0.0324, "step": 3525 }, { - "epoch": 1.5660670664001777, - "grad_norm": 0.4560714160433749, - "learning_rate": 7.626486804969316e-06, - "loss": 0.0443, + "epoch": 3.1314387211367674, + "grad_norm": 0.32223296368530385, + "learning_rate": 1.3691954912439648e-06, + "loss": 0.0261, "step": 3526 }, { - "epoch": 1.566511214745725, - "grad_norm": 0.40682805145370315, - "learning_rate": 7.624837070383183e-06, - "loss": 0.0353, + "epoch": 3.1323268206039074, + "grad_norm": 0.39004639051812723, + "learning_rate": 1.366531984605523e-06, + "loss": 0.0274, "step": 3527 }, { - "epoch": 1.5669553630912725, - "grad_norm": 0.4791132302623183, - "learning_rate": 7.6231869412394495e-06, - "loss": 0.0502, + "epoch": 3.133214920071048, + "grad_norm": 0.3821103643848609, + "learning_rate": 1.3638706610380648e-06, + "loss": 0.039, "step": 3528 }, { - "epoch": 1.56739951143682, - "grad_norm": 0.4343747460657118, - "learning_rate": 7.621536417786159e-06, - "loss": 0.0395, + "epoch": 3.1341030195381885, + "grad_norm": 0.3200448555791571, + "learning_rate": 1.3612115221405764e-06, + "loss": 0.036, "step": 3529 }, { - "epoch": 1.5678436597823673, - "grad_norm": 0.492794572243227, - "learning_rate": 7.619885500271413e-06, - "loss": 0.0361, + "epoch": 3.1349911190053286, + "grad_norm": 0.4274217582113707, + "learning_rate": 1.3585545695107317e-06, + "loss": 0.0348, "step": 3530 }, { - "epoch": 1.5682878081279146, - "grad_norm": 0.3645392422465027, - "learning_rate": 7.618234188943372e-06, - "loss": 0.0389, + "epoch": 3.135879218472469, + "grad_norm": 0.3815320210874098, + "learning_rate": 1.3558998047448869e-06, + "loss": 0.0405, "step": 3531 }, { - "epoch": 1.568731956473462, - "grad_norm": 0.43518151556556955, - "learning_rate": 7.616582484050256e-06, - "loss": 0.037, + "epoch": 3.136767317939609, + "grad_norm": 0.3967245046942835, + "learning_rate": 1.3532472294380843e-06, + "loss": 0.0329, "step": 3532 }, { - "epoch": 1.5691761048190096, - "grad_norm": 0.5607869474661537, - "learning_rate": 7.614930385840345e-06, - "loss": 0.0494, + "epoch": 3.1376554174067497, + "grad_norm": 0.3739682684199583, + "learning_rate": 1.350596845184055e-06, + "loss": 0.0254, "step": 3533 }, { - "epoch": 1.5696202531645569, - "grad_norm": 0.3841819768936594, - "learning_rate": 7.613277894561978e-06, - "loss": 0.0426, + "epoch": 3.1385435168738898, + "grad_norm": 0.3162116602600176, + "learning_rate": 1.3479486535752078e-06, + "loss": 0.0307, "step": 3534 }, { - "epoch": 1.5700644015101044, - "grad_norm": 0.3817054851480292, - "learning_rate": 7.611625010463549e-06, - "loss": 0.045, + "epoch": 3.1394316163410303, + "grad_norm": 0.4208812448914787, + "learning_rate": 1.34530265620264e-06, + "loss": 0.0376, "step": 3535 }, { - "epoch": 1.570508549855652, - "grad_norm": 0.8877604841062007, - "learning_rate": 7.60997173379352e-06, - "loss": 0.0511, + "epoch": 3.1403197158081704, + "grad_norm": 0.28686341216107925, + "learning_rate": 1.3426588546561264e-06, + "loss": 0.0275, "step": 3536 }, { - "epoch": 1.5709526982011992, - "grad_norm": 0.5367152034215165, - "learning_rate": 7.608318064800403e-06, - "loss": 0.0437, + "epoch": 3.141207815275311, + "grad_norm": 0.464660142600853, + "learning_rate": 1.340017250524121e-06, + "loss": 0.0362, "step": 3537 }, { - "epoch": 1.5713968465467465, - "grad_norm": 0.37970223029486955, - "learning_rate": 7.606664003732771e-06, - "loss": 0.0426, + "epoch": 3.142095914742451, + "grad_norm": 0.5120325348928773, + "learning_rate": 1.337377845393763e-06, + "loss": 0.041, "step": 3538 }, { - "epoch": 1.571840994892294, - "grad_norm": 0.5586015168302153, - "learning_rate": 7.605009550839263e-06, - "loss": 0.0443, + "epoch": 3.1429840142095915, + "grad_norm": 0.3199015168528209, + "learning_rate": 1.3347406408508695e-06, + "loss": 0.0254, "step": 3539 }, { - "epoch": 1.5722851432378415, - "grad_norm": 0.5314909266145166, - "learning_rate": 7.603354706368567e-06, - "loss": 0.0482, + "epoch": 3.143872113676732, + "grad_norm": 0.4338499140507683, + "learning_rate": 1.3321056384799285e-06, + "loss": 0.0331, "step": 3540 }, { - "epoch": 1.5727292915833888, - "grad_norm": 0.6999799538645216, - "learning_rate": 7.601699470569434e-06, - "loss": 0.0481, + "epoch": 3.144760213143872, + "grad_norm": 0.41380574169602147, + "learning_rate": 1.3294728398641127e-06, + "loss": 0.0321, "step": 3541 }, { - "epoch": 1.573173439928936, - "grad_norm": 0.6037904990943627, - "learning_rate": 7.600043843690677e-06, - "loss": 0.0563, + "epoch": 3.1456483126110126, + "grad_norm": 0.369763445539379, + "learning_rate": 1.3268422465852687e-06, + "loss": 0.0269, "step": 3542 }, { - "epoch": 1.5736175882744838, - "grad_norm": 0.47456339272048825, - "learning_rate": 7.5983878259811625e-06, - "loss": 0.069, + "epoch": 3.1465364120781527, + "grad_norm": 0.4148583659600249, + "learning_rate": 1.3242138602239168e-06, + "loss": 0.0353, "step": 3543 }, { - "epoch": 1.5740617366200311, - "grad_norm": 0.9776402251476092, - "learning_rate": 7.59673141768982e-06, - "loss": 0.0634, + "epoch": 3.1474245115452932, + "grad_norm": 0.33109060873080703, + "learning_rate": 1.3215876823592493e-06, + "loss": 0.0304, "step": 3544 }, { - "epoch": 1.5745058849655784, - "grad_norm": 0.430567824394842, - "learning_rate": 7.595074619065635e-06, - "loss": 0.0389, + "epoch": 3.1483126110124333, + "grad_norm": 0.382132967449249, + "learning_rate": 1.318963714569138e-06, + "loss": 0.0327, "step": 3545 }, { - "epoch": 1.574950033311126, - "grad_norm": 0.592046861388675, - "learning_rate": 7.593417430357649e-06, - "loss": 0.0569, + "epoch": 3.149200710479574, + "grad_norm": 0.3383325218121581, + "learning_rate": 1.31634195843012e-06, + "loss": 0.0299, "step": 3546 }, { - "epoch": 1.5753941816566734, - "grad_norm": 0.528657961671009, - "learning_rate": 7.591759851814972e-06, - "loss": 0.0477, + "epoch": 3.150088809946714, + "grad_norm": 0.40393250614316256, + "learning_rate": 1.313722415517409e-06, + "loss": 0.029, "step": 3547 }, { - "epoch": 1.5758383300022207, - "grad_norm": 0.6683751656639313, - "learning_rate": 7.590101883686761e-06, - "loss": 0.0567, + "epoch": 3.1509769094138544, + "grad_norm": 0.4546541345002346, + "learning_rate": 1.3111050874048853e-06, + "loss": 0.042, "step": 3548 }, { - "epoch": 1.576282478347768, - "grad_norm": 0.6152824586906789, - "learning_rate": 7.58844352622224e-06, - "loss": 0.0514, + "epoch": 3.1518650088809945, + "grad_norm": 0.4499379743205146, + "learning_rate": 1.3084899756650981e-06, + "loss": 0.0344, "step": 3549 }, { - "epoch": 1.5767266266933155, - "grad_norm": 0.5175459666893117, - "learning_rate": 7.5867847796706865e-06, - "loss": 0.0466, + "epoch": 3.152753108348135, + "grad_norm": 0.35340031710547076, + "learning_rate": 1.3058770818692701e-06, + "loss": 0.0341, "step": 3550 }, { - "epoch": 1.577170775038863, - "grad_norm": 0.46509447514156393, - "learning_rate": 7.585125644281439e-06, - "loss": 0.0356, + "epoch": 3.153641207815275, + "grad_norm": 0.34575268590107633, + "learning_rate": 1.303266407587286e-06, + "loss": 0.0333, "step": 3551 }, { - "epoch": 1.5776149233844103, - "grad_norm": 0.42445822473522876, - "learning_rate": 7.583466120303893e-06, - "loss": 0.0368, + "epoch": 3.1545293072824157, + "grad_norm": 0.3360853754282288, + "learning_rate": 1.3006579543876963e-06, + "loss": 0.0264, "step": 3552 }, { - "epoch": 1.5780590717299579, - "grad_norm": 0.5278835867879181, - "learning_rate": 7.581806207987504e-06, - "loss": 0.0412, + "epoch": 3.1554174067495557, + "grad_norm": 0.34728357967406603, + "learning_rate": 1.2980517238377243e-06, + "loss": 0.0333, "step": 3553 }, { - "epoch": 1.5785032200755054, - "grad_norm": 0.49560125396493776, - "learning_rate": 7.5801459075817865e-06, - "loss": 0.0435, + "epoch": 3.1563055062166963, + "grad_norm": 0.4519787099256083, + "learning_rate": 1.2954477175032493e-06, + "loss": 0.0367, "step": 3554 }, { - "epoch": 1.5789473684210527, - "grad_norm": 0.5441296297037692, - "learning_rate": 7.578485219336307e-06, - "loss": 0.0408, + "epoch": 3.157193605683837, + "grad_norm": 0.41641816261413234, + "learning_rate": 1.292845936948821e-06, + "loss": 0.0307, "step": 3555 }, { - "epoch": 1.5793915167666, - "grad_norm": 0.5569362734290482, - "learning_rate": 7.5768241435007e-06, - "loss": 0.0508, + "epoch": 3.158081705150977, + "grad_norm": 0.33086456984655066, + "learning_rate": 1.290246383737646e-06, + "loss": 0.0307, "step": 3556 }, { - "epoch": 1.5798356651121475, - "grad_norm": 0.688395754837478, - "learning_rate": 7.57516268032465e-06, - "loss": 0.0575, + "epoch": 3.1589698046181174, + "grad_norm": 0.45776048239984535, + "learning_rate": 1.2876490594315988e-06, + "loss": 0.0309, "step": 3557 }, { - "epoch": 1.580279813457695, - "grad_norm": 0.5366509995471049, - "learning_rate": 7.573500830057907e-06, - "loss": 0.0393, + "epoch": 3.1598579040852575, + "grad_norm": 0.3276100401593595, + "learning_rate": 1.285053965591208e-06, + "loss": 0.0218, "step": 3558 }, { - "epoch": 1.5807239618032423, - "grad_norm": 0.5521349728712537, - "learning_rate": 7.571838592950271e-06, - "loss": 0.0464, + "epoch": 3.160746003552398, + "grad_norm": 0.3921587024554653, + "learning_rate": 1.2824611037756686e-06, + "loss": 0.0322, "step": 3559 }, { - "epoch": 1.5811681101487896, - "grad_norm": 0.3794603822110102, - "learning_rate": 7.570175969251609e-06, - "loss": 0.0271, + "epoch": 3.161634103019538, + "grad_norm": 0.33801097072638053, + "learning_rate": 1.2798704755428303e-06, + "loss": 0.0282, "step": 3560 }, { - "epoch": 1.581612258494337, - "grad_norm": 0.954806853873049, - "learning_rate": 7.568512959211838e-06, - "loss": 0.0798, + "epoch": 3.1625222024866786, + "grad_norm": 0.3276014010818129, + "learning_rate": 1.2772820824492005e-06, + "loss": 0.0338, "step": 3561 }, { - "epoch": 1.5820564068398846, - "grad_norm": 0.3774549347222219, - "learning_rate": 7.566849563080938e-06, - "loss": 0.034, + "epoch": 3.1634103019538187, + "grad_norm": 0.33107384453826255, + "learning_rate": 1.2746959260499481e-06, + "loss": 0.0285, "step": 3562 }, { - "epoch": 1.5825005551854319, - "grad_norm": 0.5717245905532253, - "learning_rate": 7.565185781108944e-06, - "loss": 0.0499, + "epoch": 3.164298401420959, + "grad_norm": 0.5525909627135244, + "learning_rate": 1.2721120078988935e-06, + "loss": 0.0362, "step": 3563 }, { - "epoch": 1.5829447035309794, - "grad_norm": 0.5372548568959261, - "learning_rate": 7.563521613545954e-06, - "loss": 0.0543, + "epoch": 3.1651865008880993, + "grad_norm": 0.3125784896110762, + "learning_rate": 1.2695303295485128e-06, + "loss": 0.0266, "step": 3564 }, { - "epoch": 1.583388851876527, - "grad_norm": 0.6132605143548788, - "learning_rate": 7.56185706064212e-06, - "loss": 0.0513, + "epoch": 3.16607460035524, + "grad_norm": 0.5033226516615978, + "learning_rate": 1.2669508925499402e-06, + "loss": 0.0294, "step": 3565 }, { - "epoch": 1.5838330002220742, - "grad_norm": 0.4784620611087414, - "learning_rate": 7.560192122647647e-06, - "loss": 0.0507, + "epoch": 3.1669626998223803, + "grad_norm": 0.49455207444213073, + "learning_rate": 1.2643736984529587e-06, + "loss": 0.0314, "step": 3566 }, { - "epoch": 1.5842771485676215, - "grad_norm": 0.46891721716697665, - "learning_rate": 7.558526799812812e-06, - "loss": 0.0447, + "epoch": 3.1678507992895204, + "grad_norm": 0.4549611309469651, + "learning_rate": 1.2617987488060074e-06, + "loss": 0.0351, "step": 3567 }, { - "epoch": 1.584721296913169, - "grad_norm": 0.6007245405053189, - "learning_rate": 7.556861092387937e-06, - "loss": 0.0461, + "epoch": 3.168738898756661, + "grad_norm": 0.5322615508545987, + "learning_rate": 1.259226045156174e-06, + "loss": 0.0294, "step": 3568 }, { - "epoch": 1.5851654452587165, - "grad_norm": 0.6832271146214529, - "learning_rate": 7.555195000623404e-06, - "loss": 0.0615, + "epoch": 3.169626998223801, + "grad_norm": 0.4492201294052334, + "learning_rate": 1.256655589049197e-06, + "loss": 0.0299, "step": 3569 }, { - "epoch": 1.5856095936042638, - "grad_norm": 0.628507751994035, - "learning_rate": 7.553528524769658e-06, - "loss": 0.0511, + "epoch": 3.1705150976909415, + "grad_norm": 0.39680288347095183, + "learning_rate": 1.2540873820294663e-06, + "loss": 0.0322, "step": 3570 }, { - "epoch": 1.586053741949811, - "grad_norm": 0.47038947814194393, - "learning_rate": 7.551861665077199e-06, - "loss": 0.0503, + "epoch": 3.1714031971580816, + "grad_norm": 0.3126403079807289, + "learning_rate": 1.2515214256400216e-06, + "loss": 0.0261, "step": 3571 }, { - "epoch": 1.5864978902953588, - "grad_norm": 0.5039990895794063, - "learning_rate": 7.550194421796583e-06, - "loss": 0.0757, + "epoch": 3.172291296625222, + "grad_norm": 0.2995851897727122, + "learning_rate": 1.2489577214225467e-06, + "loss": 0.025, "step": 3572 }, { - "epoch": 1.586942038640906, - "grad_norm": 0.43304671963904556, - "learning_rate": 7.548526795178424e-06, - "loss": 0.0435, + "epoch": 3.173179396092362, + "grad_norm": 0.4722294181818512, + "learning_rate": 1.2463962709173727e-06, + "loss": 0.0386, "step": 3573 }, { - "epoch": 1.5873861869864534, - "grad_norm": 0.35567796030147675, - "learning_rate": 7.546858785473397e-06, - "loss": 0.0411, + "epoch": 3.1740674955595027, + "grad_norm": 0.4563629162787923, + "learning_rate": 1.2438370756634798e-06, + "loss": 0.0317, "step": 3574 }, { - "epoch": 1.587830335332001, - "grad_norm": 0.5082433548982926, - "learning_rate": 7.54519039293223e-06, - "loss": 0.0476, + "epoch": 3.174955595026643, + "grad_norm": 0.3541427562621811, + "learning_rate": 1.2412801371984896e-06, + "loss": 0.0298, "step": 3575 }, { - "epoch": 1.5882744836775484, - "grad_norm": 0.4853723595468299, - "learning_rate": 7.543521617805711e-06, - "loss": 0.0403, + "epoch": 3.1758436944937833, + "grad_norm": 0.3596867300973276, + "learning_rate": 1.2387254570586714e-06, + "loss": 0.0335, "step": 3576 }, { - "epoch": 1.5887186320230957, - "grad_norm": 0.4349996299659683, - "learning_rate": 7.541852460344687e-06, - "loss": 0.0444, + "epoch": 3.1767317939609234, + "grad_norm": 0.46727923275068173, + "learning_rate": 1.2361730367789348e-06, + "loss": 0.0391, "step": 3577 }, { - "epoch": 1.589162780368643, - "grad_norm": 0.3738858521887595, - "learning_rate": 7.540182920800061e-06, - "loss": 0.0342, + "epoch": 3.177619893428064, + "grad_norm": 0.41535192165244617, + "learning_rate": 1.2336228778928312e-06, + "loss": 0.0339, "step": 3578 }, { - "epoch": 1.5896069287141905, - "grad_norm": 0.3860111205972228, - "learning_rate": 7.5385129994227916e-06, - "loss": 0.0326, + "epoch": 3.1785079928952045, + "grad_norm": 0.3385146511794204, + "learning_rate": 1.231074981932555e-06, + "loss": 0.0298, "step": 3579 }, { - "epoch": 1.590051077059738, - "grad_norm": 0.4607949804916419, - "learning_rate": 7.536842696463894e-06, - "loss": 0.0406, + "epoch": 3.1793960923623446, + "grad_norm": 0.37876673668081506, + "learning_rate": 1.2285293504289448e-06, + "loss": 0.0379, "step": 3580 }, { - "epoch": 1.5904952254052853, - "grad_norm": 0.4090438356252611, - "learning_rate": 7.535172012174447e-06, - "loss": 0.0435, + "epoch": 3.180284191829485, + "grad_norm": 0.2931565929018687, + "learning_rate": 1.225985984911468e-06, + "loss": 0.0258, "step": 3581 }, { - "epoch": 1.5909393737508328, - "grad_norm": 0.44992279283684267, - "learning_rate": 7.533500946805583e-06, - "loss": 0.0433, + "epoch": 3.181172291296625, + "grad_norm": 0.37354509891361926, + "learning_rate": 1.2234448869082393e-06, + "loss": 0.0324, "step": 3582 }, { - "epoch": 1.5913835220963803, - "grad_norm": 0.5608893338456544, - "learning_rate": 7.531829500608489e-06, - "loss": 0.0461, + "epoch": 3.1820603907637657, + "grad_norm": 0.37746159995857514, + "learning_rate": 1.2209060579460114e-06, + "loss": 0.0269, "step": 3583 }, { - "epoch": 1.5918276704419276, - "grad_norm": 0.46587297637504893, - "learning_rate": 7.530157673834413e-06, - "loss": 0.0462, + "epoch": 3.1829484902309058, + "grad_norm": 0.4082476549440637, + "learning_rate": 1.2183694995501687e-06, + "loss": 0.0309, "step": 3584 }, { - "epoch": 1.592271818787475, - "grad_norm": 0.4650339559246697, - "learning_rate": 7.528485466734658e-06, - "loss": 0.0463, + "epoch": 3.1838365896980463, + "grad_norm": 0.3999879375086408, + "learning_rate": 1.2158352132447326e-06, + "loss": 0.0331, "step": 3585 }, { - "epoch": 1.5927159671330224, - "grad_norm": 0.6227086084109071, - "learning_rate": 7.526812879560586e-06, - "loss": 0.0579, + "epoch": 3.1847246891651864, + "grad_norm": 0.44253878404960434, + "learning_rate": 1.2133032005523632e-06, + "loss": 0.0362, "step": 3586 }, { - "epoch": 1.59316011547857, - "grad_norm": 0.38020073620262274, - "learning_rate": 7.525139912563616e-06, - "loss": 0.0356, + "epoch": 3.185612788632327, + "grad_norm": 0.42602035316738923, + "learning_rate": 1.2107734629943485e-06, + "loss": 0.0348, "step": 3587 }, { - "epoch": 1.5936042638241172, - "grad_norm": 0.4538280786991795, - "learning_rate": 7.523466565995224e-06, - "loss": 0.041, + "epoch": 3.186500888099467, + "grad_norm": 0.3403485636547022, + "learning_rate": 1.2082460020906168e-06, + "loss": 0.0318, "step": 3588 }, { - "epoch": 1.5940484121696645, - "grad_norm": 0.41382625396603623, - "learning_rate": 7.521792840106937e-06, - "loss": 0.0309, + "epoch": 3.1873889875666075, + "grad_norm": 0.3629239463382843, + "learning_rate": 1.2057208193597226e-06, + "loss": 0.0248, "step": 3589 }, { - "epoch": 1.594492560515212, - "grad_norm": 0.6086317820490929, - "learning_rate": 7.52011873515035e-06, - "loss": 0.055, + "epoch": 3.1882770870337476, + "grad_norm": 0.3635748301532728, + "learning_rate": 1.2031979163188523e-06, + "loss": 0.0295, "step": 3590 }, { - "epoch": 1.5949367088607596, - "grad_norm": 0.5518315774986915, - "learning_rate": 7.518444251377108e-06, - "loss": 0.0448, + "epoch": 3.189165186500888, + "grad_norm": 0.5736161809384074, + "learning_rate": 1.200677294483827e-06, + "loss": 0.0352, "step": 3591 }, { - "epoch": 1.5953808572063068, - "grad_norm": 0.46458719141632254, - "learning_rate": 7.516769389038915e-06, - "loss": 0.0375, + "epoch": 3.1900532859680286, + "grad_norm": 0.41314686201750805, + "learning_rate": 1.198158955369092e-06, + "loss": 0.0294, "step": 3592 }, { - "epoch": 1.5958250055518544, - "grad_norm": 0.6946531905139686, - "learning_rate": 7.515094148387529e-06, - "loss": 0.0637, + "epoch": 3.1909413854351687, + "grad_norm": 0.35802399099814725, + "learning_rate": 1.1956429004877229e-06, + "loss": 0.0297, "step": 3593 }, { - "epoch": 1.5962691538974019, - "grad_norm": 0.47113343903410826, - "learning_rate": 7.51341852967477e-06, - "loss": 0.0403, + "epoch": 3.191829484902309, + "grad_norm": 0.45446775683013857, + "learning_rate": 1.1931291313514255e-06, + "loss": 0.0343, "step": 3594 }, { - "epoch": 1.5967133022429492, - "grad_norm": 0.34877248157599594, - "learning_rate": 7.511742533152509e-06, - "loss": 0.035, + "epoch": 3.1927175843694493, + "grad_norm": 0.3815567088530151, + "learning_rate": 1.1906176494705269e-06, + "loss": 0.0313, "step": 3595 }, { - "epoch": 1.5971574505884965, - "grad_norm": 0.6597113801136277, - "learning_rate": 7.51006615907268e-06, - "loss": 0.0419, + "epoch": 3.19360568383659, + "grad_norm": 0.3338661128814227, + "learning_rate": 1.1881084563539864e-06, + "loss": 0.0294, "step": 3596 }, { - "epoch": 1.597601598934044, - "grad_norm": 0.6377364313042297, - "learning_rate": 7.508389407687267e-06, - "loss": 0.0503, + "epoch": 3.19449378330373, + "grad_norm": 0.39651084280681126, + "learning_rate": 1.1856015535093818e-06, + "loss": 0.0395, "step": 3597 }, { - "epoch": 1.5980457472795915, - "grad_norm": 0.45492565688398423, - "learning_rate": 7.506712279248316e-06, - "loss": 0.0399, + "epoch": 3.1953818827708704, + "grad_norm": 0.30600919417760303, + "learning_rate": 1.1830969424429206e-06, + "loss": 0.0278, "step": 3598 }, { - "epoch": 1.5984898956251388, - "grad_norm": 0.4963351358417883, - "learning_rate": 7.5050347740079285e-06, - "loss": 0.0404, + "epoch": 3.1962699822380105, + "grad_norm": 0.3985018150523524, + "learning_rate": 1.1805946246594274e-06, + "loss": 0.0301, "step": 3599 }, { - "epoch": 1.598934043970686, - "grad_norm": 0.3821109498168256, - "learning_rate": 7.503356892218261e-06, - "loss": 0.0341, + "epoch": 3.197158081705151, + "grad_norm": 0.3330658386535985, + "learning_rate": 1.1780946016623563e-06, + "loss": 0.0288, "step": 3600 }, { - "epoch": 1.5993781923162336, - "grad_norm": 0.5218817014308058, - "learning_rate": 7.501678634131528e-06, - "loss": 0.0457, + "epoch": 3.198046181172291, + "grad_norm": 0.38800269698605144, + "learning_rate": 1.1755968749537755e-06, + "loss": 0.0371, "step": 3601 }, { - "epoch": 1.599822340661781, - "grad_norm": 0.48140971211980904, - "learning_rate": 7.500000000000001e-06, - "loss": 0.0416, + "epoch": 3.1989342806394316, + "grad_norm": 0.3772262775640011, + "learning_rate": 1.1731014460343764e-06, + "loss": 0.0328, "step": 3602 }, { - "epoch": 1.6002664890073284, - "grad_norm": 0.6724090547593772, - "learning_rate": 7.498320990076006e-06, - "loss": 0.0407, + "epoch": 3.199822380106572, + "grad_norm": 0.35801569723417365, + "learning_rate": 1.170608316403472e-06, + "loss": 0.0355, "step": 3603 }, { - "epoch": 1.6007106373528759, - "grad_norm": 0.4673511339888186, - "learning_rate": 7.496641604611926e-06, - "loss": 0.0445, + "epoch": 3.2007104795737122, + "grad_norm": 0.540743442831892, + "learning_rate": 1.1681174875589912e-06, + "loss": 0.0306, "step": 3604 }, { - "epoch": 1.6011547856984234, - "grad_norm": 0.4736546778146936, - "learning_rate": 7.494961843860204e-06, - "loss": 0.0457, + "epoch": 3.2015985790408528, + "grad_norm": 0.38639688907139, + "learning_rate": 1.1656289609974786e-06, + "loss": 0.0353, "step": 3605 }, { - "epoch": 1.6015989340439707, - "grad_norm": 0.46683144779872127, - "learning_rate": 7.4932817080733345e-06, - "loss": 0.0534, + "epoch": 3.202486678507993, + "grad_norm": 0.31038169012942846, + "learning_rate": 1.1631427382141013e-06, + "loss": 0.0268, "step": 3606 }, { - "epoch": 1.602043082389518, - "grad_norm": 0.38959987746176267, - "learning_rate": 7.491601197503871e-06, - "loss": 0.037, + "epoch": 3.2033747779751334, + "grad_norm": 0.5787522461060308, + "learning_rate": 1.160658820702637e-06, + "loss": 0.0338, "step": 3607 }, { - "epoch": 1.6024872307350655, - "grad_norm": 0.5453813196476248, - "learning_rate": 7.489920312404422e-06, - "loss": 0.0524, + "epoch": 3.2042628774422734, + "grad_norm": 0.4370600645764752, + "learning_rate": 1.1581772099554828e-06, + "loss": 0.0272, "step": 3608 }, { - "epoch": 1.602931379080613, - "grad_norm": 0.5800473949841606, - "learning_rate": 7.488239053027653e-06, - "loss": 0.0575, + "epoch": 3.205150976909414, + "grad_norm": 0.34153399006094054, + "learning_rate": 1.1556979074636448e-06, + "loss": 0.0277, "step": 3609 }, { - "epoch": 1.6033755274261603, - "grad_norm": 0.5289683983594908, - "learning_rate": 7.486557419626288e-06, - "loss": 0.0548, + "epoch": 3.206039076376554, + "grad_norm": 0.39807487230475347, + "learning_rate": 1.153220914716745e-06, + "loss": 0.0257, "step": 3610 }, { - "epoch": 1.6038196757717076, - "grad_norm": 0.6070529095018132, - "learning_rate": 7.484875412453102e-06, - "loss": 0.0412, + "epoch": 3.2069271758436946, + "grad_norm": 0.36300900520000173, + "learning_rate": 1.1507462332030183e-06, + "loss": 0.0277, "step": 3611 }, { - "epoch": 1.6042638241172553, - "grad_norm": 0.589551206830852, - "learning_rate": 7.483193031760932e-06, - "loss": 0.0548, + "epoch": 3.2078152753108347, + "grad_norm": 0.7205542852667735, + "learning_rate": 1.1482738644093134e-06, + "loss": 0.0434, "step": 3612 }, { - "epoch": 1.6047079724628026, - "grad_norm": 0.4017721125553618, - "learning_rate": 7.481510277802667e-06, - "loss": 0.0294, + "epoch": 3.208703374777975, + "grad_norm": 0.34081759783713905, + "learning_rate": 1.1458038098210795e-06, + "loss": 0.0246, "step": 3613 }, { - "epoch": 1.60515212080835, - "grad_norm": 0.45650083282856746, - "learning_rate": 7.479827150831254e-06, - "loss": 0.0368, + "epoch": 3.2095914742451153, + "grad_norm": 0.41700014772652777, + "learning_rate": 1.1433360709223855e-06, + "loss": 0.0324, "step": 3614 }, { - "epoch": 1.6055962691538974, - "grad_norm": 0.3377636070281279, - "learning_rate": 7.478143651099694e-06, - "loss": 0.0361, + "epoch": 3.210479573712256, + "grad_norm": 0.35544687024847266, + "learning_rate": 1.1408706491959076e-06, + "loss": 0.0342, "step": 3615 }, { - "epoch": 1.606040417499445, - "grad_norm": 0.4206873654117964, - "learning_rate": 7.4764597788610496e-06, - "loss": 0.0373, + "epoch": 3.211367673179396, + "grad_norm": 0.45607285714623136, + "learning_rate": 1.1384075461229255e-06, + "loss": 0.0372, "step": 3616 }, { - "epoch": 1.6064845658449922, - "grad_norm": 0.5196617152819853, - "learning_rate": 7.47477553436843e-06, - "loss": 0.0535, + "epoch": 3.2122557726465364, + "grad_norm": 0.3212607067426791, + "learning_rate": 1.135946763183327e-06, + "loss": 0.0244, "step": 3617 }, { - "epoch": 1.6069287141905395, - "grad_norm": 0.499359501911801, - "learning_rate": 7.47309091787501e-06, - "loss": 0.0414, + "epoch": 3.213143872113677, + "grad_norm": 0.33666676900365416, + "learning_rate": 1.1334883018556103e-06, + "loss": 0.0331, "step": 3618 }, { - "epoch": 1.607372862536087, - "grad_norm": 0.5994533032039974, - "learning_rate": 7.471405929634014e-06, - "loss": 0.0521, + "epoch": 3.214031971580817, + "grad_norm": 0.3985437893793705, + "learning_rate": 1.1310321636168714e-06, + "loss": 0.0332, "step": 3619 }, { - "epoch": 1.6078170108816345, - "grad_norm": 0.3309953500833457, - "learning_rate": 7.469720569898725e-06, - "loss": 0.0331, + "epoch": 3.2149200710479575, + "grad_norm": 0.3132500002866778, + "learning_rate": 1.1285783499428182e-06, + "loss": 0.0314, "step": 3620 }, { - "epoch": 1.6082611592271818, - "grad_norm": 0.4509086857710446, - "learning_rate": 7.468034838922482e-06, - "loss": 0.0458, + "epoch": 3.2158081705150976, + "grad_norm": 0.44934975259489535, + "learning_rate": 1.126126862307757e-06, + "loss": 0.0407, "step": 3621 }, { - "epoch": 1.6087053075727293, - "grad_norm": 0.45137595767758343, - "learning_rate": 7.4663487369586776e-06, - "loss": 0.0469, + "epoch": 3.216696269982238, + "grad_norm": 0.38076278712215256, + "learning_rate": 1.1236777021845957e-06, + "loss": 0.0315, "step": 3622 }, { - "epoch": 1.6091494559182768, - "grad_norm": 0.4696695504363821, - "learning_rate": 7.464662264260761e-06, - "loss": 0.0452, + "epoch": 3.217584369449378, + "grad_norm": 0.3971270966244105, + "learning_rate": 1.1212308710448477e-06, + "loss": 0.0318, "step": 3623 }, { - "epoch": 1.6095936042638241, - "grad_norm": 0.4596849970347727, - "learning_rate": 7.46297542108224e-06, - "loss": 0.0438, + "epoch": 3.2184724689165187, + "grad_norm": 0.32239116405325563, + "learning_rate": 1.1187863703586283e-06, + "loss": 0.0272, "step": 3624 }, { - "epoch": 1.6100377526093714, - "grad_norm": 0.5614676347080366, - "learning_rate": 7.4612882076766744e-06, - "loss": 0.0599, + "epoch": 3.219360568383659, + "grad_norm": 0.37343600464931165, + "learning_rate": 1.1163442015946442e-06, + "loss": 0.0308, "step": 3625 }, { - "epoch": 1.610481900954919, - "grad_norm": 0.3947391918010033, - "learning_rate": 7.459600624297681e-06, - "loss": 0.0347, + "epoch": 3.2202486678507993, + "grad_norm": 0.36166634491368127, + "learning_rate": 1.1139043662202082e-06, + "loss": 0.0293, "step": 3626 }, { - "epoch": 1.6109260493004665, - "grad_norm": 0.6133457502317237, - "learning_rate": 7.4579126711989326e-06, - "loss": 0.053, + "epoch": 3.2211367673179394, + "grad_norm": 0.3654215711524558, + "learning_rate": 1.1114668657012324e-06, + "loss": 0.0314, "step": 3627 }, { - "epoch": 1.6113701976460137, - "grad_norm": 0.45537205293561567, - "learning_rate": 7.456224348634158e-06, - "loss": 0.0436, + "epoch": 3.22202486678508, + "grad_norm": 0.3603595596939604, + "learning_rate": 1.10903170150222e-06, + "loss": 0.0263, "step": 3628 }, { - "epoch": 1.611814345991561, - "grad_norm": 0.46644204998070415, - "learning_rate": 7.454535656857138e-06, - "loss": 0.0492, + "epoch": 3.2229129662522205, + "grad_norm": 0.3347442955939939, + "learning_rate": 1.1065988750862732e-06, + "loss": 0.0259, "step": 3629 }, { - "epoch": 1.6122584943371085, - "grad_norm": 0.5228600296837344, - "learning_rate": 7.4528465961217145e-06, - "loss": 0.0521, + "epoch": 3.2238010657193605, + "grad_norm": 0.36054957342562477, + "learning_rate": 1.1041683879150928e-06, + "loss": 0.0268, "step": 3630 }, { - "epoch": 1.612702642682656, - "grad_norm": 0.5557514684322911, - "learning_rate": 7.451157166681781e-06, - "loss": 0.0526, + "epoch": 3.224689165186501, + "grad_norm": 0.4058356455460025, + "learning_rate": 1.1017402414489674e-06, + "loss": 0.0307, "step": 3631 }, { - "epoch": 1.6131467910282034, - "grad_norm": 0.4758338953909275, - "learning_rate": 7.449467368791287e-06, - "loss": 0.0417, + "epoch": 3.225577264653641, + "grad_norm": 0.45335634106980394, + "learning_rate": 1.0993144371467874e-06, + "loss": 0.0326, "step": 3632 }, { - "epoch": 1.6135909393737509, - "grad_norm": 0.5619608884775461, - "learning_rate": 7.4477772027042395e-06, - "loss": 0.0582, + "epoch": 3.2264653641207817, + "grad_norm": 0.32254800073807566, + "learning_rate": 1.096890976466029e-06, + "loss": 0.0335, "step": 3633 }, { - "epoch": 1.6140350877192984, - "grad_norm": 0.46236286351850703, - "learning_rate": 7.4460866686746966e-06, - "loss": 0.0384, + "epoch": 3.2273534635879217, + "grad_norm": 0.43680237472285177, + "learning_rate": 1.094469860862763e-06, + "loss": 0.0328, "step": 3634 }, { - "epoch": 1.6144792360648457, - "grad_norm": 0.44941172754104425, - "learning_rate": 7.444395766956776e-06, - "loss": 0.0451, + "epoch": 3.2282415630550623, + "grad_norm": 0.34419604781032265, + "learning_rate": 1.0920510917916527e-06, + "loss": 0.0344, "step": 3635 }, { - "epoch": 1.614923384410393, - "grad_norm": 0.5343367958828964, - "learning_rate": 7.4427044978046496e-06, - "loss": 0.0539, + "epoch": 3.2291296625222023, + "grad_norm": 0.31588049720715605, + "learning_rate": 1.089634670705948e-06, + "loss": 0.0317, "step": 3636 }, { - "epoch": 1.6153675327559405, - "grad_norm": 0.7114764542018213, - "learning_rate": 7.4410128614725406e-06, - "loss": 0.0632, + "epoch": 3.230017761989343, + "grad_norm": 0.3584527839153107, + "learning_rate": 1.0872205990574924e-06, + "loss": 0.0273, "step": 3637 }, { - "epoch": 1.615811681101488, - "grad_norm": 0.33754698523568316, - "learning_rate": 7.439320858214736e-06, - "loss": 0.0432, + "epoch": 3.230905861456483, + "grad_norm": 0.35768532221840543, + "learning_rate": 1.0848088782967137e-06, + "loss": 0.0251, "step": 3638 }, { - "epoch": 1.6162558294470353, - "grad_norm": 0.43643183273515707, - "learning_rate": 7.437628488285568e-06, - "loss": 0.0379, + "epoch": 3.2317939609236235, + "grad_norm": 0.37716262645451376, + "learning_rate": 1.082399509872631e-06, + "loss": 0.0339, "step": 3639 }, { - "epoch": 1.6166999777925826, - "grad_norm": 0.44390629309704815, - "learning_rate": 7.435935751939429e-06, - "loss": 0.0446, + "epoch": 3.232682060390764, + "grad_norm": 0.3900464102557058, + "learning_rate": 1.0799924952328456e-06, + "loss": 0.0358, "step": 3640 }, { - "epoch": 1.6171441261381303, - "grad_norm": 0.4354396645312695, - "learning_rate": 7.4342426494307695e-06, - "loss": 0.043, + "epoch": 3.233570159857904, + "grad_norm": 0.30070035854918137, + "learning_rate": 1.0775878358235503e-06, + "loss": 0.0275, "step": 3641 }, { - "epoch": 1.6175882744836776, - "grad_norm": 0.44873911764176055, - "learning_rate": 7.432549181014088e-06, - "loss": 0.0302, + "epoch": 3.2344582593250446, + "grad_norm": 0.3530274441342446, + "learning_rate": 1.0751855330895179e-06, + "loss": 0.0308, "step": 3642 }, { - "epoch": 1.6180324228292249, - "grad_norm": 0.41154412795322054, - "learning_rate": 7.430855346943942e-06, - "loss": 0.0395, + "epoch": 3.2353463587921847, + "grad_norm": 0.3323833981612693, + "learning_rate": 1.0727855884741057e-06, + "loss": 0.0291, "step": 3643 }, { - "epoch": 1.6184765711747724, - "grad_norm": 0.5287832714190103, - "learning_rate": 7.4291611474749455e-06, - "loss": 0.0473, + "epoch": 3.236234458259325, + "grad_norm": 0.3005373035476294, + "learning_rate": 1.0703880034192582e-06, + "loss": 0.0253, "step": 3644 }, { - "epoch": 1.61892071952032, - "grad_norm": 0.4007407767223763, - "learning_rate": 7.427466582861765e-06, - "loss": 0.0378, + "epoch": 3.2371225577264653, + "grad_norm": 0.3682642479597278, + "learning_rate": 1.0679927793654983e-06, + "loss": 0.0304, "step": 3645 }, { - "epoch": 1.6193648678658672, - "grad_norm": 0.58929177581027, - "learning_rate": 7.42577165335912e-06, - "loss": 0.0551, + "epoch": 3.238010657193606, + "grad_norm": 0.3422044006741604, + "learning_rate": 1.0655999177519304e-06, + "loss": 0.0335, "step": 3646 }, { - "epoch": 1.6198090162114145, - "grad_norm": 0.3940542333112033, - "learning_rate": 7.42407635922179e-06, - "loss": 0.0294, + "epoch": 3.238898756660746, + "grad_norm": 0.3129341268530191, + "learning_rate": 1.063209420016243e-06, + "loss": 0.0326, "step": 3647 }, { - "epoch": 1.620253164556962, - "grad_norm": 0.3862139275428533, - "learning_rate": 7.4223807007046045e-06, - "loss": 0.0318, + "epoch": 3.2397868561278864, + "grad_norm": 0.39978841909230495, + "learning_rate": 1.0608212875946994e-06, + "loss": 0.0298, "step": 3648 }, { - "epoch": 1.6206973129025095, - "grad_norm": 0.4233634000349271, - "learning_rate": 7.4206846780624505e-06, - "loss": 0.0351, + "epoch": 3.2406749555950265, + "grad_norm": 0.45583744131392623, + "learning_rate": 1.0584355219221475e-06, + "loss": 0.0333, "step": 3649 }, { - "epoch": 1.6211414612480568, - "grad_norm": 0.595696689600085, - "learning_rate": 7.418988291550271e-06, - "loss": 0.05, + "epoch": 3.241563055062167, + "grad_norm": 0.34424345973016157, + "learning_rate": 1.0560521244320083e-06, + "loss": 0.0236, "step": 3650 }, { - "epoch": 1.6215856095936043, - "grad_norm": 0.45676187183693634, - "learning_rate": 7.417291541423057e-06, - "loss": 0.0408, + "epoch": 3.242451154529307, + "grad_norm": 0.36684106852920867, + "learning_rate": 1.0536710965562801e-06, + "loss": 0.0414, "step": 3651 }, { - "epoch": 1.6220297579391518, - "grad_norm": 0.5662085190370286, - "learning_rate": 7.415594427935864e-06, - "loss": 0.0449, + "epoch": 3.2433392539964476, + "grad_norm": 0.4431679278597177, + "learning_rate": 1.0512924397255409e-06, + "loss": 0.0332, "step": 3652 }, { - "epoch": 1.6224739062846991, - "grad_norm": 0.5893188179834489, - "learning_rate": 7.4138969513437945e-06, - "loss": 0.0514, + "epoch": 3.2442273534635877, + "grad_norm": 0.3450258871559446, + "learning_rate": 1.0489161553689454e-06, + "loss": 0.031, "step": 3653 }, { - "epoch": 1.6229180546302464, - "grad_norm": 1.1475511733222958, - "learning_rate": 7.412199111902007e-06, - "loss": 0.0539, + "epoch": 3.2451154529307282, + "grad_norm": 0.28470546275432324, + "learning_rate": 1.0465422449142137e-06, + "loss": 0.0281, "step": 3654 }, { - "epoch": 1.623362202975794, - "grad_norm": 0.4595814551148835, - "learning_rate": 7.410500909865718e-06, - "loss": 0.0422, + "epoch": 3.2460035523978688, + "grad_norm": 0.4127530639860459, + "learning_rate": 1.0441707097876486e-06, + "loss": 0.0353, "step": 3655 }, { - "epoch": 1.6238063513213414, - "grad_norm": 0.4905543614968632, - "learning_rate": 7.408802345490194e-06, - "loss": 0.0426, + "epoch": 3.246891651865009, + "grad_norm": 0.3568083807868309, + "learning_rate": 1.0418015514141239e-06, + "loss": 0.035, "step": 3656 }, { - "epoch": 1.6242504996668887, - "grad_norm": 0.44546697092666765, - "learning_rate": 7.407103419030759e-06, - "loss": 0.0543, + "epoch": 3.2477797513321494, + "grad_norm": 0.36552784917385517, + "learning_rate": 1.0394347712170837e-06, + "loss": 0.0294, "step": 3657 }, { - "epoch": 1.624694648012436, - "grad_norm": 0.4740804743160003, - "learning_rate": 7.405404130742793e-06, - "loss": 0.0493, + "epoch": 3.2486678507992894, + "grad_norm": 0.3670258157620859, + "learning_rate": 1.037070370618542e-06, + "loss": 0.0317, "step": 3658 }, { - "epoch": 1.6251387963579835, - "grad_norm": 1.5473311509032195, - "learning_rate": 7.4037044808817224e-06, - "loss": 0.0513, + "epoch": 3.24955595026643, + "grad_norm": 0.40799529442652116, + "learning_rate": 1.0347083510390882e-06, + "loss": 0.0326, "step": 3659 }, { - "epoch": 1.625582944703531, - "grad_norm": 0.6098239500874039, - "learning_rate": 7.402004469703038e-06, - "loss": 0.0367, + "epoch": 3.25044404973357, + "grad_norm": 0.3450119468412042, + "learning_rate": 1.0323487138978738e-06, + "loss": 0.0266, "step": 3660 }, { - "epoch": 1.6260270930490783, - "grad_norm": 0.6586494701114967, - "learning_rate": 7.4003040974622784e-06, - "loss": 0.0585, + "epoch": 3.2513321492007106, + "grad_norm": 0.4010754328666231, + "learning_rate": 1.0299914606126276e-06, + "loss": 0.0381, "step": 3661 }, { - "epoch": 1.6264712413946258, - "grad_norm": 0.45627803806709566, - "learning_rate": 7.39860336441504e-06, - "loss": 0.048, + "epoch": 3.2522202486678506, + "grad_norm": 0.33156606244503334, + "learning_rate": 1.0276365925996384e-06, + "loss": 0.0309, "step": 3662 }, { - "epoch": 1.6269153897401734, - "grad_norm": 0.5771583864203911, - "learning_rate": 7.3969022708169695e-06, - "loss": 0.0378, + "epoch": 3.253108348134991, + "grad_norm": 0.3950135284211048, + "learning_rate": 1.0252841112737649e-06, + "loss": 0.029, "step": 3663 }, { - "epoch": 1.6273595380857206, - "grad_norm": 0.4931171394680339, - "learning_rate": 7.395200816923774e-06, - "loss": 0.033, + "epoch": 3.2539964476021312, + "grad_norm": 0.36065627802528166, + "learning_rate": 1.022934018048432e-06, + "loss": 0.0316, "step": 3664 }, { - "epoch": 1.627803686431268, - "grad_norm": 0.7206307962976604, - "learning_rate": 7.393499002991206e-06, - "loss": 0.0541, + "epoch": 3.2548845470692718, + "grad_norm": 0.423051441368762, + "learning_rate": 1.0205863143356338e-06, + "loss": 0.0325, "step": 3665 }, { - "epoch": 1.6282478347768155, - "grad_norm": 0.4622361606059328, - "learning_rate": 7.3917968292750785e-06, - "loss": 0.0415, + "epoch": 3.2557726465364123, + "grad_norm": 0.3434621044795289, + "learning_rate": 1.0182410015459183e-06, + "loss": 0.026, "step": 3666 }, { - "epoch": 1.628691983122363, - "grad_norm": 0.7660149085068831, - "learning_rate": 7.390094296031259e-06, - "loss": 0.0627, + "epoch": 3.2566607460035524, + "grad_norm": 0.3955697345905485, + "learning_rate": 1.0158980810884057e-06, + "loss": 0.0353, "step": 3667 }, { - "epoch": 1.6291361314679103, - "grad_norm": 0.7776923661917167, - "learning_rate": 7.3883914035156666e-06, - "loss": 0.0396, + "epoch": 3.257548845470693, + "grad_norm": 0.400489083511983, + "learning_rate": 1.0135575543707782e-06, + "loss": 0.0331, "step": 3668 }, { - "epoch": 1.6295802798134575, - "grad_norm": 0.6556574717804856, - "learning_rate": 7.386688151984275e-06, - "loss": 0.0449, + "epoch": 3.258436944937833, + "grad_norm": 0.5068290095323994, + "learning_rate": 1.0112194227992767e-06, + "loss": 0.0363, "step": 3669 }, { - "epoch": 1.630024428159005, - "grad_norm": 0.6486302576198958, - "learning_rate": 7.384984541693111e-06, - "loss": 0.0456, + "epoch": 3.2593250444049735, + "grad_norm": 0.38441100164085706, + "learning_rate": 1.0088836877787024e-06, + "loss": 0.0363, "step": 3670 }, { - "epoch": 1.6304685765045526, - "grad_norm": 0.38753606074247743, - "learning_rate": 7.383280572898256e-06, - "loss": 0.0374, + "epoch": 3.2602131438721136, + "grad_norm": 0.3085506929583767, + "learning_rate": 1.006550350712422e-06, + "loss": 0.0265, "step": 3671 }, { - "epoch": 1.6309127248500999, - "grad_norm": 0.4446665040074296, - "learning_rate": 7.381576245855847e-06, - "loss": 0.0399, + "epoch": 3.261101243339254, + "grad_norm": 0.37596699763301317, + "learning_rate": 1.0042194130023546e-06, + "loss": 0.038, "step": 3672 }, { - "epoch": 1.6313568731956474, - "grad_norm": 0.6808941373857945, - "learning_rate": 7.379871560822071e-06, - "loss": 0.0523, + "epoch": 3.261989342806394, + "grad_norm": 0.46539946467784293, + "learning_rate": 1.0018908760489844e-06, + "loss": 0.034, "step": 3673 }, { - "epoch": 1.6318010215411949, - "grad_norm": 0.6878067318911092, - "learning_rate": 7.378166518053174e-06, - "loss": 0.0557, + "epoch": 3.2628774422735347, + "grad_norm": 0.36104104474291415, + "learning_rate": 9.995647412513475e-07, + "loss": 0.0362, "step": 3674 }, { - "epoch": 1.6322451698867422, - "grad_norm": 0.4460005003836504, - "learning_rate": 7.37646111780545e-06, - "loss": 0.0338, + "epoch": 3.263765541740675, + "grad_norm": 0.3990660872797956, + "learning_rate": 9.97241010007039e-07, + "loss": 0.0331, "step": 3675 }, { - "epoch": 1.6326893182322895, - "grad_norm": 0.392173745509544, - "learning_rate": 7.374755360335253e-06, - "loss": 0.0358, + "epoch": 3.2646536412078153, + "grad_norm": 0.42276274619842286, + "learning_rate": 9.949196837122121e-07, + "loss": 0.0388, "step": 3676 }, { - "epoch": 1.633133466577837, - "grad_norm": 0.4942681170970006, - "learning_rate": 7.3730492458989825e-06, - "loss": 0.0451, + "epoch": 3.265541740674956, + "grad_norm": 0.35859010345599995, + "learning_rate": 9.926007637615715e-07, + "loss": 0.0325, "step": 3677 }, { - "epoch": 1.6335776149233845, - "grad_norm": 0.35125391907425363, - "learning_rate": 7.371342774753101e-06, - "loss": 0.0376, + "epoch": 3.266429840142096, + "grad_norm": 0.3222964953326768, + "learning_rate": 9.902842515483763e-07, + "loss": 0.0256, "step": 3678 }, { - "epoch": 1.6340217632689318, - "grad_norm": 0.5143098431682592, - "learning_rate": 7.369635947154119e-06, - "loss": 0.0469, + "epoch": 3.267317939609236, + "grad_norm": 0.3574957365393956, + "learning_rate": 9.879701484644411e-07, + "loss": 0.0345, "step": 3679 }, { - "epoch": 1.634465911614479, - "grad_norm": 0.5196302676867172, - "learning_rate": 7.3679287633585995e-06, - "loss": 0.0436, + "epoch": 3.2682060390763765, + "grad_norm": 0.35860403384823814, + "learning_rate": 9.856584559001342e-07, + "loss": 0.0369, "step": 3680 }, { - "epoch": 1.6349100599600268, - "grad_norm": 0.5148856987270073, - "learning_rate": 7.366221223623163e-06, - "loss": 0.0429, + "epoch": 3.269094138543517, + "grad_norm": 0.37048339629496846, + "learning_rate": 9.833491752443713e-07, + "loss": 0.0284, "step": 3681 }, { - "epoch": 1.635354208305574, - "grad_norm": 0.42764696599583774, - "learning_rate": 7.3645133282044835e-06, - "loss": 0.0333, + "epoch": 3.269982238010657, + "grad_norm": 0.34021955727332254, + "learning_rate": 9.81042307884621e-07, + "loss": 0.031, "step": 3682 }, { - "epoch": 1.6357983566511214, - "grad_norm": 0.3662627068077827, - "learning_rate": 7.362805077359283e-06, - "loss": 0.0312, + "epoch": 3.2708703374777977, + "grad_norm": 0.4122784589503335, + "learning_rate": 9.787378552069044e-07, + "loss": 0.0325, "step": 3683 }, { - "epoch": 1.636242504996669, - "grad_norm": 0.5318925951291891, - "learning_rate": 7.361096471344341e-06, - "loss": 0.0392, + "epoch": 3.2717584369449377, + "grad_norm": 0.4373114567342978, + "learning_rate": 9.764358185957867e-07, + "loss": 0.0272, "step": 3684 }, { - "epoch": 1.6366866533422164, - "grad_norm": 0.5101772714710058, - "learning_rate": 7.359387510416494e-06, - "loss": 0.0444, + "epoch": 3.2726465364120783, + "grad_norm": 0.31816932071306603, + "learning_rate": 9.741361994343867e-07, + "loss": 0.0295, "step": 3685 }, { - "epoch": 1.6371308016877637, - "grad_norm": 0.6838175180628748, - "learning_rate": 7.357678194832623e-06, - "loss": 0.0443, + "epoch": 3.2735346358792183, + "grad_norm": 0.6459369984798691, + "learning_rate": 9.71838999104368e-07, + "loss": 0.0283, "step": 3686 }, { - "epoch": 1.637574950033311, - "grad_norm": 0.4298262192631913, - "learning_rate": 7.355968524849671e-06, - "loss": 0.0397, + "epoch": 3.274422735346359, + "grad_norm": 0.3766304584171301, + "learning_rate": 9.695442189859395e-07, + "loss": 0.0401, "step": 3687 }, { - "epoch": 1.6380190983788585, - "grad_norm": 0.5227260046796232, - "learning_rate": 7.354258500724627e-06, - "loss": 0.0428, + "epoch": 3.275310834813499, + "grad_norm": 0.33935473355987256, + "learning_rate": 9.672518604578595e-07, + "loss": 0.031, "step": 3688 }, { - "epoch": 1.638463246724406, - "grad_norm": 0.3848693505144518, - "learning_rate": 7.352548122714541e-06, - "loss": 0.038, + "epoch": 3.2761989342806395, + "grad_norm": 0.40498961036649267, + "learning_rate": 9.649619248974302e-07, + "loss": 0.0383, "step": 3689 }, { - "epoch": 1.6389073950699533, - "grad_norm": 0.35605605527782813, - "learning_rate": 7.350837391076509e-06, - "loss": 0.0385, + "epoch": 3.2770870337477795, + "grad_norm": 0.3818979587980272, + "learning_rate": 9.626744136804944e-07, + "loss": 0.0318, "step": 3690 }, { - "epoch": 1.6393515434155008, - "grad_norm": 0.5496746740887447, - "learning_rate": 7.349126306067681e-06, - "loss": 0.0399, + "epoch": 3.27797513321492, + "grad_norm": 0.3179592431463596, + "learning_rate": 9.603893281814446e-07, + "loss": 0.0258, "step": 3691 }, { - "epoch": 1.6397956917610483, - "grad_norm": 0.3577570105037349, - "learning_rate": 7.347414867945266e-06, - "loss": 0.035, + "epoch": 3.2788632326820606, + "grad_norm": 0.412061975287786, + "learning_rate": 9.581066697732106e-07, + "loss": 0.0295, "step": 3692 }, { - "epoch": 1.6402398401065956, - "grad_norm": 0.4964615292337297, - "learning_rate": 7.345703076966522e-06, - "loss": 0.0517, + "epoch": 3.2797513321492007, + "grad_norm": 0.35376706053661183, + "learning_rate": 9.558264398272675e-07, + "loss": 0.0244, "step": 3693 }, { - "epoch": 1.640683988452143, - "grad_norm": 0.7390592067058361, - "learning_rate": 7.343990933388757e-06, - "loss": 0.0367, + "epoch": 3.280639431616341, + "grad_norm": 0.3007743907618776, + "learning_rate": 9.53548639713629e-07, + "loss": 0.0244, "step": 3694 }, { - "epoch": 1.6411281367976904, - "grad_norm": 0.3720816990200118, - "learning_rate": 7.342278437469338e-06, - "loss": 0.0385, + "epoch": 3.2815275310834813, + "grad_norm": 0.4158163193069238, + "learning_rate": 9.512732708008487e-07, + "loss": 0.0325, "step": 3695 }, { - "epoch": 1.641572285143238, - "grad_norm": 0.4417091992699088, - "learning_rate": 7.340565589465681e-06, - "loss": 0.0496, + "epoch": 3.282415630550622, + "grad_norm": 0.35365647232122, + "learning_rate": 9.490003344560211e-07, + "loss": 0.0344, "step": 3696 }, { - "epoch": 1.6420164334887852, - "grad_norm": 0.4387119629881648, - "learning_rate": 7.338852389635258e-06, - "loss": 0.0482, + "epoch": 3.283303730017762, + "grad_norm": 0.3791989904442274, + "learning_rate": 9.467298320447804e-07, + "loss": 0.0353, "step": 3697 }, { - "epoch": 1.6424605818343325, - "grad_norm": 0.5237427797642873, - "learning_rate": 7.337138838235589e-06, - "loss": 0.0397, + "epoch": 3.2841918294849024, + "grad_norm": 0.3458884379167756, + "learning_rate": 9.444617649312954e-07, + "loss": 0.028, "step": 3698 }, { - "epoch": 1.64290473017988, - "grad_norm": 0.5126897468237226, - "learning_rate": 7.335424935524254e-06, - "loss": 0.0354, + "epoch": 3.2850799289520425, + "grad_norm": 0.38332292715688804, + "learning_rate": 9.421961344782709e-07, + "loss": 0.0278, "step": 3699 }, { - "epoch": 1.6433488785254275, - "grad_norm": 0.5632224593772709, - "learning_rate": 7.333710681758876e-06, - "loss": 0.0515, + "epoch": 3.285968028419183, + "grad_norm": 0.420476771200935, + "learning_rate": 9.399329420469527e-07, + "loss": 0.0361, "step": 3700 }, { - "epoch": 1.6437930268709748, - "grad_norm": 0.37651906930453466, - "learning_rate": 7.331996077197141e-06, - "loss": 0.0263, + "epoch": 3.286856127886323, + "grad_norm": 0.6265528642038826, + "learning_rate": 9.376721889971158e-07, + "loss": 0.0434, "step": 3701 }, { - "epoch": 1.6442371752165224, - "grad_norm": 0.403180672879644, - "learning_rate": 7.330281122096783e-06, - "loss": 0.0361, + "epoch": 3.2877442273534636, + "grad_norm": 0.3224220289092714, + "learning_rate": 9.354138766870758e-07, + "loss": 0.0296, "step": 3702 }, { - "epoch": 1.6446813235620699, - "grad_norm": 0.3870458503286661, - "learning_rate": 7.328565816715587e-06, - "loss": 0.0391, + "epoch": 3.288632326820604, + "grad_norm": 0.3949143641031206, + "learning_rate": 9.331580064736768e-07, + "loss": 0.0296, "step": 3703 }, { - "epoch": 1.6451254719076172, - "grad_norm": 0.46910403259693756, - "learning_rate": 7.326850161311394e-06, - "loss": 0.0539, + "epoch": 3.289520426287744, + "grad_norm": 0.3544687226222743, + "learning_rate": 9.309045797122962e-07, + "loss": 0.029, "step": 3704 }, { - "epoch": 1.6455696202531644, - "grad_norm": 0.6277176707566176, - "learning_rate": 7.325134156142093e-06, - "loss": 0.0434, + "epoch": 3.2904085257548847, + "grad_norm": 0.3556846381666713, + "learning_rate": 9.286535977568456e-07, + "loss": 0.0322, "step": 3705 }, { - "epoch": 1.646013768598712, - "grad_norm": 0.5311345861511363, - "learning_rate": 7.323417801465633e-06, - "loss": 0.0528, + "epoch": 3.291296625222025, + "grad_norm": 0.36853334223341294, + "learning_rate": 9.264050619597697e-07, + "loss": 0.0305, "step": 3706 }, { - "epoch": 1.6464579169442595, - "grad_norm": 0.49558316497497373, - "learning_rate": 7.32170109754001e-06, - "loss": 0.0518, + "epoch": 3.2921847246891653, + "grad_norm": 0.4737052791958284, + "learning_rate": 9.241589736720347e-07, + "loss": 0.0336, "step": 3707 }, { - "epoch": 1.6469020652898068, - "grad_norm": 0.4215235777284794, - "learning_rate": 7.319984044623274e-06, - "loss": 0.0374, + "epoch": 3.2930728241563054, + "grad_norm": 0.3466761014452016, + "learning_rate": 9.219153342431453e-07, + "loss": 0.0333, "step": 3708 }, { - "epoch": 1.647346213635354, - "grad_norm": 0.7018323227064814, - "learning_rate": 7.3182666429735236e-06, - "loss": 0.0589, + "epoch": 3.293960923623446, + "grad_norm": 0.3956546324980311, + "learning_rate": 9.196741450211322e-07, + "loss": 0.0334, "step": 3709 }, { - "epoch": 1.6477903619809018, - "grad_norm": 0.3811938009083553, - "learning_rate": 7.316548892848919e-06, - "loss": 0.0336, + "epoch": 3.294849023090586, + "grad_norm": 0.30393453654068525, + "learning_rate": 9.174354073525521e-07, + "loss": 0.0267, "step": 3710 }, { - "epoch": 1.648234510326449, - "grad_norm": 0.48162066324285185, - "learning_rate": 7.314830794507664e-06, - "loss": 0.0392, + "epoch": 3.2957371225577266, + "grad_norm": 0.32512515289364163, + "learning_rate": 9.151991225824891e-07, + "loss": 0.0275, "step": 3711 }, { - "epoch": 1.6486786586719964, - "grad_norm": 0.7241447798938966, - "learning_rate": 7.313112348208017e-06, - "loss": 0.0705, + "epoch": 3.2966252220248666, + "grad_norm": 0.3899953032700139, + "learning_rate": 9.129652920545562e-07, + "loss": 0.0364, "step": 3712 }, { - "epoch": 1.6491228070175439, - "grad_norm": 0.4040868157055559, - "learning_rate": 7.311393554208292e-06, - "loss": 0.0417, + "epoch": 3.297513321492007, + "grad_norm": 0.3984208555582086, + "learning_rate": 9.107339171108887e-07, + "loss": 0.0348, "step": 3713 }, { - "epoch": 1.6495669553630914, - "grad_norm": 0.43212705890325137, - "learning_rate": 7.3096744127668515e-06, - "loss": 0.0516, + "epoch": 3.2984014209591472, + "grad_norm": 0.32484139207266705, + "learning_rate": 9.085049990921497e-07, + "loss": 0.0296, "step": 3714 }, { - "epoch": 1.6500111037086387, - "grad_norm": 0.7227761525435585, - "learning_rate": 7.307954924142113e-06, - "loss": 0.0427, + "epoch": 3.2992895204262878, + "grad_norm": 0.4137772419589482, + "learning_rate": 9.062785393375228e-07, + "loss": 0.0402, "step": 3715 }, { - "epoch": 1.650455252054186, - "grad_norm": 0.6454421002922318, - "learning_rate": 7.306235088592545e-06, - "loss": 0.0709, + "epoch": 3.300177619893428, + "grad_norm": 0.38249895639311854, + "learning_rate": 9.040545391847155e-07, + "loss": 0.0344, "step": 3716 }, { - "epoch": 1.6508994003997335, - "grad_norm": 0.5263009072366842, - "learning_rate": 7.304514906376665e-06, - "loss": 0.0354, + "epoch": 3.3010657193605684, + "grad_norm": 0.3664008503037077, + "learning_rate": 9.018329999699599e-07, + "loss": 0.0345, "step": 3717 }, { - "epoch": 1.651343548745281, - "grad_norm": 0.4475608856046002, - "learning_rate": 7.3027943777530504e-06, - "loss": 0.0376, + "epoch": 3.301953818827709, + "grad_norm": 0.42487867897661014, + "learning_rate": 8.996139230280076e-07, + "loss": 0.0292, "step": 3718 }, { - "epoch": 1.6517876970908283, - "grad_norm": 0.4187029855449, - "learning_rate": 7.301073502980321e-06, - "loss": 0.032, + "epoch": 3.302841918294849, + "grad_norm": 0.37442585855265675, + "learning_rate": 8.973973096921285e-07, + "loss": 0.0302, "step": 3719 }, { - "epoch": 1.6522318454363758, - "grad_norm": 0.4688452297403804, - "learning_rate": 7.299352282317156e-06, - "loss": 0.0328, + "epoch": 3.3037300177619895, + "grad_norm": 0.40539585791226573, + "learning_rate": 8.95183161294118e-07, + "loss": 0.0344, "step": 3720 }, { - "epoch": 1.6526759937819233, - "grad_norm": 0.4905958290493516, - "learning_rate": 7.297630716022285e-06, - "loss": 0.0408, + "epoch": 3.3046181172291296, + "grad_norm": 0.3981479478515684, + "learning_rate": 8.929714791642846e-07, + "loss": 0.0296, "step": 3721 }, { - "epoch": 1.6531201421274706, - "grad_norm": 0.5196371163361964, - "learning_rate": 7.295908804354486e-06, - "loss": 0.0518, + "epoch": 3.30550621669627, + "grad_norm": 0.4407351031638227, + "learning_rate": 8.907622646314601e-07, + "loss": 0.0375, "step": 3722 }, { - "epoch": 1.653564290473018, - "grad_norm": 0.5745303077003567, - "learning_rate": 7.294186547572593e-06, - "loss": 0.0525, + "epoch": 3.30639431616341, + "grad_norm": 0.39268346732364684, + "learning_rate": 8.885555190229889e-07, + "loss": 0.0327, "step": 3723 }, { - "epoch": 1.6540084388185654, - "grad_norm": 0.4232397957508058, - "learning_rate": 7.292463945935492e-06, - "loss": 0.046, + "epoch": 3.3072824156305507, + "grad_norm": 0.3315670230253401, + "learning_rate": 8.86351243664737e-07, + "loss": 0.0344, "step": 3724 }, { - "epoch": 1.654452587164113, - "grad_norm": 0.5715502169683124, - "learning_rate": 7.290740999702117e-06, - "loss": 0.0502, + "epoch": 3.308170515097691, + "grad_norm": 0.36826434450412143, + "learning_rate": 8.841494398810813e-07, + "loss": 0.0287, "step": 3725 }, { - "epoch": 1.6548967355096602, - "grad_norm": 0.45041445198482544, - "learning_rate": 7.289017709131456e-06, - "loss": 0.0385, + "epoch": 3.3090586145648313, + "grad_norm": 0.40366740190123895, + "learning_rate": 8.819501089949179e-07, + "loss": 0.0257, "step": 3726 }, { - "epoch": 1.6553408838552075, - "grad_norm": 0.5898309113830352, - "learning_rate": 7.287294074482551e-06, - "loss": 0.0586, + "epoch": 3.3099467140319714, + "grad_norm": 0.3099488079064351, + "learning_rate": 8.797532523276542e-07, + "loss": 0.0255, "step": 3727 }, { - "epoch": 1.655785032200755, - "grad_norm": 0.42537829603624505, - "learning_rate": 7.285570096014491e-06, - "loss": 0.0382, + "epoch": 3.310834813499112, + "grad_norm": 0.5004868511195822, + "learning_rate": 8.775588711992117e-07, + "loss": 0.0309, "step": 3728 }, { - "epoch": 1.6562291805463025, - "grad_norm": 0.5068748041039001, - "learning_rate": 7.283845773986421e-06, - "loss": 0.0424, + "epoch": 3.3117229129662524, + "grad_norm": 0.4152989274534055, + "learning_rate": 8.753669669280263e-07, + "loss": 0.0439, "step": 3729 }, { - "epoch": 1.6566733288918498, - "grad_norm": 0.49158429307244256, - "learning_rate": 7.2821211086575365e-06, - "loss": 0.0413, + "epoch": 3.3126110124333925, + "grad_norm": 0.4499840275721616, + "learning_rate": 8.731775408310439e-07, + "loss": 0.0331, "step": 3730 }, { - "epoch": 1.6571174772373973, - "grad_norm": 0.5434741115681563, - "learning_rate": 7.280396100287082e-06, - "loss": 0.0415, + "epoch": 3.313499111900533, + "grad_norm": 0.48187952598845685, + "learning_rate": 8.709905942237206e-07, + "loss": 0.0306, "step": 3731 }, { - "epoch": 1.6575616255829448, - "grad_norm": 0.54047842476067, - "learning_rate": 7.278670749134356e-06, - "loss": 0.0337, + "epoch": 3.314387211367673, + "grad_norm": 0.610831295084475, + "learning_rate": 8.688061284200266e-07, + "loss": 0.0384, "step": 3732 }, { - "epoch": 1.6580057739284921, - "grad_norm": 0.7610613678272631, - "learning_rate": 7.276945055458709e-06, - "loss": 0.0513, + "epoch": 3.3152753108348136, + "grad_norm": 0.4034821390647482, + "learning_rate": 8.666241447324364e-07, + "loss": 0.0311, "step": 3733 }, { - "epoch": 1.6584499222740394, - "grad_norm": 0.43103079664602795, - "learning_rate": 7.275219019519542e-06, - "loss": 0.0365, + "epoch": 3.3161634103019537, + "grad_norm": 0.6499533896580538, + "learning_rate": 8.644446444719385e-07, + "loss": 0.0395, "step": 3734 }, { - "epoch": 1.658894070619587, - "grad_norm": 0.6707297267218786, - "learning_rate": 7.2734926415763074e-06, - "loss": 0.0425, + "epoch": 3.3170515097690942, + "grad_norm": 0.4237121525319725, + "learning_rate": 8.622676289480248e-07, + "loss": 0.04, "step": 3735 }, { - "epoch": 1.6593382189651344, - "grad_norm": 0.3906372094311339, - "learning_rate": 7.271765921888507e-06, - "loss": 0.0379, + "epoch": 3.3179396092362343, + "grad_norm": 0.3803000710175403, + "learning_rate": 8.600930994686962e-07, + "loss": 0.0325, "step": 3736 }, { - "epoch": 1.6597823673106817, - "grad_norm": 0.8497034117571121, - "learning_rate": 7.2700388607157e-06, - "loss": 0.0592, + "epoch": 3.318827708703375, + "grad_norm": 0.3969071493516198, + "learning_rate": 8.579210573404606e-07, + "loss": 0.0308, "step": 3737 }, { - "epoch": 1.660226515656229, - "grad_norm": 0.4906869751133543, - "learning_rate": 7.268311458317491e-06, - "loss": 0.0448, + "epoch": 3.319715808170515, + "grad_norm": 0.39349131446670027, + "learning_rate": 8.557515038683328e-07, + "loss": 0.0294, "step": 3738 }, { - "epoch": 1.6606706640017765, - "grad_norm": 0.46268372537996755, - "learning_rate": 7.266583714953536e-06, - "loss": 0.0363, + "epoch": 3.3206039076376554, + "grad_norm": 0.37583904590243433, + "learning_rate": 8.535844403558263e-07, + "loss": 0.0337, "step": 3739 }, { - "epoch": 1.661114812347324, - "grad_norm": 0.3974000929871205, - "learning_rate": 7.2648556308835476e-06, - "loss": 0.0375, + "epoch": 3.321492007104796, + "grad_norm": 0.3232199398831246, + "learning_rate": 8.51419868104964e-07, + "loss": 0.0232, "step": 3740 }, { - "epoch": 1.6615589606928713, - "grad_norm": 0.3782320844424881, - "learning_rate": 7.263127206367285e-06, - "loss": 0.0327, + "epoch": 3.322380106571936, + "grad_norm": 0.39660634854943194, + "learning_rate": 8.492577884162728e-07, + "loss": 0.0344, "step": 3741 }, { - "epoch": 1.6620031090384189, - "grad_norm": 0.7073636631807192, - "learning_rate": 7.2613984416645586e-06, - "loss": 0.0549, + "epoch": 3.323268206039076, + "grad_norm": 0.5936043101926092, + "learning_rate": 8.47098202588778e-07, + "loss": 0.0327, "step": 3742 }, { - "epoch": 1.6624472573839664, - "grad_norm": 0.48125971182332594, - "learning_rate": 7.2596693370352325e-06, - "loss": 0.0342, + "epoch": 3.3241563055062167, + "grad_norm": 0.40726891703153134, + "learning_rate": 8.449411119200074e-07, + "loss": 0.0387, "step": 3743 }, { - "epoch": 1.6628914057295137, - "grad_norm": 0.4622696042568772, - "learning_rate": 7.257939892739221e-06, - "loss": 0.0366, + "epoch": 3.325044404973357, + "grad_norm": 0.3523287663473434, + "learning_rate": 8.42786517705993e-07, + "loss": 0.032, "step": 3744 }, { - "epoch": 1.663335554075061, - "grad_norm": 0.4506482837249478, - "learning_rate": 7.256210109036485e-06, - "loss": 0.0399, + "epoch": 3.3259325044404973, + "grad_norm": 0.3076092239957714, + "learning_rate": 8.406344212412615e-07, + "loss": 0.0281, "step": 3745 }, { - "epoch": 1.6637797024206085, - "grad_norm": 0.468794971821978, - "learning_rate": 7.254479986187045e-06, - "loss": 0.0486, + "epoch": 3.326820603907638, + "grad_norm": 0.3798972726179211, + "learning_rate": 8.384848238188447e-07, + "loss": 0.0294, "step": 3746 }, { - "epoch": 1.664223850766156, - "grad_norm": 0.461349555479564, - "learning_rate": 7.252749524450967e-06, - "loss": 0.0399, + "epoch": 3.327708703374778, + "grad_norm": 0.3587293031362081, + "learning_rate": 8.363377267302691e-07, + "loss": 0.031, "step": 3747 }, { - "epoch": 1.6646679991117033, - "grad_norm": 0.35487507978196975, - "learning_rate": 7.251018724088367e-06, - "loss": 0.0399, + "epoch": 3.3285968028419184, + "grad_norm": 0.3541181682472956, + "learning_rate": 8.341931312655582e-07, + "loss": 0.029, "step": 3748 }, { - "epoch": 1.6651121474572506, - "grad_norm": 0.5665408950529203, - "learning_rate": 7.249287585359416e-06, - "loss": 0.0515, + "epoch": 3.3294849023090585, + "grad_norm": 0.3766437324480442, + "learning_rate": 8.320510387132358e-07, + "loss": 0.0343, "step": 3749 }, { - "epoch": 1.6655562958027983, - "grad_norm": 0.47765729176525834, - "learning_rate": 7.24755610852433e-06, - "loss": 0.0524, + "epoch": 3.330373001776199, + "grad_norm": 0.34932698482645635, + "learning_rate": 8.299114503603229e-07, + "loss": 0.0293, "step": 3750 }, { - "epoch": 1.6660004441483456, - "grad_norm": 0.49560492659442207, - "learning_rate": 7.245824293843382e-06, - "loss": 0.0474, + "epoch": 3.331261101243339, + "grad_norm": 0.42408207272194803, + "learning_rate": 8.277743674923272e-07, + "loss": 0.0333, "step": 3751 }, { - "epoch": 1.6664445924938929, - "grad_norm": 0.47858236763124845, - "learning_rate": 7.244092141576895e-06, - "loss": 0.0404, + "epoch": 3.3321492007104796, + "grad_norm": 0.39452469954487357, + "learning_rate": 8.256397913932612e-07, + "loss": 0.0331, "step": 3752 }, { - "epoch": 1.6668887408394404, - "grad_norm": 0.5242443798051569, - "learning_rate": 7.2423596519852354e-06, - "loss": 0.0564, + "epoch": 3.3330373001776197, + "grad_norm": 0.3351727421594338, + "learning_rate": 8.235077233456273e-07, + "loss": 0.0322, "step": 3753 }, { - "epoch": 1.667332889184988, - "grad_norm": 0.4276118939925525, - "learning_rate": 7.240626825328832e-06, - "loss": 0.0396, + "epoch": 3.33392539964476, + "grad_norm": 0.37069347321101703, + "learning_rate": 8.213781646304209e-07, + "loss": 0.0308, "step": 3754 }, { - "epoch": 1.6677770375305352, - "grad_norm": 0.365991499711862, - "learning_rate": 7.238893661868154e-06, - "loss": 0.0373, + "epoch": 3.3348134991119007, + "grad_norm": 0.4026695093751538, + "learning_rate": 8.192511165271267e-07, + "loss": 0.0348, "step": 3755 }, { - "epoch": 1.6682211858760825, - "grad_norm": 0.614415895237667, - "learning_rate": 7.237160161863725e-06, - "loss": 0.0528, + "epoch": 3.335701598579041, + "grad_norm": 0.39525398196727407, + "learning_rate": 8.171265803137279e-07, + "loss": 0.0313, "step": 3756 }, { - "epoch": 1.66866533422163, - "grad_norm": 0.40303741888400607, - "learning_rate": 7.235426325576123e-06, - "loss": 0.0407, + "epoch": 3.3365896980461813, + "grad_norm": 0.376849915981171, + "learning_rate": 8.150045572666921e-07, + "loss": 0.0296, "step": 3757 }, { - "epoch": 1.6691094825671775, - "grad_norm": 0.5735579153246894, - "learning_rate": 7.23369215326597e-06, - "loss": 0.047, + "epoch": 3.3374777975133214, + "grad_norm": 0.38621599822308506, + "learning_rate": 8.128850486609813e-07, + "loss": 0.0375, "step": 3758 }, { - "epoch": 1.6695536309127248, - "grad_norm": 0.6025767202418845, - "learning_rate": 7.231957645193943e-06, - "loss": 0.0375, + "epoch": 3.338365896980462, + "grad_norm": 0.44042208618378603, + "learning_rate": 8.107680557700442e-07, + "loss": 0.0317, "step": 3759 }, { - "epoch": 1.6699977792582723, - "grad_norm": 0.4473170515938267, - "learning_rate": 7.2302228016207666e-06, - "loss": 0.0483, + "epoch": 3.339253996447602, + "grad_norm": 0.334702391912227, + "learning_rate": 8.086535798658168e-07, + "loss": 0.0338, "step": 3760 }, { - "epoch": 1.6704419276038198, - "grad_norm": 0.677052589914415, - "learning_rate": 7.2284876228072195e-06, - "loss": 0.0635, + "epoch": 3.3401420959147425, + "grad_norm": 0.31134998390749746, + "learning_rate": 8.065416222187283e-07, + "loss": 0.0281, "step": 3761 }, { - "epoch": 1.6708860759493671, - "grad_norm": 0.5710811815695489, - "learning_rate": 7.226752109014127e-06, - "loss": 0.0464, + "epoch": 3.3410301953818826, + "grad_norm": 0.330437831672938, + "learning_rate": 8.044321840976876e-07, + "loss": 0.0282, "step": 3762 }, { - "epoch": 1.6713302242949144, - "grad_norm": 0.36528232704433305, - "learning_rate": 7.225016260502366e-06, - "loss": 0.0372, + "epoch": 3.341918294849023, + "grad_norm": 0.37975124020700324, + "learning_rate": 8.02325266770097e-07, + "loss": 0.0366, "step": 3763 }, { - "epoch": 1.671774372640462, - "grad_norm": 0.5011404570603847, - "learning_rate": 7.223280077532866e-06, - "loss": 0.0361, + "epoch": 3.342806394316163, + "grad_norm": 0.32857743009123846, + "learning_rate": 8.002208715018383e-07, + "loss": 0.031, "step": 3764 }, { - "epoch": 1.6722185209860094, - "grad_norm": 0.9792675536360905, - "learning_rate": 7.221543560366602e-06, - "loss": 0.0695, + "epoch": 3.3436944937833037, + "grad_norm": 0.3337041915180823, + "learning_rate": 7.981189995572825e-07, + "loss": 0.0268, "step": 3765 }, { - "epoch": 1.6726626693315567, - "grad_norm": 0.40690296889128674, - "learning_rate": 7.219806709264605e-06, - "loss": 0.0434, + "epoch": 3.3445825932504443, + "grad_norm": 0.431321499055408, + "learning_rate": 7.960196521992797e-07, + "loss": 0.0266, "step": 3766 }, { - "epoch": 1.673106817677104, - "grad_norm": 0.454187985377748, - "learning_rate": 7.21806952448795e-06, - "loss": 0.0353, + "epoch": 3.3454706927175843, + "grad_norm": 0.3830801589024942, + "learning_rate": 7.939228306891695e-07, + "loss": 0.0417, "step": 3767 }, { - "epoch": 1.6735509660226515, - "grad_norm": 0.41286435056484855, - "learning_rate": 7.216332006297769e-06, - "loss": 0.0379, + "epoch": 3.346358792184725, + "grad_norm": 0.34391562471941695, + "learning_rate": 7.918285362867684e-07, + "loss": 0.0353, "step": 3768 }, { - "epoch": 1.673995114368199, - "grad_norm": 0.6022194735287347, - "learning_rate": 7.2145941549552364e-06, - "loss": 0.0491, + "epoch": 3.347246891651865, + "grad_norm": 0.33981672446127464, + "learning_rate": 7.897367702503755e-07, + "loss": 0.0301, "step": 3769 }, { - "epoch": 1.6744392627137463, - "grad_norm": 0.368180906342434, - "learning_rate": 7.212855970721584e-06, - "loss": 0.0302, + "epoch": 3.3481349911190055, + "grad_norm": 0.31377349052923625, + "learning_rate": 7.876475338367745e-07, + "loss": 0.0293, "step": 3770 }, { - "epoch": 1.6748834110592938, - "grad_norm": 0.3923410824291727, - "learning_rate": 7.211117453858088e-06, - "loss": 0.0434, + "epoch": 3.3490230905861456, + "grad_norm": 0.33842976003769915, + "learning_rate": 7.855608283012251e-07, + "loss": 0.025, "step": 3771 }, { - "epoch": 1.6753275594048413, - "grad_norm": 0.5009695065668602, - "learning_rate": 7.209378604626081e-06, - "loss": 0.042, + "epoch": 3.349911190053286, + "grad_norm": 0.3429127010785955, + "learning_rate": 7.834766548974665e-07, + "loss": 0.0327, "step": 3772 }, { - "epoch": 1.6757717077503886, - "grad_norm": 0.41349339679074837, - "learning_rate": 7.207639423286938e-06, - "loss": 0.0363, + "epoch": 3.350799289520426, + "grad_norm": 0.7929300728178934, + "learning_rate": 7.813950148777205e-07, + "loss": 0.0334, "step": 3773 }, { - "epoch": 1.676215856095936, - "grad_norm": 0.363113317588028, - "learning_rate": 7.205899910102087e-06, - "loss": 0.0502, + "epoch": 3.3516873889875667, + "grad_norm": 0.3677005393618327, + "learning_rate": 7.793159094926822e-07, + "loss": 0.0248, "step": 3774 }, { - "epoch": 1.6766600044414834, - "grad_norm": 0.5667120483543412, - "learning_rate": 7.204160065333009e-06, - "loss": 0.047, + "epoch": 3.3525754884547068, + "grad_norm": 0.39449591355166663, + "learning_rate": 7.772393399915284e-07, + "loss": 0.0328, "step": 3775 }, { - "epoch": 1.677104152787031, - "grad_norm": 0.6591252875236339, - "learning_rate": 7.202419889241231e-06, - "loss": 0.0622, + "epoch": 3.3534635879218473, + "grad_norm": 0.34005889484332136, + "learning_rate": 7.751653076219073e-07, + "loss": 0.0291, "step": 3776 }, { - "epoch": 1.6775483011325782, - "grad_norm": 0.4627599954413904, - "learning_rate": 7.2006793820883315e-06, - "loss": 0.0294, + "epoch": 3.354351687388988, + "grad_norm": 0.6237973720484827, + "learning_rate": 7.730938136299448e-07, + "loss": 0.0352, "step": 3777 }, { - "epoch": 1.6779924494781255, - "grad_norm": 0.35617793994157315, - "learning_rate": 7.198938544135936e-06, - "loss": 0.0302, + "epoch": 3.355239786856128, + "grad_norm": 0.39755909316445903, + "learning_rate": 7.710248592602438e-07, + "loss": 0.0343, "step": 3778 }, { - "epoch": 1.6784365978236733, - "grad_norm": 0.4097862631071463, - "learning_rate": 7.197197375645724e-06, - "loss": 0.0414, + "epoch": 3.356127886323268, + "grad_norm": 0.3321730432216352, + "learning_rate": 7.689584457558808e-07, + "loss": 0.0296, "step": 3779 }, { - "epoch": 1.6788807461692206, - "grad_norm": 0.675818069401739, - "learning_rate": 7.195455876879425e-06, - "loss": 0.0386, + "epoch": 3.3570159857904085, + "grad_norm": 0.33482379380260235, + "learning_rate": 7.668945743584005e-07, + "loss": 0.0305, "step": 3780 }, { - "epoch": 1.6793248945147679, - "grad_norm": 0.5308299724023986, - "learning_rate": 7.193714048098812e-06, - "loss": 0.0425, + "epoch": 3.357904085257549, + "grad_norm": 0.33133186856325647, + "learning_rate": 7.648332463078261e-07, + "loss": 0.0301, "step": 3781 }, { - "epoch": 1.6797690428603154, - "grad_norm": 0.5374963949654195, - "learning_rate": 7.191971889565713e-06, - "loss": 0.0381, + "epoch": 3.358792184724689, + "grad_norm": 0.31778126152603536, + "learning_rate": 7.62774462842652e-07, + "loss": 0.0275, "step": 3782 }, { - "epoch": 1.6802131912058629, - "grad_norm": 0.5783394947551381, - "learning_rate": 7.190229401542004e-06, - "loss": 0.0568, + "epoch": 3.3596802841918296, + "grad_norm": 0.3147898697081914, + "learning_rate": 7.607182251998419e-07, + "loss": 0.0311, "step": 3783 }, { - "epoch": 1.6806573395514102, - "grad_norm": 0.5082898930645872, - "learning_rate": 7.18848658428961e-06, - "loss": 0.0421, + "epoch": 3.3605683836589697, + "grad_norm": 0.3680446869382627, + "learning_rate": 7.586645346148292e-07, + "loss": 0.033, "step": 3784 }, { - "epoch": 1.6811014878969575, - "grad_norm": 0.4025027461987578, - "learning_rate": 7.186743438070507e-06, - "loss": 0.0311, + "epoch": 3.3614564831261102, + "grad_norm": 0.4776113172426281, + "learning_rate": 7.566133923215202e-07, + "loss": 0.0304, "step": 3785 }, { - "epoch": 1.681545636242505, - "grad_norm": 0.3763513501474927, - "learning_rate": 7.1849999631467194e-06, - "loss": 0.0334, + "epoch": 3.3623445825932503, + "grad_norm": 0.3297207566950773, + "learning_rate": 7.545647995522865e-07, + "loss": 0.0304, "step": 3786 }, { - "epoch": 1.6819897845880525, - "grad_norm": 0.4272449780002536, - "learning_rate": 7.183256159780321e-06, - "loss": 0.0461, + "epoch": 3.363232682060391, + "grad_norm": 0.3819800062499834, + "learning_rate": 7.525187575379717e-07, + "loss": 0.0348, "step": 3787 }, { - "epoch": 1.6824339329335998, - "grad_norm": 0.6764426293492224, - "learning_rate": 7.181512028233433e-06, - "loss": 0.0494, + "epoch": 3.364120781527531, + "grad_norm": 0.3428977890729436, + "learning_rate": 7.504752675078836e-07, + "loss": 0.0286, "step": 3788 }, { - "epoch": 1.6828780812791473, - "grad_norm": 0.4493107523320909, - "learning_rate": 7.17976756876823e-06, - "loss": 0.0407, + "epoch": 3.3650088809946714, + "grad_norm": 0.3577025132434273, + "learning_rate": 7.484343306897973e-07, + "loss": 0.0303, "step": 3789 }, { - "epoch": 1.6833222296246948, - "grad_norm": 0.5311611910450604, - "learning_rate": 7.178022781646936e-06, - "loss": 0.0446, + "epoch": 3.3658969804618115, + "grad_norm": 0.7296831038086915, + "learning_rate": 7.463959483099547e-07, + "loss": 0.0438, "step": 3790 }, { - "epoch": 1.683766377970242, - "grad_norm": 0.49447252037240147, - "learning_rate": 7.176277667131817e-06, - "loss": 0.0418, + "epoch": 3.366785079928952, + "grad_norm": 0.4401383270258742, + "learning_rate": 7.443601215930662e-07, + "loss": 0.0336, "step": 3791 }, { - "epoch": 1.6842105263157894, - "grad_norm": 0.5692208839762621, - "learning_rate": 7.1745322254851966e-06, - "loss": 0.0492, + "epoch": 3.3676731793960926, + "grad_norm": 0.36126138694487814, + "learning_rate": 7.423268517622972e-07, + "loss": 0.0335, "step": 3792 }, { - "epoch": 1.684654674661337, - "grad_norm": 0.5169615968797148, - "learning_rate": 7.172786456969445e-06, - "loss": 0.0529, + "epoch": 3.3685612788632326, + "grad_norm": 0.3572275024120499, + "learning_rate": 7.402961400392867e-07, + "loss": 0.0314, "step": 3793 }, { - "epoch": 1.6850988230068844, - "grad_norm": 0.393440341612259, - "learning_rate": 7.171040361846979e-06, - "loss": 0.0467, + "epoch": 3.369449378330373, + "grad_norm": 0.4306726882285608, + "learning_rate": 7.382679876441329e-07, + "loss": 0.0304, "step": 3794 }, { - "epoch": 1.6855429713524317, - "grad_norm": 0.4049011862637367, - "learning_rate": 7.1692939403802676e-06, - "loss": 0.0395, + "epoch": 3.3703374777975132, + "grad_norm": 0.38675925468022065, + "learning_rate": 7.362423957953957e-07, + "loss": 0.0351, "step": 3795 }, { - "epoch": 1.685987119697979, - "grad_norm": 0.33756471256165815, - "learning_rate": 7.167547192831827e-06, - "loss": 0.0321, + "epoch": 3.3712255772646538, + "grad_norm": 0.4434276372282579, + "learning_rate": 7.34219365710096e-07, + "loss": 0.0311, "step": 3796 }, { - "epoch": 1.6864312680435265, - "grad_norm": 0.4670323249605969, - "learning_rate": 7.1658001194642225e-06, - "loss": 0.0433, + "epoch": 3.372113676731794, + "grad_norm": 0.41839888548301196, + "learning_rate": 7.321988986037193e-07, + "loss": 0.0294, "step": 3797 }, { - "epoch": 1.686875416389074, - "grad_norm": 0.47348531343201694, - "learning_rate": 7.16405272054007e-06, - "loss": 0.0391, + "epoch": 3.3730017761989344, + "grad_norm": 0.316783129372031, + "learning_rate": 7.301809956902051e-07, + "loss": 0.0298, "step": 3798 }, { - "epoch": 1.6873195647346213, - "grad_norm": 0.5297792409513463, - "learning_rate": 7.1623049963220325e-06, - "loss": 0.0396, + "epoch": 3.3738898756660745, + "grad_norm": 0.4591931338383639, + "learning_rate": 7.281656581819596e-07, + "loss": 0.031, "step": 3799 }, { - "epoch": 1.6877637130801688, - "grad_norm": 0.6022176716427052, - "learning_rate": 7.160556947072823e-06, - "loss": 0.0457, + "epoch": 3.374777975133215, + "grad_norm": 0.3422224581805127, + "learning_rate": 7.261528872898421e-07, + "loss": 0.0218, "step": 3800 }, { - "epoch": 1.6882078614257163, - "grad_norm": 0.4342556651872498, - "learning_rate": 7.158808573055205e-06, - "loss": 0.0406, + "epoch": 3.375666074600355, + "grad_norm": 0.3703462348547607, + "learning_rate": 7.241426842231697e-07, + "loss": 0.0366, "step": 3801 }, { - "epoch": 1.6886520097712636, - "grad_norm": 0.571370547347438, - "learning_rate": 7.157059874531982e-06, - "loss": 0.039, + "epoch": 3.3765541740674956, + "grad_norm": 0.4809962800480066, + "learning_rate": 7.221350501897217e-07, + "loss": 0.0329, "step": 3802 }, { - "epoch": 1.689096158116811, - "grad_norm": 0.6752816385308873, - "learning_rate": 7.155310851766022e-06, - "loss": 0.0518, + "epoch": 3.377442273534636, + "grad_norm": 0.3213178428399724, + "learning_rate": 7.201299863957295e-07, + "loss": 0.0263, "step": 3803 }, { - "epoch": 1.6895403064623584, - "grad_norm": 0.4681922042318178, - "learning_rate": 7.153561505020228e-06, - "loss": 0.0487, + "epoch": 3.378330373001776, + "grad_norm": 0.33091908597318836, + "learning_rate": 7.181274940458804e-07, + "loss": 0.0277, "step": 3804 }, { - "epoch": 1.689984454807906, - "grad_norm": 0.4722352367968629, - "learning_rate": 7.151811834557556e-06, - "loss": 0.0506, + "epoch": 3.3792184724689167, + "grad_norm": 0.3270173800334416, + "learning_rate": 7.161275743433182e-07, + "loss": 0.0289, "step": 3805 }, { - "epoch": 1.6904286031534532, - "grad_norm": 0.46550029686119654, - "learning_rate": 7.150061840641012e-06, - "loss": 0.042, + "epoch": 3.380106571936057, + "grad_norm": 0.3530297617803138, + "learning_rate": 7.141302284896424e-07, + "loss": 0.0307, "step": 3806 }, { - "epoch": 1.6908727514990005, - "grad_norm": 0.46380378977431014, - "learning_rate": 7.148311523533652e-06, - "loss": 0.051, + "epoch": 3.3809946714031973, + "grad_norm": 0.4346169163369569, + "learning_rate": 7.121354576849027e-07, + "loss": 0.0328, "step": 3807 }, { - "epoch": 1.691316899844548, - "grad_norm": 0.40338431598829894, - "learning_rate": 7.146560883498575e-06, - "loss": 0.0358, + "epoch": 3.3818827708703374, + "grad_norm": 0.3463311142800992, + "learning_rate": 7.101432631276022e-07, + "loss": 0.0314, "step": 3808 }, { - "epoch": 1.6917610481900955, - "grad_norm": 0.5997068568205779, - "learning_rate": 7.144809920798934e-06, - "loss": 0.0549, + "epoch": 3.382770870337478, + "grad_norm": 0.3468160599220919, + "learning_rate": 7.081536460146999e-07, + "loss": 0.0303, "step": 3809 }, { - "epoch": 1.6922051965356428, - "grad_norm": 0.746314445865342, - "learning_rate": 7.143058635697928e-06, - "loss": 0.0572, + "epoch": 3.383658969804618, + "grad_norm": 0.2985138989315133, + "learning_rate": 7.061666075416002e-07, + "loss": 0.0244, "step": 3810 }, { - "epoch": 1.6926493448811903, - "grad_norm": 0.6126953828556693, - "learning_rate": 7.141307028458805e-06, - "loss": 0.0581, + "epoch": 3.3845470692717585, + "grad_norm": 0.35681644177401894, + "learning_rate": 7.041821489021639e-07, + "loss": 0.0314, "step": 3811 }, { - "epoch": 1.6930934932267379, - "grad_norm": 0.45183518843270887, - "learning_rate": 7.13955509934486e-06, - "loss": 0.0527, + "epoch": 3.3854351687388986, + "grad_norm": 0.3627834172517948, + "learning_rate": 7.022002712886989e-07, + "loss": 0.034, "step": 3812 }, { - "epoch": 1.6935376415722851, - "grad_norm": 0.40741342750683424, - "learning_rate": 7.137802848619442e-06, - "loss": 0.0448, + "epoch": 3.386323268206039, + "grad_norm": 0.3246212083977764, + "learning_rate": 7.002209758919609e-07, + "loss": 0.0334, "step": 3813 }, { - "epoch": 1.6939817899178324, - "grad_norm": 0.409139232347241, - "learning_rate": 7.136050276545937e-06, - "loss": 0.0381, + "epoch": 3.387211367673179, + "grad_norm": 0.37909799404114786, + "learning_rate": 6.982442639011589e-07, + "loss": 0.04, "step": 3814 }, { - "epoch": 1.69442593826338, - "grad_norm": 0.532572512466246, - "learning_rate": 7.134297383387794e-06, - "loss": 0.0541, + "epoch": 3.3880994671403197, + "grad_norm": 0.35868018390808487, + "learning_rate": 6.962701365039448e-07, + "loss": 0.0416, "step": 3815 }, { - "epoch": 1.6948700866089275, - "grad_norm": 0.38463725645897007, - "learning_rate": 7.1325441694084955e-06, - "loss": 0.0413, + "epoch": 3.38898756660746, + "grad_norm": 0.35761964905048566, + "learning_rate": 6.942985948864195e-07, + "loss": 0.0353, "step": 3816 }, { - "epoch": 1.6953142349544748, - "grad_norm": 1.088465520533885, - "learning_rate": 7.130790634871585e-06, - "loss": 0.052, + "epoch": 3.3898756660746003, + "grad_norm": 0.377788647672182, + "learning_rate": 6.92329640233132e-07, + "loss": 0.0301, "step": 3817 }, { - "epoch": 1.695758383300022, - "grad_norm": 0.629929584219581, - "learning_rate": 7.129036780040646e-06, - "loss": 0.0567, + "epoch": 3.390763765541741, + "grad_norm": 0.34355742105522385, + "learning_rate": 6.903632737270732e-07, + "loss": 0.0309, "step": 3818 }, { - "epoch": 1.6962025316455698, - "grad_norm": 0.63053218226347, - "learning_rate": 7.127282605179311e-06, - "loss": 0.0532, + "epoch": 3.391651865008881, + "grad_norm": 0.3606687716210664, + "learning_rate": 6.883994965496832e-07, + "loss": 0.0349, "step": 3819 }, { - "epoch": 1.696646679991117, - "grad_norm": 0.4433559445013171, - "learning_rate": 7.125528110551266e-06, - "loss": 0.0463, + "epoch": 3.3925399644760215, + "grad_norm": 0.37505313589347944, + "learning_rate": 6.864383098808453e-07, + "loss": 0.036, "step": 3820 }, { - "epoch": 1.6970908283366644, - "grad_norm": 0.42229460674035696, - "learning_rate": 7.12377329642024e-06, - "loss": 0.0397, + "epoch": 3.3934280639431615, + "grad_norm": 0.3730142697141216, + "learning_rate": 6.844797148988824e-07, + "loss": 0.0348, "step": 3821 }, { - "epoch": 1.6975349766822119, - "grad_norm": 0.49873647457140674, - "learning_rate": 7.122018163050011e-06, - "loss": 0.0545, + "epoch": 3.394316163410302, + "grad_norm": 0.37808648585134375, + "learning_rate": 6.825237127805645e-07, + "loss": 0.0322, "step": 3822 }, { - "epoch": 1.6979791250277594, - "grad_norm": 0.6060966583878594, - "learning_rate": 7.1202627107044035e-06, - "loss": 0.0457, + "epoch": 3.395204262877442, + "grad_norm": 0.45339569519833145, + "learning_rate": 6.805703047011048e-07, + "loss": 0.0379, "step": 3823 }, { - "epoch": 1.6984232733733067, - "grad_norm": 0.603057650967115, - "learning_rate": 7.118506939647295e-06, - "loss": 0.0464, + "epoch": 3.3960923623445827, + "grad_norm": 0.32829297562673115, + "learning_rate": 6.786194918341532e-07, + "loss": 0.0283, "step": 3824 }, { - "epoch": 1.698867421718854, - "grad_norm": 0.6194476035587843, - "learning_rate": 7.116750850142606e-06, - "loss": 0.0457, + "epoch": 3.3969804618117228, + "grad_norm": 0.4386721053287848, + "learning_rate": 6.766712753518029e-07, + "loss": 0.0406, "step": 3825 }, { - "epoch": 1.6993115700644015, - "grad_norm": 0.5096734124949287, - "learning_rate": 7.114994442454306e-06, - "loss": 0.045, + "epoch": 3.3978685612788633, + "grad_norm": 0.3074810407951773, + "learning_rate": 6.747256564245886e-07, + "loss": 0.0261, "step": 3826 }, { - "epoch": 1.699755718409949, - "grad_norm": 0.5365035460325245, - "learning_rate": 7.113237716846416e-06, - "loss": 0.0623, + "epoch": 3.3987566607460034, + "grad_norm": 0.3103220152117213, + "learning_rate": 6.727826362214806e-07, + "loss": 0.0257, "step": 3827 }, { - "epoch": 1.7001998667554963, - "grad_norm": 0.4418379177592074, - "learning_rate": 7.111480673582998e-06, - "loss": 0.0434, + "epoch": 3.399644760213144, + "grad_norm": 0.5819057664604828, + "learning_rate": 6.708422159098927e-07, + "loss": 0.0338, "step": 3828 }, { - "epoch": 1.7006440151010438, - "grad_norm": 0.5486324781269635, - "learning_rate": 7.1097233129281674e-06, - "loss": 0.0515, + "epoch": 3.4005328596802844, + "grad_norm": 0.3441348225626167, + "learning_rate": 6.68904396655673e-07, + "loss": 0.0316, "step": 3829 }, { - "epoch": 1.7010881634465913, - "grad_norm": 0.46832429223654365, - "learning_rate": 7.107965635146085e-06, - "loss": 0.036, + "epoch": 3.4014209591474245, + "grad_norm": 0.34261708764837284, + "learning_rate": 6.66969179623106e-07, + "loss": 0.0342, "step": 3830 }, { - "epoch": 1.7015323117921386, - "grad_norm": 0.4428576565727785, - "learning_rate": 7.106207640500959e-06, - "loss": 0.0431, + "epoch": 3.402309058614565, + "grad_norm": 0.3593789300176169, + "learning_rate": 6.650365659749158e-07, + "loss": 0.0333, "step": 3831 }, { - "epoch": 1.7019764601376859, - "grad_norm": 0.44260932357845195, - "learning_rate": 7.104449329257047e-06, - "loss": 0.0488, + "epoch": 3.403197158081705, + "grad_norm": 0.38578061504573385, + "learning_rate": 6.631065568722633e-07, + "loss": 0.0312, "step": 3832 }, { - "epoch": 1.7024206084832334, - "grad_norm": 0.6480909417028274, - "learning_rate": 7.10269070167865e-06, - "loss": 0.0538, + "epoch": 3.4040852575488456, + "grad_norm": 0.36694991342779637, + "learning_rate": 6.61179153474738e-07, + "loss": 0.0338, "step": 3833 }, { - "epoch": 1.702864756828781, - "grad_norm": 0.5907643196584621, - "learning_rate": 7.100931758030126e-06, - "loss": 0.0667, + "epoch": 3.4049733570159857, + "grad_norm": 0.3054111576522598, + "learning_rate": 6.592543569403709e-07, + "loss": 0.027, "step": 3834 }, { - "epoch": 1.7033089051743282, - "grad_norm": 0.530721353895675, - "learning_rate": 7.0991724985758694e-06, - "loss": 0.0355, + "epoch": 3.405861456483126, + "grad_norm": 0.5506357580003206, + "learning_rate": 6.573321684256239e-07, + "loss": 0.0374, "step": 3835 }, { - "epoch": 1.7037530535198755, - "grad_norm": 0.536328694373341, - "learning_rate": 7.0974129235803256e-06, - "loss": 0.0492, + "epoch": 3.4067495559502663, + "grad_norm": 0.39472711253687154, + "learning_rate": 6.554125890853913e-07, + "loss": 0.0275, "step": 3836 }, { - "epoch": 1.704197201865423, - "grad_norm": 0.47794780473543447, - "learning_rate": 7.095653033307992e-06, - "loss": 0.0394, + "epoch": 3.407637655417407, + "grad_norm": 0.35724093468200274, + "learning_rate": 6.534956200729997e-07, + "loss": 0.0248, "step": 3837 }, { - "epoch": 1.7046413502109705, - "grad_norm": 0.5788739767339988, - "learning_rate": 7.093892828023408e-06, - "loss": 0.0684, + "epoch": 3.408525754884547, + "grad_norm": 0.31993450972444076, + "learning_rate": 6.515812625402096e-07, + "loss": 0.0317, "step": 3838 }, { - "epoch": 1.7050854985565178, - "grad_norm": 0.4591656439375814, - "learning_rate": 7.092132307991163e-06, - "loss": 0.0426, + "epoch": 3.4094138543516874, + "grad_norm": 0.35658484753581615, + "learning_rate": 6.496695176372092e-07, + "loss": 0.0335, "step": 3839 }, { - "epoch": 1.7055296469020653, - "grad_norm": 0.4376969955300339, - "learning_rate": 7.090371473475894e-06, - "loss": 0.0424, + "epoch": 3.410301953818828, + "grad_norm": 0.38624270368575286, + "learning_rate": 6.477603865126214e-07, + "loss": 0.031, "step": 3840 }, { - "epoch": 1.7059737952476128, - "grad_norm": 0.43964430921337727, - "learning_rate": 7.088610324742282e-06, - "loss": 0.0348, + "epoch": 3.411190053285968, + "grad_norm": 0.38377401466630545, + "learning_rate": 6.458538703134937e-07, + "loss": 0.0353, "step": 3841 }, { - "epoch": 1.7064179435931601, - "grad_norm": 0.40903365943544095, - "learning_rate": 7.086848862055059e-06, - "loss": 0.0317, + "epoch": 3.412078152753108, + "grad_norm": 0.3571595356343655, + "learning_rate": 6.439499701853046e-07, + "loss": 0.0331, "step": 3842 }, { - "epoch": 1.7068620919387074, - "grad_norm": 0.5320572070600705, - "learning_rate": 7.085087085679003e-06, - "loss": 0.0493, + "epoch": 3.4129662522202486, + "grad_norm": 0.3937687453830249, + "learning_rate": 6.420486872719634e-07, + "loss": 0.0373, "step": 3843 }, { - "epoch": 1.707306240284255, - "grad_norm": 0.4182055882812196, - "learning_rate": 7.0833249958789396e-06, - "loss": 0.0413, + "epoch": 3.413854351687389, + "grad_norm": 0.3466251624699343, + "learning_rate": 6.401500227158026e-07, + "loss": 0.0314, "step": 3844 }, { - "epoch": 1.7077503886298024, - "grad_norm": 0.5174573980801822, - "learning_rate": 7.081562592919737e-06, - "loss": 0.0468, + "epoch": 3.4147424511545292, + "grad_norm": 0.33100852598698377, + "learning_rate": 6.382539776575825e-07, + "loss": 0.0328, "step": 3845 }, { - "epoch": 1.7081945369753497, - "grad_norm": 0.36708254507339966, - "learning_rate": 7.07979987706632e-06, - "loss": 0.0392, + "epoch": 3.4156305506216698, + "grad_norm": 0.4058975020108741, + "learning_rate": 6.363605532364931e-07, + "loss": 0.0331, "step": 3846 }, { - "epoch": 1.708638685320897, - "grad_norm": 0.42988860867301804, - "learning_rate": 7.078036848583651e-06, - "loss": 0.0438, + "epoch": 3.41651865008881, + "grad_norm": 0.37458646195238915, + "learning_rate": 6.344697505901448e-07, + "loss": 0.0362, "step": 3847 }, { - "epoch": 1.7090828336664448, - "grad_norm": 0.8624108371914289, - "learning_rate": 7.076273507736744e-06, - "loss": 0.0564, + "epoch": 3.4174067495559504, + "grad_norm": 0.3249928537684176, + "learning_rate": 6.325815708545774e-07, + "loss": 0.0286, "step": 3848 }, { - "epoch": 1.709526982011992, - "grad_norm": 0.3825294027975058, - "learning_rate": 7.074509854790659e-06, - "loss": 0.042, + "epoch": 3.4182948490230904, + "grad_norm": 0.40535681839925725, + "learning_rate": 6.30696015164251e-07, + "loss": 0.0368, "step": 3849 }, { - "epoch": 1.7099711303575393, - "grad_norm": 0.7845280836992656, - "learning_rate": 7.072745890010502e-06, - "loss": 0.0513, + "epoch": 3.419182948490231, + "grad_norm": 0.2808714507262059, + "learning_rate": 6.288130846520518e-07, + "loss": 0.0181, "step": 3850 }, { - "epoch": 1.7104152787030868, - "grad_norm": 0.511895797137735, - "learning_rate": 7.070981613661429e-06, - "loss": 0.042, + "epoch": 3.420071047957371, + "grad_norm": 0.36029572937951054, + "learning_rate": 6.269327804492869e-07, + "loss": 0.0323, "step": 3851 }, { - "epoch": 1.7108594270486344, - "grad_norm": 0.39864960740423, - "learning_rate": 7.06921702600864e-06, - "loss": 0.0435, + "epoch": 3.4209591474245116, + "grad_norm": 0.4272451003033633, + "learning_rate": 6.250551036856872e-07, + "loss": 0.03, "step": 3852 }, { - "epoch": 1.7113035753941817, - "grad_norm": 0.3728345248612773, - "learning_rate": 7.067452127317381e-06, - "loss": 0.0414, + "epoch": 3.4218472468916517, + "grad_norm": 0.42111729049541885, + "learning_rate": 6.231800554894029e-07, + "loss": 0.0321, "step": 3853 }, { - "epoch": 1.711747723739729, - "grad_norm": 0.4146176183523404, - "learning_rate": 7.065686917852948e-06, - "loss": 0.0394, + "epoch": 3.422735346358792, + "grad_norm": 0.5370252081560716, + "learning_rate": 6.213076369870064e-07, + "loss": 0.0397, "step": 3854 }, { - "epoch": 1.7121918720852765, - "grad_norm": 0.4550263965386846, - "learning_rate": 7.063921397880682e-06, - "loss": 0.0353, + "epoch": 3.4236234458259327, + "grad_norm": 0.5099818288497758, + "learning_rate": 6.194378493034902e-07, + "loss": 0.0408, "step": 3855 }, { - "epoch": 1.712636020430824, - "grad_norm": 0.6273234491307014, - "learning_rate": 7.062155567665969e-06, - "loss": 0.0336, + "epoch": 3.424511545293073, + "grad_norm": 0.3515689219016062, + "learning_rate": 6.175706935622655e-07, + "loss": 0.0293, "step": 3856 }, { - "epoch": 1.7130801687763713, - "grad_norm": 0.5739885387932523, - "learning_rate": 7.0603894274742445e-06, - "loss": 0.0463, + "epoch": 3.4253996447602133, + "grad_norm": 0.333320779183509, + "learning_rate": 6.157061708851608e-07, + "loss": 0.0309, "step": 3857 }, { - "epoch": 1.7135243171219188, - "grad_norm": 0.46562620789463843, - "learning_rate": 7.05862297757099e-06, - "loss": 0.0453, + "epoch": 3.4262877442273534, + "grad_norm": 0.31170693918969006, + "learning_rate": 6.138442823924262e-07, + "loss": 0.028, "step": 3858 }, { - "epoch": 1.7139684654674663, - "grad_norm": 0.5078681314633765, - "learning_rate": 7.056856218221731e-06, - "loss": 0.0472, + "epoch": 3.427175843694494, + "grad_norm": 0.4287412167074321, + "learning_rate": 6.119850292027257e-07, + "loss": 0.0345, "step": 3859 }, { - "epoch": 1.7144126138130136, - "grad_norm": 0.44379024742354006, - "learning_rate": 7.055089149692044e-06, - "loss": 0.0362, + "epoch": 3.428063943161634, + "grad_norm": 0.34100670754132295, + "learning_rate": 6.101284124331425e-07, + "loss": 0.0327, "step": 3860 }, { - "epoch": 1.7148567621585609, - "grad_norm": 0.6765029010883838, - "learning_rate": 7.053321772247546e-06, - "loss": 0.0605, + "epoch": 3.4289520426287745, + "grad_norm": 0.3658376617686606, + "learning_rate": 6.082744331991742e-07, + "loss": 0.0295, "step": 3861 }, { - "epoch": 1.7153009105041084, - "grad_norm": 0.5667840551717872, - "learning_rate": 7.051554086153907e-06, - "loss": 0.052, + "epoch": 3.4298401420959146, + "grad_norm": 0.3125205886058981, + "learning_rate": 6.064230926147324e-07, + "loss": 0.0233, "step": 3862 }, { - "epoch": 1.715745058849656, - "grad_norm": 0.4505940404872499, - "learning_rate": 7.049786091676838e-06, - "loss": 0.0437, + "epoch": 3.430728241563055, + "grad_norm": 0.3845938917922705, + "learning_rate": 6.045743917921465e-07, + "loss": 0.0307, "step": 3863 }, { - "epoch": 1.7161892071952032, - "grad_norm": 0.3740985912782501, - "learning_rate": 7.0480177890821e-06, - "loss": 0.034, + "epoch": 3.431616341030195, + "grad_norm": 0.3723638375926968, + "learning_rate": 6.027283318421606e-07, + "loss": 0.0425, "step": 3864 }, { - "epoch": 1.7166333555407505, - "grad_norm": 0.5770251755986612, - "learning_rate": 7.046249178635499e-06, - "loss": 0.0502, + "epoch": 3.4325044404973357, + "grad_norm": 0.39544546921658086, + "learning_rate": 6.008849138739253e-07, + "loss": 0.028, "step": 3865 }, { - "epoch": 1.717077503886298, - "grad_norm": 0.5237695494745064, - "learning_rate": 7.044480260602888e-06, - "loss": 0.0565, + "epoch": 3.4333925399644762, + "grad_norm": 0.3377847882668556, + "learning_rate": 5.990441389950103e-07, + "loss": 0.0216, "step": 3866 }, { - "epoch": 1.7175216522318455, - "grad_norm": 0.49695229712892924, - "learning_rate": 7.042711035250162e-06, - "loss": 0.0526, + "epoch": 3.4342806394316163, + "grad_norm": 0.34808826310593893, + "learning_rate": 5.972060083113973e-07, + "loss": 0.0265, "step": 3867 }, { - "epoch": 1.7179658005773928, - "grad_norm": 0.7710570644630359, - "learning_rate": 7.0409415028432685e-06, - "loss": 0.0353, + "epoch": 3.435168738898757, + "grad_norm": 0.36342666131894075, + "learning_rate": 5.953705229274758e-07, + "loss": 0.0272, "step": 3868 }, { - "epoch": 1.7184099489229403, - "grad_norm": 0.5569345846072523, - "learning_rate": 7.0391716636481976e-06, - "loss": 0.0463, + "epoch": 3.436056838365897, + "grad_norm": 0.3648828072127289, + "learning_rate": 5.935376839460466e-07, + "loss": 0.033, "step": 3869 }, { - "epoch": 1.7188540972684878, - "grad_norm": 0.46227610895953286, - "learning_rate": 7.037401517930986e-06, - "loss": 0.0518, + "epoch": 3.4369449378330375, + "grad_norm": 0.37588337787367926, + "learning_rate": 5.917074924683236e-07, + "loss": 0.0334, "step": 3870 }, { - "epoch": 1.719298245614035, - "grad_norm": 0.3855965081893482, - "learning_rate": 7.035631065957718e-06, - "loss": 0.0345, + "epoch": 3.4378330373001775, + "grad_norm": 0.41140078655464724, + "learning_rate": 5.898799495939256e-07, + "loss": 0.036, "step": 3871 }, { - "epoch": 1.7197423939595824, - "grad_norm": 0.4363418780122806, - "learning_rate": 7.03386030799452e-06, - "loss": 0.0492, + "epoch": 3.438721136767318, + "grad_norm": 0.3686579848929117, + "learning_rate": 5.880550564208848e-07, + "loss": 0.0367, "step": 3872 }, { - "epoch": 1.72018654230513, - "grad_norm": 0.4427385688478732, - "learning_rate": 7.03208924430757e-06, - "loss": 0.0463, + "epoch": 3.439609236234458, + "grad_norm": 0.3536856126935369, + "learning_rate": 5.862328140456375e-07, + "loss": 0.0274, "step": 3873 }, { - "epoch": 1.7206306906506774, - "grad_norm": 0.5897859219840801, - "learning_rate": 7.030317875163086e-06, - "loss": 0.0472, + "epoch": 3.4404973357015987, + "grad_norm": 0.36570242055566476, + "learning_rate": 5.844132235630273e-07, + "loss": 0.0239, "step": 3874 }, { - "epoch": 1.7210748389962247, - "grad_norm": 0.44136482686712686, - "learning_rate": 7.0285462008273365e-06, - "loss": 0.0339, + "epoch": 3.4413854351687387, + "grad_norm": 0.3286523043915312, + "learning_rate": 5.825962860663076e-07, + "loss": 0.0283, "step": 3875 }, { - "epoch": 1.721518987341772, - "grad_norm": 0.4341786806553805, - "learning_rate": 7.026774221566634e-06, - "loss": 0.0479, + "epoch": 3.4422735346358793, + "grad_norm": 0.4536081363912329, + "learning_rate": 5.807820026471383e-07, + "loss": 0.0346, "step": 3876 }, { - "epoch": 1.7219631356873195, - "grad_norm": 0.4859029892336248, - "learning_rate": 7.0250019376473375e-06, - "loss": 0.0481, + "epoch": 3.44316163410302, + "grad_norm": 0.3770356725855568, + "learning_rate": 5.789703743955782e-07, + "loss": 0.0336, "step": 3877 }, { - "epoch": 1.722407284032867, - "grad_norm": 0.554393064292429, - "learning_rate": 7.0232293493358515e-06, - "loss": 0.0423, + "epoch": 3.44404973357016, + "grad_norm": 0.38629127404201613, + "learning_rate": 5.771614024000966e-07, + "loss": 0.0297, "step": 3878 }, { - "epoch": 1.7228514323784143, - "grad_norm": 0.4954355417965089, - "learning_rate": 7.021456456898624e-06, - "loss": 0.0529, + "epoch": 3.4449378330373, + "grad_norm": 0.45554001871843214, + "learning_rate": 5.753550877475672e-07, + "loss": 0.0395, "step": 3879 }, { - "epoch": 1.7232955807239618, - "grad_norm": 0.39348052805418005, - "learning_rate": 7.019683260602155e-06, - "loss": 0.04, + "epoch": 3.4458259325044405, + "grad_norm": 0.3828994357826869, + "learning_rate": 5.735514315232643e-07, + "loss": 0.0259, "step": 3880 }, { - "epoch": 1.7237397290695093, - "grad_norm": 0.4876831116193254, - "learning_rate": 7.017909760712982e-06, - "loss": 0.0416, + "epoch": 3.446714031971581, + "grad_norm": 0.3400942987707126, + "learning_rate": 5.717504348108649e-07, + "loss": 0.0271, "step": 3881 }, { - "epoch": 1.7241838774150566, - "grad_norm": 0.5390273774730024, - "learning_rate": 7.016135957497693e-06, - "loss": 0.0685, + "epoch": 3.447602131438721, + "grad_norm": 0.40395498260738727, + "learning_rate": 5.699520986924506e-07, + "loss": 0.0285, "step": 3882 }, { - "epoch": 1.724628025760604, - "grad_norm": 0.553831689747292, - "learning_rate": 7.014361851222923e-06, - "loss": 0.0575, + "epoch": 3.4484902309058616, + "grad_norm": 0.4610290801217755, + "learning_rate": 5.681564242485011e-07, + "loss": 0.0361, "step": 3883 }, { - "epoch": 1.7250721741061514, - "grad_norm": 0.5211453167042258, - "learning_rate": 7.012587442155349e-06, - "loss": 0.0508, + "epoch": 3.4493783303730017, + "grad_norm": 0.3904955359718084, + "learning_rate": 5.663634125579015e-07, + "loss": 0.0374, "step": 3884 }, { - "epoch": 1.725516322451699, - "grad_norm": 0.5052516566700862, - "learning_rate": 7.010812730561691e-06, - "loss": 0.0411, + "epoch": 3.450266429840142, + "grad_norm": 0.4051125274418542, + "learning_rate": 5.64573064697933e-07, + "loss": 0.0314, "step": 3885 }, { - "epoch": 1.7259604707972462, - "grad_norm": 0.5073428394585604, - "learning_rate": 7.009037716708725e-06, - "loss": 0.0426, + "epoch": 3.4511545293072823, + "grad_norm": 0.41715274332674607, + "learning_rate": 5.627853817442764e-07, + "loss": 0.0387, "step": 3886 }, { - "epoch": 1.7264046191427935, - "grad_norm": 0.48746152511684293, - "learning_rate": 7.007262400863262e-06, - "loss": 0.043, + "epoch": 3.452042628774423, + "grad_norm": 0.41375043211529533, + "learning_rate": 5.610003647710155e-07, + "loss": 0.0358, "step": 3887 }, { - "epoch": 1.7268487674883413, - "grad_norm": 0.48800631902973207, - "learning_rate": 7.005486783292164e-06, - "loss": 0.0416, + "epoch": 3.452930728241563, + "grad_norm": 0.3849340825322553, + "learning_rate": 5.592180148506266e-07, + "loss": 0.0357, "step": 3888 }, { - "epoch": 1.7272929158338886, - "grad_norm": 0.5824704615616569, - "learning_rate": 7.003710864262333e-06, - "loss": 0.0485, + "epoch": 3.4538188277087034, + "grad_norm": 0.38366477763105916, + "learning_rate": 5.574383330539884e-07, + "loss": 0.033, "step": 3889 }, { - "epoch": 1.7277370641794358, - "grad_norm": 0.4562507795280803, - "learning_rate": 7.0019346440407225e-06, - "loss": 0.0392, + "epoch": 3.4547069271758435, + "grad_norm": 0.36351596629126487, + "learning_rate": 5.55661320450373e-07, + "loss": 0.0276, "step": 3890 }, { - "epoch": 1.7281812125249834, - "grad_norm": 0.5660300341993364, - "learning_rate": 7.000158122894329e-06, - "loss": 0.0443, + "epoch": 3.455595026642984, + "grad_norm": 0.32163822954491966, + "learning_rate": 5.53886978107453e-07, + "loss": 0.0243, "step": 3891 }, { - "epoch": 1.7286253608705309, - "grad_norm": 1.2237037452955262, - "learning_rate": 6.9983813010901925e-06, - "loss": 0.0446, + "epoch": 3.4564831261101245, + "grad_norm": 0.3333783406879094, + "learning_rate": 5.521153070912905e-07, + "loss": 0.0278, "step": 3892 }, { - "epoch": 1.7290695092160782, - "grad_norm": 0.3834611348340635, - "learning_rate": 6.996604178895398e-06, - "loss": 0.0321, + "epoch": 3.4573712255772646, + "grad_norm": 0.36992938438025735, + "learning_rate": 5.50346308466349e-07, + "loss": 0.0319, "step": 3893 }, { - "epoch": 1.7295136575616255, - "grad_norm": 0.500508115884937, - "learning_rate": 6.994826756577082e-06, - "loss": 0.0547, + "epoch": 3.458259325044405, + "grad_norm": 0.4150549761478952, + "learning_rate": 5.48579983295483e-07, + "loss": 0.0375, "step": 3894 }, { - "epoch": 1.729957805907173, - "grad_norm": 0.41016179375129264, - "learning_rate": 6.993049034402417e-06, - "loss": 0.0299, + "epoch": 3.459147424511545, + "grad_norm": 0.3905131262318993, + "learning_rate": 5.46816332639939e-07, + "loss": 0.0333, "step": 3895 }, { - "epoch": 1.7304019542527205, - "grad_norm": 0.5423253130879615, - "learning_rate": 6.991271012638626e-06, - "loss": 0.0616, + "epoch": 3.4600355239786857, + "grad_norm": 0.29509890571878317, + "learning_rate": 5.450553575593614e-07, + "loss": 0.0268, "step": 3896 }, { - "epoch": 1.7308461025982678, - "grad_norm": 0.5282418101161326, - "learning_rate": 6.9894926915529774e-06, - "loss": 0.0457, + "epoch": 3.460923623445826, + "grad_norm": 0.33757827339222873, + "learning_rate": 5.432970591117842e-07, + "loss": 0.0352, "step": 3897 }, { - "epoch": 1.7312902509438153, - "grad_norm": 0.4989861140225304, - "learning_rate": 6.987714071412781e-06, - "loss": 0.0408, + "epoch": 3.4618117229129663, + "grad_norm": 0.3982455299082649, + "learning_rate": 5.415414383536311e-07, + "loss": 0.0371, "step": 3898 }, { - "epoch": 1.7317343992893628, - "grad_norm": 0.48460144374543296, - "learning_rate": 6.985935152485392e-06, - "loss": 0.0508, + "epoch": 3.4626998223801064, + "grad_norm": 0.36234715051564076, + "learning_rate": 5.397884963397215e-07, + "loss": 0.0308, "step": 3899 }, { - "epoch": 1.73217854763491, - "grad_norm": 0.37082720341713776, - "learning_rate": 6.984155935038217e-06, - "loss": 0.0376, + "epoch": 3.463587921847247, + "grad_norm": 0.34629684712057873, + "learning_rate": 5.380382341232626e-07, + "loss": 0.0306, "step": 3900 }, { - "epoch": 1.7326226959804574, - "grad_norm": 0.6794555649441637, - "learning_rate": 6.9823764193387e-06, - "loss": 0.0568, + "epoch": 3.464476021314387, + "grad_norm": 0.3027187589773608, + "learning_rate": 5.362906527558525e-07, + "loss": 0.0312, "step": 3901 }, { - "epoch": 1.7330668443260049, - "grad_norm": 0.5551808325017253, - "learning_rate": 6.980596605654332e-06, - "loss": 0.0502, + "epoch": 3.4653641207815276, + "grad_norm": 0.3772026083756171, + "learning_rate": 5.345457532874782e-07, + "loss": 0.0347, "step": 3902 }, { - "epoch": 1.7335109926715524, - "grad_norm": 0.5306956016396489, - "learning_rate": 6.9788164942526495e-06, - "loss": 0.0432, + "epoch": 3.466252220248668, + "grad_norm": 0.34451385929900896, + "learning_rate": 5.328035367665141e-07, + "loss": 0.03, "step": 3903 }, { - "epoch": 1.7339551410170997, - "grad_norm": 0.6362458412949452, - "learning_rate": 6.977036085401234e-06, - "loss": 0.0486, + "epoch": 3.467140319715808, + "grad_norm": 0.33209481947448777, + "learning_rate": 5.310640042397242e-07, + "loss": 0.0267, "step": 3904 }, { - "epoch": 1.734399289362647, - "grad_norm": 0.29485239085618, - "learning_rate": 6.9752553793677105e-06, - "loss": 0.0245, + "epoch": 3.4680284191829482, + "grad_norm": 0.4281994897154462, + "learning_rate": 5.293271567522629e-07, + "loss": 0.0327, "step": 3905 }, { - "epoch": 1.7348434377081945, - "grad_norm": 0.5487150575741024, - "learning_rate": 6.9734743764197485e-06, - "loss": 0.0463, + "epoch": 3.4689165186500888, + "grad_norm": 0.36843999056279964, + "learning_rate": 5.275929953476627e-07, + "loss": 0.0301, "step": 3906 }, { - "epoch": 1.735287586053742, - "grad_norm": 0.7871819167162935, - "learning_rate": 6.9716930768250655e-06, - "loss": 0.0524, + "epoch": 3.4698046181172293, + "grad_norm": 0.332406773470646, + "learning_rate": 5.258615210678508e-07, + "loss": 0.0285, "step": 3907 }, { - "epoch": 1.7357317343992893, - "grad_norm": 0.5219429848305476, - "learning_rate": 6.9699114808514215e-06, - "loss": 0.0549, + "epoch": 3.4706927175843694, + "grad_norm": 0.3390248706610025, + "learning_rate": 5.241327349531367e-07, + "loss": 0.0333, "step": 3908 }, { - "epoch": 1.7361758827448368, - "grad_norm": 0.4635849498371789, - "learning_rate": 6.968129588766617e-06, - "loss": 0.0604, + "epoch": 3.47158081705151, + "grad_norm": 0.4752550168976335, + "learning_rate": 5.224066380422143e-07, + "loss": 0.0388, "step": 3909 }, { - "epoch": 1.7366200310903843, - "grad_norm": 0.43616273854041554, - "learning_rate": 6.966347400838502e-06, - "loss": 0.0373, + "epoch": 3.47246891651865, + "grad_norm": 0.3984019141090526, + "learning_rate": 5.206832313721611e-07, + "loss": 0.0317, "step": 3910 }, { - "epoch": 1.7370641794359316, - "grad_norm": 0.593451901083156, - "learning_rate": 6.964564917334973e-06, - "loss": 0.0527, + "epoch": 3.4733570159857905, + "grad_norm": 0.3447408401617846, + "learning_rate": 5.189625159784411e-07, + "loss": 0.0233, "step": 3911 }, { - "epoch": 1.737508327781479, - "grad_norm": 0.6448036909846457, - "learning_rate": 6.962782138523963e-06, - "loss": 0.048, + "epoch": 3.4742451154529306, + "grad_norm": 0.30433019089185304, + "learning_rate": 5.172444928948983e-07, + "loss": 0.0313, "step": 3912 }, { - "epoch": 1.7379524761270264, - "grad_norm": 0.6507383892986597, - "learning_rate": 6.960999064673455e-06, - "loss": 0.0405, + "epoch": 3.475133214920071, + "grad_norm": 0.398223469497285, + "learning_rate": 5.155291631537618e-07, + "loss": 0.0288, "step": 3913 }, { - "epoch": 1.738396624472574, - "grad_norm": 0.3700619002880566, - "learning_rate": 6.959215696051478e-06, - "loss": 0.0375, + "epoch": 3.476021314387211, + "grad_norm": 0.3324608022558975, + "learning_rate": 5.1381652778564e-07, + "loss": 0.0338, "step": 3914 }, { - "epoch": 1.7388407728181212, - "grad_norm": 0.5636095366670266, - "learning_rate": 6.957432032926099e-06, - "loss": 0.0473, + "epoch": 3.4769094138543517, + "grad_norm": 0.3742206764308991, + "learning_rate": 5.121065878195237e-07, + "loss": 0.0253, "step": 3915 }, { - "epoch": 1.7392849211636685, - "grad_norm": 0.31284491700710365, - "learning_rate": 6.955648075565435e-06, - "loss": 0.0262, + "epoch": 3.477797513321492, + "grad_norm": 0.36840244472830547, + "learning_rate": 5.103993442827832e-07, + "loss": 0.037, "step": 3916 }, { - "epoch": 1.7397290695092162, - "grad_norm": 0.88892656741503, - "learning_rate": 6.953863824237644e-06, - "loss": 0.0439, + "epoch": 3.4786856127886323, + "grad_norm": 0.2886020552610924, + "learning_rate": 5.08694798201173e-07, + "loss": 0.0257, "step": 3917 }, { - "epoch": 1.7401732178547635, - "grad_norm": 0.3940650039553802, - "learning_rate": 6.952079279210931e-06, - "loss": 0.0336, + "epoch": 3.479573712255773, + "grad_norm": 0.4458380464085219, + "learning_rate": 5.069929505988192e-07, + "loss": 0.0367, "step": 3918 }, { - "epoch": 1.7406173662003108, - "grad_norm": 0.43415099426990134, - "learning_rate": 6.950294440753542e-06, - "loss": 0.0372, + "epoch": 3.480461811722913, + "grad_norm": 0.4297763864054579, + "learning_rate": 5.052938024982328e-07, + "loss": 0.0366, "step": 3919 }, { - "epoch": 1.7410615145458583, - "grad_norm": 0.43498781141215753, - "learning_rate": 6.948509309133769e-06, - "loss": 0.0471, + "epoch": 3.4813499111900534, + "grad_norm": 0.2868224853190072, + "learning_rate": 5.035973549203021e-07, + "loss": 0.0231, "step": 3920 }, { - "epoch": 1.7415056628914058, - "grad_norm": 0.3673155534955715, - "learning_rate": 6.9467238846199465e-06, - "loss": 0.0329, + "epoch": 3.4822380106571935, + "grad_norm": 0.4033760364862946, + "learning_rate": 5.019036088842905e-07, + "loss": 0.033, "step": 3921 }, { - "epoch": 1.7419498112369531, - "grad_norm": 0.3522859182784973, - "learning_rate": 6.944938167480456e-06, - "loss": 0.0337, + "epoch": 3.483126110124334, + "grad_norm": 0.3599477556106266, + "learning_rate": 5.002125654078388e-07, + "loss": 0.0318, "step": 3922 }, { - "epoch": 1.7423939595825004, - "grad_norm": 0.40635886553673484, - "learning_rate": 6.943152157983719e-06, - "loss": 0.0429, + "epoch": 3.484014209591474, + "grad_norm": 0.36309612247904965, + "learning_rate": 4.985242255069661e-07, + "loss": 0.0293, "step": 3923 }, { - "epoch": 1.742838107928048, - "grad_norm": 0.8451883765559832, - "learning_rate": 6.941365856398205e-06, - "loss": 0.0668, + "epoch": 3.4849023090586146, + "grad_norm": 0.3850741748039999, + "learning_rate": 4.968385901960648e-07, + "loss": 0.0341, "step": 3924 }, { - "epoch": 1.7432822562735955, - "grad_norm": 0.5600822140198348, - "learning_rate": 6.939579262992426e-06, - "loss": 0.0451, + "epoch": 3.4857904085257547, + "grad_norm": 0.3885361747137568, + "learning_rate": 4.951556604879049e-07, + "loss": 0.0308, "step": 3925 }, { - "epoch": 1.7437264046191427, - "grad_norm": 1.2545344075254676, - "learning_rate": 6.937792378034936e-06, - "loss": 0.0743, + "epoch": 3.4866785079928952, + "grad_norm": 0.3632966053399173, + "learning_rate": 4.934754373936274e-07, + "loss": 0.0304, "step": 3926 }, { - "epoch": 1.7441705529646903, - "grad_norm": 1.1071420667016871, - "learning_rate": 6.936005201794331e-06, - "loss": 0.073, + "epoch": 3.4875666074600353, + "grad_norm": 0.3930929590615878, + "learning_rate": 4.917979219227487e-07, + "loss": 0.0384, "step": 3927 }, { - "epoch": 1.7446147013102378, - "grad_norm": 0.4227881516723295, - "learning_rate": 6.93421773453926e-06, - "loss": 0.043, + "epoch": 3.488454706927176, + "grad_norm": 0.3896699517381978, + "learning_rate": 4.901231150831609e-07, + "loss": 0.035, "step": 3928 }, { - "epoch": 1.745058849655785, - "grad_norm": 0.46137549848480547, - "learning_rate": 6.932429976538407e-06, - "loss": 0.0413, + "epoch": 3.4893428063943164, + "grad_norm": 0.47188392203720475, + "learning_rate": 4.884510178811242e-07, + "loss": 0.04, "step": 3929 }, { - "epoch": 1.7455029980013324, - "grad_norm": 0.5679012650124624, - "learning_rate": 6.930641928060501e-06, - "loss": 0.0331, + "epoch": 3.4902309058614565, + "grad_norm": 0.44499118386396463, + "learning_rate": 4.867816313212731e-07, + "loss": 0.0353, "step": 3930 }, { - "epoch": 1.7459471463468799, - "grad_norm": 0.5321986376581693, - "learning_rate": 6.928853589374318e-06, - "loss": 0.0467, + "epoch": 3.491119005328597, + "grad_norm": 0.2983175269243176, + "learning_rate": 4.851149564066143e-07, + "loss": 0.025, "step": 3931 }, { - "epoch": 1.7463912946924274, - "grad_norm": 0.6219676953962676, - "learning_rate": 6.927064960748675e-06, - "loss": 0.0479, + "epoch": 3.492007104795737, + "grad_norm": 0.44784121725297044, + "learning_rate": 4.834509941385246e-07, + "loss": 0.0348, "step": 3932 }, { - "epoch": 1.7468354430379747, - "grad_norm": 0.5001191355293207, - "learning_rate": 6.925276042452433e-06, - "loss": 0.0535, + "epoch": 3.4928952042628776, + "grad_norm": 0.38652222379864165, + "learning_rate": 4.817897455167503e-07, + "loss": 0.0337, "step": 3933 }, { - "epoch": 1.747279591383522, - "grad_norm": 0.4563948076269455, - "learning_rate": 6.923486834754498e-06, - "loss": 0.0427, + "epoch": 3.4937833037300177, + "grad_norm": 0.37253923439096487, + "learning_rate": 4.801312115394064e-07, + "loss": 0.0321, "step": 3934 }, { - "epoch": 1.7477237397290695, - "grad_norm": 0.6974643048935755, - "learning_rate": 6.9216973379238175e-06, - "loss": 0.0485, + "epoch": 3.494671403197158, + "grad_norm": 0.30324285721890126, + "learning_rate": 4.784753932029806e-07, + "loss": 0.0306, "step": 3935 }, { - "epoch": 1.748167888074617, - "grad_norm": 0.9428504067985796, - "learning_rate": 6.9199075522293815e-06, - "loss": 0.0612, + "epoch": 3.4955595026642983, + "grad_norm": 0.3492390703031562, + "learning_rate": 4.7682229150232405e-07, + "loss": 0.0314, "step": 3936 }, { - "epoch": 1.7486120364201643, - "grad_norm": 0.8097896480889225, - "learning_rate": 6.918117477940227e-06, - "loss": 0.0467, + "epoch": 3.496447602131439, + "grad_norm": 0.33696592266827297, + "learning_rate": 4.751719074306604e-07, + "loss": 0.0289, "step": 3937 }, { - "epoch": 1.7490561847657118, - "grad_norm": 0.5120773769465646, - "learning_rate": 6.916327115325434e-06, - "loss": 0.0554, + "epoch": 3.497335701598579, + "grad_norm": 0.3262949590640255, + "learning_rate": 4.7352424197957767e-07, + "loss": 0.0282, "step": 3938 }, { - "epoch": 1.7495003331112593, - "grad_norm": 0.49704552104919647, - "learning_rate": 6.914536464654123e-06, - "loss": 0.0429, + "epoch": 3.4982238010657194, + "grad_norm": 0.33102629789968224, + "learning_rate": 4.718792961390295e-07, + "loss": 0.0269, "step": 3939 }, { - "epoch": 1.7499444814568066, - "grad_norm": 0.4966530008001101, - "learning_rate": 6.912745526195457e-06, - "loss": 0.0416, + "epoch": 3.49911190053286, + "grad_norm": 0.39103829467973583, + "learning_rate": 4.7023707089733915e-07, + "loss": 0.0291, "step": 3940 }, { - "epoch": 1.7503886298023539, - "grad_norm": 0.6069162585490371, - "learning_rate": 6.910954300218648e-06, - "loss": 0.0407, + "epoch": 3.5, + "grad_norm": 0.3244603190489018, + "learning_rate": 4.6859756724119297e-07, + "loss": 0.0289, "step": 3941 }, { - "epoch": 1.7508327781479014, - "grad_norm": 0.5065891450556588, - "learning_rate": 6.9091627869929456e-06, - "loss": 0.0522, + "epoch": 3.50088809946714, + "grad_norm": 0.4009753789908271, + "learning_rate": 4.669607861556402e-07, + "loss": 0.0418, "step": 3942 }, { - "epoch": 1.751276926493449, - "grad_norm": 0.5821716098410316, - "learning_rate": 6.907370986787647e-06, - "loss": 0.0588, + "epoch": 3.5017761989342806, + "grad_norm": 0.44225750663258634, + "learning_rate": 4.653267286240998e-07, + "loss": 0.0285, "step": 3943 }, { - "epoch": 1.7517210748389962, - "grad_norm": 0.4854527206747191, - "learning_rate": 6.905578899872085e-06, - "loss": 0.0347, + "epoch": 3.502664298401421, + "grad_norm": 0.36099165123017973, + "learning_rate": 4.6369539562834797e-07, + "loss": 0.0278, "step": 3944 }, { - "epoch": 1.7521652231845435, - "grad_norm": 0.6099586438595753, - "learning_rate": 6.903786526515648e-06, - "loss": 0.057, + "epoch": 3.503552397868561, + "grad_norm": 0.37669963389260863, + "learning_rate": 4.6206678814852855e-07, + "loss": 0.0311, "step": 3945 }, { - "epoch": 1.752609371530091, - "grad_norm": 0.39621484939213947, - "learning_rate": 6.901993866987755e-06, - "loss": 0.036, + "epoch": 3.5044404973357017, + "grad_norm": 0.3653911199451643, + "learning_rate": 4.6044090716314825e-07, + "loss": 0.0338, "step": 3946 }, { - "epoch": 1.7530535198756385, - "grad_norm": 0.5825955749635414, - "learning_rate": 6.9002009215578736e-06, - "loss": 0.0371, + "epoch": 3.505328596802842, + "grad_norm": 0.39058917353430855, + "learning_rate": 4.5881775364906957e-07, + "loss": 0.0344, "step": 3947 }, { - "epoch": 1.7534976682211858, - "grad_norm": 0.5518034617506287, - "learning_rate": 6.898407690495516e-06, - "loss": 0.0535, + "epoch": 3.5062166962699823, + "grad_norm": 0.3922029872761941, + "learning_rate": 4.571973285815223e-07, + "loss": 0.0278, "step": 3948 }, { - "epoch": 1.7539418165667333, - "grad_norm": 0.7169225712774631, - "learning_rate": 6.896614174070234e-06, - "loss": 0.065, + "epoch": 3.5071047957371224, + "grad_norm": 0.4153649844627601, + "learning_rate": 4.555796329340967e-07, + "loss": 0.0323, "step": 3949 }, { - "epoch": 1.7543859649122808, - "grad_norm": 0.487246075580528, - "learning_rate": 6.894820372551624e-06, - "loss": 0.0425, + "epoch": 3.507992895204263, + "grad_norm": 0.3922422286850339, + "learning_rate": 4.539646676787396e-07, + "loss": 0.0331, "step": 3950 }, { - "epoch": 1.7548301132578281, - "grad_norm": 0.5762490980340829, - "learning_rate": 6.893026286209324e-06, - "loss": 0.0579, + "epoch": 3.5088809946714035, + "grad_norm": 0.3714168950930685, + "learning_rate": 4.523524337857582e-07, + "loss": 0.0314, "step": 3951 }, { - "epoch": 1.7552742616033754, - "grad_norm": 0.40568305353842643, - "learning_rate": 6.891231915313017e-06, - "loss": 0.0447, + "epoch": 3.5097690941385435, + "grad_norm": 0.4241931810501754, + "learning_rate": 4.507429322238221e-07, + "loss": 0.0325, "step": 3952 }, { - "epoch": 1.755718409948923, - "grad_norm": 1.1277438855550295, - "learning_rate": 6.889437260132426e-06, - "loss": 0.0646, + "epoch": 3.5106571936056836, + "grad_norm": 0.3622181214141427, + "learning_rate": 4.4913616395995505e-07, + "loss": 0.0301, "step": 3953 }, { - "epoch": 1.7561625582944704, - "grad_norm": 1.0348468689646162, - "learning_rate": 6.887642320937319e-06, - "loss": 0.0477, + "epoch": 3.511545293072824, + "grad_norm": 0.4654443865234001, + "learning_rate": 4.4753212995954145e-07, + "loss": 0.039, "step": 3954 }, { - "epoch": 1.7566067066400177, - "grad_norm": 0.4516096937991291, - "learning_rate": 6.885847097997507e-06, - "loss": 0.0441, + "epoch": 3.5124333925399647, + "grad_norm": 0.3334121843075633, + "learning_rate": 4.4593083118632143e-07, + "loss": 0.0341, "step": 3955 }, { - "epoch": 1.757050854985565, - "grad_norm": 0.5215834296906894, - "learning_rate": 6.884051591582838e-06, - "loss": 0.0413, + "epoch": 3.5133214920071048, + "grad_norm": 0.405683466103701, + "learning_rate": 4.4433226860239043e-07, + "loss": 0.0327, "step": 3956 }, { - "epoch": 1.7574950033311127, - "grad_norm": 0.4942925713506997, - "learning_rate": 6.882255801963215e-06, - "loss": 0.0313, + "epoch": 3.5142095914742453, + "grad_norm": 0.43076733535358774, + "learning_rate": 4.42736443168203e-07, + "loss": 0.0382, "step": 3957 }, { - "epoch": 1.75793915167666, - "grad_norm": 0.5511920080731652, - "learning_rate": 6.8804597294085676e-06, - "loss": 0.0468, + "epoch": 3.5150976909413854, + "grad_norm": 0.35411383060158536, + "learning_rate": 4.4114335584256986e-07, + "loss": 0.0298, "step": 3958 }, { - "epoch": 1.7583833000222073, - "grad_norm": 0.8005773203813454, - "learning_rate": 6.87866337418888e-06, - "loss": 0.0723, + "epoch": 3.515985790408526, + "grad_norm": 0.35235900431486816, + "learning_rate": 4.3955300758264987e-07, + "loss": 0.0308, "step": 3959 }, { - "epoch": 1.7588274483677548, - "grad_norm": 0.6193224773136193, - "learning_rate": 6.876866736574175e-06, - "loss": 0.0458, + "epoch": 3.516873889875666, + "grad_norm": 0.3366112372387163, + "learning_rate": 4.3796539934396307e-07, + "loss": 0.028, "step": 3960 }, { - "epoch": 1.7592715967133024, - "grad_norm": 0.7463153621358136, - "learning_rate": 6.875069816834517e-06, - "loss": 0.0452, + "epoch": 3.5177619893428065, + "grad_norm": 0.31567997834720296, + "learning_rate": 4.363805320803821e-07, + "loss": 0.0318, "step": 3961 }, { - "epoch": 1.7597157450588496, - "grad_norm": 0.6653689016758167, - "learning_rate": 6.873272615240013e-06, - "loss": 0.0379, + "epoch": 3.5186500888099466, + "grad_norm": 0.4634528280904752, + "learning_rate": 4.3479840674413077e-07, + "loss": 0.03, "step": 3962 }, { - "epoch": 1.760159893404397, - "grad_norm": 0.46506318034990934, - "learning_rate": 6.871475132060814e-06, - "loss": 0.0405, + "epoch": 3.519538188277087, + "grad_norm": 0.4008635228535786, + "learning_rate": 4.3321902428578475e-07, + "loss": 0.031, "step": 3963 }, { - "epoch": 1.7606040417499444, - "grad_norm": 0.4803172632231074, - "learning_rate": 6.8696773675671125e-06, - "loss": 0.0494, + "epoch": 3.520426287744227, + "grad_norm": 0.42058392430256764, + "learning_rate": 4.316423856542751e-07, + "loss": 0.0337, "step": 3964 }, { - "epoch": 1.761048190095492, - "grad_norm": 0.5683228177486831, - "learning_rate": 6.8678793220291406e-06, - "loss": 0.0474, + "epoch": 3.5213143872113677, + "grad_norm": 0.3750614354675925, + "learning_rate": 4.3006849179688115e-07, + "loss": 0.0333, "step": 3965 }, { - "epoch": 1.7614923384410393, - "grad_norm": 0.6405917034428388, - "learning_rate": 6.866080995717179e-06, - "loss": 0.0444, + "epoch": 3.522202486678508, + "grad_norm": 0.4078111616286517, + "learning_rate": 4.28497343659236e-07, + "loss": 0.0316, "step": 3966 }, { - "epoch": 1.7619364867865868, - "grad_norm": 0.469723345316827, - "learning_rate": 6.864282388901544e-06, - "loss": 0.0456, + "epoch": 3.5230905861456483, + "grad_norm": 0.323934139235122, + "learning_rate": 4.269289421853212e-07, + "loss": 0.0288, "step": 3967 }, { - "epoch": 1.7623806351321343, - "grad_norm": 0.3596729988674669, - "learning_rate": 6.862483501852597e-06, - "loss": 0.0273, + "epoch": 3.5239786856127884, + "grad_norm": 0.36104854254096586, + "learning_rate": 4.253632883174663e-07, + "loss": 0.0312, "step": 3968 }, { - "epoch": 1.7628247834776816, - "grad_norm": 0.4095733039990315, - "learning_rate": 6.8606843348407416e-06, - "loss": 0.0423, + "epoch": 3.524866785079929, + "grad_norm": 0.3784468723502184, + "learning_rate": 4.2380038299635494e-07, + "loss": 0.0312, "step": 3969 }, { - "epoch": 1.7632689318232289, - "grad_norm": 0.5817103130183384, - "learning_rate": 6.858884888136423e-06, - "loss": 0.061, + "epoch": 3.5257548845470694, + "grad_norm": 0.3332069586993085, + "learning_rate": 4.2224022716101544e-07, + "loss": 0.0273, "step": 3970 }, { - "epoch": 1.7637130801687764, - "grad_norm": 0.5515400819468078, - "learning_rate": 6.85708516201013e-06, - "loss": 0.0493, + "epoch": 3.5266429840142095, + "grad_norm": 0.3065169726343551, + "learning_rate": 4.2068282174882344e-07, + "loss": 0.0283, "step": 3971 }, { - "epoch": 1.7641572285143239, - "grad_norm": 0.4010509115861224, - "learning_rate": 6.855285156732389e-06, - "loss": 0.0384, + "epoch": 3.52753108348135, + "grad_norm": 0.37434731360945855, + "learning_rate": 4.191281676955061e-07, + "loss": 0.0333, "step": 3972 }, { - "epoch": 1.7646013768598712, - "grad_norm": 0.48651107038259084, - "learning_rate": 6.853484872573773e-06, - "loss": 0.0417, + "epoch": 3.52841918294849, + "grad_norm": 0.39887429182412376, + "learning_rate": 4.1757626593513514e-07, + "loss": 0.0295, "step": 3973 }, { - "epoch": 1.7650455252054185, - "grad_norm": 0.6804998963023164, - "learning_rate": 6.851684309804898e-06, - "loss": 0.0511, + "epoch": 3.5293072824156306, + "grad_norm": 0.35927540568607647, + "learning_rate": 4.160271174001285e-07, + "loss": 0.0294, "step": 3974 }, { - "epoch": 1.765489673550966, - "grad_norm": 0.5378576658955843, - "learning_rate": 6.849883468696414e-06, - "loss": 0.0466, + "epoch": 3.5301953818827707, + "grad_norm": 0.3264131244118038, + "learning_rate": 4.144807230212483e-07, + "loss": 0.0285, "step": 3975 }, { - "epoch": 1.7659338218965135, - "grad_norm": 0.6080913375351253, - "learning_rate": 6.848082349519021e-06, - "loss": 0.0504, + "epoch": 3.5310834813499112, + "grad_norm": 0.33956545997361354, + "learning_rate": 4.12937083727607e-07, + "loss": 0.0266, "step": 3976 }, { - "epoch": 1.7663779702420608, - "grad_norm": 0.4317853459587004, - "learning_rate": 6.846280952543459e-06, - "loss": 0.0419, + "epoch": 3.5319715808170518, + "grad_norm": 0.334835037466751, + "learning_rate": 4.113962004466554e-07, + "loss": 0.0301, "step": 3977 }, { - "epoch": 1.7668221185876083, - "grad_norm": 0.44881873306480574, - "learning_rate": 6.844479278040506e-06, - "loss": 0.0373, + "epoch": 3.532859680284192, + "grad_norm": 0.3407365741387528, + "learning_rate": 4.0985807410419463e-07, + "loss": 0.0324, "step": 3978 }, { - "epoch": 1.7672662669331558, - "grad_norm": 0.38885972043532946, - "learning_rate": 6.842677326280984e-06, - "loss": 0.0396, + "epoch": 3.533747779751332, + "grad_norm": 0.377144850050285, + "learning_rate": 4.0832270562436436e-07, + "loss": 0.0282, "step": 3979 }, { - "epoch": 1.767710415278703, - "grad_norm": 0.4524735899424509, - "learning_rate": 6.840875097535761e-06, - "loss": 0.0448, + "epoch": 3.5346358792184724, + "grad_norm": 0.33712492550672146, + "learning_rate": 4.0679009592964834e-07, + "loss": 0.0279, "step": 3980 }, { - "epoch": 1.7681545636242504, - "grad_norm": 0.455210687429983, - "learning_rate": 6.8390725920757374e-06, - "loss": 0.0382, + "epoch": 3.535523978685613, + "grad_norm": 0.9349739434990134, + "learning_rate": 4.052602459408761e-07, + "loss": 0.0375, "step": 3981 }, { - "epoch": 1.768598711969798, - "grad_norm": 0.398331712609469, - "learning_rate": 6.837269810171864e-06, - "loss": 0.0414, + "epoch": 3.536412078152753, + "grad_norm": 0.30334810360924563, + "learning_rate": 4.037331565772157e-07, + "loss": 0.0247, "step": 3982 }, { - "epoch": 1.7690428603153454, - "grad_norm": 0.37522143210177494, - "learning_rate": 6.835466752095129e-06, - "loss": 0.0386, + "epoch": 3.5373001776198936, + "grad_norm": 0.30281540391674105, + "learning_rate": 4.0220882875617594e-07, + "loss": 0.0308, "step": 3983 }, { - "epoch": 1.7694870086608927, - "grad_norm": 0.6174104941670406, - "learning_rate": 6.833663418116561e-06, - "loss": 0.0389, + "epoch": 3.5381882770870337, + "grad_norm": 0.31139441547970165, + "learning_rate": 4.0068726339360985e-07, + "loss": 0.027, "step": 3984 }, { - "epoch": 1.76993115700644, - "grad_norm": 0.6659454494298849, - "learning_rate": 6.831859808507233e-06, - "loss": 0.0447, + "epoch": 3.539076376554174, + "grad_norm": 0.3528748309527303, + "learning_rate": 3.991684614037078e-07, + "loss": 0.0273, "step": 3985 }, { - "epoch": 1.7703753053519877, - "grad_norm": 0.47498410446306544, - "learning_rate": 6.830055923538258e-06, - "loss": 0.0417, + "epoch": 3.5399644760213143, + "grad_norm": 0.37054804647531303, + "learning_rate": 3.9765242369900205e-07, + "loss": 0.0278, "step": 3986 }, { - "epoch": 1.770819453697535, - "grad_norm": 0.4396131477174768, - "learning_rate": 6.82825176348079e-06, - "loss": 0.0372, + "epoch": 3.540852575488455, + "grad_norm": 0.3897783120052963, + "learning_rate": 3.9613915119036175e-07, + "loss": 0.042, "step": 3987 }, { - "epoch": 1.7712636020430823, - "grad_norm": 0.576388987980822, - "learning_rate": 6.826447328606026e-06, - "loss": 0.0432, + "epoch": 3.5417406749555953, + "grad_norm": 0.4120397041206129, + "learning_rate": 3.946286447869957e-07, + "loss": 0.0428, "step": 3988 }, { - "epoch": 1.7717077503886298, - "grad_norm": 0.5115559440177404, - "learning_rate": 6.8246426191852025e-06, - "loss": 0.046, + "epoch": 3.5426287744227354, + "grad_norm": 0.3341949376027205, + "learning_rate": 3.9312090539645077e-07, + "loss": 0.0241, "step": 3989 }, { - "epoch": 1.7721518987341773, - "grad_norm": 0.3933392068642798, - "learning_rate": 6.822837635489597e-06, - "loss": 0.0424, + "epoch": 3.5435168738898755, + "grad_norm": 0.421389210832489, + "learning_rate": 3.9161593392461394e-07, + "loss": 0.0293, "step": 3990 }, { - "epoch": 1.7725960470797246, - "grad_norm": 0.48762159715898146, - "learning_rate": 6.821032377790533e-06, - "loss": 0.0428, + "epoch": 3.544404973357016, + "grad_norm": 0.3121689018226074, + "learning_rate": 3.901137312757025e-07, + "loss": 0.0305, "step": 3991 }, { - "epoch": 1.773040195425272, - "grad_norm": 0.5275360991096351, - "learning_rate": 6.819226846359366e-06, - "loss": 0.0437, + "epoch": 3.5452930728241565, + "grad_norm": 0.34591273554346214, + "learning_rate": 3.886142983522767e-07, + "loss": 0.0275, "step": 3992 }, { - "epoch": 1.7734843437708194, - "grad_norm": 0.5998959208861094, - "learning_rate": 6.817421041467501e-06, - "loss": 0.0493, + "epoch": 3.5461811722912966, + "grad_norm": 0.35138389068617915, + "learning_rate": 3.8711763605523035e-07, + "loss": 0.0344, "step": 3993 }, { - "epoch": 1.773928492116367, - "grad_norm": 0.558883606638067, - "learning_rate": 6.815614963386383e-06, - "loss": 0.0443, + "epoch": 3.5470692717584367, + "grad_norm": 0.33989097463707235, + "learning_rate": 3.856237452837919e-07, + "loss": 0.0296, "step": 3994 }, { - "epoch": 1.7743726404619142, - "grad_norm": 0.530805377008147, - "learning_rate": 6.813808612387493e-06, - "loss": 0.0493, + "epoch": 3.547957371225577, + "grad_norm": 0.3381630441049522, + "learning_rate": 3.841326269355244e-07, + "loss": 0.0226, "step": 3995 }, { - "epoch": 1.7748167888074617, - "grad_norm": 0.44577841042032773, - "learning_rate": 6.812001988742356e-06, - "loss": 0.0423, + "epoch": 3.5488454706927177, + "grad_norm": 0.34907906949366113, + "learning_rate": 3.8264428190632807e-07, + "loss": 0.0297, "step": 3996 }, { - "epoch": 1.7752609371530093, - "grad_norm": 0.4208112565964139, - "learning_rate": 6.81019509272254e-06, - "loss": 0.0383, + "epoch": 3.549733570159858, + "grad_norm": 0.3866613987831623, + "learning_rate": 3.81158711090433e-07, + "loss": 0.029, "step": 3997 }, { - "epoch": 1.7757050854985565, - "grad_norm": 0.5652594178574358, - "learning_rate": 6.808387924599653e-06, - "loss": 0.0469, + "epoch": 3.5506216696269983, + "grad_norm": 0.3960749087467944, + "learning_rate": 3.796759153804053e-07, + "loss": 0.0349, "step": 3998 }, { - "epoch": 1.7761492338441038, - "grad_norm": 0.47567339447317186, - "learning_rate": 6.806580484645342e-06, - "loss": 0.0536, + "epoch": 3.5515097690941384, + "grad_norm": 0.37080346096911626, + "learning_rate": 3.7819589566714223e-07, + "loss": 0.027, "step": 3999 }, { - "epoch": 1.7765933821896513, - "grad_norm": 0.5061651547732329, - "learning_rate": 6.804772773131294e-06, - "loss": 0.0477, + "epoch": 3.552397868561279, + "grad_norm": 0.35462359752587536, + "learning_rate": 3.7671865283987254e-07, + "loss": 0.0282, "step": 4000 }, { - "epoch": 1.7770375305351989, - "grad_norm": 0.3302628327205031, - "learning_rate": 6.80296479032924e-06, - "loss": 0.0287, + "epoch": 3.553285968028419, + "grad_norm": 0.34920229960648946, + "learning_rate": 3.7524418778615903e-07, + "loss": 0.0306, "step": 4001 }, { - "epoch": 1.7774816788807462, - "grad_norm": 0.5657117208796698, - "learning_rate": 6.801156536510953e-06, - "loss": 0.063, + "epoch": 3.5541740674955595, + "grad_norm": 0.381088153269292, + "learning_rate": 3.7377250139189526e-07, + "loss": 0.0335, "step": 4002 }, { - "epoch": 1.7779258272262934, - "grad_norm": 0.40290409532344185, - "learning_rate": 6.799348011948242e-06, - "loss": 0.0328, + "epoch": 3.5550621669627, + "grad_norm": 0.6854256037254168, + "learning_rate": 3.7230359454130115e-07, + "loss": 0.031, "step": 4003 }, { - "epoch": 1.778369975571841, - "grad_norm": 0.4303302869190356, - "learning_rate": 6.797539216912958e-06, - "loss": 0.0389, + "epoch": 3.55595026642984, + "grad_norm": 0.33517642609484855, + "learning_rate": 3.7083746811693134e-07, + "loss": 0.0298, "step": 4004 }, { - "epoch": 1.7788141239173885, - "grad_norm": 0.4915190826163001, - "learning_rate": 6.795730151676996e-06, - "loss": 0.0506, + "epoch": 3.55683836589698, + "grad_norm": 0.31742503207285194, + "learning_rate": 3.693741229996689e-07, + "loss": 0.0285, "step": 4005 }, { - "epoch": 1.7792582722629358, - "grad_norm": 0.4472370244553756, - "learning_rate": 6.793920816512287e-06, - "loss": 0.0479, + "epoch": 3.5577264653641207, + "grad_norm": 0.3882259802457257, + "learning_rate": 3.679135600687239e-07, + "loss": 0.0342, "step": 4006 }, { - "epoch": 1.7797024206084833, - "grad_norm": 0.46823865267198, - "learning_rate": 6.792111211690807e-06, - "loss": 0.04, + "epoch": 3.5586145648312613, + "grad_norm": 0.4027570793881262, + "learning_rate": 3.664557802016366e-07, + "loss": 0.0336, "step": 4007 }, { - "epoch": 1.7801465689540308, - "grad_norm": 0.713302627768962, - "learning_rate": 6.790301337484569e-06, - "loss": 0.0467, + "epoch": 3.5595026642984013, + "grad_norm": 0.3155717693854574, + "learning_rate": 3.6500078427427534e-07, + "loss": 0.0313, "step": 4008 }, { - "epoch": 1.780590717299578, - "grad_norm": 0.544988693204241, - "learning_rate": 6.788491194165629e-06, - "loss": 0.0597, + "epoch": 3.560390763765542, + "grad_norm": 0.3891831380287714, + "learning_rate": 3.6354857316083293e-07, + "loss": 0.0283, "step": 4009 }, { - "epoch": 1.7810348656451254, - "grad_norm": 0.5488358962167847, - "learning_rate": 6.786680782006079e-06, - "loss": 0.0454, + "epoch": 3.561278863232682, + "grad_norm": 0.32920527491640894, + "learning_rate": 3.620991477338337e-07, + "loss": 0.0256, "step": 4010 }, { - "epoch": 1.7814790139906729, - "grad_norm": 0.45588667933120275, - "learning_rate": 6.784870101278058e-06, - "loss": 0.0458, + "epoch": 3.5621669626998225, + "grad_norm": 0.42504686946340525, + "learning_rate": 3.606525088641244e-07, + "loss": 0.0351, "step": 4011 }, { - "epoch": 1.7819231623362204, - "grad_norm": 0.41134367023706303, - "learning_rate": 6.783059152253743e-06, - "loss": 0.0423, + "epoch": 3.5630550621669625, + "grad_norm": 0.361425081542175, + "learning_rate": 3.5920865742087807e-07, + "loss": 0.0323, "step": 4012 }, { - "epoch": 1.7823673106817677, - "grad_norm": 0.5379695606986187, - "learning_rate": 6.7812479352053465e-06, - "loss": 0.0434, + "epoch": 3.563943161634103, + "grad_norm": 0.3690993683842551, + "learning_rate": 3.577675942715958e-07, + "loss": 0.0354, "step": 4013 }, { - "epoch": 1.782811459027315, - "grad_norm": 0.390627468228112, - "learning_rate": 6.779436450405127e-06, - "loss": 0.0459, + "epoch": 3.5648312611012436, + "grad_norm": 0.4222093409356401, + "learning_rate": 3.5632932028209944e-07, + "loss": 0.0309, "step": 4014 }, { - "epoch": 1.7832556073728625, - "grad_norm": 0.7523391862616232, - "learning_rate": 6.7776246981253835e-06, - "loss": 0.0578, + "epoch": 3.5657193605683837, + "grad_norm": 0.363286725553558, + "learning_rate": 3.548938363165388e-07, + "loss": 0.035, "step": 4015 }, { - "epoch": 1.78369975571841, - "grad_norm": 0.4670417748350829, - "learning_rate": 6.775812678638449e-06, - "loss": 0.0391, + "epoch": 3.5666074600355238, + "grad_norm": 0.3116321285071317, + "learning_rate": 3.534611432373836e-07, + "loss": 0.0255, "step": 4016 }, { - "epoch": 1.7841439040639573, - "grad_norm": 0.6629585111671369, - "learning_rate": 6.7740003922167045e-06, - "loss": 0.0525, + "epoch": 3.5674955595026643, + "grad_norm": 0.32923152944008094, + "learning_rate": 3.5203124190543117e-07, + "loss": 0.0236, "step": 4017 }, { - "epoch": 1.7845880524095048, - "grad_norm": 0.5468251468649823, - "learning_rate": 6.7721878391325655e-06, - "loss": 0.048, + "epoch": 3.568383658969805, + "grad_norm": 0.38168496867714735, + "learning_rate": 3.506041331797966e-07, + "loss": 0.0367, "step": 4018 }, { - "epoch": 1.7850322007550523, - "grad_norm": 0.6407194911078166, - "learning_rate": 6.770375019658491e-06, - "loss": 0.0342, + "epoch": 3.569271758436945, + "grad_norm": 0.3777866499507286, + "learning_rate": 3.491798179179212e-07, + "loss": 0.0268, "step": 4019 }, { - "epoch": 1.7854763491005996, - "grad_norm": 0.3555576600475007, - "learning_rate": 6.7685619340669775e-06, - "loss": 0.0424, + "epoch": 3.5701598579040854, + "grad_norm": 0.493230161127167, + "learning_rate": 3.4775829697556607e-07, + "loss": 0.0359, "step": 4020 }, { - "epoch": 1.785920497446147, - "grad_norm": 0.5655823535701915, - "learning_rate": 6.766748582630561e-06, - "loss": 0.0498, + "epoch": 3.5710479573712255, + "grad_norm": 0.5076369573429343, + "learning_rate": 3.4633957120681294e-07, + "loss": 0.0263, "step": 4021 }, { - "epoch": 1.7863646457916944, - "grad_norm": 0.5446372824826297, - "learning_rate": 6.764934965621823e-06, - "loss": 0.0326, + "epoch": 3.571936056838366, + "grad_norm": 0.34645969509892166, + "learning_rate": 3.4492364146406555e-07, + "loss": 0.0271, "step": 4022 }, { - "epoch": 1.786808794137242, - "grad_norm": 0.7635864213363933, - "learning_rate": 6.763121083313378e-06, - "loss": 0.0469, + "epoch": 3.572824156305506, + "grad_norm": 0.36133779292090595, + "learning_rate": 3.435105085980467e-07, + "loss": 0.0319, "step": 4023 }, { - "epoch": 1.7872529424827892, - "grad_norm": 0.5831230436540045, - "learning_rate": 6.761306935977883e-06, - "loss": 0.0437, + "epoch": 3.5737122557726466, + "grad_norm": 0.4159037386682091, + "learning_rate": 3.4210017345779777e-07, + "loss": 0.0384, "step": 4024 }, { - "epoch": 1.7876970908283365, - "grad_norm": 0.4755466748257561, - "learning_rate": 6.759492523888036e-06, - "loss": 0.0349, + "epoch": 3.5746003552397867, + "grad_norm": 0.34398044780432635, + "learning_rate": 3.406926368906832e-07, + "loss": 0.0265, "step": 4025 }, { - "epoch": 1.7881412391738842, - "grad_norm": 0.3810710893901648, - "learning_rate": 6.757677847316576e-06, - "loss": 0.0304, + "epoch": 3.575488454706927, + "grad_norm": 0.3926123342266739, + "learning_rate": 3.392878997423804e-07, + "loss": 0.0401, "step": 4026 }, { - "epoch": 1.7885853875194315, - "grad_norm": 0.5635909990676548, - "learning_rate": 6.755862906536276e-06, - "loss": 0.0509, + "epoch": 3.5763765541740673, + "grad_norm": 0.46764869355924493, + "learning_rate": 3.378859628568903e-07, + "loss": 0.0465, "step": 4027 }, { - "epoch": 1.7890295358649788, - "grad_norm": 0.44071811791526233, - "learning_rate": 6.754047701819954e-06, - "loss": 0.0409, + "epoch": 3.577264653641208, + "grad_norm": 0.3734611748174983, + "learning_rate": 3.3648682707652757e-07, + "loss": 0.0274, "step": 4028 }, { - "epoch": 1.7894736842105263, - "grad_norm": 0.6047548580207962, - "learning_rate": 6.752232233440469e-06, - "loss": 0.0472, + "epoch": 3.5781527531083483, + "grad_norm": 0.3317830032125016, + "learning_rate": 3.350904932419241e-07, + "loss": 0.0265, "step": 4029 }, { - "epoch": 1.7899178325560738, - "grad_norm": 0.4265056649643106, - "learning_rate": 6.750416501670712e-06, - "loss": 0.0341, + "epoch": 3.5790408525754884, + "grad_norm": 0.3140576416703216, + "learning_rate": 3.3369696219202996e-07, + "loss": 0.026, "step": 4030 }, { - "epoch": 1.7903619809016211, - "grad_norm": 0.4750323545017985, - "learning_rate": 6.74860050678362e-06, - "loss": 0.0399, + "epoch": 3.5799289520426285, + "grad_norm": 0.42006519972387857, + "learning_rate": 3.3230623476411317e-07, + "loss": 0.0357, "step": 4031 }, { - "epoch": 1.7908061292471684, - "grad_norm": 0.43275311316068676, - "learning_rate": 6.74678424905217e-06, - "loss": 0.0381, + "epoch": 3.580817051509769, + "grad_norm": 0.48292394902355373, + "learning_rate": 3.3091831179375036e-07, + "loss": 0.0325, "step": 4032 }, { - "epoch": 1.791250277592716, - "grad_norm": 0.4591066133415168, - "learning_rate": 6.744967728749374e-06, - "loss": 0.049, + "epoch": 3.5817051509769096, + "grad_norm": 0.2983300115029716, + "learning_rate": 3.295331941148394e-07, + "loss": 0.0302, "step": 4033 }, { - "epoch": 1.7916944259382634, - "grad_norm": 0.46035832875677174, - "learning_rate": 6.743150946148286e-06, - "loss": 0.037, + "epoch": 3.5825932504440496, + "grad_norm": 0.3894965984084569, + "learning_rate": 3.2815088255959195e-07, + "loss": 0.0385, "step": 4034 }, { - "epoch": 1.7921385742838107, - "grad_norm": 0.44089681593883423, - "learning_rate": 6.7413339015219995e-06, - "loss": 0.0489, + "epoch": 3.58348134991119, + "grad_norm": 0.2699639757693006, + "learning_rate": 3.267713779585319e-07, + "loss": 0.0205, "step": 4035 }, { - "epoch": 1.7925827226293582, - "grad_norm": 0.3967713328470784, - "learning_rate": 6.739516595143649e-06, - "loss": 0.0348, + "epoch": 3.5843694493783302, + "grad_norm": 0.3772247650517208, + "learning_rate": 3.253946811404957e-07, + "loss": 0.0286, "step": 4036 }, { - "epoch": 1.7930268709749058, - "grad_norm": 0.47307408803909806, - "learning_rate": 6.737699027286404e-06, - "loss": 0.0488, + "epoch": 3.5852575488454708, + "grad_norm": 0.38702656678018194, + "learning_rate": 3.2402079293263666e-07, + "loss": 0.0302, "step": 4037 }, { - "epoch": 1.793471019320453, - "grad_norm": 0.6980297064306341, - "learning_rate": 6.735881198223476e-06, - "loss": 0.0603, + "epoch": 3.586145648312611, + "grad_norm": 0.3441910102358272, + "learning_rate": 3.2264971416041625e-07, + "loss": 0.0286, "step": 4038 }, { - "epoch": 1.7939151676660003, - "grad_norm": 0.641654879554228, - "learning_rate": 6.734063108228118e-06, - "loss": 0.046, + "epoch": 3.5870337477797514, + "grad_norm": 0.4852995878459521, + "learning_rate": 3.212814456476121e-07, + "loss": 0.0381, "step": 4039 }, { - "epoch": 1.7943593160115479, - "grad_norm": 0.43889131829448813, - "learning_rate": 6.732244757573619e-06, - "loss": 0.0417, + "epoch": 3.587921847246892, + "grad_norm": 0.44584480610267, + "learning_rate": 3.1991598821631e-07, + "loss": 0.0356, "step": 4040 }, { - "epoch": 1.7948034643570954, - "grad_norm": 0.5791145146187624, - "learning_rate": 6.730426146533304e-06, - "loss": 0.0629, + "epoch": 3.588809946714032, + "grad_norm": 0.29655385436452336, + "learning_rate": 3.185533426869081e-07, + "loss": 0.0291, "step": 4041 }, { - "epoch": 1.7952476127026427, - "grad_norm": 0.3191065387044477, - "learning_rate": 6.728607275380548e-06, - "loss": 0.034, + "epoch": 3.589698046181172, + "grad_norm": 0.26659825956610705, + "learning_rate": 3.1719350987811537e-07, + "loss": 0.02, "step": 4042 }, { - "epoch": 1.79569176104819, - "grad_norm": 0.5785674908181686, - "learning_rate": 6.726788144388754e-06, - "loss": 0.0495, + "epoch": 3.5905861456483126, + "grad_norm": 0.3981329067663555, + "learning_rate": 3.1583649060695223e-07, + "loss": 0.0307, "step": 4043 }, { - "epoch": 1.7961359093937375, - "grad_norm": 0.5420845224176172, - "learning_rate": 6.724968753831367e-06, - "loss": 0.0568, + "epoch": 3.591474245115453, + "grad_norm": 0.3341932030061367, + "learning_rate": 3.1448228568874417e-07, + "loss": 0.0297, "step": 4044 }, { - "epoch": 1.796580057739285, - "grad_norm": 0.4088444696114397, - "learning_rate": 6.723149103981874e-06, - "loss": 0.045, + "epoch": 3.592362344582593, + "grad_norm": 0.28358982706492647, + "learning_rate": 3.131308959371293e-07, + "loss": 0.0213, "step": 4045 }, { - "epoch": 1.7970242060848323, - "grad_norm": 0.6392237685246868, - "learning_rate": 6.721329195113802e-06, - "loss": 0.0554, + "epoch": 3.5932504440497337, + "grad_norm": 0.31834932434390856, + "learning_rate": 3.1178232216405536e-07, + "loss": 0.0272, "step": 4046 }, { - "epoch": 1.7974683544303798, - "grad_norm": 0.3977912647423139, - "learning_rate": 6.7195090275007104e-06, - "loss": 0.0361, + "epoch": 3.594138543516874, + "grad_norm": 0.35842766921854496, + "learning_rate": 3.104365651797753e-07, + "loss": 0.0309, "step": 4047 }, { - "epoch": 1.7979125027759273, - "grad_norm": 0.36038011427697747, - "learning_rate": 6.717688601416201e-06, - "loss": 0.0406, + "epoch": 3.5950266429840143, + "grad_norm": 0.2983067942130664, + "learning_rate": 3.090936257928501e-07, + "loss": 0.0227, "step": 4048 }, { - "epoch": 1.7983566511214746, - "grad_norm": 0.5212415789499063, - "learning_rate": 6.715867917133919e-06, - "loss": 0.0422, + "epoch": 3.5959147424511544, + "grad_norm": 0.3557084676876161, + "learning_rate": 3.077535048101493e-07, + "loss": 0.0282, "step": 4049 }, { - "epoch": 1.7988007994670219, - "grad_norm": 0.49377763847355416, - "learning_rate": 6.714046974927539e-06, - "loss": 0.043, + "epoch": 3.596802841918295, + "grad_norm": 0.33551728305788187, + "learning_rate": 3.0641620303684837e-07, + "loss": 0.0279, "step": 4050 }, { - "epoch": 1.7992449478125694, - "grad_norm": 0.47840704302357145, - "learning_rate": 6.712225775070784e-06, - "loss": 0.044, + "epoch": 3.5976909413854354, + "grad_norm": 0.32555454976180176, + "learning_rate": 3.0508172127642896e-07, + "loss": 0.0251, "step": 4051 }, { - "epoch": 1.799689096158117, - "grad_norm": 0.4112067824225565, - "learning_rate": 6.71040431783741e-06, - "loss": 0.0354, + "epoch": 3.5985790408525755, + "grad_norm": 0.3926909676347029, + "learning_rate": 3.0375006033067865e-07, + "loss": 0.0336, "step": 4052 }, { - "epoch": 1.8001332445036642, - "grad_norm": 0.3668140961966663, - "learning_rate": 6.70858260350121e-06, - "loss": 0.0369, + "epoch": 3.5994671403197156, + "grad_norm": 0.34034680626207947, + "learning_rate": 3.024212209996885e-07, + "loss": 0.0339, "step": 4053 }, { - "epoch": 1.8005773928492115, - "grad_norm": 0.6061145451464093, - "learning_rate": 6.706760632336023e-06, - "loss": 0.0529, + "epoch": 3.600355239786856, + "grad_norm": 0.3748121404576559, + "learning_rate": 3.010952040818571e-07, + "loss": 0.0372, "step": 4054 }, { - "epoch": 1.8010215411947592, - "grad_norm": 0.804408929113384, - "learning_rate": 6.704938404615718e-06, - "loss": 0.0695, + "epoch": 3.6012433392539966, + "grad_norm": 0.3177708420804335, + "learning_rate": 2.9977201037388604e-07, + "loss": 0.0295, "step": 4055 }, { - "epoch": 1.8014656895403065, - "grad_norm": 0.44516753707686785, - "learning_rate": 6.703115920614212e-06, - "loss": 0.041, + "epoch": 3.6021314387211367, + "grad_norm": 0.41559526622614357, + "learning_rate": 2.9845164067077883e-07, + "loss": 0.0316, "step": 4056 }, { - "epoch": 1.8019098378858538, - "grad_norm": 0.6944007446846528, - "learning_rate": 6.701293180605451e-06, - "loss": 0.063, + "epoch": 3.6030195381882772, + "grad_norm": 0.36826358350633054, + "learning_rate": 2.971340957658447e-07, + "loss": 0.0355, "step": 4057 }, { - "epoch": 1.8023539862314013, - "grad_norm": 0.6199411354460107, - "learning_rate": 6.699470184863423e-06, - "loss": 0.0478, + "epoch": 3.6039076376554173, + "grad_norm": 0.37983089868627984, + "learning_rate": 2.9581937645069614e-07, + "loss": 0.0362, "step": 4058 }, { - "epoch": 1.8027981345769488, - "grad_norm": 0.6947222002075307, - "learning_rate": 6.6976469336621595e-06, - "loss": 0.0425, + "epoch": 3.604795737122558, + "grad_norm": 0.34783175136997246, + "learning_rate": 2.9450748351524504e-07, + "loss": 0.0316, "step": 4059 }, { - "epoch": 1.803242282922496, - "grad_norm": 0.5292009929059116, - "learning_rate": 6.6958234272757235e-06, - "loss": 0.0575, + "epoch": 3.605683836589698, + "grad_norm": 0.39126772271877386, + "learning_rate": 2.931984177477071e-07, + "loss": 0.0354, "step": 4060 }, { - "epoch": 1.8036864312680434, - "grad_norm": 0.438201912001024, - "learning_rate": 6.6939996659782194e-06, - "loss": 0.0395, + "epoch": 3.6065719360568385, + "grad_norm": 0.36861111492060794, + "learning_rate": 2.918921799345997e-07, + "loss": 0.0275, "step": 4061 }, { - "epoch": 1.804130579613591, - "grad_norm": 0.5407542517674611, - "learning_rate": 6.692175650043789e-06, - "loss": 0.0403, + "epoch": 3.6074600355239785, + "grad_norm": 0.3572386465416735, + "learning_rate": 2.905887708607397e-07, + "loss": 0.0309, "step": 4062 }, { - "epoch": 1.8045747279591384, - "grad_norm": 0.5631536015470302, - "learning_rate": 6.690351379746613e-06, - "loss": 0.0421, + "epoch": 3.608348134991119, + "grad_norm": 0.39986346163611447, + "learning_rate": 2.8928819130924656e-07, + "loss": 0.0407, "step": 4063 }, { - "epoch": 1.8050188763046857, - "grad_norm": 0.5913174820930391, - "learning_rate": 6.6885268553609115e-06, - "loss": 0.0265, + "epoch": 3.609236234458259, + "grad_norm": 0.33235581644169715, + "learning_rate": 2.8799044206153704e-07, + "loss": 0.0276, "step": 4064 }, { - "epoch": 1.8054630246502332, - "grad_norm": 0.576629329530581, - "learning_rate": 6.68670207716094e-06, - "loss": 0.0393, + "epoch": 3.6101243339253997, + "grad_norm": 0.37094624908904245, + "learning_rate": 2.8669552389732845e-07, + "loss": 0.0334, "step": 4065 }, { - "epoch": 1.8059071729957807, - "grad_norm": 0.47244671586292697, - "learning_rate": 6.6848770454209955e-06, - "loss": 0.043, + "epoch": 3.61101243339254, + "grad_norm": 0.3842345252591025, + "learning_rate": 2.854034375946385e-07, + "loss": 0.0329, "step": 4066 }, { - "epoch": 1.806351321341328, - "grad_norm": 0.42376459190231364, - "learning_rate": 6.683051760415409e-06, - "loss": 0.0349, + "epoch": 3.6119005328596803, + "grad_norm": 0.33697913363697846, + "learning_rate": 2.841141839297823e-07, + "loss": 0.0302, "step": 4067 }, { - "epoch": 1.8067954696868753, - "grad_norm": 0.5089448244604045, - "learning_rate": 6.681226222418553e-06, - "loss": 0.0367, + "epoch": 3.6127886323268203, + "grad_norm": 0.37985667672995926, + "learning_rate": 2.828277636773713e-07, + "loss": 0.0337, "step": 4068 }, { - "epoch": 1.8072396180324228, - "grad_norm": 0.7287865087611148, - "learning_rate": 6.679400431704837e-06, - "loss": 0.0474, + "epoch": 3.613676731793961, + "grad_norm": 0.40078877511862643, + "learning_rate": 2.815441776103184e-07, + "loss": 0.0453, "step": 4069 }, { - "epoch": 1.8076837663779703, - "grad_norm": 0.4089819648317366, - "learning_rate": 6.677574388548706e-06, - "loss": 0.0273, + "epoch": 3.6145648312611014, + "grad_norm": 0.3060248641723875, + "learning_rate": 2.802634264998294e-07, + "loss": 0.0323, "step": 4070 }, { - "epoch": 1.8081279147235176, - "grad_norm": 0.3655870554565193, - "learning_rate": 6.67574809322465e-06, - "loss": 0.0366, + "epoch": 3.6154529307282415, + "grad_norm": 0.3375294217896488, + "learning_rate": 2.7898551111541105e-07, + "loss": 0.0372, "step": 4071 }, { - "epoch": 1.808572063069065, - "grad_norm": 0.4524590952965907, - "learning_rate": 6.6739215460071885e-06, - "loss": 0.0388, + "epoch": 3.616341030195382, + "grad_norm": 0.3845114968023121, + "learning_rate": 2.7771043222486416e-07, + "loss": 0.0273, "step": 4072 }, { - "epoch": 1.8090162114146124, - "grad_norm": 0.8966024978124474, - "learning_rate": 6.672094747170883e-06, - "loss": 0.0566, + "epoch": 3.617229129662522, + "grad_norm": 0.341062787058291, + "learning_rate": 2.7643819059428367e-07, + "loss": 0.0271, "step": 4073 }, { - "epoch": 1.80946035976016, - "grad_norm": 0.3591914341706326, - "learning_rate": 6.670267696990335e-06, - "loss": 0.0318, + "epoch": 3.6181172291296626, + "grad_norm": 0.3356574146060281, + "learning_rate": 2.751687869880626e-07, + "loss": 0.0306, "step": 4074 }, { - "epoch": 1.8099045081057072, - "grad_norm": 0.44339585259060615, - "learning_rate": 6.668440395740178e-06, - "loss": 0.0363, + "epoch": 3.6190053285968027, + "grad_norm": 0.3162372354593641, + "learning_rate": 2.7390222216888804e-07, + "loss": 0.0281, "step": 4075 }, { - "epoch": 1.8103486564512548, - "grad_norm": 0.3901591376438945, - "learning_rate": 6.666612843695087e-06, - "loss": 0.0485, + "epoch": 3.619893428063943, + "grad_norm": 0.45588194472857524, + "learning_rate": 2.7263849689774135e-07, + "loss": 0.0412, "step": 4076 }, { - "epoch": 1.8107928047968023, - "grad_norm": 0.4015830587763988, - "learning_rate": 6.664785041129777e-06, - "loss": 0.0383, + "epoch": 3.6207815275310837, + "grad_norm": 0.3597778905910394, + "learning_rate": 2.7137761193389623e-07, + "loss": 0.0315, "step": 4077 }, { - "epoch": 1.8112369531423496, - "grad_norm": 0.5367818518459079, - "learning_rate": 6.662956988318994e-06, - "loss": 0.0456, + "epoch": 3.621669626998224, + "grad_norm": 0.3617714366812742, + "learning_rate": 2.701195680349228e-07, + "loss": 0.0318, "step": 4078 }, { - "epoch": 1.8116811014878968, - "grad_norm": 0.40853437415084287, - "learning_rate": 6.661128685537526e-06, - "loss": 0.0322, + "epoch": 3.622557726465364, + "grad_norm": 0.41357031731153654, + "learning_rate": 2.68864365956682e-07, + "loss": 0.0363, "step": 4079 }, { - "epoch": 1.8121252498334444, - "grad_norm": 0.6527784454525124, - "learning_rate": 6.659300133060201e-06, - "loss": 0.0507, + "epoch": 3.6234458259325044, + "grad_norm": 0.37322028907983257, + "learning_rate": 2.676120064533289e-07, + "loss": 0.0303, "step": 4080 }, { - "epoch": 1.8125693981789919, - "grad_norm": 0.5074533806315775, - "learning_rate": 6.657471331161878e-06, - "loss": 0.0432, + "epoch": 3.624333925399645, + "grad_norm": 0.32393294890052976, + "learning_rate": 2.663624902773088e-07, + "loss": 0.0245, "step": 4081 }, { - "epoch": 1.8130135465245392, - "grad_norm": 0.4472089329933828, - "learning_rate": 6.65564228011746e-06, - "loss": 0.0599, + "epoch": 3.625222024866785, + "grad_norm": 0.30567162060640496, + "learning_rate": 2.651158181793595e-07, + "loss": 0.0234, "step": 4082 }, { - "epoch": 1.8134576948700865, - "grad_norm": 0.47876435411805834, - "learning_rate": 6.653812980201882e-06, - "loss": 0.0478, + "epoch": 3.6261101243339255, + "grad_norm": 0.33495454755969983, + "learning_rate": 2.6387199090851135e-07, + "loss": 0.0319, "step": 4083 }, { - "epoch": 1.813901843215634, - "grad_norm": 0.44056500378250846, - "learning_rate": 6.651983431690119e-06, - "loss": 0.0434, + "epoch": 3.6269982238010656, + "grad_norm": 0.3942255313247027, + "learning_rate": 2.6263100921208484e-07, + "loss": 0.0364, "step": 4084 }, { - "epoch": 1.8143459915611815, - "grad_norm": 0.42532030681624505, - "learning_rate": 6.650153634857183e-06, - "loss": 0.0372, + "epoch": 3.627886323268206, + "grad_norm": 0.5529320872827523, + "learning_rate": 2.6139287383568745e-07, + "loss": 0.0333, "step": 4085 }, { - "epoch": 1.8147901399067288, - "grad_norm": 0.38789115474665614, - "learning_rate": 6.648323589978128e-06, - "loss": 0.0332, + "epoch": 3.6287744227353462, + "grad_norm": 0.3566634202518894, + "learning_rate": 2.6015758552322135e-07, + "loss": 0.0297, "step": 4086 }, { - "epoch": 1.8152342882522763, - "grad_norm": 0.5287105455383381, - "learning_rate": 6.646493297328034e-06, - "loss": 0.0483, + "epoch": 3.6296625222024868, + "grad_norm": 0.42591971243674315, + "learning_rate": 2.5892514501687673e-07, + "loss": 0.0424, "step": 4087 }, { - "epoch": 1.8156784365978238, - "grad_norm": 2.7184134061804475, - "learning_rate": 6.6446627571820295e-06, - "loss": 0.05, + "epoch": 3.630550621669627, + "grad_norm": 0.41957409707316257, + "learning_rate": 2.576955530571312e-07, + "loss": 0.0363, "step": 4088 }, { - "epoch": 1.816122584943371, - "grad_norm": 0.48923408476238167, - "learning_rate": 6.642831969815275e-06, - "loss": 0.0524, + "epoch": 3.6314387211367674, + "grad_norm": 0.37826897762300055, + "learning_rate": 2.5646881038275115e-07, + "loss": 0.0295, "step": 4089 }, { - "epoch": 1.8165667332889184, - "grad_norm": 0.4086306503799156, - "learning_rate": 6.641000935502968e-06, - "loss": 0.0369, + "epoch": 3.6323268206039074, + "grad_norm": 0.33846011705029777, + "learning_rate": 2.552449177307931e-07, + "loss": 0.0289, "step": 4090 }, { - "epoch": 1.817010881634466, - "grad_norm": 0.42279116752419027, - "learning_rate": 6.639169654520345e-06, - "loss": 0.0384, + "epoch": 3.633214920071048, + "grad_norm": 0.3434400150376539, + "learning_rate": 2.540238758365987e-07, + "loss": 0.0275, "step": 4091 }, { - "epoch": 1.8174550299800134, - "grad_norm": 0.47809993483091023, - "learning_rate": 6.637338127142678e-06, - "loss": 0.0557, + "epoch": 3.6341030195381885, + "grad_norm": 0.7356552257127704, + "learning_rate": 2.52805685433799e-07, + "loss": 0.0429, "step": 4092 }, { - "epoch": 1.8178991783255607, - "grad_norm": 0.43894142864567726, - "learning_rate": 6.635506353645277e-06, - "loss": 0.0442, + "epoch": 3.6349911190053286, + "grad_norm": 0.3460474954037151, + "learning_rate": 2.515903472543102e-07, + "loss": 0.0322, "step": 4093 }, { - "epoch": 1.818343326671108, - "grad_norm": 0.4258410799351809, - "learning_rate": 6.633674334303489e-06, - "loss": 0.0453, + "epoch": 3.6358792184724686, + "grad_norm": 0.31851118641004755, + "learning_rate": 2.503778620283348e-07, + "loss": 0.0267, "step": 4094 }, { - "epoch": 1.8187874750166557, - "grad_norm": 0.4223073933724936, - "learning_rate": 6.631842069392698e-06, - "loss": 0.0425, + "epoch": 3.636767317939609, + "grad_norm": 0.6093920804945067, + "learning_rate": 2.491682304843629e-07, + "loss": 0.0318, "step": 4095 }, { - "epoch": 1.819231623362203, - "grad_norm": 0.379351123137964, - "learning_rate": 6.630009559188323e-06, - "loss": 0.0333, + "epoch": 3.6376554174067497, + "grad_norm": 0.40552284870398103, + "learning_rate": 2.4796145334916867e-07, + "loss": 0.0285, "step": 4096 }, { - "epoch": 1.8196757717077503, - "grad_norm": 0.38768367796199465, - "learning_rate": 6.628176803965823e-06, - "loss": 0.0391, + "epoch": 3.6385435168738898, + "grad_norm": 0.3387147964505112, + "learning_rate": 2.4675753134781043e-07, + "loss": 0.028, "step": 4097 }, { - "epoch": 1.8201199200532978, - "grad_norm": 0.34224780916225656, - "learning_rate": 6.62634380400069e-06, - "loss": 0.0341, + "epoch": 3.6394316163410303, + "grad_norm": 0.32833452932555013, + "learning_rate": 2.455564652036324e-07, + "loss": 0.0286, "step": 4098 }, { - "epoch": 1.8205640683988453, - "grad_norm": 0.42049665765180966, - "learning_rate": 6.624510559568458e-06, - "loss": 0.0332, + "epoch": 3.6403197158081704, + "grad_norm": 0.33956735996608145, + "learning_rate": 2.4435825563826455e-07, + "loss": 0.0277, "step": 4099 }, { - "epoch": 1.8210082167443926, - "grad_norm": 0.4650798805753388, - "learning_rate": 6.622677070944692e-06, - "loss": 0.0522, + "epoch": 3.641207815275311, + "grad_norm": 0.42900278824587085, + "learning_rate": 2.431629033716171e-07, + "loss": 0.0463, "step": 4100 }, { - "epoch": 1.82145236508994, - "grad_norm": 0.7403412237132078, - "learning_rate": 6.6208433384049974e-06, - "loss": 0.0468, + "epoch": 3.642095914742451, + "grad_norm": 0.2931273940040887, + "learning_rate": 2.4197040912188496e-07, + "loss": 0.0235, "step": 4101 }, { - "epoch": 1.8218965134354874, - "grad_norm": 0.5003084175625752, - "learning_rate": 6.619009362225017e-06, - "loss": 0.0369, + "epoch": 3.6429840142095915, + "grad_norm": 0.5045463380588222, + "learning_rate": 2.4078077360554673e-07, + "loss": 0.0287, "step": 4102 }, { - "epoch": 1.822340661781035, - "grad_norm": 0.4709276604503224, - "learning_rate": 6.617175142680426e-06, - "loss": 0.0373, + "epoch": 3.643872113676732, + "grad_norm": 0.32863453823771394, + "learning_rate": 2.395939975373618e-07, + "loss": 0.023, "step": 4103 }, { - "epoch": 1.8227848101265822, - "grad_norm": 0.405634995248283, - "learning_rate": 6.615340680046941e-06, - "loss": 0.0402, + "epoch": 3.644760213143872, + "grad_norm": 0.3280952166052903, + "learning_rate": 2.3841008163037415e-07, + "loss": 0.0313, "step": 4104 }, { - "epoch": 1.8232289584721297, - "grad_norm": 0.48277401824042093, - "learning_rate": 6.613505974600313e-06, - "loss": 0.0427, + "epoch": 3.645648312611012, + "grad_norm": 0.33935062342771694, + "learning_rate": 2.3722902659590653e-07, + "loss": 0.0256, "step": 4105 }, { - "epoch": 1.8236731068176772, - "grad_norm": 0.5146304089304894, - "learning_rate": 6.611671026616328e-06, - "loss": 0.0449, + "epoch": 3.6465364120781527, + "grad_norm": 0.34394834952041614, + "learning_rate": 2.3605083314356349e-07, + "loss": 0.0276, "step": 4106 }, { - "epoch": 1.8241172551632245, - "grad_norm": 0.5014137439041658, - "learning_rate": 6.609835836370808e-06, - "loss": 0.0462, + "epoch": 3.6474245115452932, + "grad_norm": 0.35006997907059395, + "learning_rate": 2.3487550198123153e-07, + "loss": 0.0295, "step": 4107 }, { - "epoch": 1.8245614035087718, - "grad_norm": 0.5653319356301473, - "learning_rate": 6.6080004041396176e-06, - "loss": 0.0385, + "epoch": 3.6483126110124333, + "grad_norm": 0.35845802214579003, + "learning_rate": 2.3370303381507643e-07, + "loss": 0.0293, "step": 4108 }, { - "epoch": 1.8250055518543193, - "grad_norm": 0.43607877589233934, - "learning_rate": 6.60616473019865e-06, - "loss": 0.0366, + "epoch": 3.649200710479574, + "grad_norm": 0.2961638070375035, + "learning_rate": 2.3253342934954347e-07, + "loss": 0.0228, "step": 4109 }, { - "epoch": 1.8254497001998669, - "grad_norm": 0.34512361350227927, - "learning_rate": 6.6043288148238405e-06, - "loss": 0.0327, + "epoch": 3.650088809946714, + "grad_norm": 0.3581970416631591, + "learning_rate": 2.3136668928735838e-07, + "loss": 0.0326, "step": 4110 }, { - "epoch": 1.8258938485454141, - "grad_norm": 0.46278297105670835, - "learning_rate": 6.6024926582911576e-06, - "loss": 0.0412, + "epoch": 3.6509769094138544, + "grad_norm": 0.37317477081009887, + "learning_rate": 2.3020281432952485e-07, + "loss": 0.0305, "step": 4111 }, { - "epoch": 1.8263379968909614, - "grad_norm": 0.4906820801194115, - "learning_rate": 6.600656260876605e-06, - "loss": 0.0381, + "epoch": 3.6518650088809945, + "grad_norm": 0.3321849730320183, + "learning_rate": 2.2904180517532682e-07, + "loss": 0.0338, "step": 4112 }, { - "epoch": 1.826782145236509, - "grad_norm": 0.48071884316968305, - "learning_rate": 6.598819622856227e-06, - "loss": 0.0409, + "epoch": 3.652753108348135, + "grad_norm": 0.34095544835376845, + "learning_rate": 2.2788366252232408e-07, + "loss": 0.0309, "step": 4113 }, { - "epoch": 1.8272262935820565, - "grad_norm": 0.47730730807604993, - "learning_rate": 6.596982744506101e-06, - "loss": 0.0557, + "epoch": 3.6536412078152756, + "grad_norm": 0.3135931275379927, + "learning_rate": 2.2672838706635557e-07, + "loss": 0.0345, "step": 4114 }, { - "epoch": 1.8276704419276038, - "grad_norm": 0.3790889364580573, - "learning_rate": 6.595145626102339e-06, - "loss": 0.0404, + "epoch": 3.6545293072824157, + "grad_norm": 0.3398201526476228, + "learning_rate": 2.255759795015372e-07, + "loss": 0.0277, "step": 4115 }, { - "epoch": 1.8281145902731513, - "grad_norm": 0.463399073158891, - "learning_rate": 6.593308267921095e-06, - "loss": 0.0411, + "epoch": 3.6554174067495557, + "grad_norm": 0.4258577290727356, + "learning_rate": 2.2442644052026286e-07, + "loss": 0.0358, "step": 4116 }, { - "epoch": 1.8285587386186988, - "grad_norm": 0.501176246654533, - "learning_rate": 6.59147067023855e-06, - "loss": 0.0506, + "epoch": 3.6563055062166963, + "grad_norm": 0.344712297338292, + "learning_rate": 2.2327977081320064e-07, + "loss": 0.0364, "step": 4117 }, { - "epoch": 1.829002886964246, - "grad_norm": 0.46578549229661426, - "learning_rate": 6.58963283333093e-06, - "loss": 0.0454, + "epoch": 3.657193605683837, + "grad_norm": 0.2844802997510866, + "learning_rate": 2.2213597106929608e-07, + "loss": 0.0254, "step": 4118 }, { - "epoch": 1.8294470353097934, - "grad_norm": 0.4499464900379103, - "learning_rate": 6.587794757474493e-06, - "loss": 0.0328, + "epoch": 3.658081705150977, + "grad_norm": 0.3482327656423746, + "learning_rate": 2.209950419757717e-07, + "loss": 0.0385, "step": 4119 }, { - "epoch": 1.8298911836553409, - "grad_norm": 0.4379709545832753, - "learning_rate": 6.585956442945531e-06, - "loss": 0.0342, + "epoch": 3.6589698046181174, + "grad_norm": 0.409568753384068, + "learning_rate": 2.1985698421812308e-07, + "loss": 0.0299, "step": 4120 }, { - "epoch": 1.8303353320008884, - "grad_norm": 0.5260425346926577, - "learning_rate": 6.584117890020374e-06, - "loss": 0.0439, + "epoch": 3.6598579040852575, + "grad_norm": 0.30398466916264566, + "learning_rate": 2.1872179848012099e-07, + "loss": 0.0297, "step": 4121 }, { - "epoch": 1.8307794803464357, - "grad_norm": 0.426360295901579, - "learning_rate": 6.5822790989753905e-06, - "loss": 0.0411, + "epoch": 3.660746003552398, + "grad_norm": 0.3286674712491026, + "learning_rate": 2.1758948544381153e-07, + "loss": 0.0277, "step": 4122 }, { - "epoch": 1.831223628691983, - "grad_norm": 0.45007999975919516, - "learning_rate": 6.5804400700869806e-06, - "loss": 0.0443, + "epoch": 3.661634103019538, + "grad_norm": 0.38599884179691046, + "learning_rate": 2.164600457895144e-07, + "loss": 0.022, "step": 4123 }, { - "epoch": 1.8316677770375307, - "grad_norm": 0.59501919115864, - "learning_rate": 6.578600803631579e-06, - "loss": 0.0454, + "epoch": 3.6625222024866786, + "grad_norm": 0.3298170939527743, + "learning_rate": 2.153334801958229e-07, + "loss": 0.0281, "step": 4124 }, { - "epoch": 1.832111925383078, - "grad_norm": 0.3671794598732458, - "learning_rate": 6.5767612998856625e-06, - "loss": 0.0393, + "epoch": 3.6634103019538187, + "grad_norm": 0.35507918769242575, + "learning_rate": 2.1420978933960334e-07, + "loss": 0.0298, "step": 4125 }, { - "epoch": 1.8325560737286253, - "grad_norm": 0.36069972183791427, - "learning_rate": 6.574921559125737e-06, - "loss": 0.0375, + "epoch": 3.664298401420959, + "grad_norm": 0.657045311502623, + "learning_rate": 2.130889738959946e-07, + "loss": 0.0396, "step": 4126 }, { - "epoch": 1.8330002220741728, - "grad_norm": 0.47931723820550676, - "learning_rate": 6.573081581628349e-06, - "loss": 0.0398, + "epoch": 3.6651865008880993, + "grad_norm": 0.28407464356676004, + "learning_rate": 2.119710345384085e-07, + "loss": 0.0237, "step": 4127 }, { - "epoch": 1.8334443704197203, - "grad_norm": 0.4393540376137144, - "learning_rate": 6.571241367670077e-06, - "loss": 0.048, + "epoch": 3.66607460035524, + "grad_norm": 0.4714399982634715, + "learning_rate": 2.1085597193853002e-07, + "loss": 0.0345, "step": 4128 }, { - "epoch": 1.8338885187652676, - "grad_norm": 0.5502773042166398, - "learning_rate": 6.569400917527536e-06, - "loss": 0.0391, + "epoch": 3.6669626998223803, + "grad_norm": 0.3325805043726439, + "learning_rate": 2.0974378676631269e-07, + "loss": 0.0323, "step": 4129 }, { - "epoch": 1.8343326671108149, - "grad_norm": 0.42922588665741507, - "learning_rate": 6.567560231477379e-06, - "loss": 0.0371, + "epoch": 3.6678507992895204, + "grad_norm": 0.3715649450962102, + "learning_rate": 2.086344796899825e-07, + "loss": 0.0348, "step": 4130 }, { - "epoch": 1.8347768154563624, - "grad_norm": 0.4243115655078441, - "learning_rate": 6.56571930979629e-06, - "loss": 0.0464, + "epoch": 3.6687388987566605, + "grad_norm": 0.34322855603072555, + "learning_rate": 2.0752805137603914e-07, + "loss": 0.0308, "step": 4131 }, { - "epoch": 1.83522096380191, - "grad_norm": 0.5637304292180152, - "learning_rate": 6.563878152760992e-06, - "loss": 0.0587, + "epoch": 3.669626998223801, + "grad_norm": 0.3079586875780006, + "learning_rate": 2.064245024892475e-07, + "loss": 0.0286, "step": 4132 }, { - "epoch": 1.8356651121474572, - "grad_norm": 0.5080427318268489, - "learning_rate": 6.562036760648242e-06, - "loss": 0.0445, + "epoch": 3.6705150976909415, + "grad_norm": 0.4725254838554427, + "learning_rate": 2.0532383369264663e-07, + "loss": 0.0354, "step": 4133 }, { - "epoch": 1.8361092604930047, - "grad_norm": 0.5069784829710062, - "learning_rate": 6.560195133734833e-06, - "loss": 0.0329, + "epoch": 3.6714031971580816, + "grad_norm": 0.3993837533428711, + "learning_rate": 2.042260456475431e-07, + "loss": 0.0315, "step": 4134 }, { - "epoch": 1.8365534088385522, - "grad_norm": 0.3415574151612511, - "learning_rate": 6.55835327229759e-06, - "loss": 0.0292, + "epoch": 3.672291296625222, + "grad_norm": 0.37875590996423947, + "learning_rate": 2.0313113901351266e-07, + "loss": 0.0346, "step": 4135 }, { - "epoch": 1.8369975571840995, - "grad_norm": 0.4077726290203496, - "learning_rate": 6.556511176613381e-06, - "loss": 0.0347, + "epoch": 3.673179396092362, + "grad_norm": 0.33922148147615033, + "learning_rate": 2.0203911444840184e-07, + "loss": 0.0278, "step": 4136 }, { - "epoch": 1.8374417055296468, - "grad_norm": 0.5240761531044744, - "learning_rate": 6.554668846959102e-06, - "loss": 0.0454, + "epoch": 3.6740674955595027, + "grad_norm": 0.28865242326757395, + "learning_rate": 2.009499726083225e-07, + "loss": 0.0249, "step": 4137 }, { - "epoch": 1.8378858538751943, - "grad_norm": 0.5670009106024317, - "learning_rate": 6.552826283611684e-06, - "loss": 0.0488, + "epoch": 3.674955595026643, + "grad_norm": 0.4428997092567071, + "learning_rate": 1.9986371414765615e-07, + "loss": 0.0318, "step": 4138 }, { - "epoch": 1.8383300022207418, - "grad_norm": 0.5071828765865913, - "learning_rate": 6.5509834868480994e-06, - "loss": 0.0464, + "epoch": 3.6758436944937833, + "grad_norm": 0.39516553402180626, + "learning_rate": 1.987803397190524e-07, + "loss": 0.04, "step": 4139 }, { - "epoch": 1.8387741505662891, - "grad_norm": 0.6942163546904666, - "learning_rate": 6.54914045694535e-06, - "loss": 0.0558, + "epoch": 3.676731793960924, + "grad_norm": 0.33091490578280347, + "learning_rate": 1.9769984997342838e-07, + "loss": 0.028, "step": 4140 }, { - "epoch": 1.8392182989118364, - "grad_norm": 0.42462008853388156, - "learning_rate": 6.547297194180473e-06, - "loss": 0.028, + "epoch": 3.677619893428064, + "grad_norm": 0.3335222004990329, + "learning_rate": 1.966222455599659e-07, + "loss": 0.0295, "step": 4141 }, { - "epoch": 1.839662447257384, - "grad_norm": 0.3935408254645297, - "learning_rate": 6.545453698830545e-06, - "loss": 0.028, + "epoch": 3.678507992895204, + "grad_norm": 0.3245358540412729, + "learning_rate": 1.9554752712611368e-07, + "loss": 0.0257, "step": 4142 }, { - "epoch": 1.8401065956029314, - "grad_norm": 0.43222089529601715, - "learning_rate": 6.543609971172673e-06, - "loss": 0.0439, + "epoch": 3.6793960923623446, + "grad_norm": 0.2964035236786363, + "learning_rate": 1.9447569531758913e-07, + "loss": 0.0224, "step": 4143 }, { - "epoch": 1.8405507439484787, - "grad_norm": 0.8970440945065747, - "learning_rate": 6.541766011484001e-06, - "loss": 0.0569, + "epoch": 3.680284191829485, + "grad_norm": 0.3386943141170172, + "learning_rate": 1.9340675077837156e-07, + "loss": 0.03, "step": 4144 }, { - "epoch": 1.8409948922940262, - "grad_norm": 0.43018474254953293, - "learning_rate": 6.539921820041708e-06, - "loss": 0.0429, + "epoch": 3.681172291296625, + "grad_norm": 0.34385542148242515, + "learning_rate": 1.9234069415070944e-07, + "loss": 0.0276, "step": 4145 }, { - "epoch": 1.8414390406395738, - "grad_norm": 0.5272129102858879, - "learning_rate": 6.538077397123006e-06, - "loss": 0.042, + "epoch": 3.6820603907637657, + "grad_norm": 0.40638885409888764, + "learning_rate": 1.9127752607511263e-07, + "loss": 0.0329, "step": 4146 }, { - "epoch": 1.841883188985121, - "grad_norm": 0.5767831742144044, - "learning_rate": 6.536232743005144e-06, - "loss": 0.0505, + "epoch": 3.6829484902309058, + "grad_norm": 0.2950797220950981, + "learning_rate": 1.902172471903563e-07, + "loss": 0.0278, "step": 4147 }, { - "epoch": 1.8423273373306683, - "grad_norm": 0.5455867077197153, - "learning_rate": 6.534387857965405e-06, - "loss": 0.0499, + "epoch": 3.6838365896980463, + "grad_norm": 0.4421989622826594, + "learning_rate": 1.891598581334825e-07, + "loss": 0.0424, "step": 4148 }, { - "epoch": 1.8427714856762158, - "grad_norm": 0.4392562319330063, - "learning_rate": 6.532542742281105e-06, - "loss": 0.0404, + "epoch": 3.6847246891651864, + "grad_norm": 0.3369769711437819, + "learning_rate": 1.8810535953979304e-07, + "loss": 0.0386, "step": 4149 }, { - "epoch": 1.8432156340217634, - "grad_norm": 0.41150977239123576, - "learning_rate": 6.5306973962296e-06, - "loss": 0.0388, + "epoch": 3.685612788632327, + "grad_norm": 0.312409083332414, + "learning_rate": 1.8705375204285503e-07, + "loss": 0.029, "step": 4150 }, { - "epoch": 1.8436597823673107, - "grad_norm": 1.313911582342841, - "learning_rate": 6.528851820088273e-06, - "loss": 0.0595, + "epoch": 3.6865008880994674, + "grad_norm": 0.5383907853717661, + "learning_rate": 1.8600503627449972e-07, + "loss": 0.0335, "step": 4151 }, { - "epoch": 1.844103930712858, - "grad_norm": 0.3322454559510484, - "learning_rate": 6.527006014134546e-06, - "loss": 0.0274, + "epoch": 3.6873889875666075, + "grad_norm": 0.5185575405835587, + "learning_rate": 1.8495921286481755e-07, + "loss": 0.0369, "step": 4152 }, { - "epoch": 1.8445480790584055, - "grad_norm": 0.44783811513765753, - "learning_rate": 6.525159978645876e-06, - "loss": 0.0391, + "epoch": 3.6882770870337476, + "grad_norm": 0.688257047676778, + "learning_rate": 1.8391628244216531e-07, + "loss": 0.045, "step": 4153 }, { - "epoch": 1.844992227403953, - "grad_norm": 0.5214447787650061, - "learning_rate": 6.523313713899755e-06, - "loss": 0.0396, + "epoch": 3.689165186500888, + "grad_norm": 0.3663439018419157, + "learning_rate": 1.8287624563315842e-07, + "loss": 0.0284, "step": 4154 }, { - "epoch": 1.8454363757495003, - "grad_norm": 0.45172874563459914, - "learning_rate": 6.521467220173705e-06, - "loss": 0.0356, + "epoch": 3.6900532859680286, + "grad_norm": 0.34515058573917856, + "learning_rate": 1.8183910306267427e-07, + "loss": 0.0269, "step": 4155 }, { - "epoch": 1.8458805240950478, - "grad_norm": 0.3652608177780825, - "learning_rate": 6.519620497745286e-06, - "loss": 0.0413, + "epoch": 3.6909413854351687, + "grad_norm": 0.38381647555908577, + "learning_rate": 1.8080485535385327e-07, + "loss": 0.0349, "step": 4156 }, { - "epoch": 1.8463246724405953, - "grad_norm": 0.435616251575995, - "learning_rate": 6.5177735468920935e-06, - "loss": 0.0444, + "epoch": 3.691829484902309, + "grad_norm": 0.3845552524534645, + "learning_rate": 1.797735031280956e-07, + "loss": 0.0371, "step": 4157 }, { - "epoch": 1.8467688207861426, - "grad_norm": 0.7590654646546372, - "learning_rate": 6.515926367891754e-06, - "loss": 0.049, + "epoch": 3.6927175843694493, + "grad_norm": 0.3009108341562991, + "learning_rate": 1.787450470050589e-07, + "loss": 0.023, "step": 4158 }, { - "epoch": 1.8472129691316899, - "grad_norm": 0.45565692087369364, - "learning_rate": 6.51407896102193e-06, - "loss": 0.0377, + "epoch": 3.69360568383659, + "grad_norm": 0.34032029486921384, + "learning_rate": 1.77719487602665e-07, + "loss": 0.0337, "step": 4159 }, { - "epoch": 1.8476571174772374, - "grad_norm": 1.8021394181856782, - "learning_rate": 6.512231326560319e-06, - "loss": 0.0632, + "epoch": 3.69449378330373, + "grad_norm": 0.3783272835463549, + "learning_rate": 1.7669682553709323e-07, + "loss": 0.0413, "step": 4160 }, { - "epoch": 1.8481012658227849, - "grad_norm": 0.37814866423712995, - "learning_rate": 6.510383464784651e-06, - "loss": 0.0357, + "epoch": 3.6953818827708704, + "grad_norm": 0.36760174865925677, + "learning_rate": 1.7567706142278318e-07, + "loss": 0.0332, "step": 4161 }, { - "epoch": 1.8485454141683322, - "grad_norm": 0.5013395214860348, - "learning_rate": 6.508535375972691e-06, - "loss": 0.0359, + "epoch": 3.6962699822380105, + "grad_norm": 0.30744482215918945, + "learning_rate": 1.7466019587243088e-07, + "loss": 0.0255, "step": 4162 }, { - "epoch": 1.8489895625138795, - "grad_norm": 0.6460169691477774, - "learning_rate": 6.506687060402238e-06, - "loss": 0.055, + "epoch": 3.697158081705151, + "grad_norm": 0.4482029084810956, + "learning_rate": 1.7364622949699373e-07, + "loss": 0.0333, "step": 4163 }, { - "epoch": 1.8494337108594272, - "grad_norm": 0.731410974029027, - "learning_rate": 6.504838518351127e-06, - "loss": 0.0418, + "epoch": 3.698046181172291, + "grad_norm": 0.3382286333629386, + "learning_rate": 1.726351629056855e-07, + "loss": 0.0282, "step": 4164 }, { - "epoch": 1.8498778592049745, - "grad_norm": 0.4318554461256705, - "learning_rate": 6.502989750097224e-06, - "loss": 0.0423, + "epoch": 3.6989342806394316, + "grad_norm": 0.3602323470891706, + "learning_rate": 1.716269967059786e-07, + "loss": 0.0337, "step": 4165 }, { - "epoch": 1.8503220075505218, - "grad_norm": 0.4065429352215067, - "learning_rate": 6.501140755918428e-06, - "loss": 0.0325, + "epoch": 3.699822380106572, + "grad_norm": 0.3716082610197423, + "learning_rate": 1.706217315036024e-07, + "loss": 0.0247, "step": 4166 }, { - "epoch": 1.8507661558960693, - "grad_norm": 0.5268522126273875, - "learning_rate": 6.499291536092679e-06, - "loss": 0.052, + "epoch": 3.7007104795737122, + "grad_norm": 0.40300697352721165, + "learning_rate": 1.6961936790254207e-07, + "loss": 0.0315, "step": 4167 }, { - "epoch": 1.8512103042416168, - "grad_norm": 0.4963236453076724, - "learning_rate": 6.497442090897943e-06, - "loss": 0.0507, + "epoch": 3.7015985790408523, + "grad_norm": 0.27629597358248276, + "learning_rate": 1.6861990650504256e-07, + "loss": 0.0214, "step": 4168 }, { - "epoch": 1.851654452587164, - "grad_norm": 0.5716468049405105, - "learning_rate": 6.495592420612224e-06, - "loss": 0.0557, + "epoch": 3.702486678507993, + "grad_norm": 0.3685144138535106, + "learning_rate": 1.6762334791160296e-07, + "loss": 0.0378, "step": 4169 }, { - "epoch": 1.8520986009327114, - "grad_norm": 0.36399318359325455, - "learning_rate": 6.493742525513556e-06, - "loss": 0.0318, + "epoch": 3.7033747779751334, + "grad_norm": 0.473328561248784, + "learning_rate": 1.6662969272097652e-07, + "loss": 0.0307, "step": 4170 }, { - "epoch": 1.852542749278259, - "grad_norm": 0.4368593597512668, - "learning_rate": 6.491892405880015e-06, - "loss": 0.036, + "epoch": 3.7042628774422734, + "grad_norm": 0.38700449724979513, + "learning_rate": 1.6563894153017567e-07, + "loss": 0.0335, "step": 4171 }, { - "epoch": 1.8529868976238064, - "grad_norm": 0.490968844938355, - "learning_rate": 6.490042061989701e-06, - "loss": 0.0384, + "epoch": 3.705150976909414, + "grad_norm": 0.36352673886304254, + "learning_rate": 1.6465109493446708e-07, + "loss": 0.031, "step": 4172 }, { - "epoch": 1.8534310459693537, - "grad_norm": 0.4895671851812533, - "learning_rate": 6.4881914941207545e-06, - "loss": 0.0526, + "epoch": 3.706039076376554, + "grad_norm": 0.3335281985604826, + "learning_rate": 1.6366615352737092e-07, + "loss": 0.0296, "step": 4173 }, { - "epoch": 1.8538751943149012, - "grad_norm": 0.4582350620997356, - "learning_rate": 6.486340702551347e-06, - "loss": 0.044, + "epoch": 3.7069271758436946, + "grad_norm": 0.3298592306508931, + "learning_rate": 1.6268411790066218e-07, + "loss": 0.0307, "step": 4174 }, { - "epoch": 1.8543193426604487, - "grad_norm": 0.4289059463290211, - "learning_rate": 6.484489687559682e-06, - "loss": 0.0379, + "epoch": 3.7078152753108347, + "grad_norm": 0.34974447407247544, + "learning_rate": 1.6170498864437112e-07, + "loss": 0.0322, "step": 4175 }, { - "epoch": 1.854763491005996, - "grad_norm": 0.3956879154243665, - "learning_rate": 6.4826384494240006e-06, - "loss": 0.0324, + "epoch": 3.708703374777975, + "grad_norm": 0.37732363449954476, + "learning_rate": 1.6072876634677993e-07, + "loss": 0.0378, "step": 4176 }, { - "epoch": 1.8552076393515433, - "grad_norm": 0.4900266012746407, - "learning_rate": 6.480786988422575e-06, - "loss": 0.0472, + "epoch": 3.7095914742451157, + "grad_norm": 0.3972473100874786, + "learning_rate": 1.5975545159442717e-07, + "loss": 0.0266, "step": 4177 }, { - "epoch": 1.8556517876970908, - "grad_norm": 0.4376815416787356, - "learning_rate": 6.47893530483371e-06, - "loss": 0.0631, + "epoch": 3.710479573712256, + "grad_norm": 0.31543674418786594, + "learning_rate": 1.5878504497210123e-07, + "loss": 0.0283, "step": 4178 }, { - "epoch": 1.8560959360426383, - "grad_norm": 0.3452667188092541, - "learning_rate": 6.4770833989357464e-06, - "loss": 0.031, + "epoch": 3.711367673179396, + "grad_norm": 0.2643553116005103, + "learning_rate": 1.5781754706284512e-07, + "loss": 0.0284, "step": 4179 }, { - "epoch": 1.8565400843881856, - "grad_norm": 0.4575028834398297, - "learning_rate": 6.4752312710070565e-06, - "loss": 0.0357, + "epoch": 3.7122557726465364, + "grad_norm": 0.44185218697847933, + "learning_rate": 1.568529584479539e-07, + "loss": 0.0399, "step": 4180 }, { - "epoch": 1.856984232733733, - "grad_norm": 0.35802922814056004, - "learning_rate": 6.4733789213260465e-06, - "loss": 0.0363, + "epoch": 3.713143872113677, + "grad_norm": 0.3586063831653754, + "learning_rate": 1.558912797069745e-07, + "loss": 0.0265, "step": 4181 }, { - "epoch": 1.8574283810792804, - "grad_norm": 0.4610516053299678, - "learning_rate": 6.471526350171158e-06, - "loss": 0.0421, + "epoch": 3.714031971580817, + "grad_norm": 0.3405430171087667, + "learning_rate": 1.5493251141770527e-07, + "loss": 0.0333, "step": 4182 }, { - "epoch": 1.857872529424828, - "grad_norm": 0.41994717663617964, - "learning_rate": 6.46967355782086e-06, - "loss": 0.0427, + "epoch": 3.7149200710479575, + "grad_norm": 0.423317334564177, + "learning_rate": 1.5397665415619655e-07, + "loss": 0.038, "step": 4183 }, { - "epoch": 1.8583166777703752, - "grad_norm": 0.41511781298154615, - "learning_rate": 6.4678205445536615e-06, - "loss": 0.0388, + "epoch": 3.7158081705150976, + "grad_norm": 0.32880566440929193, + "learning_rate": 1.5302370849674952e-07, + "loss": 0.0309, "step": 4184 }, { - "epoch": 1.8587608261159227, - "grad_norm": 0.4226703337206278, - "learning_rate": 6.465967310648103e-06, - "loss": 0.0363, + "epoch": 3.716696269982238, + "grad_norm": 0.34239075545054704, + "learning_rate": 1.5207367501191617e-07, + "loss": 0.0255, "step": 4185 }, { - "epoch": 1.8592049744614703, - "grad_norm": 0.3756285974893726, - "learning_rate": 6.464113856382752e-06, - "loss": 0.0431, + "epoch": 3.717584369449378, + "grad_norm": 0.3189902999675348, + "learning_rate": 1.5112655427249656e-07, + "loss": 0.0261, "step": 4186 }, { - "epoch": 1.8596491228070176, - "grad_norm": 0.7660609289210849, - "learning_rate": 6.46226018203622e-06, - "loss": 0.0548, + "epoch": 3.7184724689165187, + "grad_norm": 0.3956973296104195, + "learning_rate": 1.501823468475444e-07, + "loss": 0.031, "step": 4187 }, { - "epoch": 1.8600932711525648, - "grad_norm": 0.44778181358321073, - "learning_rate": 6.460406287887142e-06, - "loss": 0.0402, + "epoch": 3.719360568383659, + "grad_norm": 0.41930525520246403, + "learning_rate": 1.4924105330436034e-07, + "loss": 0.0365, "step": 4188 }, { - "epoch": 1.8605374194981124, - "grad_norm": 0.4653083552128945, - "learning_rate": 6.4585521742141924e-06, - "loss": 0.0429, + "epoch": 3.7202486678507993, + "grad_norm": 0.3660565318001229, + "learning_rate": 1.4830267420849587e-07, + "loss": 0.027, "step": 4189 }, { - "epoch": 1.8609815678436599, - "grad_norm": 0.4092361015012088, - "learning_rate": 6.456697841296072e-06, - "loss": 0.0538, + "epoch": 3.7211367673179394, + "grad_norm": 0.3434992461021154, + "learning_rate": 1.4736721012375055e-07, + "loss": 0.0339, "step": 4190 }, { - "epoch": 1.8614257161892072, - "grad_norm": 0.3943066511337006, - "learning_rate": 6.4548432894115236e-06, - "loss": 0.0422, + "epoch": 3.72202486678508, + "grad_norm": 0.3560428097284687, + "learning_rate": 1.4643466161217145e-07, + "loss": 0.032, "step": 4191 }, { - "epoch": 1.8618698645347544, - "grad_norm": 0.574897072037143, - "learning_rate": 6.452988518839314e-06, - "loss": 0.0638, + "epoch": 3.7229129662522205, + "grad_norm": 0.35961153697449205, + "learning_rate": 1.4550502923405652e-07, + "loss": 0.026, "step": 4192 }, { - "epoch": 1.8623140128803022, - "grad_norm": 0.3525774963677566, - "learning_rate": 6.451133529858249e-06, - "loss": 0.0308, + "epoch": 3.7238010657193605, + "grad_norm": 0.34050757138628135, + "learning_rate": 1.445783135479495e-07, + "loss": 0.031, "step": 4193 }, { - "epoch": 1.8627581612258495, - "grad_norm": 0.47268074934306253, - "learning_rate": 6.449278322747164e-06, - "loss": 0.0443, + "epoch": 3.7246891651865006, + "grad_norm": 0.3436294314699083, + "learning_rate": 1.4365451511064276e-07, + "loss": 0.0285, "step": 4194 }, { - "epoch": 1.8632023095713968, - "grad_norm": 0.49332043887118054, - "learning_rate": 6.447422897784927e-06, - "loss": 0.0486, + "epoch": 3.725577264653641, + "grad_norm": 0.38277294080370794, + "learning_rate": 1.427336344771757e-07, + "loss": 0.0334, "step": 4195 }, { - "epoch": 1.8636464579169443, - "grad_norm": 0.4599615865532288, - "learning_rate": 6.445567255250442e-06, - "loss": 0.0374, + "epoch": 3.7264653641207817, + "grad_norm": 0.31483558549985985, + "learning_rate": 1.4181567220083403e-07, + "loss": 0.0278, "step": 4196 }, { - "epoch": 1.8640906062624918, - "grad_norm": 0.6216566278556651, - "learning_rate": 6.443711395422641e-06, - "loss": 0.0458, + "epoch": 3.7273534635879217, + "grad_norm": 0.36868641216833004, + "learning_rate": 1.4090062883315103e-07, + "loss": 0.0289, "step": 4197 }, { - "epoch": 1.864534754608039, - "grad_norm": 0.45476603291439793, - "learning_rate": 6.4418553185804946e-06, - "loss": 0.0411, + "epoch": 3.7282415630550623, + "grad_norm": 0.3247370386901695, + "learning_rate": 1.3998850492390637e-07, + "loss": 0.0325, "step": 4198 }, { - "epoch": 1.8649789029535864, - "grad_norm": 0.5568340998927974, - "learning_rate": 6.4399990250030005e-06, - "loss": 0.0469, + "epoch": 3.7291296625222023, + "grad_norm": 0.34494670182583104, + "learning_rate": 1.3907930102112334e-07, + "loss": 0.0241, "step": 4199 }, { - "epoch": 1.8654230512991339, - "grad_norm": 0.5792755550634929, - "learning_rate": 6.438142514969192e-06, - "loss": 0.0486, + "epoch": 3.730017761989343, + "grad_norm": 0.33992028442056216, + "learning_rate": 1.3817301767107327e-07, + "loss": 0.0275, "step": 4200 }, { - "epoch": 1.8658671996446814, - "grad_norm": 0.5484197721468562, - "learning_rate": 6.436285788758133e-06, - "loss": 0.0373, + "epoch": 3.730905861456483, + "grad_norm": 0.3610905282697854, + "learning_rate": 1.3726965541827287e-07, + "loss": 0.0263, "step": 4201 }, { - "epoch": 1.8663113479902287, - "grad_norm": 0.408467206743211, - "learning_rate": 6.434428846648923e-06, - "loss": 0.0345, + "epoch": 3.7317939609236235, + "grad_norm": 0.32422628854444807, + "learning_rate": 1.3636921480548239e-07, + "loss": 0.0283, "step": 4202 }, { - "epoch": 1.8667554963357762, - "grad_norm": 0.4273190617477224, - "learning_rate": 6.43257168892069e-06, - "loss": 0.0521, + "epoch": 3.732682060390764, + "grad_norm": 0.35359932676372424, + "learning_rate": 1.3547169637370627e-07, + "loss": 0.0318, "step": 4203 }, { - "epoch": 1.8671996446813237, - "grad_norm": 0.913822616436895, - "learning_rate": 6.430714315852595e-06, - "loss": 0.0731, + "epoch": 3.733570159857904, + "grad_norm": 0.4050953095439511, + "learning_rate": 1.3457710066219542e-07, + "loss": 0.031, "step": 4204 }, { - "epoch": 1.867643793026871, - "grad_norm": 0.3654209870999856, - "learning_rate": 6.428856727723838e-06, - "loss": 0.0426, + "epoch": 3.734458259325044, + "grad_norm": 0.419891533641244, + "learning_rate": 1.336854282084432e-07, + "loss": 0.0427, "step": 4205 }, { - "epoch": 1.8680879413724183, - "grad_norm": 0.35677094964790335, - "learning_rate": 6.426998924813641e-06, - "loss": 0.0354, + "epoch": 3.7353463587921847, + "grad_norm": 0.3003440978143265, + "learning_rate": 1.3279667954818664e-07, + "loss": 0.0236, "step": 4206 }, { - "epoch": 1.8685320897179658, - "grad_norm": 0.7442434568098797, - "learning_rate": 6.425140907401266e-06, - "loss": 0.057, + "epoch": 3.736234458259325, + "grad_norm": 0.37555342930424257, + "learning_rate": 1.3191085521540635e-07, + "loss": 0.0277, "step": 4207 }, { - "epoch": 1.8689762380635133, - "grad_norm": 0.5792617957834036, - "learning_rate": 6.423282675766002e-06, - "loss": 0.0409, + "epoch": 3.7371225577264653, + "grad_norm": 0.361372911496851, + "learning_rate": 1.3102795574232607e-07, + "loss": 0.0285, "step": 4208 }, { - "epoch": 1.8694203864090606, - "grad_norm": 0.7180220670929347, - "learning_rate": 6.4214242301871766e-06, - "loss": 0.055, + "epoch": 3.738010657193606, + "grad_norm": 0.34710037215510453, + "learning_rate": 1.3014798165941144e-07, + "loss": 0.0349, "step": 4209 }, { - "epoch": 1.869864534754608, - "grad_norm": 0.369340852755107, - "learning_rate": 6.4195655709441425e-06, - "loss": 0.0365, + "epoch": 3.738898756660746, + "grad_norm": 0.37898887202273684, + "learning_rate": 1.292709334953729e-07, + "loss": 0.0305, "step": 4210 }, { - "epoch": 1.8703086831001554, - "grad_norm": 0.5907561592268787, - "learning_rate": 6.41770669831629e-06, - "loss": 0.05, + "epoch": 3.7397868561278864, + "grad_norm": 0.413910408702082, + "learning_rate": 1.283968117771589e-07, + "loss": 0.0324, "step": 4211 }, { - "epoch": 1.870752831445703, - "grad_norm": 0.4737879055532281, - "learning_rate": 6.415847612583036e-06, - "loss": 0.0387, + "epoch": 3.7406749555950265, + "grad_norm": 0.29718868611173865, + "learning_rate": 1.275256170299627e-07, + "loss": 0.0268, "step": 4212 }, { - "epoch": 1.8711969797912502, - "grad_norm": 0.45358931580739376, - "learning_rate": 6.413988314023837e-06, - "loss": 0.0455, + "epoch": 3.741563055062167, + "grad_norm": 0.32730887641162354, + "learning_rate": 1.266573497772189e-07, + "loss": 0.0354, "step": 4213 }, { - "epoch": 1.8716411281367977, - "grad_norm": 0.396008499582763, - "learning_rate": 6.412128802918174e-06, - "loss": 0.0428, + "epoch": 3.7424511545293075, + "grad_norm": 0.35097057765889, + "learning_rate": 1.257920105406013e-07, + "loss": 0.0268, "step": 4214 }, { - "epoch": 1.8720852764823452, - "grad_norm": 0.446819083411039, - "learning_rate": 6.410269079545563e-06, - "loss": 0.0353, + "epoch": 3.7433392539964476, + "grad_norm": 0.38122809128474444, + "learning_rate": 1.2492959984002573e-07, + "loss": 0.0335, "step": 4215 }, { - "epoch": 1.8725294248278925, - "grad_norm": 0.5360694926018593, - "learning_rate": 6.408409144185555e-06, - "loss": 0.0394, + "epoch": 3.7442273534635877, + "grad_norm": 0.4330875326163401, + "learning_rate": 1.2407011819364878e-07, + "loss": 0.0287, "step": 4216 }, { - "epoch": 1.8729735731734398, - "grad_norm": 0.419696086205574, - "learning_rate": 6.406548997117728e-06, - "loss": 0.0442, + "epoch": 3.7451154529307282, + "grad_norm": 0.3957769309250494, + "learning_rate": 1.2321356611786627e-07, + "loss": 0.0339, "step": 4217 }, { - "epoch": 1.8734177215189873, - "grad_norm": 0.5541434243753425, - "learning_rate": 6.404688638621691e-06, - "loss": 0.0362, + "epoch": 3.7460035523978688, + "grad_norm": 0.4296555474055646, + "learning_rate": 1.2235994412731543e-07, + "loss": 0.038, "step": 4218 }, { - "epoch": 1.8738618698645348, - "grad_norm": 0.5424748341749609, - "learning_rate": 6.402828068977092e-06, - "loss": 0.0385, + "epoch": 3.746891651865009, + "grad_norm": 0.3301880668188309, + "learning_rate": 1.2150925273487045e-07, + "loss": 0.0271, "step": 4219 }, { - "epoch": 1.8743060182100821, - "grad_norm": 0.5755082545471814, - "learning_rate": 6.400967288463604e-06, - "loss": 0.0408, + "epoch": 3.7477797513321494, + "grad_norm": 0.4316727360217353, + "learning_rate": 1.2066149245164692e-07, + "loss": 0.0334, "step": 4220 }, { - "epoch": 1.8747501665556294, - "grad_norm": 0.5534620278515894, - "learning_rate": 6.399106297360934e-06, - "loss": 0.0394, + "epoch": 3.7486678507992894, + "grad_norm": 0.3544806840083174, + "learning_rate": 1.1981666378699908e-07, + "loss": 0.0338, "step": 4221 }, { - "epoch": 1.875194314901177, - "grad_norm": 0.43339133128599205, - "learning_rate": 6.397245095948822e-06, - "loss": 0.0447, + "epoch": 3.74955595026643, + "grad_norm": 0.3617935118362104, + "learning_rate": 1.189747672485192e-07, + "loss": 0.0355, "step": 4222 }, { - "epoch": 1.8756384632467245, - "grad_norm": 0.4242062003649521, - "learning_rate": 6.395383684507036e-06, - "loss": 0.0367, + "epoch": 3.75044404973357, + "grad_norm": 0.39600797799712956, + "learning_rate": 1.1813580334203711e-07, + "loss": 0.0313, "step": 4223 }, { - "epoch": 1.8760826115922717, - "grad_norm": 0.429738984394203, - "learning_rate": 6.393522063315379e-06, - "loss": 0.0434, + "epoch": 3.7513321492007106, + "grad_norm": 0.37376421002392135, + "learning_rate": 1.172997725716224e-07, + "loss": 0.0353, "step": 4224 }, { - "epoch": 1.8765267599378193, - "grad_norm": 0.5905398248805794, - "learning_rate": 6.391660232653685e-06, - "loss": 0.0387, + "epoch": 3.7522202486678506, + "grad_norm": 0.39413298598820423, + "learning_rate": 1.1646667543958157e-07, + "loss": 0.03, "step": 4225 }, { - "epoch": 1.8769709082833668, - "grad_norm": 0.46069485833459556, - "learning_rate": 6.389798192801816e-06, - "loss": 0.042, + "epoch": 3.753108348134991, + "grad_norm": 0.3137860846716177, + "learning_rate": 1.1563651244645869e-07, + "loss": 0.0255, "step": 4226 }, { - "epoch": 1.877415056628914, - "grad_norm": 0.3636972167781559, - "learning_rate": 6.387935944039672e-06, - "loss": 0.0355, + "epoch": 3.7539964476021312, + "grad_norm": 0.37435698529593536, + "learning_rate": 1.148092840910342e-07, + "loss": 0.0398, "step": 4227 }, { - "epoch": 1.8778592049744613, - "grad_norm": 0.4783149493763749, - "learning_rate": 6.3860734866471775e-06, - "loss": 0.0404, + "epoch": 3.7548845470692718, + "grad_norm": 0.3541195168548642, + "learning_rate": 1.1398499087032666e-07, + "loss": 0.0319, "step": 4228 }, { - "epoch": 1.8783033533200089, - "grad_norm": 0.3543783240099781, - "learning_rate": 6.384210820904292e-06, - "loss": 0.0291, + "epoch": 3.7557726465364123, + "grad_norm": 0.3604044892364321, + "learning_rate": 1.1316363327958879e-07, + "loss": 0.0341, "step": 4229 }, { - "epoch": 1.8787475016655564, - "grad_norm": 0.4823964461174852, - "learning_rate": 6.382347947091008e-06, - "loss": 0.0454, + "epoch": 3.7566607460035524, + "grad_norm": 0.4187936879223956, + "learning_rate": 1.1234521181231306e-07, + "loss": 0.0406, "step": 4230 }, { - "epoch": 1.8791916500111037, - "grad_norm": 0.7938854928835402, - "learning_rate": 6.380484865487346e-06, - "loss": 0.0651, + "epoch": 3.7575488454706925, + "grad_norm": 0.32822177029173305, + "learning_rate": 1.1152972696022447e-07, + "loss": 0.0314, "step": 4231 }, { - "epoch": 1.879635798356651, - "grad_norm": 0.43202860148998995, - "learning_rate": 6.378621576373356e-06, - "loss": 0.0373, + "epoch": 3.758436944937833, + "grad_norm": 0.35407604113573227, + "learning_rate": 1.107171792132855e-07, + "loss": 0.0307, "step": 4232 }, { - "epoch": 1.8800799467021987, - "grad_norm": 0.40297846856972874, - "learning_rate": 6.376758080029126e-06, - "loss": 0.0399, + "epoch": 3.7593250444049735, + "grad_norm": 0.3546460353575967, + "learning_rate": 1.0990756905969402e-07, + "loss": 0.0291, "step": 4233 }, { - "epoch": 1.880524095047746, - "grad_norm": 0.6777187231862498, - "learning_rate": 6.37489437673477e-06, - "loss": 0.0451, + "epoch": 3.7602131438721136, + "grad_norm": 0.4577208780159834, + "learning_rate": 1.0910089698588145e-07, + "loss": 0.0328, "step": 4234 }, { - "epoch": 1.8809682433932933, - "grad_norm": 0.449724408023701, - "learning_rate": 6.3730304667704315e-06, - "loss": 0.0364, + "epoch": 3.761101243339254, + "grad_norm": 0.35986670260273723, + "learning_rate": 1.0829716347651509e-07, + "loss": 0.0274, "step": 4235 }, { - "epoch": 1.8814123917388408, - "grad_norm": 0.508560023808873, - "learning_rate": 6.371166350416293e-06, - "loss": 0.0357, + "epoch": 3.761989342806394, + "grad_norm": 0.29072196982265625, + "learning_rate": 1.0749636901449701e-07, + "loss": 0.0261, "step": 4236 }, { - "epoch": 1.8818565400843883, - "grad_norm": 0.45097496833863476, - "learning_rate": 6.369302027952559e-06, - "loss": 0.0314, + "epoch": 3.7628774422735347, + "grad_norm": 0.37516091498542603, + "learning_rate": 1.0669851408096233e-07, + "loss": 0.0335, "step": 4237 }, { - "epoch": 1.8823006884299356, - "grad_norm": 0.3904261571231129, - "learning_rate": 6.36743749965947e-06, - "loss": 0.0417, + "epoch": 3.763765541740675, + "grad_norm": 0.3523270057886884, + "learning_rate": 1.0590359915528092e-07, + "loss": 0.0331, "step": 4238 }, { - "epoch": 1.8827448367754829, - "grad_norm": 0.3804238828895874, - "learning_rate": 6.365572765817295e-06, - "loss": 0.039, + "epoch": 3.7646536412078153, + "grad_norm": 0.36768387999095853, + "learning_rate": 1.0511162471505631e-07, + "loss": 0.032, "step": 4239 }, { - "epoch": 1.8831889851210304, - "grad_norm": 0.5067555487119486, - "learning_rate": 6.363707826706336e-06, - "loss": 0.0382, + "epoch": 3.765541740674956, + "grad_norm": 0.428925812305749, + "learning_rate": 1.0432259123612399e-07, + "loss": 0.0443, "step": 4240 }, { - "epoch": 1.883633133466578, - "grad_norm": 0.4732566932753566, - "learning_rate": 6.3618426826069265e-06, - "loss": 0.0471, + "epoch": 3.766429840142096, + "grad_norm": 0.31788241076246565, + "learning_rate": 1.035364991925536e-07, + "loss": 0.0297, "step": 4241 }, { - "epoch": 1.8840772818121252, - "grad_norm": 0.4900514966702779, - "learning_rate": 6.359977333799429e-06, - "loss": 0.0441, + "epoch": 3.767317939609236, + "grad_norm": 0.3583843501974432, + "learning_rate": 1.0275334905664847e-07, + "loss": 0.0253, "step": 4242 }, { - "epoch": 1.8845214301576727, - "grad_norm": 0.5196575689420362, - "learning_rate": 6.358111780564233e-06, - "loss": 0.0411, + "epoch": 3.7682060390763765, + "grad_norm": 0.30819284282278403, + "learning_rate": 1.0197314129894109e-07, + "loss": 0.0256, "step": 4243 }, { - "epoch": 1.8849655785032202, - "grad_norm": 0.43773037282352034, - "learning_rate": 6.35624602318177e-06, - "loss": 0.0402, + "epoch": 3.769094138543517, + "grad_norm": 0.35591547028474246, + "learning_rate": 1.0119587638819983e-07, + "loss": 0.0304, "step": 4244 }, { - "epoch": 1.8854097268487675, - "grad_norm": 0.589548376792247, - "learning_rate": 6.354380061932489e-06, - "loss": 0.0381, + "epoch": 3.769982238010657, + "grad_norm": 0.3476573908870843, + "learning_rate": 1.0042155479142335e-07, + "loss": 0.0316, "step": 4245 }, { - "epoch": 1.8858538751943148, - "grad_norm": 0.5624692048798179, - "learning_rate": 6.352513897096878e-06, - "loss": 0.0394, + "epoch": 3.7708703374777977, + "grad_norm": 0.3897359438223438, + "learning_rate": 9.965017697384116e-08, + "loss": 0.0279, "step": 4246 }, { - "epoch": 1.8862980235398623, - "grad_norm": 0.48689251252375637, - "learning_rate": 6.3506475289554534e-06, - "loss": 0.0524, + "epoch": 3.7717584369449377, + "grad_norm": 0.3523787578693443, + "learning_rate": 9.888174339891421e-08, + "loss": 0.0312, "step": 4247 }, { - "epoch": 1.8867421718854098, - "grad_norm": 0.8189635814115437, - "learning_rate": 6.3487809577887625e-06, - "loss": 0.0587, + "epoch": 3.7726465364120783, + "grad_norm": 0.5531277406393773, + "learning_rate": 9.811625452833651e-08, + "loss": 0.0409, "step": 4248 }, { - "epoch": 1.8871863202309571, - "grad_norm": 0.43746947984188156, - "learning_rate": 6.346914183877379e-06, - "loss": 0.0316, + "epoch": 3.7735346358792183, + "grad_norm": 0.381447451834388, + "learning_rate": 9.73537108220296e-08, + "loss": 0.0369, "step": 4249 }, { - "epoch": 1.8876304685765044, - "grad_norm": 0.5538154749300077, - "learning_rate": 6.345047207501916e-06, - "loss": 0.0471, + "epoch": 3.774422735346359, + "grad_norm": 0.43838551605035114, + "learning_rate": 9.659411273814922e-08, + "loss": 0.0241, "step": 4250 }, { - "epoch": 1.888074616922052, - "grad_norm": 0.4360679802124053, - "learning_rate": 6.34318002894301e-06, - "loss": 0.0371, + "epoch": 3.7753108348134994, + "grad_norm": 0.3433131363492874, + "learning_rate": 9.583746073307809e-08, + "loss": 0.029, "step": 4251 }, { - "epoch": 1.8885187652675994, - "grad_norm": 0.5075276918535925, - "learning_rate": 6.341312648481328e-06, - "loss": 0.0417, + "epoch": 3.7761989342806395, + "grad_norm": 0.4028789491262155, + "learning_rate": 9.508375526142976e-08, + "loss": 0.0254, "step": 4252 }, { - "epoch": 1.8889629136131467, - "grad_norm": 0.49156392064774324, - "learning_rate": 6.339445066397569e-06, - "loss": 0.0423, + "epoch": 3.7770870337477795, + "grad_norm": 0.4384693436592215, + "learning_rate": 9.433299677604868e-08, + "loss": 0.029, "step": 4253 }, { - "epoch": 1.8894070619586942, - "grad_norm": 0.852671422019365, - "learning_rate": 6.337577282972465e-06, - "loss": 0.0524, + "epoch": 3.77797513321492, + "grad_norm": 0.35262305060561455, + "learning_rate": 9.35851857280079e-08, + "loss": 0.0368, "step": 4254 }, { - "epoch": 1.8898512103042417, - "grad_norm": 0.37799180602912125, - "learning_rate": 6.335709298486773e-06, - "loss": 0.0303, + "epoch": 3.7788632326820606, + "grad_norm": 0.39423915341441945, + "learning_rate": 9.284032256660857e-08, + "loss": 0.0327, "step": 4255 }, { - "epoch": 1.890295358649789, - "grad_norm": 0.4549492374420902, - "learning_rate": 6.333841113221283e-06, - "loss": 0.0376, + "epoch": 3.7797513321492007, + "grad_norm": 0.3333976254416016, + "learning_rate": 9.209840773938161e-08, + "loss": 0.0304, "step": 4256 }, { - "epoch": 1.8907395069953363, - "grad_norm": 0.4682663299692366, - "learning_rate": 6.331972727456816e-06, - "loss": 0.0264, + "epoch": 3.780639431616341, + "grad_norm": 0.3813482214944881, + "learning_rate": 9.135944169208766e-08, + "loss": 0.0363, "step": 4257 }, { - "epoch": 1.8911836553408838, - "grad_norm": 0.3907535099519968, - "learning_rate": 6.330104141474223e-06, - "loss": 0.0382, + "epoch": 3.7815275310834813, + "grad_norm": 0.33686074845626796, + "learning_rate": 9.062342486871267e-08, + "loss": 0.0275, "step": 4258 }, { - "epoch": 1.8916278036864314, - "grad_norm": 0.43935294276918296, - "learning_rate": 6.328235355554382e-06, - "loss": 0.0362, + "epoch": 3.782415630550622, + "grad_norm": 0.32966389403848834, + "learning_rate": 8.989035771147458e-08, + "loss": 0.0271, "step": 4259 }, { - "epoch": 1.8920719520319786, - "grad_norm": 0.576541252998893, - "learning_rate": 6.326366369978204e-06, - "loss": 0.0481, + "epoch": 3.783303730017762, + "grad_norm": 0.37048752323192036, + "learning_rate": 8.916024066081552e-08, + "loss": 0.0385, "step": 4260 }, { - "epoch": 1.892516100377526, - "grad_norm": 0.43778261815885045, - "learning_rate": 6.324497185026631e-06, - "loss": 0.034, + "epoch": 3.7841918294849024, + "grad_norm": 0.3761651368578672, + "learning_rate": 8.84330741554068e-08, + "loss": 0.0312, "step": 4261 }, { - "epoch": 1.8929602487230737, - "grad_norm": 0.44466828672780784, - "learning_rate": 6.3226278009806315e-06, - "loss": 0.0307, + "epoch": 3.7850799289520425, + "grad_norm": 0.2769064763216777, + "learning_rate": 8.770885863214729e-08, + "loss": 0.0258, "step": 4262 }, { - "epoch": 1.893404397068621, - "grad_norm": 0.41339084625105765, - "learning_rate": 6.320758218121205e-06, - "loss": 0.0443, + "epoch": 3.785968028419183, + "grad_norm": 0.3477019498066257, + "learning_rate": 8.698759452616112e-08, + "loss": 0.0313, "step": 4263 }, { - "epoch": 1.8938485454141682, - "grad_norm": 0.4481199734504317, - "learning_rate": 6.318888436729382e-06, - "loss": 0.0354, + "epoch": 3.786856127886323, + "grad_norm": 0.35835074590053034, + "learning_rate": 8.626928227080167e-08, + "loss": 0.0311, "step": 4264 }, { - "epoch": 1.8942926937597158, - "grad_norm": 0.6150629213528874, - "learning_rate": 6.317018457086226e-06, - "loss": 0.046, + "epoch": 3.7877442273534636, + "grad_norm": 0.37515477929002006, + "learning_rate": 8.555392229764592e-08, + "loss": 0.0376, "step": 4265 }, { - "epoch": 1.8947368421052633, - "grad_norm": 0.8066271182355852, - "learning_rate": 6.31514827947282e-06, - "loss": 0.0425, + "epoch": 3.788632326820604, + "grad_norm": 0.36780112941468734, + "learning_rate": 8.484151503650062e-08, + "loss": 0.0275, "step": 4266 }, { - "epoch": 1.8951809904508106, - "grad_norm": 0.5827323877974867, - "learning_rate": 6.31327790417029e-06, - "loss": 0.0457, + "epoch": 3.789520426287744, + "grad_norm": 0.30232429359883534, + "learning_rate": 8.413206091539505e-08, + "loss": 0.0236, "step": 4267 }, { - "epoch": 1.8956251387963579, - "grad_norm": 0.4984272032963798, - "learning_rate": 6.311407331459781e-06, - "loss": 0.03, + "epoch": 3.7904085257548843, + "grad_norm": 0.3621536801089417, + "learning_rate": 8.342556036058492e-08, + "loss": 0.0364, "step": 4268 }, { - "epoch": 1.8960692871419054, - "grad_norm": 0.5018245857439331, - "learning_rate": 6.309536561622474e-06, - "loss": 0.0399, + "epoch": 3.791296625222025, + "grad_norm": 0.33754912437109746, + "learning_rate": 8.272201379655398e-08, + "loss": 0.0334, "step": 4269 }, { - "epoch": 1.8965134354874529, - "grad_norm": 0.46552046167173733, - "learning_rate": 6.307665594939575e-06, - "loss": 0.0379, + "epoch": 3.7921847246891653, + "grad_norm": 0.378479374416979, + "learning_rate": 8.20214216460069e-08, + "loss": 0.0301, "step": 4270 }, { - "epoch": 1.8969575838330002, - "grad_norm": 0.48037243813958513, - "learning_rate": 6.3057944316923246e-06, - "loss": 0.0293, + "epoch": 3.7930728241563054, + "grad_norm": 0.39100241252671175, + "learning_rate": 8.132378432987753e-08, + "loss": 0.0337, "step": 4271 }, { - "epoch": 1.8974017321785477, - "grad_norm": 0.4527119332615495, - "learning_rate": 6.30392307216199e-06, - "loss": 0.0386, + "epoch": 3.793960923623446, + "grad_norm": 0.39194682078387527, + "learning_rate": 8.062910226732112e-08, + "loss": 0.0383, "step": 4272 }, { - "epoch": 1.8978458805240952, - "grad_norm": 0.419684782365093, - "learning_rate": 6.3020515166298665e-06, - "loss": 0.0317, + "epoch": 3.794849023090586, + "grad_norm": 0.34143733677569477, + "learning_rate": 7.993737587571825e-08, + "loss": 0.0303, "step": 4273 }, { - "epoch": 1.8982900288696425, - "grad_norm": 0.418987789446061, - "learning_rate": 6.300179765377283e-06, - "loss": 0.0361, + "epoch": 3.7957371225577266, + "grad_norm": 0.36064013407949275, + "learning_rate": 7.924860557067593e-08, + "loss": 0.0313, "step": 4274 }, { - "epoch": 1.8987341772151898, - "grad_norm": 0.58612758503126, - "learning_rate": 6.298307818685595e-06, - "loss": 0.0416, + "epoch": 3.7966252220248666, + "grad_norm": 0.4843753019752317, + "learning_rate": 7.856279176602144e-08, + "loss": 0.0365, "step": 4275 }, { - "epoch": 1.8991783255607373, - "grad_norm": 0.501397444674289, - "learning_rate": 6.296435676836188e-06, - "loss": 0.0443, + "epoch": 3.797513321492007, + "grad_norm": 0.3560252848603473, + "learning_rate": 7.78799348738074e-08, + "loss": 0.0295, "step": 4276 }, { - "epoch": 1.8996224739062848, - "grad_norm": 0.4267458008276168, - "learning_rate": 6.294563340110474e-06, - "loss": 0.0465, + "epoch": 3.7984014209591477, + "grad_norm": 0.38564615796744883, + "learning_rate": 7.72000353043112e-08, + "loss": 0.0326, "step": 4277 }, { - "epoch": 1.900066622251832, - "grad_norm": 0.5455194479999508, - "learning_rate": 6.292690808789901e-06, - "loss": 0.055, + "epoch": 3.7992895204262878, + "grad_norm": 0.303037033637724, + "learning_rate": 7.652309346603048e-08, + "loss": 0.0232, "step": 4278 }, { - "epoch": 1.9005107705973794, - "grad_norm": 0.678438654630706, - "learning_rate": 6.290818083155941e-06, - "loss": 0.0559, + "epoch": 3.800177619893428, + "grad_norm": 0.332761121905067, + "learning_rate": 7.584910976568937e-08, + "loss": 0.0221, "step": 4279 }, { - "epoch": 1.900954918942927, - "grad_norm": 0.5367006488443361, - "learning_rate": 6.288945163490093e-06, - "loss": 0.045, + "epoch": 3.8010657193605684, + "grad_norm": 0.34664824674177536, + "learning_rate": 7.517808460823117e-08, + "loss": 0.0267, "step": 4280 }, { - "epoch": 1.9013990672884744, - "grad_norm": 0.41451518482226474, - "learning_rate": 6.287072050073894e-06, - "loss": 0.0384, + "epoch": 3.801953818827709, + "grad_norm": 0.3414872510743276, + "learning_rate": 7.451001839682336e-08, + "loss": 0.0293, "step": 4281 }, { - "epoch": 1.9018432156340217, - "grad_norm": 0.594043909692931, - "learning_rate": 6.2851987431889025e-06, - "loss": 0.0414, + "epoch": 3.802841918294849, + "grad_norm": 0.31486452720736763, + "learning_rate": 7.3844911532856e-08, + "loss": 0.0267, "step": 4282 }, { - "epoch": 1.9022873639795692, - "grad_norm": 0.4491119239025248, - "learning_rate": 6.2833252431167066e-06, - "loss": 0.0393, + "epoch": 3.8037300177619895, + "grad_norm": 0.35659179633742877, + "learning_rate": 7.318276441594108e-08, + "loss": 0.0271, "step": 4283 }, { - "epoch": 1.9027315123251167, - "grad_norm": 0.39577109884937645, - "learning_rate": 6.2814515501389275e-06, - "loss": 0.035, + "epoch": 3.8046181172291296, + "grad_norm": 0.34424266531677655, + "learning_rate": 7.252357744391036e-08, + "loss": 0.0245, "step": 4284 }, { - "epoch": 1.903175660670664, - "grad_norm": 0.3708469471452283, - "learning_rate": 6.279577664537213e-06, - "loss": 0.026, + "epoch": 3.80550621669627, + "grad_norm": 0.42009285441865224, + "learning_rate": 7.186735101281928e-08, + "loss": 0.0275, "step": 4285 }, { - "epoch": 1.9036198090162113, - "grad_norm": 0.4178266369288517, - "learning_rate": 6.2777035865932375e-06, + "epoch": 3.80639431616341, + "grad_norm": 0.3910495428886012, + "learning_rate": 7.121408551694408e-08, "loss": 0.0334, "step": 4286 }, { - "epoch": 1.9040639573617588, - "grad_norm": 0.5196821555520535, - "learning_rate": 6.275829316588711e-06, - "loss": 0.0454, + "epoch": 3.8072824156305507, + "grad_norm": 0.3887130690681448, + "learning_rate": 7.056378134878139e-08, + "loss": 0.0263, "step": 4287 }, { - "epoch": 1.9045081057073063, - "grad_norm": 0.4709231790248879, - "learning_rate": 6.273954854805364e-06, - "loss": 0.0395, + "epoch": 3.808170515097691, + "grad_norm": 0.41585764515781903, + "learning_rate": 6.991643889904865e-08, + "loss": 0.0351, "step": 4288 }, { - "epoch": 1.9049522540528536, - "grad_norm": 0.47040632233388197, - "learning_rate": 6.2720802015249615e-06, - "loss": 0.0459, + "epoch": 3.8090586145648313, + "grad_norm": 0.36689636763640526, + "learning_rate": 6.927205855668417e-08, + "loss": 0.0355, "step": 4289 }, { - "epoch": 1.905396402398401, - "grad_norm": 0.5132518236758158, - "learning_rate": 6.2702053570292976e-06, - "loss": 0.0453, + "epoch": 3.8099467140319714, + "grad_norm": 0.34791282762484455, + "learning_rate": 6.863064070884607e-08, + "loss": 0.0373, "step": 4290 }, { - "epoch": 1.9058405507439484, - "grad_norm": 0.4975445699296196, - "learning_rate": 6.26833032160019e-06, - "loss": 0.0384, + "epoch": 3.810834813499112, + "grad_norm": 0.3631896445310664, + "learning_rate": 6.799218574091326e-08, + "loss": 0.0307, "step": 4291 }, { - "epoch": 1.906284699089496, - "grad_norm": 0.42703967829123596, - "learning_rate": 6.26645509551949e-06, - "loss": 0.036, + "epoch": 3.8117229129662524, + "grad_norm": 0.46817744999701366, + "learning_rate": 6.735669403648503e-08, + "loss": 0.0439, "step": 4292 }, { - "epoch": 1.9067288474350432, - "grad_norm": 0.4865047041410074, - "learning_rate": 6.264579679069077e-06, - "loss": 0.0371, + "epoch": 3.8126110124333925, + "grad_norm": 0.38259950260942327, + "learning_rate": 6.672416597737763e-08, + "loss": 0.0283, "step": 4293 }, { - "epoch": 1.9071729957805907, - "grad_norm": 0.5254303510911112, - "learning_rate": 6.262704072530856e-06, - "loss": 0.0501, + "epoch": 3.8134991119005326, + "grad_norm": 0.40337343124177544, + "learning_rate": 6.609460194362927e-08, + "loss": 0.0336, "step": 4294 }, { - "epoch": 1.9076171441261383, - "grad_norm": 0.36509386500252816, - "learning_rate": 6.260828276186762e-06, - "loss": 0.0366, + "epoch": 3.814387211367673, + "grad_norm": 0.38495762258543054, + "learning_rate": 6.546800231349793e-08, + "loss": 0.033, "step": 4295 }, { - "epoch": 1.9080612924716855, - "grad_norm": 0.4317077297037696, - "learning_rate": 6.258952290318763e-06, - "loss": 0.0367, + "epoch": 3.8152753108348136, + "grad_norm": 0.3290737448978435, + "learning_rate": 6.484436746345634e-08, + "loss": 0.0282, "step": 4296 }, { - "epoch": 1.9085054408172328, - "grad_norm": 0.4444631290624624, - "learning_rate": 6.257076115208847e-06, - "loss": 0.0366, + "epoch": 3.8161634103019537, + "grad_norm": 0.3858915709196421, + "learning_rate": 6.422369776820036e-08, + "loss": 0.0346, "step": 4297 }, { - "epoch": 1.9089495891627803, - "grad_norm": 0.3303398456720475, - "learning_rate": 6.255199751139036e-06, - "loss": 0.03, + "epoch": 3.8170515097690942, + "grad_norm": 0.3420728360249852, + "learning_rate": 6.360599360064224e-08, + "loss": 0.0318, "step": 4298 }, { - "epoch": 1.9093937375083279, - "grad_norm": 0.33258553726398404, - "learning_rate": 6.253323198391383e-06, - "loss": 0.0345, + "epoch": 3.8179396092362343, + "grad_norm": 0.41664185777198154, + "learning_rate": 6.299125533191286e-08, + "loss": 0.0281, "step": 4299 }, { - "epoch": 1.9098378858538752, - "grad_norm": 0.6445178538911288, - "learning_rate": 6.251446457247961e-06, - "loss": 0.0444, + "epoch": 3.818827708703375, + "grad_norm": 0.3962704915837752, + "learning_rate": 6.237948333136068e-08, + "loss": 0.0282, "step": 4300 }, { - "epoch": 1.9102820341994224, - "grad_norm": 0.503486774604937, - "learning_rate": 6.249569527990878e-06, - "loss": 0.0508, + "epoch": 3.819715808170515, + "grad_norm": 0.4038464560650923, + "learning_rate": 6.177067796655334e-08, + "loss": 0.0335, "step": 4301 }, { - "epoch": 1.9107261825449702, - "grad_norm": 0.532598812939261, - "learning_rate": 6.247692410902271e-06, - "loss": 0.0366, + "epoch": 3.8206039076376554, + "grad_norm": 0.3730830701692366, + "learning_rate": 6.11648396032738e-08, + "loss": 0.0315, "step": 4302 }, { - "epoch": 1.9111703308905175, - "grad_norm": 0.5689719802618134, - "learning_rate": 6.245815106264297e-06, - "loss": 0.0508, + "epoch": 3.821492007104796, + "grad_norm": 0.42568534133699615, + "learning_rate": 6.056196860552421e-08, + "loss": 0.0407, "step": 4303 }, { - "epoch": 1.9116144792360648, - "grad_norm": 0.44795775453400327, - "learning_rate": 6.243937614359152e-06, - "loss": 0.0425, + "epoch": 3.822380106571936, + "grad_norm": 0.3748187361883993, + "learning_rate": 5.996206533552373e-08, + "loss": 0.0289, "step": 4304 }, { - "epoch": 1.9120586275816123, - "grad_norm": 0.63806779442234, - "learning_rate": 6.242059935469051e-06, - "loss": 0.0409, + "epoch": 3.823268206039076, + "grad_norm": 0.3366591717911857, + "learning_rate": 5.9365130153707374e-08, + "loss": 0.0316, "step": 4305 }, { - "epoch": 1.9125027759271598, - "grad_norm": 0.41950353971072635, - "learning_rate": 6.240182069876244e-06, - "loss": 0.0344, + "epoch": 3.8241563055062167, + "grad_norm": 0.3237067178908839, + "learning_rate": 5.8771163418727686e-08, + "loss": 0.0262, "step": 4306 }, { - "epoch": 1.912946924272707, - "grad_norm": 0.599261124567421, - "learning_rate": 6.238304017863005e-06, - "loss": 0.0415, + "epoch": 3.825044404973357, + "grad_norm": 0.3226604068159033, + "learning_rate": 5.8180165487454754e-08, + "loss": 0.0264, "step": 4307 }, { - "epoch": 1.9133910726182544, - "grad_norm": 0.364585724464603, - "learning_rate": 6.236425779711637e-06, - "loss": 0.0346, + "epoch": 3.8259325044404973, + "grad_norm": 0.47668452083716517, + "learning_rate": 5.7592136714971214e-08, + "loss": 0.0316, "step": 4308 }, { - "epoch": 1.9138352209638019, - "grad_norm": 0.6254535972915019, - "learning_rate": 6.23454735570447e-06, - "loss": 0.0509, + "epoch": 3.826820603907638, + "grad_norm": 0.35405850227990326, + "learning_rate": 5.700707745458001e-08, + "loss": 0.0361, "step": 4309 }, { - "epoch": 1.9142793693093494, - "grad_norm": 0.41487059504531737, - "learning_rate": 6.232668746123865e-06, - "loss": 0.0331, + "epoch": 3.827708703374778, + "grad_norm": 0.3483911527846554, + "learning_rate": 5.6424988057797723e-08, + "loss": 0.0282, "step": 4310 }, { - "epoch": 1.9147235176548967, - "grad_norm": 0.5104472659733067, - "learning_rate": 6.230789951252208e-06, - "loss": 0.0349, + "epoch": 3.8285968028419184, + "grad_norm": 0.4185263789996629, + "learning_rate": 5.584586887435739e-08, + "loss": 0.0383, "step": 4311 }, { - "epoch": 1.9151676660004442, - "grad_norm": 0.40794899664296186, - "learning_rate": 6.228910971371913e-06, - "loss": 0.0408, + "epoch": 3.8294849023090585, + "grad_norm": 0.319413368147179, + "learning_rate": 5.5269720252206225e-08, + "loss": 0.0258, "step": 4312 }, { - "epoch": 1.9156118143459917, - "grad_norm": 0.5437919242408334, - "learning_rate": 6.227031806765424e-06, - "loss": 0.0387, + "epoch": 3.830373001776199, + "grad_norm": 0.3460427540781141, + "learning_rate": 5.4696542537509e-08, + "loss": 0.0284, "step": 4313 }, { - "epoch": 1.916055962691539, - "grad_norm": 0.4898004885001543, - "learning_rate": 6.225152457715211e-06, - "loss": 0.0501, + "epoch": 3.8312611012433395, + "grad_norm": 0.28556441758858203, + "learning_rate": 5.4126336074641905e-08, + "loss": 0.0247, "step": 4314 }, { - "epoch": 1.9165001110370863, - "grad_norm": 0.32765369475691036, - "learning_rate": 6.223272924503773e-06, - "loss": 0.0313, + "epoch": 3.8321492007104796, + "grad_norm": 0.3348374123329273, + "learning_rate": 5.3559101206200337e-08, + "loss": 0.0297, "step": 4315 }, { - "epoch": 1.9169442593826338, - "grad_norm": 1.1036222737179877, - "learning_rate": 6.221393207413634e-06, - "loss": 0.0555, + "epoch": 3.8330373001776197, + "grad_norm": 0.3766724819392865, + "learning_rate": 5.2994838272990567e-08, + "loss": 0.0339, "step": 4316 }, { - "epoch": 1.9173884077281813, - "grad_norm": 0.5313402238818737, - "learning_rate": 6.219513306727347e-06, - "loss": 0.0435, + "epoch": 3.83392539964476, + "grad_norm": 0.3614763475685039, + "learning_rate": 5.2433547614035296e-08, + "loss": 0.0292, "step": 4317 }, { - "epoch": 1.9178325560737286, - "grad_norm": 0.44083561782231834, - "learning_rate": 6.217633222727495e-06, - "loss": 0.0375, + "epoch": 3.8348134991119007, + "grad_norm": 0.3476340792098871, + "learning_rate": 5.1875229566570316e-08, + "loss": 0.0285, "step": 4318 }, { - "epoch": 1.918276704419276, - "grad_norm": 0.9040287143232594, - "learning_rate": 6.215752955696686e-06, - "loss": 0.0741, + "epoch": 3.835701598579041, + "grad_norm": 0.35206208433206104, + "learning_rate": 5.131988446604674e-08, + "loss": 0.0288, "step": 4319 }, { - "epoch": 1.9187208527648234, - "grad_norm": 0.406508099143207, - "learning_rate": 6.213872505917554e-06, - "loss": 0.0309, + "epoch": 3.8365896980461813, + "grad_norm": 0.35235509559553985, + "learning_rate": 5.076751264612767e-08, + "loss": 0.0336, "step": 4320 }, { - "epoch": 1.919165001110371, - "grad_norm": 0.4001138707894935, - "learning_rate": 6.2119918736727666e-06, - "loss": 0.0292, + "epoch": 3.8374777975133214, + "grad_norm": 0.3387059741626194, + "learning_rate": 5.0218114438692065e-08, + "loss": 0.0272, "step": 4321 }, { - "epoch": 1.9196091494559182, - "grad_norm": 0.37528739888488877, - "learning_rate": 6.210111059245011e-06, - "loss": 0.0323, + "epoch": 3.838365896980462, + "grad_norm": 0.2940058924932474, + "learning_rate": 4.967169017382978e-08, + "loss": 0.0233, "step": 4322 }, { - "epoch": 1.9200532978014657, - "grad_norm": 0.4611567927734023, - "learning_rate": 6.2082300629170065e-06, - "loss": 0.0369, + "epoch": 3.839253996447602, + "grad_norm": 0.5090182431358794, + "learning_rate": 4.912824017984541e-08, + "loss": 0.0327, "step": 4323 }, { - "epoch": 1.9204974461470132, - "grad_norm": 0.4615724924310186, - "learning_rate": 6.2063488849715e-06, - "loss": 0.0468, + "epoch": 3.8401420959147425, + "grad_norm": 0.4579340325310792, + "learning_rate": 4.858776478325666e-08, + "loss": 0.0382, "step": 4324 }, { - "epoch": 1.9209415944925605, - "grad_norm": 0.36847012924343564, - "learning_rate": 6.204467525691265e-06, - "loss": 0.0403, + "epoch": 3.8410301953818826, + "grad_norm": 0.38268228889926165, + "learning_rate": 4.805026430879267e-08, + "loss": 0.0291, "step": 4325 }, { - "epoch": 1.9213857428381078, - "grad_norm": 0.7462560129733484, - "learning_rate": 6.202585985359099e-06, - "loss": 0.0463, + "epoch": 3.841918294849023, + "grad_norm": 0.343892329770722, + "learning_rate": 4.751573907939677e-08, + "loss": 0.0311, "step": 4326 }, { - "epoch": 1.9218298911836553, - "grad_norm": 0.4111595385170657, - "learning_rate": 6.200704264257832e-06, - "loss": 0.0402, + "epoch": 3.842806394316163, + "grad_norm": 0.40706138301457534, + "learning_rate": 4.6984189416223735e-08, + "loss": 0.0314, "step": 4327 }, { - "epoch": 1.9222740395292028, - "grad_norm": 0.5886660338070746, - "learning_rate": 6.198822362670316e-06, - "loss": 0.0409, + "epoch": 3.8436944937833037, + "grad_norm": 0.3111134558120298, + "learning_rate": 4.6455615638640875e-08, + "loss": 0.023, "step": 4328 }, { - "epoch": 1.9227181878747501, - "grad_norm": 0.551351371253542, - "learning_rate": 6.196940280879436e-06, - "loss": 0.0371, + "epoch": 3.8445825932504443, + "grad_norm": 0.3477045472139336, + "learning_rate": 4.593001806422748e-08, + "loss": 0.0244, "step": 4329 }, { - "epoch": 1.9231623362202974, - "grad_norm": 0.5211858751187458, - "learning_rate": 6.1950580191681e-06, - "loss": 0.043, + "epoch": 3.8454706927175843, + "grad_norm": 0.33859044332690635, + "learning_rate": 4.540739700877483e-08, + "loss": 0.0328, "step": 4330 }, { - "epoch": 1.9236064845658452, - "grad_norm": 0.4258998039644192, - "learning_rate": 6.193175577819242e-06, - "loss": 0.0353, + "epoch": 3.8463587921847244, + "grad_norm": 0.3691228667767026, + "learning_rate": 4.48877527862851e-08, + "loss": 0.0325, "step": 4331 }, { - "epoch": 1.9240506329113924, - "grad_norm": 0.4043175914509727, - "learning_rate": 6.191292957115825e-06, - "loss": 0.0285, + "epoch": 3.847246891651865, + "grad_norm": 0.3588167745313349, + "learning_rate": 4.4371085708972974e-08, + "loss": 0.0267, "step": 4332 }, { - "epoch": 1.9244947812569397, - "grad_norm": 0.4563276066272647, - "learning_rate": 6.1894101573408425e-06, - "loss": 0.0371, + "epoch": 3.8481349911190055, + "grad_norm": 0.32927581237235615, + "learning_rate": 4.385739608726347e-08, + "loss": 0.0319, "step": 4333 }, { - "epoch": 1.9249389296024872, - "grad_norm": 0.54090150421601, - "learning_rate": 6.1875271787773075e-06, - "loss": 0.0519, + "epoch": 3.8490230905861456, + "grad_norm": 0.29159483128634467, + "learning_rate": 4.334668422979305e-08, + "loss": 0.0299, "step": 4334 }, { - "epoch": 1.9253830779480348, - "grad_norm": 0.3562700559697815, - "learning_rate": 6.185644021708266e-06, - "loss": 0.0267, + "epoch": 3.849911190053286, + "grad_norm": 0.3312135156060222, + "learning_rate": 4.283895044340958e-08, + "loss": 0.0245, "step": 4335 }, { - "epoch": 1.925827226293582, - "grad_norm": 1.2138310918842605, - "learning_rate": 6.183760686416785e-06, - "loss": 0.0483, + "epoch": 3.850799289520426, + "grad_norm": 0.2868713577117152, + "learning_rate": 4.233419503317182e-08, + "loss": 0.0286, "step": 4336 }, { - "epoch": 1.9262713746391293, - "grad_norm": 0.4024294145624342, - "learning_rate": 6.181877173185966e-06, - "loss": 0.0311, + "epoch": 3.8516873889875667, + "grad_norm": 0.3475136235984561, + "learning_rate": 4.183241830234663e-08, + "loss": 0.0322, "step": 4337 }, { - "epoch": 1.9267155229846769, - "grad_norm": 0.6790926459631366, - "learning_rate": 6.1799934822989315e-06, - "loss": 0.0494, + "epoch": 3.8525754884547068, + "grad_norm": 0.3937627490986881, + "learning_rate": 4.1333620552413967e-08, + "loss": 0.0271, "step": 4338 }, { - "epoch": 1.9271596713302244, - "grad_norm": 0.48577305067511145, - "learning_rate": 6.178109614038832e-06, - "loss": 0.0412, + "epoch": 3.8534635879218473, + "grad_norm": 0.40467753769294995, + "learning_rate": 4.0837802083062984e-08, + "loss": 0.0334, "step": 4339 }, { - "epoch": 1.9276038196757717, - "grad_norm": 0.4461768395787564, - "learning_rate": 6.176225568688844e-06, - "loss": 0.0337, + "epoch": 3.854351687388988, + "grad_norm": 0.34675837935471, + "learning_rate": 4.0344963192193165e-08, + "loss": 0.0286, "step": 4340 }, { - "epoch": 1.9280479680213192, - "grad_norm": 0.38001980047585654, - "learning_rate": 6.174341346532173e-06, - "loss": 0.0404, + "epoch": 3.855239786856128, + "grad_norm": 0.32529428338468314, + "learning_rate": 3.985510417591265e-08, + "loss": 0.0321, "step": 4341 }, { - "epoch": 1.9284921163668667, - "grad_norm": 0.4252479347353827, - "learning_rate": 6.1724569478520495e-06, - "loss": 0.0452, + "epoch": 3.856127886323268, + "grad_norm": 0.3724586734964784, + "learning_rate": 3.936822532854046e-08, + "loss": 0.0317, "step": 4342 }, { - "epoch": 1.928936264712414, - "grad_norm": 0.3799614809822163, - "learning_rate": 6.1705723729317295e-06, - "loss": 0.0401, + "epoch": 3.8570159857904085, + "grad_norm": 0.34911932378498656, + "learning_rate": 3.888432694260425e-08, + "loss": 0.0312, "step": 4343 }, { - "epoch": 1.9293804130579613, - "grad_norm": 0.8795175854169174, - "learning_rate": 6.168687622054497e-06, - "loss": 0.0419, + "epoch": 3.857904085257549, + "grad_norm": 0.36508793241837756, + "learning_rate": 3.840340930884145e-08, + "loss": 0.0372, "step": 4344 }, { - "epoch": 1.9298245614035088, - "grad_norm": 0.35580314936274277, - "learning_rate": 6.1668026955036645e-06, - "loss": 0.0275, + "epoch": 3.858792184724689, + "grad_norm": 0.42477103778186287, + "learning_rate": 3.792547271619929e-08, + "loss": 0.0405, "step": 4345 }, { - "epoch": 1.9302687097490563, - "grad_norm": 0.5402722431604882, - "learning_rate": 6.1649175935625635e-06, - "loss": 0.0459, + "epoch": 3.8596802841918296, + "grad_norm": 0.3502306374820717, + "learning_rate": 3.745051745183137e-08, + "loss": 0.0293, "step": 4346 }, { - "epoch": 1.9307128580946036, - "grad_norm": 0.6557570062151068, - "learning_rate": 6.1630323165145615e-06, - "loss": 0.0463, + "epoch": 3.8605683836589697, + "grad_norm": 0.3576034002722447, + "learning_rate": 3.697854380110277e-08, + "loss": 0.0308, "step": 4347 }, { - "epoch": 1.9311570064401509, - "grad_norm": 0.45047259263477546, - "learning_rate": 6.161146864643045e-06, - "loss": 0.0408, + "epoch": 3.8614564831261102, + "grad_norm": 0.3663533569527988, + "learning_rate": 3.65095520475861e-08, + "loss": 0.0292, "step": 4348 }, { - "epoch": 1.9316011547856984, - "grad_norm": 0.4701310559005135, - "learning_rate": 6.159261238231431e-06, - "loss": 0.0379, + "epoch": 3.8623445825932503, + "grad_norm": 0.37016066414991033, + "learning_rate": 3.604354247306152e-08, + "loss": 0.0368, "step": 4349 }, { - "epoch": 1.932045303131246, - "grad_norm": 0.4022795195574756, - "learning_rate": 6.15737543756316e-06, - "loss": 0.0372, + "epoch": 3.863232682060391, + "grad_norm": 0.3462057549727546, + "learning_rate": 3.558051535751894e-08, + "loss": 0.0294, "step": 4350 }, { - "epoch": 1.9324894514767932, - "grad_norm": 0.4838913493812182, - "learning_rate": 6.1554894629217e-06, - "loss": 0.0512, + "epoch": 3.8641207815275314, + "grad_norm": 0.3872169706759806, + "learning_rate": 3.5120470979156386e-08, + "loss": 0.0418, "step": 4351 }, { - "epoch": 1.9329335998223407, - "grad_norm": 1.2278585350906228, - "learning_rate": 6.153603314590547e-06, - "loss": 0.0864, + "epoch": 3.8650088809946714, + "grad_norm": 0.33307226944359014, + "learning_rate": 3.466340961437775e-08, + "loss": 0.0282, "step": 4352 }, { - "epoch": 1.9333777481678882, - "grad_norm": 0.354715018531944, - "learning_rate": 6.1517169928532185e-06, - "loss": 0.0324, + "epoch": 3.8658969804618115, + "grad_norm": 0.3531028159774626, + "learning_rate": 3.420933153779671e-08, + "loss": 0.0304, "step": 4353 }, { - "epoch": 1.9338218965134355, - "grad_norm": 0.3643440217338777, - "learning_rate": 6.149830497993261e-06, - "loss": 0.0346, + "epoch": 3.866785079928952, + "grad_norm": 0.3712953348328917, + "learning_rate": 3.375823702223391e-08, + "loss": 0.0341, "step": 4354 }, { - "epoch": 1.9342660448589828, - "grad_norm": 0.5368481909179409, - "learning_rate": 6.147943830294248e-06, - "loss": 0.0406, + "epoch": 3.8676731793960926, + "grad_norm": 0.5930361349122664, + "learning_rate": 3.3310126338716444e-08, + "loss": 0.0303, "step": 4355 }, { - "epoch": 1.9347101932045303, - "grad_norm": 0.3847206541326022, - "learning_rate": 6.146056990039777e-06, - "loss": 0.0291, + "epoch": 3.8685612788632326, + "grad_norm": 0.3366657561722301, + "learning_rate": 3.286499975648061e-08, + "loss": 0.0314, "step": 4356 }, { - "epoch": 1.9351543415500778, - "grad_norm": 0.4195486386731195, - "learning_rate": 6.1441699775134724e-06, - "loss": 0.037, + "epoch": 3.869449378330373, + "grad_norm": 0.30761576587876494, + "learning_rate": 3.242285754296859e-08, + "loss": 0.0259, "step": 4357 }, { - "epoch": 1.935598489895625, - "grad_norm": 0.4387846462865948, - "learning_rate": 6.142282792998985e-06, - "loss": 0.0336, + "epoch": 3.8703374777975132, + "grad_norm": 0.4057322839382098, + "learning_rate": 3.198369996382844e-08, + "loss": 0.035, "step": 4358 }, { - "epoch": 1.9360426382411724, - "grad_norm": 0.33683241345246556, - "learning_rate": 6.14039543677999e-06, - "loss": 0.0357, + "epoch": 3.8712255772646538, + "grad_norm": 0.38516550750614265, + "learning_rate": 3.1547527282916876e-08, + "loss": 0.0285, "step": 4359 }, { - "epoch": 1.93648678658672, - "grad_norm": 0.39849510173786257, - "learning_rate": 6.138507909140187e-06, - "loss": 0.0384, + "epoch": 3.872113676731794, + "grad_norm": 0.3726666679963141, + "learning_rate": 3.1114339762296495e-08, + "loss": 0.0318, "step": 4360 }, { - "epoch": 1.9369309349322674, - "grad_norm": 0.4846553810738608, - "learning_rate": 6.136620210363307e-06, - "loss": 0.0488, + "epoch": 3.8730017761989344, + "grad_norm": 0.45217090450614816, + "learning_rate": 3.0684137662236324e-08, + "loss": 0.0375, "step": 4361 }, { - "epoch": 1.9373750832778147, - "grad_norm": 0.44711186795351665, - "learning_rate": 6.1347323407331e-06, - "loss": 0.0403, + "epoch": 3.8738898756660745, + "grad_norm": 0.3690454521221677, + "learning_rate": 3.0256921241211824e-08, + "loss": 0.0316, "step": 4362 }, { - "epoch": 1.9378192316233622, - "grad_norm": 0.546459442186708, - "learning_rate": 6.132844300533348e-06, - "loss": 0.0442, + "epoch": 3.874777975133215, + "grad_norm": 0.30883468045000373, + "learning_rate": 2.983269075590323e-08, + "loss": 0.0251, "step": 4363 }, { - "epoch": 1.9382633799689097, - "grad_norm": 0.6727853387902581, - "learning_rate": 6.130956090047852e-06, - "loss": 0.0542, + "epoch": 3.875666074600355, + "grad_norm": 0.3133306119098753, + "learning_rate": 2.9411446461199978e-08, + "loss": 0.0281, "step": 4364 }, { - "epoch": 1.938707528314457, - "grad_norm": 0.5269547149952615, - "learning_rate": 6.129067709560445e-06, - "loss": 0.0553, + "epoch": 3.8765541740674956, + "grad_norm": 0.6609166358769407, + "learning_rate": 2.8993188610193513e-08, + "loss": 0.0376, "step": 4365 }, { - "epoch": 1.9391516766600043, - "grad_norm": 1.3382161278121019, - "learning_rate": 6.127179159354985e-06, - "loss": 0.0426, + "epoch": 3.877442273534636, + "grad_norm": 0.3364655775257454, + "learning_rate": 2.8577917454183367e-08, + "loss": 0.0375, "step": 4366 }, { - "epoch": 1.9395958250055518, - "grad_norm": 0.4304150369100395, - "learning_rate": 6.125290439715346e-06, - "loss": 0.0373, + "epoch": 3.878330373001776, + "grad_norm": 0.32332908013329004, + "learning_rate": 2.8165633242674408e-08, + "loss": 0.0255, "step": 4367 }, { - "epoch": 1.9400399733510993, - "grad_norm": 0.43883673281798163, - "learning_rate": 6.12340155092544e-06, - "loss": 0.0377, + "epoch": 3.8792184724689163, + "grad_norm": 0.3374522564042292, + "learning_rate": 2.7756336223375702e-08, + "loss": 0.0297, "step": 4368 }, { - "epoch": 1.9404841216966466, - "grad_norm": 0.5188193338070988, - "learning_rate": 6.121512493269197e-06, - "loss": 0.0371, + "epoch": 3.880106571936057, + "grad_norm": 0.3223234211010064, + "learning_rate": 2.735002664220332e-08, + "loss": 0.0303, "step": 4369 }, { - "epoch": 1.9409282700421941, - "grad_norm": 0.505484234164325, - "learning_rate": 6.119623267030576e-06, - "loss": 0.0402, + "epoch": 3.8809946714031973, + "grad_norm": 0.32260140969535633, + "learning_rate": 2.6946704743276432e-08, + "loss": 0.0266, "step": 4370 }, { - "epoch": 1.9413724183877417, - "grad_norm": 0.5176850701235032, - "learning_rate": 6.1177338724935576e-06, - "loss": 0.0466, + "epoch": 3.8818827708703374, + "grad_norm": 0.3489766999359135, + "learning_rate": 2.6546370768920638e-08, + "loss": 0.0262, "step": 4371 }, { - "epoch": 1.941816566733289, - "grad_norm": 0.4703785260705156, - "learning_rate": 6.115844309942153e-06, - "loss": 0.04, + "epoch": 3.882770870337478, + "grad_norm": 0.4192868948264376, + "learning_rate": 2.6149024959665204e-08, + "loss": 0.031, "step": 4372 }, { - "epoch": 1.9422607150788362, - "grad_norm": 0.43937571329979475, - "learning_rate": 6.1139545796603925e-06, - "loss": 0.0395, + "epoch": 3.883658969804618, + "grad_norm": 0.3657141947965113, + "learning_rate": 2.5754667554244717e-08, + "loss": 0.0348, "step": 4373 }, { - "epoch": 1.9427048634243838, - "grad_norm": 0.5671825068350916, - "learning_rate": 6.112064681932335e-06, - "loss": 0.0445, + "epoch": 3.8845470692717585, + "grad_norm": 0.4307137443150828, + "learning_rate": 2.536329878959909e-08, + "loss": 0.0369, "step": 4374 }, { - "epoch": 1.9431490117699313, - "grad_norm": 0.38466158842057985, - "learning_rate": 6.110174617042066e-06, - "loss": 0.033, + "epoch": 3.8854351687388986, + "grad_norm": 0.35289049259198735, + "learning_rate": 2.4974918900870782e-08, + "loss": 0.0292, "step": 4375 }, { - "epoch": 1.9435931601154786, - "grad_norm": 0.46940056697212595, - "learning_rate": 6.108284385273695e-06, - "loss": 0.0448, + "epoch": 3.886323268206039, + "grad_norm": 0.32992230436377096, + "learning_rate": 2.458952812140758e-08, + "loss": 0.0237, "step": 4376 }, { - "epoch": 1.9440373084610258, - "grad_norm": 0.4747699143093753, - "learning_rate": 6.106393986911353e-06, - "loss": 0.0459, + "epoch": 3.8872113676731797, + "grad_norm": 0.30266200036111696, + "learning_rate": 2.420712668276093e-08, + "loss": 0.0286, "step": 4377 }, { - "epoch": 1.9444814568065734, - "grad_norm": 0.4922037733238623, - "learning_rate": 6.1045034222392e-06, - "loss": 0.0462, + "epoch": 3.8880994671403197, + "grad_norm": 0.3339250029881702, + "learning_rate": 2.3827714814686488e-08, + "loss": 0.028, "step": 4378 }, { - "epoch": 1.9449256051521209, - "grad_norm": 0.48074571927831655, - "learning_rate": 6.102612691541422e-06, - "loss": 0.0433, + "epoch": 3.88898756660746, + "grad_norm": 0.4569844580828058, + "learning_rate": 2.3451292745143572e-08, + "loss": 0.0292, "step": 4379 }, { - "epoch": 1.9453697534976682, - "grad_norm": 0.45838348922641425, - "learning_rate": 6.1007217951022244e-06, - "loss": 0.0567, + "epoch": 3.8898756660746003, + "grad_norm": 0.36223606525550006, + "learning_rate": 2.3077860700295717e-08, + "loss": 0.0354, "step": 4380 }, { - "epoch": 1.9458139018432157, - "grad_norm": 0.39557432428184086, - "learning_rate": 6.098830733205844e-06, - "loss": 0.0318, + "epoch": 3.890763765541741, + "grad_norm": 0.33860378609473823, + "learning_rate": 2.2707418904509004e-08, + "loss": 0.035, "step": 4381 }, { - "epoch": 1.9462580501887632, - "grad_norm": 0.4177848324601465, - "learning_rate": 6.096939506136539e-06, - "loss": 0.0475, + "epoch": 3.891651865008881, + "grad_norm": 0.34459530016921563, + "learning_rate": 2.2339967580353173e-08, + "loss": 0.0312, "step": 4382 }, { - "epoch": 1.9467021985343105, - "grad_norm": 0.4875377634328118, - "learning_rate": 6.095048114178591e-06, - "loss": 0.0385, + "epoch": 3.8925399644760215, + "grad_norm": 0.3718421787480263, + "learning_rate": 2.197550694860162e-08, + "loss": 0.031, "step": 4383 }, { - "epoch": 1.9471463468798578, - "grad_norm": 0.3458943442785933, - "learning_rate": 6.093156557616311e-06, - "loss": 0.0259, + "epoch": 3.8934280639431615, + "grad_norm": 0.3983433406484029, + "learning_rate": 2.1614037228230857e-08, + "loss": 0.0428, "step": 4384 }, { - "epoch": 1.9475904952254053, - "grad_norm": 0.48182367581929, - "learning_rate": 6.09126483673403e-06, - "loss": 0.0516, + "epoch": 3.894316163410302, + "grad_norm": 0.35969146510568784, + "learning_rate": 2.1255558636419925e-08, + "loss": 0.0317, "step": 4385 }, { - "epoch": 1.9480346435709528, - "grad_norm": 0.4073510855592602, - "learning_rate": 6.089372951816108e-06, - "loss": 0.0313, + "epoch": 3.895204262877442, + "grad_norm": 0.32795656581532545, + "learning_rate": 2.090007138855099e-08, + "loss": 0.0318, "step": 4386 }, { - "epoch": 1.9484787919165, - "grad_norm": 0.48547952748532, - "learning_rate": 6.087480903146926e-06, - "loss": 0.0445, + "epoch": 3.8960923623445827, + "grad_norm": 0.42925324451322316, + "learning_rate": 2.054757569820931e-08, + "loss": 0.0264, "step": 4387 }, { - "epoch": 1.9489229402620474, - "grad_norm": 0.5136489604538488, - "learning_rate": 6.085588691010888e-06, - "loss": 0.0452, + "epoch": 3.8969804618117228, + "grad_norm": 0.4052490784414431, + "learning_rate": 2.0198071777182136e-08, + "loss": 0.0468, "step": 4388 }, { - "epoch": 1.9493670886075949, - "grad_norm": 0.37826853557517365, - "learning_rate": 6.0836963156924335e-06, - "loss": 0.0356, + "epoch": 3.8978685612788633, + "grad_norm": 0.3603324555012287, + "learning_rate": 1.9851559835459277e-08, + "loss": 0.0317, "step": 4389 }, { - "epoch": 1.9498112369531424, - "grad_norm": 0.4912572577888434, - "learning_rate": 6.081803777476012e-06, - "loss": 0.0462, + "epoch": 3.8987566607460034, + "grad_norm": 0.3210421385639071, + "learning_rate": 1.950804008123308e-08, + "loss": 0.0249, "step": 4390 }, { - "epoch": 1.9502553852986897, - "grad_norm": 0.4860207069904635, - "learning_rate": 6.079911076646106e-06, - "loss": 0.0425, + "epoch": 3.899644760213144, + "grad_norm": 0.31113861637791496, + "learning_rate": 1.9167512720897896e-08, + "loss": 0.0272, "step": 4391 }, { - "epoch": 1.9506995336442372, - "grad_norm": 0.3704904721214982, - "learning_rate": 6.07801821348722e-06, - "loss": 0.032, + "epoch": 3.9005328596802844, + "grad_norm": 0.3121746661598503, + "learning_rate": 1.8829977959051728e-08, + "loss": 0.025, "step": 4392 }, { - "epoch": 1.9511436819897847, - "grad_norm": 0.4740957603042776, - "learning_rate": 6.076125188283885e-06, - "loss": 0.0447, + "epoch": 3.9014209591474245, + "grad_norm": 0.3631011849664124, + "learning_rate": 1.8495435998491796e-08, + "loss": 0.0283, "step": 4393 }, { - "epoch": 1.951587830335332, - "grad_norm": 0.4597613542477622, - "learning_rate": 6.074232001320654e-06, - "loss": 0.0508, + "epoch": 3.9023090586145646, + "grad_norm": 0.37966032224972057, + "learning_rate": 1.816388704021843e-08, + "loss": 0.0361, "step": 4394 }, { - "epoch": 1.9520319786808793, - "grad_norm": 0.35666990194120035, - "learning_rate": 6.072338652882105e-06, - "loss": 0.036, + "epoch": 3.903197158081705, + "grad_norm": 0.2910539720829746, + "learning_rate": 1.7835331283435065e-08, + "loss": 0.0216, "step": 4395 }, { - "epoch": 1.9524761270264268, - "grad_norm": 0.5521794806193095, - "learning_rate": 6.070445143252842e-06, - "loss": 0.0555, + "epoch": 3.9040852575488456, + "grad_norm": 0.41715398574395457, + "learning_rate": 1.7509768925544902e-08, + "loss": 0.0363, "step": 4396 }, { - "epoch": 1.9529202753719743, - "grad_norm": 0.4402940395866301, - "learning_rate": 6.0685514727174885e-06, - "loss": 0.0365, + "epoch": 3.9049733570159857, + "grad_norm": 0.4393386937039567, + "learning_rate": 1.718720016215314e-08, + "loss": 0.0388, "step": 4397 }, { - "epoch": 1.9533644237175216, - "grad_norm": 0.45905507047472427, - "learning_rate": 6.066657641560697e-06, - "loss": 0.0371, + "epoch": 3.905861456483126, + "grad_norm": 0.3908960542013512, + "learning_rate": 1.6867625187066418e-08, + "loss": 0.0306, "step": 4398 }, { - "epoch": 1.953808572063069, - "grad_norm": 0.5661699028159852, - "learning_rate": 6.064763650067145e-06, - "loss": 0.0382, + "epoch": 3.9067495559502663, + "grad_norm": 0.30938149095981177, + "learning_rate": 1.655104419229281e-08, + "loss": 0.0263, "step": 4399 }, { - "epoch": 1.9542527204086166, - "grad_norm": 0.35700483978034786, - "learning_rate": 6.062869498521527e-06, - "loss": 0.0366, + "epoch": 3.907637655417407, + "grad_norm": 0.36052732524194336, + "learning_rate": 1.6237457368041833e-08, + "loss": 0.033, "step": 4400 }, { - "epoch": 1.954696868754164, - "grad_norm": 0.43424661857402086, - "learning_rate": 6.060975187208569e-06, - "loss": 0.0489, + "epoch": 3.908525754884547, + "grad_norm": 0.31285339372140775, + "learning_rate": 1.5926864902723882e-08, + "loss": 0.0251, "step": 4401 }, { - "epoch": 1.9551410170997112, - "grad_norm": 0.4129343066796859, - "learning_rate": 6.059080716413016e-06, - "loss": 0.0389, + "epoch": 3.9094138543516874, + "grad_norm": 0.30362241125417555, + "learning_rate": 1.5619266982948576e-08, + "loss": 0.0246, "step": 4402 }, { - "epoch": 1.9555851654452587, - "grad_norm": 0.5034728724416178, - "learning_rate": 6.057186086419643e-06, - "loss": 0.043, + "epoch": 3.910301953818828, + "grad_norm": 0.40522883397041964, + "learning_rate": 1.5314663793529195e-08, + "loss": 0.0399, "step": 4403 }, { - "epoch": 1.9560293137908062, - "grad_norm": 0.36382177248066033, - "learning_rate": 6.055291297513243e-06, - "loss": 0.0305, + "epoch": 3.911190053285968, + "grad_norm": 0.33940970253321623, + "learning_rate": 1.5013055517478227e-08, + "loss": 0.0241, "step": 4404 }, { - "epoch": 1.9564734621363535, - "grad_norm": 0.42171053505933703, - "learning_rate": 6.053396349978632e-06, - "loss": 0.0537, + "epoch": 3.912078152753108, + "grad_norm": 0.6869631004896491, + "learning_rate": 1.4714442336007939e-08, + "loss": 0.0265, "step": 4405 }, { - "epoch": 1.9569176104819008, - "grad_norm": 0.42351346045820926, - "learning_rate": 6.0515012441006574e-06, - "loss": 0.0444, + "epoch": 3.9129662522202486, + "grad_norm": 0.3479602430347928, + "learning_rate": 1.4418824428533152e-08, + "loss": 0.0279, "step": 4406 }, { - "epoch": 1.9573617588274483, - "grad_norm": 0.3951815066981233, - "learning_rate": 6.0496059801641835e-06, - "loss": 0.0329, + "epoch": 3.913854351687389, + "grad_norm": 0.35189230000307964, + "learning_rate": 1.4126201972666786e-08, + "loss": 0.032, "step": 4407 }, { - "epoch": 1.9578059071729959, - "grad_norm": 0.6939750072874196, - "learning_rate": 6.047710558454102e-06, - "loss": 0.0495, + "epoch": 3.9147424511545292, + "grad_norm": 0.3374428462147622, + "learning_rate": 1.3836575144223763e-08, + "loss": 0.0331, "step": 4408 }, { - "epoch": 1.9582500555185431, - "grad_norm": 0.47139234544641123, - "learning_rate": 6.0458149792553245e-06, - "loss": 0.0338, + "epoch": 3.9156305506216698, + "grad_norm": 0.3932222565628268, + "learning_rate": 1.3549944117218771e-08, + "loss": 0.0303, "step": 4409 }, { - "epoch": 1.9586942038640907, - "grad_norm": 0.4743764736759579, - "learning_rate": 6.043919242852792e-06, - "loss": 0.0401, + "epoch": 3.91651865008881, + "grad_norm": 0.3905548643151072, + "learning_rate": 1.3266309063865724e-08, + "loss": 0.0373, "step": 4410 }, { - "epoch": 1.9591383522096382, - "grad_norm": 0.7527833386589416, - "learning_rate": 6.042023349531463e-06, - "loss": 0.046, + "epoch": 3.9174067495559504, + "grad_norm": 0.3581931365729894, + "learning_rate": 1.2985670154578856e-08, + "loss": 0.0368, "step": 4411 }, { - "epoch": 1.9595825005551855, - "grad_norm": 0.4434725389118798, - "learning_rate": 6.040127299576324e-06, - "loss": 0.0366, + "epoch": 3.9182948490230904, + "grad_norm": 0.3455849993181419, + "learning_rate": 1.270802755797329e-08, + "loss": 0.0298, "step": 4412 }, { - "epoch": 1.9600266489007327, - "grad_norm": 0.5310028240539822, - "learning_rate": 6.038231093272383e-06, - "loss": 0.0475, + "epoch": 3.919182948490231, + "grad_norm": 0.34729808591163475, + "learning_rate": 1.2433381440862814e-08, + "loss": 0.0336, "step": 4413 }, { - "epoch": 1.9604707972462803, - "grad_norm": 0.6870302944712058, - "learning_rate": 6.036334730904672e-06, - "loss": 0.0401, + "epoch": 3.9200710479573715, + "grad_norm": 0.34014520671272247, + "learning_rate": 1.2161731968260981e-08, + "loss": 0.0238, "step": 4414 }, { - "epoch": 1.9609149455918278, - "grad_norm": 0.3754707520324698, - "learning_rate": 6.034438212758249e-06, - "loss": 0.0344, + "epoch": 3.9209591474245116, + "grad_norm": 0.36396393616068506, + "learning_rate": 1.1893079303381127e-08, + "loss": 0.0283, "step": 4415 }, { - "epoch": 1.961359093937375, - "grad_norm": 0.7273648929204632, - "learning_rate": 6.032541539118188e-06, - "loss": 0.059, + "epoch": 3.9218472468916517, + "grad_norm": 0.4075439359104293, + "learning_rate": 1.1627423607635246e-08, + "loss": 0.037, "step": 4416 }, { - "epoch": 1.9618032422829224, - "grad_norm": 0.5563803052374947, - "learning_rate": 6.030644710269595e-06, - "loss": 0.0337, + "epoch": 3.922735346358792, + "grad_norm": 0.38375575558697966, + "learning_rate": 1.1364765040636772e-08, + "loss": 0.035, "step": 4417 }, { - "epoch": 1.9622473906284699, - "grad_norm": 0.5196612260430283, - "learning_rate": 6.028747726497594e-06, - "loss": 0.043, + "epoch": 3.9236234458259327, + "grad_norm": 0.3555601567631089, + "learning_rate": 1.1105103760195578e-08, + "loss": 0.0342, "step": 4418 }, { - "epoch": 1.9626915389740174, - "grad_norm": 0.37874941124980105, - "learning_rate": 6.026850588087334e-06, - "loss": 0.0438, + "epoch": 3.924511545293073, + "grad_norm": 0.3779977661952045, + "learning_rate": 1.0848439922322984e-08, + "loss": 0.0386, "step": 4419 }, { - "epoch": 1.9631356873195647, - "grad_norm": 0.3791041958907497, - "learning_rate": 6.024953295323987e-06, - "loss": 0.0318, + "epoch": 3.9253996447602133, + "grad_norm": 0.3746265004100518, + "learning_rate": 1.059477368122841e-08, + "loss": 0.031, "step": 4420 }, { - "epoch": 1.9635798356651122, - "grad_norm": 0.7223736573889502, - "learning_rate": 6.02305584849275e-06, - "loss": 0.0406, + "epoch": 3.9262877442273534, + "grad_norm": 0.4093351345110092, + "learning_rate": 1.0344105189320496e-08, + "loss": 0.0327, "step": 4421 }, { - "epoch": 1.9640239840106597, - "grad_norm": 0.5182650699628792, - "learning_rate": 6.02115824787884e-06, - "loss": 0.0292, + "epoch": 3.927175843694494, + "grad_norm": 0.36839518559894463, + "learning_rate": 1.0096434597205995e-08, + "loss": 0.0349, "step": 4422 }, { - "epoch": 1.964468132356207, - "grad_norm": 0.42934775755171317, - "learning_rate": 6.019260493767499e-06, - "loss": 0.0328, + "epoch": 3.928063943161634, + "grad_norm": 0.357834203515738, + "learning_rate": 9.851762053691426e-09, + "loss": 0.0251, "step": 4423 }, { - "epoch": 1.9649122807017543, - "grad_norm": 0.5027721140555967, - "learning_rate": 6.0173625864439924e-06, - "loss": 0.0328, + "epoch": 3.9289520426287745, + "grad_norm": 0.42674085134064105, + "learning_rate": 9.610087705781979e-09, + "loss": 0.0305, "step": 4424 }, { - "epoch": 1.9653564290473018, - "grad_norm": 0.4239046281164633, - "learning_rate": 6.015464526193605e-06, - "loss": 0.0402, + "epoch": 3.9298401420959146, + "grad_norm": 0.4328029975863711, + "learning_rate": 9.371411698680388e-09, + "loss": 0.0339, "step": 4425 }, { - "epoch": 1.9658005773928493, - "grad_norm": 0.3685503956667686, - "learning_rate": 6.013566313301651e-06, - "loss": 0.0356, + "epoch": 3.930728241563055, + "grad_norm": 0.37474273469732067, + "learning_rate": 9.135734175789723e-09, + "loss": 0.031, "step": 4426 }, { - "epoch": 1.9662447257383966, - "grad_norm": 0.5335432976601984, - "learning_rate": 6.011667948053462e-06, - "loss": 0.0488, + "epoch": 3.931616341030195, + "grad_norm": 0.3799716585454093, + "learning_rate": 8.903055278709494e-09, + "loss": 0.0409, "step": 4427 }, { - "epoch": 1.9666888740839439, - "grad_norm": 0.41452010755189883, - "learning_rate": 6.009769430734395e-06, - "loss": 0.0458, + "epoch": 3.9325044404973357, + "grad_norm": 0.4941566703547101, + "learning_rate": 8.673375147238983e-09, + "loss": 0.0348, "step": 4428 }, { - "epoch": 1.9671330224294914, - "grad_norm": 0.4362710704789863, - "learning_rate": 6.007870761629831e-06, - "loss": 0.0399, + "epoch": 3.9333925399644762, + "grad_norm": 0.3051086264307757, + "learning_rate": 8.446693919375026e-09, + "loss": 0.0284, "step": 4429 }, { - "epoch": 1.967577170775039, - "grad_norm": 0.40405492276821936, - "learning_rate": 6.005971941025171e-06, - "loss": 0.0338, + "epoch": 3.9342806394316163, + "grad_norm": 0.35148664081966585, + "learning_rate": 8.223011731313679e-09, + "loss": 0.0234, "step": 4430 }, { - "epoch": 1.9680213191205862, - "grad_norm": 0.602695490290635, - "learning_rate": 6.004072969205838e-06, - "loss": 0.0684, + "epoch": 3.9351687388987564, + "grad_norm": 0.46463968792863114, + "learning_rate": 8.002328717447438e-09, + "loss": 0.0331, "step": 4431 }, { - "epoch": 1.9684654674661337, - "grad_norm": 0.47106411491289435, - "learning_rate": 6.002173846457282e-06, - "loss": 0.0402, + "epoch": 3.936056838365897, + "grad_norm": 0.3388242083345385, + "learning_rate": 7.78464501036802e-09, + "loss": 0.0285, "step": 4432 }, { - "epoch": 1.9689096158116812, - "grad_norm": 0.5941226728121359, - "learning_rate": 6.0002745730649725e-06, - "loss": 0.0376, + "epoch": 3.9369449378330375, + "grad_norm": 0.4574281105885283, + "learning_rate": 7.569960740864691e-09, + "loss": 0.0437, "step": 4433 }, { - "epoch": 1.9693537641572285, - "grad_norm": 0.3952404366832551, - "learning_rate": 5.998375149314404e-06, - "loss": 0.0375, + "epoch": 3.9378330373001775, + "grad_norm": 0.38361101761730315, + "learning_rate": 7.358276037924827e-09, + "loss": 0.0357, "step": 4434 }, { - "epoch": 1.9697979125027758, - "grad_norm": 0.5414636177351856, - "learning_rate": 5.996475575491091e-06, - "loss": 0.036, + "epoch": 3.938721136767318, + "grad_norm": 0.46578909974309685, + "learning_rate": 7.14959102873336e-09, + "loss": 0.0284, "step": 4435 }, { - "epoch": 1.9702420608483233, - "grad_norm": 0.35141134241433086, - "learning_rate": 5.994575851880571e-06, - "loss": 0.0299, + "epoch": 3.939609236234458, + "grad_norm": 0.3574257582077392, + "learning_rate": 6.943905838673881e-09, + "loss": 0.0351, "step": 4436 }, { - "epoch": 1.9706862091938708, - "grad_norm": 0.5469528061005677, - "learning_rate": 5.992675978768406e-06, - "loss": 0.0312, + "epoch": 3.9404973357015987, + "grad_norm": 0.363374991490502, + "learning_rate": 6.7412205913253145e-09, + "loss": 0.0259, "step": 4437 }, { - "epoch": 1.9711303575394181, - "grad_norm": 0.4485140591176533, - "learning_rate": 5.99077595644018e-06, - "loss": 0.0397, + "epoch": 3.9413854351687387, + "grad_norm": 0.31613225253795635, + "learning_rate": 6.54153540846636e-09, + "loss": 0.0305, "step": 4438 }, { - "epoch": 1.9715745058849656, - "grad_norm": 0.4559475041999291, - "learning_rate": 5.988875785181496e-06, - "loss": 0.0399, + "epoch": 3.9422735346358793, + "grad_norm": 0.34656321322412253, + "learning_rate": 6.344850410072157e-09, + "loss": 0.0238, "step": 4439 }, { - "epoch": 1.9720186542305131, - "grad_norm": 0.5184191786649656, - "learning_rate": 5.986975465277983e-06, - "loss": 0.042, + "epoch": 3.94316163410302, + "grad_norm": 0.3123946843459642, + "learning_rate": 6.151165714316509e-09, + "loss": 0.0255, "step": 4440 }, { - "epoch": 1.9724628025760604, - "grad_norm": 0.5018306786878316, - "learning_rate": 5.9850749970152935e-06, - "loss": 0.0433, + "epoch": 3.94404973357016, + "grad_norm": 0.36304887085285237, + "learning_rate": 5.9604814375685546e-09, + "loss": 0.0261, "step": 4441 }, { - "epoch": 1.9729069509216077, - "grad_norm": 0.41853899287083135, - "learning_rate": 5.983174380679096e-06, - "loss": 0.0311, + "epoch": 3.9449378330373, + "grad_norm": 0.334897253182668, + "learning_rate": 5.772797694396093e-09, + "loss": 0.0275, "step": 4442 }, { - "epoch": 1.9733510992671552, - "grad_norm": 0.3979151331311135, - "learning_rate": 5.98127361655509e-06, - "loss": 0.0393, + "epoch": 3.9458259325044405, + "grad_norm": 0.3136357507978124, + "learning_rate": 5.58811459756392e-09, + "loss": 0.0304, "step": 4443 }, { - "epoch": 1.9737952476127028, - "grad_norm": 0.4773579735193138, - "learning_rate": 5.979372704928991e-06, - "loss": 0.0404, + "epoch": 3.946714031971581, + "grad_norm": 0.4593259847309228, + "learning_rate": 5.406432258033834e-09, + "loss": 0.0321, "step": 4444 }, { - "epoch": 1.97423939595825, - "grad_norm": 0.5742352016540867, - "learning_rate": 5.977471646086535e-06, - "loss": 0.0371, + "epoch": 3.947602131438721, + "grad_norm": 0.40217183419243724, + "learning_rate": 5.227750784964625e-09, + "loss": 0.0366, "step": 4445 }, { - "epoch": 1.9746835443037973, - "grad_norm": 0.4462263069511724, - "learning_rate": 5.97557044031349e-06, - "loss": 0.0327, + "epoch": 3.9484902309058616, + "grad_norm": 0.34898375985369834, + "learning_rate": 5.05207028571264e-09, + "loss": 0.0341, "step": 4446 }, { - "epoch": 1.9751276926493448, - "grad_norm": 0.6732019962659918, - "learning_rate": 5.973669087895633e-06, - "loss": 0.0596, + "epoch": 3.9493783303730017, + "grad_norm": 0.39904753677337107, + "learning_rate": 4.8793908658306685e-09, + "loss": 0.0341, "step": 4447 }, { - "epoch": 1.9755718409948924, - "grad_norm": 0.485770657972991, - "learning_rate": 5.971767589118772e-06, - "loss": 0.0337, + "epoch": 3.950266429840142, + "grad_norm": 0.3553762719573451, + "learning_rate": 4.709712629067942e-09, + "loss": 0.0286, "step": 4448 }, { - "epoch": 1.9760159893404396, - "grad_norm": 0.342670431757156, - "learning_rate": 5.969865944268737e-06, - "loss": 0.0303, + "epoch": 3.9511545293072823, + "grad_norm": 0.35527412309700473, + "learning_rate": 4.543035677371244e-09, + "loss": 0.0301, "step": 4449 }, { - "epoch": 1.9764601376859872, - "grad_norm": 0.4112414603189444, - "learning_rate": 5.9679641536313734e-06, - "loss": 0.038, + "epoch": 3.952042628774423, + "grad_norm": 0.39552273161631557, + "learning_rate": 4.3793601108843565e-09, + "loss": 0.0368, "step": 4450 }, { - "epoch": 1.9769042860315347, - "grad_norm": 0.6127929425160972, - "learning_rate": 5.9660622174925564e-06, - "loss": 0.0537, + "epoch": 3.9529307282415633, + "grad_norm": 0.37130737779018136, + "learning_rate": 4.21868602794695e-09, + "loss": 0.031, "step": 4451 }, { - "epoch": 1.977348434377082, - "grad_norm": 0.33024844150086474, - "learning_rate": 5.964160136138177e-06, - "loss": 0.0353, + "epoch": 3.9538188277087034, + "grad_norm": 0.3440514548923429, + "learning_rate": 4.061013525095692e-09, + "loss": 0.0303, "step": 4452 }, { - "epoch": 1.9777925827226293, - "grad_norm": 0.5529535095882723, - "learning_rate": 5.96225790985415e-06, - "loss": 0.0442, + "epoch": 3.9547069271758435, + "grad_norm": 0.35033498305532507, + "learning_rate": 3.906342697063692e-09, + "loss": 0.0337, "step": 4453 }, { - "epoch": 1.9782367310681768, - "grad_norm": 0.4225770450854345, - "learning_rate": 5.960355538926414e-06, - "loss": 0.0494, + "epoch": 3.955595026642984, + "grad_norm": 0.30750918842297564, + "learning_rate": 3.754673636781614e-09, + "loss": 0.0276, "step": 4454 }, { - "epoch": 1.9786808794137243, - "grad_norm": 0.5282007627303459, - "learning_rate": 5.958453023640928e-06, - "loss": 0.0416, + "epoch": 3.9564831261101245, + "grad_norm": 0.33644798243597185, + "learning_rate": 3.606006435374343e-09, + "loss": 0.0304, "step": 4455 }, { - "epoch": 1.9791250277592716, - "grad_norm": 0.3898204372093461, - "learning_rate": 5.956550364283671e-06, - "loss": 0.0373, + "epoch": 3.9573712255772646, + "grad_norm": 0.39809286248444264, + "learning_rate": 3.4603411821654277e-09, + "loss": 0.0326, "step": 4456 }, { - "epoch": 1.9795691761048189, - "grad_norm": 0.6543021886189491, - "learning_rate": 5.954647561140643e-06, - "loss": 0.0406, + "epoch": 3.9582593250444047, + "grad_norm": 0.38058939879062353, + "learning_rate": 3.317677964674304e-09, + "loss": 0.029, "step": 4457 }, { - "epoch": 1.9800133244503664, - "grad_norm": 0.7003740987141756, - "learning_rate": 5.952744614497872e-06, - "loss": 0.0437, + "epoch": 3.959147424511545, + "grad_norm": 0.4372617290747728, + "learning_rate": 3.1780168686151856e-09, + "loss": 0.0336, "step": 4458 }, { - "epoch": 1.9804574727959139, - "grad_norm": 0.4937974164168222, - "learning_rate": 5.9508415246414e-06, - "loss": 0.0539, + "epoch": 3.9600355239786857, + "grad_norm": 0.3848830082973257, + "learning_rate": 3.0413579779003944e-09, + "loss": 0.0306, "step": 4459 }, { - "epoch": 1.9809016211414612, - "grad_norm": 0.46863912061469276, - "learning_rate": 5.948938291857296e-06, - "loss": 0.0503, + "epoch": 3.960923623445826, + "grad_norm": 0.41395429123456157, + "learning_rate": 2.9077013746370284e-09, + "loss": 0.0458, "step": 4460 }, { - "epoch": 1.9813457694870087, - "grad_norm": 0.5235494039593215, - "learning_rate": 5.947034916431646e-06, - "loss": 0.0462, + "epoch": 3.9618117229129663, + "grad_norm": 0.3645564211377606, + "learning_rate": 2.7770471391302954e-09, + "loss": 0.0298, "step": 4461 }, { - "epoch": 1.9817899178325562, - "grad_norm": 0.4097032398279417, - "learning_rate": 5.945131398650561e-06, - "loss": 0.038, + "epoch": 3.9626998223801064, + "grad_norm": 0.3398971948514898, + "learning_rate": 2.6493953498790692e-09, + "loss": 0.0246, "step": 4462 }, { - "epoch": 1.9822340661781035, - "grad_norm": 0.4528672743260559, - "learning_rate": 5.943227738800172e-06, - "loss": 0.0558, + "epoch": 3.963587921847247, + "grad_norm": 0.38068185224854645, + "learning_rate": 2.5247460835803317e-09, + "loss": 0.0276, "step": 4463 }, { - "epoch": 1.9826782145236508, - "grad_norm": 0.5204018448947473, - "learning_rate": 5.941323937166632e-06, - "loss": 0.0522, + "epoch": 3.964476021314387, + "grad_norm": 0.44423873753371224, + "learning_rate": 2.4030994151252873e-09, + "loss": 0.0275, "step": 4464 }, { - "epoch": 1.9831223628691983, - "grad_norm": 0.3540421279343773, - "learning_rate": 5.939419994036113e-06, - "loss": 0.038, + "epoch": 3.9653641207815276, + "grad_norm": 0.3828132649398428, + "learning_rate": 2.2844554176026933e-09, + "loss": 0.0325, "step": 4465 }, { - "epoch": 1.9835665112147458, - "grad_norm": 0.6263363230917521, - "learning_rate": 5.937515909694811e-06, - "loss": 0.0569, + "epoch": 3.966252220248668, + "grad_norm": 0.38784021300605276, + "learning_rate": 2.1688141622966397e-09, + "loss": 0.037, "step": 4466 }, { - "epoch": 1.984010659560293, - "grad_norm": 0.4116502316940924, - "learning_rate": 5.9356116844289426e-06, - "loss": 0.0413, + "epoch": 3.967140319715808, + "grad_norm": 0.30232065076110426, + "learning_rate": 2.0561757186871033e-09, + "loss": 0.0248, "step": 4467 }, { - "epoch": 1.9844548079058404, - "grad_norm": 0.44077017956645004, - "learning_rate": 5.933707318524744e-06, - "loss": 0.0333, + "epoch": 3.9680284191829482, + "grad_norm": 0.3393350555374966, + "learning_rate": 1.9465401544493944e-09, + "loss": 0.0252, "step": 4468 }, { - "epoch": 1.9848989562513881, - "grad_norm": 0.3729944832338479, - "learning_rate": 5.931802812268476e-06, - "loss": 0.0327, + "epoch": 3.9689165186500888, + "grad_norm": 0.37312630808588526, + "learning_rate": 1.8399075354552653e-09, + "loss": 0.0325, "step": 4469 }, { - "epoch": 1.9853431045969354, - "grad_norm": 0.40626287554685864, - "learning_rate": 5.929898165946416e-06, - "loss": 0.0363, + "epoch": 3.9698046181172293, + "grad_norm": 0.3460099167477063, + "learning_rate": 1.736277925771801e-09, + "loss": 0.0305, "step": 4470 }, { - "epoch": 1.9857872529424827, - "grad_norm": 0.5950032125541065, - "learning_rate": 5.927993379844864e-06, - "loss": 0.0426, + "epoch": 3.9706927175843694, + "grad_norm": 0.3152575554068713, + "learning_rate": 1.635651387662529e-09, + "loss": 0.038, "step": 4471 }, { - "epoch": 1.9862314012880302, - "grad_norm": 0.40799770820647496, - "learning_rate": 5.9260884542501455e-06, - "loss": 0.044, + "epoch": 3.97158081705151, + "grad_norm": 0.3338993687005216, + "learning_rate": 1.5380279815863097e-09, + "loss": 0.0255, "step": 4472 }, { - "epoch": 1.9866755496335777, - "grad_norm": 0.41140378050861237, - "learning_rate": 5.9241833894486e-06, - "loss": 0.0482, + "epoch": 3.97246891651865, + "grad_norm": 0.40719111799698354, + "learning_rate": 1.4434077661967804e-09, + "loss": 0.0402, "step": 4473 }, { - "epoch": 1.987119697979125, - "grad_norm": 0.40378379008973436, - "learning_rate": 5.922278185726591e-06, - "loss": 0.0327, + "epoch": 3.9733570159857905, + "grad_norm": 0.296777642640809, + "learning_rate": 1.3517907983445767e-09, + "loss": 0.029, "step": 4474 }, { - "epoch": 1.9875638463246723, - "grad_norm": 0.41453258443290436, - "learning_rate": 5.920372843370504e-06, - "loss": 0.0416, + "epoch": 3.9742451154529306, + "grad_norm": 0.5392442936919057, + "learning_rate": 1.2631771330751107e-09, + "loss": 0.0397, "step": 4475 }, { - "epoch": 1.9880079946702198, - "grad_norm": 0.5109670847768875, - "learning_rate": 5.9184673626667455e-06, - "loss": 0.039, + "epoch": 3.975133214920071, + "grad_norm": 0.4056363313670081, + "learning_rate": 1.1775668236291282e-09, + "loss": 0.0312, "step": 4476 }, { - "epoch": 1.9884521430157673, - "grad_norm": 0.5686294228064248, - "learning_rate": 5.9165617439017395e-06, - "loss": 0.0534, + "epoch": 3.9760213143872116, + "grad_norm": 0.40720194454595754, + "learning_rate": 1.0949599214438166e-09, + "loss": 0.0366, "step": 4477 }, { - "epoch": 1.9888962913613146, - "grad_norm": 0.4064180941136422, - "learning_rate": 5.914655987361934e-06, - "loss": 0.0328, + "epoch": 3.9769094138543517, + "grad_norm": 0.364250813844353, + "learning_rate": 1.0153564761511414e-09, + "loss": 0.0332, "step": 4478 }, { - "epoch": 1.9893404397068621, - "grad_norm": 0.5070558821658653, - "learning_rate": 5.912750093333796e-06, - "loss": 0.0542, + "epoch": 3.977797513321492, + "grad_norm": 0.3556858387174976, + "learning_rate": 9.387565355784e-10, + "loss": 0.0246, "step": 4479 }, { - "epoch": 1.9897845880524097, - "grad_norm": 0.3530913079623509, - "learning_rate": 5.910844062103814e-06, - "loss": 0.0395, + "epoch": 3.9786856127886323, + "grad_norm": 0.45165219318354977, + "learning_rate": 8.651601457493331e-10, + "loss": 0.0289, "step": 4480 }, { - "epoch": 1.990228736397957, - "grad_norm": 0.43523429839148015, - "learning_rate": 5.908937893958497e-06, - "loss": 0.0366, + "epoch": 3.979573712255773, + "grad_norm": 0.35139031002380433, + "learning_rate": 7.945673508813479e-10, + "loss": 0.0317, "step": 4481 }, { - "epoch": 1.9906728847435042, - "grad_norm": 0.4229033433291945, - "learning_rate": 5.907031589184374e-06, - "loss": 0.0383, + "epoch": 3.980461811722913, + "grad_norm": 0.3442321499166264, + "learning_rate": 7.269781933888498e-10, + "loss": 0.0263, "step": 4482 }, { - "epoch": 1.9911170330890517, - "grad_norm": 0.39083478145330547, - "learning_rate": 5.905125148067997e-06, - "loss": 0.0371, + "epoch": 3.9813499111900534, + "grad_norm": 0.3938383521002017, + "learning_rate": 6.623927138804665e-10, + "loss": 0.0277, "step": 4483 }, { - "epoch": 1.9915611814345993, - "grad_norm": 0.5025271192866929, - "learning_rate": 5.9032185708959354e-06, - "loss": 0.0395, + "epoch": 3.9822380106571935, + "grad_norm": 0.3487659679775154, + "learning_rate": 6.008109511612681e-10, + "loss": 0.0293, "step": 4484 }, { - "epoch": 1.9920053297801465, - "grad_norm": 0.349650340955071, - "learning_rate": 5.901311857954777e-06, - "loss": 0.0266, + "epoch": 3.983126110124334, + "grad_norm": 0.38471884813260165, + "learning_rate": 5.422329422305472e-10, + "loss": 0.0284, "step": 4485 }, { - "epoch": 1.9924494781256938, - "grad_norm": 0.5279876104400976, - "learning_rate": 5.899405009531136e-06, - "loss": 0.0402, + "epoch": 3.984014209591474, + "grad_norm": 0.3897173591989827, + "learning_rate": 4.86658722283484e-10, + "loss": 0.0273, "step": 4486 }, { - "epoch": 1.9928936264712414, - "grad_norm": 0.6233845443742896, - "learning_rate": 5.897498025911645e-06, - "loss": 0.048, + "epoch": 3.9849023090586146, + "grad_norm": 0.30948987005245854, + "learning_rate": 4.3408832471059094e-10, + "loss": 0.0284, "step": 4487 }, { - "epoch": 1.9933377748167889, - "grad_norm": 0.49026655627892524, - "learning_rate": 5.8955909073829555e-06, - "loss": 0.0412, + "epoch": 3.9857904085257547, + "grad_norm": 0.3627150868494569, + "learning_rate": 3.8452178109660285e-10, + "loss": 0.0308, "step": 4488 }, { - "epoch": 1.9937819231623362, - "grad_norm": 0.5000915892039338, - "learning_rate": 5.893683654231737e-06, - "loss": 0.0394, + "epoch": 3.9866785079928952, + "grad_norm": 0.4022849724346789, + "learning_rate": 3.379591212226974e-10, + "loss": 0.0351, "step": 4489 }, { - "epoch": 1.9942260715078837, - "grad_norm": 0.5000990386217609, - "learning_rate": 5.891776266744686e-06, - "loss": 0.0355, + "epoch": 3.9875666074600353, + "grad_norm": 0.3631211766442342, + "learning_rate": 2.944003730653844e-10, + "loss": 0.0279, "step": 4490 }, { - "epoch": 1.9946702198534312, - "grad_norm": 0.39359957737650036, - "learning_rate": 5.889868745208514e-06, - "loss": 0.0304, + "epoch": 3.988454706927176, + "grad_norm": 0.324546551530327, + "learning_rate": 2.5384556279484107e-10, + "loss": 0.0289, "step": 4491 }, { - "epoch": 1.9951143681989785, - "grad_norm": 0.9651263441707514, - "learning_rate": 5.8879610899099505e-06, - "loss": 0.0529, + "epoch": 3.9893428063943164, + "grad_norm": 0.3355581682470875, + "learning_rate": 2.1629471477768727e-10, + "loss": 0.0264, "step": 4492 }, { - "epoch": 1.9955585165445258, - "grad_norm": 0.32907807731556993, - "learning_rate": 5.886053301135755e-06, - "loss": 0.0322, + "epoch": 3.9902309058614565, + "grad_norm": 0.32219069993034016, + "learning_rate": 1.8174785157532017e-10, + "loss": 0.0305, "step": 4493 }, { - "epoch": 1.9960026648900733, - "grad_norm": 0.4231846959633054, - "learning_rate": 5.8841453791726944e-06, - "loss": 0.0362, + "epoch": 3.9911190053285965, + "grad_norm": 0.34967548702004914, + "learning_rate": 1.502049939444694e-10, + "loss": 0.0251, "step": 4494 }, { - "epoch": 1.9964468132356208, - "grad_norm": 0.4958581720016726, - "learning_rate": 5.882237324307564e-06, - "loss": 0.0304, + "epoch": 3.992007104795737, + "grad_norm": 0.36096755448800333, + "learning_rate": 1.2166616083608695e-10, + "loss": 0.0325, "step": 4495 }, { - "epoch": 1.996890961581168, - "grad_norm": 0.44600019470214586, - "learning_rate": 5.880329136827178e-06, - "loss": 0.0405, + "epoch": 3.9928952042628776, + "grad_norm": 0.5522564036143544, + "learning_rate": 9.613136939812251e-11, + "loss": 0.0366, "step": 4496 }, { - "epoch": 1.9973351099267154, - "grad_norm": 0.5659602027375302, - "learning_rate": 5.878420817018369e-06, - "loss": 0.0472, + "epoch": 3.9937833037300177, + "grad_norm": 0.37215104990308784, + "learning_rate": 7.360063497163783e-11, + "loss": 0.0354, "step": 4497 }, { - "epoch": 1.9977792582722629, - "grad_norm": 1.0132374448664145, - "learning_rate": 5.87651236516799e-06, - "loss": 0.0706, + "epoch": 3.994671403197158, + "grad_norm": 0.28028856136098346, + "learning_rate": 5.4073971093582257e-11, + "loss": 0.0266, "step": 4498 }, { - "epoch": 1.9982234066178104, - "grad_norm": 0.4914853042492375, - "learning_rate": 5.874603781562911e-06, - "loss": 0.0422, + "epoch": 3.9955595026642983, + "grad_norm": 0.3228560504657294, + "learning_rate": 3.755138949679271e-11, + "loss": 0.0258, "step": 4499 }, { - "epoch": 1.9986675549633577, - "grad_norm": 0.40024702771181153, - "learning_rate": 5.872695066490028e-06, - "loss": 0.0344, + "epoch": 3.996447602131439, + "grad_norm": 0.35030932548468985, + "learning_rate": 2.403290010777326e-11, + "loss": 0.0336, "step": 4500 }, { - "epoch": 1.9991117033089052, - "grad_norm": 0.49874140037011355, - "learning_rate": 5.870786220236253e-06, - "loss": 0.0417, + "epoch": 3.997335701598579, + "grad_norm": 0.39703349791825004, + "learning_rate": 1.3518511048360438e-11, + "loss": 0.0327, "step": 4501 }, { - "epoch": 1.9995558516544527, - "grad_norm": 0.47988916644310126, - "learning_rate": 5.868877243088515e-06, - "loss": 0.0441, + "epoch": 3.9982238010657194, + "grad_norm": 0.29928946564050385, + "learning_rate": 6.00822863683348e-12, + "loss": 0.0264, "step": 4502 }, { - "epoch": 2.0, - "grad_norm": 0.904943869460001, - "learning_rate": 5.866968135333769e-06, - "loss": 0.0502, - "step": 4503 - }, - { - "epoch": 2.0, - "eval_loss": 0.04465880244970322, - "eval_runtime": 403.5178, - "eval_samples_per_second": 37.584, - "eval_steps_per_second": 1.175, + "epoch": 3.99911190053286, + "grad_norm": 0.40616736854240193, + "learning_rate": 1.502057384583644e-12, + "loss": 0.0343, "step": 4503 }, { - "epoch": 2.0004441483455473, - "grad_norm": 0.4375728669928517, - "learning_rate": 5.8650588972589865e-06, - "loss": 0.0263, + "epoch": 4.0, + "grad_norm": 0.3392781532186694, + "learning_rate": 0.0, + "loss": 0.0238, "step": 4504 }, { - "epoch": 2.000888296691095, - "grad_norm": 0.9215657711078544, - "learning_rate": 5.863149529151154e-06, - "loss": 0.0492, - "step": 4505 - }, - { - "epoch": 2.0013324450366423, - "grad_norm": 0.5271072063506441, - "learning_rate": 5.8612400312972865e-06, - "loss": 0.0508, - "step": 4506 - }, - { - "epoch": 2.0017765933821896, - "grad_norm": 0.48411386914362037, - "learning_rate": 5.859330403984413e-06, - "loss": 0.0371, - "step": 4507 - }, - { - "epoch": 2.002220741727737, - "grad_norm": 0.4017233429265068, - "learning_rate": 5.85742064749958e-06, - "loss": 0.0418, - "step": 4508 - }, - { - "epoch": 2.0026648900732846, - "grad_norm": 0.4120119737365717, - "learning_rate": 5.85551076212986e-06, - "loss": 0.046, - "step": 4509 - }, - { - "epoch": 2.003109038418832, - "grad_norm": 0.4366707287416164, - "learning_rate": 5.8536007481623406e-06, - "loss": 0.0443, - "step": 4510 - }, - { - "epoch": 2.003553186764379, - "grad_norm": 0.7145508695971371, - "learning_rate": 5.851690605884127e-06, - "loss": 0.0532, - "step": 4511 - }, - { - "epoch": 2.0039973351099265, - "grad_norm": 0.5011936156881229, - "learning_rate": 5.84978033558235e-06, - "loss": 0.0354, - "step": 4512 - }, - { - "epoch": 2.0044414834554742, - "grad_norm": 0.36581304940892057, - "learning_rate": 5.847869937544151e-06, - "loss": 0.0294, - "step": 4513 - }, - { - "epoch": 2.0048856318010215, - "grad_norm": 0.37996248718965503, - "learning_rate": 5.845959412056699e-06, - "loss": 0.0286, - "step": 4514 - }, - { - "epoch": 2.005329780146569, - "grad_norm": 0.5097034078222489, - "learning_rate": 5.844048759407177e-06, - "loss": 0.0414, - "step": 4515 - }, - { - "epoch": 2.0057739284921166, - "grad_norm": 0.4963020471009325, - "learning_rate": 5.842137979882786e-06, - "loss": 0.0459, - "step": 4516 - }, - { - "epoch": 2.006218076837664, - "grad_norm": 0.6161319347902735, - "learning_rate": 5.840227073770754e-06, - "loss": 0.0629, - "step": 4517 - }, - { - "epoch": 2.006662225183211, - "grad_norm": 0.3511248203446856, - "learning_rate": 5.838316041358319e-06, - "loss": 0.0295, - "step": 4518 - }, - { - "epoch": 2.0071063735287584, - "grad_norm": 0.37830736549229443, - "learning_rate": 5.836404882932744e-06, - "loss": 0.0319, - "step": 4519 - }, - { - "epoch": 2.007550521874306, - "grad_norm": 0.6333845141434062, - "learning_rate": 5.8344935987813045e-06, - "loss": 0.0379, - "step": 4520 - }, - { - "epoch": 2.0079946702198535, - "grad_norm": 0.5166443794796353, - "learning_rate": 5.832582189191304e-06, - "loss": 0.0346, - "step": 4521 - }, - { - "epoch": 2.0084388185654007, - "grad_norm": 0.4317191740162663, - "learning_rate": 5.8306706544500544e-06, - "loss": 0.0319, - "step": 4522 - }, - { - "epoch": 2.0088829669109485, - "grad_norm": 0.4388602499538576, - "learning_rate": 5.828758994844896e-06, - "loss": 0.0377, - "step": 4523 - }, - { - "epoch": 2.0093271152564958, - "grad_norm": 0.5946947521373457, - "learning_rate": 5.826847210663184e-06, - "loss": 0.0421, - "step": 4524 - }, - { - "epoch": 2.009771263602043, - "grad_norm": 0.3821952135869883, - "learning_rate": 5.8249353021922895e-06, - "loss": 0.0303, - "step": 4525 - }, - { - "epoch": 2.0102154119475903, - "grad_norm": 0.5519322372289749, - "learning_rate": 5.823023269719606e-06, - "loss": 0.0513, - "step": 4526 - }, - { - "epoch": 2.010659560293138, - "grad_norm": 0.6313520947120921, - "learning_rate": 5.821111113532545e-06, - "loss": 0.0484, - "step": 4527 - }, - { - "epoch": 2.0111037086386854, - "grad_norm": 0.5368562306090153, - "learning_rate": 5.819198833918533e-06, - "loss": 0.051, - "step": 4528 - }, - { - "epoch": 2.0115478569842327, - "grad_norm": 0.39624325698963303, - "learning_rate": 5.817286431165024e-06, - "loss": 0.0333, - "step": 4529 - }, - { - "epoch": 2.01199200532978, - "grad_norm": 0.411233856841024, - "learning_rate": 5.815373905559478e-06, - "loss": 0.0312, - "step": 4530 - }, - { - "epoch": 2.0124361536753277, - "grad_norm": 0.3742461123258978, - "learning_rate": 5.813461257389384e-06, - "loss": 0.0268, - "step": 4531 - }, - { - "epoch": 2.012880302020875, - "grad_norm": 0.3749818385865246, - "learning_rate": 5.811548486942246e-06, - "loss": 0.0346, - "step": 4532 - }, - { - "epoch": 2.0133244503664223, - "grad_norm": 0.34116601308623506, - "learning_rate": 5.809635594505585e-06, - "loss": 0.0251, - "step": 4533 - }, - { - "epoch": 2.01376859871197, - "grad_norm": 0.5178521236398543, - "learning_rate": 5.807722580366939e-06, - "loss": 0.0381, - "step": 4534 - }, - { - "epoch": 2.0142127470575173, - "grad_norm": 0.32707032102652794, - "learning_rate": 5.805809444813869e-06, - "loss": 0.0275, - "step": 4535 - }, - { - "epoch": 2.0146568954030646, - "grad_norm": 0.4024749636693741, - "learning_rate": 5.80389618813395e-06, - "loss": 0.0365, - "step": 4536 - }, - { - "epoch": 2.015101043748612, - "grad_norm": 0.3506397945464826, - "learning_rate": 5.8019828106147805e-06, - "loss": 0.0252, - "step": 4537 - }, - { - "epoch": 2.0155451920941596, - "grad_norm": 0.5822851446048461, - "learning_rate": 5.80006931254397e-06, - "loss": 0.0369, - "step": 4538 - }, - { - "epoch": 2.015989340439707, - "grad_norm": 0.44067833638988113, - "learning_rate": 5.798155694209151e-06, - "loss": 0.0344, - "step": 4539 - }, - { - "epoch": 2.016433488785254, - "grad_norm": 0.4460004586157456, - "learning_rate": 5.796241955897972e-06, - "loss": 0.0381, - "step": 4540 - }, - { - "epoch": 2.0168776371308015, - "grad_norm": 0.3328661208832795, - "learning_rate": 5.7943280978981034e-06, - "loss": 0.0284, - "step": 4541 - }, - { - "epoch": 2.017321785476349, - "grad_norm": 0.48357435513498903, - "learning_rate": 5.792414120497227e-06, - "loss": 0.0361, - "step": 4542 - }, - { - "epoch": 2.0177659338218965, - "grad_norm": 0.4866683235779641, - "learning_rate": 5.790500023983049e-06, - "loss": 0.0359, - "step": 4543 - }, - { - "epoch": 2.018210082167444, - "grad_norm": 0.39359993621118305, - "learning_rate": 5.788585808643287e-06, - "loss": 0.0281, - "step": 4544 - }, - { - "epoch": 2.0186542305129915, - "grad_norm": 0.4058975435223384, - "learning_rate": 5.786671474765683e-06, - "loss": 0.0314, - "step": 4545 - }, - { - "epoch": 2.019098378858539, - "grad_norm": 0.38121689511830104, - "learning_rate": 5.784757022637993e-06, - "loss": 0.0291, - "step": 4546 - }, - { - "epoch": 2.019542527204086, - "grad_norm": 0.4508375438456433, - "learning_rate": 5.782842452547992e-06, - "loss": 0.0334, - "step": 4547 - }, - { - "epoch": 2.0199866755496334, - "grad_norm": 0.42673347363830266, - "learning_rate": 5.780927764783473e-06, - "loss": 0.0249, - "step": 4548 - }, - { - "epoch": 2.020430823895181, - "grad_norm": 0.43669165432209434, - "learning_rate": 5.779012959632244e-06, - "loss": 0.0358, - "step": 4549 - }, - { - "epoch": 2.0208749722407284, - "grad_norm": 0.4022820994220209, - "learning_rate": 5.777098037382135e-06, - "loss": 0.0293, - "step": 4550 - }, - { - "epoch": 2.0213191205862757, - "grad_norm": 0.42387266543810437, - "learning_rate": 5.77518299832099e-06, - "loss": 0.0384, - "step": 4551 - }, - { - "epoch": 2.021763268931823, - "grad_norm": 0.4585217445539665, - "learning_rate": 5.7732678427366725e-06, - "loss": 0.0349, - "step": 4552 - }, - { - "epoch": 2.0222074172773707, - "grad_norm": 0.44358027335422784, - "learning_rate": 5.771352570917062e-06, - "loss": 0.0332, - "step": 4553 - }, - { - "epoch": 2.022651565622918, - "grad_norm": 0.43507443913798416, - "learning_rate": 5.769437183150057e-06, - "loss": 0.0347, - "step": 4554 - }, - { - "epoch": 2.0230957139684653, - "grad_norm": 0.37205140444853446, - "learning_rate": 5.767521679723574e-06, - "loss": 0.029, - "step": 4555 - }, - { - "epoch": 2.023539862314013, - "grad_norm": 0.3603917915607534, - "learning_rate": 5.765606060925545e-06, - "loss": 0.0309, - "step": 4556 - }, - { - "epoch": 2.0239840106595604, - "grad_norm": 0.5400074623803027, - "learning_rate": 5.763690327043919e-06, - "loss": 0.0459, - "step": 4557 - }, - { - "epoch": 2.0244281590051076, - "grad_norm": 0.39760305609807395, - "learning_rate": 5.761774478366664e-06, - "loss": 0.0338, - "step": 4558 - }, - { - "epoch": 2.024872307350655, - "grad_norm": 0.44010489846206996, - "learning_rate": 5.759858515181763e-06, - "loss": 0.0305, - "step": 4559 - }, - { - "epoch": 2.0253164556962027, - "grad_norm": 0.38439690133076265, - "learning_rate": 5.757942437777222e-06, - "loss": 0.0281, - "step": 4560 - }, - { - "epoch": 2.02576060404175, - "grad_norm": 0.36756874447130394, - "learning_rate": 5.756026246441056e-06, - "loss": 0.0276, - "step": 4561 - }, - { - "epoch": 2.0262047523872972, - "grad_norm": 0.3543127710037651, - "learning_rate": 5.754109941461302e-06, - "loss": 0.0304, - "step": 4562 - }, - { - "epoch": 2.026648900732845, - "grad_norm": 0.4789471062184959, - "learning_rate": 5.7521935231260166e-06, - "loss": 0.0361, - "step": 4563 - }, - { - "epoch": 2.0270930490783923, - "grad_norm": 0.4658261130750723, - "learning_rate": 5.7502769917232635e-06, - "loss": 0.0297, - "step": 4564 - }, - { - "epoch": 2.0275371974239396, - "grad_norm": 0.37364282464180315, - "learning_rate": 5.748360347541136e-06, - "loss": 0.0301, - "step": 4565 - }, - { - "epoch": 2.027981345769487, - "grad_norm": 0.5624254377961666, - "learning_rate": 5.746443590867735e-06, - "loss": 0.0375, - "step": 4566 - }, - { - "epoch": 2.0284254941150346, - "grad_norm": 0.4305222164333765, - "learning_rate": 5.7445267219911815e-06, - "loss": 0.033, - "step": 4567 - }, - { - "epoch": 2.028869642460582, - "grad_norm": 0.669501618559028, - "learning_rate": 5.742609741199615e-06, - "loss": 0.0497, - "step": 4568 - }, - { - "epoch": 2.029313790806129, - "grad_norm": 0.38137453136293886, - "learning_rate": 5.740692648781191e-06, - "loss": 0.0377, - "step": 4569 - }, - { - "epoch": 2.0297579391516765, - "grad_norm": 0.4610447553595522, - "learning_rate": 5.738775445024078e-06, - "loss": 0.0394, - "step": 4570 - }, - { - "epoch": 2.030202087497224, - "grad_norm": 0.46477609561023486, - "learning_rate": 5.736858130216468e-06, - "loss": 0.044, - "step": 4571 - }, - { - "epoch": 2.0306462358427715, - "grad_norm": 0.34915412378821464, - "learning_rate": 5.7349407046465625e-06, - "loss": 0.0234, - "step": 4572 - }, - { - "epoch": 2.0310903841883188, - "grad_norm": 0.47023014530194523, - "learning_rate": 5.733023168602584e-06, - "loss": 0.0427, - "step": 4573 - }, - { - "epoch": 2.0315345325338665, - "grad_norm": 0.5052588063022365, - "learning_rate": 5.731105522372773e-06, - "loss": 0.0406, - "step": 4574 - }, - { - "epoch": 2.031978680879414, - "grad_norm": 0.5345856053201472, - "learning_rate": 5.729187766245382e-06, - "loss": 0.0347, - "step": 4575 - }, - { - "epoch": 2.032422829224961, - "grad_norm": 0.41581297859158317, - "learning_rate": 5.727269900508682e-06, - "loss": 0.0366, - "step": 4576 - }, - { - "epoch": 2.0328669775705084, - "grad_norm": 0.5119888393853211, - "learning_rate": 5.725351925450964e-06, - "loss": 0.0372, - "step": 4577 - }, - { - "epoch": 2.033311125916056, - "grad_norm": 0.4247836793476431, - "learning_rate": 5.723433841360528e-06, - "loss": 0.0259, - "step": 4578 - }, - { - "epoch": 2.0337552742616034, - "grad_norm": 0.3959142952189294, - "learning_rate": 5.721515648525698e-06, - "loss": 0.0358, - "step": 4579 - }, - { - "epoch": 2.0341994226071507, - "grad_norm": 0.4826767145510797, - "learning_rate": 5.719597347234809e-06, - "loss": 0.0404, - "step": 4580 - }, - { - "epoch": 2.034643570952698, - "grad_norm": 0.4091839747164647, - "learning_rate": 5.7176789377762155e-06, - "loss": 0.0421, - "step": 4581 - }, - { - "epoch": 2.0350877192982457, - "grad_norm": 0.41330812118932564, - "learning_rate": 5.715760420438284e-06, - "loss": 0.025, - "step": 4582 - }, - { - "epoch": 2.035531867643793, - "grad_norm": 0.3786001612535725, - "learning_rate": 5.713841795509405e-06, - "loss": 0.0304, - "step": 4583 - }, - { - "epoch": 2.0359760159893403, - "grad_norm": 0.38932682295296067, - "learning_rate": 5.711923063277979e-06, - "loss": 0.033, - "step": 4584 - }, - { - "epoch": 2.036420164334888, - "grad_norm": 0.3925654682001415, - "learning_rate": 5.710004224032421e-06, - "loss": 0.035, - "step": 4585 - }, - { - "epoch": 2.0368643126804353, - "grad_norm": 0.40837083418560577, - "learning_rate": 5.708085278061167e-06, - "loss": 0.0232, - "step": 4586 - }, - { - "epoch": 2.0373084610259826, - "grad_norm": 0.4396699075372347, - "learning_rate": 5.706166225652669e-06, - "loss": 0.0404, - "step": 4587 - }, - { - "epoch": 2.03775260937153, - "grad_norm": 0.486841383129979, - "learning_rate": 5.704247067095391e-06, - "loss": 0.0325, - "step": 4588 - }, - { - "epoch": 2.0381967577170776, - "grad_norm": 0.4503221450108852, - "learning_rate": 5.702327802677815e-06, - "loss": 0.0346, - "step": 4589 - }, - { - "epoch": 2.038640906062625, - "grad_norm": 0.4565221863051935, - "learning_rate": 5.70040843268844e-06, - "loss": 0.0404, - "step": 4590 - }, - { - "epoch": 2.0390850544081722, - "grad_norm": 0.39766027067422877, - "learning_rate": 5.698488957415782e-06, - "loss": 0.0325, - "step": 4591 - }, - { - "epoch": 2.03952920275372, - "grad_norm": 0.469121812114123, - "learning_rate": 5.6965693771483654e-06, - "loss": 0.0361, - "step": 4592 - }, - { - "epoch": 2.0399733510992673, - "grad_norm": 0.34294205911608633, - "learning_rate": 5.6946496921747394e-06, - "loss": 0.0274, - "step": 4593 - }, - { - "epoch": 2.0404174994448145, - "grad_norm": 0.45197263089243245, - "learning_rate": 5.692729902783467e-06, - "loss": 0.0357, - "step": 4594 - }, - { - "epoch": 2.040861647790362, - "grad_norm": 0.4143752687268431, - "learning_rate": 5.6908100092631215e-06, - "loss": 0.0325, - "step": 4595 - }, - { - "epoch": 2.0413057961359096, - "grad_norm": 0.5126979012787386, - "learning_rate": 5.688890011902295e-06, - "loss": 0.0402, - "step": 4596 - }, - { - "epoch": 2.041749944481457, - "grad_norm": 0.38129445526321254, - "learning_rate": 5.686969910989599e-06, - "loss": 0.0247, - "step": 4597 - }, - { - "epoch": 2.042194092827004, - "grad_norm": 0.43439418379805417, - "learning_rate": 5.685049706813657e-06, - "loss": 0.0334, - "step": 4598 - }, - { - "epoch": 2.0426382411725514, - "grad_norm": 0.3528242448836029, - "learning_rate": 5.683129399663105e-06, - "loss": 0.0237, - "step": 4599 - }, - { - "epoch": 2.043082389518099, - "grad_norm": 0.4979008424907728, - "learning_rate": 5.681208989826601e-06, - "loss": 0.0346, - "step": 4600 - }, - { - "epoch": 2.0435265378636465, - "grad_norm": 0.4344255344249799, - "learning_rate": 5.679288477592815e-06, - "loss": 0.0288, - "step": 4601 - }, - { - "epoch": 2.0439706862091938, - "grad_norm": 0.5875305767400001, - "learning_rate": 5.67736786325043e-06, - "loss": 0.0336, - "step": 4602 - }, - { - "epoch": 2.0444148345547415, - "grad_norm": 0.6007666135225987, - "learning_rate": 5.675447147088148e-06, - "loss": 0.0337, - "step": 4603 - }, - { - "epoch": 2.044858982900289, - "grad_norm": 0.3501303227446166, - "learning_rate": 5.673526329394688e-06, - "loss": 0.0316, - "step": 4604 - }, - { - "epoch": 2.045303131245836, - "grad_norm": 0.5466639051174087, - "learning_rate": 5.6716054104587784e-06, - "loss": 0.0513, - "step": 4605 - }, - { - "epoch": 2.0457472795913834, - "grad_norm": 0.3453198669378243, - "learning_rate": 5.669684390569167e-06, - "loss": 0.0292, - "step": 4606 - }, - { - "epoch": 2.046191427936931, - "grad_norm": 0.3855523349827862, - "learning_rate": 5.667763270014616e-06, - "loss": 0.0274, - "step": 4607 - }, - { - "epoch": 2.0466355762824784, - "grad_norm": 0.34309904151982246, - "learning_rate": 5.665842049083902e-06, - "loss": 0.0264, - "step": 4608 - }, - { - "epoch": 2.0470797246280257, - "grad_norm": 0.7952938969316026, - "learning_rate": 5.6639207280658194e-06, - "loss": 0.0534, - "step": 4609 - }, - { - "epoch": 2.047523872973573, - "grad_norm": 0.3602547692842435, - "learning_rate": 5.6619993072491694e-06, - "loss": 0.027, - "step": 4610 - }, - { - "epoch": 2.0479680213191207, - "grad_norm": 0.33009638203411035, - "learning_rate": 5.6600777869227805e-06, - "loss": 0.0267, - "step": 4611 - }, - { - "epoch": 2.048412169664668, - "grad_norm": 0.43117489579425794, - "learning_rate": 5.658156167375488e-06, - "loss": 0.038, - "step": 4612 - }, - { - "epoch": 2.0488563180102153, - "grad_norm": 0.7172674302994839, - "learning_rate": 5.656234448896142e-06, - "loss": 0.0461, - "step": 4613 - }, - { - "epoch": 2.049300466355763, - "grad_norm": 0.4507173154420471, - "learning_rate": 5.654312631773612e-06, - "loss": 0.027, - "step": 4614 - }, - { - "epoch": 2.0497446147013103, - "grad_norm": 0.6398319387217463, - "learning_rate": 5.652390716296778e-06, - "loss": 0.041, - "step": 4615 - }, - { - "epoch": 2.0501887630468576, - "grad_norm": 0.43458565414603195, - "learning_rate": 5.650468702754537e-06, - "loss": 0.0475, - "step": 4616 - }, - { - "epoch": 2.050632911392405, - "grad_norm": 0.4724564626628951, - "learning_rate": 5.6485465914358005e-06, - "loss": 0.039, - "step": 4617 - }, - { - "epoch": 2.0510770597379526, - "grad_norm": 0.4813725140216776, - "learning_rate": 5.646624382629495e-06, - "loss": 0.0442, - "step": 4618 - }, - { - "epoch": 2.0515212080835, - "grad_norm": 0.422057899941897, - "learning_rate": 5.64470207662456e-06, - "loss": 0.0354, - "step": 4619 - }, - { - "epoch": 2.051965356429047, - "grad_norm": 0.5520244840521104, - "learning_rate": 5.6427796737099515e-06, - "loss": 0.0393, - "step": 4620 - }, - { - "epoch": 2.0524095047745945, - "grad_norm": 0.5717238340997483, - "learning_rate": 5.64085717417464e-06, - "loss": 0.041, - "step": 4621 - }, - { - "epoch": 2.0528536531201422, - "grad_norm": 0.4162602609657127, - "learning_rate": 5.638934578307608e-06, - "loss": 0.0233, - "step": 4622 - }, - { - "epoch": 2.0532978014656895, - "grad_norm": 0.40689474682922516, - "learning_rate": 5.637011886397854e-06, - "loss": 0.0479, - "step": 4623 - }, - { - "epoch": 2.053741949811237, - "grad_norm": 0.46291048789069594, - "learning_rate": 5.635089098734394e-06, - "loss": 0.0345, - "step": 4624 - }, - { - "epoch": 2.0541860981567845, - "grad_norm": 0.6061112924495777, - "learning_rate": 5.633166215606254e-06, - "loss": 0.0519, - "step": 4625 - }, - { - "epoch": 2.054630246502332, - "grad_norm": 0.37385156175737194, - "learning_rate": 5.631243237302478e-06, - "loss": 0.0432, - "step": 4626 - }, - { - "epoch": 2.055074394847879, - "grad_norm": 0.32050349885999035, - "learning_rate": 5.629320164112116e-06, - "loss": 0.031, - "step": 4627 - }, - { - "epoch": 2.0555185431934264, - "grad_norm": 0.44064173532980155, - "learning_rate": 5.627396996324247e-06, - "loss": 0.0255, - "step": 4628 - }, - { - "epoch": 2.055962691538974, - "grad_norm": 0.35873848998988794, - "learning_rate": 5.625473734227952e-06, - "loss": 0.0352, - "step": 4629 - }, - { - "epoch": 2.0564068398845214, - "grad_norm": 0.6158412215543486, - "learning_rate": 5.623550378112328e-06, - "loss": 0.0442, - "step": 4630 - }, - { - "epoch": 2.0568509882300687, - "grad_norm": 0.4813854059047201, - "learning_rate": 5.621626928266489e-06, - "loss": 0.0478, - "step": 4631 - }, - { - "epoch": 2.0572951365756165, - "grad_norm": 0.4790652853444873, - "learning_rate": 5.619703384979566e-06, - "loss": 0.0463, - "step": 4632 - }, - { - "epoch": 2.0577392849211638, - "grad_norm": 0.41738179511720797, - "learning_rate": 5.617779748540695e-06, - "loss": 0.0338, - "step": 4633 - }, - { - "epoch": 2.058183433266711, - "grad_norm": 0.41477600567937595, - "learning_rate": 5.615856019239034e-06, - "loss": 0.0361, - "step": 4634 - }, - { - "epoch": 2.0586275816122583, - "grad_norm": 0.4967975613891594, - "learning_rate": 5.613932197363753e-06, - "loss": 0.0333, - "step": 4635 - }, - { - "epoch": 2.059071729957806, - "grad_norm": 0.49458901207002304, - "learning_rate": 5.612008283204033e-06, - "loss": 0.0427, - "step": 4636 - }, - { - "epoch": 2.0595158783033534, - "grad_norm": 0.42427512577649273, - "learning_rate": 5.610084277049071e-06, - "loss": 0.0354, - "step": 4637 - }, - { - "epoch": 2.0599600266489007, - "grad_norm": 0.43245137715403886, - "learning_rate": 5.608160179188079e-06, - "loss": 0.0326, - "step": 4638 - }, - { - "epoch": 2.060404174994448, - "grad_norm": 0.4874634691089771, - "learning_rate": 5.6062359899102815e-06, - "loss": 0.0371, - "step": 4639 - }, - { - "epoch": 2.0608483233399957, - "grad_norm": 0.5004987254382077, - "learning_rate": 5.604311709504917e-06, - "loss": 0.0397, - "step": 4640 - }, - { - "epoch": 2.061292471685543, - "grad_norm": 0.4751004755592279, - "learning_rate": 5.602387338261236e-06, - "loss": 0.0414, - "step": 4641 - }, - { - "epoch": 2.0617366200310903, - "grad_norm": 0.5299506551694105, - "learning_rate": 5.600462876468506e-06, - "loss": 0.0368, - "step": 4642 - }, - { - "epoch": 2.062180768376638, - "grad_norm": 0.5312690498368814, - "learning_rate": 5.598538324416007e-06, - "loss": 0.0449, - "step": 4643 - }, - { - "epoch": 2.0626249167221853, - "grad_norm": 0.38593169971896735, - "learning_rate": 5.5966136823930286e-06, - "loss": 0.0281, - "step": 4644 - }, - { - "epoch": 2.0630690650677326, - "grad_norm": 0.3958642054782115, - "learning_rate": 5.594688950688879e-06, - "loss": 0.034, - "step": 4645 - }, - { - "epoch": 2.06351321341328, - "grad_norm": 0.40570614021956714, - "learning_rate": 5.592764129592879e-06, - "loss": 0.0323, - "step": 4646 - }, - { - "epoch": 2.0639573617588276, - "grad_norm": 0.45390531771906856, - "learning_rate": 5.590839219394361e-06, - "loss": 0.0254, - "step": 4647 - }, - { - "epoch": 2.064401510104375, - "grad_norm": 0.39108513738732803, - "learning_rate": 5.58891422038267e-06, - "loss": 0.0278, - "step": 4648 - }, - { - "epoch": 2.064845658449922, - "grad_norm": 0.5223612545370472, - "learning_rate": 5.58698913284717e-06, - "loss": 0.0367, - "step": 4649 - }, - { - "epoch": 2.0652898067954695, - "grad_norm": 0.48567914535043527, - "learning_rate": 5.585063957077231e-06, - "loss": 0.0303, - "step": 4650 - }, - { - "epoch": 2.065733955141017, - "grad_norm": 0.3022843285079369, - "learning_rate": 5.583138693362241e-06, - "loss": 0.0268, - "step": 4651 - }, - { - "epoch": 2.0661781034865645, - "grad_norm": 0.3626425665807493, - "learning_rate": 5.5812133419916e-06, - "loss": 0.0282, - "step": 4652 - }, - { - "epoch": 2.066622251832112, - "grad_norm": 0.4001866579471354, - "learning_rate": 5.5792879032547205e-06, - "loss": 0.0346, - "step": 4653 - }, - { - "epoch": 2.0670664001776595, - "grad_norm": 0.4654295875105734, - "learning_rate": 5.577362377441029e-06, - "loss": 0.0348, - "step": 4654 - }, - { - "epoch": 2.067510548523207, - "grad_norm": 0.39677357569841354, - "learning_rate": 5.5754367648399644e-06, - "loss": 0.0274, - "step": 4655 - }, - { - "epoch": 2.067954696868754, - "grad_norm": 0.4143564936380847, - "learning_rate": 5.5735110657409775e-06, - "loss": 0.0427, - "step": 4656 - }, - { - "epoch": 2.0683988452143014, - "grad_norm": 0.3681145367593844, - "learning_rate": 5.571585280433537e-06, - "loss": 0.0321, - "step": 4657 - }, - { - "epoch": 2.068842993559849, - "grad_norm": 0.46007574827334824, - "learning_rate": 5.569659409207119e-06, - "loss": 0.0324, - "step": 4658 - }, - { - "epoch": 2.0692871419053964, - "grad_norm": 0.4377445107919845, - "learning_rate": 5.567733452351214e-06, - "loss": 0.0364, - "step": 4659 - }, - { - "epoch": 2.0697312902509437, - "grad_norm": 0.5314428707092943, - "learning_rate": 5.565807410155329e-06, - "loss": 0.039, - "step": 4660 - }, - { - "epoch": 2.0701754385964914, - "grad_norm": 0.4445997114256765, - "learning_rate": 5.563881282908976e-06, - "loss": 0.0451, - "step": 4661 - }, - { - "epoch": 2.0706195869420387, - "grad_norm": 0.41675191455829885, - "learning_rate": 5.561955070901689e-06, - "loss": 0.032, - "step": 4662 - }, - { - "epoch": 2.071063735287586, - "grad_norm": 0.49984516013690966, - "learning_rate": 5.56002877442301e-06, - "loss": 0.0379, - "step": 4663 - }, - { - "epoch": 2.0715078836331333, - "grad_norm": 0.48011634971976885, - "learning_rate": 5.558102393762491e-06, - "loss": 0.0289, - "step": 4664 - }, - { - "epoch": 2.071952031978681, - "grad_norm": 0.44249526000612904, - "learning_rate": 5.556175929209703e-06, - "loss": 0.0254, - "step": 4665 - }, - { - "epoch": 2.0723961803242283, - "grad_norm": 0.40597832272770745, - "learning_rate": 5.554249381054224e-06, - "loss": 0.0269, - "step": 4666 - }, - { - "epoch": 2.0728403286697756, - "grad_norm": 0.45148588795717426, - "learning_rate": 5.552322749585649e-06, - "loss": 0.0388, - "step": 4667 - }, - { - "epoch": 2.073284477015323, - "grad_norm": 0.598994856507402, - "learning_rate": 5.550396035093582e-06, - "loss": 0.0423, - "step": 4668 - }, - { - "epoch": 2.0737286253608707, - "grad_norm": 0.5181098142664673, - "learning_rate": 5.548469237867642e-06, - "loss": 0.0433, - "step": 4669 - }, - { - "epoch": 2.074172773706418, - "grad_norm": 0.5101400816921863, - "learning_rate": 5.546542358197458e-06, - "loss": 0.0463, - "step": 4670 - }, - { - "epoch": 2.0746169220519652, - "grad_norm": 0.5101937458580871, - "learning_rate": 5.544615396372673e-06, - "loss": 0.0341, - "step": 4671 - }, - { - "epoch": 2.075061070397513, - "grad_norm": 0.5392434065378148, - "learning_rate": 5.542688352682944e-06, - "loss": 0.0451, - "step": 4672 - }, - { - "epoch": 2.0755052187430603, - "grad_norm": 0.6897926097007651, - "learning_rate": 5.540761227417934e-06, - "loss": 0.0355, - "step": 4673 - }, - { - "epoch": 2.0759493670886076, - "grad_norm": 0.28051948597694654, - "learning_rate": 5.53883402086733e-06, - "loss": 0.0243, - "step": 4674 - }, - { - "epoch": 2.076393515434155, - "grad_norm": 0.793052180191732, - "learning_rate": 5.536906733320816e-06, - "loss": 0.0563, - "step": 4675 - }, - { - "epoch": 2.0768376637797026, - "grad_norm": 0.4215318597715739, - "learning_rate": 5.5349793650681006e-06, - "loss": 0.0354, - "step": 4676 - }, - { - "epoch": 2.07728181212525, - "grad_norm": 0.34425051368314813, - "learning_rate": 5.533051916398899e-06, - "loss": 0.0295, - "step": 4677 - }, - { - "epoch": 2.077725960470797, - "grad_norm": 0.6685404197751216, - "learning_rate": 5.531124387602938e-06, - "loss": 0.0399, - "step": 4678 - }, - { - "epoch": 2.0781701088163445, - "grad_norm": 0.31141908709280325, - "learning_rate": 5.529196778969961e-06, - "loss": 0.0271, - "step": 4679 - }, - { - "epoch": 2.078614257161892, - "grad_norm": 0.4606806580907262, - "learning_rate": 5.527269090789718e-06, - "loss": 0.0394, - "step": 4680 - }, - { - "epoch": 2.0790584055074395, - "grad_norm": 0.4252999601959849, - "learning_rate": 5.525341323351975e-06, - "loss": 0.0394, - "step": 4681 - }, - { - "epoch": 2.0795025538529868, - "grad_norm": 0.5409739246288477, - "learning_rate": 5.5234134769465065e-06, - "loss": 0.0521, - "step": 4682 - }, - { - "epoch": 2.0799467021985345, - "grad_norm": 0.4948477331543779, - "learning_rate": 5.5214855518631005e-06, - "loss": 0.0485, - "step": 4683 - }, - { - "epoch": 2.080390850544082, - "grad_norm": 0.3543340984392336, - "learning_rate": 5.519557548391557e-06, - "loss": 0.0242, - "step": 4684 - }, - { - "epoch": 2.080834998889629, - "grad_norm": 0.3751926442365401, - "learning_rate": 5.517629466821691e-06, - "loss": 0.036, - "step": 4685 - }, - { - "epoch": 2.0812791472351764, - "grad_norm": 0.4117924266399266, - "learning_rate": 5.515701307443321e-06, - "loss": 0.0485, - "step": 4686 - }, - { - "epoch": 2.081723295580724, - "grad_norm": 0.47251260035381365, - "learning_rate": 5.513773070546284e-06, - "loss": 0.0419, - "step": 4687 - }, - { - "epoch": 2.0821674439262714, - "grad_norm": 0.6076861002224307, - "learning_rate": 5.5118447564204295e-06, - "loss": 0.0391, - "step": 4688 - }, - { - "epoch": 2.0826115922718187, - "grad_norm": 0.4792646073001442, - "learning_rate": 5.50991636535561e-06, - "loss": 0.0295, - "step": 4689 - }, - { - "epoch": 2.0830557406173664, - "grad_norm": 0.32440405189073857, - "learning_rate": 5.5079878976417e-06, - "loss": 0.022, - "step": 4690 - }, - { - "epoch": 2.0834998889629137, - "grad_norm": 0.4221580236556979, - "learning_rate": 5.506059353568581e-06, - "loss": 0.0263, - "step": 4691 - }, - { - "epoch": 2.083944037308461, - "grad_norm": 0.35561395384075745, - "learning_rate": 5.504130733426145e-06, - "loss": 0.0273, - "step": 4692 - }, - { - "epoch": 2.0843881856540083, - "grad_norm": 0.4935918433503542, - "learning_rate": 5.502202037504293e-06, - "loss": 0.0328, - "step": 4693 - }, - { - "epoch": 2.084832333999556, - "grad_norm": 0.439030655630297, - "learning_rate": 5.500273266092947e-06, - "loss": 0.0278, - "step": 4694 - }, - { - "epoch": 2.0852764823451033, - "grad_norm": 0.5492104667996629, - "learning_rate": 5.49834441948203e-06, - "loss": 0.0477, - "step": 4695 - }, - { - "epoch": 2.0857206306906506, - "grad_norm": 0.48997887857295236, - "learning_rate": 5.496415497961482e-06, - "loss": 0.036, - "step": 4696 - }, - { - "epoch": 2.086164779036198, - "grad_norm": 0.9251501688761363, - "learning_rate": 5.49448650182125e-06, - "loss": 0.0543, - "step": 4697 - }, - { - "epoch": 2.0866089273817456, - "grad_norm": 0.44096687951153934, - "learning_rate": 5.492557431351298e-06, - "loss": 0.0372, - "step": 4698 - }, - { - "epoch": 2.087053075727293, - "grad_norm": 0.48261463177180974, - "learning_rate": 5.4906282868415974e-06, - "loss": 0.0323, - "step": 4699 - }, - { - "epoch": 2.08749722407284, - "grad_norm": 0.44612947746443893, - "learning_rate": 5.488699068582129e-06, - "loss": 0.0379, - "step": 4700 - }, - { - "epoch": 2.087941372418388, - "grad_norm": 0.6019236481517227, - "learning_rate": 5.486769776862891e-06, - "loss": 0.0392, - "step": 4701 - }, - { - "epoch": 2.0883855207639352, - "grad_norm": 0.41037964449549674, - "learning_rate": 5.484840411973888e-06, - "loss": 0.045, - "step": 4702 - }, - { - "epoch": 2.0888296691094825, - "grad_norm": 0.6139112370737255, - "learning_rate": 5.482910974205133e-06, - "loss": 0.0434, - "step": 4703 - }, - { - "epoch": 2.08927381745503, - "grad_norm": 0.47897436644713304, - "learning_rate": 5.480981463846655e-06, - "loss": 0.0407, - "step": 4704 - }, - { - "epoch": 2.0897179658005776, - "grad_norm": 0.4257961843862624, - "learning_rate": 5.479051881188494e-06, - "loss": 0.0402, - "step": 4705 - }, - { - "epoch": 2.090162114146125, - "grad_norm": 0.4458277075765739, - "learning_rate": 5.477122226520698e-06, - "loss": 0.0335, - "step": 4706 - }, - { - "epoch": 2.090606262491672, - "grad_norm": 0.5283109987423077, - "learning_rate": 5.475192500133324e-06, - "loss": 0.0321, - "step": 4707 - }, - { - "epoch": 2.0910504108372194, - "grad_norm": 0.6005581697681983, - "learning_rate": 5.473262702316447e-06, - "loss": 0.0369, - "step": 4708 - }, - { - "epoch": 2.091494559182767, - "grad_norm": 0.40161109082786944, - "learning_rate": 5.471332833360147e-06, - "loss": 0.0289, - "step": 4709 - }, - { - "epoch": 2.0919387075283145, - "grad_norm": 0.4371088945729004, - "learning_rate": 5.4694028935545126e-06, - "loss": 0.0343, - "step": 4710 - }, - { - "epoch": 2.0923828558738617, - "grad_norm": 1.1703243713580254, - "learning_rate": 5.467472883189653e-06, - "loss": 0.0507, - "step": 4711 - }, - { - "epoch": 2.0928270042194095, - "grad_norm": 0.40364633007175293, - "learning_rate": 5.465542802555677e-06, - "loss": 0.0295, - "step": 4712 - }, - { - "epoch": 2.0932711525649568, - "grad_norm": 0.46666883180291846, - "learning_rate": 5.4636126519427095e-06, - "loss": 0.0412, - "step": 4713 - }, - { - "epoch": 2.093715300910504, - "grad_norm": 0.4145958391325037, - "learning_rate": 5.461682431640885e-06, - "loss": 0.0319, - "step": 4714 - }, - { - "epoch": 2.0941594492560514, - "grad_norm": 0.3217656626768428, - "learning_rate": 5.459752141940347e-06, - "loss": 0.0318, - "step": 4715 - }, - { - "epoch": 2.094603597601599, - "grad_norm": 0.5490857014126957, - "learning_rate": 5.457821783131254e-06, - "loss": 0.046, - "step": 4716 - }, - { - "epoch": 2.0950477459471464, - "grad_norm": 0.4425046183099989, - "learning_rate": 5.455891355503768e-06, - "loss": 0.0299, - "step": 4717 - }, - { - "epoch": 2.0954918942926937, - "grad_norm": 0.6349791543930909, - "learning_rate": 5.453960859348069e-06, - "loss": 0.0565, - "step": 4718 - }, - { - "epoch": 2.095936042638241, - "grad_norm": 0.4072328187100098, - "learning_rate": 5.4520302949543415e-06, - "loss": 0.0349, - "step": 4719 - }, - { - "epoch": 2.0963801909837887, - "grad_norm": 0.3811118913667539, - "learning_rate": 5.450099662612781e-06, - "loss": 0.0305, - "step": 4720 - }, - { - "epoch": 2.096824339329336, - "grad_norm": 0.4774255455908721, - "learning_rate": 5.448168962613596e-06, - "loss": 0.0368, - "step": 4721 - }, - { - "epoch": 2.0972684876748833, - "grad_norm": 0.32813215206793156, - "learning_rate": 5.446238195247003e-06, - "loss": 0.0321, - "step": 4722 - }, - { - "epoch": 2.097712636020431, - "grad_norm": 0.5512782021205551, - "learning_rate": 5.44430736080323e-06, - "loss": 0.0387, - "step": 4723 - }, - { - "epoch": 2.0981567843659783, - "grad_norm": 0.5835802051010996, - "learning_rate": 5.44237645957251e-06, - "loss": 0.0354, - "step": 4724 - }, - { - "epoch": 2.0986009327115256, - "grad_norm": 0.3806252215797266, - "learning_rate": 5.440445491845095e-06, - "loss": 0.0363, - "step": 4725 - }, - { - "epoch": 2.099045081057073, - "grad_norm": 0.42251102762378473, - "learning_rate": 5.438514457911241e-06, - "loss": 0.0386, - "step": 4726 - }, - { - "epoch": 2.0994892294026206, - "grad_norm": 0.35748316520246215, - "learning_rate": 5.436583358061215e-06, - "loss": 0.0282, - "step": 4727 - }, - { - "epoch": 2.099933377748168, - "grad_norm": 0.46907671848937027, - "learning_rate": 5.434652192585294e-06, - "loss": 0.0411, - "step": 4728 - }, - { - "epoch": 2.100377526093715, - "grad_norm": 0.41478959673973786, - "learning_rate": 5.432720961773765e-06, - "loss": 0.0323, - "step": 4729 - }, - { - "epoch": 2.1008216744392625, - "grad_norm": 0.6441994593730457, - "learning_rate": 5.430789665916925e-06, - "loss": 0.0477, - "step": 4730 - }, - { - "epoch": 2.1012658227848102, - "grad_norm": 0.7936242646685359, - "learning_rate": 5.428858305305079e-06, - "loss": 0.0438, - "step": 4731 - }, - { - "epoch": 2.1017099711303575, - "grad_norm": 0.4542564531841097, - "learning_rate": 5.426926880228547e-06, - "loss": 0.039, - "step": 4732 - }, - { - "epoch": 2.102154119475905, - "grad_norm": 0.43879296755150776, - "learning_rate": 5.424995390977651e-06, - "loss": 0.024, - "step": 4733 - }, - { - "epoch": 2.1025982678214525, - "grad_norm": 0.4720404728255635, - "learning_rate": 5.423063837842728e-06, - "loss": 0.0366, - "step": 4734 - }, - { - "epoch": 2.103042416167, - "grad_norm": 0.3210152022294204, - "learning_rate": 5.421132221114124e-06, - "loss": 0.0287, - "step": 4735 - }, - { - "epoch": 2.103486564512547, - "grad_norm": 0.8156671041659774, - "learning_rate": 5.419200541082194e-06, - "loss": 0.0423, - "step": 4736 - }, - { - "epoch": 2.1039307128580944, - "grad_norm": 0.5417603660166322, - "learning_rate": 5.417268798037303e-06, - "loss": 0.0388, - "step": 4737 - }, - { - "epoch": 2.104374861203642, - "grad_norm": 0.46205560016247954, - "learning_rate": 5.415336992269821e-06, - "loss": 0.0363, - "step": 4738 - }, - { - "epoch": 2.1048190095491894, - "grad_norm": 0.3348835489607936, - "learning_rate": 5.413405124070134e-06, - "loss": 0.0206, - "step": 4739 - }, - { - "epoch": 2.1052631578947367, - "grad_norm": 0.46193255668613026, - "learning_rate": 5.411473193728636e-06, - "loss": 0.0295, - "step": 4740 - }, - { - "epoch": 2.1057073062402845, - "grad_norm": 0.4346087883354204, - "learning_rate": 5.409541201535727e-06, - "loss": 0.039, - "step": 4741 - }, - { - "epoch": 2.1061514545858318, - "grad_norm": 0.46994212871132296, - "learning_rate": 5.407609147781816e-06, - "loss": 0.0374, - "step": 4742 - }, - { - "epoch": 2.106595602931379, - "grad_norm": 0.4605573678630878, - "learning_rate": 5.405677032757329e-06, - "loss": 0.0371, - "step": 4743 - }, - { - "epoch": 2.1070397512769263, - "grad_norm": 0.6425519607388686, - "learning_rate": 5.403744856752691e-06, - "loss": 0.0337, - "step": 4744 - }, - { - "epoch": 2.107483899622474, - "grad_norm": 0.34739298130141844, - "learning_rate": 5.401812620058343e-06, - "loss": 0.0269, - "step": 4745 - }, - { - "epoch": 2.1079280479680214, - "grad_norm": 0.4490417625063517, - "learning_rate": 5.399880322964733e-06, - "loss": 0.0372, - "step": 4746 - }, - { - "epoch": 2.1083721963135686, - "grad_norm": 0.6489786195301251, - "learning_rate": 5.397947965762317e-06, - "loss": 0.0509, - "step": 4747 - }, - { - "epoch": 2.108816344659116, - "grad_norm": 0.326521120100658, - "learning_rate": 5.396015548741562e-06, - "loss": 0.0288, - "step": 4748 - }, - { - "epoch": 2.1092604930046637, - "grad_norm": 0.4078407477455278, - "learning_rate": 5.394083072192944e-06, - "loss": 0.0382, - "step": 4749 - }, - { - "epoch": 2.109704641350211, - "grad_norm": 0.5177065744163067, - "learning_rate": 5.392150536406945e-06, - "loss": 0.0449, - "step": 4750 - }, - { - "epoch": 2.1101487896957583, - "grad_norm": 0.33995975799200945, - "learning_rate": 5.39021794167406e-06, - "loss": 0.0264, - "step": 4751 - }, - { - "epoch": 2.110592938041306, - "grad_norm": 0.42003616424190965, - "learning_rate": 5.388285288284787e-06, - "loss": 0.0372, - "step": 4752 - }, - { - "epoch": 2.1110370863868533, - "grad_norm": 0.3472432854029145, - "learning_rate": 5.386352576529641e-06, - "loss": 0.0307, - "step": 4753 - }, - { - "epoch": 2.1114812347324006, - "grad_norm": 0.4130591691843907, - "learning_rate": 5.384419806699141e-06, - "loss": 0.0276, - "step": 4754 - }, - { - "epoch": 2.111925383077948, - "grad_norm": 0.4591952950693401, - "learning_rate": 5.382486979083812e-06, - "loss": 0.0363, - "step": 4755 - }, - { - "epoch": 2.1123695314234956, - "grad_norm": 0.3857971037693101, - "learning_rate": 5.380554093974193e-06, - "loss": 0.0359, - "step": 4756 - }, - { - "epoch": 2.112813679769043, - "grad_norm": 0.37782187453237637, - "learning_rate": 5.37862115166083e-06, - "loss": 0.0358, - "step": 4757 - }, - { - "epoch": 2.11325782811459, - "grad_norm": 0.3395886410665437, - "learning_rate": 5.376688152434275e-06, - "loss": 0.0243, - "step": 4758 - }, - { - "epoch": 2.1137019764601375, - "grad_norm": 0.4470609752189812, - "learning_rate": 5.374755096585093e-06, - "loss": 0.0246, - "step": 4759 - }, - { - "epoch": 2.114146124805685, - "grad_norm": 0.4371584818717651, - "learning_rate": 5.372821984403854e-06, - "loss": 0.028, - "step": 4760 - }, - { - "epoch": 2.1145902731512325, - "grad_norm": 0.49305971027495465, - "learning_rate": 5.370888816181138e-06, - "loss": 0.0362, - "step": 4761 - }, - { - "epoch": 2.11503442149678, - "grad_norm": 0.4175857188422132, - "learning_rate": 5.368955592207531e-06, - "loss": 0.033, - "step": 4762 - }, - { - "epoch": 2.1154785698423275, - "grad_norm": 0.5139037264567338, - "learning_rate": 5.367022312773633e-06, - "loss": 0.0323, - "step": 4763 - }, - { - "epoch": 2.115922718187875, - "grad_norm": 0.8385325013800425, - "learning_rate": 5.365088978170045e-06, - "loss": 0.0387, - "step": 4764 - }, - { - "epoch": 2.116366866533422, - "grad_norm": 0.4295996848147971, - "learning_rate": 5.363155588687383e-06, - "loss": 0.0297, - "step": 4765 - }, - { - "epoch": 2.1168110148789694, - "grad_norm": 0.5951452068654114, - "learning_rate": 5.361222144616267e-06, - "loss": 0.0395, - "step": 4766 - }, - { - "epoch": 2.117255163224517, - "grad_norm": 0.3679591157867813, - "learning_rate": 5.359288646247326e-06, - "loss": 0.0324, - "step": 4767 - }, - { - "epoch": 2.1176993115700644, - "grad_norm": 0.351749833643755, - "learning_rate": 5.357355093871199e-06, - "loss": 0.0262, - "step": 4768 - }, - { - "epoch": 2.1181434599156117, - "grad_norm": 0.4550233701632367, - "learning_rate": 5.355421487778529e-06, - "loss": 0.0405, - "step": 4769 - }, - { - "epoch": 2.1185876082611594, - "grad_norm": 0.5196049007489856, - "learning_rate": 5.353487828259973e-06, - "loss": 0.0354, - "step": 4770 - }, - { - "epoch": 2.1190317566067067, - "grad_norm": 0.5003818333286508, - "learning_rate": 5.351554115606194e-06, - "loss": 0.0346, - "step": 4771 - }, - { - "epoch": 2.119475904952254, - "grad_norm": 0.48260320470669443, - "learning_rate": 5.349620350107857e-06, - "loss": 0.0363, - "step": 4772 - }, - { - "epoch": 2.1199200532978013, - "grad_norm": 0.5154065971255504, - "learning_rate": 5.347686532055643e-06, - "loss": 0.037, - "step": 4773 - }, - { - "epoch": 2.120364201643349, - "grad_norm": 0.5641510479972823, - "learning_rate": 5.345752661740236e-06, - "loss": 0.0418, - "step": 4774 - }, - { - "epoch": 2.1208083499888963, - "grad_norm": 0.3842072493781642, - "learning_rate": 5.343818739452332e-06, - "loss": 0.0289, - "step": 4775 - }, - { - "epoch": 2.1212524983344436, - "grad_norm": 0.48007816649878726, - "learning_rate": 5.34188476548263e-06, - "loss": 0.0382, - "step": 4776 - }, - { - "epoch": 2.121696646679991, - "grad_norm": 0.403727363474673, - "learning_rate": 5.339950740121842e-06, - "loss": 0.0355, - "step": 4777 - }, - { - "epoch": 2.1221407950255387, - "grad_norm": 0.5060901595070586, - "learning_rate": 5.338016663660681e-06, - "loss": 0.0372, - "step": 4778 - }, - { - "epoch": 2.122584943371086, - "grad_norm": 0.991711640962585, - "learning_rate": 5.336082536389875e-06, - "loss": 0.0575, - "step": 4779 - }, - { - "epoch": 2.1230290917166332, - "grad_norm": 0.6280177866580242, - "learning_rate": 5.334148358600154e-06, - "loss": 0.0524, - "step": 4780 - }, - { - "epoch": 2.123473240062181, - "grad_norm": 0.38877559204447126, - "learning_rate": 5.332214130582259e-06, - "loss": 0.0252, - "step": 4781 - }, - { - "epoch": 2.1239173884077283, - "grad_norm": 0.5153735769890214, - "learning_rate": 5.330279852626936e-06, - "loss": 0.0399, - "step": 4782 - }, - { - "epoch": 2.1243615367532755, - "grad_norm": 0.4835493880223582, - "learning_rate": 5.32834552502494e-06, - "loss": 0.0398, - "step": 4783 - }, - { - "epoch": 2.124805685098823, - "grad_norm": 0.5440156606024599, - "learning_rate": 5.326411148067036e-06, - "loss": 0.0359, - "step": 4784 - }, - { - "epoch": 2.1252498334443706, - "grad_norm": 0.47437512421776024, - "learning_rate": 5.324476722043991e-06, - "loss": 0.0357, - "step": 4785 - }, - { - "epoch": 2.125693981789918, - "grad_norm": 0.4015404790313753, - "learning_rate": 5.322542247246583e-06, - "loss": 0.0259, - "step": 4786 - }, - { - "epoch": 2.126138130135465, - "grad_norm": 0.3327411043647303, - "learning_rate": 5.320607723965594e-06, - "loss": 0.0288, - "step": 4787 - }, - { - "epoch": 2.1265822784810124, - "grad_norm": 0.4112477949684857, - "learning_rate": 5.318673152491821e-06, - "loss": 0.0306, - "step": 4788 - }, - { - "epoch": 2.12702642682656, - "grad_norm": 0.4352341389187628, - "learning_rate": 5.316738533116058e-06, - "loss": 0.0294, - "step": 4789 - }, - { - "epoch": 2.1274705751721075, - "grad_norm": 0.5247331669668354, - "learning_rate": 5.314803866129114e-06, - "loss": 0.0455, - "step": 4790 - }, - { - "epoch": 2.1279147235176548, - "grad_norm": 0.5740961919642005, - "learning_rate": 5.3128691518218015e-06, - "loss": 0.0313, - "step": 4791 - }, - { - "epoch": 2.1283588718632025, - "grad_norm": 0.3931985647767843, - "learning_rate": 5.310934390484939e-06, - "loss": 0.0303, - "step": 4792 - }, - { - "epoch": 2.12880302020875, - "grad_norm": 0.4665172928488037, - "learning_rate": 5.308999582409357e-06, - "loss": 0.0324, - "step": 4793 - }, - { - "epoch": 2.129247168554297, - "grad_norm": 0.394722347425358, - "learning_rate": 5.307064727885889e-06, - "loss": 0.0386, - "step": 4794 - }, - { - "epoch": 2.1296913168998444, - "grad_norm": 0.3880325853085057, - "learning_rate": 5.305129827205375e-06, - "loss": 0.0305, - "step": 4795 - }, - { - "epoch": 2.130135465245392, - "grad_norm": 0.33971233702286885, - "learning_rate": 5.303194880658668e-06, - "loss": 0.0336, - "step": 4796 - }, - { - "epoch": 2.1305796135909394, - "grad_norm": 0.5540350723444334, - "learning_rate": 5.301259888536616e-06, - "loss": 0.0369, - "step": 4797 - }, - { - "epoch": 2.1310237619364867, - "grad_norm": 0.562001973004502, - "learning_rate": 5.299324851130086e-06, - "loss": 0.0463, - "step": 4798 - }, - { - "epoch": 2.1314679102820344, - "grad_norm": 0.40052051170910113, - "learning_rate": 5.297389768729949e-06, - "loss": 0.0343, - "step": 4799 - }, - { - "epoch": 2.1319120586275817, - "grad_norm": 0.37582805444109113, - "learning_rate": 5.295454641627076e-06, - "loss": 0.0279, - "step": 4800 - }, - { - "epoch": 2.132356206973129, - "grad_norm": 0.556183127195747, - "learning_rate": 5.293519470112351e-06, - "loss": 0.055, - "step": 4801 - }, - { - "epoch": 2.1328003553186763, - "grad_norm": 0.43600853079083646, - "learning_rate": 5.2915842544766645e-06, - "loss": 0.0341, - "step": 4802 - }, - { - "epoch": 2.133244503664224, - "grad_norm": 0.43024160171177206, - "learning_rate": 5.289648995010912e-06, - "loss": 0.0311, - "step": 4803 - }, - { - "epoch": 2.1336886520097713, - "grad_norm": 0.4112510401014529, - "learning_rate": 5.287713692005993e-06, - "loss": 0.0319, - "step": 4804 - }, - { - "epoch": 2.1341328003553186, - "grad_norm": 0.4594812201613338, - "learning_rate": 5.285778345752821e-06, - "loss": 0.0372, - "step": 4805 - }, - { - "epoch": 2.134576948700866, - "grad_norm": 0.3657072390383706, - "learning_rate": 5.2838429565423074e-06, - "loss": 0.0283, - "step": 4806 - }, - { - "epoch": 2.1350210970464136, - "grad_norm": 0.6140456097709794, - "learning_rate": 5.281907524665377e-06, - "loss": 0.0454, - "step": 4807 - }, - { - "epoch": 2.135465245391961, - "grad_norm": 0.39045919917216204, - "learning_rate": 5.279972050412957e-06, - "loss": 0.0378, - "step": 4808 - }, - { - "epoch": 2.135909393737508, - "grad_norm": 0.3927097404516126, - "learning_rate": 5.278036534075981e-06, - "loss": 0.0333, - "step": 4809 - }, - { - "epoch": 2.136353542083056, - "grad_norm": 0.5754764647926375, - "learning_rate": 5.276100975945393e-06, - "loss": 0.0327, - "step": 4810 - }, - { - "epoch": 2.1367976904286032, - "grad_norm": 0.9692035662201081, - "learning_rate": 5.274165376312136e-06, - "loss": 0.0433, - "step": 4811 - }, - { - "epoch": 2.1372418387741505, - "grad_norm": 0.3898618853951006, - "learning_rate": 5.272229735467166e-06, - "loss": 0.0301, - "step": 4812 - }, - { - "epoch": 2.137685987119698, - "grad_norm": 0.5041899434970386, - "learning_rate": 5.270294053701442e-06, - "loss": 0.0293, - "step": 4813 - }, - { - "epoch": 2.1381301354652456, - "grad_norm": 0.4402127129653806, - "learning_rate": 5.268358331305931e-06, - "loss": 0.0321, - "step": 4814 - }, - { - "epoch": 2.138574283810793, - "grad_norm": 0.45642467547731175, - "learning_rate": 5.266422568571604e-06, - "loss": 0.0359, - "step": 4815 - }, - { - "epoch": 2.13901843215634, - "grad_norm": 0.3935008736888044, - "learning_rate": 5.264486765789439e-06, - "loss": 0.034, - "step": 4816 - }, - { - "epoch": 2.1394625805018874, - "grad_norm": 0.334289910641262, - "learning_rate": 5.262550923250421e-06, - "loss": 0.0235, - "step": 4817 - }, - { - "epoch": 2.139906728847435, - "grad_norm": 0.45188823216686197, - "learning_rate": 5.260615041245538e-06, - "loss": 0.0293, - "step": 4818 - }, - { - "epoch": 2.1403508771929824, - "grad_norm": 0.5579134180377057, - "learning_rate": 5.25867912006579e-06, - "loss": 0.0428, - "step": 4819 - }, - { - "epoch": 2.1407950255385297, - "grad_norm": 0.33710370134564305, - "learning_rate": 5.256743160002174e-06, - "loss": 0.0265, - "step": 4820 - }, - { - "epoch": 2.1412391738840775, - "grad_norm": 0.5496665050765797, - "learning_rate": 5.254807161345699e-06, - "loss": 0.0482, - "step": 4821 - }, - { - "epoch": 2.1416833222296248, - "grad_norm": 0.41971477565058973, - "learning_rate": 5.2528711243873795e-06, - "loss": 0.032, - "step": 4822 - }, - { - "epoch": 2.142127470575172, - "grad_norm": 0.4604575178968341, - "learning_rate": 5.2509350494182365e-06, - "loss": 0.0366, - "step": 4823 - }, - { - "epoch": 2.1425716189207193, - "grad_norm": 0.3714626261982935, - "learning_rate": 5.2489989367292916e-06, - "loss": 0.0256, - "step": 4824 - }, - { - "epoch": 2.143015767266267, - "grad_norm": 0.40647254628304297, - "learning_rate": 5.247062786611575e-06, - "loss": 0.034, - "step": 4825 - }, - { - "epoch": 2.1434599156118144, - "grad_norm": 0.534798459704081, - "learning_rate": 5.245126599356126e-06, - "loss": 0.0344, - "step": 4826 - }, - { - "epoch": 2.1439040639573617, - "grad_norm": 0.3767967274585948, - "learning_rate": 5.243190375253987e-06, - "loss": 0.0388, - "step": 4827 - }, - { - "epoch": 2.1443482123029094, - "grad_norm": 0.43416732973374494, - "learning_rate": 5.241254114596201e-06, - "loss": 0.036, - "step": 4828 - }, - { - "epoch": 2.1447923606484567, - "grad_norm": 0.37510655690708544, - "learning_rate": 5.2393178176738246e-06, - "loss": 0.0339, - "step": 4829 - }, - { - "epoch": 2.145236508994004, - "grad_norm": 0.509295659153276, - "learning_rate": 5.237381484777914e-06, - "loss": 0.0449, - "step": 4830 - }, - { - "epoch": 2.1456806573395513, - "grad_norm": 0.43018107465023353, - "learning_rate": 5.235445116199536e-06, - "loss": 0.0386, - "step": 4831 - }, - { - "epoch": 2.146124805685099, - "grad_norm": 0.532799480118876, - "learning_rate": 5.2335087122297545e-06, - "loss": 0.0536, - "step": 4832 - }, - { - "epoch": 2.1465689540306463, - "grad_norm": 0.3898351411048676, - "learning_rate": 5.231572273159649e-06, - "loss": 0.0328, - "step": 4833 - }, - { - "epoch": 2.1470131023761936, - "grad_norm": 0.34769775883890974, - "learning_rate": 5.229635799280298e-06, - "loss": 0.0297, - "step": 4834 - }, - { - "epoch": 2.147457250721741, - "grad_norm": 0.4711513587854737, - "learning_rate": 5.2276992908827825e-06, - "loss": 0.0422, - "step": 4835 - }, - { - "epoch": 2.1479013990672886, - "grad_norm": 0.405896587319854, - "learning_rate": 5.2257627482581985e-06, - "loss": 0.033, - "step": 4836 - }, - { - "epoch": 2.148345547412836, - "grad_norm": 0.3745089869031137, - "learning_rate": 5.2238261716976375e-06, - "loss": 0.0246, - "step": 4837 - }, - { - "epoch": 2.148789695758383, - "grad_norm": 0.40688545389600017, - "learning_rate": 5.2218895614922e-06, - "loss": 0.044, - "step": 4838 - }, - { - "epoch": 2.1492338441039305, - "grad_norm": 0.48659815613858365, - "learning_rate": 5.219952917932993e-06, - "loss": 0.0412, - "step": 4839 - }, - { - "epoch": 2.149677992449478, - "grad_norm": 0.5300858396288847, - "learning_rate": 5.218016241311126e-06, - "loss": 0.0441, - "step": 4840 - }, - { - "epoch": 2.1501221407950255, - "grad_norm": 0.5970494126272853, - "learning_rate": 5.216079531917714e-06, - "loss": 0.0377, - "step": 4841 - }, - { - "epoch": 2.150566289140573, - "grad_norm": 0.4023665494232216, - "learning_rate": 5.2141427900438765e-06, - "loss": 0.0261, - "step": 4842 - }, - { - "epoch": 2.1510104374861205, - "grad_norm": 0.3360515019130085, - "learning_rate": 5.212206015980742e-06, - "loss": 0.0232, - "step": 4843 - }, - { - "epoch": 2.151454585831668, - "grad_norm": 0.37156982937954675, - "learning_rate": 5.210269210019438e-06, - "loss": 0.0313, - "step": 4844 - }, - { - "epoch": 2.151898734177215, - "grad_norm": 0.43881581226491895, - "learning_rate": 5.2083323724511e-06, - "loss": 0.0387, - "step": 4845 - }, - { - "epoch": 2.1523428825227624, - "grad_norm": 0.5133014074927194, - "learning_rate": 5.206395503566867e-06, - "loss": 0.033, - "step": 4846 - }, - { - "epoch": 2.15278703086831, - "grad_norm": 0.7393084909457972, - "learning_rate": 5.204458603657885e-06, - "loss": 0.0451, - "step": 4847 - }, - { - "epoch": 2.1532311792138574, - "grad_norm": 0.5276996020099548, - "learning_rate": 5.2025216730153016e-06, - "loss": 0.0244, - "step": 4848 - }, - { - "epoch": 2.1536753275594047, - "grad_norm": 0.47830343576785345, - "learning_rate": 5.200584711930267e-06, - "loss": 0.0387, - "step": 4849 - }, - { - "epoch": 2.1541194759049525, - "grad_norm": 0.5578861068890496, - "learning_rate": 5.198647720693948e-06, - "loss": 0.0421, - "step": 4850 - }, - { - "epoch": 2.1545636242504997, - "grad_norm": 0.3280166182279637, - "learning_rate": 5.1967106995975e-06, - "loss": 0.0233, - "step": 4851 - }, - { - "epoch": 2.155007772596047, - "grad_norm": 0.5452793800296186, - "learning_rate": 5.194773648932092e-06, - "loss": 0.0403, - "step": 4852 - }, - { - "epoch": 2.1554519209415943, - "grad_norm": 0.5130821544913388, - "learning_rate": 5.192836568988895e-06, - "loss": 0.0307, - "step": 4853 - }, - { - "epoch": 2.155896069287142, - "grad_norm": 0.33770672647996586, - "learning_rate": 5.190899460059088e-06, - "loss": 0.0232, - "step": 4854 - }, - { - "epoch": 2.1563402176326893, - "grad_norm": 0.6783231395478261, - "learning_rate": 5.188962322433848e-06, - "loss": 0.0359, - "step": 4855 - }, - { - "epoch": 2.1567843659782366, - "grad_norm": 0.49312348081568724, - "learning_rate": 5.187025156404361e-06, - "loss": 0.051, - "step": 4856 - }, - { - "epoch": 2.1572285143237844, - "grad_norm": 0.5188160057025217, - "learning_rate": 5.185087962261817e-06, - "loss": 0.037, - "step": 4857 - }, - { - "epoch": 2.1576726626693317, - "grad_norm": 0.4979475713257613, - "learning_rate": 5.183150740297407e-06, - "loss": 0.0457, - "step": 4858 - }, - { - "epoch": 2.158116811014879, - "grad_norm": 0.8466081637820392, - "learning_rate": 5.181213490802329e-06, - "loss": 0.0467, - "step": 4859 - }, - { - "epoch": 2.1585609593604262, - "grad_norm": 0.4708804574175826, - "learning_rate": 5.179276214067788e-06, - "loss": 0.0404, - "step": 4860 - }, - { - "epoch": 2.159005107705974, - "grad_norm": 0.3951062024143596, - "learning_rate": 5.1773389103849835e-06, - "loss": 0.0262, - "step": 4861 - }, - { - "epoch": 2.1594492560515213, - "grad_norm": 0.5862820412725568, - "learning_rate": 5.175401580045131e-06, - "loss": 0.0471, - "step": 4862 - }, - { - "epoch": 2.1598934043970686, - "grad_norm": 0.3583235113891142, - "learning_rate": 5.173464223339438e-06, - "loss": 0.0261, - "step": 4863 - }, - { - "epoch": 2.160337552742616, - "grad_norm": 0.7139304922452345, - "learning_rate": 5.171526840559129e-06, - "loss": 0.0581, - "step": 4864 - }, - { - "epoch": 2.1607817010881636, - "grad_norm": 0.41880853503590465, - "learning_rate": 5.169589431995421e-06, - "loss": 0.0305, - "step": 4865 - }, - { - "epoch": 2.161225849433711, - "grad_norm": 0.974797863480534, - "learning_rate": 5.16765199793954e-06, - "loss": 0.0476, - "step": 4866 - }, - { - "epoch": 2.161669997779258, - "grad_norm": 0.6058306397735719, - "learning_rate": 5.165714538682716e-06, - "loss": 0.0603, - "step": 4867 - }, - { - "epoch": 2.1621141461248055, - "grad_norm": 0.6017608849107138, - "learning_rate": 5.163777054516182e-06, - "loss": 0.0415, - "step": 4868 - }, - { - "epoch": 2.162558294470353, - "grad_norm": 1.5666330014886598, - "learning_rate": 5.161839545731175e-06, - "loss": 0.0424, - "step": 4869 - }, - { - "epoch": 2.1630024428159005, - "grad_norm": 0.4625370484241095, - "learning_rate": 5.159902012618933e-06, - "loss": 0.0337, - "step": 4870 - }, - { - "epoch": 2.1634465911614478, - "grad_norm": 0.6306574373250589, - "learning_rate": 5.1579644554707054e-06, - "loss": 0.057, - "step": 4871 - }, - { - "epoch": 2.1638907395069955, - "grad_norm": 0.9235120056329936, - "learning_rate": 5.156026874577735e-06, - "loss": 0.0561, - "step": 4872 - }, - { - "epoch": 2.164334887852543, - "grad_norm": 0.4148788359576288, - "learning_rate": 5.154089270231275e-06, - "loss": 0.0365, - "step": 4873 - }, - { - "epoch": 2.16477903619809, - "grad_norm": 0.5384050767235271, - "learning_rate": 5.152151642722582e-06, - "loss": 0.0397, - "step": 4874 - }, - { - "epoch": 2.1652231845436374, - "grad_norm": 0.42398179308700884, - "learning_rate": 5.15021399234291e-06, - "loss": 0.0244, - "step": 4875 - }, - { - "epoch": 2.165667332889185, - "grad_norm": 0.4428879862840884, - "learning_rate": 5.148276319383525e-06, - "loss": 0.0338, - "step": 4876 - }, - { - "epoch": 2.1661114812347324, - "grad_norm": 0.5151742619891833, - "learning_rate": 5.146338624135689e-06, - "loss": 0.0429, - "step": 4877 - }, - { - "epoch": 2.1665556295802797, - "grad_norm": 0.5906956484118873, - "learning_rate": 5.144400906890672e-06, - "loss": 0.0486, - "step": 4878 - }, - { - "epoch": 2.1669997779258274, - "grad_norm": 0.5089563089876176, - "learning_rate": 5.142463167939748e-06, - "loss": 0.0336, - "step": 4879 - }, - { - "epoch": 2.1674439262713747, - "grad_norm": 0.4990120356759535, - "learning_rate": 5.140525407574187e-06, - "loss": 0.0397, - "step": 4880 - }, - { - "epoch": 2.167888074616922, - "grad_norm": 0.47055574764670566, - "learning_rate": 5.138587626085271e-06, - "loss": 0.0411, - "step": 4881 - }, - { - "epoch": 2.1683322229624693, - "grad_norm": 0.49103425451011984, - "learning_rate": 5.136649823764281e-06, - "loss": 0.0422, - "step": 4882 - }, - { - "epoch": 2.168776371308017, - "grad_norm": 0.44674321466653016, - "learning_rate": 5.1347120009025005e-06, - "loss": 0.0312, - "step": 4883 - }, - { - "epoch": 2.1692205196535643, - "grad_norm": 0.2849178715545674, - "learning_rate": 5.132774157791218e-06, - "loss": 0.0226, - "step": 4884 - }, - { - "epoch": 2.1696646679991116, - "grad_norm": 0.46152868939886915, - "learning_rate": 5.130836294721726e-06, - "loss": 0.0364, - "step": 4885 - }, - { - "epoch": 2.170108816344659, - "grad_norm": 0.43325439116729864, - "learning_rate": 5.128898411985315e-06, - "loss": 0.0288, - "step": 4886 - }, - { - "epoch": 2.1705529646902066, - "grad_norm": 0.39479226787289406, - "learning_rate": 5.1269605098732825e-06, - "loss": 0.0251, - "step": 4887 - }, - { - "epoch": 2.170997113035754, - "grad_norm": 0.41019425261041265, - "learning_rate": 5.12502258867693e-06, - "loss": 0.031, - "step": 4888 - }, - { - "epoch": 2.1714412613813012, - "grad_norm": 0.508850109174841, - "learning_rate": 5.123084648687557e-06, - "loss": 0.0474, - "step": 4889 - }, - { - "epoch": 2.171885409726849, - "grad_norm": 0.42681095953850656, - "learning_rate": 5.121146690196472e-06, - "loss": 0.0244, - "step": 4890 - }, - { - "epoch": 2.1723295580723962, - "grad_norm": 0.32346458488317453, - "learning_rate": 5.1192087134949804e-06, - "loss": 0.0264, - "step": 4891 - }, - { - "epoch": 2.1727737064179435, - "grad_norm": 0.40304790268945045, - "learning_rate": 5.1172707188743955e-06, - "loss": 0.0264, - "step": 4892 - }, - { - "epoch": 2.173217854763491, - "grad_norm": 0.5433823527011491, - "learning_rate": 5.115332706626028e-06, - "loss": 0.0459, - "step": 4893 - }, - { - "epoch": 2.1736620031090386, - "grad_norm": 0.33519824313183216, - "learning_rate": 5.113394677041197e-06, - "loss": 0.0289, - "step": 4894 - }, - { - "epoch": 2.174106151454586, - "grad_norm": 0.399864916190364, - "learning_rate": 5.111456630411218e-06, - "loss": 0.026, - "step": 4895 - }, - { - "epoch": 2.174550299800133, - "grad_norm": 0.49817314831150256, - "learning_rate": 5.109518567027416e-06, - "loss": 0.0341, - "step": 4896 - }, - { - "epoch": 2.1749944481456804, - "grad_norm": 0.41814385729810516, - "learning_rate": 5.107580487181112e-06, - "loss": 0.0315, - "step": 4897 - }, - { - "epoch": 2.175438596491228, - "grad_norm": 0.44895548729597046, - "learning_rate": 5.105642391163633e-06, - "loss": 0.0403, - "step": 4898 - }, - { - "epoch": 2.1758827448367755, - "grad_norm": 0.3688472653021723, - "learning_rate": 5.10370427926631e-06, - "loss": 0.0345, - "step": 4899 - }, - { - "epoch": 2.1763268931823228, - "grad_norm": 0.4629343116213356, - "learning_rate": 5.1017661517804694e-06, - "loss": 0.0353, - "step": 4900 - }, - { - "epoch": 2.1767710415278705, - "grad_norm": 0.4559917864661212, - "learning_rate": 5.099828008997448e-06, - "loss": 0.0398, - "step": 4901 - }, - { - "epoch": 2.1772151898734178, - "grad_norm": 0.45480161362407634, - "learning_rate": 5.097889851208583e-06, - "loss": 0.035, - "step": 4902 - }, - { - "epoch": 2.177659338218965, - "grad_norm": 0.42473449296712756, - "learning_rate": 5.0959516787052085e-06, - "loss": 0.0317, - "step": 4903 - }, - { - "epoch": 2.1781034865645124, - "grad_norm": 0.4952860166933172, - "learning_rate": 5.094013491778668e-06, - "loss": 0.0443, - "step": 4904 - }, - { - "epoch": 2.17854763491006, - "grad_norm": 0.4001101623153066, - "learning_rate": 5.092075290720302e-06, - "loss": 0.0298, - "step": 4905 - }, - { - "epoch": 2.1789917832556074, - "grad_norm": 0.5225430247317463, - "learning_rate": 5.0901370758214565e-06, - "loss": 0.0398, - "step": 4906 - }, - { - "epoch": 2.1794359316011547, - "grad_norm": 0.6146833193819635, - "learning_rate": 5.088198847373477e-06, - "loss": 0.0464, - "step": 4907 - }, - { - "epoch": 2.1798800799467024, - "grad_norm": 0.47802919024440194, - "learning_rate": 5.086260605667712e-06, - "loss": 0.037, - "step": 4908 - }, - { - "epoch": 2.1803242282922497, - "grad_norm": 0.5368694025591854, - "learning_rate": 5.084322350995512e-06, - "loss": 0.0488, - "step": 4909 - }, - { - "epoch": 2.180768376637797, - "grad_norm": 0.32415608827454506, - "learning_rate": 5.0823840836482316e-06, - "loss": 0.0233, - "step": 4910 - }, - { - "epoch": 2.1812125249833443, - "grad_norm": 0.4761380556003533, - "learning_rate": 5.080445803917225e-06, - "loss": 0.0471, - "step": 4911 - }, - { - "epoch": 2.181656673328892, - "grad_norm": 0.3515304190735633, - "learning_rate": 5.078507512093844e-06, - "loss": 0.0259, - "step": 4912 - }, - { - "epoch": 2.1821008216744393, - "grad_norm": 0.6155251528359741, - "learning_rate": 5.076569208469454e-06, - "loss": 0.0474, - "step": 4913 - }, - { - "epoch": 2.1825449700199866, - "grad_norm": 0.4580217790122366, - "learning_rate": 5.0746308933354105e-06, - "loss": 0.0438, - "step": 4914 - }, - { - "epoch": 2.182989118365534, - "grad_norm": 0.408141050974567, - "learning_rate": 5.072692566983074e-06, - "loss": 0.0376, - "step": 4915 - }, - { - "epoch": 2.1834332667110816, - "grad_norm": 0.4622430926137514, - "learning_rate": 5.070754229703811e-06, - "loss": 0.0325, - "step": 4916 - }, - { - "epoch": 2.183877415056629, - "grad_norm": 0.49891927431787836, - "learning_rate": 5.068815881788986e-06, - "loss": 0.0383, - "step": 4917 - }, - { - "epoch": 2.184321563402176, - "grad_norm": 0.3944468340370449, - "learning_rate": 5.0668775235299636e-06, - "loss": 0.0364, - "step": 4918 - }, - { - "epoch": 2.184765711747724, - "grad_norm": 0.5143753892518631, - "learning_rate": 5.064939155218115e-06, - "loss": 0.0327, - "step": 4919 - }, - { - "epoch": 2.1852098600932712, - "grad_norm": 0.4613419832071711, - "learning_rate": 5.0630007771448064e-06, - "loss": 0.0365, - "step": 4920 - }, - { - "epoch": 2.1856540084388185, - "grad_norm": 0.42353949789442463, - "learning_rate": 5.061062389601413e-06, - "loss": 0.0305, - "step": 4921 - }, - { - "epoch": 2.186098156784366, - "grad_norm": 0.4615529011211128, - "learning_rate": 5.059123992879303e-06, - "loss": 0.0363, - "step": 4922 - }, - { - "epoch": 2.1865423051299135, - "grad_norm": 0.38154251403565953, - "learning_rate": 5.057185587269854e-06, - "loss": 0.0237, - "step": 4923 - }, - { - "epoch": 2.186986453475461, - "grad_norm": 0.45850860345485767, - "learning_rate": 5.05524717306444e-06, - "loss": 0.0309, - "step": 4924 - }, - { - "epoch": 2.187430601821008, - "grad_norm": 0.3296860610369256, - "learning_rate": 5.053308750554437e-06, - "loss": 0.0311, - "step": 4925 - }, - { - "epoch": 2.1878747501665554, - "grad_norm": 0.3804383024783399, - "learning_rate": 5.051370320031221e-06, - "loss": 0.0337, - "step": 4926 - }, - { - "epoch": 2.188318898512103, - "grad_norm": 0.44377911056623215, - "learning_rate": 5.049431881786176e-06, - "loss": 0.0451, - "step": 4927 - }, - { - "epoch": 2.1887630468576504, - "grad_norm": 0.3897615063112148, - "learning_rate": 5.04749343611068e-06, - "loss": 0.0264, - "step": 4928 - }, - { - "epoch": 2.1892071952031977, - "grad_norm": 0.857708991433585, - "learning_rate": 5.045554983296111e-06, - "loss": 0.0421, - "step": 4929 - }, - { - "epoch": 2.1896513435487455, - "grad_norm": 0.4460864492390492, - "learning_rate": 5.043616523633856e-06, - "loss": 0.0348, - "step": 4930 - }, - { - "epoch": 2.1900954918942928, - "grad_norm": 0.46033992290883, - "learning_rate": 5.0416780574152976e-06, - "loss": 0.0391, - "step": 4931 - }, - { - "epoch": 2.19053964023984, - "grad_norm": 0.36797085648755834, - "learning_rate": 5.0397395849318165e-06, - "loss": 0.0379, - "step": 4932 - }, - { - "epoch": 2.1909837885853873, - "grad_norm": 0.3675945320964035, - "learning_rate": 5.0378011064748025e-06, - "loss": 0.0252, - "step": 4933 - }, - { - "epoch": 2.191427936930935, - "grad_norm": 0.6163310761797228, - "learning_rate": 5.035862622335641e-06, - "loss": 0.0477, - "step": 4934 - }, - { - "epoch": 2.1918720852764824, - "grad_norm": 0.5755380695413213, - "learning_rate": 5.0339241328057164e-06, - "loss": 0.0321, - "step": 4935 - }, - { - "epoch": 2.1923162336220297, - "grad_norm": 0.7365554315970313, - "learning_rate": 5.0319856381764175e-06, - "loss": 0.0421, - "step": 4936 - }, - { - "epoch": 2.1927603819675774, - "grad_norm": 0.3786316850609603, - "learning_rate": 5.030047138739136e-06, - "loss": 0.0274, - "step": 4937 - }, - { - "epoch": 2.1932045303131247, - "grad_norm": 0.46503560614578676, - "learning_rate": 5.028108634785258e-06, - "loss": 0.0329, - "step": 4938 - }, - { - "epoch": 2.193648678658672, - "grad_norm": 0.39835692305695053, - "learning_rate": 5.0261701266061746e-06, - "loss": 0.0267, - "step": 4939 - }, - { - "epoch": 2.1940928270042193, - "grad_norm": 0.3883823936267887, - "learning_rate": 5.024231614493277e-06, - "loss": 0.0284, - "step": 4940 - }, - { - "epoch": 2.194536975349767, - "grad_norm": 0.38466420585767425, - "learning_rate": 5.022293098737957e-06, - "loss": 0.0302, - "step": 4941 - }, - { - "epoch": 2.1949811236953143, - "grad_norm": 0.5034584168375414, - "learning_rate": 5.0203545796316044e-06, - "loss": 0.05, - "step": 4942 - }, - { - "epoch": 2.1954252720408616, - "grad_norm": 0.5196054438026021, - "learning_rate": 5.0184160574656125e-06, - "loss": 0.0348, - "step": 4943 - }, - { - "epoch": 2.195869420386409, - "grad_norm": 0.4645537912636916, - "learning_rate": 5.0164775325313755e-06, - "loss": 0.0376, - "step": 4944 - }, - { - "epoch": 2.1963135687319566, - "grad_norm": 0.4894769238899238, - "learning_rate": 5.0145390051202846e-06, - "loss": 0.0357, - "step": 4945 - }, - { - "epoch": 2.196757717077504, - "grad_norm": 0.3225076116998955, - "learning_rate": 5.012600475523733e-06, - "loss": 0.0268, - "step": 4946 - }, - { - "epoch": 2.197201865423051, - "grad_norm": 0.46007489982886224, - "learning_rate": 5.010661944033118e-06, - "loss": 0.0406, - "step": 4947 - }, - { - "epoch": 2.197646013768599, - "grad_norm": 0.6157542604470476, - "learning_rate": 5.008723410939832e-06, - "loss": 0.0329, - "step": 4948 - }, - { - "epoch": 2.198090162114146, - "grad_norm": 0.41686085632858955, - "learning_rate": 5.006784876535268e-06, - "loss": 0.0416, - "step": 4949 - }, - { - "epoch": 2.1985343104596935, - "grad_norm": 0.5468745101745486, - "learning_rate": 5.004846341110822e-06, - "loss": 0.0499, - "step": 4950 - }, - { - "epoch": 2.198978458805241, - "grad_norm": 0.43554432758066686, - "learning_rate": 5.002907804957889e-06, - "loss": 0.0353, - "step": 4951 - }, - { - "epoch": 2.1994226071507885, - "grad_norm": 0.4925752472694445, - "learning_rate": 5.000969268367862e-06, - "loss": 0.0367, - "step": 4952 - }, - { - "epoch": 2.199866755496336, - "grad_norm": 0.4262017675003274, - "learning_rate": 4.999030731632139e-06, - "loss": 0.0444, - "step": 4953 - }, - { - "epoch": 2.200310903841883, - "grad_norm": 0.3899535716527509, - "learning_rate": 4.997092195042113e-06, - "loss": 0.0299, - "step": 4954 - }, - { - "epoch": 2.2007550521874304, - "grad_norm": 0.4534373375430896, - "learning_rate": 4.995153658889181e-06, - "loss": 0.0346, - "step": 4955 - }, - { - "epoch": 2.201199200532978, - "grad_norm": 0.32786374874254637, - "learning_rate": 4.993215123464734e-06, - "loss": 0.0292, - "step": 4956 - }, - { - "epoch": 2.2016433488785254, - "grad_norm": 0.48869610889723925, - "learning_rate": 4.991276589060169e-06, - "loss": 0.0435, - "step": 4957 - }, - { - "epoch": 2.2020874972240727, - "grad_norm": 0.41141706707817044, - "learning_rate": 4.989338055966883e-06, - "loss": 0.0317, - "step": 4958 - }, - { - "epoch": 2.2025316455696204, - "grad_norm": 0.35799145890163087, - "learning_rate": 4.987399524476268e-06, - "loss": 0.0343, - "step": 4959 - }, - { - "epoch": 2.2029757939151677, - "grad_norm": 0.39541269197255546, - "learning_rate": 4.985460994879717e-06, - "loss": 0.0303, - "step": 4960 - }, - { - "epoch": 2.203419942260715, - "grad_norm": 0.4446866574845067, - "learning_rate": 4.983522467468627e-06, - "loss": 0.0458, - "step": 4961 - }, - { - "epoch": 2.2038640906062623, - "grad_norm": 0.44841266243980166, - "learning_rate": 4.981583942534388e-06, - "loss": 0.0401, - "step": 4962 - }, - { - "epoch": 2.20430823895181, - "grad_norm": 0.40609549354200053, - "learning_rate": 4.979645420368397e-06, - "loss": 0.0318, - "step": 4963 - }, - { - "epoch": 2.2047523872973573, - "grad_norm": 0.4655683020461079, - "learning_rate": 4.977706901262045e-06, - "loss": 0.0318, - "step": 4964 - }, - { - "epoch": 2.2051965356429046, - "grad_norm": 0.4105935741895686, - "learning_rate": 4.975768385506725e-06, - "loss": 0.0365, - "step": 4965 - }, - { - "epoch": 2.2056406839884524, - "grad_norm": 0.4840996358141117, - "learning_rate": 4.973829873393827e-06, - "loss": 0.0288, - "step": 4966 - }, - { - "epoch": 2.2060848323339997, - "grad_norm": 0.43687024211685477, - "learning_rate": 4.971891365214743e-06, - "loss": 0.0284, - "step": 4967 - }, - { - "epoch": 2.206528980679547, - "grad_norm": 0.5919063970733193, - "learning_rate": 4.969952861260865e-06, - "loss": 0.0316, - "step": 4968 - }, - { - "epoch": 2.2069731290250942, - "grad_norm": 0.34222056631687486, - "learning_rate": 4.968014361823583e-06, - "loss": 0.0269, - "step": 4969 - }, - { - "epoch": 2.207417277370642, - "grad_norm": 0.577057583701676, - "learning_rate": 4.966075867194285e-06, - "loss": 0.0391, - "step": 4970 - }, - { - "epoch": 2.2078614257161893, - "grad_norm": 0.45130320193766277, - "learning_rate": 4.964137377664362e-06, - "loss": 0.0358, - "step": 4971 - }, - { - "epoch": 2.2083055740617366, - "grad_norm": 0.37843493642246295, - "learning_rate": 4.9621988935252e-06, - "loss": 0.0356, - "step": 4972 - }, - { - "epoch": 2.208749722407284, - "grad_norm": 0.37308118817776387, - "learning_rate": 4.9602604150681835e-06, - "loss": 0.0244, - "step": 4973 - }, - { - "epoch": 2.2091938707528316, - "grad_norm": 0.5336868565377078, - "learning_rate": 4.958321942584703e-06, - "loss": 0.0488, - "step": 4974 - }, - { - "epoch": 2.209638019098379, - "grad_norm": 0.3742330589994002, - "learning_rate": 4.956383476366145e-06, - "loss": 0.0309, - "step": 4975 - }, - { - "epoch": 2.210082167443926, - "grad_norm": 0.46020603183712594, - "learning_rate": 4.95444501670389e-06, - "loss": 0.0323, - "step": 4976 - }, - { - "epoch": 2.2105263157894735, - "grad_norm": 0.8475632066917597, - "learning_rate": 4.9525065638893226e-06, - "loss": 0.0465, - "step": 4977 - }, - { - "epoch": 2.210970464135021, - "grad_norm": 0.5219077605882717, - "learning_rate": 4.950568118213825e-06, - "loss": 0.0393, - "step": 4978 - }, - { - "epoch": 2.2114146124805685, - "grad_norm": 0.37860034305038076, - "learning_rate": 4.948629679968778e-06, - "loss": 0.0366, - "step": 4979 - }, - { - "epoch": 2.2118587608261158, - "grad_norm": 0.6128046042996554, - "learning_rate": 4.946691249445565e-06, - "loss": 0.04, - "step": 4980 - }, - { - "epoch": 2.2123029091716635, - "grad_norm": 0.4839071616267329, - "learning_rate": 4.944752826935562e-06, - "loss": 0.035, - "step": 4981 - }, - { - "epoch": 2.212747057517211, - "grad_norm": 0.45065394012575055, - "learning_rate": 4.942814412730147e-06, - "loss": 0.0267, - "step": 4982 - }, - { - "epoch": 2.213191205862758, - "grad_norm": 0.5986492760384308, - "learning_rate": 4.940876007120699e-06, - "loss": 0.0289, - "step": 4983 - }, - { - "epoch": 2.2136353542083054, - "grad_norm": 0.5082807449831283, - "learning_rate": 4.938937610398588e-06, - "loss": 0.034, - "step": 4984 - }, - { - "epoch": 2.214079502553853, - "grad_norm": 0.46534139162341914, - "learning_rate": 4.9369992228551935e-06, - "loss": 0.0323, - "step": 4985 - }, - { - "epoch": 2.2145236508994004, - "grad_norm": 0.4943061938684834, - "learning_rate": 4.935060844781886e-06, - "loss": 0.0353, - "step": 4986 - }, - { - "epoch": 2.2149677992449477, - "grad_norm": 0.4336318300801724, - "learning_rate": 4.933122476470038e-06, - "loss": 0.0275, - "step": 4987 - }, - { - "epoch": 2.2154119475904954, - "grad_norm": 0.4360854695437589, - "learning_rate": 4.931184118211016e-06, - "loss": 0.0349, - "step": 4988 - }, - { - "epoch": 2.2158560959360427, - "grad_norm": 0.6620193372024554, - "learning_rate": 4.929245770296191e-06, - "loss": 0.0483, - "step": 4989 - }, - { - "epoch": 2.21630024428159, - "grad_norm": 0.48994703833403463, - "learning_rate": 4.927307433016927e-06, - "loss": 0.0432, - "step": 4990 - }, - { - "epoch": 2.2167443926271373, - "grad_norm": 0.4622061500888628, - "learning_rate": 4.925369106664591e-06, - "loss": 0.0387, - "step": 4991 - }, - { - "epoch": 2.217188540972685, - "grad_norm": 0.4624898194765989, - "learning_rate": 4.923430791530547e-06, - "loss": 0.0196, - "step": 4992 - }, - { - "epoch": 2.2176326893182323, - "grad_norm": 0.3453109472117995, - "learning_rate": 4.9214924879061565e-06, - "loss": 0.0286, - "step": 4993 - }, - { - "epoch": 2.2180768376637796, - "grad_norm": 0.41871877730955476, - "learning_rate": 4.919554196082778e-06, - "loss": 0.0397, - "step": 4994 - }, - { - "epoch": 2.2185209860093273, - "grad_norm": 0.42587370028059823, - "learning_rate": 4.91761591635177e-06, - "loss": 0.0408, - "step": 4995 - }, - { - "epoch": 2.2189651343548746, - "grad_norm": 0.36571656649505513, - "learning_rate": 4.9156776490044875e-06, - "loss": 0.0335, - "step": 4996 - }, - { - "epoch": 2.219409282700422, - "grad_norm": 0.4229412174299105, - "learning_rate": 4.91373939433229e-06, - "loss": 0.0396, - "step": 4997 - }, - { - "epoch": 2.219853431045969, - "grad_norm": 0.3904502538208118, - "learning_rate": 4.911801152626525e-06, - "loss": 0.0352, - "step": 4998 - }, - { - "epoch": 2.220297579391517, - "grad_norm": 0.7376575867139856, - "learning_rate": 4.909862924178545e-06, - "loss": 0.04, - "step": 4999 - }, - { - "epoch": 2.2207417277370642, - "grad_norm": 0.45886434872272697, - "learning_rate": 4.9079247092797e-06, - "loss": 0.0375, - "step": 5000 - }, - { - "epoch": 2.2211858760826115, - "grad_norm": 0.38398725710621795, - "learning_rate": 4.905986508221333e-06, - "loss": 0.034, - "step": 5001 - }, - { - "epoch": 2.221630024428159, - "grad_norm": 0.4363686982956202, - "learning_rate": 4.904048321294791e-06, - "loss": 0.0395, - "step": 5002 - }, - { - "epoch": 2.2220741727737066, - "grad_norm": 0.47908384837669443, - "learning_rate": 4.9021101487914185e-06, - "loss": 0.0451, - "step": 5003 - }, - { - "epoch": 2.222518321119254, - "grad_norm": 0.49404577583056375, - "learning_rate": 4.900171991002553e-06, - "loss": 0.0382, - "step": 5004 - }, - { - "epoch": 2.222962469464801, - "grad_norm": 0.4254636742864719, - "learning_rate": 4.898233848219532e-06, - "loss": 0.0385, - "step": 5005 - }, - { - "epoch": 2.2234066178103484, - "grad_norm": 0.5219692883962996, - "learning_rate": 4.896295720733694e-06, - "loss": 0.04, - "step": 5006 - }, - { - "epoch": 2.223850766155896, - "grad_norm": 0.8849555604502084, - "learning_rate": 4.894357608836368e-06, - "loss": 0.0357, - "step": 5007 - }, - { - "epoch": 2.2242949145014435, - "grad_norm": 0.772611179602613, - "learning_rate": 4.89241951281889e-06, - "loss": 0.0418, - "step": 5008 - }, - { - "epoch": 2.2247390628469907, - "grad_norm": 0.5532053379610344, - "learning_rate": 4.890481432972586e-06, - "loss": 0.0352, - "step": 5009 - }, - { - "epoch": 2.2251832111925385, - "grad_norm": 0.4003154527748429, - "learning_rate": 4.8885433695887836e-06, - "loss": 0.032, - "step": 5010 - }, - { - "epoch": 2.2256273595380858, - "grad_norm": 0.45204263035502396, - "learning_rate": 4.886605322958806e-06, - "loss": 0.0303, - "step": 5011 - }, - { - "epoch": 2.226071507883633, - "grad_norm": 0.3440942170404326, - "learning_rate": 4.884667293373973e-06, - "loss": 0.022, - "step": 5012 - }, - { - "epoch": 2.2265156562291804, - "grad_norm": 0.7116582097517625, - "learning_rate": 4.882729281125605e-06, - "loss": 0.0523, - "step": 5013 - }, - { - "epoch": 2.226959804574728, - "grad_norm": 0.4053030773514669, - "learning_rate": 4.88079128650502e-06, - "loss": 0.027, - "step": 5014 - }, - { - "epoch": 2.2274039529202754, - "grad_norm": 0.6656245142980673, - "learning_rate": 4.878853309803529e-06, - "loss": 0.0462, - "step": 5015 - }, - { - "epoch": 2.2278481012658227, - "grad_norm": 0.38760410447982047, - "learning_rate": 4.876915351312444e-06, - "loss": 0.0265, - "step": 5016 - }, - { - "epoch": 2.2282922496113704, - "grad_norm": 0.407666066744564, - "learning_rate": 4.874977411323073e-06, - "loss": 0.0354, - "step": 5017 - }, - { - "epoch": 2.2287363979569177, - "grad_norm": 0.483920298386755, - "learning_rate": 4.873039490126718e-06, - "loss": 0.0367, - "step": 5018 - }, - { - "epoch": 2.229180546302465, - "grad_norm": 0.45353775305411304, - "learning_rate": 4.871101588014686e-06, - "loss": 0.0358, - "step": 5019 - }, - { - "epoch": 2.2296246946480123, - "grad_norm": 0.4339146999685688, - "learning_rate": 4.869163705278276e-06, - "loss": 0.0326, - "step": 5020 - }, - { - "epoch": 2.23006884299356, - "grad_norm": 1.02624057870213, - "learning_rate": 4.867225842208783e-06, - "loss": 0.0598, - "step": 5021 - }, - { - "epoch": 2.2305129913391073, - "grad_norm": 0.44283161622309714, - "learning_rate": 4.8652879990975e-06, - "loss": 0.0378, - "step": 5022 - }, - { - "epoch": 2.2309571396846546, - "grad_norm": 0.44468206860911264, - "learning_rate": 4.863350176235721e-06, - "loss": 0.0435, - "step": 5023 - }, - { - "epoch": 2.2314012880302023, - "grad_norm": 0.4317246743183953, - "learning_rate": 4.861412373914729e-06, - "loss": 0.0339, - "step": 5024 - }, - { - "epoch": 2.2318454363757496, - "grad_norm": 1.9956444643720284, - "learning_rate": 4.8594745924258144e-06, - "loss": 0.0611, - "step": 5025 - }, - { - "epoch": 2.232289584721297, - "grad_norm": 0.5268379152925193, - "learning_rate": 4.857536832060255e-06, - "loss": 0.0408, - "step": 5026 - }, - { - "epoch": 2.232733733066844, - "grad_norm": 0.4937129380739942, - "learning_rate": 4.85559909310933e-06, - "loss": 0.0523, - "step": 5027 - }, - { - "epoch": 2.233177881412392, - "grad_norm": 0.42104370790377565, - "learning_rate": 4.853661375864313e-06, - "loss": 0.0226, - "step": 5028 - }, - { - "epoch": 2.233622029757939, - "grad_norm": 0.38358747557836487, - "learning_rate": 4.851723680616477e-06, - "loss": 0.0288, - "step": 5029 - }, - { - "epoch": 2.2340661781034865, - "grad_norm": 0.35278248179931115, - "learning_rate": 4.84978600765709e-06, - "loss": 0.0273, - "step": 5030 - }, - { - "epoch": 2.234510326449034, - "grad_norm": 0.5070929341551949, - "learning_rate": 4.84784835727742e-06, - "loss": 0.0421, - "step": 5031 - }, - { - "epoch": 2.2349544747945815, - "grad_norm": 0.4414918243805681, - "learning_rate": 4.845910729768726e-06, - "loss": 0.0299, - "step": 5032 - }, - { - "epoch": 2.235398623140129, - "grad_norm": 0.4272744129564785, - "learning_rate": 4.843973125422266e-06, - "loss": 0.0377, - "step": 5033 - }, - { - "epoch": 2.235842771485676, - "grad_norm": 0.4042780850426815, - "learning_rate": 4.842035544529296e-06, - "loss": 0.0273, - "step": 5034 - }, - { - "epoch": 2.2362869198312234, - "grad_norm": 0.384529185725243, - "learning_rate": 4.8400979873810675e-06, - "loss": 0.0297, - "step": 5035 - }, - { - "epoch": 2.236731068176771, - "grad_norm": 0.5143165626824721, - "learning_rate": 4.838160454268827e-06, - "loss": 0.0378, - "step": 5036 - }, - { - "epoch": 2.2371752165223184, - "grad_norm": 0.48737479457986954, - "learning_rate": 4.8362229454838185e-06, - "loss": 0.0343, - "step": 5037 - }, - { - "epoch": 2.2376193648678657, - "grad_norm": 0.4465431723189805, - "learning_rate": 4.834285461317286e-06, - "loss": 0.0367, - "step": 5038 - }, - { - "epoch": 2.2380635132134135, - "grad_norm": 0.5195671115110356, - "learning_rate": 4.832348002060461e-06, - "loss": 0.0396, - "step": 5039 - }, - { - "epoch": 2.2385076615589607, - "grad_norm": 0.46391257174900896, - "learning_rate": 4.830410568004581e-06, - "loss": 0.036, - "step": 5040 - }, - { - "epoch": 2.238951809904508, - "grad_norm": 0.5290347592358237, - "learning_rate": 4.8284731594408715e-06, - "loss": 0.0448, - "step": 5041 - }, - { - "epoch": 2.2393959582500553, - "grad_norm": 0.4737259034687236, - "learning_rate": 4.826535776660562e-06, - "loss": 0.0328, - "step": 5042 - }, - { - "epoch": 2.239840106595603, - "grad_norm": 0.6929115316947699, - "learning_rate": 4.824598419954871e-06, - "loss": 0.0473, - "step": 5043 - }, - { - "epoch": 2.2402842549411504, - "grad_norm": 0.6258064825937694, - "learning_rate": 4.822661089615017e-06, - "loss": 0.0594, - "step": 5044 - }, - { - "epoch": 2.2407284032866976, - "grad_norm": 0.4537831813620375, - "learning_rate": 4.8207237859322144e-06, - "loss": 0.0413, - "step": 5045 - }, - { - "epoch": 2.2411725516322454, - "grad_norm": 0.37664633790712776, - "learning_rate": 4.818786509197672e-06, - "loss": 0.0303, - "step": 5046 - }, - { - "epoch": 2.2416166999777927, - "grad_norm": 0.3886976621007685, - "learning_rate": 4.816849259702594e-06, - "loss": 0.0311, - "step": 5047 - }, - { - "epoch": 2.24206084832334, - "grad_norm": 0.4486972497289991, - "learning_rate": 4.814912037738185e-06, - "loss": 0.0442, - "step": 5048 - }, - { - "epoch": 2.2425049966688873, - "grad_norm": 0.35002545439343274, - "learning_rate": 4.812974843595641e-06, - "loss": 0.0318, - "step": 5049 - }, - { - "epoch": 2.242949145014435, - "grad_norm": 0.48300556886418516, - "learning_rate": 4.811037677566154e-06, - "loss": 0.0395, - "step": 5050 - }, - { - "epoch": 2.2433932933599823, - "grad_norm": 0.45349978147674774, - "learning_rate": 4.8091005399409145e-06, - "loss": 0.0371, - "step": 5051 - }, - { - "epoch": 2.2438374417055296, - "grad_norm": 0.4518413356936792, - "learning_rate": 4.807163431011107e-06, - "loss": 0.0311, - "step": 5052 - }, - { - "epoch": 2.244281590051077, - "grad_norm": 0.3777032140049169, - "learning_rate": 4.80522635106791e-06, - "loss": 0.0292, - "step": 5053 - }, - { - "epoch": 2.2447257383966246, - "grad_norm": 0.4369243999495971, - "learning_rate": 4.8032893004025016e-06, - "loss": 0.0264, - "step": 5054 - }, - { - "epoch": 2.245169886742172, - "grad_norm": 0.6447452509815467, - "learning_rate": 4.801352279306054e-06, - "loss": 0.0427, - "step": 5055 - }, - { - "epoch": 2.245614035087719, - "grad_norm": 0.5030519012852677, - "learning_rate": 4.799415288069733e-06, - "loss": 0.0381, - "step": 5056 - }, - { - "epoch": 2.246058183433267, - "grad_norm": 0.4348315875534344, - "learning_rate": 4.797478326984702e-06, - "loss": 0.0235, - "step": 5057 - }, - { - "epoch": 2.246502331778814, - "grad_norm": 0.47850023846789697, - "learning_rate": 4.795541396342116e-06, - "loss": 0.036, - "step": 5058 - }, - { - "epoch": 2.2469464801243615, - "grad_norm": 0.5227923023136899, - "learning_rate": 4.793604496433133e-06, - "loss": 0.0382, - "step": 5059 - }, - { - "epoch": 2.247390628469909, - "grad_norm": 0.5404616232699001, - "learning_rate": 4.791667627548902e-06, - "loss": 0.0426, - "step": 5060 - }, - { - "epoch": 2.2478347768154565, - "grad_norm": 0.5031844080280006, - "learning_rate": 4.7897307899805624e-06, - "loss": 0.0296, - "step": 5061 - }, - { - "epoch": 2.248278925161004, - "grad_norm": 0.557884313184048, - "learning_rate": 4.78779398401926e-06, - "loss": 0.0324, - "step": 5062 - }, - { - "epoch": 2.248723073506551, - "grad_norm": 0.46196149242671997, - "learning_rate": 4.785857209956124e-06, - "loss": 0.031, - "step": 5063 - }, - { - "epoch": 2.2491672218520984, - "grad_norm": 0.40473001188667446, - "learning_rate": 4.783920468082288e-06, - "loss": 0.0341, - "step": 5064 - }, - { - "epoch": 2.249611370197646, - "grad_norm": 0.558756976897473, - "learning_rate": 4.781983758688876e-06, - "loss": 0.0257, - "step": 5065 - }, - { - "epoch": 2.2500555185431934, - "grad_norm": 0.4215840569154271, - "learning_rate": 4.780047082067009e-06, - "loss": 0.0402, - "step": 5066 - }, - { - "epoch": 2.2504996668887407, - "grad_norm": 0.41373536715916187, - "learning_rate": 4.778110438507801e-06, - "loss": 0.0352, - "step": 5067 - }, - { - "epoch": 2.2509438152342884, - "grad_norm": 0.41096920941351195, - "learning_rate": 4.776173828302365e-06, - "loss": 0.0411, - "step": 5068 - }, - { - "epoch": 2.2513879635798357, - "grad_norm": 0.4730245858374104, - "learning_rate": 4.774237251741805e-06, - "loss": 0.0327, - "step": 5069 - }, - { - "epoch": 2.251832111925383, - "grad_norm": 0.38391334338943445, - "learning_rate": 4.7723007091172175e-06, - "loss": 0.0368, - "step": 5070 - }, - { - "epoch": 2.2522762602709303, - "grad_norm": 0.34213411038422287, - "learning_rate": 4.770364200719703e-06, - "loss": 0.0288, - "step": 5071 - }, - { - "epoch": 2.252720408616478, - "grad_norm": 0.3470528030999554, - "learning_rate": 4.7684277268403515e-06, - "loss": 0.0218, - "step": 5072 - }, - { - "epoch": 2.2531645569620253, - "grad_norm": 0.48424365825920085, - "learning_rate": 4.766491287770246e-06, - "loss": 0.047, - "step": 5073 - }, - { - "epoch": 2.2536087053075726, - "grad_norm": 0.44693843285607443, - "learning_rate": 4.7645548838004665e-06, - "loss": 0.0314, - "step": 5074 - }, - { - "epoch": 2.2540528536531204, - "grad_norm": 0.43485150995828376, - "learning_rate": 4.762618515222085e-06, - "loss": 0.0342, - "step": 5075 - }, - { - "epoch": 2.2544970019986676, - "grad_norm": 0.4279292260890883, - "learning_rate": 4.760682182326176e-06, - "loss": 0.0407, - "step": 5076 - }, - { - "epoch": 2.254941150344215, - "grad_norm": 0.4726345938038404, - "learning_rate": 4.7587458854038e-06, - "loss": 0.0366, - "step": 5077 - }, - { - "epoch": 2.2553852986897622, - "grad_norm": 0.4982236299340504, - "learning_rate": 4.756809624746015e-06, - "loss": 0.0351, - "step": 5078 - }, - { - "epoch": 2.25582944703531, - "grad_norm": 0.416498003591409, - "learning_rate": 4.754873400643875e-06, - "loss": 0.0345, - "step": 5079 - }, - { - "epoch": 2.2562735953808573, - "grad_norm": 0.5784538425140838, - "learning_rate": 4.7529372133884265e-06, - "loss": 0.0351, - "step": 5080 - }, - { - "epoch": 2.2567177437264045, - "grad_norm": 0.4069962515786798, - "learning_rate": 4.75100106327071e-06, - "loss": 0.0287, - "step": 5081 - }, - { - "epoch": 2.2571618920719523, - "grad_norm": 0.44000239841542776, - "learning_rate": 4.749064950581765e-06, - "loss": 0.0318, - "step": 5082 - }, - { - "epoch": 2.2576060404174996, - "grad_norm": 0.4673614175152977, - "learning_rate": 4.747128875612621e-06, - "loss": 0.0298, - "step": 5083 - }, - { - "epoch": 2.258050188763047, - "grad_norm": 0.577011350942782, - "learning_rate": 4.745192838654304e-06, - "loss": 0.0388, - "step": 5084 - }, - { - "epoch": 2.258494337108594, - "grad_norm": 0.40314614436836343, - "learning_rate": 4.743256839997828e-06, - "loss": 0.0318, - "step": 5085 - }, - { - "epoch": 2.2589384854541414, - "grad_norm": 0.3926832668709334, - "learning_rate": 4.741320879934213e-06, - "loss": 0.0361, - "step": 5086 - }, - { - "epoch": 2.259382633799689, - "grad_norm": 0.5101559666731487, - "learning_rate": 4.739384958754461e-06, - "loss": 0.0451, - "step": 5087 - }, - { - "epoch": 2.2598267821452365, - "grad_norm": 0.35566284403310655, - "learning_rate": 4.73744907674958e-06, - "loss": 0.0228, - "step": 5088 - }, - { - "epoch": 2.2602709304907838, - "grad_norm": 0.5266642057409271, - "learning_rate": 4.7355132342105615e-06, - "loss": 0.0387, - "step": 5089 - }, - { - "epoch": 2.2607150788363315, - "grad_norm": 0.4021115546007318, - "learning_rate": 4.733577431428398e-06, - "loss": 0.0313, - "step": 5090 - }, - { - "epoch": 2.261159227181879, - "grad_norm": 0.48902419392318525, - "learning_rate": 4.73164166869407e-06, - "loss": 0.0465, - "step": 5091 - }, - { - "epoch": 2.261603375527426, - "grad_norm": 0.4831670488263336, - "learning_rate": 4.729705946298557e-06, - "loss": 0.0298, - "step": 5092 - }, - { - "epoch": 2.2620475238729734, - "grad_norm": 1.0045520985152046, - "learning_rate": 4.727770264532835e-06, - "loss": 0.0386, - "step": 5093 - }, - { - "epoch": 2.262491672218521, - "grad_norm": 0.5424294421568248, - "learning_rate": 4.725834623687866e-06, - "loss": 0.0448, - "step": 5094 - }, - { - "epoch": 2.2629358205640684, - "grad_norm": 0.37090231243704935, - "learning_rate": 4.723899024054609e-06, - "loss": 0.0276, - "step": 5095 - }, - { - "epoch": 2.2633799689096157, - "grad_norm": 0.41587596025980633, - "learning_rate": 4.7219634659240195e-06, - "loss": 0.037, - "step": 5096 - }, - { - "epoch": 2.2638241172551634, - "grad_norm": 0.4579599445978751, - "learning_rate": 4.720027949587046e-06, - "loss": 0.0355, - "step": 5097 - }, - { - "epoch": 2.2642682656007107, - "grad_norm": 0.33053691402038254, - "learning_rate": 4.718092475334623e-06, - "loss": 0.0237, - "step": 5098 - }, - { - "epoch": 2.264712413946258, - "grad_norm": 0.4734298805101396, - "learning_rate": 4.716157043457692e-06, - "loss": 0.0406, - "step": 5099 - }, - { - "epoch": 2.2651565622918053, - "grad_norm": 0.495566512713766, - "learning_rate": 4.71422165424718e-06, - "loss": 0.0446, - "step": 5100 - }, - { - "epoch": 2.265600710637353, - "grad_norm": 0.5845335631241523, - "learning_rate": 4.712286307994008e-06, - "loss": 0.0483, - "step": 5101 - }, - { - "epoch": 2.2660448589829003, - "grad_norm": 0.5632138786502331, - "learning_rate": 4.71035100498909e-06, - "loss": 0.0301, - "step": 5102 - }, - { - "epoch": 2.2664890073284476, - "grad_norm": 0.4679864832864739, - "learning_rate": 4.708415745523338e-06, - "loss": 0.0282, - "step": 5103 - }, - { - "epoch": 2.2669331556739953, - "grad_norm": 0.3347447433418278, - "learning_rate": 4.70648052988765e-06, - "loss": 0.0284, - "step": 5104 - }, - { - "epoch": 2.2673773040195426, - "grad_norm": 0.3922788691977759, - "learning_rate": 4.704545358372926e-06, - "loss": 0.0376, - "step": 5105 - }, - { - "epoch": 2.26782145236509, - "grad_norm": 0.574421309924243, - "learning_rate": 4.702610231270053e-06, - "loss": 0.0357, - "step": 5106 - }, - { - "epoch": 2.268265600710637, - "grad_norm": 0.3970980176428026, - "learning_rate": 4.7006751488699145e-06, - "loss": 0.0352, - "step": 5107 - }, - { - "epoch": 2.268709749056185, - "grad_norm": 0.48320849905313684, - "learning_rate": 4.698740111463386e-06, - "loss": 0.0482, - "step": 5108 - }, - { - "epoch": 2.2691538974017322, - "grad_norm": 0.48488451433259316, - "learning_rate": 4.696805119341334e-06, - "loss": 0.0323, - "step": 5109 - }, - { - "epoch": 2.2695980457472795, - "grad_norm": 0.38461440123857293, - "learning_rate": 4.694870172794625e-06, - "loss": 0.0256, - "step": 5110 - }, - { - "epoch": 2.270042194092827, - "grad_norm": 0.5557330047140479, - "learning_rate": 4.692935272114113e-06, - "loss": 0.0303, - "step": 5111 - }, - { - "epoch": 2.2704863424383745, - "grad_norm": 0.4256236131352179, - "learning_rate": 4.6910004175906435e-06, - "loss": 0.0233, - "step": 5112 - }, - { - "epoch": 2.270930490783922, - "grad_norm": 0.46988918438082106, - "learning_rate": 4.689065609515062e-06, - "loss": 0.0363, - "step": 5113 - }, - { - "epoch": 2.271374639129469, - "grad_norm": 0.49373029930466805, - "learning_rate": 4.687130848178202e-06, - "loss": 0.0483, - "step": 5114 - }, - { - "epoch": 2.2718187874750164, - "grad_norm": 0.34972548300588746, - "learning_rate": 4.685196133870887e-06, - "loss": 0.0255, - "step": 5115 - }, - { - "epoch": 2.272262935820564, - "grad_norm": 0.581571662658351, - "learning_rate": 4.683261466883942e-06, - "loss": 0.0414, - "step": 5116 - }, - { - "epoch": 2.2727070841661114, - "grad_norm": 0.44401771745072516, - "learning_rate": 4.681326847508181e-06, - "loss": 0.032, - "step": 5117 - }, - { - "epoch": 2.2731512325116587, - "grad_norm": 0.5278060546522586, - "learning_rate": 4.6793922760344065e-06, - "loss": 0.0418, - "step": 5118 - }, - { - "epoch": 2.2735953808572065, - "grad_norm": 0.5206864377402288, - "learning_rate": 4.6774577527534195e-06, - "loss": 0.0388, - "step": 5119 - }, - { - "epoch": 2.2740395292027538, - "grad_norm": 0.4082716881176943, - "learning_rate": 4.675523277956011e-06, - "loss": 0.0283, - "step": 5120 - }, - { - "epoch": 2.274483677548301, - "grad_norm": 0.4188100723565611, - "learning_rate": 4.673588851932964e-06, - "loss": 0.0377, - "step": 5121 - }, - { - "epoch": 2.2749278258938483, - "grad_norm": 0.32957447472088847, - "learning_rate": 4.671654474975061e-06, - "loss": 0.0284, - "step": 5122 - }, - { - "epoch": 2.275371974239396, - "grad_norm": 0.5325173700148402, - "learning_rate": 4.669720147373065e-06, - "loss": 0.0304, - "step": 5123 - }, - { - "epoch": 2.2758161225849434, - "grad_norm": 0.7259350026566369, - "learning_rate": 4.667785869417744e-06, - "loss": 0.0484, - "step": 5124 - }, - { - "epoch": 2.2762602709304907, - "grad_norm": 0.34263569388234766, - "learning_rate": 4.6658516413998486e-06, - "loss": 0.0302, - "step": 5125 - }, - { - "epoch": 2.2767044192760384, - "grad_norm": 0.4050161385405534, - "learning_rate": 4.663917463610128e-06, - "loss": 0.0327, - "step": 5126 - }, - { - "epoch": 2.2771485676215857, - "grad_norm": 0.4351043792797754, - "learning_rate": 4.661983336339319e-06, - "loss": 0.0347, - "step": 5127 - }, - { - "epoch": 2.277592715967133, - "grad_norm": 0.4544801574632252, - "learning_rate": 4.66004925987816e-06, - "loss": 0.0335, - "step": 5128 - }, - { - "epoch": 2.2780368643126803, - "grad_norm": 0.5298789728156417, - "learning_rate": 4.6581152345173714e-06, - "loss": 0.0481, - "step": 5129 - }, - { - "epoch": 2.278481012658228, - "grad_norm": 0.42879788700967447, - "learning_rate": 4.656181260547669e-06, - "loss": 0.0306, - "step": 5130 - }, - { - "epoch": 2.2789251610037753, - "grad_norm": 0.432457604882885, - "learning_rate": 4.654247338259766e-06, - "loss": 0.0348, - "step": 5131 - }, - { - "epoch": 2.2793693093493226, - "grad_norm": 0.4238026290724064, - "learning_rate": 4.652313467944358e-06, - "loss": 0.0362, - "step": 5132 - }, - { - "epoch": 2.2798134576948703, - "grad_norm": 0.37078187284702663, - "learning_rate": 4.650379649892145e-06, - "loss": 0.0225, - "step": 5133 - }, - { - "epoch": 2.2802576060404176, - "grad_norm": 0.541283465006368, - "learning_rate": 4.648445884393808e-06, - "loss": 0.0427, - "step": 5134 - }, - { - "epoch": 2.280701754385965, - "grad_norm": 0.5756809781245248, - "learning_rate": 4.646512171740028e-06, - "loss": 0.0416, - "step": 5135 - }, - { - "epoch": 2.281145902731512, - "grad_norm": 0.6172689341906578, - "learning_rate": 4.6445785122214715e-06, - "loss": 0.0468, - "step": 5136 - }, - { - "epoch": 2.28159005107706, - "grad_norm": 0.4664467318852636, - "learning_rate": 4.6426449061288035e-06, - "loss": 0.0394, - "step": 5137 - }, - { - "epoch": 2.282034199422607, - "grad_norm": 0.3708061416037573, - "learning_rate": 4.640711353752675e-06, - "loss": 0.0308, - "step": 5138 - }, - { - "epoch": 2.2824783477681545, - "grad_norm": 0.4312428309303351, - "learning_rate": 4.638777855383735e-06, - "loss": 0.0361, - "step": 5139 - }, - { - "epoch": 2.282922496113702, - "grad_norm": 0.3963057474327594, - "learning_rate": 4.636844411312618e-06, - "loss": 0.0244, - "step": 5140 - }, - { - "epoch": 2.2833666444592495, - "grad_norm": 0.48035729602544386, - "learning_rate": 4.634911021829956e-06, - "loss": 0.0388, - "step": 5141 - }, - { - "epoch": 2.283810792804797, - "grad_norm": 0.3951503991589514, - "learning_rate": 4.63297768722637e-06, - "loss": 0.0311, - "step": 5142 - }, - { - "epoch": 2.284254941150344, - "grad_norm": 0.3422700971686053, - "learning_rate": 4.6310444077924705e-06, - "loss": 0.0244, - "step": 5143 - }, - { - "epoch": 2.2846990894958914, - "grad_norm": 0.6947651223374093, - "learning_rate": 4.629111183818863e-06, - "loss": 0.0351, - "step": 5144 - }, - { - "epoch": 2.285143237841439, - "grad_norm": 0.4898604396865276, - "learning_rate": 4.627178015596147e-06, - "loss": 0.0369, - "step": 5145 - }, - { - "epoch": 2.2855873861869864, - "grad_norm": 0.384272758272605, - "learning_rate": 4.625244903414908e-06, - "loss": 0.0378, - "step": 5146 - }, - { - "epoch": 2.2860315345325337, - "grad_norm": 0.6383973512520991, - "learning_rate": 4.623311847565725e-06, - "loss": 0.0371, - "step": 5147 - }, - { - "epoch": 2.2864756828780815, - "grad_norm": 0.43116354217379305, - "learning_rate": 4.621378848339172e-06, - "loss": 0.0385, - "step": 5148 - }, - { - "epoch": 2.2869198312236287, - "grad_norm": 0.37732659096182064, - "learning_rate": 4.619445906025807e-06, - "loss": 0.0302, - "step": 5149 - }, - { - "epoch": 2.287363979569176, - "grad_norm": 0.38619197439251224, - "learning_rate": 4.6175130209161894e-06, - "loss": 0.0309, - "step": 5150 - }, - { - "epoch": 2.2878081279147233, - "grad_norm": 0.3996619270485742, - "learning_rate": 4.615580193300861e-06, - "loss": 0.0342, - "step": 5151 - }, - { - "epoch": 2.288252276260271, - "grad_norm": 0.4975335885399077, - "learning_rate": 4.613647423470361e-06, - "loss": 0.0295, - "step": 5152 - }, - { - "epoch": 2.2886964246058183, - "grad_norm": 0.3800465801231585, - "learning_rate": 4.611714711715215e-06, - "loss": 0.0287, - "step": 5153 - }, - { - "epoch": 2.2891405729513656, - "grad_norm": 0.4187996600512099, - "learning_rate": 4.609782058325944e-06, - "loss": 0.0357, - "step": 5154 - }, - { - "epoch": 2.2895847212969134, - "grad_norm": 0.6627247371766289, - "learning_rate": 4.607849463593056e-06, - "loss": 0.0319, - "step": 5155 - }, - { - "epoch": 2.2900288696424607, - "grad_norm": 0.44696110840918063, - "learning_rate": 4.6059169278070576e-06, - "loss": 0.0439, - "step": 5156 - }, - { - "epoch": 2.290473017988008, - "grad_norm": 0.43597000773880085, - "learning_rate": 4.603984451258439e-06, - "loss": 0.0309, - "step": 5157 - }, - { - "epoch": 2.2909171663335552, - "grad_norm": 0.3416433662316786, - "learning_rate": 4.602052034237684e-06, - "loss": 0.0244, - "step": 5158 - }, - { - "epoch": 2.291361314679103, - "grad_norm": 0.4086413859197415, - "learning_rate": 4.600119677035269e-06, - "loss": 0.0357, - "step": 5159 - }, - { - "epoch": 2.2918054630246503, - "grad_norm": 0.4905561101374712, - "learning_rate": 4.598187379941659e-06, - "loss": 0.036, - "step": 5160 - }, - { - "epoch": 2.2922496113701976, - "grad_norm": 0.47425089286645844, - "learning_rate": 4.59625514324731e-06, - "loss": 0.0344, - "step": 5161 - }, - { - "epoch": 2.2926937597157453, - "grad_norm": 0.4768903592454644, - "learning_rate": 4.594322967242673e-06, - "loss": 0.0445, - "step": 5162 - }, - { - "epoch": 2.2931379080612926, - "grad_norm": 0.6604194357601725, - "learning_rate": 4.592390852218185e-06, - "loss": 0.0353, - "step": 5163 - }, - { - "epoch": 2.29358205640684, - "grad_norm": 0.6477706525687013, - "learning_rate": 4.590458798464275e-06, - "loss": 0.0346, - "step": 5164 - }, - { - "epoch": 2.294026204752387, - "grad_norm": 0.4782427676732749, - "learning_rate": 4.588526806271366e-06, - "loss": 0.0332, - "step": 5165 - }, - { - "epoch": 2.294470353097935, - "grad_norm": 0.33598954590538455, - "learning_rate": 4.5865948759298656e-06, - "loss": 0.0242, - "step": 5166 - }, - { - "epoch": 2.294914501443482, - "grad_norm": 0.5314954081213854, - "learning_rate": 4.58466300773018e-06, - "loss": 0.0375, - "step": 5167 - }, - { - "epoch": 2.2953586497890295, - "grad_norm": 0.45504963601939913, - "learning_rate": 4.582731201962699e-06, - "loss": 0.0325, - "step": 5168 - }, - { - "epoch": 2.2958027981345768, - "grad_norm": 0.5020378540227112, - "learning_rate": 4.5807994589178066e-06, - "loss": 0.0397, - "step": 5169 - }, - { - "epoch": 2.2962469464801245, - "grad_norm": 0.44086322018058155, - "learning_rate": 4.578867778885877e-06, - "loss": 0.0385, - "step": 5170 - }, - { - "epoch": 2.296691094825672, - "grad_norm": 0.4190415887827377, - "learning_rate": 4.5769361621572735e-06, - "loss": 0.0351, - "step": 5171 - }, - { - "epoch": 2.297135243171219, - "grad_norm": 0.40356235040543037, - "learning_rate": 4.575004609022349e-06, - "loss": 0.0384, - "step": 5172 - }, - { - "epoch": 2.2975793915167664, - "grad_norm": 0.5214338427099589, - "learning_rate": 4.573073119771455e-06, - "loss": 0.0414, - "step": 5173 - }, - { - "epoch": 2.298023539862314, - "grad_norm": 0.6042586649916282, - "learning_rate": 4.571141694694922e-06, - "loss": 0.0398, - "step": 5174 - }, - { - "epoch": 2.2984676882078614, - "grad_norm": 0.5451011557742893, - "learning_rate": 4.569210334083077e-06, - "loss": 0.0284, - "step": 5175 - }, - { - "epoch": 2.2989118365534087, - "grad_norm": 0.6693775358695208, - "learning_rate": 4.567279038226237e-06, - "loss": 0.0393, - "step": 5176 - }, - { - "epoch": 2.2993559848989564, - "grad_norm": 0.4565914726092485, - "learning_rate": 4.565347807414709e-06, - "loss": 0.0369, - "step": 5177 - }, - { - "epoch": 2.2998001332445037, - "grad_norm": 0.3909149508886464, - "learning_rate": 4.563416641938786e-06, - "loss": 0.0312, - "step": 5178 - }, - { - "epoch": 2.300244281590051, - "grad_norm": 0.48805937276628275, - "learning_rate": 4.5614855420887595e-06, - "loss": 0.0417, - "step": 5179 - }, - { - "epoch": 2.3006884299355983, - "grad_norm": 0.47062502463527334, - "learning_rate": 4.559554508154906e-06, - "loss": 0.0359, - "step": 5180 - }, - { - "epoch": 2.301132578281146, - "grad_norm": 0.41598990819931514, - "learning_rate": 4.557623540427492e-06, - "loss": 0.0298, - "step": 5181 - }, - { - "epoch": 2.3015767266266933, - "grad_norm": 0.5301634737897044, - "learning_rate": 4.555692639196774e-06, - "loss": 0.0419, - "step": 5182 - }, - { - "epoch": 2.3020208749722406, - "grad_norm": 0.3965912885980032, - "learning_rate": 4.553761804752997e-06, - "loss": 0.0268, - "step": 5183 - }, - { - "epoch": 2.3024650233177884, - "grad_norm": 0.3853380797199742, - "learning_rate": 4.551831037386405e-06, - "loss": 0.0296, - "step": 5184 - }, - { - "epoch": 2.3029091716633356, - "grad_norm": 0.4626560137060772, - "learning_rate": 4.54990033738722e-06, - "loss": 0.0347, - "step": 5185 - }, - { - "epoch": 2.303353320008883, - "grad_norm": 0.4662767664074809, - "learning_rate": 4.54796970504566e-06, - "loss": 0.0382, - "step": 5186 - }, - { - "epoch": 2.3037974683544302, - "grad_norm": 0.5212735203918284, - "learning_rate": 4.546039140651932e-06, - "loss": 0.0347, - "step": 5187 - }, - { - "epoch": 2.304241616699978, - "grad_norm": 0.6660381074871294, - "learning_rate": 4.544108644496232e-06, - "loss": 0.0414, - "step": 5188 - }, - { - "epoch": 2.3046857650455252, - "grad_norm": 0.49995652041639127, - "learning_rate": 4.542178216868746e-06, - "loss": 0.0394, - "step": 5189 - }, - { - "epoch": 2.3051299133910725, - "grad_norm": 0.38693338487127227, - "learning_rate": 4.540247858059654e-06, - "loss": 0.0244, - "step": 5190 - }, - { - "epoch": 2.3055740617366203, - "grad_norm": 0.42802350164666275, - "learning_rate": 4.538317568359117e-06, - "loss": 0.0234, - "step": 5191 - }, - { - "epoch": 2.3060182100821676, - "grad_norm": 0.33959690635237333, - "learning_rate": 4.536387348057292e-06, - "loss": 0.0273, - "step": 5192 - }, - { - "epoch": 2.306462358427715, - "grad_norm": 0.560656839558957, - "learning_rate": 4.5344571974443255e-06, - "loss": 0.0361, - "step": 5193 - }, - { - "epoch": 2.306906506773262, - "grad_norm": 0.3618868056091977, - "learning_rate": 4.5325271168103496e-06, - "loss": 0.0289, - "step": 5194 - }, - { - "epoch": 2.3073506551188094, - "grad_norm": 0.38869936597852134, - "learning_rate": 4.530597106445487e-06, - "loss": 0.0335, - "step": 5195 - }, - { - "epoch": 2.307794803464357, - "grad_norm": 0.451335091229613, - "learning_rate": 4.528667166639855e-06, - "loss": 0.0293, - "step": 5196 - }, - { - "epoch": 2.3082389518099045, - "grad_norm": 0.3756798518286319, - "learning_rate": 4.526737297683554e-06, - "loss": 0.0307, - "step": 5197 - }, - { - "epoch": 2.3086831001554518, - "grad_norm": 0.41638755620692586, - "learning_rate": 4.524807499866678e-06, - "loss": 0.0313, - "step": 5198 - }, - { - "epoch": 2.3091272485009995, - "grad_norm": 0.49512523947166825, - "learning_rate": 4.522877773479305e-06, - "loss": 0.0341, - "step": 5199 - }, - { - "epoch": 2.3095713968465468, - "grad_norm": 0.38110036222157506, - "learning_rate": 4.520948118811508e-06, - "loss": 0.0301, - "step": 5200 - }, - { - "epoch": 2.310015545192094, - "grad_norm": 0.4176831056196918, - "learning_rate": 4.519018536153346e-06, - "loss": 0.0325, - "step": 5201 - }, - { - "epoch": 2.3104596935376414, - "grad_norm": 0.37358674774847195, - "learning_rate": 4.517089025794869e-06, - "loss": 0.0223, - "step": 5202 - }, - { - "epoch": 2.310903841883189, - "grad_norm": 0.5722574491295905, - "learning_rate": 4.515159588026114e-06, - "loss": 0.0385, - "step": 5203 - }, - { - "epoch": 2.3113479902287364, - "grad_norm": 0.3452449788849875, - "learning_rate": 4.51323022313711e-06, - "loss": 0.0239, - "step": 5204 - }, - { - "epoch": 2.3117921385742837, - "grad_norm": 0.5594235169002529, - "learning_rate": 4.511300931417872e-06, - "loss": 0.0378, - "step": 5205 - }, - { - "epoch": 2.3122362869198314, - "grad_norm": 0.4629383899400429, - "learning_rate": 4.509371713158404e-06, - "loss": 0.0465, - "step": 5206 - }, - { - "epoch": 2.3126804352653787, - "grad_norm": 0.344536109032876, - "learning_rate": 4.507442568648702e-06, - "loss": 0.0214, - "step": 5207 - }, - { - "epoch": 2.313124583610926, - "grad_norm": 0.5327665523608216, - "learning_rate": 4.505513498178752e-06, - "loss": 0.0474, - "step": 5208 - }, - { - "epoch": 2.3135687319564733, - "grad_norm": 0.43197164521390413, - "learning_rate": 4.503584502038521e-06, - "loss": 0.0307, - "step": 5209 - }, - { - "epoch": 2.314012880302021, - "grad_norm": 0.46940506061763837, - "learning_rate": 4.501655580517972e-06, - "loss": 0.0405, - "step": 5210 - }, - { - "epoch": 2.3144570286475683, - "grad_norm": 0.3043189475892609, - "learning_rate": 4.499726733907056e-06, - "loss": 0.0208, - "step": 5211 - }, - { - "epoch": 2.3149011769931156, - "grad_norm": 0.5162978477725472, - "learning_rate": 4.497797962495707e-06, - "loss": 0.0417, - "step": 5212 - }, - { - "epoch": 2.3153453253386633, - "grad_norm": 0.4166068328105937, - "learning_rate": 4.495869266573857e-06, - "loss": 0.0363, - "step": 5213 - }, - { - "epoch": 2.3157894736842106, - "grad_norm": 0.5916403656140393, - "learning_rate": 4.49394064643142e-06, - "loss": 0.0401, - "step": 5214 - }, - { - "epoch": 2.316233622029758, - "grad_norm": 0.6022617123765319, - "learning_rate": 4.492012102358301e-06, - "loss": 0.0307, - "step": 5215 - }, - { - "epoch": 2.316677770375305, - "grad_norm": 0.4369169634282751, - "learning_rate": 4.490083634644391e-06, - "loss": 0.042, - "step": 5216 - }, - { - "epoch": 2.317121918720853, - "grad_norm": 0.5201884422005028, - "learning_rate": 4.488155243579574e-06, - "loss": 0.0353, - "step": 5217 - }, - { - "epoch": 2.3175660670664002, - "grad_norm": 0.4192988414508763, - "learning_rate": 4.486226929453716e-06, - "loss": 0.0347, - "step": 5218 - }, - { - "epoch": 2.3180102154119475, - "grad_norm": 0.47826969893341476, - "learning_rate": 4.4842986925566805e-06, - "loss": 0.0412, - "step": 5219 - }, - { - "epoch": 2.3184543637574953, - "grad_norm": 0.4411896998427744, - "learning_rate": 4.482370533178311e-06, - "loss": 0.0473, - "step": 5220 - }, - { - "epoch": 2.3188985121030425, - "grad_norm": 0.7154366399859141, - "learning_rate": 4.4804424516084435e-06, - "loss": 0.0318, - "step": 5221 - }, - { - "epoch": 2.31934266044859, - "grad_norm": 0.4486899236378927, - "learning_rate": 4.478514448136901e-06, - "loss": 0.03, - "step": 5222 - }, - { - "epoch": 2.319786808794137, - "grad_norm": 0.6696629896177303, - "learning_rate": 4.476586523053494e-06, - "loss": 0.0447, - "step": 5223 - }, - { - "epoch": 2.3202309571396844, - "grad_norm": 0.36764590834428945, - "learning_rate": 4.474658676648025e-06, - "loss": 0.0317, - "step": 5224 - }, - { - "epoch": 2.320675105485232, - "grad_norm": 0.4074350155089678, - "learning_rate": 4.4727309092102825e-06, - "loss": 0.0338, - "step": 5225 - }, - { - "epoch": 2.3211192538307794, - "grad_norm": 0.5410060660419875, - "learning_rate": 4.47080322103004e-06, - "loss": 0.0406, - "step": 5226 - }, - { - "epoch": 2.3215634021763267, - "grad_norm": 0.3869245785320258, - "learning_rate": 4.4688756123970625e-06, - "loss": 0.025, - "step": 5227 - }, - { - "epoch": 2.3220075505218745, - "grad_norm": 0.3306613006808203, - "learning_rate": 4.466948083601103e-06, - "loss": 0.0216, - "step": 5228 - }, - { - "epoch": 2.3224516988674218, - "grad_norm": 0.3885322170673341, - "learning_rate": 4.4650206349319e-06, - "loss": 0.0334, - "step": 5229 - }, - { - "epoch": 2.322895847212969, - "grad_norm": 0.45727532111911046, - "learning_rate": 4.463093266679185e-06, - "loss": 0.0314, - "step": 5230 - }, - { - "epoch": 2.3233399955585163, - "grad_norm": 0.40470162685593614, - "learning_rate": 4.4611659791326726e-06, - "loss": 0.0225, - "step": 5231 - }, - { - "epoch": 2.323784143904064, - "grad_norm": 0.3709631261080152, - "learning_rate": 4.459238772582067e-06, - "loss": 0.0281, - "step": 5232 - }, - { - "epoch": 2.3242282922496114, - "grad_norm": 0.5000330379973269, - "learning_rate": 4.457311647317058e-06, - "loss": 0.0408, - "step": 5233 - }, - { - "epoch": 2.3246724405951587, - "grad_norm": 0.40795990787328895, - "learning_rate": 4.4553846036273294e-06, - "loss": 0.0304, - "step": 5234 - }, - { - "epoch": 2.3251165889407064, - "grad_norm": 0.41652506953547214, - "learning_rate": 4.453457641802542e-06, - "loss": 0.0283, - "step": 5235 - }, - { - "epoch": 2.3255607372862537, - "grad_norm": 0.422544506737885, - "learning_rate": 4.451530762132359e-06, - "loss": 0.0247, - "step": 5236 - }, - { - "epoch": 2.326004885631801, - "grad_norm": 0.42941673106868544, - "learning_rate": 4.4496039649064185e-06, - "loss": 0.0271, - "step": 5237 - }, - { - "epoch": 2.3264490339773483, - "grad_norm": 0.5518213591251218, - "learning_rate": 4.4476772504143525e-06, - "loss": 0.0283, - "step": 5238 - }, - { - "epoch": 2.326893182322896, - "grad_norm": 0.3905942457168205, - "learning_rate": 4.445750618945778e-06, - "loss": 0.0338, - "step": 5239 - }, - { - "epoch": 2.3273373306684433, - "grad_norm": 0.5264154088819042, - "learning_rate": 4.443824070790298e-06, - "loss": 0.0373, - "step": 5240 - }, - { - "epoch": 2.3277814790139906, - "grad_norm": 0.4859563195815842, - "learning_rate": 4.4418976062375095e-06, - "loss": 0.0472, - "step": 5241 - }, - { - "epoch": 2.3282256273595383, - "grad_norm": 0.43082619257296717, - "learning_rate": 4.439971225576992e-06, - "loss": 0.0321, - "step": 5242 - }, - { - "epoch": 2.3286697757050856, - "grad_norm": 0.4148209810054243, - "learning_rate": 4.438044929098312e-06, - "loss": 0.027, - "step": 5243 - }, - { - "epoch": 2.329113924050633, - "grad_norm": 0.3906796792959702, - "learning_rate": 4.436118717091025e-06, - "loss": 0.0302, - "step": 5244 - }, - { - "epoch": 2.32955807239618, - "grad_norm": 0.5074169188006878, - "learning_rate": 4.434192589844674e-06, - "loss": 0.033, - "step": 5245 - }, - { - "epoch": 2.330002220741728, - "grad_norm": 0.5700168097501661, - "learning_rate": 4.432266547648786e-06, - "loss": 0.0386, - "step": 5246 - }, - { - "epoch": 2.330446369087275, - "grad_norm": 0.7002894794054985, - "learning_rate": 4.430340590792883e-06, - "loss": 0.0452, - "step": 5247 - }, - { - "epoch": 2.3308905174328225, - "grad_norm": 0.43587113620478884, - "learning_rate": 4.428414719566464e-06, - "loss": 0.0455, - "step": 5248 - }, - { - "epoch": 2.33133466577837, - "grad_norm": 0.4414200442172319, - "learning_rate": 4.426488934259023e-06, - "loss": 0.038, - "step": 5249 - }, - { - "epoch": 2.3317788141239175, - "grad_norm": 0.3603980656114873, - "learning_rate": 4.424563235160039e-06, - "loss": 0.0284, - "step": 5250 - }, - { - "epoch": 2.332222962469465, - "grad_norm": 0.3831150016755091, - "learning_rate": 4.422637622558973e-06, - "loss": 0.0302, - "step": 5251 - }, - { - "epoch": 2.332667110815012, - "grad_norm": 0.3992416324305334, - "learning_rate": 4.42071209674528e-06, - "loss": 0.0324, - "step": 5252 - }, - { - "epoch": 2.3331112591605594, - "grad_norm": 0.4795655074267854, - "learning_rate": 4.4187866580084005e-06, - "loss": 0.0456, - "step": 5253 - }, - { - "epoch": 2.333555407506107, - "grad_norm": 0.3484662534276821, - "learning_rate": 4.41686130663776e-06, - "loss": 0.0223, - "step": 5254 - }, - { - "epoch": 2.3339995558516544, - "grad_norm": 0.4234114697000452, - "learning_rate": 4.4149360429227695e-06, - "loss": 0.0296, - "step": 5255 - }, - { - "epoch": 2.3344437041972017, - "grad_norm": 0.4632049279941834, - "learning_rate": 4.4130108671528315e-06, - "loss": 0.0264, - "step": 5256 - }, - { - "epoch": 2.3348878525427494, - "grad_norm": 0.5054504530004318, - "learning_rate": 4.41108577961733e-06, - "loss": 0.0423, - "step": 5257 - }, - { - "epoch": 2.3353320008882967, - "grad_norm": 0.4132515605214912, - "learning_rate": 4.40916078060564e-06, - "loss": 0.026, - "step": 5258 - }, - { - "epoch": 2.335776149233844, - "grad_norm": 0.4886399938932667, - "learning_rate": 4.407235870407122e-06, - "loss": 0.0378, - "step": 5259 - }, - { - "epoch": 2.3362202975793913, - "grad_norm": 0.48169156288056764, - "learning_rate": 4.4053110493111226e-06, - "loss": 0.0366, - "step": 5260 - }, - { - "epoch": 2.336664445924939, - "grad_norm": 0.5264104312524703, - "learning_rate": 4.403386317606972e-06, - "loss": 0.0427, - "step": 5261 - }, - { - "epoch": 2.3371085942704863, - "grad_norm": 0.3726652002849068, - "learning_rate": 4.4014616755839955e-06, - "loss": 0.026, - "step": 5262 - }, - { - "epoch": 2.3375527426160336, - "grad_norm": 0.531223484070124, - "learning_rate": 4.399537123531494e-06, - "loss": 0.0389, - "step": 5263 - }, - { - "epoch": 2.3379968909615814, - "grad_norm": 0.5377515081195285, - "learning_rate": 4.3976126617387645e-06, - "loss": 0.0466, - "step": 5264 - }, - { - "epoch": 2.3384410393071287, - "grad_norm": 0.4256267011064791, - "learning_rate": 4.395688290495084e-06, - "loss": 0.0381, - "step": 5265 - }, - { - "epoch": 2.338885187652676, - "grad_norm": 0.4792927506755932, - "learning_rate": 4.393764010089719e-06, - "loss": 0.046, - "step": 5266 - }, - { - "epoch": 2.3393293359982232, - "grad_norm": 0.5281025771868061, - "learning_rate": 4.391839820811923e-06, - "loss": 0.0396, - "step": 5267 - }, - { - "epoch": 2.339773484343771, - "grad_norm": 0.4893018865311666, - "learning_rate": 4.389915722950931e-06, - "loss": 0.0368, - "step": 5268 - }, - { - "epoch": 2.3402176326893183, - "grad_norm": 0.4896668042908599, - "learning_rate": 4.387991716795968e-06, - "loss": 0.0406, - "step": 5269 - }, - { - "epoch": 2.3406617810348656, - "grad_norm": 0.39529432125848185, - "learning_rate": 4.386067802636249e-06, - "loss": 0.0323, - "step": 5270 - }, - { - "epoch": 2.3411059293804133, - "grad_norm": 0.38393804306481244, - "learning_rate": 4.384143980760968e-06, - "loss": 0.0321, - "step": 5271 - }, - { - "epoch": 2.3415500777259606, - "grad_norm": 0.4030478157940605, - "learning_rate": 4.382220251459306e-06, - "loss": 0.0348, - "step": 5272 - }, - { - "epoch": 2.341994226071508, - "grad_norm": 0.46335758617758316, - "learning_rate": 4.380296615020437e-06, - "loss": 0.0481, - "step": 5273 - }, - { - "epoch": 2.342438374417055, - "grad_norm": 0.39799741674310996, - "learning_rate": 4.3783730717335124e-06, - "loss": 0.0365, - "step": 5274 - }, - { - "epoch": 2.342882522762603, - "grad_norm": 0.6313177165283153, - "learning_rate": 4.376449621887674e-06, - "loss": 0.0357, - "step": 5275 - }, - { - "epoch": 2.34332667110815, - "grad_norm": 0.46331907567288616, - "learning_rate": 4.37452626577205e-06, - "loss": 0.0404, - "step": 5276 - }, - { - "epoch": 2.3437708194536975, - "grad_norm": 0.45182075889541223, - "learning_rate": 4.372603003675755e-06, - "loss": 0.0273, - "step": 5277 - }, - { - "epoch": 2.3442149677992448, - "grad_norm": 0.45396241594109277, - "learning_rate": 4.370679835887885e-06, - "loss": 0.0334, - "step": 5278 - }, - { - "epoch": 2.3446591161447925, - "grad_norm": 0.4008935982764722, - "learning_rate": 4.368756762697525e-06, - "loss": 0.0295, - "step": 5279 - }, - { - "epoch": 2.34510326449034, - "grad_norm": 0.4497120560175991, - "learning_rate": 4.366833784393746e-06, - "loss": 0.034, - "step": 5280 - }, - { - "epoch": 2.345547412835887, - "grad_norm": 0.5274538552861234, - "learning_rate": 4.364910901265607e-06, - "loss": 0.0358, - "step": 5281 - }, - { - "epoch": 2.3459915611814344, - "grad_norm": 0.5012732757402258, - "learning_rate": 4.362988113602147e-06, - "loss": 0.0351, - "step": 5282 - }, - { - "epoch": 2.346435709526982, - "grad_norm": 0.41702907826337277, - "learning_rate": 4.361065421692394e-06, - "loss": 0.0311, - "step": 5283 - }, - { - "epoch": 2.3468798578725294, - "grad_norm": 0.4723862662210319, - "learning_rate": 4.3591428258253634e-06, - "loss": 0.0283, - "step": 5284 - }, - { - "epoch": 2.3473240062180767, - "grad_norm": 0.4642888220640226, - "learning_rate": 4.35722032629005e-06, - "loss": 0.0341, - "step": 5285 - }, - { - "epoch": 2.3477681545636244, - "grad_norm": 0.5515201989991365, - "learning_rate": 4.35529792337544e-06, - "loss": 0.0426, - "step": 5286 - }, - { - "epoch": 2.3482123029091717, - "grad_norm": 0.42814364810008887, - "learning_rate": 4.353375617370506e-06, - "loss": 0.0386, - "step": 5287 - }, - { - "epoch": 2.348656451254719, - "grad_norm": 0.422713836528967, - "learning_rate": 4.3514534085642e-06, - "loss": 0.0477, - "step": 5288 - }, - { - "epoch": 2.3491005996002663, - "grad_norm": 0.388857891163739, - "learning_rate": 4.349531297245464e-06, - "loss": 0.0284, - "step": 5289 - }, - { - "epoch": 2.349544747945814, - "grad_norm": 0.33430013810360537, - "learning_rate": 4.347609283703224e-06, - "loss": 0.0222, - "step": 5290 - }, - { - "epoch": 2.3499888962913613, - "grad_norm": 0.4550663748035299, - "learning_rate": 4.345687368226391e-06, - "loss": 0.0484, - "step": 5291 - }, - { - "epoch": 2.3504330446369086, - "grad_norm": 0.4904858360330206, - "learning_rate": 4.343765551103859e-06, - "loss": 0.0431, - "step": 5292 - }, - { - "epoch": 2.3508771929824563, - "grad_norm": 0.418865690563144, - "learning_rate": 4.3418438326245134e-06, - "loss": 0.0264, - "step": 5293 - }, - { - "epoch": 2.3513213413280036, - "grad_norm": 0.3313326958383252, - "learning_rate": 4.33992221307722e-06, - "loss": 0.0359, - "step": 5294 - }, - { - "epoch": 2.351765489673551, - "grad_norm": 0.38342759888274064, - "learning_rate": 4.338000692750832e-06, - "loss": 0.03, - "step": 5295 - }, - { - "epoch": 2.352209638019098, - "grad_norm": 0.6016424768870177, - "learning_rate": 4.336079271934184e-06, - "loss": 0.0432, - "step": 5296 - }, - { - "epoch": 2.352653786364646, - "grad_norm": 0.5554548071020693, - "learning_rate": 4.334157950916098e-06, - "loss": 0.0659, - "step": 5297 - }, - { - "epoch": 2.3530979347101932, - "grad_norm": 0.5014661231015702, - "learning_rate": 4.332236729985385e-06, - "loss": 0.0343, - "step": 5298 - }, - { - "epoch": 2.3535420830557405, - "grad_norm": 0.5146747988924121, - "learning_rate": 4.330315609430835e-06, - "loss": 0.0351, - "step": 5299 - }, - { - "epoch": 2.3539862314012883, - "grad_norm": 0.3876173155044, - "learning_rate": 4.328394589541223e-06, - "loss": 0.0282, - "step": 5300 - }, - { - "epoch": 2.3544303797468356, - "grad_norm": 0.414824658223231, - "learning_rate": 4.326473670605315e-06, - "loss": 0.0323, - "step": 5301 - }, - { - "epoch": 2.354874528092383, - "grad_norm": 0.47728404717752354, - "learning_rate": 4.324552852911854e-06, - "loss": 0.0459, - "step": 5302 - }, - { - "epoch": 2.35531867643793, - "grad_norm": 0.40142086992053616, - "learning_rate": 4.322632136749572e-06, - "loss": 0.0329, - "step": 5303 - }, - { - "epoch": 2.355762824783478, - "grad_norm": 0.4704980131084727, - "learning_rate": 4.3207115224071874e-06, - "loss": 0.0366, - "step": 5304 - }, - { - "epoch": 2.356206973129025, - "grad_norm": 0.402777301290612, - "learning_rate": 4.318791010173401e-06, - "loss": 0.0319, - "step": 5305 - }, - { - "epoch": 2.3566511214745725, - "grad_norm": 0.41278158938376935, - "learning_rate": 4.316870600336896e-06, - "loss": 0.0217, - "step": 5306 - }, - { - "epoch": 2.3570952698201197, - "grad_norm": 0.5124207452989102, - "learning_rate": 4.314950293186346e-06, - "loss": 0.0398, - "step": 5307 - }, - { - "epoch": 2.3575394181656675, - "grad_norm": 0.4244779016373727, - "learning_rate": 4.3130300890104035e-06, - "loss": 0.0263, - "step": 5308 - }, - { - "epoch": 2.3579835665112148, - "grad_norm": 0.6692076129286184, - "learning_rate": 4.311109988097706e-06, - "loss": 0.0347, - "step": 5309 - }, - { - "epoch": 2.358427714856762, - "grad_norm": 0.5345819569804751, - "learning_rate": 4.30918999073688e-06, - "loss": 0.0406, - "step": 5310 - }, - { - "epoch": 2.3588718632023093, - "grad_norm": 0.4416731105459865, - "learning_rate": 4.307270097216535e-06, - "loss": 0.0478, - "step": 5311 - }, - { - "epoch": 2.359316011547857, - "grad_norm": 0.4789720007194078, - "learning_rate": 4.305350307825261e-06, - "loss": 0.0347, - "step": 5312 - }, - { - "epoch": 2.3597601598934044, - "grad_norm": 0.39908333172506805, - "learning_rate": 4.303430622851635e-06, - "loss": 0.0346, - "step": 5313 - }, - { - "epoch": 2.3602043082389517, - "grad_norm": 0.49076992885563064, - "learning_rate": 4.301511042584219e-06, - "loss": 0.035, - "step": 5314 - }, - { - "epoch": 2.3606484565844994, - "grad_norm": 0.3862322602452697, - "learning_rate": 4.29959156731156e-06, - "loss": 0.0409, - "step": 5315 - }, - { - "epoch": 2.3610926049300467, - "grad_norm": 0.4148195535388891, - "learning_rate": 4.297672197322186e-06, - "loss": 0.0382, - "step": 5316 - }, - { - "epoch": 2.361536753275594, - "grad_norm": 0.5373449885131869, - "learning_rate": 4.29575293290461e-06, - "loss": 0.0371, - "step": 5317 - }, - { - "epoch": 2.3619809016211413, - "grad_norm": 0.41088601906859734, - "learning_rate": 4.293833774347333e-06, - "loss": 0.0297, - "step": 5318 - }, - { - "epoch": 2.362425049966689, - "grad_norm": 1.1416110662239514, - "learning_rate": 4.291914721938835e-06, - "loss": 0.0382, - "step": 5319 - }, - { - "epoch": 2.3628691983122363, - "grad_norm": 0.4504483504568535, - "learning_rate": 4.289995775967581e-06, - "loss": 0.0297, - "step": 5320 - }, - { - "epoch": 2.3633133466577836, - "grad_norm": 0.38320982061969394, - "learning_rate": 4.2880769367220234e-06, - "loss": 0.024, - "step": 5321 - }, - { - "epoch": 2.3637574950033313, - "grad_norm": 0.3866521874991146, - "learning_rate": 4.2861582044905966e-06, - "loss": 0.0264, - "step": 5322 - }, - { - "epoch": 2.3642016433488786, - "grad_norm": 0.5381622631772952, - "learning_rate": 4.284239579561718e-06, - "loss": 0.0514, - "step": 5323 - }, - { - "epoch": 2.364645791694426, - "grad_norm": 0.4285272602850889, - "learning_rate": 4.282321062223788e-06, - "loss": 0.0365, - "step": 5324 - }, - { - "epoch": 2.365089940039973, - "grad_norm": 0.5903609551581849, - "learning_rate": 4.280402652765194e-06, - "loss": 0.0428, - "step": 5325 - }, - { - "epoch": 2.365534088385521, - "grad_norm": 0.6051857396994949, - "learning_rate": 4.278484351474303e-06, - "loss": 0.0336, - "step": 5326 - }, - { - "epoch": 2.365978236731068, - "grad_norm": 0.569315522539356, - "learning_rate": 4.2765661586394736e-06, - "loss": 0.0334, - "step": 5327 - }, - { - "epoch": 2.3664223850766155, - "grad_norm": 0.6976731675123818, - "learning_rate": 4.2746480745490385e-06, - "loss": 0.0367, - "step": 5328 - }, - { - "epoch": 2.3668665334221632, - "grad_norm": 0.5611945388042203, - "learning_rate": 4.272730099491319e-06, - "loss": 0.0407, - "step": 5329 - }, - { - "epoch": 2.3673106817677105, - "grad_norm": 0.49730153273797245, - "learning_rate": 4.27081223375462e-06, - "loss": 0.038, - "step": 5330 - }, - { - "epoch": 2.367754830113258, - "grad_norm": 0.5313463154313218, - "learning_rate": 4.268894477627229e-06, - "loss": 0.0435, - "step": 5331 - }, - { - "epoch": 2.368198978458805, - "grad_norm": 0.48983566744126533, - "learning_rate": 4.2669768313974155e-06, - "loss": 0.0373, - "step": 5332 - }, - { - "epoch": 2.3686431268043524, - "grad_norm": 0.5179274904316602, - "learning_rate": 4.265059295353439e-06, - "loss": 0.0445, - "step": 5333 - }, - { - "epoch": 2.3690872751499, - "grad_norm": 0.5588093911942073, - "learning_rate": 4.2631418697835335e-06, - "loss": 0.0351, - "step": 5334 - }, - { - "epoch": 2.3695314234954474, - "grad_norm": 0.3586724840114786, - "learning_rate": 4.261224554975923e-06, - "loss": 0.0214, - "step": 5335 - }, - { - "epoch": 2.3699755718409947, - "grad_norm": 0.418317991811463, - "learning_rate": 4.259307351218812e-06, - "loss": 0.0301, - "step": 5336 - }, - { - "epoch": 2.3704197201865425, - "grad_norm": 0.33540637763336467, - "learning_rate": 4.2573902588003844e-06, - "loss": 0.023, - "step": 5337 - }, - { - "epoch": 2.3708638685320897, - "grad_norm": 0.45375734251758604, - "learning_rate": 4.2554732780088185e-06, - "loss": 0.0394, - "step": 5338 - }, - { - "epoch": 2.371308016877637, - "grad_norm": 0.5337079104144298, - "learning_rate": 4.253556409132267e-06, - "loss": 0.0441, - "step": 5339 - }, - { - "epoch": 2.3717521652231843, - "grad_norm": 0.5739232408785224, - "learning_rate": 4.251639652458866e-06, - "loss": 0.0417, - "step": 5340 - }, - { - "epoch": 2.372196313568732, - "grad_norm": 0.6130343197365772, - "learning_rate": 4.249723008276737e-06, - "loss": 0.0387, - "step": 5341 - }, - { - "epoch": 2.3726404619142794, - "grad_norm": 0.4104117521503695, - "learning_rate": 4.247806476873987e-06, - "loss": 0.033, - "step": 5342 - }, - { - "epoch": 2.3730846102598266, - "grad_norm": 0.48677001689089716, - "learning_rate": 4.245890058538697e-06, - "loss": 0.0377, - "step": 5343 - }, - { - "epoch": 2.3735287586053744, - "grad_norm": 0.3714192923602575, - "learning_rate": 4.2439737535589455e-06, - "loss": 0.0294, - "step": 5344 - }, - { - "epoch": 2.3739729069509217, - "grad_norm": 0.40375546238165533, - "learning_rate": 4.2420575622227786e-06, - "loss": 0.0314, - "step": 5345 - }, - { - "epoch": 2.374417055296469, - "grad_norm": 0.7948951370956706, - "learning_rate": 4.240141484818238e-06, - "loss": 0.0393, - "step": 5346 - }, - { - "epoch": 2.3748612036420162, - "grad_norm": 0.7705250912114215, - "learning_rate": 4.238225521633339e-06, - "loss": 0.0446, - "step": 5347 - }, - { - "epoch": 2.375305351987564, - "grad_norm": 0.5226003605622348, - "learning_rate": 4.2363096729560824e-06, - "loss": 0.0397, - "step": 5348 - }, - { - "epoch": 2.3757495003331113, - "grad_norm": 0.37536659049033705, - "learning_rate": 4.234393939074456e-06, - "loss": 0.0264, - "step": 5349 - }, - { - "epoch": 2.3761936486786586, - "grad_norm": 0.35662293585999577, - "learning_rate": 4.2324783202764265e-06, - "loss": 0.0294, - "step": 5350 - }, - { - "epoch": 2.3766377970242063, - "grad_norm": 0.5673307864129802, - "learning_rate": 4.230562816849944e-06, - "loss": 0.0471, - "step": 5351 - }, - { - "epoch": 2.3770819453697536, - "grad_norm": 0.42418386406735814, - "learning_rate": 4.228647429082939e-06, - "loss": 0.0306, - "step": 5352 - }, - { - "epoch": 2.377526093715301, - "grad_norm": 0.5746067334007909, - "learning_rate": 4.22673215726333e-06, - "loss": 0.031, - "step": 5353 - }, - { - "epoch": 2.377970242060848, - "grad_norm": 0.3492096810181002, - "learning_rate": 4.224817001679011e-06, - "loss": 0.021, - "step": 5354 - }, - { - "epoch": 2.378414390406396, - "grad_norm": 0.5532015823221401, - "learning_rate": 4.222901962617867e-06, - "loss": 0.0432, - "step": 5355 - }, - { - "epoch": 2.378858538751943, - "grad_norm": 0.5532405005608227, - "learning_rate": 4.220987040367757e-06, - "loss": 0.0407, - "step": 5356 - }, - { - "epoch": 2.3793026870974905, - "grad_norm": 0.35180832498879067, - "learning_rate": 4.219072235216529e-06, - "loss": 0.028, - "step": 5357 - }, - { - "epoch": 2.379746835443038, - "grad_norm": 0.7448501299403424, - "learning_rate": 4.2171575474520084e-06, - "loss": 0.0328, - "step": 5358 - }, - { - "epoch": 2.3801909837885855, - "grad_norm": 0.4554992316854567, - "learning_rate": 4.215242977362009e-06, - "loss": 0.0301, - "step": 5359 - }, - { - "epoch": 2.380635132134133, - "grad_norm": 0.5712009195638152, - "learning_rate": 4.213328525234317e-06, - "loss": 0.0344, - "step": 5360 - }, - { - "epoch": 2.38107928047968, - "grad_norm": 0.46746160638085626, - "learning_rate": 4.211414191356714e-06, - "loss": 0.0387, - "step": 5361 - }, - { - "epoch": 2.3815234288252274, - "grad_norm": 0.4318040553120997, - "learning_rate": 4.209499976016953e-06, - "loss": 0.0359, - "step": 5362 - }, - { - "epoch": 2.381967577170775, - "grad_norm": 0.4278461319811184, - "learning_rate": 4.2075858795027745e-06, - "loss": 0.0368, - "step": 5363 - }, - { - "epoch": 2.3824117255163224, - "grad_norm": 0.32663597876713696, - "learning_rate": 4.205671902101899e-06, - "loss": 0.0256, - "step": 5364 - }, - { - "epoch": 2.3828558738618697, - "grad_norm": 0.389353159520307, - "learning_rate": 4.203758044102029e-06, - "loss": 0.0259, - "step": 5365 - }, - { - "epoch": 2.3833000222074174, - "grad_norm": 0.5653053154481631, - "learning_rate": 4.2018443057908495e-06, - "loss": 0.047, - "step": 5366 - }, - { - "epoch": 2.3837441705529647, - "grad_norm": 0.34982509990328564, - "learning_rate": 4.199930687456031e-06, - "loss": 0.0267, - "step": 5367 - }, - { - "epoch": 2.384188318898512, - "grad_norm": 0.41740396260225204, - "learning_rate": 4.198017189385221e-06, - "loss": 0.0293, - "step": 5368 - }, - { - "epoch": 2.3846324672440593, - "grad_norm": 0.38322501636096945, - "learning_rate": 4.1961038118660504e-06, - "loss": 0.0329, - "step": 5369 - }, - { - "epoch": 2.385076615589607, - "grad_norm": 0.45223291597674525, - "learning_rate": 4.194190555186133e-06, - "loss": 0.0347, - "step": 5370 - }, - { - "epoch": 2.3855207639351543, - "grad_norm": 0.5113109910932524, - "learning_rate": 4.1922774196330614e-06, - "loss": 0.037, - "step": 5371 - }, - { - "epoch": 2.3859649122807016, - "grad_norm": 0.4161439641942477, - "learning_rate": 4.190364405494417e-06, - "loss": 0.032, - "step": 5372 - }, - { - "epoch": 2.3864090606262494, - "grad_norm": 0.3241401849079966, - "learning_rate": 4.1884515130577545e-06, - "loss": 0.0277, - "step": 5373 - }, - { - "epoch": 2.3868532089717966, - "grad_norm": 0.41604888730983397, - "learning_rate": 4.1865387426106165e-06, - "loss": 0.0342, - "step": 5374 - }, - { - "epoch": 2.387297357317344, - "grad_norm": 0.4441137282989938, - "learning_rate": 4.184626094440524e-06, - "loss": 0.0284, - "step": 5375 - }, - { - "epoch": 2.3877415056628912, - "grad_norm": 0.5249221470565395, - "learning_rate": 4.182713568834979e-06, - "loss": 0.0363, - "step": 5376 - }, - { - "epoch": 2.388185654008439, - "grad_norm": 0.5338153166350271, - "learning_rate": 4.180801166081466e-06, - "loss": 0.0505, - "step": 5377 - }, - { - "epoch": 2.3886298023539863, - "grad_norm": 0.6473388727476899, - "learning_rate": 4.178888886467457e-06, - "loss": 0.0426, - "step": 5378 - }, - { - "epoch": 2.3890739506995335, - "grad_norm": 0.362371921356082, - "learning_rate": 4.176976730280396e-06, - "loss": 0.0367, - "step": 5379 - }, - { - "epoch": 2.3895180990450813, - "grad_norm": 0.401397401331949, - "learning_rate": 4.175064697807712e-06, - "loss": 0.0315, - "step": 5380 - }, - { - "epoch": 2.3899622473906286, - "grad_norm": 0.4918164976839154, - "learning_rate": 4.173152789336818e-06, - "loss": 0.0377, - "step": 5381 - }, - { - "epoch": 2.390406395736176, - "grad_norm": 0.4428239856696088, - "learning_rate": 4.171241005155105e-06, - "loss": 0.0424, - "step": 5382 - }, - { - "epoch": 2.390850544081723, - "grad_norm": 0.5071830732288508, - "learning_rate": 4.169329345549945e-06, - "loss": 0.0343, - "step": 5383 - }, - { - "epoch": 2.391294692427271, - "grad_norm": 0.5154632693129214, - "learning_rate": 4.167417810808698e-06, - "loss": 0.0342, - "step": 5384 - }, - { - "epoch": 2.391738840772818, - "grad_norm": 0.6827121614415474, - "learning_rate": 4.165506401218697e-06, - "loss": 0.0537, - "step": 5385 - }, - { - "epoch": 2.3921829891183655, - "grad_norm": 0.5360898324290861, - "learning_rate": 4.163595117067258e-06, - "loss": 0.0415, - "step": 5386 - }, - { - "epoch": 2.3926271374639128, - "grad_norm": 0.4268060401836661, - "learning_rate": 4.1616839586416825e-06, - "loss": 0.0295, - "step": 5387 - }, - { - "epoch": 2.3930712858094605, - "grad_norm": 0.6427383929847603, - "learning_rate": 4.159772926229247e-06, - "loss": 0.0312, - "step": 5388 - }, - { - "epoch": 2.393515434155008, - "grad_norm": 0.5325554425749491, - "learning_rate": 4.1578620201172144e-06, - "loss": 0.0354, - "step": 5389 - }, - { - "epoch": 2.393959582500555, - "grad_norm": 0.43938556377430205, - "learning_rate": 4.155951240592825e-06, - "loss": 0.0372, - "step": 5390 - }, - { - "epoch": 2.3944037308461024, - "grad_norm": 0.5152442233098135, - "learning_rate": 4.154040587943303e-06, - "loss": 0.0402, - "step": 5391 - }, - { - "epoch": 2.39484787919165, - "grad_norm": 0.42703323082019723, - "learning_rate": 4.1521300624558516e-06, - "loss": 0.0373, - "step": 5392 - }, - { - "epoch": 2.3952920275371974, - "grad_norm": 0.402722970762066, - "learning_rate": 4.150219664417653e-06, - "loss": 0.0311, - "step": 5393 - }, - { - "epoch": 2.3957361758827447, - "grad_norm": 0.6214432623647976, - "learning_rate": 4.148309394115872e-06, - "loss": 0.0364, - "step": 5394 - }, - { - "epoch": 2.3961803242282924, - "grad_norm": 0.44276990850604514, - "learning_rate": 4.14639925183766e-06, - "loss": 0.0341, - "step": 5395 - }, - { - "epoch": 2.3966244725738397, - "grad_norm": 0.4294877304529703, - "learning_rate": 4.144489237870141e-06, - "loss": 0.0298, - "step": 5396 - }, - { - "epoch": 2.397068620919387, - "grad_norm": 0.46553831317336336, - "learning_rate": 4.142579352500421e-06, - "loss": 0.0318, - "step": 5397 - }, - { - "epoch": 2.3975127692649343, - "grad_norm": 0.4319310212143851, - "learning_rate": 4.14066959601559e-06, - "loss": 0.0412, - "step": 5398 - }, - { - "epoch": 2.397956917610482, - "grad_norm": 0.6999580389169097, - "learning_rate": 4.138759968702716e-06, - "loss": 0.0496, - "step": 5399 - }, - { - "epoch": 2.3984010659560293, - "grad_norm": 0.6261420455213397, - "learning_rate": 4.1368504708488476e-06, - "loss": 0.0387, - "step": 5400 - }, - { - "epoch": 2.3988452143015766, - "grad_norm": 0.352239243573041, - "learning_rate": 4.134941102741016e-06, - "loss": 0.0286, - "step": 5401 - }, - { - "epoch": 2.3992893626471243, - "grad_norm": 0.4855267366461917, - "learning_rate": 4.133031864666232e-06, - "loss": 0.042, - "step": 5402 - }, - { - "epoch": 2.3997335109926716, - "grad_norm": 0.48074181838542007, - "learning_rate": 4.1311227569114855e-06, - "loss": 0.0516, - "step": 5403 - }, - { - "epoch": 2.400177659338219, - "grad_norm": 0.4770686273769997, - "learning_rate": 4.12921377976375e-06, - "loss": 0.0378, - "step": 5404 - }, - { - "epoch": 2.400621807683766, - "grad_norm": 0.5247613321525735, - "learning_rate": 4.127304933509972e-06, - "loss": 0.0414, - "step": 5405 - }, - { - "epoch": 2.401065956029314, - "grad_norm": 0.4623711119275284, - "learning_rate": 4.125396218437089e-06, - "loss": 0.0589, - "step": 5406 - }, - { - "epoch": 2.4015101043748612, - "grad_norm": 0.49698176360744994, - "learning_rate": 4.123487634832011e-06, - "loss": 0.0456, - "step": 5407 - }, - { - "epoch": 2.4019542527204085, - "grad_norm": 0.4203922139028562, - "learning_rate": 4.121579182981632e-06, - "loss": 0.0312, - "step": 5408 - }, - { - "epoch": 2.4023984010659563, - "grad_norm": 0.3669217976875289, - "learning_rate": 4.119670863172824e-06, - "loss": 0.0289, - "step": 5409 - }, - { - "epoch": 2.4028425494115035, - "grad_norm": 0.38804870930885565, - "learning_rate": 4.117762675692437e-06, - "loss": 0.0251, - "step": 5410 - }, - { - "epoch": 2.403286697757051, - "grad_norm": 0.45435459736787537, - "learning_rate": 4.115854620827306e-06, - "loss": 0.0257, - "step": 5411 - }, - { - "epoch": 2.403730846102598, - "grad_norm": 0.46297467981750196, - "learning_rate": 4.1139466988642475e-06, - "loss": 0.0433, - "step": 5412 - }, - { - "epoch": 2.404174994448146, - "grad_norm": 0.4824849195132302, - "learning_rate": 4.11203891009005e-06, - "loss": 0.0484, - "step": 5413 - }, - { - "epoch": 2.404619142793693, - "grad_norm": 0.45065972785277525, - "learning_rate": 4.110131254791489e-06, - "loss": 0.028, - "step": 5414 - }, - { - "epoch": 2.4050632911392404, - "grad_norm": 0.5431313239180846, - "learning_rate": 4.108223733255316e-06, - "loss": 0.0442, - "step": 5415 - }, - { - "epoch": 2.4055074394847877, - "grad_norm": 0.46821071422858296, - "learning_rate": 4.106316345768265e-06, - "loss": 0.027, - "step": 5416 - }, - { - "epoch": 2.4059515878303355, - "grad_norm": 0.4155972801004647, - "learning_rate": 4.104409092617047e-06, - "loss": 0.033, - "step": 5417 - }, - { - "epoch": 2.4063957361758828, - "grad_norm": 0.6893775471251186, - "learning_rate": 4.1025019740883556e-06, - "loss": 0.0504, - "step": 5418 - }, - { - "epoch": 2.40683988452143, - "grad_norm": 0.4083036792334355, - "learning_rate": 4.100594990468865e-06, - "loss": 0.0333, - "step": 5419 - }, - { - "epoch": 2.4072840328669773, - "grad_norm": 0.4849564201026396, - "learning_rate": 4.0986881420452254e-06, - "loss": 0.0338, - "step": 5420 - }, - { - "epoch": 2.407728181212525, - "grad_norm": 0.4268462015661056, - "learning_rate": 4.096781429104068e-06, - "loss": 0.0341, - "step": 5421 - }, - { - "epoch": 2.4081723295580724, - "grad_norm": 0.4834546635420465, - "learning_rate": 4.094874851932002e-06, - "loss": 0.035, - "step": 5422 - }, - { - "epoch": 2.4086164779036197, - "grad_norm": 0.46585094752698974, - "learning_rate": 4.092968410815625e-06, - "loss": 0.0288, - "step": 5423 - }, - { - "epoch": 2.4090606262491674, - "grad_norm": 0.6920655509720604, - "learning_rate": 4.091062106041504e-06, - "loss": 0.0296, - "step": 5424 - }, - { - "epoch": 2.4095047745947147, - "grad_norm": 0.5102739827979796, - "learning_rate": 4.089155937896187e-06, - "loss": 0.0316, - "step": 5425 - }, - { - "epoch": 2.409948922940262, - "grad_norm": 0.36415630770650137, - "learning_rate": 4.087249906666206e-06, - "loss": 0.0296, - "step": 5426 - }, - { - "epoch": 2.4103930712858093, - "grad_norm": 0.3263196544116076, - "learning_rate": 4.085344012638067e-06, - "loss": 0.0273, - "step": 5427 - }, - { - "epoch": 2.410837219631357, - "grad_norm": 0.5467141941835238, - "learning_rate": 4.083438256098261e-06, - "loss": 0.0368, - "step": 5428 - }, - { - "epoch": 2.4112813679769043, - "grad_norm": 0.5887360709523032, - "learning_rate": 4.081532637333255e-06, - "loss": 0.0389, - "step": 5429 - }, - { - "epoch": 2.4117255163224516, - "grad_norm": 0.4074807509483992, - "learning_rate": 4.079627156629497e-06, - "loss": 0.0239, - "step": 5430 - }, - { - "epoch": 2.4121696646679993, - "grad_norm": 0.3424789553984037, - "learning_rate": 4.07772181427341e-06, - "loss": 0.0301, - "step": 5431 - }, - { - "epoch": 2.4126138130135466, - "grad_norm": 0.607202231589749, - "learning_rate": 4.075816610551402e-06, - "loss": 0.0496, - "step": 5432 - }, - { - "epoch": 2.413057961359094, - "grad_norm": 0.47864092786503787, - "learning_rate": 4.073911545749857e-06, - "loss": 0.0316, - "step": 5433 - }, - { - "epoch": 2.413502109704641, - "grad_norm": 0.5034557440074656, - "learning_rate": 4.072006620155136e-06, - "loss": 0.026, - "step": 5434 - }, - { - "epoch": 2.413946258050189, - "grad_norm": 0.36403294815194714, - "learning_rate": 4.070101834053585e-06, - "loss": 0.0304, - "step": 5435 - }, - { - "epoch": 2.414390406395736, - "grad_norm": 0.4715313044352068, - "learning_rate": 4.068197187731526e-06, - "loss": 0.0369, - "step": 5436 - }, - { - "epoch": 2.4148345547412835, - "grad_norm": 0.27768669661494494, - "learning_rate": 4.066292681475257e-06, - "loss": 0.0174, - "step": 5437 - }, - { - "epoch": 2.4152787030868312, - "grad_norm": 0.6691350505746541, - "learning_rate": 4.064388315571059e-06, - "loss": 0.0318, - "step": 5438 - }, - { - "epoch": 2.4157228514323785, - "grad_norm": 0.4180829624879483, - "learning_rate": 4.062484090305191e-06, - "loss": 0.0416, - "step": 5439 - }, - { - "epoch": 2.416166999777926, - "grad_norm": 0.3034927493418548, - "learning_rate": 4.060580005963888e-06, - "loss": 0.0249, - "step": 5440 - }, - { - "epoch": 2.416611148123473, - "grad_norm": 0.42896525131188157, - "learning_rate": 4.05867606283337e-06, - "loss": 0.0345, - "step": 5441 - }, - { - "epoch": 2.417055296469021, - "grad_norm": 0.356634005220255, - "learning_rate": 4.0567722611998285e-06, - "loss": 0.0244, - "step": 5442 - }, - { - "epoch": 2.417499444814568, - "grad_norm": 0.49362452123065936, - "learning_rate": 4.054868601349441e-06, - "loss": 0.0349, - "step": 5443 - }, - { - "epoch": 2.4179435931601154, - "grad_norm": 0.3476206524561192, - "learning_rate": 4.052965083568356e-06, - "loss": 0.0262, - "step": 5444 - }, - { - "epoch": 2.4183877415056627, - "grad_norm": 0.4046428177539252, - "learning_rate": 4.051061708142705e-06, - "loss": 0.0308, - "step": 5445 - }, - { - "epoch": 2.4188318898512104, - "grad_norm": 0.6886851057569472, - "learning_rate": 4.0491584753586e-06, - "loss": 0.0489, - "step": 5446 - }, - { - "epoch": 2.4192760381967577, - "grad_norm": 0.4422724381498167, - "learning_rate": 4.047255385502129e-06, - "loss": 0.0338, - "step": 5447 - }, - { - "epoch": 2.419720186542305, - "grad_norm": 0.35168629623661846, - "learning_rate": 4.045352438859359e-06, - "loss": 0.0202, - "step": 5448 - }, - { - "epoch": 2.4201643348878523, - "grad_norm": 0.449273667282489, - "learning_rate": 4.043449635716332e-06, - "loss": 0.0396, - "step": 5449 - }, - { - "epoch": 2.4206084832334, - "grad_norm": 0.5131783172439474, - "learning_rate": 4.0415469763590745e-06, - "loss": 0.0416, - "step": 5450 - }, - { - "epoch": 2.4210526315789473, - "grad_norm": 0.46807114802560923, - "learning_rate": 4.0396444610735865e-06, - "loss": 0.0439, - "step": 5451 - }, - { - "epoch": 2.4214967799244946, - "grad_norm": 0.6426317590879459, - "learning_rate": 4.037742090145851e-06, - "loss": 0.05, - "step": 5452 - }, - { - "epoch": 2.4219409282700424, - "grad_norm": 0.5241231307427713, - "learning_rate": 4.0358398638618245e-06, - "loss": 0.0279, - "step": 5453 - }, - { - "epoch": 2.4223850766155897, - "grad_norm": 0.3603709645668666, - "learning_rate": 4.033937782507445e-06, - "loss": 0.0323, - "step": 5454 - }, - { - "epoch": 2.422829224961137, - "grad_norm": 0.4940232401715231, - "learning_rate": 4.032035846368627e-06, - "loss": 0.0316, - "step": 5455 - }, - { - "epoch": 2.4232733733066842, - "grad_norm": 0.4547687513915308, - "learning_rate": 4.030134055731266e-06, - "loss": 0.0398, - "step": 5456 - }, - { - "epoch": 2.423717521652232, - "grad_norm": 0.42013634054715737, - "learning_rate": 4.028232410881228e-06, - "loss": 0.0218, - "step": 5457 - }, - { - "epoch": 2.4241616699977793, - "grad_norm": 0.44875489093858906, - "learning_rate": 4.026330912104369e-06, - "loss": 0.0447, - "step": 5458 - }, - { - "epoch": 2.4246058183433266, - "grad_norm": 0.827939430580337, - "learning_rate": 4.024429559686513e-06, - "loss": 0.0678, - "step": 5459 - }, - { - "epoch": 2.4250499666888743, - "grad_norm": 0.7485263459812835, - "learning_rate": 4.022528353913466e-06, - "loss": 0.0442, - "step": 5460 - }, - { - "epoch": 2.4254941150344216, - "grad_norm": 0.3550612167644366, - "learning_rate": 4.020627295071012e-06, - "loss": 0.0334, - "step": 5461 - }, - { - "epoch": 2.425938263379969, - "grad_norm": 0.5079952410434588, - "learning_rate": 4.018726383444911e-06, - "loss": 0.0424, - "step": 5462 - }, - { - "epoch": 2.426382411725516, - "grad_norm": 0.3588389906173018, - "learning_rate": 4.016825619320904e-06, - "loss": 0.0256, - "step": 5463 - }, - { - "epoch": 2.426826560071064, - "grad_norm": 0.37557127358260195, - "learning_rate": 4.014925002984708e-06, - "loss": 0.027, - "step": 5464 - }, - { - "epoch": 2.427270708416611, - "grad_norm": 0.3878661608771635, - "learning_rate": 4.013024534722018e-06, - "loss": 0.0331, - "step": 5465 - }, - { - "epoch": 2.4277148567621585, - "grad_norm": 0.5433675019773777, - "learning_rate": 4.011124214818506e-06, - "loss": 0.032, - "step": 5466 - }, - { - "epoch": 2.428159005107706, - "grad_norm": 0.3899751784915578, - "learning_rate": 4.0092240435598225e-06, - "loss": 0.0356, - "step": 5467 - }, - { - "epoch": 2.4286031534532535, - "grad_norm": 0.4327878275713323, - "learning_rate": 4.007324021231594e-06, - "loss": 0.0319, - "step": 5468 - }, - { - "epoch": 2.429047301798801, - "grad_norm": 0.49698866083093113, - "learning_rate": 4.00542414811943e-06, - "loss": 0.0305, - "step": 5469 - }, - { - "epoch": 2.429491450144348, - "grad_norm": 0.4026428626742401, - "learning_rate": 4.00352442450891e-06, - "loss": 0.0348, - "step": 5470 - }, - { - "epoch": 2.4299355984898954, - "grad_norm": 0.48636219393391805, - "learning_rate": 4.001624850685598e-06, - "loss": 0.0346, - "step": 5471 - }, - { - "epoch": 2.430379746835443, - "grad_norm": 0.4112453328110524, - "learning_rate": 3.999725426935029e-06, - "loss": 0.0428, - "step": 5472 - }, - { - "epoch": 2.4308238951809904, - "grad_norm": 0.5337831095831886, - "learning_rate": 3.99782615354272e-06, - "loss": 0.0442, - "step": 5473 - }, - { - "epoch": 2.4312680435265377, - "grad_norm": 0.37305930397141557, - "learning_rate": 3.995927030794163e-06, - "loss": 0.0357, - "step": 5474 - }, - { - "epoch": 2.4317121918720854, - "grad_norm": 0.4326214227306239, - "learning_rate": 3.994028058974832e-06, - "loss": 0.0294, - "step": 5475 - }, - { - "epoch": 2.4321563402176327, - "grad_norm": 0.35769209966498045, - "learning_rate": 3.992129238370171e-06, - "loss": 0.033, - "step": 5476 - }, - { - "epoch": 2.43260048856318, - "grad_norm": 0.5540164347858187, - "learning_rate": 3.9902305692656056e-06, - "loss": 0.0396, - "step": 5477 - }, - { - "epoch": 2.4330446369087273, - "grad_norm": 0.6878308326999009, - "learning_rate": 3.98833205194654e-06, - "loss": 0.0291, - "step": 5478 - }, - { - "epoch": 2.433488785254275, - "grad_norm": 0.6656069204526499, - "learning_rate": 3.98643368669835e-06, - "loss": 0.0377, - "step": 5479 - }, - { - "epoch": 2.4339329335998223, - "grad_norm": 0.36440064361824326, - "learning_rate": 3.984535473806395e-06, - "loss": 0.031, - "step": 5480 - }, - { - "epoch": 2.4343770819453696, - "grad_norm": 0.46753874608394186, - "learning_rate": 3.98263741355601e-06, - "loss": 0.0296, - "step": 5481 - }, - { - "epoch": 2.4348212302909173, - "grad_norm": 0.41167122039004567, - "learning_rate": 3.980739506232503e-06, - "loss": 0.0308, - "step": 5482 - }, - { - "epoch": 2.4352653786364646, - "grad_norm": 0.4469078274911405, - "learning_rate": 3.978841752121161e-06, - "loss": 0.0355, - "step": 5483 - }, - { - "epoch": 2.435709526982012, - "grad_norm": 0.39826295605317574, - "learning_rate": 3.976944151507251e-06, - "loss": 0.0288, - "step": 5484 - }, - { - "epoch": 2.436153675327559, - "grad_norm": 0.4535357297321569, - "learning_rate": 3.975046704676014e-06, - "loss": 0.0286, - "step": 5485 - }, - { - "epoch": 2.436597823673107, - "grad_norm": 0.4396407655662091, - "learning_rate": 3.973149411912668e-06, - "loss": 0.0305, - "step": 5486 - }, - { - "epoch": 2.4370419720186542, - "grad_norm": 0.3507364407629326, - "learning_rate": 3.971252273502407e-06, - "loss": 0.0248, - "step": 5487 - }, - { - "epoch": 2.4374861203642015, - "grad_norm": 0.3450849459589068, - "learning_rate": 3.969355289730407e-06, - "loss": 0.0206, - "step": 5488 - }, - { - "epoch": 2.4379302687097493, - "grad_norm": 0.503140049702594, - "learning_rate": 3.967458460881815e-06, - "loss": 0.04, - "step": 5489 - }, - { - "epoch": 2.4383744170552966, - "grad_norm": 0.5640472250485181, - "learning_rate": 3.965561787241754e-06, - "loss": 0.0404, - "step": 5490 - }, - { - "epoch": 2.438818565400844, - "grad_norm": 0.6056306520006238, - "learning_rate": 3.963665269095328e-06, - "loss": 0.0404, - "step": 5491 - }, - { - "epoch": 2.439262713746391, - "grad_norm": 0.4258100611472551, - "learning_rate": 3.961768906727618e-06, - "loss": 0.028, - "step": 5492 - }, - { - "epoch": 2.439706862091939, - "grad_norm": 0.38264655994704755, - "learning_rate": 3.959872700423678e-06, - "loss": 0.0256, - "step": 5493 - }, - { - "epoch": 2.440151010437486, - "grad_norm": 0.389861248826636, - "learning_rate": 3.957976650468539e-06, - "loss": 0.0333, - "step": 5494 - }, - { - "epoch": 2.4405951587830335, - "grad_norm": 0.6710261752550767, - "learning_rate": 3.956080757147211e-06, - "loss": 0.0288, - "step": 5495 - }, - { - "epoch": 2.441039307128581, - "grad_norm": 0.533539192209703, - "learning_rate": 3.9541850207446754e-06, - "loss": 0.029, - "step": 5496 - }, - { - "epoch": 2.4414834554741285, - "grad_norm": 0.7167183091153162, - "learning_rate": 3.9522894415459e-06, - "loss": 0.0473, - "step": 5497 - }, - { - "epoch": 2.4419276038196758, - "grad_norm": 0.322074964494659, - "learning_rate": 3.950394019835817e-06, - "loss": 0.0258, - "step": 5498 - }, - { - "epoch": 2.442371752165223, - "grad_norm": 0.3900552673739537, - "learning_rate": 3.948498755899344e-06, - "loss": 0.0287, - "step": 5499 - }, - { - "epoch": 2.4428159005107704, - "grad_norm": 0.4466770793345454, - "learning_rate": 3.94660365002137e-06, - "loss": 0.0292, - "step": 5500 - }, - { - "epoch": 2.443260048856318, - "grad_norm": 0.5418862551373604, - "learning_rate": 3.94470870248676e-06, - "loss": 0.0434, - "step": 5501 - }, - { - "epoch": 2.4437041972018654, - "grad_norm": 0.5170924051250978, - "learning_rate": 3.942813913580358e-06, - "loss": 0.044, - "step": 5502 - }, - { - "epoch": 2.4441483455474127, - "grad_norm": 0.5438284418911795, - "learning_rate": 3.940919283586985e-06, - "loss": 0.0448, - "step": 5503 - }, - { - "epoch": 2.4445924938929604, - "grad_norm": 0.5198158479237611, - "learning_rate": 3.9390248127914325e-06, - "loss": 0.0528, - "step": 5504 - }, - { - "epoch": 2.4450366422385077, - "grad_norm": 0.520564892893801, - "learning_rate": 3.937130501478475e-06, - "loss": 0.0488, - "step": 5505 - }, - { - "epoch": 2.445480790584055, - "grad_norm": 0.46307199806507215, - "learning_rate": 3.935236349932858e-06, - "loss": 0.0334, - "step": 5506 - }, - { - "epoch": 2.4459249389296023, - "grad_norm": 0.5703039507859231, - "learning_rate": 3.933342358439304e-06, - "loss": 0.0388, - "step": 5507 - }, - { - "epoch": 2.44636908727515, - "grad_norm": 0.480094036827002, - "learning_rate": 3.931448527282512e-06, - "loss": 0.0383, - "step": 5508 - }, - { - "epoch": 2.4468132356206973, - "grad_norm": 0.5416338368617786, - "learning_rate": 3.9295548567471595e-06, - "loss": 0.044, - "step": 5509 - }, - { - "epoch": 2.4472573839662446, - "grad_norm": 0.3789299998421564, - "learning_rate": 3.927661347117896e-06, - "loss": 0.0308, - "step": 5510 - }, - { - "epoch": 2.4477015323117923, - "grad_norm": 0.42767639025063464, - "learning_rate": 3.925767998679347e-06, - "loss": 0.032, - "step": 5511 - }, - { - "epoch": 2.4481456806573396, - "grad_norm": 0.3723173022056738, - "learning_rate": 3.923874811716116e-06, - "loss": 0.0299, - "step": 5512 - }, - { - "epoch": 2.448589829002887, - "grad_norm": 0.647876325917259, - "learning_rate": 3.92198178651278e-06, - "loss": 0.0334, - "step": 5513 - }, - { - "epoch": 2.449033977348434, - "grad_norm": 0.3651110551254141, - "learning_rate": 3.920088923353895e-06, - "loss": 0.0404, - "step": 5514 - }, - { - "epoch": 2.449478125693982, - "grad_norm": 0.43323931222905593, - "learning_rate": 3.918196222523989e-06, - "loss": 0.0458, - "step": 5515 - }, - { - "epoch": 2.4499222740395292, - "grad_norm": 0.4066604237779864, - "learning_rate": 3.916303684307568e-06, - "loss": 0.0291, - "step": 5516 - }, - { - "epoch": 2.4503664223850765, - "grad_norm": 0.4166876666433253, - "learning_rate": 3.914411308989113e-06, - "loss": 0.027, - "step": 5517 - }, - { - "epoch": 2.4508105707306242, - "grad_norm": 0.45791540033658323, - "learning_rate": 3.9125190968530766e-06, - "loss": 0.0271, - "step": 5518 - }, - { - "epoch": 2.4512547190761715, - "grad_norm": 0.4364649579290464, - "learning_rate": 3.910627048183893e-06, - "loss": 0.0325, - "step": 5519 - }, - { - "epoch": 2.451698867421719, - "grad_norm": 0.34758907020261137, - "learning_rate": 3.908735163265971e-06, - "loss": 0.0225, - "step": 5520 - }, - { - "epoch": 2.452143015767266, - "grad_norm": 0.386095366133868, - "learning_rate": 3.906843442383691e-06, - "loss": 0.0253, - "step": 5521 - }, - { - "epoch": 2.452587164112814, - "grad_norm": 0.5285696022742041, - "learning_rate": 3.90495188582141e-06, - "loss": 0.0288, - "step": 5522 - }, - { - "epoch": 2.453031312458361, - "grad_norm": 0.5685349400918212, - "learning_rate": 3.903060493863463e-06, - "loss": 0.0384, - "step": 5523 - }, - { - "epoch": 2.4534754608039084, - "grad_norm": 0.33536529902813805, - "learning_rate": 3.901169266794158e-06, - "loss": 0.0256, - "step": 5524 - }, - { - "epoch": 2.4539196091494557, - "grad_norm": 0.3367209757616414, - "learning_rate": 3.899278204897777e-06, - "loss": 0.0308, - "step": 5525 - }, - { - "epoch": 2.4543637574950035, - "grad_norm": 0.35301257937407804, - "learning_rate": 3.89738730845858e-06, - "loss": 0.0326, - "step": 5526 - }, - { - "epoch": 2.4548079058405508, - "grad_norm": 0.44674618292196533, - "learning_rate": 3.895496577760802e-06, - "loss": 0.0378, - "step": 5527 - }, - { - "epoch": 2.455252054186098, - "grad_norm": 0.33090638899042374, - "learning_rate": 3.893606013088649e-06, - "loss": 0.0287, - "step": 5528 - }, - { - "epoch": 2.4556962025316453, - "grad_norm": 0.6629654415706786, - "learning_rate": 3.8917156147263075e-06, - "loss": 0.0361, - "step": 5529 - }, - { - "epoch": 2.456140350877193, - "grad_norm": 0.4137805309428058, - "learning_rate": 3.889825382957935e-06, - "loss": 0.0381, - "step": 5530 - }, - { - "epoch": 2.4565844992227404, - "grad_norm": 0.48510449046292115, - "learning_rate": 3.887935318067665e-06, - "loss": 0.0369, - "step": 5531 - }, - { - "epoch": 2.4570286475682876, - "grad_norm": 0.4301148483997256, - "learning_rate": 3.886045420339608e-06, - "loss": 0.0288, - "step": 5532 - }, - { - "epoch": 2.4574727959138354, - "grad_norm": 0.42246466717722975, - "learning_rate": 3.884155690057849e-06, - "loss": 0.0301, - "step": 5533 - }, - { - "epoch": 2.4579169442593827, - "grad_norm": 0.4345894945232888, - "learning_rate": 3.882266127506444e-06, - "loss": 0.0327, - "step": 5534 - }, - { - "epoch": 2.45836109260493, - "grad_norm": 0.47014262773033944, - "learning_rate": 3.880376732969427e-06, - "loss": 0.0406, - "step": 5535 - }, - { - "epoch": 2.4588052409504773, - "grad_norm": 0.5529245797017762, - "learning_rate": 3.8784875067308035e-06, - "loss": 0.0428, - "step": 5536 - }, - { - "epoch": 2.459249389296025, - "grad_norm": 0.5853637583441337, - "learning_rate": 3.876598449074561e-06, - "loss": 0.0484, - "step": 5537 - }, - { - "epoch": 2.4596935376415723, - "grad_norm": 0.4308021015437979, - "learning_rate": 3.874709560284655e-06, - "loss": 0.0331, - "step": 5538 - }, - { - "epoch": 2.4601376859871196, - "grad_norm": 0.41105828222307955, - "learning_rate": 3.872820840645017e-06, - "loss": 0.0291, - "step": 5539 - }, - { - "epoch": 2.4605818343326673, - "grad_norm": 0.4669061473028007, - "learning_rate": 3.8709322904395556e-06, - "loss": 0.0275, - "step": 5540 - }, - { - "epoch": 2.4610259826782146, - "grad_norm": 0.4419324556460313, - "learning_rate": 3.869043909952149e-06, - "loss": 0.046, - "step": 5541 - }, - { - "epoch": 2.461470131023762, - "grad_norm": 0.4298170656814399, - "learning_rate": 3.867155699466653e-06, - "loss": 0.0376, - "step": 5542 - }, - { - "epoch": 2.461914279369309, - "grad_norm": 0.9256029510191909, - "learning_rate": 3.865267659266901e-06, - "loss": 0.0365, - "step": 5543 - }, - { - "epoch": 2.462358427714857, - "grad_norm": 0.4535797528392373, - "learning_rate": 3.863379789636696e-06, - "loss": 0.0302, - "step": 5544 - }, - { - "epoch": 2.462802576060404, - "grad_norm": 0.5482760330729667, - "learning_rate": 3.861492090859816e-06, - "loss": 0.0365, - "step": 5545 - }, - { - "epoch": 2.4632467244059515, - "grad_norm": 0.36157460865248275, - "learning_rate": 3.8596045632200126e-06, - "loss": 0.0266, - "step": 5546 - }, - { - "epoch": 2.4636908727514992, - "grad_norm": 0.4873814379100533, - "learning_rate": 3.857717207001017e-06, - "loss": 0.037, - "step": 5547 - }, - { - "epoch": 2.4641350210970465, - "grad_norm": 0.7370873255745318, - "learning_rate": 3.855830022486528e-06, - "loss": 0.0415, - "step": 5548 - }, - { - "epoch": 2.464579169442594, - "grad_norm": 0.493249372834125, - "learning_rate": 3.853943009960225e-06, - "loss": 0.0386, - "step": 5549 - }, - { - "epoch": 2.465023317788141, - "grad_norm": 0.6919203726918821, - "learning_rate": 3.852056169705753e-06, - "loss": 0.0389, - "step": 5550 - }, - { - "epoch": 2.465467466133689, - "grad_norm": 0.4116990565345076, - "learning_rate": 3.850169502006741e-06, - "loss": 0.0302, - "step": 5551 - }, - { - "epoch": 2.465911614479236, - "grad_norm": 0.34358727354238183, - "learning_rate": 3.848283007146784e-06, - "loss": 0.0325, - "step": 5552 - }, - { - "epoch": 2.4663557628247834, - "grad_norm": 0.35614401203480484, - "learning_rate": 3.846396685409455e-06, - "loss": 0.0322, - "step": 5553 - }, - { - "epoch": 2.4667999111703307, - "grad_norm": 0.25982925770375787, - "learning_rate": 3.8445105370782995e-06, - "loss": 0.0262, - "step": 5554 - }, - { - "epoch": 2.4672440595158784, - "grad_norm": 0.3510865325198374, - "learning_rate": 3.842624562436841e-06, - "loss": 0.0386, - "step": 5555 - }, - { - "epoch": 2.4676882078614257, - "grad_norm": 0.4049390820424808, - "learning_rate": 3.8407387617685696e-06, - "loss": 0.0247, - "step": 5556 - }, - { - "epoch": 2.468132356206973, - "grad_norm": 0.42859540412907965, - "learning_rate": 3.838853135356956e-06, - "loss": 0.0368, - "step": 5557 - }, - { - "epoch": 2.4685765045525203, - "grad_norm": 0.35853923916250774, - "learning_rate": 3.836967683485441e-06, - "loss": 0.0329, - "step": 5558 - }, - { - "epoch": 2.469020652898068, - "grad_norm": 0.4278309028616722, - "learning_rate": 3.835082406437437e-06, - "loss": 0.0362, - "step": 5559 - }, - { - "epoch": 2.4694648012436153, - "grad_norm": 1.0923622589898494, - "learning_rate": 3.833197304496336e-06, - "loss": 0.044, - "step": 5560 - }, - { - "epoch": 2.4699089495891626, - "grad_norm": 0.37654012560879, - "learning_rate": 3.8313123779455035e-06, - "loss": 0.0262, - "step": 5561 - }, - { - "epoch": 2.4703530979347104, - "grad_norm": 0.41898160595248646, - "learning_rate": 3.829427627068272e-06, - "loss": 0.0344, - "step": 5562 - }, - { - "epoch": 2.4707972462802577, - "grad_norm": 0.3844261706628325, - "learning_rate": 3.827543052147952e-06, - "loss": 0.0273, - "step": 5563 - }, - { - "epoch": 2.471241394625805, - "grad_norm": 0.4489319056107684, - "learning_rate": 3.8256586534678285e-06, - "loss": 0.0318, - "step": 5564 - }, - { - "epoch": 2.4716855429713522, - "grad_norm": 0.6833288857085646, - "learning_rate": 3.8237744313111565e-06, - "loss": 0.0404, - "step": 5565 - }, - { - "epoch": 2.4721296913169, - "grad_norm": 0.430664114118146, - "learning_rate": 3.82189038596117e-06, - "loss": 0.0352, - "step": 5566 - }, - { - "epoch": 2.4725738396624473, - "grad_norm": 0.4030254224585019, - "learning_rate": 3.820006517701069e-06, - "loss": 0.0321, - "step": 5567 - }, - { - "epoch": 2.4730179880079945, - "grad_norm": 0.5685844819493571, - "learning_rate": 3.8181228268140354e-06, - "loss": 0.0507, - "step": 5568 - }, - { - "epoch": 2.4734621363535423, - "grad_norm": 0.44264389289592554, - "learning_rate": 3.816239313583217e-06, - "loss": 0.0376, - "step": 5569 - }, - { - "epoch": 2.4739062846990896, - "grad_norm": 0.3377904339220037, - "learning_rate": 3.814355978291736e-06, - "loss": 0.0327, - "step": 5570 - }, - { - "epoch": 2.474350433044637, - "grad_norm": 0.630683589245786, - "learning_rate": 3.8124728212226938e-06, - "loss": 0.0471, - "step": 5571 - }, - { - "epoch": 2.474794581390184, - "grad_norm": 0.42602963126310434, - "learning_rate": 3.810589842659159e-06, - "loss": 0.0297, - "step": 5572 - }, - { - "epoch": 2.475238729735732, - "grad_norm": 0.4633136103256966, - "learning_rate": 3.808707042884176e-06, - "loss": 0.028, - "step": 5573 - }, - { - "epoch": 2.475682878081279, - "grad_norm": 0.5782431299481687, - "learning_rate": 3.8068244221807606e-06, - "loss": 0.0383, - "step": 5574 - }, - { - "epoch": 2.4761270264268265, - "grad_norm": 0.5372901069709559, - "learning_rate": 3.8049419808319033e-06, - "loss": 0.036, - "step": 5575 - }, - { - "epoch": 2.476571174772374, - "grad_norm": 0.2611922701250809, - "learning_rate": 3.8030597191205643e-06, - "loss": 0.0217, - "step": 5576 - }, - { - "epoch": 2.4770153231179215, - "grad_norm": 0.36585043972090653, - "learning_rate": 3.8011776373296837e-06, - "loss": 0.0286, - "step": 5577 - }, - { - "epoch": 2.477459471463469, - "grad_norm": 0.5157568238144473, - "learning_rate": 3.79929573574217e-06, - "loss": 0.0343, - "step": 5578 - }, - { - "epoch": 2.477903619809016, - "grad_norm": 0.4610943022253674, - "learning_rate": 3.797414014640903e-06, - "loss": 0.0318, - "step": 5579 - }, - { - "epoch": 2.478347768154564, - "grad_norm": 0.47373180317876196, - "learning_rate": 3.795532474308737e-06, - "loss": 0.0427, - "step": 5580 - }, - { - "epoch": 2.478791916500111, - "grad_norm": 0.4145736062776239, - "learning_rate": 3.7936511150285014e-06, - "loss": 0.038, - "step": 5581 - }, - { - "epoch": 2.4792360648456584, - "grad_norm": 0.5254698644896055, - "learning_rate": 3.7917699370829935e-06, - "loss": 0.0349, - "step": 5582 - }, - { - "epoch": 2.4796802131912057, - "grad_norm": 0.4171576347283061, - "learning_rate": 3.789888940754991e-06, - "loss": 0.0303, - "step": 5583 - }, - { - "epoch": 2.4801243615367534, - "grad_norm": 0.39892945172204647, - "learning_rate": 3.788008126327235e-06, - "loss": 0.041, - "step": 5584 - }, - { - "epoch": 2.4805685098823007, - "grad_norm": 0.4156599218368941, - "learning_rate": 3.7861274940824473e-06, - "loss": 0.0308, - "step": 5585 - }, - { - "epoch": 2.481012658227848, - "grad_norm": 0.3949693643427857, - "learning_rate": 3.784247044303317e-06, - "loss": 0.0311, - "step": 5586 - }, - { - "epoch": 2.4814568065733953, - "grad_norm": 0.3619897883910703, - "learning_rate": 3.782366777272506e-06, - "loss": 0.0304, - "step": 5587 - }, - { - "epoch": 2.481900954918943, - "grad_norm": 0.3797535629139442, - "learning_rate": 3.7804866932726535e-06, - "loss": 0.0265, - "step": 5588 - }, - { - "epoch": 2.4823451032644903, - "grad_norm": 0.4514704220606347, - "learning_rate": 3.778606792586368e-06, - "loss": 0.0242, - "step": 5589 - }, - { - "epoch": 2.4827892516100376, - "grad_norm": 0.5885462347335189, - "learning_rate": 3.7767270754962294e-06, - "loss": 0.0462, - "step": 5590 - }, - { - "epoch": 2.4832333999555853, - "grad_norm": 0.46737727413834057, - "learning_rate": 3.7748475422847896e-06, - "loss": 0.0476, - "step": 5591 - }, - { - "epoch": 2.4836775483011326, - "grad_norm": 0.34347582817428995, - "learning_rate": 3.7729681932345776e-06, - "loss": 0.0309, - "step": 5592 - }, - { - "epoch": 2.48412169664668, - "grad_norm": 0.5416760213809743, - "learning_rate": 3.771089028628087e-06, - "loss": 0.0301, - "step": 5593 - }, - { - "epoch": 2.484565844992227, - "grad_norm": 0.5279294508428536, - "learning_rate": 3.7692100487477936e-06, - "loss": 0.0338, - "step": 5594 - }, - { - "epoch": 2.485009993337775, - "grad_norm": 0.40771734537444654, - "learning_rate": 3.7673312538761362e-06, - "loss": 0.0244, - "step": 5595 - }, - { - "epoch": 2.4854541416833222, - "grad_norm": 0.3225283085841615, - "learning_rate": 3.765452644295532e-06, - "loss": 0.0252, - "step": 5596 - }, - { - "epoch": 2.4858982900288695, - "grad_norm": 0.4343781070994849, - "learning_rate": 3.7635742202883664e-06, - "loss": 0.0301, - "step": 5597 - }, - { - "epoch": 2.4863424383744173, - "grad_norm": 0.48736918945639, - "learning_rate": 3.761695982136997e-06, - "loss": 0.0283, - "step": 5598 - }, - { - "epoch": 2.4867865867199646, - "grad_norm": 0.4628216568506521, - "learning_rate": 3.759817930123756e-06, - "loss": 0.0325, - "step": 5599 - }, - { - "epoch": 2.487230735065512, - "grad_norm": 0.4979234883742358, - "learning_rate": 3.75794006453095e-06, - "loss": 0.046, - "step": 5600 - }, - { - "epoch": 2.487674883411059, - "grad_norm": 0.5295486655610855, - "learning_rate": 3.7560623856408496e-06, - "loss": 0.0421, - "step": 5601 - }, - { - "epoch": 2.488119031756607, - "grad_norm": 0.3550831132877616, - "learning_rate": 3.7541848937357037e-06, - "loss": 0.0282, - "step": 5602 - }, - { - "epoch": 2.488563180102154, - "grad_norm": 0.4820776586497428, - "learning_rate": 3.7523075890977323e-06, - "loss": 0.0327, - "step": 5603 - }, - { - "epoch": 2.4890073284477015, - "grad_norm": 0.36069709664433147, - "learning_rate": 3.7504304720091227e-06, - "loss": 0.0282, - "step": 5604 - }, - { - "epoch": 2.489451476793249, - "grad_norm": 0.3378590317401163, - "learning_rate": 3.7485535427520393e-06, - "loss": 0.0327, - "step": 5605 - }, - { - "epoch": 2.4898956251387965, - "grad_norm": 0.5101551267438247, - "learning_rate": 3.7466768016086187e-06, - "loss": 0.0426, - "step": 5606 - }, - { - "epoch": 2.4903397734843438, - "grad_norm": 0.4554277064950327, - "learning_rate": 3.7448002488609647e-06, - "loss": 0.0363, - "step": 5607 - }, - { - "epoch": 2.490783921829891, - "grad_norm": 0.5190026980108126, - "learning_rate": 3.7429238847911555e-06, - "loss": 0.0303, - "step": 5608 - }, - { - "epoch": 2.4912280701754383, - "grad_norm": 0.4937954972214749, - "learning_rate": 3.7410477096812402e-06, - "loss": 0.0489, - "step": 5609 - }, - { - "epoch": 2.491672218520986, - "grad_norm": 0.36659365050297416, - "learning_rate": 3.7391717238132386e-06, - "loss": 0.0266, - "step": 5610 - }, - { - "epoch": 2.4921163668665334, - "grad_norm": 0.391793755436005, - "learning_rate": 3.737295927469146e-06, - "loss": 0.0288, - "step": 5611 - }, - { - "epoch": 2.4925605152120807, - "grad_norm": 0.386017966668196, - "learning_rate": 3.7354203209309246e-06, - "loss": 0.0347, - "step": 5612 - }, - { - "epoch": 2.4930046635576284, - "grad_norm": 0.30330961970277515, - "learning_rate": 3.733544904480512e-06, - "loss": 0.0248, - "step": 5613 - }, - { - "epoch": 2.4934488119031757, - "grad_norm": 0.4407907387683592, - "learning_rate": 3.7316696783998124e-06, - "loss": 0.0347, - "step": 5614 - }, - { - "epoch": 2.493892960248723, - "grad_norm": 0.37646444767265064, - "learning_rate": 3.7297946429707045e-06, - "loss": 0.0328, - "step": 5615 - }, - { - "epoch": 2.4943371085942703, - "grad_norm": 0.4575522855234119, - "learning_rate": 3.727919798475038e-06, - "loss": 0.0352, - "step": 5616 - }, - { - "epoch": 2.494781256939818, - "grad_norm": 0.35695162304512706, - "learning_rate": 3.7260451451946365e-06, - "loss": 0.0316, - "step": 5617 - }, - { - "epoch": 2.4952254052853653, - "grad_norm": 0.43360958825611745, - "learning_rate": 3.724170683411291e-06, - "loss": 0.0332, - "step": 5618 - }, - { - "epoch": 2.4956695536309126, - "grad_norm": 0.4262372003773419, - "learning_rate": 3.722296413406763e-06, - "loss": 0.0255, - "step": 5619 - }, - { - "epoch": 2.4961137019764603, - "grad_norm": 0.43363995486396417, - "learning_rate": 3.7204223354627894e-06, - "loss": 0.031, - "step": 5620 - }, - { - "epoch": 2.4965578503220076, - "grad_norm": 0.47249288635203707, - "learning_rate": 3.718548449861074e-06, - "loss": 0.0331, - "step": 5621 - }, - { - "epoch": 2.497001998667555, - "grad_norm": 0.4319106936736054, - "learning_rate": 3.716674756883295e-06, - "loss": 0.0392, - "step": 5622 - }, - { - "epoch": 2.497446147013102, - "grad_norm": 0.5119693456166591, - "learning_rate": 3.714801256811099e-06, - "loss": 0.0409, - "step": 5623 - }, - { - "epoch": 2.49789029535865, - "grad_norm": 0.40570657622630646, - "learning_rate": 3.712927949926108e-06, - "loss": 0.0266, - "step": 5624 - }, - { - "epoch": 2.498334443704197, - "grad_norm": 0.40821652834924144, - "learning_rate": 3.7110548365099075e-06, - "loss": 0.0402, - "step": 5625 - }, - { - "epoch": 2.4987785920497445, - "grad_norm": 0.3940755701591239, - "learning_rate": 3.7091819168440624e-06, - "loss": 0.0315, - "step": 5626 - }, - { - "epoch": 2.4992227403952922, - "grad_norm": 0.495948418850682, - "learning_rate": 3.7073091912101002e-06, - "loss": 0.0326, - "step": 5627 - }, - { - "epoch": 2.4996668887408395, - "grad_norm": 0.4435967840620372, - "learning_rate": 3.705436659889527e-06, - "loss": 0.0368, - "step": 5628 - }, - { - "epoch": 2.500111037086387, - "grad_norm": 0.32392962929044805, - "learning_rate": 3.7035643231638135e-06, - "loss": 0.029, - "step": 5629 - }, - { - "epoch": 2.500555185431934, - "grad_norm": 0.6179012687291638, - "learning_rate": 3.7016921813144063e-06, - "loss": 0.0382, - "step": 5630 - }, - { - "epoch": 2.5009993337774814, - "grad_norm": 0.45744910004367445, - "learning_rate": 3.6998202346227183e-06, - "loss": 0.0418, - "step": 5631 - }, - { - "epoch": 2.501443482123029, - "grad_norm": 0.5527549347471298, - "learning_rate": 3.697948483370135e-06, - "loss": 0.0257, - "step": 5632 - }, - { - "epoch": 2.5018876304685764, - "grad_norm": 0.5434768947853562, - "learning_rate": 3.696076927838011e-06, - "loss": 0.0389, - "step": 5633 - }, - { - "epoch": 2.502331778814124, - "grad_norm": 0.3410765055084389, - "learning_rate": 3.6942055683076767e-06, - "loss": 0.0299, - "step": 5634 - }, - { - "epoch": 2.5027759271596715, - "grad_norm": 0.7036711081682665, - "learning_rate": 3.692334405060427e-06, - "loss": 0.0405, - "step": 5635 - }, - { - "epoch": 2.5032200755052187, - "grad_norm": 0.3864467247613326, - "learning_rate": 3.6904634383775283e-06, - "loss": 0.0293, - "step": 5636 - }, - { - "epoch": 2.503664223850766, - "grad_norm": 0.5152666553526902, - "learning_rate": 3.6885926685402213e-06, - "loss": 0.042, - "step": 5637 - }, - { - "epoch": 2.5041083721963133, - "grad_norm": 0.3913873319828441, - "learning_rate": 3.6867220958297132e-06, - "loss": 0.0341, - "step": 5638 - }, - { - "epoch": 2.504552520541861, - "grad_norm": 0.3712133677859356, - "learning_rate": 3.6848517205271805e-06, - "loss": 0.0257, - "step": 5639 - }, - { - "epoch": 2.5049966688874084, - "grad_norm": 0.42947535443731244, - "learning_rate": 3.682981542913776e-06, - "loss": 0.0369, - "step": 5640 - }, - { - "epoch": 2.5054408172329556, - "grad_norm": 0.5274708204343582, - "learning_rate": 3.6811115632706185e-06, - "loss": 0.0324, - "step": 5641 - }, - { - "epoch": 2.5058849655785034, - "grad_norm": 0.4291181112678621, - "learning_rate": 3.6792417818787972e-06, - "loss": 0.0293, - "step": 5642 - }, - { - "epoch": 2.5063291139240507, - "grad_norm": 0.42231381186795797, - "learning_rate": 3.677372199019371e-06, - "loss": 0.0366, - "step": 5643 - }, - { - "epoch": 2.506773262269598, - "grad_norm": 0.4789995350453658, - "learning_rate": 3.6755028149733697e-06, - "loss": 0.0327, - "step": 5644 - }, - { - "epoch": 2.5072174106151452, - "grad_norm": 0.40689270267790856, - "learning_rate": 3.6736336300217964e-06, - "loss": 0.0237, - "step": 5645 - }, - { - "epoch": 2.507661558960693, - "grad_norm": 0.5913689239490344, - "learning_rate": 3.6717646444456196e-06, - "loss": 0.0366, - "step": 5646 - }, - { - "epoch": 2.5081057073062403, - "grad_norm": 0.3914862469261757, - "learning_rate": 3.669895858525778e-06, - "loss": 0.025, - "step": 5647 - }, - { - "epoch": 2.5085498556517876, - "grad_norm": 0.42201187516388383, - "learning_rate": 3.6680272725431854e-06, - "loss": 0.0336, - "step": 5648 - }, - { - "epoch": 2.5089940039973353, - "grad_norm": 0.42314579939123975, - "learning_rate": 3.6661588867787183e-06, - "loss": 0.0368, - "step": 5649 - }, - { - "epoch": 2.5094381523428826, - "grad_norm": 0.46190362474830615, - "learning_rate": 3.664290701513229e-06, - "loss": 0.0366, - "step": 5650 - }, - { - "epoch": 2.50988230068843, - "grad_norm": 0.5151193711763113, - "learning_rate": 3.662422717027536e-06, - "loss": 0.0343, - "step": 5651 - }, - { - "epoch": 2.510326449033977, - "grad_norm": 0.4683557027001918, - "learning_rate": 3.6605549336024327e-06, - "loss": 0.0502, - "step": 5652 - }, - { - "epoch": 2.510770597379525, - "grad_norm": 0.3966005086612889, - "learning_rate": 3.658687351518674e-06, - "loss": 0.0307, - "step": 5653 - }, - { - "epoch": 2.511214745725072, - "grad_norm": 0.4901749101545214, - "learning_rate": 3.656819971056992e-06, - "loss": 0.0514, - "step": 5654 - }, - { - "epoch": 2.5116588940706195, - "grad_norm": 0.44088737564599295, - "learning_rate": 3.654952792498086e-06, - "loss": 0.035, - "step": 5655 - }, - { - "epoch": 2.512103042416167, - "grad_norm": 0.637540920302593, - "learning_rate": 3.653085816122621e-06, - "loss": 0.0433, - "step": 5656 - }, - { - "epoch": 2.5125471907617145, - "grad_norm": 0.3989139686742009, - "learning_rate": 3.651219042211239e-06, - "loss": 0.0266, - "step": 5657 - }, - { - "epoch": 2.512991339107262, - "grad_norm": 0.49156765417598675, - "learning_rate": 3.649352471044548e-06, - "loss": 0.0232, - "step": 5658 - }, - { - "epoch": 2.513435487452809, - "grad_norm": 0.3796366437427131, - "learning_rate": 3.647486102903124e-06, - "loss": 0.0254, - "step": 5659 - }, - { - "epoch": 2.5138796357983564, - "grad_norm": 0.4428861844260197, - "learning_rate": 3.6456199380675128e-06, - "loss": 0.0336, - "step": 5660 - }, - { - "epoch": 2.514323784143904, - "grad_norm": 0.4660014764332606, - "learning_rate": 3.6437539768182305e-06, - "loss": 0.0379, - "step": 5661 - }, - { - "epoch": 2.5147679324894514, - "grad_norm": 0.48208527317355315, - "learning_rate": 3.6418882194357662e-06, - "loss": 0.0309, - "step": 5662 - }, - { - "epoch": 2.515212080834999, - "grad_norm": 0.46984872075734047, - "learning_rate": 3.6400226662005733e-06, - "loss": 0.0281, - "step": 5663 - }, - { - "epoch": 2.5156562291805464, - "grad_norm": 0.3674696428042144, - "learning_rate": 3.638157317393074e-06, - "loss": 0.0254, - "step": 5664 - }, - { - "epoch": 2.5161003775260937, - "grad_norm": 0.3790734362034619, - "learning_rate": 3.636292173293665e-06, - "loss": 0.0322, - "step": 5665 - }, - { - "epoch": 2.516544525871641, - "grad_norm": 0.36816453960949835, - "learning_rate": 3.634427234182708e-06, - "loss": 0.0339, - "step": 5666 - }, - { - "epoch": 2.5169886742171883, - "grad_norm": 0.310536315755628, - "learning_rate": 3.632562500340532e-06, - "loss": 0.0204, - "step": 5667 - }, - { - "epoch": 2.517432822562736, - "grad_norm": 0.5208883625261909, - "learning_rate": 3.6306979720474424e-06, - "loss": 0.0385, - "step": 5668 - }, - { - "epoch": 2.5178769709082833, - "grad_norm": 0.4448683196893281, - "learning_rate": 3.6288336495837085e-06, - "loss": 0.0357, - "step": 5669 - }, - { - "epoch": 2.5183211192538306, - "grad_norm": 0.44092160751797316, - "learning_rate": 3.6269695332295697e-06, - "loss": 0.0344, - "step": 5670 - }, - { - "epoch": 2.5187652675993784, - "grad_norm": 0.45635940839778255, - "learning_rate": 3.6251056232652327e-06, - "loss": 0.0305, - "step": 5671 - }, - { - "epoch": 2.5192094159449256, - "grad_norm": 0.5533263657116287, - "learning_rate": 3.6232419199708764e-06, - "loss": 0.0284, - "step": 5672 - }, - { - "epoch": 2.519653564290473, - "grad_norm": 0.37835546841851936, - "learning_rate": 3.6213784236266447e-06, - "loss": 0.0326, - "step": 5673 - }, - { - "epoch": 2.5200977126360202, - "grad_norm": 0.4665241718562087, - "learning_rate": 3.6195151345126556e-06, - "loss": 0.036, - "step": 5674 - }, - { - "epoch": 2.520541860981568, - "grad_norm": 0.3945669545233313, - "learning_rate": 3.6176520529089932e-06, - "loss": 0.0339, - "step": 5675 - }, - { - "epoch": 2.5209860093271153, - "grad_norm": 0.37055474601271443, - "learning_rate": 3.6157891790957096e-06, - "loss": 0.0339, - "step": 5676 - }, - { - "epoch": 2.5214301576726625, - "grad_norm": 0.492060267079228, - "learning_rate": 3.6139265133528246e-06, - "loss": 0.0316, - "step": 5677 - }, - { - "epoch": 2.5218743060182103, - "grad_norm": 0.5701337130019367, - "learning_rate": 3.612064055960331e-06, - "loss": 0.0464, - "step": 5678 - }, - { - "epoch": 2.5223184543637576, - "grad_norm": 0.39863450477642115, - "learning_rate": 3.6102018071981846e-06, - "loss": 0.0334, - "step": 5679 - }, - { - "epoch": 2.522762602709305, - "grad_norm": 0.515203363747466, - "learning_rate": 3.6083397673463172e-06, - "loss": 0.0486, - "step": 5680 - }, - { - "epoch": 2.523206751054852, - "grad_norm": 0.3893233450260606, - "learning_rate": 3.606477936684622e-06, - "loss": 0.0359, - "step": 5681 - }, - { - "epoch": 2.5236508994004, - "grad_norm": 0.45713551996484814, - "learning_rate": 3.6046163154929657e-06, - "loss": 0.0374, - "step": 5682 - }, - { - "epoch": 2.524095047745947, - "grad_norm": 0.5641320893715582, - "learning_rate": 3.6027549040511806e-06, - "loss": 0.058, - "step": 5683 - }, - { - "epoch": 2.5245391960914945, - "grad_norm": 0.3265494892390024, - "learning_rate": 3.600893702639067e-06, - "loss": 0.023, - "step": 5684 - }, - { - "epoch": 2.524983344437042, - "grad_norm": 0.41936797741330234, - "learning_rate": 3.5990327115363967e-06, - "loss": 0.0256, - "step": 5685 - }, - { - "epoch": 2.5254274927825895, - "grad_norm": 0.46801276167069494, - "learning_rate": 3.5971719310229093e-06, - "loss": 0.0254, - "step": 5686 - }, - { - "epoch": 2.525871641128137, - "grad_norm": 0.49189838872219027, - "learning_rate": 3.595311361378311e-06, - "loss": 0.0364, - "step": 5687 - }, - { - "epoch": 2.526315789473684, - "grad_norm": 0.5586944397026717, - "learning_rate": 3.593451002882275e-06, - "loss": 0.0335, - "step": 5688 - }, - { - "epoch": 2.5267599378192314, - "grad_norm": 0.4422926900473203, - "learning_rate": 3.5915908558144476e-06, - "loss": 0.0298, - "step": 5689 - }, - { - "epoch": 2.527204086164779, - "grad_norm": 0.3434949432587989, - "learning_rate": 3.5897309204544375e-06, - "loss": 0.0336, - "step": 5690 - }, - { - "epoch": 2.5276482345103264, - "grad_norm": 0.3621563955807786, - "learning_rate": 3.587871197081828e-06, - "loss": 0.0254, - "step": 5691 - }, - { - "epoch": 2.528092382855874, - "grad_norm": 0.4132257794043078, - "learning_rate": 3.586011685976164e-06, - "loss": 0.0332, - "step": 5692 - }, - { - "epoch": 2.5285365312014214, - "grad_norm": 0.5011026660419655, - "learning_rate": 3.5841523874169648e-06, - "loss": 0.0345, - "step": 5693 - }, - { - "epoch": 2.5289806795469687, - "grad_norm": 0.45248007625680975, - "learning_rate": 3.582293301683713e-06, - "loss": 0.0293, - "step": 5694 - }, - { - "epoch": 2.529424827892516, - "grad_norm": 0.7885398440569542, - "learning_rate": 3.580434429055859e-06, - "loss": 0.032, - "step": 5695 - }, - { - "epoch": 2.5298689762380633, - "grad_norm": 0.8455065643807562, - "learning_rate": 3.578575769812824e-06, - "loss": 0.0566, - "step": 5696 - }, - { - "epoch": 2.530313124583611, - "grad_norm": 0.3581406871410735, - "learning_rate": 3.576717324233998e-06, - "loss": 0.0283, - "step": 5697 - }, - { - "epoch": 2.5307572729291583, - "grad_norm": 0.37566727714871373, - "learning_rate": 3.5748590925987347e-06, - "loss": 0.0274, - "step": 5698 - }, - { - "epoch": 2.5312014212747056, - "grad_norm": 0.40595777932831095, - "learning_rate": 3.5730010751863605e-06, - "loss": 0.029, - "step": 5699 - }, - { - "epoch": 2.5316455696202533, - "grad_norm": 0.47532996991241144, - "learning_rate": 3.571143272276164e-06, - "loss": 0.0532, - "step": 5700 - }, - { - "epoch": 2.5320897179658006, - "grad_norm": 0.44327227035484773, - "learning_rate": 3.5692856841474045e-06, - "loss": 0.0355, - "step": 5701 - }, - { - "epoch": 2.532533866311348, - "grad_norm": 0.34290979724264187, - "learning_rate": 3.5674283110793105e-06, - "loss": 0.0219, - "step": 5702 - }, - { - "epoch": 2.532978014656895, - "grad_norm": 0.6517538589913022, - "learning_rate": 3.5655711533510783e-06, - "loss": 0.0354, - "step": 5703 - }, - { - "epoch": 2.533422163002443, - "grad_norm": 0.40477955297885887, - "learning_rate": 3.5637142112418684e-06, - "loss": 0.0292, - "step": 5704 - }, - { - "epoch": 2.5338663113479902, - "grad_norm": 0.47637177118935925, - "learning_rate": 3.5618574850308095e-06, - "loss": 0.0256, - "step": 5705 - }, - { - "epoch": 2.5343104596935375, - "grad_norm": 0.4050526950368258, - "learning_rate": 3.560000974997001e-06, - "loss": 0.0279, - "step": 5706 - }, - { - "epoch": 2.5347546080390853, - "grad_norm": 0.4510491005204953, - "learning_rate": 3.5581446814195054e-06, - "loss": 0.039, - "step": 5707 - }, - { - "epoch": 2.5351987563846325, - "grad_norm": 0.43059426602406603, - "learning_rate": 3.556288604577359e-06, - "loss": 0.0344, - "step": 5708 - }, - { - "epoch": 2.53564290473018, - "grad_norm": 0.38353556571368813, - "learning_rate": 3.5544327447495598e-06, - "loss": 0.033, - "step": 5709 - }, - { - "epoch": 2.536087053075727, - "grad_norm": 0.3857238286700737, - "learning_rate": 3.5525771022150746e-06, - "loss": 0.0282, - "step": 5710 - }, - { - "epoch": 2.536531201421275, - "grad_norm": 0.48091169542814494, - "learning_rate": 3.5507216772528392e-06, - "loss": 0.0354, - "step": 5711 - }, - { - "epoch": 2.536975349766822, - "grad_norm": 1.0810667528006974, - "learning_rate": 3.548866470141753e-06, - "loss": 0.0411, - "step": 5712 - }, - { - "epoch": 2.5374194981123694, - "grad_norm": 0.37822644450804105, - "learning_rate": 3.547011481160686e-06, - "loss": 0.0287, - "step": 5713 - }, - { - "epoch": 2.537863646457917, - "grad_norm": 0.44547431389156866, - "learning_rate": 3.5451567105884777e-06, - "loss": 0.0446, - "step": 5714 - }, - { - "epoch": 2.5383077948034645, - "grad_norm": 0.33403093136486245, - "learning_rate": 3.543302158703929e-06, - "loss": 0.0304, - "step": 5715 - }, - { - "epoch": 2.5387519431490118, - "grad_norm": 0.3853720547985273, - "learning_rate": 3.5414478257858097e-06, - "loss": 0.0316, - "step": 5716 - }, - { - "epoch": 2.539196091494559, - "grad_norm": 0.4078086186517431, - "learning_rate": 3.53959371211286e-06, - "loss": 0.0359, - "step": 5717 - }, - { - "epoch": 2.5396402398401063, - "grad_norm": 0.3855600056546902, - "learning_rate": 3.5377398179637807e-06, - "loss": 0.0201, - "step": 5718 - }, - { - "epoch": 2.540084388185654, - "grad_norm": 0.3712283572301996, - "learning_rate": 3.5358861436172487e-06, - "loss": 0.0327, - "step": 5719 - }, - { - "epoch": 2.5405285365312014, - "grad_norm": 0.4121457515255565, - "learning_rate": 3.5340326893518993e-06, - "loss": 0.0337, - "step": 5720 - }, - { - "epoch": 2.540972684876749, - "grad_norm": 0.4589358591248568, - "learning_rate": 3.5321794554463397e-06, - "loss": 0.0282, - "step": 5721 - }, - { - "epoch": 2.5414168332222964, - "grad_norm": 0.41283412784679185, - "learning_rate": 3.530326442179142e-06, - "loss": 0.0433, - "step": 5722 - }, - { - "epoch": 2.5418609815678437, - "grad_norm": 0.3832687248919424, - "learning_rate": 3.5284736498288452e-06, - "loss": 0.0275, - "step": 5723 - }, - { - "epoch": 2.542305129913391, - "grad_norm": 0.4017937297419123, - "learning_rate": 3.526621078673954e-06, - "loss": 0.0301, - "step": 5724 - }, - { - "epoch": 2.5427492782589383, - "grad_norm": 0.3799788724726438, - "learning_rate": 3.5247687289929443e-06, - "loss": 0.0352, - "step": 5725 - }, - { - "epoch": 2.543193426604486, - "grad_norm": 0.3381062594883136, - "learning_rate": 3.5229166010642544e-06, - "loss": 0.0237, - "step": 5726 - }, - { - "epoch": 2.5436375749500333, - "grad_norm": 0.9370246012561935, - "learning_rate": 3.521064695166292e-06, - "loss": 0.0417, - "step": 5727 - }, - { - "epoch": 2.5440817232955806, - "grad_norm": 0.5170089647125808, - "learning_rate": 3.5192130115774283e-06, - "loss": 0.0379, - "step": 5728 - }, - { - "epoch": 2.5445258716411283, - "grad_norm": 0.5478100844231081, - "learning_rate": 3.5173615505760015e-06, - "loss": 0.0327, - "step": 5729 - }, - { - "epoch": 2.5449700199866756, - "grad_norm": 0.3263956967060353, - "learning_rate": 3.5155103124403184e-06, - "loss": 0.0235, - "step": 5730 - }, - { - "epoch": 2.545414168332223, - "grad_norm": 0.4151765481340016, - "learning_rate": 3.513659297448655e-06, - "loss": 0.0315, - "step": 5731 - }, - { - "epoch": 2.54585831667777, - "grad_norm": 0.4249849360391111, - "learning_rate": 3.511808505879247e-06, - "loss": 0.037, - "step": 5732 - }, - { - "epoch": 2.546302465023318, - "grad_norm": 0.430816357070103, - "learning_rate": 3.5099579380103e-06, - "loss": 0.0313, - "step": 5733 - }, - { - "epoch": 2.546746613368865, - "grad_norm": 0.36068218836576676, - "learning_rate": 3.508107594119987e-06, - "loss": 0.0248, - "step": 5734 - }, - { - "epoch": 2.5471907617144125, - "grad_norm": 0.5603158518242672, - "learning_rate": 3.506257474486444e-06, - "loss": 0.0407, - "step": 5735 - }, - { - "epoch": 2.5476349100599602, - "grad_norm": 0.457105631896688, - "learning_rate": 3.5044075793877784e-06, - "loss": 0.0285, - "step": 5736 - }, - { - "epoch": 2.5480790584055075, - "grad_norm": 0.4513751440943639, - "learning_rate": 3.5025579091020584e-06, - "loss": 0.0295, - "step": 5737 - }, - { - "epoch": 2.548523206751055, - "grad_norm": 0.4467104682114999, - "learning_rate": 3.500708463907323e-06, - "loss": 0.0297, - "step": 5738 - }, - { - "epoch": 2.548967355096602, - "grad_norm": 0.3775580565507929, - "learning_rate": 3.498859244081573e-06, - "loss": 0.0302, - "step": 5739 - }, - { - "epoch": 2.54941150344215, - "grad_norm": 0.4736436006509606, - "learning_rate": 3.4970102499027787e-06, - "loss": 0.0324, - "step": 5740 - }, - { - "epoch": 2.549855651787697, - "grad_norm": 0.38970978243484516, - "learning_rate": 3.4951614816488733e-06, - "loss": 0.0239, - "step": 5741 - }, - { - "epoch": 2.5502998001332444, - "grad_norm": 0.3858729170075839, - "learning_rate": 3.4933129395977627e-06, - "loss": 0.0282, - "step": 5742 - }, - { - "epoch": 2.550743948478792, - "grad_norm": 0.46868923999899664, - "learning_rate": 3.491464624027311e-06, - "loss": 0.0309, - "step": 5743 - }, - { - "epoch": 2.5511880968243394, - "grad_norm": 0.42676283372780777, - "learning_rate": 3.489616535215351e-06, - "loss": 0.0288, - "step": 5744 - }, - { - "epoch": 2.5516322451698867, - "grad_norm": 0.47537306808660484, - "learning_rate": 3.487768673439684e-06, - "loss": 0.0409, - "step": 5745 - }, - { - "epoch": 2.552076393515434, - "grad_norm": 0.5509640851909691, - "learning_rate": 3.4859210389780717e-06, - "loss": 0.0537, - "step": 5746 - }, - { - "epoch": 2.5525205418609813, - "grad_norm": 0.4532699668327261, - "learning_rate": 3.484073632108248e-06, - "loss": 0.0374, - "step": 5747 - }, - { - "epoch": 2.552964690206529, - "grad_norm": 0.30162200474796635, - "learning_rate": 3.4822264531079074e-06, - "loss": 0.0346, - "step": 5748 - }, - { - "epoch": 2.5534088385520763, - "grad_norm": 0.41586878305661834, - "learning_rate": 3.4803795022547152e-06, - "loss": 0.0385, - "step": 5749 - }, - { - "epoch": 2.553852986897624, - "grad_norm": 0.5284306714224316, - "learning_rate": 3.478532779826297e-06, - "loss": 0.0329, - "step": 5750 - }, - { - "epoch": 2.5542971352431714, - "grad_norm": 0.3711615788486314, - "learning_rate": 3.476686286100247e-06, - "loss": 0.0277, - "step": 5751 - }, - { - "epoch": 2.5547412835887187, - "grad_norm": 0.4470649615235087, - "learning_rate": 3.4748400213541233e-06, - "loss": 0.0463, - "step": 5752 - }, - { - "epoch": 2.555185431934266, - "grad_norm": 0.40717240259190046, - "learning_rate": 3.4729939858654548e-06, - "loss": 0.0296, - "step": 5753 - }, - { - "epoch": 2.5556295802798132, - "grad_norm": 0.4846910790786083, - "learning_rate": 3.471148179911728e-06, - "loss": 0.0404, - "step": 5754 - }, - { - "epoch": 2.556073728625361, - "grad_norm": 0.37627232321927323, - "learning_rate": 3.4693026037704012e-06, - "loss": 0.0334, - "step": 5755 - }, - { - "epoch": 2.5565178769709083, - "grad_norm": 0.34629139309877494, - "learning_rate": 3.467457257718896e-06, - "loss": 0.0272, - "step": 5756 - }, - { - "epoch": 2.5569620253164556, - "grad_norm": 0.3421995312642631, - "learning_rate": 3.4656121420345968e-06, - "loss": 0.0317, - "step": 5757 - }, - { - "epoch": 2.5574061736620033, - "grad_norm": 0.44077802939094335, - "learning_rate": 3.463767256994856e-06, - "loss": 0.0549, - "step": 5758 - }, - { - "epoch": 2.5578503220075506, - "grad_norm": 0.3954224399718614, - "learning_rate": 3.461922602876995e-06, - "loss": 0.0334, - "step": 5759 - }, - { - "epoch": 2.558294470353098, - "grad_norm": 0.41169694619074404, - "learning_rate": 3.460078179958294e-06, - "loss": 0.0341, - "step": 5760 - }, - { - "epoch": 2.558738618698645, - "grad_norm": 0.4812985244340657, - "learning_rate": 3.458233988516e-06, - "loss": 0.0407, - "step": 5761 - }, - { - "epoch": 2.559182767044193, - "grad_norm": 0.3331989432004887, - "learning_rate": 3.4563900288273287e-06, - "loss": 0.0247, - "step": 5762 - }, - { - "epoch": 2.55962691538974, - "grad_norm": 0.3885261825147752, - "learning_rate": 3.454546301169458e-06, - "loss": 0.0294, - "step": 5763 - }, - { - "epoch": 2.5600710637352875, - "grad_norm": 0.43455546109798127, - "learning_rate": 3.4527028058195276e-06, - "loss": 0.0276, - "step": 5764 - }, - { - "epoch": 2.560515212080835, - "grad_norm": 0.41019774224604005, - "learning_rate": 3.4508595430546516e-06, - "loss": 0.0376, - "step": 5765 - }, - { - "epoch": 2.5609593604263825, - "grad_norm": 0.5340205931151287, - "learning_rate": 3.4490165131519027e-06, - "loss": 0.0363, - "step": 5766 - }, - { - "epoch": 2.56140350877193, - "grad_norm": 0.32981175279651753, - "learning_rate": 3.4471737163883178e-06, - "loss": 0.0341, - "step": 5767 - }, - { - "epoch": 2.561847657117477, - "grad_norm": 0.4191900996748185, - "learning_rate": 3.4453311530409008e-06, - "loss": 0.0336, - "step": 5768 - }, - { - "epoch": 2.5622918054630244, - "grad_norm": 0.4533154388512584, - "learning_rate": 3.4434888233866205e-06, - "loss": 0.0379, - "step": 5769 - }, - { - "epoch": 2.562735953808572, - "grad_norm": 0.4083356757917847, - "learning_rate": 3.4416467277024097e-06, - "loss": 0.0256, - "step": 5770 - }, - { - "epoch": 2.5631801021541194, - "grad_norm": 0.46425748132873407, - "learning_rate": 3.4398048662651693e-06, - "loss": 0.0455, - "step": 5771 - }, - { - "epoch": 2.563624250499667, - "grad_norm": 0.3043869275862561, - "learning_rate": 3.4379632393517593e-06, - "loss": 0.022, - "step": 5772 - }, - { - "epoch": 2.5640683988452144, - "grad_norm": 0.7475044005307006, - "learning_rate": 3.43612184723901e-06, - "loss": 0.0502, - "step": 5773 - }, - { - "epoch": 2.5645125471907617, - "grad_norm": 0.32220506593319287, - "learning_rate": 3.4342806902037118e-06, - "loss": 0.0271, - "step": 5774 - }, - { - "epoch": 2.564956695536309, - "grad_norm": 0.36616059681338226, - "learning_rate": 3.4324397685226217e-06, - "loss": 0.0327, - "step": 5775 - }, - { - "epoch": 2.5654008438818563, - "grad_norm": 0.4598617313466593, - "learning_rate": 3.4305990824724645e-06, - "loss": 0.0255, - "step": 5776 - }, - { - "epoch": 2.565844992227404, - "grad_norm": 0.4169024524406747, - "learning_rate": 3.428758632329925e-06, - "loss": 0.0261, - "step": 5777 - }, - { - "epoch": 2.5662891405729513, - "grad_norm": 0.46153415454886376, - "learning_rate": 3.426918418371652e-06, - "loss": 0.0313, - "step": 5778 - }, - { - "epoch": 2.5667332889184986, - "grad_norm": 0.5358677284520018, - "learning_rate": 3.4250784408742644e-06, - "loss": 0.045, - "step": 5779 - }, - { - "epoch": 2.5671774372640463, - "grad_norm": 0.4437829933438522, - "learning_rate": 3.4232387001143396e-06, - "loss": 0.0423, - "step": 5780 - }, - { - "epoch": 2.5676215856095936, - "grad_norm": 0.39105850673238723, - "learning_rate": 3.4213991963684212e-06, - "loss": 0.0353, - "step": 5781 - }, - { - "epoch": 2.568065733955141, - "grad_norm": 0.5049529429986211, - "learning_rate": 3.419559929913021e-06, - "loss": 0.0345, - "step": 5782 - }, - { - "epoch": 2.568509882300688, - "grad_norm": 0.4292519880189004, - "learning_rate": 3.4177209010246104e-06, - "loss": 0.0372, - "step": 5783 - }, - { - "epoch": 2.568954030646236, - "grad_norm": 0.373978887044069, - "learning_rate": 3.415882109979627e-06, - "loss": 0.0428, - "step": 5784 - }, - { - "epoch": 2.5693981789917832, - "grad_norm": 0.500647171885995, - "learning_rate": 3.4140435570544708e-06, - "loss": 0.036, - "step": 5785 - }, - { - "epoch": 2.5698423273373305, - "grad_norm": 0.5585745761796531, - "learning_rate": 3.4122052425255097e-06, - "loss": 0.0371, - "step": 5786 - }, - { - "epoch": 2.5702864756828783, - "grad_norm": 0.40623789381422887, - "learning_rate": 3.4103671666690706e-06, - "loss": 0.0346, - "step": 5787 - }, - { - "epoch": 2.5707306240284256, - "grad_norm": 0.5204708462096757, - "learning_rate": 3.4085293297614513e-06, - "loss": 0.0341, - "step": 5788 - }, - { - "epoch": 2.571174772373973, - "grad_norm": 0.38514191058110864, - "learning_rate": 3.406691732078907e-06, - "loss": 0.0247, - "step": 5789 - }, - { - "epoch": 2.57161892071952, - "grad_norm": 0.47415946915349233, - "learning_rate": 3.4048543738976624e-06, - "loss": 0.03, - "step": 5790 - }, - { - "epoch": 2.572063069065068, - "grad_norm": 0.35195512662300593, - "learning_rate": 3.4030172554939022e-06, - "loss": 0.0179, - "step": 5791 - }, - { - "epoch": 2.572507217410615, - "grad_norm": 0.455360916505429, - "learning_rate": 3.401180377143774e-06, - "loss": 0.0355, - "step": 5792 - }, - { - "epoch": 2.5729513657561625, - "grad_norm": 0.6594611426447926, - "learning_rate": 3.399343739123395e-06, - "loss": 0.04, - "step": 5793 - }, - { - "epoch": 2.57339551410171, - "grad_norm": 0.30764478468780626, - "learning_rate": 3.3975073417088445e-06, - "loss": 0.024, - "step": 5794 - }, - { - "epoch": 2.5738396624472575, - "grad_norm": 0.5139517791311652, - "learning_rate": 3.3956711851761603e-06, - "loss": 0.0386, - "step": 5795 - }, - { - "epoch": 2.5742838107928048, - "grad_norm": 0.44276110456272694, - "learning_rate": 3.393835269801351e-06, - "loss": 0.0276, - "step": 5796 - }, - { - "epoch": 2.574727959138352, - "grad_norm": 0.6084624820775043, - "learning_rate": 3.3919995958603845e-06, - "loss": 0.0349, - "step": 5797 - }, - { - "epoch": 2.5751721074838994, - "grad_norm": 0.6187475898080996, - "learning_rate": 3.3901641636291925e-06, - "loss": 0.0369, - "step": 5798 - }, - { - "epoch": 2.575616255829447, - "grad_norm": 0.4166403983258688, - "learning_rate": 3.388328973383673e-06, - "loss": 0.028, - "step": 5799 - }, - { - "epoch": 2.5760604041749944, - "grad_norm": 0.37895711615417893, - "learning_rate": 3.3864940253996885e-06, - "loss": 0.0309, - "step": 5800 - }, - { - "epoch": 2.576504552520542, - "grad_norm": 0.4165330638891502, - "learning_rate": 3.3846593199530598e-06, - "loss": 0.0324, - "step": 5801 - }, - { - "epoch": 2.5769487008660894, - "grad_norm": 0.3850803256916831, - "learning_rate": 3.3828248573195744e-06, - "loss": 0.0243, - "step": 5802 - }, - { - "epoch": 2.5773928492116367, - "grad_norm": 0.4535252370266945, - "learning_rate": 3.3809906377749853e-06, - "loss": 0.026, - "step": 5803 - }, - { - "epoch": 2.577836997557184, - "grad_norm": 0.4473597091096733, - "learning_rate": 3.3791566615950034e-06, - "loss": 0.0446, - "step": 5804 - }, - { - "epoch": 2.5782811459027313, - "grad_norm": 0.4461365886821962, - "learning_rate": 3.37732292905531e-06, - "loss": 0.0353, - "step": 5805 - }, - { - "epoch": 2.578725294248279, - "grad_norm": 0.4044232712398919, - "learning_rate": 3.375489440431544e-06, - "loss": 0.0356, - "step": 5806 - }, - { - "epoch": 2.5791694425938263, - "grad_norm": 0.4100077927626587, - "learning_rate": 3.373656195999312e-06, - "loss": 0.0288, - "step": 5807 - }, - { - "epoch": 2.5796135909393736, - "grad_norm": 0.49681714136189903, - "learning_rate": 3.3718231960341807e-06, - "loss": 0.0392, - "step": 5808 - }, - { - "epoch": 2.5800577392849213, - "grad_norm": 0.3508735282517198, - "learning_rate": 3.3699904408116778e-06, - "loss": 0.0266, - "step": 5809 - }, - { - "epoch": 2.5805018876304686, - "grad_norm": 0.4685055266479743, - "learning_rate": 3.368157930607303e-06, - "loss": 0.0322, - "step": 5810 - }, - { - "epoch": 2.580946035976016, - "grad_norm": 0.3392092135667048, - "learning_rate": 3.3663256656965115e-06, - "loss": 0.0266, - "step": 5811 - }, - { - "epoch": 2.581390184321563, - "grad_norm": 0.3673851977352648, - "learning_rate": 3.364493646354724e-06, - "loss": 0.0291, - "step": 5812 - }, - { - "epoch": 2.581834332667111, - "grad_norm": 0.4826180223086163, - "learning_rate": 3.3626618728573233e-06, - "loss": 0.0366, - "step": 5813 - }, - { - "epoch": 2.5822784810126582, - "grad_norm": 0.46379192115203366, - "learning_rate": 3.3608303454796578e-06, - "loss": 0.0359, - "step": 5814 - }, - { - "epoch": 2.5827226293582055, - "grad_norm": 0.33507634737295205, - "learning_rate": 3.3589990644970325e-06, - "loss": 0.0293, - "step": 5815 - }, - { - "epoch": 2.5831667777037532, - "grad_norm": 0.394019845804789, - "learning_rate": 3.3571680301847265e-06, - "loss": 0.0287, - "step": 5816 - }, - { - "epoch": 2.5836109260493005, - "grad_norm": 0.4267750495828387, - "learning_rate": 3.355337242817972e-06, - "loss": 0.0225, - "step": 5817 - }, - { - "epoch": 2.584055074394848, - "grad_norm": 0.40408538263164145, - "learning_rate": 3.3535067026719683e-06, - "loss": 0.0354, - "step": 5818 - }, - { - "epoch": 2.584499222740395, - "grad_norm": 0.3264537051893323, - "learning_rate": 3.3516764100218744e-06, - "loss": 0.0184, - "step": 5819 - }, - { - "epoch": 2.584943371085943, - "grad_norm": 0.5823548169064077, - "learning_rate": 3.3498463651428183e-06, - "loss": 0.0555, - "step": 5820 - }, - { - "epoch": 2.58538751943149, - "grad_norm": 0.6018406880891104, - "learning_rate": 3.348016568309882e-06, - "loss": 0.0323, - "step": 5821 - }, - { - "epoch": 2.5858316677770374, - "grad_norm": 0.4499473097806689, - "learning_rate": 3.3461870197981205e-06, - "loss": 0.0313, - "step": 5822 - }, - { - "epoch": 2.586275816122585, - "grad_norm": 0.4573988840299094, - "learning_rate": 3.3443577198825416e-06, - "loss": 0.0217, - "step": 5823 - }, - { - "epoch": 2.5867199644681325, - "grad_norm": 0.47710512867182214, - "learning_rate": 3.342528668838123e-06, - "loss": 0.0305, - "step": 5824 - }, - { - "epoch": 2.5871641128136798, - "grad_norm": 0.3333521539172945, - "learning_rate": 3.3406998669398015e-06, - "loss": 0.0276, - "step": 5825 - }, - { - "epoch": 2.587608261159227, - "grad_norm": 0.4557037415155258, - "learning_rate": 3.338871314462474e-06, - "loss": 0.0283, - "step": 5826 - }, - { - "epoch": 2.5880524095047743, - "grad_norm": 0.4444177366240355, - "learning_rate": 3.337043011681007e-06, - "loss": 0.0328, - "step": 5827 - }, - { - "epoch": 2.588496557850322, - "grad_norm": 0.3980372800704896, - "learning_rate": 3.335214958870225e-06, - "loss": 0.0312, - "step": 5828 - }, - { - "epoch": 2.5889407061958694, - "grad_norm": 0.38163158317470414, - "learning_rate": 3.333387156304914e-06, - "loss": 0.021, - "step": 5829 - }, - { - "epoch": 2.589384854541417, - "grad_norm": 0.3444340204979025, - "learning_rate": 3.3315596042598235e-06, - "loss": 0.0283, - "step": 5830 - }, - { - "epoch": 2.5898290028869644, - "grad_norm": 0.4610922606737187, - "learning_rate": 3.3297323030096672e-06, - "loss": 0.032, - "step": 5831 - }, - { - "epoch": 2.5902731512325117, - "grad_norm": 0.41870835356631375, - "learning_rate": 3.327905252829117e-06, - "loss": 0.0448, - "step": 5832 - }, - { - "epoch": 2.590717299578059, - "grad_norm": 0.4779584466171409, - "learning_rate": 3.326078453992813e-06, - "loss": 0.0242, - "step": 5833 - }, - { - "epoch": 2.5911614479236063, - "grad_norm": 0.5325628313786069, - "learning_rate": 3.324251906775351e-06, - "loss": 0.0395, - "step": 5834 - }, - { - "epoch": 2.591605596269154, - "grad_norm": 0.358759607760571, - "learning_rate": 3.3224256114512953e-06, - "loss": 0.0316, - "step": 5835 - }, - { - "epoch": 2.5920497446147013, - "grad_norm": 0.43363197580686524, - "learning_rate": 3.3205995682951666e-06, - "loss": 0.0334, - "step": 5836 - }, - { - "epoch": 2.5924938929602486, - "grad_norm": 0.4445842523123569, - "learning_rate": 3.31877377758145e-06, - "loss": 0.033, - "step": 5837 - }, - { - "epoch": 2.5929380413057963, - "grad_norm": 0.603779398622737, - "learning_rate": 3.316948239584592e-06, - "loss": 0.0449, - "step": 5838 - }, - { - "epoch": 2.5933821896513436, - "grad_norm": 0.4392799290696822, - "learning_rate": 3.3151229545790066e-06, - "loss": 0.0384, - "step": 5839 - }, - { - "epoch": 2.593826337996891, - "grad_norm": 0.38613834204137615, - "learning_rate": 3.3132979228390615e-06, - "loss": 0.0248, - "step": 5840 - }, - { - "epoch": 2.594270486342438, - "grad_norm": 0.31483135068991924, - "learning_rate": 3.3114731446390897e-06, - "loss": 0.0204, - "step": 5841 - }, - { - "epoch": 2.594714634687986, - "grad_norm": 0.42067924470792134, - "learning_rate": 3.3096486202533884e-06, - "loss": 0.0312, - "step": 5842 - }, - { - "epoch": 2.595158783033533, - "grad_norm": 0.4747651071435795, - "learning_rate": 3.3078243499562126e-06, - "loss": 0.0325, - "step": 5843 - }, - { - "epoch": 2.5956029313790805, - "grad_norm": 0.3567548194704181, - "learning_rate": 3.3060003340217822e-06, - "loss": 0.0241, - "step": 5844 - }, - { - "epoch": 2.5960470797246282, - "grad_norm": 0.3574932064483991, - "learning_rate": 3.3041765727242773e-06, - "loss": 0.0302, - "step": 5845 - }, - { - "epoch": 2.5964912280701755, - "grad_norm": 0.8739424998669374, - "learning_rate": 3.302353066337842e-06, - "loss": 0.0458, - "step": 5846 - }, - { - "epoch": 2.596935376415723, - "grad_norm": 0.4781713811806308, - "learning_rate": 3.300529815136577e-06, - "loss": 0.0328, - "step": 5847 - }, - { - "epoch": 2.59737952476127, - "grad_norm": 0.4686560316809108, - "learning_rate": 3.2987068193945515e-06, - "loss": 0.0392, - "step": 5848 - }, - { - "epoch": 2.597823673106818, - "grad_norm": 0.7682784843583998, - "learning_rate": 3.296884079385789e-06, - "loss": 0.0432, - "step": 5849 - }, - { - "epoch": 2.598267821452365, - "grad_norm": 0.43912852313892203, - "learning_rate": 3.2950615953842816e-06, - "loss": 0.0357, - "step": 5850 - }, - { - "epoch": 2.5987119697979124, - "grad_norm": 0.4363862214055466, - "learning_rate": 3.293239367663978e-06, - "loss": 0.0355, - "step": 5851 - }, - { - "epoch": 2.59915611814346, - "grad_norm": 0.4194478678628829, - "learning_rate": 3.2914173964987905e-06, - "loss": 0.0376, - "step": 5852 - }, - { - "epoch": 2.5996002664890074, - "grad_norm": 0.3939580447848396, - "learning_rate": 3.289595682162593e-06, - "loss": 0.0351, - "step": 5853 - }, - { - "epoch": 2.6000444148345547, - "grad_norm": 0.3563672060439901, - "learning_rate": 3.2877742249292174e-06, - "loss": 0.0264, - "step": 5854 - }, - { - "epoch": 2.600488563180102, - "grad_norm": 0.45772110095911156, - "learning_rate": 3.2859530250724604e-06, - "loss": 0.0286, - "step": 5855 - }, - { - "epoch": 2.6009327115256493, - "grad_norm": 0.3788943790281068, - "learning_rate": 3.284132082866083e-06, - "loss": 0.0231, - "step": 5856 - }, - { - "epoch": 2.601376859871197, - "grad_norm": 0.42056256980059764, - "learning_rate": 3.2823113985837996e-06, - "loss": 0.0301, - "step": 5857 - }, - { - "epoch": 2.6018210082167443, - "grad_norm": 0.4731371391351043, - "learning_rate": 3.2804909724992917e-06, - "loss": 0.0421, - "step": 5858 - }, - { - "epoch": 2.602265156562292, - "grad_norm": 0.4171211336356501, - "learning_rate": 3.2786708048862e-06, - "loss": 0.0267, - "step": 5859 - }, - { - "epoch": 2.6027093049078394, - "grad_norm": 0.33335158042812324, - "learning_rate": 3.276850896018128e-06, - "loss": 0.0287, - "step": 5860 - }, - { - "epoch": 2.6031534532533867, - "grad_norm": 0.6871125152697936, - "learning_rate": 3.2750312461686346e-06, - "loss": 0.0253, - "step": 5861 - }, - { - "epoch": 2.603597601598934, - "grad_norm": 0.38868974447379917, - "learning_rate": 3.273211855611248e-06, - "loss": 0.0384, - "step": 5862 - }, - { - "epoch": 2.6040417499444812, - "grad_norm": 0.41427271665417736, - "learning_rate": 3.271392724619454e-06, - "loss": 0.0283, - "step": 5863 - }, - { - "epoch": 2.604485898290029, - "grad_norm": 0.47635109894278754, - "learning_rate": 3.2695738534666964e-06, - "loss": 0.0307, - "step": 5864 - }, - { - "epoch": 2.6049300466355763, - "grad_norm": 0.43305461847503646, - "learning_rate": 3.2677552424263836e-06, - "loss": 0.0323, - "step": 5865 - }, - { - "epoch": 2.6053741949811235, - "grad_norm": 0.5925027302243593, - "learning_rate": 3.2659368917718813e-06, - "loss": 0.0349, - "step": 5866 - }, - { - "epoch": 2.6058183433266713, - "grad_norm": 0.5670868601287159, - "learning_rate": 3.264118801776524e-06, - "loss": 0.0272, - "step": 5867 - }, - { - "epoch": 2.6062624916722186, - "grad_norm": 0.4639294675991596, - "learning_rate": 3.262300972713598e-06, - "loss": 0.0281, - "step": 5868 - }, - { - "epoch": 2.606706640017766, - "grad_norm": 0.5113196767933541, - "learning_rate": 3.2604834048563527e-06, - "loss": 0.0258, - "step": 5869 - }, - { - "epoch": 2.607150788363313, - "grad_norm": 0.37055184296693755, - "learning_rate": 3.2586660984780017e-06, - "loss": 0.0292, - "step": 5870 - }, - { - "epoch": 2.607594936708861, - "grad_norm": 0.5340070208292783, - "learning_rate": 3.256849053851716e-06, - "loss": 0.0362, - "step": 5871 - }, - { - "epoch": 2.608039085054408, - "grad_norm": 0.6461515771963295, - "learning_rate": 3.2550322712506265e-06, - "loss": 0.0378, - "step": 5872 - }, - { - "epoch": 2.6084832333999555, - "grad_norm": 0.43751867677053585, - "learning_rate": 3.2532157509478313e-06, - "loss": 0.0297, - "step": 5873 - }, - { - "epoch": 2.608927381745503, - "grad_norm": 0.4731032790298419, - "learning_rate": 3.2513994932163806e-06, - "loss": 0.0426, - "step": 5874 - }, - { - "epoch": 2.6093715300910505, - "grad_norm": 0.40443116847842614, - "learning_rate": 3.2495834983292894e-06, - "loss": 0.0315, - "step": 5875 - }, - { - "epoch": 2.609815678436598, - "grad_norm": 0.4867165911113075, - "learning_rate": 3.2477677665595333e-06, - "loss": 0.0319, - "step": 5876 - }, - { - "epoch": 2.610259826782145, - "grad_norm": 0.42932234052521123, - "learning_rate": 3.2459522981800473e-06, - "loss": 0.0465, - "step": 5877 - }, - { - "epoch": 2.610703975127693, - "grad_norm": 0.35081568597263124, - "learning_rate": 3.244137093463725e-06, - "loss": 0.0194, - "step": 5878 - }, - { - "epoch": 2.61114812347324, - "grad_norm": 0.5690602488132523, - "learning_rate": 3.2423221526834253e-06, - "loss": 0.0381, - "step": 5879 - }, - { - "epoch": 2.6115922718187874, - "grad_norm": 0.36352934356142463, - "learning_rate": 3.2405074761119648e-06, - "loss": 0.0282, - "step": 5880 - }, - { - "epoch": 2.612036420164335, - "grad_norm": 0.3774759920653436, - "learning_rate": 3.2386930640221193e-06, - "loss": 0.0344, - "step": 5881 - }, - { - "epoch": 2.6124805685098824, - "grad_norm": 0.5371544305687926, - "learning_rate": 3.2368789166866244e-06, - "loss": 0.0497, - "step": 5882 - }, - { - "epoch": 2.6129247168554297, - "grad_norm": 0.38943009927535477, - "learning_rate": 3.2350650343781775e-06, - "loss": 0.0227, - "step": 5883 - }, - { - "epoch": 2.613368865200977, - "grad_norm": 0.4501911900625063, - "learning_rate": 3.2332514173694396e-06, - "loss": 0.0395, - "step": 5884 - }, - { - "epoch": 2.6138130135465243, - "grad_norm": 0.4234499617743984, - "learning_rate": 3.2314380659330246e-06, - "loss": 0.0301, - "step": 5885 - }, - { - "epoch": 2.614257161892072, - "grad_norm": 0.4318219863554864, - "learning_rate": 3.22962498034151e-06, - "loss": 0.0294, - "step": 5886 - }, - { - "epoch": 2.6147013102376193, - "grad_norm": 0.503946757308322, - "learning_rate": 3.227812160867436e-06, - "loss": 0.0394, - "step": 5887 - }, - { - "epoch": 2.615145458583167, - "grad_norm": 0.3890770124231559, - "learning_rate": 3.2259996077832976e-06, - "loss": 0.0267, - "step": 5888 - }, - { - "epoch": 2.6155896069287143, - "grad_norm": 0.3697580136655012, - "learning_rate": 3.2241873213615514e-06, - "loss": 0.034, - "step": 5889 - }, - { - "epoch": 2.6160337552742616, - "grad_norm": 0.5649235090654835, - "learning_rate": 3.2223753018746186e-06, - "loss": 0.0272, - "step": 5890 - }, - { - "epoch": 2.616477903619809, - "grad_norm": 0.3708345448824024, - "learning_rate": 3.220563549594874e-06, - "loss": 0.035, - "step": 5891 - }, - { - "epoch": 2.616922051965356, - "grad_norm": 0.3432122520668072, - "learning_rate": 3.2187520647946547e-06, - "loss": 0.0283, - "step": 5892 - }, - { - "epoch": 2.617366200310904, - "grad_norm": 0.42022838385345057, - "learning_rate": 3.2169408477462594e-06, - "loss": 0.0364, - "step": 5893 - }, - { - "epoch": 2.6178103486564512, - "grad_norm": 0.44719094760718237, - "learning_rate": 3.2151298987219437e-06, - "loss": 0.0325, - "step": 5894 - }, - { - "epoch": 2.6182544970019985, - "grad_norm": 0.35618319174657215, - "learning_rate": 3.2133192179939215e-06, - "loss": 0.0256, - "step": 5895 - }, - { - "epoch": 2.6186986453475463, - "grad_norm": 0.3925862521483163, - "learning_rate": 3.2115088058343725e-06, - "loss": 0.0282, - "step": 5896 - }, - { - "epoch": 2.6191427936930936, - "grad_norm": 0.45474329229543986, - "learning_rate": 3.209698662515432e-06, - "loss": 0.0296, - "step": 5897 - }, - { - "epoch": 2.619586942038641, - "grad_norm": 0.3953757491585112, - "learning_rate": 3.2078887883091948e-06, - "loss": 0.0422, - "step": 5898 - }, - { - "epoch": 2.620031090384188, - "grad_norm": 0.36604722451208876, - "learning_rate": 3.2060791834877136e-06, - "loss": 0.029, - "step": 5899 - }, - { - "epoch": 2.620475238729736, - "grad_norm": 0.32800664474353197, - "learning_rate": 3.204269848323004e-06, - "loss": 0.0295, - "step": 5900 - }, - { - "epoch": 2.620919387075283, - "grad_norm": 0.5458287273191036, - "learning_rate": 3.2024607830870424e-06, - "loss": 0.0475, - "step": 5901 - }, - { - "epoch": 2.6213635354208304, - "grad_norm": 0.43819732935782096, - "learning_rate": 3.2006519880517597e-06, - "loss": 0.0452, - "step": 5902 - }, - { - "epoch": 2.621807683766378, - "grad_norm": 0.36142275590555617, - "learning_rate": 3.1988434634890476e-06, - "loss": 0.0277, - "step": 5903 - }, - { - "epoch": 2.6222518321119255, - "grad_norm": 0.4477755001983317, - "learning_rate": 3.197035209670761e-06, - "loss": 0.0294, - "step": 5904 - }, - { - "epoch": 2.6226959804574728, - "grad_norm": 0.46965829732778464, - "learning_rate": 3.1952272268687083e-06, - "loss": 0.0336, - "step": 5905 - }, - { - "epoch": 2.62314012880302, - "grad_norm": 0.39994107522556216, - "learning_rate": 3.19341951535466e-06, - "loss": 0.0214, - "step": 5906 - }, - { - "epoch": 2.6235842771485673, - "grad_norm": 0.4238276596672287, - "learning_rate": 3.1916120754003475e-06, - "loss": 0.0269, - "step": 5907 - }, - { - "epoch": 2.624028425494115, - "grad_norm": 1.8594679605411257, - "learning_rate": 3.1898049072774605e-06, - "loss": 0.0367, - "step": 5908 - }, - { - "epoch": 2.6244725738396624, - "grad_norm": 0.31000445284347417, - "learning_rate": 3.1879980112576457e-06, - "loss": 0.0248, - "step": 5909 - }, - { - "epoch": 2.62491672218521, - "grad_norm": 0.40069203774397927, - "learning_rate": 3.1861913876125093e-06, - "loss": 0.0314, - "step": 5910 - }, - { - "epoch": 2.6253608705307574, - "grad_norm": 0.39410237773498447, - "learning_rate": 3.1843850366136198e-06, - "loss": 0.0293, - "step": 5911 - }, - { - "epoch": 2.6258050188763047, - "grad_norm": 0.3268432306409947, - "learning_rate": 3.182578958532499e-06, - "loss": 0.0266, - "step": 5912 - }, - { - "epoch": 2.626249167221852, - "grad_norm": 0.45108591338261844, - "learning_rate": 3.180773153640635e-06, - "loss": 0.0341, - "step": 5913 - }, - { - "epoch": 2.6266933155673993, - "grad_norm": 0.43982934139295143, - "learning_rate": 3.178967622209469e-06, - "loss": 0.0328, - "step": 5914 - }, - { - "epoch": 2.627137463912947, - "grad_norm": 0.34491560520514963, - "learning_rate": 3.177162364510404e-06, - "loss": 0.0243, - "step": 5915 - }, - { - "epoch": 2.6275816122584943, - "grad_norm": 0.404112232349346, - "learning_rate": 3.175357380814799e-06, - "loss": 0.0233, - "step": 5916 - }, - { - "epoch": 2.628025760604042, - "grad_norm": 0.4687180614976695, - "learning_rate": 3.1735526713939757e-06, - "loss": 0.0337, - "step": 5917 - }, - { - "epoch": 2.6284699089495893, - "grad_norm": 0.37747833327236135, - "learning_rate": 3.1717482365192106e-06, - "loss": 0.0263, - "step": 5918 - }, - { - "epoch": 2.6289140572951366, - "grad_norm": 0.38430475009185994, - "learning_rate": 3.1699440764617432e-06, - "loss": 0.0398, - "step": 5919 - }, - { - "epoch": 2.629358205640684, - "grad_norm": 0.39716398492985894, - "learning_rate": 3.1681401914927678e-06, - "loss": 0.0324, - "step": 5920 - }, - { - "epoch": 2.629802353986231, - "grad_norm": 0.40985834454742587, - "learning_rate": 3.1663365818834406e-06, - "loss": 0.0365, - "step": 5921 - }, - { - "epoch": 2.630246502331779, - "grad_norm": 0.32571897096876656, - "learning_rate": 3.1645332479048734e-06, - "loss": 0.03, - "step": 5922 - }, - { - "epoch": 2.630690650677326, - "grad_norm": 0.41303965455433855, - "learning_rate": 3.1627301898281364e-06, - "loss": 0.0256, - "step": 5923 - }, - { - "epoch": 2.6311347990228735, - "grad_norm": 0.4244488216427459, - "learning_rate": 3.1609274079242625e-06, - "loss": 0.0383, - "step": 5924 - }, - { - "epoch": 2.6315789473684212, - "grad_norm": 0.5719404451011891, - "learning_rate": 3.159124902464241e-06, - "loss": 0.0549, - "step": 5925 - }, - { - "epoch": 2.6320230957139685, - "grad_norm": 0.4271035366652883, - "learning_rate": 3.1573226737190164e-06, - "loss": 0.0365, - "step": 5926 - }, - { - "epoch": 2.632467244059516, - "grad_norm": 0.3779054549587856, - "learning_rate": 3.155520721959496e-06, - "loss": 0.0338, - "step": 5927 - }, - { - "epoch": 2.632911392405063, - "grad_norm": 0.3266233056591113, - "learning_rate": 3.1537190474565437e-06, - "loss": 0.0219, - "step": 5928 - }, - { - "epoch": 2.633355540750611, - "grad_norm": 0.6026007410470748, - "learning_rate": 3.151917650480979e-06, - "loss": 0.0593, - "step": 5929 - }, - { - "epoch": 2.633799689096158, - "grad_norm": 0.47698196221281425, - "learning_rate": 3.1501165313035877e-06, - "loss": 0.0393, - "step": 5930 - }, - { - "epoch": 2.6342438374417054, - "grad_norm": 0.47225489877722016, - "learning_rate": 3.148315690195104e-06, - "loss": 0.036, - "step": 5931 - }, - { - "epoch": 2.634687985787253, - "grad_norm": 0.32874590760012345, - "learning_rate": 3.146515127426228e-06, - "loss": 0.021, - "step": 5932 - }, - { - "epoch": 2.6351321341328005, - "grad_norm": 0.4293763106482794, - "learning_rate": 3.144714843267613e-06, - "loss": 0.0334, - "step": 5933 - }, - { - "epoch": 2.6355762824783477, - "grad_norm": 0.34918767908800713, - "learning_rate": 3.142914837989873e-06, - "loss": 0.0235, - "step": 5934 - }, - { - "epoch": 2.636020430823895, - "grad_norm": 0.514273957879937, - "learning_rate": 3.1411151118635774e-06, - "loss": 0.0335, - "step": 5935 - }, - { - "epoch": 2.6364645791694423, - "grad_norm": 0.42893295828776457, - "learning_rate": 3.1393156651592597e-06, - "loss": 0.0271, - "step": 5936 - }, - { - "epoch": 2.63690872751499, - "grad_norm": 0.5880664186723723, - "learning_rate": 3.137516498147405e-06, - "loss": 0.0346, - "step": 5937 - }, - { - "epoch": 2.6373528758605373, - "grad_norm": 0.4554614929699438, - "learning_rate": 3.1357176110984578e-06, - "loss": 0.0466, - "step": 5938 - }, - { - "epoch": 2.637797024206085, - "grad_norm": 0.4626649260029378, - "learning_rate": 3.1339190042828227e-06, - "loss": 0.0325, - "step": 5939 - }, - { - "epoch": 2.6382411725516324, - "grad_norm": 0.385619247407723, - "learning_rate": 3.132120677970859e-06, - "loss": 0.0317, - "step": 5940 - }, - { - "epoch": 2.6386853208971797, - "grad_norm": 0.772804476594651, - "learning_rate": 3.1303226324328896e-06, - "loss": 0.041, - "step": 5941 - }, - { - "epoch": 2.639129469242727, - "grad_norm": 0.7245537321538326, - "learning_rate": 3.1285248679391866e-06, - "loss": 0.0335, - "step": 5942 - }, - { - "epoch": 2.6395736175882742, - "grad_norm": 0.4668910845302412, - "learning_rate": 3.1267273847599888e-06, - "loss": 0.0282, - "step": 5943 - }, - { - "epoch": 2.640017765933822, - "grad_norm": 0.4437588379794779, - "learning_rate": 3.1249301831654842e-06, - "loss": 0.0248, - "step": 5944 - }, - { - "epoch": 2.6404619142793693, - "grad_norm": 0.391809580171727, - "learning_rate": 3.123133263425827e-06, - "loss": 0.0218, - "step": 5945 - }, - { - "epoch": 2.6409060626249166, - "grad_norm": 0.4573419094850795, - "learning_rate": 3.1213366258111207e-06, - "loss": 0.0377, - "step": 5946 - }, - { - "epoch": 2.6413502109704643, - "grad_norm": 0.3567944286524997, - "learning_rate": 3.1195402705914337e-06, - "loss": 0.0244, - "step": 5947 - }, - { - "epoch": 2.6417943593160116, - "grad_norm": 0.5003131202018632, - "learning_rate": 3.1177441980367873e-06, - "loss": 0.0389, - "step": 5948 - }, - { - "epoch": 2.642238507661559, - "grad_norm": 0.5664593011176533, - "learning_rate": 3.115948408417162e-06, - "loss": 0.027, - "step": 5949 - }, - { - "epoch": 2.642682656007106, - "grad_norm": 0.4398029849585822, - "learning_rate": 3.1141529020024964e-06, - "loss": 0.0306, - "step": 5950 - }, - { - "epoch": 2.643126804352654, - "grad_norm": 0.3797625962528218, - "learning_rate": 3.1123576790626825e-06, - "loss": 0.0266, - "step": 5951 - }, - { - "epoch": 2.643570952698201, - "grad_norm": 0.41053216679804944, - "learning_rate": 3.1105627398675743e-06, - "loss": 0.0352, - "step": 5952 - }, - { - "epoch": 2.6440151010437485, - "grad_norm": 0.3836633925963885, - "learning_rate": 3.1087680846869844e-06, - "loss": 0.0324, - "step": 5953 - }, - { - "epoch": 2.644459249389296, - "grad_norm": 0.439056518784003, - "learning_rate": 3.1069737137906776e-06, - "loss": 0.042, - "step": 5954 - }, - { - "epoch": 2.6449033977348435, - "grad_norm": 0.45357743660503874, - "learning_rate": 3.1051796274483776e-06, - "loss": 0.0343, - "step": 5955 - }, - { - "epoch": 2.645347546080391, - "grad_norm": 0.38982067943155413, - "learning_rate": 3.1033858259297677e-06, - "loss": 0.0374, - "step": 5956 - }, - { - "epoch": 2.645791694425938, - "grad_norm": 0.36926658548126234, - "learning_rate": 3.1015923095044844e-06, - "loss": 0.0307, - "step": 5957 - }, - { - "epoch": 2.646235842771486, - "grad_norm": 0.5321168036178672, - "learning_rate": 3.0997990784421273e-06, - "loss": 0.0571, - "step": 5958 - }, - { - "epoch": 2.646679991117033, - "grad_norm": 0.46761980336540304, - "learning_rate": 3.0980061330122463e-06, - "loss": 0.0398, - "step": 5959 - }, - { - "epoch": 2.6471241394625804, - "grad_norm": 0.41367845862555414, - "learning_rate": 3.096213473484354e-06, - "loss": 0.0211, - "step": 5960 - }, - { - "epoch": 2.647568287808128, - "grad_norm": 0.42099626188495787, - "learning_rate": 3.094421100127916e-06, - "loss": 0.0365, - "step": 5961 - }, - { - "epoch": 2.6480124361536754, - "grad_norm": 0.4995405826324344, - "learning_rate": 3.092629013212356e-06, - "loss": 0.0394, - "step": 5962 - }, - { - "epoch": 2.6484565844992227, - "grad_norm": 0.4212882639660684, - "learning_rate": 3.090837213007054e-06, - "loss": 0.0357, - "step": 5963 - }, - { - "epoch": 2.64890073284477, - "grad_norm": 0.4419706138728259, - "learning_rate": 3.0890456997813534e-06, - "loss": 0.0382, - "step": 5964 - }, - { - "epoch": 2.6493448811903173, - "grad_norm": 0.5281379267202536, - "learning_rate": 3.087254473804544e-06, - "loss": 0.0442, - "step": 5965 - }, - { - "epoch": 2.649789029535865, - "grad_norm": 0.9881997927190076, - "learning_rate": 3.0854635353458795e-06, - "loss": 0.0348, - "step": 5966 - }, - { - "epoch": 2.6502331778814123, - "grad_norm": 0.4051851073824622, - "learning_rate": 3.083672884674568e-06, - "loss": 0.0328, - "step": 5967 - }, - { - "epoch": 2.65067732622696, - "grad_norm": 0.43295642683770935, - "learning_rate": 3.081882522059774e-06, - "loss": 0.0445, - "step": 5968 - }, - { - "epoch": 2.6511214745725074, - "grad_norm": 0.7752250617010704, - "learning_rate": 3.0800924477706185e-06, - "loss": 0.0366, - "step": 5969 - }, - { - "epoch": 2.6515656229180546, - "grad_norm": 0.551166830656724, - "learning_rate": 3.0783026620761846e-06, - "loss": 0.0313, - "step": 5970 - }, - { - "epoch": 2.652009771263602, - "grad_norm": 0.35152614881285826, - "learning_rate": 3.076513165245504e-06, - "loss": 0.0279, - "step": 5971 - }, - { - "epoch": 2.6524539196091492, - "grad_norm": 0.39759843537124434, - "learning_rate": 3.0747239575475674e-06, - "loss": 0.0363, - "step": 5972 - }, - { - "epoch": 2.652898067954697, - "grad_norm": 0.4918571550335759, - "learning_rate": 3.072935039251327e-06, - "loss": 0.03, - "step": 5973 - }, - { - "epoch": 2.6533422163002442, - "grad_norm": 0.3903119645882834, - "learning_rate": 3.071146410625682e-06, - "loss": 0.0262, - "step": 5974 - }, - { - "epoch": 2.6537863646457915, - "grad_norm": 0.3990290251739691, - "learning_rate": 3.0693580719395e-06, - "loss": 0.0457, - "step": 5975 - }, - { - "epoch": 2.6542305129913393, - "grad_norm": 0.4382406546867813, - "learning_rate": 3.067570023461594e-06, - "loss": 0.0345, - "step": 5976 - }, - { - "epoch": 2.6546746613368866, - "grad_norm": 0.4165975886070993, - "learning_rate": 3.065782265460741e-06, - "loss": 0.0273, - "step": 5977 - }, - { - "epoch": 2.655118809682434, - "grad_norm": 0.5153364937600957, - "learning_rate": 3.06399479820567e-06, - "loss": 0.0386, - "step": 5978 - }, - { - "epoch": 2.655562958027981, - "grad_norm": 0.5238135987655357, - "learning_rate": 3.062207621965067e-06, - "loss": 0.0352, - "step": 5979 - }, - { - "epoch": 2.656007106373529, - "grad_norm": 0.427195897063526, - "learning_rate": 3.0604207370075743e-06, - "loss": 0.0252, - "step": 5980 - }, - { - "epoch": 2.656451254719076, - "grad_norm": 0.46584491776786274, - "learning_rate": 3.0586341436017954e-06, - "loss": 0.0476, - "step": 5981 - }, - { - "epoch": 2.6568954030646235, - "grad_norm": 0.43759944048446714, - "learning_rate": 3.056847842016282e-06, - "loss": 0.0273, - "step": 5982 - }, - { - "epoch": 2.657339551410171, - "grad_norm": 0.47962982437840673, - "learning_rate": 3.0550618325195457e-06, - "loss": 0.0292, - "step": 5983 - }, - { - "epoch": 2.6577836997557185, - "grad_norm": 0.40526954332365667, - "learning_rate": 3.053276115380055e-06, - "loss": 0.0249, - "step": 5984 - }, - { - "epoch": 2.6582278481012658, - "grad_norm": 0.5052423442608693, - "learning_rate": 3.0514906908662346e-06, - "loss": 0.0247, - "step": 5985 - }, - { - "epoch": 2.658671996446813, - "grad_norm": 0.5146431419630214, - "learning_rate": 3.0497055592464596e-06, - "loss": 0.0381, - "step": 5986 - }, - { - "epoch": 2.659116144792361, - "grad_norm": 0.40107763434385424, - "learning_rate": 3.04792072078907e-06, - "loss": 0.0305, - "step": 5987 - }, - { - "epoch": 2.659560293137908, - "grad_norm": 0.5005148653434598, - "learning_rate": 3.046136175762357e-06, - "loss": 0.0308, - "step": 5988 - }, - { - "epoch": 2.6600044414834554, - "grad_norm": 0.5429487251371317, - "learning_rate": 3.0443519244345666e-06, - "loss": 0.0359, - "step": 5989 - }, - { - "epoch": 2.660448589829003, - "grad_norm": 0.5062800108359077, - "learning_rate": 3.0425679670739026e-06, - "loss": 0.0409, - "step": 5990 - }, - { - "epoch": 2.6608927381745504, - "grad_norm": 0.37298776061190037, - "learning_rate": 3.040784303948523e-06, - "loss": 0.0314, - "step": 5991 - }, - { - "epoch": 2.6613368865200977, - "grad_norm": 0.6131690894387388, - "learning_rate": 3.0390009353265458e-06, - "loss": 0.0276, - "step": 5992 - }, - { - "epoch": 2.661781034865645, - "grad_norm": 0.4287293989906706, - "learning_rate": 3.0372178614760382e-06, - "loss": 0.038, - "step": 5993 - }, - { - "epoch": 2.6622251832111923, - "grad_norm": 0.494407979190499, - "learning_rate": 3.035435082665029e-06, - "loss": 0.0328, - "step": 5994 - }, - { - "epoch": 2.66266933155674, - "grad_norm": 0.4258763284446213, - "learning_rate": 3.033652599161499e-06, - "loss": 0.0268, - "step": 5995 - }, - { - "epoch": 2.6631134799022873, - "grad_norm": 0.4705172448160009, - "learning_rate": 3.0318704112333847e-06, - "loss": 0.0314, - "step": 5996 - }, - { - "epoch": 2.663557628247835, - "grad_norm": 0.4749657917164724, - "learning_rate": 3.0300885191485797e-06, - "loss": 0.0363, - "step": 5997 - }, - { - "epoch": 2.6640017765933823, - "grad_norm": 0.7383043422980394, - "learning_rate": 3.0283069231749344e-06, - "loss": 0.0361, - "step": 5998 - }, - { - "epoch": 2.6644459249389296, - "grad_norm": 0.38814202685884, - "learning_rate": 3.026525623580252e-06, - "loss": 0.026, - "step": 5999 - }, - { - "epoch": 2.664890073284477, - "grad_norm": 0.3764976664025451, - "learning_rate": 3.0247446206322916e-06, - "loss": 0.0321, - "step": 6000 - }, - { - "epoch": 2.665334221630024, - "grad_norm": 0.45096374436929443, - "learning_rate": 3.0229639145987687e-06, - "loss": 0.034, - "step": 6001 - }, - { - "epoch": 2.665778369975572, - "grad_norm": 0.36848556078596323, - "learning_rate": 3.021183505747354e-06, - "loss": 0.031, - "step": 6002 - }, - { - "epoch": 2.6662225183211192, - "grad_norm": 0.3699352216981277, - "learning_rate": 3.0194033943456696e-06, - "loss": 0.028, - "step": 6003 - }, - { - "epoch": 2.6666666666666665, - "grad_norm": 0.381719196592165, - "learning_rate": 3.0176235806613008e-06, - "loss": 0.0245, - "step": 6004 - }, - { - "epoch": 2.6671108150122143, - "grad_norm": 0.4602734928315702, - "learning_rate": 3.0158440649617836e-06, - "loss": 0.039, - "step": 6005 - }, - { - "epoch": 2.6675549633577615, - "grad_norm": 0.34646717181854414, - "learning_rate": 3.014064847514609e-06, - "loss": 0.0248, - "step": 6006 - }, - { - "epoch": 2.667999111703309, - "grad_norm": 0.41630402405704753, - "learning_rate": 3.0122859285872214e-06, - "loss": 0.0364, - "step": 6007 - }, - { - "epoch": 2.668443260048856, - "grad_norm": 0.39925464642353814, - "learning_rate": 3.010507308447025e-06, - "loss": 0.0286, - "step": 6008 - }, - { - "epoch": 2.668887408394404, - "grad_norm": 0.5709915993898592, - "learning_rate": 3.0087289873613746e-06, - "loss": 0.0409, - "step": 6009 - }, - { - "epoch": 2.669331556739951, - "grad_norm": 0.5153833490415598, - "learning_rate": 3.0069509655975835e-06, - "loss": 0.0388, - "step": 6010 - }, - { - "epoch": 2.6697757050854984, - "grad_norm": 0.37873158972984705, - "learning_rate": 3.0051732434229185e-06, - "loss": 0.0254, - "step": 6011 - }, - { - "epoch": 2.670219853431046, - "grad_norm": 0.4404773311355675, - "learning_rate": 3.003395821104602e-06, - "loss": 0.0308, - "step": 6012 - }, - { - "epoch": 2.6706640017765935, - "grad_norm": 0.5123588004903873, - "learning_rate": 3.001618698909809e-06, - "loss": 0.032, - "step": 6013 - }, - { - "epoch": 2.6711081501221408, - "grad_norm": 0.41512949784084213, - "learning_rate": 2.999841877105672e-06, - "loss": 0.0277, - "step": 6014 - }, - { - "epoch": 2.671552298467688, - "grad_norm": 0.4025208152025212, - "learning_rate": 2.9980653559592775e-06, - "loss": 0.0313, - "step": 6015 - }, - { - "epoch": 2.671996446813236, - "grad_norm": 0.5344242242777247, - "learning_rate": 2.996289135737668e-06, - "loss": 0.0351, - "step": 6016 - }, - { - "epoch": 2.672440595158783, - "grad_norm": 0.4264946062568879, - "learning_rate": 2.994513216707838e-06, - "loss": 0.0263, - "step": 6017 - }, - { - "epoch": 2.6728847435043304, - "grad_norm": 0.3811830713887272, - "learning_rate": 2.992737599136739e-06, - "loss": 0.0268, - "step": 6018 - }, - { - "epoch": 2.673328891849878, - "grad_norm": 0.386704376318305, - "learning_rate": 2.9909622832912767e-06, - "loss": 0.0315, - "step": 6019 - }, - { - "epoch": 2.6737730401954254, - "grad_norm": 0.4136667792416568, - "learning_rate": 2.989187269438308e-06, - "loss": 0.0307, - "step": 6020 - }, - { - "epoch": 2.6742171885409727, - "grad_norm": 0.4174591961736545, - "learning_rate": 2.987412557844653e-06, - "loss": 0.0319, - "step": 6021 - }, - { - "epoch": 2.67466133688652, - "grad_norm": 0.42669426474826655, - "learning_rate": 2.985638148777078e-06, - "loss": 0.0288, - "step": 6022 - }, - { - "epoch": 2.6751054852320673, - "grad_norm": 0.3748882818930551, - "learning_rate": 2.983864042502308e-06, - "loss": 0.0263, - "step": 6023 - }, - { - "epoch": 2.675549633577615, - "grad_norm": 0.4706253013201654, - "learning_rate": 2.9820902392870197e-06, - "loss": 0.0284, - "step": 6024 - }, - { - "epoch": 2.6759937819231623, - "grad_norm": 0.4662239351305156, - "learning_rate": 2.980316739397847e-06, - "loss": 0.0358, - "step": 6025 - }, - { - "epoch": 2.67643793026871, - "grad_norm": 0.314532858998878, - "learning_rate": 2.9785435431013755e-06, - "loss": 0.0232, - "step": 6026 - }, - { - "epoch": 2.6768820786142573, - "grad_norm": 0.5337532314358315, - "learning_rate": 2.97677065066415e-06, - "loss": 0.0333, - "step": 6027 - }, - { - "epoch": 2.6773262269598046, - "grad_norm": 0.5956371023489798, - "learning_rate": 2.9749980623526633e-06, - "loss": 0.0381, - "step": 6028 - }, - { - "epoch": 2.677770375305352, - "grad_norm": 0.3325969522067864, - "learning_rate": 2.9732257784333673e-06, - "loss": 0.0249, - "step": 6029 - }, - { - "epoch": 2.678214523650899, - "grad_norm": 0.44226896551023326, - "learning_rate": 2.9714537991726656e-06, - "loss": 0.0321, - "step": 6030 - }, - { - "epoch": 2.678658671996447, - "grad_norm": 0.407289289939863, - "learning_rate": 2.9696821248369152e-06, - "loss": 0.0294, - "step": 6031 - }, - { - "epoch": 2.679102820341994, - "grad_norm": 0.42539794841254347, - "learning_rate": 2.9679107556924314e-06, - "loss": 0.0312, - "step": 6032 - }, - { - "epoch": 2.6795469686875415, - "grad_norm": 0.3452430100807996, - "learning_rate": 2.966139692005481e-06, - "loss": 0.0311, - "step": 6033 - }, - { - "epoch": 2.6799911170330892, - "grad_norm": 0.47114526962385006, - "learning_rate": 2.9643689340422844e-06, - "loss": 0.0351, - "step": 6034 - }, - { - "epoch": 2.6804352653786365, - "grad_norm": 0.3056012283706184, - "learning_rate": 2.962598482069015e-06, - "loss": 0.0254, - "step": 6035 - }, - { - "epoch": 2.680879413724184, - "grad_norm": 0.39834900420012637, - "learning_rate": 2.960828336351804e-06, - "loss": 0.0329, - "step": 6036 - }, - { - "epoch": 2.681323562069731, - "grad_norm": 0.4921282055431307, - "learning_rate": 2.9590584971567327e-06, - "loss": 0.0378, - "step": 6037 - }, - { - "epoch": 2.681767710415279, - "grad_norm": 0.401942181093132, - "learning_rate": 2.957288964749839e-06, - "loss": 0.0282, - "step": 6038 - }, - { - "epoch": 2.682211858760826, - "grad_norm": 0.5998754085660062, - "learning_rate": 2.955519739397114e-06, - "loss": 0.04, - "step": 6039 - }, - { - "epoch": 2.6826560071063734, - "grad_norm": 0.5229301227786665, - "learning_rate": 2.9537508213645026e-06, - "loss": 0.0439, - "step": 6040 - }, - { - "epoch": 2.683100155451921, - "grad_norm": 0.4131624612623256, - "learning_rate": 2.9519822109179007e-06, - "loss": 0.0324, - "step": 6041 - }, - { - "epoch": 2.6835443037974684, - "grad_norm": 0.3861923836427622, - "learning_rate": 2.950213908323164e-06, - "loss": 0.0277, - "step": 6042 - }, - { - "epoch": 2.6839884521430157, - "grad_norm": 0.4648268575218616, - "learning_rate": 2.948445913846094e-06, - "loss": 0.0289, - "step": 6043 - }, - { - "epoch": 2.684432600488563, - "grad_norm": 0.4732042527119129, - "learning_rate": 2.9466782277524554e-06, - "loss": 0.0309, - "step": 6044 - }, - { - "epoch": 2.6848767488341103, - "grad_norm": 0.3982899300782419, - "learning_rate": 2.944910850307958e-06, - "loss": 0.0314, - "step": 6045 - }, - { - "epoch": 2.685320897179658, - "grad_norm": 0.4153109085037015, - "learning_rate": 2.9431437817782705e-06, - "loss": 0.0339, - "step": 6046 - }, - { - "epoch": 2.6857650455252053, - "grad_norm": 0.43324892108850316, - "learning_rate": 2.9413770224290126e-06, - "loss": 0.0311, - "step": 6047 - }, - { - "epoch": 2.686209193870753, - "grad_norm": 0.28406951308335854, - "learning_rate": 2.9396105725257563e-06, - "loss": 0.0231, - "step": 6048 - }, - { - "epoch": 2.6866533422163004, - "grad_norm": 0.39298370689744644, - "learning_rate": 2.9378444323340316e-06, - "loss": 0.0304, - "step": 6049 - }, - { - "epoch": 2.6870974905618477, - "grad_norm": 0.4485933834660012, - "learning_rate": 2.9360786021193192e-06, - "loss": 0.0285, - "step": 6050 - }, - { - "epoch": 2.687541638907395, - "grad_norm": 0.35520292176022183, - "learning_rate": 2.934313082147053e-06, - "loss": 0.0226, - "step": 6051 - }, - { - "epoch": 2.6879857872529422, - "grad_norm": 0.44687364826159626, - "learning_rate": 2.93254787268262e-06, - "loss": 0.0357, - "step": 6052 - }, - { - "epoch": 2.68842993559849, - "grad_norm": 0.5271674075113904, - "learning_rate": 2.930782973991362e-06, - "loss": 0.0377, - "step": 6053 - }, - { - "epoch": 2.6888740839440373, - "grad_norm": 0.4626973474509376, - "learning_rate": 2.929018386338571e-06, - "loss": 0.0502, - "step": 6054 - }, - { - "epoch": 2.689318232289585, - "grad_norm": 0.4073720832807278, - "learning_rate": 2.927254109989499e-06, - "loss": 0.0328, - "step": 6055 - }, - { - "epoch": 2.6897623806351323, - "grad_norm": 0.38757290739790723, - "learning_rate": 2.9254901452093424e-06, - "loss": 0.031, - "step": 6056 - }, - { - "epoch": 2.6902065289806796, - "grad_norm": 0.3981460707866334, - "learning_rate": 2.923726492263258e-06, - "loss": 0.0329, - "step": 6057 - }, - { - "epoch": 2.690650677326227, - "grad_norm": 0.34586475207518175, - "learning_rate": 2.9219631514163514e-06, - "loss": 0.0229, - "step": 6058 - }, - { - "epoch": 2.691094825671774, - "grad_norm": 0.3814494157202918, - "learning_rate": 2.9202001229336817e-06, - "loss": 0.0252, - "step": 6059 - }, - { - "epoch": 2.691538974017322, - "grad_norm": 0.5320910876205124, - "learning_rate": 2.9184374070802633e-06, - "loss": 0.0528, - "step": 6060 - }, - { - "epoch": 2.691983122362869, - "grad_norm": 0.5320769049550915, - "learning_rate": 2.916675004121062e-06, - "loss": 0.0374, - "step": 6061 - }, - { - "epoch": 2.6924272707084165, - "grad_norm": 0.46814435114343156, - "learning_rate": 2.9149129143209974e-06, - "loss": 0.0366, - "step": 6062 - }, - { - "epoch": 2.692871419053964, - "grad_norm": 0.46820101387242424, - "learning_rate": 2.9131511379449428e-06, - "loss": 0.0398, - "step": 6063 - }, - { - "epoch": 2.6933155673995115, - "grad_norm": 0.42299234767365035, - "learning_rate": 2.9113896752577205e-06, - "loss": 0.0306, - "step": 6064 - }, - { - "epoch": 2.693759715745059, - "grad_norm": 0.3540454635472545, - "learning_rate": 2.9096285265241063e-06, - "loss": 0.0253, - "step": 6065 - }, - { - "epoch": 2.694203864090606, - "grad_norm": 0.47552195678010667, - "learning_rate": 2.9078676920088378e-06, - "loss": 0.0304, - "step": 6066 - }, - { - "epoch": 2.694648012436154, - "grad_norm": 0.41330422015572915, - "learning_rate": 2.9061071719765933e-06, - "loss": 0.0334, - "step": 6067 - }, - { - "epoch": 2.695092160781701, - "grad_norm": 0.3903849704575043, - "learning_rate": 2.9043469666920088e-06, - "loss": 0.0261, - "step": 6068 - }, - { - "epoch": 2.6955363091272484, - "grad_norm": 0.37706353026097844, - "learning_rate": 2.902587076419676e-06, - "loss": 0.0294, - "step": 6069 - }, - { - "epoch": 2.695980457472796, - "grad_norm": 0.3862061240373804, - "learning_rate": 2.900827501424133e-06, - "loss": 0.0354, - "step": 6070 - }, - { - "epoch": 2.6964246058183434, - "grad_norm": 0.4432184785361431, - "learning_rate": 2.899068241969876e-06, - "loss": 0.032, - "step": 6071 - }, - { - "epoch": 2.6968687541638907, - "grad_norm": 0.4814495852537153, - "learning_rate": 2.8973092983213493e-06, - "loss": 0.0335, - "step": 6072 - }, - { - "epoch": 2.697312902509438, - "grad_norm": 0.444449262745695, - "learning_rate": 2.8955506707429545e-06, - "loss": 0.0428, - "step": 6073 - }, - { - "epoch": 2.6977570508549853, - "grad_norm": 0.38128392307642117, - "learning_rate": 2.8937923594990435e-06, - "loss": 0.0253, - "step": 6074 - }, - { - "epoch": 2.698201199200533, - "grad_norm": 0.4767935660247025, - "learning_rate": 2.8920343648539174e-06, - "loss": 0.0261, - "step": 6075 - }, - { - "epoch": 2.6986453475460803, - "grad_norm": 0.5621830464805478, - "learning_rate": 2.8902766870718347e-06, - "loss": 0.0478, - "step": 6076 - }, - { - "epoch": 2.699089495891628, - "grad_norm": 0.34049032210297114, - "learning_rate": 2.8885193264170036e-06, - "loss": 0.0294, - "step": 6077 - }, - { - "epoch": 2.6995336442371753, - "grad_norm": 0.4750456467312549, - "learning_rate": 2.886762283153586e-06, - "loss": 0.0371, - "step": 6078 - }, - { - "epoch": 2.6999777925827226, - "grad_norm": 0.4893300140355996, - "learning_rate": 2.885005557545694e-06, - "loss": 0.0285, - "step": 6079 - }, - { - "epoch": 2.70042194092827, - "grad_norm": 0.4397921048743652, - "learning_rate": 2.8832491498573965e-06, - "loss": 0.0365, - "step": 6080 - }, - { - "epoch": 2.700866089273817, - "grad_norm": 0.4948308694304273, - "learning_rate": 2.8814930603527067e-06, - "loss": 0.0318, - "step": 6081 - }, - { - "epoch": 2.701310237619365, - "grad_norm": 0.4221767743708031, - "learning_rate": 2.8797372892955978e-06, - "loss": 0.0355, - "step": 6082 - }, - { - "epoch": 2.7017543859649122, - "grad_norm": 0.548027854837134, - "learning_rate": 2.877981836949991e-06, - "loss": 0.0286, - "step": 6083 - }, - { - "epoch": 2.7021985343104595, - "grad_norm": 0.50981889703332, - "learning_rate": 2.8762267035797607e-06, - "loss": 0.0346, - "step": 6084 - }, - { - "epoch": 2.7026426826560073, - "grad_norm": 0.49304824695259686, - "learning_rate": 2.8744718894487345e-06, - "loss": 0.0392, - "step": 6085 - }, - { - "epoch": 2.7030868310015546, - "grad_norm": 0.33965710652598363, - "learning_rate": 2.8727173948206905e-06, - "loss": 0.0211, - "step": 6086 - }, - { - "epoch": 2.703530979347102, - "grad_norm": 0.4906097802841706, - "learning_rate": 2.870963219959357e-06, - "loss": 0.0339, - "step": 6087 - }, - { - "epoch": 2.703975127692649, - "grad_norm": 0.45131838531292917, - "learning_rate": 2.869209365128417e-06, - "loss": 0.0327, - "step": 6088 - }, - { - "epoch": 2.704419276038197, - "grad_norm": 0.44310343452513123, - "learning_rate": 2.8674558305915057e-06, - "loss": 0.0413, - "step": 6089 - }, - { - "epoch": 2.704863424383744, - "grad_norm": 0.4694661668866994, - "learning_rate": 2.865702616612208e-06, - "loss": 0.0303, - "step": 6090 - }, - { - "epoch": 2.7053075727292915, - "grad_norm": 0.42182727027273254, - "learning_rate": 2.8639497234540646e-06, - "loss": 0.0322, - "step": 6091 - }, - { - "epoch": 2.705751721074839, - "grad_norm": 0.34911280190910793, - "learning_rate": 2.862197151380561e-06, - "loss": 0.0297, - "step": 6092 - }, - { - "epoch": 2.7061958694203865, - "grad_norm": 0.5070815814024743, - "learning_rate": 2.8604449006551406e-06, - "loss": 0.0535, - "step": 6093 - }, - { - "epoch": 2.7066400177659338, - "grad_norm": 0.6056329049163846, - "learning_rate": 2.8586929715411963e-06, - "loss": 0.0589, - "step": 6094 - }, - { - "epoch": 2.707084166111481, - "grad_norm": 0.42386614097773856, - "learning_rate": 2.8569413643020725e-06, - "loss": 0.0337, - "step": 6095 - }, - { - "epoch": 2.707528314457029, - "grad_norm": 0.5481706485098858, - "learning_rate": 2.855190079201067e-06, - "loss": 0.0346, - "step": 6096 - }, - { - "epoch": 2.707972462802576, - "grad_norm": 0.4317878891723994, - "learning_rate": 2.8534391165014275e-06, - "loss": 0.0447, - "step": 6097 - }, - { - "epoch": 2.7084166111481234, - "grad_norm": 0.4182536684547791, - "learning_rate": 2.8516884764663512e-06, - "loss": 0.0381, - "step": 6098 - }, - { - "epoch": 2.708860759493671, - "grad_norm": 0.39160951294185736, - "learning_rate": 2.849938159358989e-06, - "loss": 0.0261, - "step": 6099 - }, - { - "epoch": 2.7093049078392184, - "grad_norm": 0.3551951087739855, - "learning_rate": 2.848188165442446e-06, - "loss": 0.0293, - "step": 6100 - }, - { - "epoch": 2.7097490561847657, - "grad_norm": 0.4415214034913667, - "learning_rate": 2.846438494979774e-06, - "loss": 0.0374, - "step": 6101 - }, - { - "epoch": 2.710193204530313, - "grad_norm": 0.6570425809867534, - "learning_rate": 2.844689148233979e-06, - "loss": 0.0493, - "step": 6102 - }, - { - "epoch": 2.7106373528758603, - "grad_norm": 0.42365442096686196, - "learning_rate": 2.842940125468019e-06, - "loss": 0.0382, - "step": 6103 - }, - { - "epoch": 2.711081501221408, - "grad_norm": 0.4801471324952032, - "learning_rate": 2.8411914269447984e-06, - "loss": 0.0252, - "step": 6104 - }, - { - "epoch": 2.7115256495669553, - "grad_norm": 0.3451437825598062, - "learning_rate": 2.8394430529271777e-06, - "loss": 0.0299, - "step": 6105 - }, - { - "epoch": 2.711969797912503, - "grad_norm": 0.5219358142266073, - "learning_rate": 2.8376950036779683e-06, - "loss": 0.0295, - "step": 6106 - }, - { - "epoch": 2.7124139462580503, - "grad_norm": 0.3693933163750994, - "learning_rate": 2.8359472794599307e-06, - "loss": 0.0277, - "step": 6107 - }, - { - "epoch": 2.7128580946035976, - "grad_norm": 0.4033904609462883, - "learning_rate": 2.8341998805357796e-06, - "loss": 0.0265, - "step": 6108 - }, - { - "epoch": 2.713302242949145, - "grad_norm": 0.4144874366737247, - "learning_rate": 2.832452807168175e-06, - "loss": 0.0268, - "step": 6109 - }, - { - "epoch": 2.713746391294692, - "grad_norm": 0.5838661054136332, - "learning_rate": 2.8307060596197337e-06, - "loss": 0.0384, - "step": 6110 - }, - { - "epoch": 2.71419053964024, - "grad_norm": 0.5420801857113182, - "learning_rate": 2.8289596381530214e-06, - "loss": 0.0305, - "step": 6111 - }, - { - "epoch": 2.714634687985787, - "grad_norm": 0.42444475722005587, - "learning_rate": 2.8272135430305558e-06, - "loss": 0.041, - "step": 6112 - }, - { - "epoch": 2.7150788363313345, - "grad_norm": 0.36578048462525586, - "learning_rate": 2.825467774514803e-06, - "loss": 0.0224, - "step": 6113 - }, - { - "epoch": 2.7155229846768822, - "grad_norm": 0.33007914592544324, - "learning_rate": 2.823722332868185e-06, - "loss": 0.0252, - "step": 6114 - }, - { - "epoch": 2.7159671330224295, - "grad_norm": 0.5564364593461901, - "learning_rate": 2.821977218353067e-06, - "loss": 0.0304, - "step": 6115 - }, - { - "epoch": 2.716411281367977, - "grad_norm": 0.539249473006622, - "learning_rate": 2.820232431231771e-06, - "loss": 0.0417, - "step": 6116 - }, - { - "epoch": 2.716855429713524, - "grad_norm": 0.39638502515608776, - "learning_rate": 2.818487971766568e-06, - "loss": 0.0276, - "step": 6117 - }, - { - "epoch": 2.717299578059072, - "grad_norm": 0.3923969070351993, - "learning_rate": 2.816743840219681e-06, - "loss": 0.0345, - "step": 6118 - }, - { - "epoch": 2.717743726404619, - "grad_norm": 0.7090345157048296, - "learning_rate": 2.8150000368532826e-06, - "loss": 0.0459, - "step": 6119 - }, - { - "epoch": 2.7181878747501664, - "grad_norm": 0.49769141697815394, - "learning_rate": 2.8132565619294943e-06, - "loss": 0.0312, - "step": 6120 - }, - { - "epoch": 2.718632023095714, - "grad_norm": 0.3567290862892367, - "learning_rate": 2.8115134157103906e-06, - "loss": 0.0427, - "step": 6121 - }, - { - "epoch": 2.7190761714412615, - "grad_norm": 0.41308801601010114, - "learning_rate": 2.809770598457997e-06, - "loss": 0.0308, - "step": 6122 - }, - { - "epoch": 2.7195203197868087, - "grad_norm": 0.42911807998450446, - "learning_rate": 2.8080281104342875e-06, - "loss": 0.0389, - "step": 6123 - }, - { - "epoch": 2.719964468132356, - "grad_norm": 0.3668652303738415, - "learning_rate": 2.8062859519011885e-06, - "loss": 0.031, - "step": 6124 - }, - { - "epoch": 2.7204086164779038, - "grad_norm": 0.4729128881363199, - "learning_rate": 2.8045441231205773e-06, - "loss": 0.0359, - "step": 6125 - }, - { - "epoch": 2.720852764823451, - "grad_norm": 0.49961860986096596, - "learning_rate": 2.802802624354276e-06, - "loss": 0.0284, - "step": 6126 - }, - { - "epoch": 2.7212969131689984, - "grad_norm": 0.38704332861461993, - "learning_rate": 2.8010614558640653e-06, - "loss": 0.0306, - "step": 6127 - }, - { - "epoch": 2.721741061514546, - "grad_norm": 0.36762472796404744, - "learning_rate": 2.7993206179116706e-06, - "loss": 0.0297, - "step": 6128 - }, - { - "epoch": 2.7221852098600934, - "grad_norm": 0.40237829108230383, - "learning_rate": 2.79758011075877e-06, - "loss": 0.0379, - "step": 6129 - }, - { - "epoch": 2.7226293582056407, - "grad_norm": 0.39422460053368863, - "learning_rate": 2.7958399346669916e-06, - "loss": 0.0324, - "step": 6130 - }, - { - "epoch": 2.723073506551188, - "grad_norm": 0.4944769619389698, - "learning_rate": 2.7941000898979153e-06, - "loss": 0.0365, - "step": 6131 - }, - { - "epoch": 2.7235176548967353, - "grad_norm": 0.43625796235808223, - "learning_rate": 2.7923605767130644e-06, - "loss": 0.0385, - "step": 6132 - }, - { - "epoch": 2.723961803242283, - "grad_norm": 0.39500536045009, - "learning_rate": 2.790621395373921e-06, - "loss": 0.0247, - "step": 6133 - }, - { - "epoch": 2.7244059515878303, - "grad_norm": 0.4784483734678597, - "learning_rate": 2.7888825461419124e-06, - "loss": 0.0226, - "step": 6134 - }, - { - "epoch": 2.724850099933378, - "grad_norm": 0.5298205495609496, - "learning_rate": 2.7871440292784167e-06, - "loss": 0.0407, - "step": 6135 - }, - { - "epoch": 2.7252942482789253, - "grad_norm": 0.46575202716742126, - "learning_rate": 2.7854058450447657e-06, - "loss": 0.0412, - "step": 6136 - }, - { - "epoch": 2.7257383966244726, - "grad_norm": 0.4738047535073062, - "learning_rate": 2.783667993702234e-06, - "loss": 0.0385, - "step": 6137 - }, - { - "epoch": 2.72618254497002, - "grad_norm": 0.3869731324535632, - "learning_rate": 2.7819304755120514e-06, - "loss": 0.0295, - "step": 6138 - }, - { - "epoch": 2.726626693315567, - "grad_norm": 0.3652035596631365, - "learning_rate": 2.7801932907353966e-06, - "loss": 0.0295, - "step": 6139 - }, - { - "epoch": 2.727070841661115, - "grad_norm": 0.41533452786136, - "learning_rate": 2.778456439633398e-06, - "loss": 0.0242, - "step": 6140 - }, - { - "epoch": 2.727514990006662, - "grad_norm": 0.35015673870621206, - "learning_rate": 2.776719922467135e-06, - "loss": 0.0313, - "step": 6141 - }, - { - "epoch": 2.7279591383522095, - "grad_norm": 0.48649742529511003, - "learning_rate": 2.7749837394976353e-06, - "loss": 0.0403, - "step": 6142 - }, - { - "epoch": 2.7284032866977572, - "grad_norm": 0.4158217744500075, - "learning_rate": 2.773247890985874e-06, - "loss": 0.0294, - "step": 6143 - }, - { - "epoch": 2.7288474350433045, - "grad_norm": 0.3886529547809444, - "learning_rate": 2.7715123771927817e-06, - "loss": 0.0285, - "step": 6144 - }, - { - "epoch": 2.729291583388852, - "grad_norm": 0.48618253354322, - "learning_rate": 2.7697771983792334e-06, - "loss": 0.0381, - "step": 6145 - }, - { - "epoch": 2.729735731734399, - "grad_norm": 0.4239190944164018, - "learning_rate": 2.7680423548060574e-06, - "loss": 0.028, - "step": 6146 - }, - { - "epoch": 2.730179880079947, - "grad_norm": 0.6790565769570187, - "learning_rate": 2.766307846734032e-06, - "loss": 0.0344, - "step": 6147 - }, - { - "epoch": 2.730624028425494, - "grad_norm": 0.4083045746295625, - "learning_rate": 2.764573674423879e-06, - "loss": 0.0292, - "step": 6148 - }, - { - "epoch": 2.7310681767710414, - "grad_norm": 0.39741625214319276, - "learning_rate": 2.7628398381362765e-06, - "loss": 0.0369, - "step": 6149 - }, - { - "epoch": 2.731512325116589, - "grad_norm": 0.3364754032489686, - "learning_rate": 2.7611063381318483e-06, - "loss": 0.0223, - "step": 6150 - }, - { - "epoch": 2.7319564734621364, - "grad_norm": 0.33915812547001584, - "learning_rate": 2.7593731746711695e-06, - "loss": 0.0276, - "step": 6151 - }, - { - "epoch": 2.7324006218076837, - "grad_norm": 0.3730496797762164, - "learning_rate": 2.757640348014764e-06, - "loss": 0.0274, - "step": 6152 - }, - { - "epoch": 2.732844770153231, - "grad_norm": 0.3751953423117338, - "learning_rate": 2.755907858423108e-06, - "loss": 0.0363, - "step": 6153 - }, - { - "epoch": 2.7332889184987788, - "grad_norm": 0.42372086556075383, - "learning_rate": 2.754175706156619e-06, - "loss": 0.0244, - "step": 6154 - }, - { - "epoch": 2.733733066844326, - "grad_norm": 0.3686003953358143, - "learning_rate": 2.7524438914756714e-06, - "loss": 0.0227, - "step": 6155 - }, - { - "epoch": 2.7341772151898733, - "grad_norm": 0.38850199266907187, - "learning_rate": 2.750712414640588e-06, - "loss": 0.0318, - "step": 6156 - }, - { - "epoch": 2.734621363535421, - "grad_norm": 0.4212753598660164, - "learning_rate": 2.748981275911633e-06, - "loss": 0.027, - "step": 6157 - }, - { - "epoch": 2.7350655118809684, - "grad_norm": 0.3950070118539609, - "learning_rate": 2.747250475549033e-06, - "loss": 0.0347, - "step": 6158 - }, - { - "epoch": 2.7355096602265156, - "grad_norm": 0.5989665019227255, - "learning_rate": 2.745520013812956e-06, - "loss": 0.0413, - "step": 6159 - }, - { - "epoch": 2.735953808572063, - "grad_norm": 0.4076839235525957, - "learning_rate": 2.743789890963516e-06, - "loss": 0.0239, - "step": 6160 - }, - { - "epoch": 2.7363979569176102, - "grad_norm": 0.4896550081248881, - "learning_rate": 2.742060107260781e-06, - "loss": 0.0284, - "step": 6161 - }, - { - "epoch": 2.736842105263158, - "grad_norm": 0.5961993037262533, - "learning_rate": 2.740330662964768e-06, - "loss": 0.0399, - "step": 6162 - }, - { - "epoch": 2.7372862536087053, - "grad_norm": 0.3528802270631205, - "learning_rate": 2.7386015583354414e-06, - "loss": 0.0271, - "step": 6163 - }, - { - "epoch": 2.737730401954253, - "grad_norm": 0.38484518351915703, - "learning_rate": 2.736872793632717e-06, - "loss": 0.0369, - "step": 6164 - }, - { - "epoch": 2.7381745502998003, - "grad_norm": 0.4319515735643185, - "learning_rate": 2.7351443691164537e-06, - "loss": 0.0228, - "step": 6165 - }, - { - "epoch": 2.7386186986453476, - "grad_norm": 0.42098419937374343, - "learning_rate": 2.7334162850464645e-06, - "loss": 0.0292, - "step": 6166 - }, - { - "epoch": 2.739062846990895, - "grad_norm": 0.35177937154537675, - "learning_rate": 2.7316885416825123e-06, - "loss": 0.0218, - "step": 6167 - }, - { - "epoch": 2.739506995336442, - "grad_norm": 0.41712238277892294, - "learning_rate": 2.7299611392843005e-06, - "loss": 0.0368, - "step": 6168 - }, - { - "epoch": 2.73995114368199, - "grad_norm": 0.39108614184700036, - "learning_rate": 2.7282340781114926e-06, - "loss": 0.0261, - "step": 6169 - }, - { - "epoch": 2.740395292027537, - "grad_norm": 0.526808362535383, - "learning_rate": 2.726507358423695e-06, - "loss": 0.0267, - "step": 6170 - }, - { - "epoch": 2.7408394403730845, - "grad_norm": 0.3749087607755556, - "learning_rate": 2.7247809804804593e-06, - "loss": 0.0198, - "step": 6171 - }, - { - "epoch": 2.741283588718632, - "grad_norm": 0.41805354629010627, - "learning_rate": 2.723054944541292e-06, - "loss": 0.0358, - "step": 6172 - }, - { - "epoch": 2.7417277370641795, - "grad_norm": 0.37604430696616314, - "learning_rate": 2.721329250865646e-06, - "loss": 0.0273, - "step": 6173 - }, - { - "epoch": 2.742171885409727, - "grad_norm": 0.4611005258922287, - "learning_rate": 2.719603899712919e-06, - "loss": 0.031, - "step": 6174 - }, - { - "epoch": 2.742616033755274, - "grad_norm": 0.41841947895126935, - "learning_rate": 2.7178788913424635e-06, - "loss": 0.0315, - "step": 6175 - }, - { - "epoch": 2.743060182100822, - "grad_norm": 0.41801694735137535, - "learning_rate": 2.7161542260135797e-06, - "loss": 0.0336, - "step": 6176 - }, - { - "epoch": 2.743504330446369, - "grad_norm": 0.4765481557698237, - "learning_rate": 2.7144299039855105e-06, - "loss": 0.0375, - "step": 6177 - }, - { - "epoch": 2.7439484787919164, - "grad_norm": 0.3580598459132636, - "learning_rate": 2.7127059255174504e-06, - "loss": 0.0286, - "step": 6178 - }, - { - "epoch": 2.744392627137464, - "grad_norm": 0.4101523804697921, - "learning_rate": 2.7109822908685445e-06, - "loss": 0.0274, - "step": 6179 - }, - { - "epoch": 2.7448367754830114, - "grad_norm": 0.4715955568359928, - "learning_rate": 2.7092590002978837e-06, - "loss": 0.0345, - "step": 6180 - }, - { - "epoch": 2.7452809238285587, - "grad_norm": 0.6089181044007468, - "learning_rate": 2.70753605406451e-06, - "loss": 0.0343, - "step": 6181 - }, - { - "epoch": 2.745725072174106, - "grad_norm": 0.5806324207660223, - "learning_rate": 2.7058134524274083e-06, - "loss": 0.0346, - "step": 6182 - }, - { - "epoch": 2.7461692205196533, - "grad_norm": 0.40045495221538185, - "learning_rate": 2.7040911956455153e-06, - "loss": 0.0278, - "step": 6183 - }, - { - "epoch": 2.746613368865201, - "grad_norm": 0.45888251149836196, - "learning_rate": 2.702369283977718e-06, - "loss": 0.0321, - "step": 6184 - }, - { - "epoch": 2.7470575172107483, - "grad_norm": 0.39535983779938544, - "learning_rate": 2.7006477176828443e-06, - "loss": 0.0291, - "step": 6185 - }, - { - "epoch": 2.747501665556296, - "grad_norm": 0.40307288228268773, - "learning_rate": 2.6989264970196795e-06, - "loss": 0.031, - "step": 6186 - }, - { - "epoch": 2.7479458139018433, - "grad_norm": 0.42537830184812087, - "learning_rate": 2.697205622246952e-06, - "loss": 0.0379, - "step": 6187 - }, - { - "epoch": 2.7483899622473906, - "grad_norm": 0.7384045353118717, - "learning_rate": 2.6954850936233357e-06, - "loss": 0.0388, - "step": 6188 - }, - { - "epoch": 2.748834110592938, - "grad_norm": 0.49237325688101374, - "learning_rate": 2.693764911407456e-06, - "loss": 0.0357, - "step": 6189 - }, - { - "epoch": 2.749278258938485, - "grad_norm": 0.43303507384385986, - "learning_rate": 2.6920450758578885e-06, - "loss": 0.0305, - "step": 6190 - }, - { - "epoch": 2.749722407284033, - "grad_norm": 0.5198323318463377, - "learning_rate": 2.690325587233148e-06, - "loss": 0.042, - "step": 6191 - }, - { - "epoch": 2.7501665556295802, - "grad_norm": 0.5579062089534479, - "learning_rate": 2.6886064457917094e-06, - "loss": 0.0442, - "step": 6192 - }, - { - "epoch": 2.750610703975128, - "grad_norm": 0.2992794413549194, - "learning_rate": 2.6868876517919845e-06, - "loss": 0.0214, - "step": 6193 - }, - { - "epoch": 2.7510548523206753, - "grad_norm": 0.5197412003207367, - "learning_rate": 2.6851692054923385e-06, - "loss": 0.0447, - "step": 6194 - }, - { - "epoch": 2.7514990006662225, - "grad_norm": 0.45629204802490403, - "learning_rate": 2.6834511071510823e-06, - "loss": 0.0335, - "step": 6195 - }, - { - "epoch": 2.75194314901177, - "grad_norm": 0.3939710962212484, - "learning_rate": 2.681733357026476e-06, - "loss": 0.0334, - "step": 6196 - }, - { - "epoch": 2.752387297357317, - "grad_norm": 0.4395100384242003, - "learning_rate": 2.680015955376727e-06, - "loss": 0.0269, - "step": 6197 - }, - { - "epoch": 2.752831445702865, - "grad_norm": 0.35669081976582817, - "learning_rate": 2.6782989024599913e-06, - "loss": 0.0239, - "step": 6198 - }, - { - "epoch": 2.753275594048412, - "grad_norm": 0.42101464193049454, - "learning_rate": 2.6765821985343676e-06, - "loss": 0.0297, - "step": 6199 - }, - { - "epoch": 2.7537197423939594, - "grad_norm": 0.4443116484682141, - "learning_rate": 2.6748658438579075e-06, - "loss": 0.0367, - "step": 6200 - }, - { - "epoch": 2.754163890739507, - "grad_norm": 0.3576485677615678, - "learning_rate": 2.6731498386886094e-06, - "loss": 0.0269, - "step": 6201 - }, - { - "epoch": 2.7546080390850545, - "grad_norm": 0.8029786113817223, - "learning_rate": 2.6714341832844137e-06, - "loss": 0.0366, - "step": 6202 - }, - { - "epoch": 2.7550521874306018, - "grad_norm": 0.4869826644782611, - "learning_rate": 2.6697188779032173e-06, - "loss": 0.0293, - "step": 6203 - }, - { - "epoch": 2.755496335776149, - "grad_norm": 0.43061987045181044, - "learning_rate": 2.6680039228028603e-06, - "loss": 0.0352, - "step": 6204 - }, - { - "epoch": 2.755940484121697, - "grad_norm": 0.3632393699444563, - "learning_rate": 2.6662893182411255e-06, - "loss": 0.0256, - "step": 6205 - }, - { - "epoch": 2.756384632467244, - "grad_norm": 0.37775571779218736, - "learning_rate": 2.6645750644757484e-06, - "loss": 0.0342, - "step": 6206 - }, - { - "epoch": 2.7568287808127914, - "grad_norm": 0.31327481469952234, - "learning_rate": 2.6628611617644133e-06, - "loss": 0.0209, - "step": 6207 - }, - { - "epoch": 2.757272929158339, - "grad_norm": 0.5059332837930003, - "learning_rate": 2.6611476103647425e-06, - "loss": 0.0286, - "step": 6208 - }, - { - "epoch": 2.7577170775038864, - "grad_norm": 0.39890166964039236, - "learning_rate": 2.6594344105343207e-06, - "loss": 0.0335, - "step": 6209 - }, - { - "epoch": 2.7581612258494337, - "grad_norm": 0.40337347616568126, - "learning_rate": 2.657721562530664e-06, - "loss": 0.0206, - "step": 6210 - }, - { - "epoch": 2.758605374194981, - "grad_norm": 0.44935773733297946, - "learning_rate": 2.656009066611244e-06, - "loss": 0.03, - "step": 6211 - }, - { - "epoch": 2.7590495225405283, - "grad_norm": 0.417217650015585, - "learning_rate": 2.654296923033481e-06, - "loss": 0.0354, - "step": 6212 - }, - { - "epoch": 2.759493670886076, - "grad_norm": 0.39156228698655793, - "learning_rate": 2.652585132054734e-06, - "loss": 0.0301, - "step": 6213 - }, - { - "epoch": 2.7599378192316233, - "grad_norm": 0.4181623279832991, - "learning_rate": 2.6508736939323187e-06, - "loss": 0.0416, - "step": 6214 - }, - { - "epoch": 2.760381967577171, - "grad_norm": 0.38946142096359226, - "learning_rate": 2.649162608923493e-06, - "loss": 0.022, - "step": 6215 - }, - { - "epoch": 2.7608261159227183, - "grad_norm": 0.28459219563440136, - "learning_rate": 2.6474518772854606e-06, - "loss": 0.0211, - "step": 6216 - }, - { - "epoch": 2.7612702642682656, - "grad_norm": 0.39516529374066633, - "learning_rate": 2.6457414992753728e-06, - "loss": 0.031, - "step": 6217 - }, - { - "epoch": 2.761714412613813, - "grad_norm": 0.6507735193198333, - "learning_rate": 2.6440314751503314e-06, - "loss": 0.0405, - "step": 6218 - }, - { - "epoch": 2.76215856095936, - "grad_norm": 0.39863052089616485, - "learning_rate": 2.6423218051673766e-06, - "loss": 0.0257, - "step": 6219 - }, - { - "epoch": 2.762602709304908, - "grad_norm": 0.45747750899314005, - "learning_rate": 2.6406124895835084e-06, - "loss": 0.0309, - "step": 6220 - }, - { - "epoch": 2.763046857650455, - "grad_norm": 0.39469825591698227, - "learning_rate": 2.6389035286556598e-06, - "loss": 0.0301, - "step": 6221 - }, - { - "epoch": 2.7634910059960025, - "grad_norm": 0.4493000737157054, - "learning_rate": 2.637194922640719e-06, - "loss": 0.0336, - "step": 6222 - }, - { - "epoch": 2.7639351543415502, - "grad_norm": 0.3609277682343467, - "learning_rate": 2.6354866717955186e-06, - "loss": 0.0303, - "step": 6223 - }, - { - "epoch": 2.7643793026870975, - "grad_norm": 0.3650445917615703, - "learning_rate": 2.6337787763768384e-06, - "loss": 0.0242, - "step": 6224 - }, - { - "epoch": 2.764823451032645, - "grad_norm": 0.48546314397013385, - "learning_rate": 2.6320712366414005e-06, - "loss": 0.0294, - "step": 6225 - }, - { - "epoch": 2.765267599378192, - "grad_norm": 0.32486540563930044, - "learning_rate": 2.6303640528458834e-06, - "loss": 0.0223, - "step": 6226 - }, - { - "epoch": 2.76571174772374, - "grad_norm": 0.3434852926148082, - "learning_rate": 2.6286572252469e-06, - "loss": 0.0316, - "step": 6227 - }, - { - "epoch": 2.766155896069287, - "grad_norm": 0.7142093600067161, - "learning_rate": 2.626950754101018e-06, - "loss": 0.056, - "step": 6228 - }, - { - "epoch": 2.7666000444148344, - "grad_norm": 0.6028670535609708, - "learning_rate": 2.6252446396647503e-06, - "loss": 0.0323, - "step": 6229 - }, - { - "epoch": 2.767044192760382, - "grad_norm": 0.42675838339191474, - "learning_rate": 2.6235388821945497e-06, - "loss": 0.0297, - "step": 6230 - }, - { - "epoch": 2.7674883411059295, - "grad_norm": 0.7192216736608809, - "learning_rate": 2.621833481946826e-06, - "loss": 0.0357, - "step": 6231 - }, - { - "epoch": 2.7679324894514767, - "grad_norm": 0.4766794360092108, - "learning_rate": 2.6201284391779303e-06, - "loss": 0.0331, - "step": 6232 - }, - { - "epoch": 2.768376637797024, - "grad_norm": 0.4250368407242259, - "learning_rate": 2.618423754144155e-06, - "loss": 0.0321, - "step": 6233 - }, - { - "epoch": 2.7688207861425718, - "grad_norm": 0.5212475478820174, - "learning_rate": 2.616719427101745e-06, - "loss": 0.0324, - "step": 6234 - }, - { - "epoch": 2.769264934488119, - "grad_norm": 0.3008339483542143, - "learning_rate": 2.6150154583068922e-06, - "loss": 0.0226, - "step": 6235 - }, - { - "epoch": 2.7697090828336663, - "grad_norm": 0.3816969610714372, - "learning_rate": 2.613311848015725e-06, - "loss": 0.0259, - "step": 6236 - }, - { - "epoch": 2.770153231179214, - "grad_norm": 0.38596290627054197, - "learning_rate": 2.611608596484335e-06, - "loss": 0.0288, - "step": 6237 - }, - { - "epoch": 2.7705973795247614, - "grad_norm": 0.4303882315778446, - "learning_rate": 2.609905703968742e-06, - "loss": 0.0223, - "step": 6238 - }, - { - "epoch": 2.7710415278703087, - "grad_norm": 0.4053342617572821, - "learning_rate": 2.6082031707249223e-06, - "loss": 0.03, - "step": 6239 - }, - { - "epoch": 2.771485676215856, - "grad_norm": 0.7823903528274498, - "learning_rate": 2.6065009970087974e-06, - "loss": 0.0324, - "step": 6240 - }, - { - "epoch": 2.7719298245614032, - "grad_norm": 0.42522914693804564, - "learning_rate": 2.6047991830762297e-06, - "loss": 0.038, - "step": 6241 - }, - { - "epoch": 2.772373972906951, - "grad_norm": 0.3532987358681675, - "learning_rate": 2.60309772918303e-06, - "loss": 0.0235, - "step": 6242 - }, - { - "epoch": 2.7728181212524983, - "grad_norm": 0.33902634374584634, - "learning_rate": 2.6013966355849618e-06, - "loss": 0.0327, - "step": 6243 - }, - { - "epoch": 2.773262269598046, - "grad_norm": 0.44149160136100024, - "learning_rate": 2.5996959025377224e-06, - "loss": 0.0492, - "step": 6244 - }, - { - "epoch": 2.7737064179435933, - "grad_norm": 0.48103215081730055, - "learning_rate": 2.597995530296963e-06, - "loss": 0.0267, - "step": 6245 - }, - { - "epoch": 2.7741505662891406, - "grad_norm": 0.3235934136296388, - "learning_rate": 2.5962955191182792e-06, - "loss": 0.0183, - "step": 6246 - }, - { - "epoch": 2.774594714634688, - "grad_norm": 0.3462677290866656, - "learning_rate": 2.59459586925721e-06, - "loss": 0.0272, - "step": 6247 - }, - { - "epoch": 2.775038862980235, - "grad_norm": 0.32294840719861506, - "learning_rate": 2.592896580969242e-06, - "loss": 0.0281, - "step": 6248 - }, - { - "epoch": 2.775483011325783, - "grad_norm": 0.3499956168537934, - "learning_rate": 2.591197654509807e-06, - "loss": 0.0309, - "step": 6249 - }, - { - "epoch": 2.77592715967133, - "grad_norm": 0.40008372827541966, - "learning_rate": 2.5894990901342833e-06, - "loss": 0.0268, - "step": 6250 - }, - { - "epoch": 2.7763713080168775, - "grad_norm": 0.40878589097636386, - "learning_rate": 2.587800888097993e-06, - "loss": 0.0313, - "step": 6251 - }, - { - "epoch": 2.776815456362425, - "grad_norm": 0.7688538094871912, - "learning_rate": 2.5861030486562084e-06, - "loss": 0.0479, - "step": 6252 - }, - { - "epoch": 2.7772596047079725, - "grad_norm": 0.40809425302097857, - "learning_rate": 2.5844055720641357e-06, - "loss": 0.0311, - "step": 6253 - }, - { - "epoch": 2.77770375305352, - "grad_norm": 0.37450235777366314, - "learning_rate": 2.5827084585769436e-06, - "loss": 0.0286, - "step": 6254 - }, - { - "epoch": 2.778147901399067, - "grad_norm": 0.3661400531713744, - "learning_rate": 2.581011708449731e-06, - "loss": 0.0271, - "step": 6255 - }, - { - "epoch": 2.778592049744615, - "grad_norm": 0.37170138943829045, - "learning_rate": 2.57931532193755e-06, - "loss": 0.0346, - "step": 6256 - }, - { - "epoch": 2.779036198090162, - "grad_norm": 0.30626642522716074, - "learning_rate": 2.577619299295398e-06, - "loss": 0.026, - "step": 6257 - }, - { - "epoch": 2.7794803464357094, - "grad_norm": 0.3007717640940463, - "learning_rate": 2.5759236407782128e-06, - "loss": 0.0247, - "step": 6258 - }, - { - "epoch": 2.779924494781257, - "grad_norm": 0.5105689813889596, - "learning_rate": 2.5742283466408803e-06, - "loss": 0.0477, - "step": 6259 - }, - { - "epoch": 2.7803686431268044, - "grad_norm": 0.49116305516555464, - "learning_rate": 2.572533417138237e-06, - "loss": 0.0257, - "step": 6260 - }, - { - "epoch": 2.7808127914723517, - "grad_norm": 0.356325832140254, - "learning_rate": 2.570838852525055e-06, - "loss": 0.0263, - "step": 6261 - }, - { - "epoch": 2.781256939817899, - "grad_norm": 0.4435162531660302, - "learning_rate": 2.569144653056058e-06, - "loss": 0.0435, - "step": 6262 - }, - { - "epoch": 2.7817010881634467, - "grad_norm": 0.5419805855499744, - "learning_rate": 2.5674508189859147e-06, - "loss": 0.0314, - "step": 6263 - }, - { - "epoch": 2.782145236508994, - "grad_norm": 0.3656656264231573, - "learning_rate": 2.565757350569233e-06, - "loss": 0.0241, - "step": 6264 - }, - { - "epoch": 2.7825893848545413, - "grad_norm": 0.43315585203736967, - "learning_rate": 2.5640642480605722e-06, - "loss": 0.0372, - "step": 6265 - }, - { - "epoch": 2.783033533200089, - "grad_norm": 0.6118620035354352, - "learning_rate": 2.5623715117144337e-06, - "loss": 0.0389, - "step": 6266 - }, - { - "epoch": 2.7834776815456364, - "grad_norm": 1.003007529298919, - "learning_rate": 2.5606791417852655e-06, - "loss": 0.0489, - "step": 6267 - }, - { - "epoch": 2.7839218298911836, - "grad_norm": 0.335632805390222, - "learning_rate": 2.558987138527461e-06, - "loss": 0.0298, - "step": 6268 - }, - { - "epoch": 2.784365978236731, - "grad_norm": 0.49218374523384917, - "learning_rate": 2.5572955021953525e-06, - "loss": 0.0504, - "step": 6269 - }, - { - "epoch": 2.7848101265822782, - "grad_norm": 0.3662564117457857, - "learning_rate": 2.555604233043224e-06, - "loss": 0.024, - "step": 6270 - }, - { - "epoch": 2.785254274927826, - "grad_norm": 0.4410048606518427, - "learning_rate": 2.553913331325305e-06, - "loss": 0.0308, - "step": 6271 - }, - { - "epoch": 2.7856984232733732, - "grad_norm": 0.6748277215653389, - "learning_rate": 2.5522227972957626e-06, - "loss": 0.0353, - "step": 6272 - }, - { - "epoch": 2.786142571618921, - "grad_norm": 0.4988731713313121, - "learning_rate": 2.550532631208713e-06, - "loss": 0.0262, - "step": 6273 - }, - { - "epoch": 2.7865867199644683, - "grad_norm": 0.3910346837842304, - "learning_rate": 2.5488428333182213e-06, - "loss": 0.0369, - "step": 6274 - }, - { - "epoch": 2.7870308683100156, - "grad_norm": 0.5189622685954451, - "learning_rate": 2.5471534038782876e-06, - "loss": 0.0365, - "step": 6275 - }, - { - "epoch": 2.787475016655563, - "grad_norm": 0.4108944491644082, - "learning_rate": 2.545464343142862e-06, - "loss": 0.0322, - "step": 6276 - }, - { - "epoch": 2.78791916500111, - "grad_norm": 0.3860699292761844, - "learning_rate": 2.543775651365844e-06, - "loss": 0.0232, - "step": 6277 - }, - { - "epoch": 2.788363313346658, - "grad_norm": 0.47557488338460535, - "learning_rate": 2.5420873288010682e-06, - "loss": 0.0387, - "step": 6278 - }, - { - "epoch": 2.788807461692205, - "grad_norm": 0.3839192710393702, - "learning_rate": 2.5403993757023193e-06, - "loss": 0.0261, - "step": 6279 - }, - { - "epoch": 2.7892516100377525, - "grad_norm": 0.4089956846684774, - "learning_rate": 2.538711792323328e-06, - "loss": 0.0393, - "step": 6280 - }, - { - "epoch": 2.7896957583833, - "grad_norm": 0.3534578778564665, - "learning_rate": 2.5370245789177615e-06, - "loss": 0.0234, - "step": 6281 - }, - { - "epoch": 2.7901399067288475, - "grad_norm": 0.38317967580423545, - "learning_rate": 2.53533773573924e-06, - "loss": 0.0264, - "step": 6282 - }, - { - "epoch": 2.7905840550743948, - "grad_norm": 0.42936934153083767, - "learning_rate": 2.533651263041324e-06, - "loss": 0.0393, - "step": 6283 - }, - { - "epoch": 2.791028203419942, - "grad_norm": 0.554120368450601, - "learning_rate": 2.5319651610775194e-06, - "loss": 0.0317, - "step": 6284 - }, - { - "epoch": 2.79147235176549, - "grad_norm": 0.4081200054049489, - "learning_rate": 2.5302794301012766e-06, - "loss": 0.0297, - "step": 6285 - }, - { - "epoch": 2.791916500111037, - "grad_norm": 0.7070187464980049, - "learning_rate": 2.528594070365988e-06, - "loss": 0.0356, - "step": 6286 - }, - { - "epoch": 2.7923606484565844, - "grad_norm": 0.45395646778800836, - "learning_rate": 2.52690908212499e-06, - "loss": 0.0344, - "step": 6287 - }, - { - "epoch": 2.792804796802132, - "grad_norm": 0.7713383991827947, - "learning_rate": 2.525224465631571e-06, - "loss": 0.0323, - "step": 6288 - }, - { - "epoch": 2.7932489451476794, - "grad_norm": 0.43262436851175007, - "learning_rate": 2.5235402211389525e-06, - "loss": 0.0311, - "step": 6289 - }, - { - "epoch": 2.7936930934932267, - "grad_norm": 0.9524822174469669, - "learning_rate": 2.5218563489003062e-06, - "loss": 0.0307, - "step": 6290 - }, - { - "epoch": 2.794137241838774, - "grad_norm": 0.35257313740001833, - "learning_rate": 2.520172849168749e-06, - "loss": 0.0321, - "step": 6291 - }, - { - "epoch": 2.7945813901843217, - "grad_norm": 0.4825645734393702, - "learning_rate": 2.518489722197335e-06, - "loss": 0.0374, - "step": 6292 - }, - { - "epoch": 2.795025538529869, - "grad_norm": 0.7297573078351898, - "learning_rate": 2.51680696823907e-06, - "loss": 0.041, - "step": 6293 - }, - { - "epoch": 2.7954696868754163, - "grad_norm": 0.4555687972584318, - "learning_rate": 2.5151245875468993e-06, - "loss": 0.0386, - "step": 6294 - }, - { - "epoch": 2.795913835220964, - "grad_norm": 0.4970797678676715, - "learning_rate": 2.5134425803737137e-06, - "loss": 0.0418, - "step": 6295 - }, - { - "epoch": 2.7963579835665113, - "grad_norm": 0.6816809012387476, - "learning_rate": 2.511760946972348e-06, - "loss": 0.0409, - "step": 6296 - }, - { - "epoch": 2.7968021319120586, - "grad_norm": 0.5264337163942395, - "learning_rate": 2.5100796875955815e-06, - "loss": 0.0473, - "step": 6297 - }, - { - "epoch": 2.797246280257606, - "grad_norm": 0.5625709988931173, - "learning_rate": 2.508398802496132e-06, - "loss": 0.0372, - "step": 6298 - }, - { - "epoch": 2.797690428603153, - "grad_norm": 0.5237928838120754, - "learning_rate": 2.5067182919266676e-06, - "loss": 0.0311, - "step": 6299 - }, - { - "epoch": 2.798134576948701, - "grad_norm": 0.4778467003244999, - "learning_rate": 2.5050381561397974e-06, - "loss": 0.0429, - "step": 6300 - }, - { - "epoch": 2.7985787252942482, - "grad_norm": 0.43069407371279944, - "learning_rate": 2.503358395388074e-06, - "loss": 0.0435, - "step": 6301 - }, - { - "epoch": 2.799022873639796, - "grad_norm": 0.35842408526989356, - "learning_rate": 2.501679009923997e-06, - "loss": 0.0279, - "step": 6302 - }, - { - "epoch": 2.7994670219853433, - "grad_norm": 0.3642207986414275, - "learning_rate": 2.5000000000000015e-06, - "loss": 0.0311, - "step": 6303 - }, - { - "epoch": 2.7999111703308905, - "grad_norm": 0.655911709523861, - "learning_rate": 2.498321365868471e-06, - "loss": 0.036, - "step": 6304 - }, - { - "epoch": 2.800355318676438, - "grad_norm": 0.3959150499907849, - "learning_rate": 2.49664310778174e-06, - "loss": 0.0253, - "step": 6305 - }, - { - "epoch": 2.800799467021985, - "grad_norm": 0.5170912208789272, - "learning_rate": 2.4949652259920727e-06, - "loss": 0.0279, - "step": 6306 - }, - { - "epoch": 2.801243615367533, - "grad_norm": 0.4192727714282743, - "learning_rate": 2.4932877207516844e-06, - "loss": 0.0288, - "step": 6307 - }, - { - "epoch": 2.80168776371308, - "grad_norm": 0.4736817072181479, - "learning_rate": 2.4916105923127355e-06, - "loss": 0.0395, - "step": 6308 - }, - { - "epoch": 2.8021319120586274, - "grad_norm": 0.4164208273038199, - "learning_rate": 2.489933840927323e-06, - "loss": 0.025, - "step": 6309 - }, - { - "epoch": 2.802576060404175, - "grad_norm": 0.4237935178364351, - "learning_rate": 2.4882574668474925e-06, - "loss": 0.0326, - "step": 6310 - }, - { - "epoch": 2.8030202087497225, - "grad_norm": 0.3766334421419762, - "learning_rate": 2.486581470325232e-06, - "loss": 0.0228, - "step": 6311 - }, - { - "epoch": 2.8034643570952698, - "grad_norm": 0.41806136620537815, - "learning_rate": 2.484905851612471e-06, - "loss": 0.0364, - "step": 6312 - }, - { - "epoch": 2.803908505440817, - "grad_norm": 0.5567970380481517, - "learning_rate": 2.4832306109610877e-06, - "loss": 0.0575, - "step": 6313 - }, - { - "epoch": 2.804352653786365, - "grad_norm": 0.38489435628632473, - "learning_rate": 2.4815557486228937e-06, - "loss": 0.0243, - "step": 6314 - }, - { - "epoch": 2.804796802131912, - "grad_norm": 0.33130559466084525, - "learning_rate": 2.479881264849651e-06, - "loss": 0.0237, - "step": 6315 - }, - { - "epoch": 2.8052409504774594, - "grad_norm": 1.680243538004499, - "learning_rate": 2.478207159893064e-06, - "loss": 0.032, - "step": 6316 - }, - { - "epoch": 2.805685098823007, - "grad_norm": 0.48052932859070213, - "learning_rate": 2.476533434004779e-06, - "loss": 0.0311, - "step": 6317 - }, - { - "epoch": 2.8061292471685544, - "grad_norm": 0.36896833958182274, - "learning_rate": 2.474860087436384e-06, - "loss": 0.0261, - "step": 6318 - }, - { - "epoch": 2.8065733955141017, - "grad_norm": 0.633026238867653, - "learning_rate": 2.4731871204394155e-06, - "loss": 0.048, - "step": 6319 - }, - { - "epoch": 2.807017543859649, - "grad_norm": 0.4418796227317967, - "learning_rate": 2.4715145332653433e-06, - "loss": 0.0359, - "step": 6320 - }, - { - "epoch": 2.8074616922051967, - "grad_norm": 0.4502178155781612, - "learning_rate": 2.4698423261655887e-06, - "loss": 0.0306, - "step": 6321 - }, - { - "epoch": 2.807905840550744, - "grad_norm": 0.4566900893012358, - "learning_rate": 2.468170499391512e-06, - "loss": 0.0362, - "step": 6322 - }, - { - "epoch": 2.8083499888962913, - "grad_norm": 0.38715597816758157, - "learning_rate": 2.4664990531944176e-06, - "loss": 0.0404, - "step": 6323 - }, - { - "epoch": 2.808794137241839, - "grad_norm": 0.43966470207668296, - "learning_rate": 2.4648279878255523e-06, - "loss": 0.0258, - "step": 6324 - }, - { - "epoch": 2.8092382855873863, - "grad_norm": 0.3596530155746306, - "learning_rate": 2.4631573035361073e-06, - "loss": 0.0294, - "step": 6325 - }, - { - "epoch": 2.8096824339329336, - "grad_norm": 0.44583220946986085, - "learning_rate": 2.4614870005772105e-06, - "loss": 0.0223, - "step": 6326 - }, - { - "epoch": 2.810126582278481, - "grad_norm": 0.5911442996714825, - "learning_rate": 2.45981707919994e-06, - "loss": 0.0393, - "step": 6327 - }, - { - "epoch": 2.810570730624028, - "grad_norm": 0.5581799888907024, - "learning_rate": 2.458147539655313e-06, - "loss": 0.0326, - "step": 6328 - }, - { - "epoch": 2.811014878969576, - "grad_norm": 0.39311733798131593, - "learning_rate": 2.4564783821942884e-06, - "loss": 0.029, - "step": 6329 - }, - { - "epoch": 2.811459027315123, - "grad_norm": 0.4450796457627704, - "learning_rate": 2.454809607067772e-06, - "loss": 0.0369, - "step": 6330 - }, - { - "epoch": 2.811903175660671, - "grad_norm": 0.43590500853966985, - "learning_rate": 2.4531412145266055e-06, - "loss": 0.0386, - "step": 6331 - }, - { - "epoch": 2.8123473240062182, - "grad_norm": 0.457972822762452, - "learning_rate": 2.4514732048215774e-06, - "loss": 0.042, - "step": 6332 - }, - { - "epoch": 2.8127914723517655, - "grad_norm": 0.32284714510978635, - "learning_rate": 2.4498055782034187e-06, - "loss": 0.0334, - "step": 6333 - }, - { - "epoch": 2.813235620697313, - "grad_norm": 0.36527473928195653, - "learning_rate": 2.4481383349228016e-06, - "loss": 0.0253, - "step": 6334 - }, - { - "epoch": 2.81367976904286, - "grad_norm": 0.3860105760601358, - "learning_rate": 2.446471475230342e-06, - "loss": 0.0391, - "step": 6335 - }, - { - "epoch": 2.814123917388408, - "grad_norm": 0.41855117247115486, - "learning_rate": 2.4448049993765975e-06, - "loss": 0.0296, - "step": 6336 - }, - { - "epoch": 2.814568065733955, - "grad_norm": 0.4146118074577428, - "learning_rate": 2.4431389076120657e-06, - "loss": 0.0325, - "step": 6337 - }, - { - "epoch": 2.8150122140795024, - "grad_norm": 0.3990029724898874, - "learning_rate": 2.4414732001871892e-06, - "loss": 0.0337, - "step": 6338 - }, - { - "epoch": 2.81545636242505, - "grad_norm": 0.5638606230897282, - "learning_rate": 2.4398078773523526e-06, - "loss": 0.0445, - "step": 6339 - }, - { - "epoch": 2.8159005107705974, - "grad_norm": 0.45902483366087526, - "learning_rate": 2.438142939357882e-06, - "loss": 0.0417, - "step": 6340 - }, - { - "epoch": 2.8163446591161447, - "grad_norm": 0.3183617092675609, - "learning_rate": 2.4364783864540482e-06, - "loss": 0.0252, - "step": 6341 - }, - { - "epoch": 2.816788807461692, - "grad_norm": 0.3554967849352285, - "learning_rate": 2.434814218891057e-06, - "loss": 0.0253, - "step": 6342 - }, - { - "epoch": 2.8172329558072398, - "grad_norm": 0.39321716328224376, - "learning_rate": 2.433150436919064e-06, - "loss": 0.0272, - "step": 6343 - }, - { - "epoch": 2.817677104152787, - "grad_norm": 0.5605795416471407, - "learning_rate": 2.4314870407881637e-06, - "loss": 0.0348, - "step": 6344 - }, - { - "epoch": 2.8181212524983343, - "grad_norm": 0.3732190806583137, - "learning_rate": 2.4298240307483923e-06, - "loss": 0.0233, - "step": 6345 - }, - { - "epoch": 2.818565400843882, - "grad_norm": 0.32751466948341074, - "learning_rate": 2.4281614070497282e-06, - "loss": 0.0284, - "step": 6346 - }, - { - "epoch": 2.8190095491894294, - "grad_norm": 0.38489431013084585, - "learning_rate": 2.4264991699420953e-06, - "loss": 0.0235, - "step": 6347 - }, - { - "epoch": 2.8194536975349767, - "grad_norm": 0.5207677415942126, - "learning_rate": 2.4248373196753512e-06, - "loss": 0.0362, - "step": 6348 - }, - { - "epoch": 2.819897845880524, - "grad_norm": 0.39019062549891126, - "learning_rate": 2.423175856499302e-06, - "loss": 0.0221, - "step": 6349 - }, - { - "epoch": 2.8203419942260712, - "grad_norm": 0.4152809637919385, - "learning_rate": 2.4215147806636942e-06, - "loss": 0.0314, - "step": 6350 - }, - { - "epoch": 2.820786142571619, - "grad_norm": 0.4000109524825699, - "learning_rate": 2.4198540924182156e-06, - "loss": 0.0307, - "step": 6351 - }, - { - "epoch": 2.8212302909171663, - "grad_norm": 0.4388997785747502, - "learning_rate": 2.4181937920124966e-06, - "loss": 0.0298, - "step": 6352 - }, - { - "epoch": 2.821674439262714, - "grad_norm": 0.4019919408619044, - "learning_rate": 2.4165338796961093e-06, - "loss": 0.033, - "step": 6353 - }, - { - "epoch": 2.8221185876082613, - "grad_norm": 0.570406566396068, - "learning_rate": 2.414874355718563e-06, - "loss": 0.0379, - "step": 6354 - }, - { - "epoch": 2.8225627359538086, - "grad_norm": 0.4407385335769093, - "learning_rate": 2.413215220329315e-06, - "loss": 0.0282, - "step": 6355 - }, - { - "epoch": 2.823006884299356, - "grad_norm": 0.38569267409608843, - "learning_rate": 2.411556473777761e-06, - "loss": 0.0282, - "step": 6356 - }, - { - "epoch": 2.823451032644903, - "grad_norm": 0.4606495839160357, - "learning_rate": 2.4098981163132395e-06, - "loss": 0.0342, - "step": 6357 - }, - { - "epoch": 2.823895180990451, - "grad_norm": 0.43263213722768684, - "learning_rate": 2.4082401481850306e-06, - "loss": 0.0369, - "step": 6358 - }, - { - "epoch": 2.824339329335998, - "grad_norm": 0.5551215018246358, - "learning_rate": 2.4065825696423522e-06, - "loss": 0.0327, - "step": 6359 - }, - { - "epoch": 2.8247834776815455, - "grad_norm": 0.3806554615954976, - "learning_rate": 2.4049253809343678e-06, - "loss": 0.0333, - "step": 6360 - }, - { - "epoch": 2.825227626027093, - "grad_norm": 0.4594230517569721, - "learning_rate": 2.4032685823101814e-06, - "loss": 0.0302, - "step": 6361 - }, - { - "epoch": 2.8256717743726405, - "grad_norm": 0.49282566341591966, - "learning_rate": 2.4016121740188375e-06, - "loss": 0.0283, - "step": 6362 - }, - { - "epoch": 2.826115922718188, - "grad_norm": 0.37575563744052526, - "learning_rate": 2.3999561563093234e-06, - "loss": 0.0349, - "step": 6363 - }, - { - "epoch": 2.826560071063735, - "grad_norm": 0.4760513100430366, - "learning_rate": 2.3983005294305673e-06, - "loss": 0.0374, - "step": 6364 - }, - { - "epoch": 2.827004219409283, - "grad_norm": 0.395278083467666, - "learning_rate": 2.396645293631435e-06, - "loss": 0.0248, - "step": 6365 - }, - { - "epoch": 2.82744836775483, - "grad_norm": 0.4876360922819094, - "learning_rate": 2.3949904491607384e-06, - "loss": 0.0342, - "step": 6366 - }, - { - "epoch": 2.8278925161003774, - "grad_norm": 0.40885930978989254, - "learning_rate": 2.393335996267229e-06, - "loss": 0.0325, - "step": 6367 - }, - { - "epoch": 2.828336664445925, - "grad_norm": 0.44935957035165386, - "learning_rate": 2.3916819351995984e-06, - "loss": 0.0273, - "step": 6368 - }, - { - "epoch": 2.8287808127914724, - "grad_norm": 1.506178960229344, - "learning_rate": 2.3900282662064806e-06, - "loss": 0.059, - "step": 6369 - }, - { - "epoch": 2.8292249611370197, - "grad_norm": 0.30959880923138633, - "learning_rate": 2.3883749895364523e-06, - "loss": 0.0217, - "step": 6370 - }, - { - "epoch": 2.829669109482567, - "grad_norm": 0.7823253783794836, - "learning_rate": 2.3867221054380244e-06, - "loss": 0.0329, - "step": 6371 - }, - { - "epoch": 2.8301132578281147, - "grad_norm": 0.4806013956209637, - "learning_rate": 2.3850696141596563e-06, - "loss": 0.0455, - "step": 6372 - }, - { - "epoch": 2.830557406173662, - "grad_norm": 0.6896449670646502, - "learning_rate": 2.3834175159497446e-06, - "loss": 0.0562, - "step": 6373 - }, - { - "epoch": 2.8310015545192093, - "grad_norm": 0.4709885726846783, - "learning_rate": 2.3817658110566288e-06, - "loss": 0.0326, - "step": 6374 - }, - { - "epoch": 2.831445702864757, - "grad_norm": 0.4631209021676538, - "learning_rate": 2.380114499728589e-06, - "loss": 0.0265, - "step": 6375 - }, - { - "epoch": 2.8318898512103043, - "grad_norm": 0.4360372773811446, - "learning_rate": 2.3784635822138424e-06, - "loss": 0.0409, - "step": 6376 - }, - { - "epoch": 2.8323339995558516, - "grad_norm": 0.36688129563252986, - "learning_rate": 2.3768130587605513e-06, - "loss": 0.0248, - "step": 6377 - }, - { - "epoch": 2.832778147901399, - "grad_norm": 0.37437289245252986, - "learning_rate": 2.3751629296168177e-06, - "loss": 0.0305, - "step": 6378 - }, - { - "epoch": 2.833222296246946, - "grad_norm": 0.531778233535108, - "learning_rate": 2.3735131950306845e-06, - "loss": 0.0352, - "step": 6379 - }, - { - "epoch": 2.833666444592494, - "grad_norm": 0.4416928778863043, - "learning_rate": 2.371863855250134e-06, - "loss": 0.0356, - "step": 6380 - }, - { - "epoch": 2.8341105929380412, - "grad_norm": 0.3539473400154312, - "learning_rate": 2.3702149105230914e-06, - "loss": 0.0329, - "step": 6381 - }, - { - "epoch": 2.834554741283589, - "grad_norm": 0.8474469374112829, - "learning_rate": 2.3685663610974193e-06, - "loss": 0.0339, - "step": 6382 - }, - { - "epoch": 2.8349988896291363, - "grad_norm": 0.42747029305832396, - "learning_rate": 2.3669182072209225e-06, - "loss": 0.0381, - "step": 6383 - }, - { - "epoch": 2.8354430379746836, - "grad_norm": 0.7608488814785124, - "learning_rate": 2.3652704491413477e-06, - "loss": 0.032, - "step": 6384 - }, - { - "epoch": 2.835887186320231, - "grad_norm": 0.388700958157769, - "learning_rate": 2.3636230871063803e-06, - "loss": 0.0273, - "step": 6385 - }, - { - "epoch": 2.836331334665778, - "grad_norm": 0.5022022192841024, - "learning_rate": 2.3619761213636496e-06, - "loss": 0.0437, - "step": 6386 - }, - { - "epoch": 2.836775483011326, - "grad_norm": 0.34657365126440165, - "learning_rate": 2.360329552160718e-06, - "loss": 0.0246, - "step": 6387 - }, - { - "epoch": 2.837219631356873, - "grad_norm": 0.3939450249882695, - "learning_rate": 2.358683379745094e-06, - "loss": 0.0267, - "step": 6388 - }, - { - "epoch": 2.8376637797024205, - "grad_norm": 0.41677776471298256, - "learning_rate": 2.357037604364229e-06, - "loss": 0.0324, - "step": 6389 - }, - { - "epoch": 2.838107928047968, - "grad_norm": 0.3562978131516035, - "learning_rate": 2.3553922262655045e-06, - "loss": 0.0289, - "step": 6390 - }, - { - "epoch": 2.8385520763935155, - "grad_norm": 0.3800097306965494, - "learning_rate": 2.3537472456962536e-06, - "loss": 0.0269, - "step": 6391 - }, - { - "epoch": 2.8389962247390628, - "grad_norm": 0.3927488750912727, - "learning_rate": 2.3521026629037456e-06, - "loss": 0.032, - "step": 6392 - }, - { - "epoch": 2.83944037308461, - "grad_norm": 0.6796389696750893, - "learning_rate": 2.3504584781351857e-06, - "loss": 0.035, - "step": 6393 - }, - { - "epoch": 2.839884521430158, - "grad_norm": 0.4312865368452842, - "learning_rate": 2.3488146916377246e-06, - "loss": 0.024, - "step": 6394 - }, - { - "epoch": 2.840328669775705, - "grad_norm": 0.5503469902463891, - "learning_rate": 2.3471713036584507e-06, - "loss": 0.0387, - "step": 6395 - }, - { - "epoch": 2.8407728181212524, - "grad_norm": 0.39537225808331916, - "learning_rate": 2.345528314444394e-06, - "loss": 0.0365, - "step": 6396 - }, - { - "epoch": 2.8412169664668, - "grad_norm": 0.4828036261450407, - "learning_rate": 2.343885724242523e-06, - "loss": 0.0359, - "step": 6397 - }, - { - "epoch": 2.8416611148123474, - "grad_norm": 0.3405572301380993, - "learning_rate": 2.342243533299749e-06, - "loss": 0.0296, - "step": 6398 - }, - { - "epoch": 2.8421052631578947, - "grad_norm": 0.3471570803326242, - "learning_rate": 2.3406017418629173e-06, - "loss": 0.0295, - "step": 6399 - }, - { - "epoch": 2.842549411503442, - "grad_norm": 0.30534728377719983, - "learning_rate": 2.3389603501788187e-06, - "loss": 0.0161, - "step": 6400 - }, - { - "epoch": 2.8429935598489897, - "grad_norm": 0.4261866679259168, - "learning_rate": 2.3373193584941833e-06, - "loss": 0.0213, - "step": 6401 - }, - { - "epoch": 2.843437708194537, - "grad_norm": 0.5685316006056542, - "learning_rate": 2.335678767055679e-06, - "loss": 0.0434, - "step": 6402 - }, - { - "epoch": 2.8438818565400843, - "grad_norm": 0.49631869614829277, - "learning_rate": 2.334038576109917e-06, - "loss": 0.0356, - "step": 6403 - }, - { - "epoch": 2.844326004885632, - "grad_norm": 0.5515165392461051, - "learning_rate": 2.332398785903442e-06, - "loss": 0.0363, - "step": 6404 - }, - { - "epoch": 2.8447701532311793, - "grad_norm": 0.532355259631848, - "learning_rate": 2.330759396682744e-06, - "loss": 0.0399, - "step": 6405 - }, - { - "epoch": 2.8452143015767266, - "grad_norm": 0.5194908272702733, - "learning_rate": 2.329120408694253e-06, - "loss": 0.0419, - "step": 6406 - }, - { - "epoch": 2.845658449922274, - "grad_norm": 0.4352160286424336, - "learning_rate": 2.327481822184331e-06, - "loss": 0.0323, - "step": 6407 - }, - { - "epoch": 2.846102598267821, - "grad_norm": 0.40556462446399455, - "learning_rate": 2.3258436373992914e-06, - "loss": 0.0295, - "step": 6408 - }, - { - "epoch": 2.846546746613369, - "grad_norm": 0.48488189817066807, - "learning_rate": 2.3242058545853806e-06, - "loss": 0.0323, - "step": 6409 - }, - { - "epoch": 2.846990894958916, - "grad_norm": 0.7693171033124235, - "learning_rate": 2.322568473988782e-06, - "loss": 0.0304, - "step": 6410 - }, - { - "epoch": 2.847435043304464, - "grad_norm": 0.6057507153145041, - "learning_rate": 2.3209314958556232e-06, - "loss": 0.0262, - "step": 6411 - }, - { - "epoch": 2.8478791916500112, - "grad_norm": 0.4479104368079513, - "learning_rate": 2.319294920431972e-06, - "loss": 0.0363, - "step": 6412 - }, - { - "epoch": 2.8483233399955585, - "grad_norm": 0.5011576633143359, - "learning_rate": 2.317658747963828e-06, - "loss": 0.0415, - "step": 6413 - }, - { - "epoch": 2.848767488341106, - "grad_norm": 0.43395301260402036, - "learning_rate": 2.316022978697143e-06, - "loss": 0.0305, - "step": 6414 - }, - { - "epoch": 2.849211636686653, - "grad_norm": 0.4161396554904535, - "learning_rate": 2.314387612877795e-06, - "loss": 0.031, - "step": 6415 - }, - { - "epoch": 2.849655785032201, - "grad_norm": 0.453771160365058, - "learning_rate": 2.312752650751609e-06, - "loss": 0.0328, - "step": 6416 - }, - { - "epoch": 2.850099933377748, - "grad_norm": 0.4057605385720707, - "learning_rate": 2.3111180925643477e-06, - "loss": 0.0308, - "step": 6417 - }, - { - "epoch": 2.8505440817232954, - "grad_norm": 0.4469537485385078, - "learning_rate": 2.309483938561714e-06, - "loss": 0.0345, - "step": 6418 - }, - { - "epoch": 2.850988230068843, - "grad_norm": 0.3300098185481985, - "learning_rate": 2.3078501889893477e-06, - "loss": 0.028, - "step": 6419 - }, - { - "epoch": 2.8514323784143905, - "grad_norm": 0.36465566926853343, - "learning_rate": 2.3062168440928324e-06, - "loss": 0.0299, - "step": 6420 - }, - { - "epoch": 2.8518765267599377, - "grad_norm": 0.39053368090966234, - "learning_rate": 2.304583904117682e-06, - "loss": 0.029, - "step": 6421 - }, - { - "epoch": 2.852320675105485, - "grad_norm": 0.3264096837914728, - "learning_rate": 2.302951369309358e-06, - "loss": 0.0245, - "step": 6422 - }, - { - "epoch": 2.8527648234510328, - "grad_norm": 0.4328229716057836, - "learning_rate": 2.30131923991326e-06, - "loss": 0.0383, - "step": 6423 - }, - { - "epoch": 2.85320897179658, - "grad_norm": 0.4077375134882527, - "learning_rate": 2.2996875161747194e-06, - "loss": 0.038, - "step": 6424 - }, - { - "epoch": 2.8536531201421274, - "grad_norm": 0.4094918043040497, - "learning_rate": 2.298056198339017e-06, - "loss": 0.0298, - "step": 6425 - }, - { - "epoch": 2.854097268487675, - "grad_norm": 0.32420798744454565, - "learning_rate": 2.296425286651368e-06, - "loss": 0.0335, - "step": 6426 - }, - { - "epoch": 2.8545414168332224, - "grad_norm": 0.4249764666477243, - "learning_rate": 2.294794781356922e-06, - "loss": 0.0373, - "step": 6427 - }, - { - "epoch": 2.8549855651787697, - "grad_norm": 0.8704460304111548, - "learning_rate": 2.293164682700774e-06, - "loss": 0.0456, - "step": 6428 - }, - { - "epoch": 2.855429713524317, - "grad_norm": 0.5067415787866943, - "learning_rate": 2.2915349909279573e-06, - "loss": 0.0348, - "step": 6429 - }, - { - "epoch": 2.8558738618698647, - "grad_norm": 0.38730263257871017, - "learning_rate": 2.2899057062834363e-06, - "loss": 0.0508, - "step": 6430 - }, - { - "epoch": 2.856318010215412, - "grad_norm": 0.426646849291928, - "learning_rate": 2.2882768290121277e-06, - "loss": 0.03, - "step": 6431 - }, - { - "epoch": 2.8567621585609593, - "grad_norm": 0.4433931975798466, - "learning_rate": 2.286648359358874e-06, - "loss": 0.0361, - "step": 6432 - }, - { - "epoch": 2.857206306906507, - "grad_norm": 0.41545375813689084, - "learning_rate": 2.2850202975684637e-06, - "loss": 0.0302, - "step": 6433 - }, - { - "epoch": 2.8576504552520543, - "grad_norm": 0.3777035808530584, - "learning_rate": 2.283392643885624e-06, - "loss": 0.0287, - "step": 6434 - }, - { - "epoch": 2.8580946035976016, - "grad_norm": 0.7240655817753611, - "learning_rate": 2.2817653985550132e-06, - "loss": 0.0336, - "step": 6435 - }, - { - "epoch": 2.858538751943149, - "grad_norm": 0.3261261166151642, - "learning_rate": 2.2801385618212395e-06, - "loss": 0.0244, - "step": 6436 - }, - { - "epoch": 2.858982900288696, - "grad_norm": 0.365601220986991, - "learning_rate": 2.2785121339288446e-06, - "loss": 0.0281, - "step": 6437 - }, - { - "epoch": 2.859427048634244, - "grad_norm": 0.4479492062462993, - "learning_rate": 2.276886115122304e-06, - "loss": 0.0345, - "step": 6438 - }, - { - "epoch": 2.859871196979791, - "grad_norm": 0.3824493182929994, - "learning_rate": 2.2752605056460374e-06, - "loss": 0.0263, - "step": 6439 - }, - { - "epoch": 2.860315345325339, - "grad_norm": 0.444006507921262, - "learning_rate": 2.2736353057444045e-06, - "loss": 0.0286, - "step": 6440 - }, - { - "epoch": 2.8607594936708862, - "grad_norm": 0.4135565678132424, - "learning_rate": 2.272010515661694e-06, - "loss": 0.031, - "step": 6441 - }, - { - "epoch": 2.8612036420164335, - "grad_norm": 0.4704071890757161, - "learning_rate": 2.2703861356421476e-06, - "loss": 0.0374, - "step": 6442 - }, - { - "epoch": 2.861647790361981, - "grad_norm": 0.5654612362616459, - "learning_rate": 2.268762165929931e-06, - "loss": 0.0298, - "step": 6443 - }, - { - "epoch": 2.862091938707528, - "grad_norm": 0.4525755398497248, - "learning_rate": 2.267138606769156e-06, - "loss": 0.0373, - "step": 6444 - }, - { - "epoch": 2.862536087053076, - "grad_norm": 0.3792079187603561, - "learning_rate": 2.2655154584038718e-06, - "loss": 0.0274, - "step": 6445 - }, - { - "epoch": 2.862980235398623, - "grad_norm": 0.6472477866875229, - "learning_rate": 2.263892721078067e-06, - "loss": 0.0306, - "step": 6446 - }, - { - "epoch": 2.8634243837441704, - "grad_norm": 0.4724763504374727, - "learning_rate": 2.2622703950356607e-06, - "loss": 0.0395, - "step": 6447 - }, - { - "epoch": 2.863868532089718, - "grad_norm": 0.5178919025923505, - "learning_rate": 2.2606484805205235e-06, - "loss": 0.0329, - "step": 6448 - }, - { - "epoch": 2.8643126804352654, - "grad_norm": 0.4061558909889993, - "learning_rate": 2.2590269777764516e-06, - "loss": 0.0299, - "step": 6449 - }, - { - "epoch": 2.8647568287808127, - "grad_norm": 0.39901609043961084, - "learning_rate": 2.257405887047186e-06, - "loss": 0.0328, - "step": 6450 - }, - { - "epoch": 2.86520097712636, - "grad_norm": 0.49608870962855456, - "learning_rate": 2.2557852085764053e-06, - "loss": 0.0303, - "step": 6451 - }, - { - "epoch": 2.8656451254719078, - "grad_norm": 0.38259419645750875, - "learning_rate": 2.254164942607721e-06, - "loss": 0.0219, - "step": 6452 - }, - { - "epoch": 2.866089273817455, - "grad_norm": 0.42520263669940206, - "learning_rate": 2.2525450893846906e-06, - "loss": 0.0248, - "step": 6453 - }, - { - "epoch": 2.8665334221630023, - "grad_norm": 0.343086064868294, - "learning_rate": 2.2509256491508063e-06, - "loss": 0.0226, - "step": 6454 - }, - { - "epoch": 2.86697757050855, - "grad_norm": 0.48422523578882104, - "learning_rate": 2.249306622149494e-06, - "loss": 0.0336, - "step": 6455 - }, - { - "epoch": 2.8674217188540974, - "grad_norm": 0.40460550415553925, - "learning_rate": 2.2476880086241225e-06, - "loss": 0.0325, - "step": 6456 - }, - { - "epoch": 2.8678658671996446, - "grad_norm": 0.38849375983156137, - "learning_rate": 2.2460698088179985e-06, - "loss": 0.034, - "step": 6457 - }, - { - "epoch": 2.868310015545192, - "grad_norm": 0.3659863472898491, - "learning_rate": 2.24445202297436e-06, - "loss": 0.0311, - "step": 6458 - }, - { - "epoch": 2.8687541638907397, - "grad_norm": 0.32032734176784455, - "learning_rate": 2.242834651336394e-06, - "loss": 0.0304, - "step": 6459 - }, - { - "epoch": 2.869198312236287, - "grad_norm": 0.2895901124804986, - "learning_rate": 2.2412176941472146e-06, - "loss": 0.0252, - "step": 6460 - }, - { - "epoch": 2.8696424605818343, - "grad_norm": 0.4544732858743473, - "learning_rate": 2.2396011516498794e-06, - "loss": 0.0378, - "step": 6461 - }, - { - "epoch": 2.870086608927382, - "grad_norm": 0.33869349353641764, - "learning_rate": 2.2379850240873836e-06, - "loss": 0.0243, - "step": 6462 - }, - { - "epoch": 2.8705307572729293, - "grad_norm": 0.39779581383793367, - "learning_rate": 2.2363693117026554e-06, - "loss": 0.0328, - "step": 6463 - }, - { - "epoch": 2.8709749056184766, - "grad_norm": 0.5359509284893266, - "learning_rate": 2.2347540147385636e-06, - "loss": 0.0276, - "step": 6464 - }, - { - "epoch": 2.871419053964024, - "grad_norm": 0.3901564758848709, - "learning_rate": 2.2331391334379205e-06, - "loss": 0.0286, - "step": 6465 - }, - { - "epoch": 2.871863202309571, - "grad_norm": 0.4664762506585298, - "learning_rate": 2.231524668043465e-06, - "loss": 0.0421, - "step": 6466 - }, - { - "epoch": 2.872307350655119, - "grad_norm": 0.4098655625284004, - "learning_rate": 2.229910618797879e-06, - "loss": 0.0376, - "step": 6467 - }, - { - "epoch": 2.872751499000666, - "grad_norm": 0.4583633277386958, - "learning_rate": 2.228296985943785e-06, - "loss": 0.0383, - "step": 6468 - }, - { - "epoch": 2.873195647346214, - "grad_norm": 0.37685547831341093, - "learning_rate": 2.226683769723734e-06, - "loss": 0.028, - "step": 6469 - }, - { - "epoch": 2.873639795691761, - "grad_norm": 0.400587688868656, - "learning_rate": 2.225070970380224e-06, - "loss": 0.0337, - "step": 6470 - }, - { - "epoch": 2.8740839440373085, - "grad_norm": 0.3702484950984152, - "learning_rate": 2.2234585881556864e-06, - "loss": 0.0274, - "step": 6471 - }, - { - "epoch": 2.874528092382856, - "grad_norm": 0.4674975669326644, - "learning_rate": 2.2218466232924867e-06, - "loss": 0.03, - "step": 6472 - }, - { - "epoch": 2.874972240728403, - "grad_norm": 0.37515826872200714, - "learning_rate": 2.2202350760329328e-06, - "loss": 0.0257, - "step": 6473 - }, - { - "epoch": 2.875416389073951, - "grad_norm": 0.3257808540366288, - "learning_rate": 2.2186239466192676e-06, - "loss": 0.0219, - "step": 6474 - }, - { - "epoch": 2.875860537419498, - "grad_norm": 0.4753925237563355, - "learning_rate": 2.2170132352936675e-06, - "loss": 0.0434, - "step": 6475 - }, - { - "epoch": 2.8763046857650454, - "grad_norm": 0.44581196717512095, - "learning_rate": 2.2154029422982563e-06, - "loss": 0.0371, - "step": 6476 - }, - { - "epoch": 2.876748834110593, - "grad_norm": 0.4263527151388236, - "learning_rate": 2.2137930678750835e-06, - "loss": 0.0376, - "step": 6477 - }, - { - "epoch": 2.8771929824561404, - "grad_norm": 0.46476168558248493, - "learning_rate": 2.2121836122661416e-06, - "loss": 0.034, - "step": 6478 - }, - { - "epoch": 2.8776371308016877, - "grad_norm": 0.34003139533262217, - "learning_rate": 2.2105745757133612e-06, - "loss": 0.0294, - "step": 6479 - }, - { - "epoch": 2.878081279147235, - "grad_norm": 0.49849907569693336, - "learning_rate": 2.2089659584586047e-06, - "loss": 0.0377, - "step": 6480 - }, - { - "epoch": 2.8785254274927827, - "grad_norm": 0.5382448752849822, - "learning_rate": 2.2073577607436737e-06, - "loss": 0.0443, - "step": 6481 - }, - { - "epoch": 2.87896957583833, - "grad_norm": 0.3879946256129708, - "learning_rate": 2.2057499828103142e-06, - "loss": 0.0327, - "step": 6482 - }, - { - "epoch": 2.8794137241838773, - "grad_norm": 0.5104922320743165, - "learning_rate": 2.2041426249001955e-06, - "loss": 0.0362, - "step": 6483 - }, - { - "epoch": 2.879857872529425, - "grad_norm": 0.4930647183156878, - "learning_rate": 2.2025356872549345e-06, - "loss": 0.0256, - "step": 6484 - }, - { - "epoch": 2.8803020208749723, - "grad_norm": 0.3966997553776206, - "learning_rate": 2.2009291701160817e-06, - "loss": 0.0271, - "step": 6485 - }, - { - "epoch": 2.8807461692205196, - "grad_norm": 0.4962925195644924, - "learning_rate": 2.1993230737251216e-06, - "loss": 0.0371, - "step": 6486 - }, - { - "epoch": 2.881190317566067, - "grad_norm": 0.42642379725740076, - "learning_rate": 2.197717398323477e-06, - "loss": 0.0263, - "step": 6487 - }, - { - "epoch": 2.881634465911614, - "grad_norm": 0.4141444359146802, - "learning_rate": 2.1961121441525113e-06, - "loss": 0.0344, - "step": 6488 - }, - { - "epoch": 2.882078614257162, - "grad_norm": 0.5311512207572496, - "learning_rate": 2.19450731145352e-06, - "loss": 0.0364, - "step": 6489 - }, - { - "epoch": 2.8825227626027092, - "grad_norm": 0.4267919708323801, - "learning_rate": 2.192902900467736e-06, - "loss": 0.0274, - "step": 6490 - }, - { - "epoch": 2.882966910948257, - "grad_norm": 0.31959536449480613, - "learning_rate": 2.1912989114363326e-06, - "loss": 0.0242, - "step": 6491 - }, - { - "epoch": 2.8834110592938043, - "grad_norm": 0.44718270123585074, - "learning_rate": 2.1896953446004104e-06, - "loss": 0.0397, - "step": 6492 - }, - { - "epoch": 2.8838552076393515, - "grad_norm": 0.4584663064416918, - "learning_rate": 2.1880922002010208e-06, - "loss": 0.0321, - "step": 6493 - }, - { - "epoch": 2.884299355984899, - "grad_norm": 0.6111455889547676, - "learning_rate": 2.186489478479137e-06, - "loss": 0.0423, - "step": 6494 - }, - { - "epoch": 2.884743504330446, - "grad_norm": 0.5635891360279379, - "learning_rate": 2.1848871796756784e-06, - "loss": 0.0559, - "step": 6495 - }, - { - "epoch": 2.885187652675994, - "grad_norm": 0.49224878038120795, - "learning_rate": 2.183285304031498e-06, - "loss": 0.0381, - "step": 6496 - }, - { - "epoch": 2.885631801021541, - "grad_norm": 0.412800271356595, - "learning_rate": 2.1816838517873834e-06, - "loss": 0.0281, - "step": 6497 - }, - { - "epoch": 2.8860759493670884, - "grad_norm": 0.42819522838748797, - "learning_rate": 2.1800828231840583e-06, - "loss": 0.0349, - "step": 6498 - }, - { - "epoch": 2.886520097712636, - "grad_norm": 0.3172148118997228, - "learning_rate": 2.178482218462191e-06, - "loss": 0.0198, - "step": 6499 - }, - { - "epoch": 2.8869642460581835, - "grad_norm": 0.6336802661233113, - "learning_rate": 2.176882037862373e-06, - "loss": 0.0527, - "step": 6500 - }, - { - "epoch": 2.8874083944037308, - "grad_norm": 0.6279273972202338, - "learning_rate": 2.1752822816251405e-06, - "loss": 0.0366, - "step": 6501 - }, - { - "epoch": 2.887852542749278, - "grad_norm": 0.6022931994329644, - "learning_rate": 2.173682949990968e-06, - "loss": 0.0306, - "step": 6502 - }, - { - "epoch": 2.888296691094826, - "grad_norm": 0.5362069754281357, - "learning_rate": 2.172084043200256e-06, - "loss": 0.0342, - "step": 6503 - }, - { - "epoch": 2.888740839440373, - "grad_norm": 0.7929642629941647, - "learning_rate": 2.17048556149335e-06, - "loss": 0.0333, - "step": 6504 - }, - { - "epoch": 2.8891849877859204, - "grad_norm": 0.33575039821832753, - "learning_rate": 2.16888750511053e-06, - "loss": 0.0242, - "step": 6505 - }, - { - "epoch": 2.889629136131468, - "grad_norm": 0.3741108552741579, - "learning_rate": 2.1672898742920094e-06, - "loss": 0.0273, - "step": 6506 - }, - { - "epoch": 2.8900732844770154, - "grad_norm": 0.3774493813914148, - "learning_rate": 2.1656926692779423e-06, - "loss": 0.0313, - "step": 6507 - }, - { - "epoch": 2.8905174328225627, - "grad_norm": 0.9718206954815033, - "learning_rate": 2.1640958903084118e-06, - "loss": 0.0401, - "step": 6508 - }, - { - "epoch": 2.89096158116811, - "grad_norm": 0.3694310192990195, - "learning_rate": 2.1624995376234403e-06, - "loss": 0.0348, - "step": 6509 - }, - { - "epoch": 2.8914057295136577, - "grad_norm": 0.6520572022397477, - "learning_rate": 2.1609036114629933e-06, - "loss": 0.0402, - "step": 6510 - }, - { - "epoch": 2.891849877859205, - "grad_norm": 0.4215772661238576, - "learning_rate": 2.159308112066959e-06, - "loss": 0.0293, - "step": 6511 - }, - { - "epoch": 2.8922940262047523, - "grad_norm": 0.38467024272056793, - "learning_rate": 2.1577130396751705e-06, - "loss": 0.0325, - "step": 6512 - }, - { - "epoch": 2.8927381745503, - "grad_norm": 0.38039523075207066, - "learning_rate": 2.1561183945273958e-06, - "loss": 0.029, - "step": 6513 - }, - { - "epoch": 2.8931823228958473, - "grad_norm": 0.35012157636808233, - "learning_rate": 2.154524176863334e-06, - "loss": 0.0298, - "step": 6514 - }, - { - "epoch": 2.8936264712413946, - "grad_norm": 0.32604790956444796, - "learning_rate": 2.1529303869226244e-06, - "loss": 0.0223, - "step": 6515 - }, - { - "epoch": 2.894070619586942, - "grad_norm": 0.6062038168527448, - "learning_rate": 2.151337024944841e-06, - "loss": 0.0328, - "step": 6516 - }, - { - "epoch": 2.894514767932489, - "grad_norm": 0.4616383089873512, - "learning_rate": 2.149744091169493e-06, - "loss": 0.0355, - "step": 6517 - }, - { - "epoch": 2.894958916278037, - "grad_norm": 0.31498629669392003, - "learning_rate": 2.1481515858360254e-06, - "loss": 0.0223, - "step": 6518 - }, - { - "epoch": 2.895403064623584, - "grad_norm": 0.3418552975452249, - "learning_rate": 2.1465595091838204e-06, - "loss": 0.0284, - "step": 6519 - }, - { - "epoch": 2.895847212969132, - "grad_norm": 0.6349150689262983, - "learning_rate": 2.144967861452191e-06, - "loss": 0.0288, - "step": 6520 - }, - { - "epoch": 2.8962913613146792, - "grad_norm": 0.3672718765769208, - "learning_rate": 2.143376642880391e-06, - "loss": 0.028, - "step": 6521 - }, - { - "epoch": 2.8967355096602265, - "grad_norm": 0.6329188736427485, - "learning_rate": 2.141785853707607e-06, - "loss": 0.0419, - "step": 6522 - }, - { - "epoch": 2.897179658005774, - "grad_norm": 0.35536621143471536, - "learning_rate": 2.1401954941729614e-06, - "loss": 0.0276, - "step": 6523 - }, - { - "epoch": 2.897623806351321, - "grad_norm": 0.4803884960432198, - "learning_rate": 2.1386055645155144e-06, - "loss": 0.0334, - "step": 6524 - }, - { - "epoch": 2.898067954696869, - "grad_norm": 0.7351089421454166, - "learning_rate": 2.137016064974256e-06, - "loss": 0.034, - "step": 6525 - }, - { - "epoch": 2.898512103042416, - "grad_norm": 0.35740753737071096, - "learning_rate": 2.135426995788115e-06, - "loss": 0.0255, - "step": 6526 - }, - { - "epoch": 2.8989562513879634, - "grad_norm": 0.40072320482835744, - "learning_rate": 2.133838357195961e-06, - "loss": 0.0369, - "step": 6527 - }, - { - "epoch": 2.899400399733511, - "grad_norm": 0.3671291054027193, - "learning_rate": 2.1322501494365873e-06, - "loss": 0.0248, - "step": 6528 - }, - { - "epoch": 2.8998445480790584, - "grad_norm": 0.46498735962046045, - "learning_rate": 2.1306623727487306e-06, - "loss": 0.0336, - "step": 6529 - }, - { - "epoch": 2.9002886964246057, - "grad_norm": 0.37582439553874314, - "learning_rate": 2.1290750273710625e-06, - "loss": 0.0311, - "step": 6530 - }, - { - "epoch": 2.900732844770153, - "grad_norm": 0.3769841744886795, - "learning_rate": 2.127488113542185e-06, - "loss": 0.0219, - "step": 6531 - }, - { - "epoch": 2.9011769931157008, - "grad_norm": 0.5408699565947783, - "learning_rate": 2.1259016315006388e-06, - "loss": 0.0369, - "step": 6532 - }, - { - "epoch": 2.901621141461248, - "grad_norm": 0.41566157060894093, - "learning_rate": 2.1243155814849003e-06, - "loss": 0.0322, - "step": 6533 - }, - { - "epoch": 2.9020652898067953, - "grad_norm": 0.4200346887421879, - "learning_rate": 2.1227299637333793e-06, - "loss": 0.0265, - "step": 6534 - }, - { - "epoch": 2.902509438152343, - "grad_norm": 0.481827142730724, - "learning_rate": 2.1211447784844223e-06, - "loss": 0.0332, - "step": 6535 - }, - { - "epoch": 2.9029535864978904, - "grad_norm": 0.41218410599797384, - "learning_rate": 2.1195600259763064e-06, - "loss": 0.0344, - "step": 6536 - }, - { - "epoch": 2.9033977348434377, - "grad_norm": 0.5125400394558322, - "learning_rate": 2.1179757064472495e-06, - "loss": 0.0311, - "step": 6537 - }, - { - "epoch": 2.903841883188985, - "grad_norm": 0.5243375032164893, - "learning_rate": 2.1163918201354005e-06, - "loss": 0.0442, - "step": 6538 - }, - { - "epoch": 2.9042860315345327, - "grad_norm": 0.36105640514776777, - "learning_rate": 2.114808367278845e-06, - "loss": 0.0315, - "step": 6539 - }, - { - "epoch": 2.90473017988008, - "grad_norm": 0.37491287513386307, - "learning_rate": 2.113225348115603e-06, - "loss": 0.0273, - "step": 6540 - }, - { - "epoch": 2.9051743282256273, - "grad_norm": 0.3675658951021628, - "learning_rate": 2.11164276288363e-06, - "loss": 0.0249, - "step": 6541 - }, - { - "epoch": 2.905618476571175, - "grad_norm": 0.47441864691687574, - "learning_rate": 2.110060611820813e-06, - "loss": 0.0439, - "step": 6542 - }, - { - "epoch": 2.9060626249167223, - "grad_norm": 0.4331767301461994, - "learning_rate": 2.1084788951649753e-06, - "loss": 0.0308, - "step": 6543 - }, - { - "epoch": 2.9065067732622696, - "grad_norm": 0.32889369683548264, - "learning_rate": 2.106897613153882e-06, - "loss": 0.0269, - "step": 6544 - }, - { - "epoch": 2.906950921607817, - "grad_norm": 0.46027509577610654, - "learning_rate": 2.105316766025221e-06, - "loss": 0.0309, - "step": 6545 - }, - { - "epoch": 2.907395069953364, - "grad_norm": 0.5494880014843939, - "learning_rate": 2.1037363540166224e-06, - "loss": 0.0273, - "step": 6546 - }, - { - "epoch": 2.907839218298912, - "grad_norm": 0.4011778001737706, - "learning_rate": 2.1021563773656493e-06, - "loss": 0.0352, - "step": 6547 - }, - { - "epoch": 2.908283366644459, - "grad_norm": 0.45266951475580575, - "learning_rate": 2.1005768363097977e-06, - "loss": 0.0301, - "step": 6548 - }, - { - "epoch": 2.908727514990007, - "grad_norm": 0.49868301394140047, - "learning_rate": 2.0989977310865e-06, - "loss": 0.0339, - "step": 6549 - }, - { - "epoch": 2.909171663335554, - "grad_norm": 0.6954722852910884, - "learning_rate": 2.0974190619331224e-06, - "loss": 0.0349, - "step": 6550 - }, - { - "epoch": 2.9096158116811015, - "grad_norm": 0.4383145753772345, - "learning_rate": 2.0958408290869662e-06, - "loss": 0.0356, - "step": 6551 - }, - { - "epoch": 2.910059960026649, - "grad_norm": 0.3704293568574071, - "learning_rate": 2.0942630327852687e-06, - "loss": 0.0303, - "step": 6552 - }, - { - "epoch": 2.910504108372196, - "grad_norm": 0.3498231235044318, - "learning_rate": 2.092685673265195e-06, - "loss": 0.0318, - "step": 6553 - }, - { - "epoch": 2.910948256717744, - "grad_norm": 0.36641412131018597, - "learning_rate": 2.0911087507638513e-06, - "loss": 0.0237, - "step": 6554 - }, - { - "epoch": 2.911392405063291, - "grad_norm": 0.4005634976837491, - "learning_rate": 2.0895322655182754e-06, - "loss": 0.0327, - "step": 6555 - }, - { - "epoch": 2.9118365534088384, - "grad_norm": 0.36560498009667564, - "learning_rate": 2.0879562177654404e-06, - "loss": 0.0247, - "step": 6556 - }, - { - "epoch": 2.912280701754386, - "grad_norm": 0.38952731591083495, - "learning_rate": 2.0863806077422534e-06, - "loss": 0.027, - "step": 6557 - }, - { - "epoch": 2.9127248500999334, - "grad_norm": 0.4138708556738211, - "learning_rate": 2.0848054356855557e-06, - "loss": 0.0304, - "step": 6558 - }, - { - "epoch": 2.9131689984454807, - "grad_norm": 0.5073707851794668, - "learning_rate": 2.08323070183212e-06, - "loss": 0.0353, - "step": 6559 - }, - { - "epoch": 2.913613146791028, - "grad_norm": 0.5942930274632409, - "learning_rate": 2.081656406418658e-06, - "loss": 0.0331, - "step": 6560 - }, - { - "epoch": 2.9140572951365757, - "grad_norm": 0.35640079528674945, - "learning_rate": 2.080082549681811e-06, - "loss": 0.0279, - "step": 6561 - }, - { - "epoch": 2.914501443482123, - "grad_norm": 0.3814193495027854, - "learning_rate": 2.0785091318581577e-06, - "loss": 0.0305, - "step": 6562 - }, - { - "epoch": 2.9149455918276703, - "grad_norm": 0.358555755728585, - "learning_rate": 2.076936153184211e-06, - "loss": 0.0239, - "step": 6563 - }, - { - "epoch": 2.915389740173218, - "grad_norm": 0.4250693357842293, - "learning_rate": 2.0753636138964134e-06, - "loss": 0.0286, - "step": 6564 - }, - { - "epoch": 2.9158338885187653, - "grad_norm": 0.6221059510930096, - "learning_rate": 2.0737915142311454e-06, - "loss": 0.0441, - "step": 6565 - }, - { - "epoch": 2.9162780368643126, - "grad_norm": 0.38815256704717155, - "learning_rate": 2.07221985442472e-06, - "loss": 0.0292, - "step": 6566 - }, - { - "epoch": 2.91672218520986, - "grad_norm": 0.420249928229787, - "learning_rate": 2.0706486347133853e-06, - "loss": 0.0289, - "step": 6567 - }, - { - "epoch": 2.9171663335554077, - "grad_norm": 0.39867842058238456, - "learning_rate": 2.0690778553333215e-06, - "loss": 0.0399, - "step": 6568 - }, - { - "epoch": 2.917610481900955, - "grad_norm": 0.35700246538754193, - "learning_rate": 2.0675075165206456e-06, - "loss": 0.0328, - "step": 6569 - }, - { - "epoch": 2.9180546302465022, - "grad_norm": 0.39862639265555505, - "learning_rate": 2.0659376185114024e-06, - "loss": 0.0282, - "step": 6570 - }, - { - "epoch": 2.91849877859205, - "grad_norm": 0.3874386180786859, - "learning_rate": 2.064368161541576e-06, - "loss": 0.0269, - "step": 6571 - }, - { - "epoch": 2.9189429269375973, - "grad_norm": 0.35734528243866814, - "learning_rate": 2.0627991458470826e-06, - "loss": 0.0277, - "step": 6572 - }, - { - "epoch": 2.9193870752831446, - "grad_norm": 0.43326587014764417, - "learning_rate": 2.061230571663772e-06, - "loss": 0.0287, - "step": 6573 - }, - { - "epoch": 2.919831223628692, - "grad_norm": 0.442123671529554, - "learning_rate": 2.0596624392274277e-06, - "loss": 0.0348, - "step": 6574 - }, - { - "epoch": 2.920275371974239, - "grad_norm": 0.41360740653208006, - "learning_rate": 2.058094748773768e-06, - "loss": 0.0245, - "step": 6575 - }, - { - "epoch": 2.920719520319787, - "grad_norm": 0.4521622442444811, - "learning_rate": 2.05652750053844e-06, - "loss": 0.0272, - "step": 6576 - }, - { - "epoch": 2.921163668665334, - "grad_norm": 0.6652238384193798, - "learning_rate": 2.0549606947570295e-06, - "loss": 0.0403, - "step": 6577 - }, - { - "epoch": 2.921607817010882, - "grad_norm": 0.33561551996607286, - "learning_rate": 2.053394331665054e-06, - "loss": 0.023, - "step": 6578 - }, - { - "epoch": 2.922051965356429, - "grad_norm": 0.4533033136781839, - "learning_rate": 2.051828411497964e-06, - "loss": 0.0418, - "step": 6579 - }, - { - "epoch": 2.9224961137019765, - "grad_norm": 0.2933903835866281, - "learning_rate": 2.0502629344911475e-06, - "loss": 0.0204, - "step": 6580 - }, - { - "epoch": 2.9229402620475238, - "grad_norm": 0.4807368532427772, - "learning_rate": 2.0486979008799164e-06, - "loss": 0.0388, - "step": 6581 - }, - { - "epoch": 2.923384410393071, - "grad_norm": 0.39312159469638, - "learning_rate": 2.047133310899525e-06, - "loss": 0.0352, - "step": 6582 - }, - { - "epoch": 2.923828558738619, - "grad_norm": 0.49386287194544953, - "learning_rate": 2.045569164785157e-06, - "loss": 0.0318, - "step": 6583 - }, - { - "epoch": 2.924272707084166, - "grad_norm": 0.3909685186859985, - "learning_rate": 2.044005462771931e-06, - "loss": 0.0244, - "step": 6584 - }, - { - "epoch": 2.9247168554297134, - "grad_norm": 0.4648834902668276, - "learning_rate": 2.0424422050948976e-06, - "loss": 0.0387, - "step": 6585 - }, - { - "epoch": 2.925161003775261, - "grad_norm": 0.36487456799403395, - "learning_rate": 2.0408793919890424e-06, - "loss": 0.0288, - "step": 6586 - }, - { - "epoch": 2.9256051521208084, - "grad_norm": 0.5349577863858896, - "learning_rate": 2.0393170236892795e-06, - "loss": 0.0426, - "step": 6587 - }, - { - "epoch": 2.9260493004663557, - "grad_norm": 0.35432394100233194, - "learning_rate": 2.0377551004304613e-06, - "loss": 0.0304, - "step": 6588 - }, - { - "epoch": 2.926493448811903, - "grad_norm": 0.5133489099915194, - "learning_rate": 2.036193622447371e-06, - "loss": 0.0436, - "step": 6589 - }, - { - "epoch": 2.9269375971574507, - "grad_norm": 0.40489974505235427, - "learning_rate": 2.034632589974726e-06, - "loss": 0.0319, - "step": 6590 - }, - { - "epoch": 2.927381745502998, - "grad_norm": 0.3994253955209103, - "learning_rate": 2.033072003247175e-06, - "loss": 0.0308, - "step": 6591 - }, - { - "epoch": 2.9278258938485453, - "grad_norm": 0.34532953088972845, - "learning_rate": 2.0315118624993035e-06, - "loss": 0.0261, - "step": 6592 - }, - { - "epoch": 2.928270042194093, - "grad_norm": 0.35520607631530465, - "learning_rate": 2.0299521679656225e-06, - "loss": 0.0228, - "step": 6593 - }, - { - "epoch": 2.9287141905396403, - "grad_norm": 0.32280141712366317, - "learning_rate": 2.0283929198805837e-06, - "loss": 0.0189, - "step": 6594 - }, - { - "epoch": 2.9291583388851876, - "grad_norm": 0.43110913730957606, - "learning_rate": 2.0268341184785674e-06, - "loss": 0.0378, - "step": 6595 - }, - { - "epoch": 2.929602487230735, - "grad_norm": 0.354328605582052, - "learning_rate": 2.025275763993888e-06, - "loss": 0.0313, - "step": 6596 - }, - { - "epoch": 2.9300466355762826, - "grad_norm": 0.3880427101102111, - "learning_rate": 2.023717856660795e-06, - "loss": 0.0283, - "step": 6597 - }, - { - "epoch": 2.93049078392183, - "grad_norm": 0.37092577723252107, - "learning_rate": 2.0221603967134645e-06, - "loss": 0.0309, - "step": 6598 - }, - { - "epoch": 2.9309349322673772, - "grad_norm": 0.3853764249821867, - "learning_rate": 2.0206033843860113e-06, - "loss": 0.036, - "step": 6599 - }, - { - "epoch": 2.931379080612925, - "grad_norm": 0.32195066732125943, - "learning_rate": 2.0190468199124804e-06, - "loss": 0.0266, - "step": 6600 - }, - { - "epoch": 2.9318232289584722, - "grad_norm": 0.377934707171494, - "learning_rate": 2.01749070352685e-06, - "loss": 0.0323, - "step": 6601 - }, - { - "epoch": 2.9322673773040195, - "grad_norm": 0.4090601636781386, - "learning_rate": 2.0159350354630307e-06, - "loss": 0.0275, - "step": 6602 - }, - { - "epoch": 2.932711525649567, - "grad_norm": 0.4773326358669816, - "learning_rate": 2.0143798159548677e-06, - "loss": 0.0331, - "step": 6603 - }, - { - "epoch": 2.933155673995114, - "grad_norm": 0.4210797373291252, - "learning_rate": 2.0128250452361334e-06, - "loss": 0.0362, - "step": 6604 - }, - { - "epoch": 2.933599822340662, - "grad_norm": 0.4248736186586538, - "learning_rate": 2.0112707235405386e-06, - "loss": 0.0309, - "step": 6605 - }, - { - "epoch": 2.934043970686209, - "grad_norm": 0.3339641218919188, - "learning_rate": 2.0097168511017234e-06, - "loss": 0.0187, - "step": 6606 - }, - { - "epoch": 2.934488119031757, - "grad_norm": 0.5167791996296286, - "learning_rate": 2.0081634281532613e-06, - "loss": 0.0315, - "step": 6607 - }, - { - "epoch": 2.934932267377304, - "grad_norm": 0.4991803228092949, - "learning_rate": 2.0066104549286602e-06, - "loss": 0.0436, - "step": 6608 - }, - { - "epoch": 2.9353764157228515, - "grad_norm": 0.3779855137295941, - "learning_rate": 2.005057931661355e-06, - "loss": 0.0273, - "step": 6609 - }, - { - "epoch": 2.9358205640683988, - "grad_norm": 0.5036801733855913, - "learning_rate": 2.0035058585847173e-06, - "loss": 0.0328, - "step": 6610 - }, - { - "epoch": 2.936264712413946, - "grad_norm": 0.3544894089098337, - "learning_rate": 2.001954235932051e-06, - "loss": 0.0235, - "step": 6611 - }, - { - "epoch": 2.9367088607594938, - "grad_norm": 0.42819328292612396, - "learning_rate": 2.0004030639365907e-06, - "loss": 0.0308, - "step": 6612 - }, - { - "epoch": 2.937153009105041, - "grad_norm": 0.39806520440496346, - "learning_rate": 1.9988523428315045e-06, - "loss": 0.0277, - "step": 6613 - }, - { - "epoch": 2.9375971574505884, - "grad_norm": 0.41174193415314053, - "learning_rate": 1.997302072849893e-06, - "loss": 0.0282, - "step": 6614 - }, - { - "epoch": 2.938041305796136, - "grad_norm": 0.4414650832704903, - "learning_rate": 1.995752254224786e-06, - "loss": 0.0303, - "step": 6615 - }, - { - "epoch": 2.9384854541416834, - "grad_norm": 0.4432623492720395, - "learning_rate": 1.994202887189148e-06, - "loss": 0.0253, - "step": 6616 - }, - { - "epoch": 2.9389296024872307, - "grad_norm": 0.4800911306612613, - "learning_rate": 1.9926539719758747e-06, - "loss": 0.0388, - "step": 6617 - }, - { - "epoch": 2.939373750832778, - "grad_norm": 0.4059304636548806, - "learning_rate": 1.9911055088177967e-06, - "loss": 0.0276, - "step": 6618 - }, - { - "epoch": 2.9398178991783257, - "grad_norm": 0.5632995645750524, - "learning_rate": 1.9895574979476717e-06, - "loss": 0.0364, - "step": 6619 - }, - { - "epoch": 2.940262047523873, - "grad_norm": 0.3730668889292687, - "learning_rate": 1.9880099395981954e-06, - "loss": 0.0266, - "step": 6620 - }, - { - "epoch": 2.9407061958694203, - "grad_norm": 0.4942132883415099, - "learning_rate": 1.986462834001989e-06, - "loss": 0.0424, - "step": 6621 - }, - { - "epoch": 2.941150344214968, - "grad_norm": 0.35051082450586374, - "learning_rate": 1.984916181391609e-06, - "loss": 0.029, - "step": 6622 - }, - { - "epoch": 2.9415944925605153, - "grad_norm": 0.49757968739541286, - "learning_rate": 1.983369981999544e-06, - "loss": 0.0391, - "step": 6623 - }, - { - "epoch": 2.9420386409060626, - "grad_norm": 0.3866532839130346, - "learning_rate": 1.9818242360582145e-06, - "loss": 0.0196, - "step": 6624 - }, - { - "epoch": 2.94248278925161, - "grad_norm": 0.3312297948056805, - "learning_rate": 1.980278943799974e-06, - "loss": 0.024, - "step": 6625 - }, - { - "epoch": 2.942926937597157, - "grad_norm": 0.369854753089198, - "learning_rate": 1.978734105457103e-06, - "loss": 0.0261, - "step": 6626 - }, - { - "epoch": 2.943371085942705, - "grad_norm": 0.40104725649008754, - "learning_rate": 1.9771897212618172e-06, - "loss": 0.0338, - "step": 6627 - }, - { - "epoch": 2.943815234288252, - "grad_norm": 0.34952669002936676, - "learning_rate": 1.9756457914462677e-06, - "loss": 0.0245, - "step": 6628 - }, - { - "epoch": 2.9442593826338, - "grad_norm": 0.41129129029247236, - "learning_rate": 1.9741023162425265e-06, - "loss": 0.0313, - "step": 6629 - }, - { - "epoch": 2.9447035309793472, - "grad_norm": 0.5211003073625236, - "learning_rate": 1.9725592958826102e-06, - "loss": 0.0421, - "step": 6630 - }, - { - "epoch": 2.9451476793248945, - "grad_norm": 0.3984417865901208, - "learning_rate": 1.9710167305984607e-06, - "loss": 0.0375, - "step": 6631 - }, - { - "epoch": 2.945591827670442, - "grad_norm": 0.30941970478869846, - "learning_rate": 1.9694746206219477e-06, - "loss": 0.0216, - "step": 6632 - }, - { - "epoch": 2.946035976015989, - "grad_norm": 0.4229304742479391, - "learning_rate": 1.9679329661848795e-06, - "loss": 0.034, - "step": 6633 - }, - { - "epoch": 2.946480124361537, - "grad_norm": 0.476591153079035, - "learning_rate": 1.966391767518992e-06, - "loss": 0.0516, - "step": 6634 - }, - { - "epoch": 2.946924272707084, - "grad_norm": 0.6821582454050819, - "learning_rate": 1.9648510248559546e-06, - "loss": 0.034, - "step": 6635 - }, - { - "epoch": 2.9473684210526314, - "grad_norm": 0.43772389004233875, - "learning_rate": 1.9633107384273668e-06, - "loss": 0.0296, - "step": 6636 - }, - { - "epoch": 2.947812569398179, - "grad_norm": 0.49311057599048846, - "learning_rate": 1.9617709084647584e-06, - "loss": 0.0471, - "step": 6637 - }, - { - "epoch": 2.9482567177437264, - "grad_norm": 0.48536816799978794, - "learning_rate": 1.9602315351995928e-06, - "loss": 0.0393, - "step": 6638 - }, - { - "epoch": 2.9487008660892737, - "grad_norm": 0.5141630455505187, - "learning_rate": 1.958692618863264e-06, - "loss": 0.0342, - "step": 6639 - }, - { - "epoch": 2.949145014434821, - "grad_norm": 0.4586328384368867, - "learning_rate": 1.9571541596870974e-06, - "loss": 0.0391, - "step": 6640 - }, - { - "epoch": 2.9495891627803688, - "grad_norm": 0.4178954318106239, - "learning_rate": 1.9556161579023493e-06, - "loss": 0.0315, - "step": 6641 - }, - { - "epoch": 2.950033311125916, - "grad_norm": 0.4366977616184424, - "learning_rate": 1.9540786137402097e-06, - "loss": 0.0325, - "step": 6642 - }, - { - "epoch": 2.9504774594714633, - "grad_norm": 0.35044020018653177, - "learning_rate": 1.952541527431794e-06, - "loss": 0.0238, - "step": 6643 - }, - { - "epoch": 2.950921607817011, - "grad_norm": 0.36474391941516165, - "learning_rate": 1.951004899208154e-06, - "loss": 0.0284, - "step": 6644 - }, - { - "epoch": 2.9513657561625584, - "grad_norm": 0.3808970857069157, - "learning_rate": 1.9494687293002724e-06, - "loss": 0.0262, - "step": 6645 - }, - { - "epoch": 2.9518099045081057, - "grad_norm": 0.49087581676544356, - "learning_rate": 1.947933017939057e-06, - "loss": 0.0334, - "step": 6646 - }, - { - "epoch": 2.952254052853653, - "grad_norm": 0.4067523864649502, - "learning_rate": 1.946397765355356e-06, - "loss": 0.0224, - "step": 6647 - }, - { - "epoch": 2.9526982011992007, - "grad_norm": 0.5385094196478863, - "learning_rate": 1.9448629717799444e-06, - "loss": 0.0274, - "step": 6648 - }, - { - "epoch": 2.953142349544748, - "grad_norm": 0.4534794423284923, - "learning_rate": 1.9433286374435243e-06, - "loss": 0.0307, - "step": 6649 - }, - { - "epoch": 2.9535864978902953, - "grad_norm": 0.42491711765749635, - "learning_rate": 1.9417947625767338e-06, - "loss": 0.0282, - "step": 6650 - }, - { - "epoch": 2.954030646235843, - "grad_norm": 0.43191372654900584, - "learning_rate": 1.9402613474101418e-06, - "loss": 0.0284, - "step": 6651 - }, - { - "epoch": 2.9544747945813903, - "grad_norm": 0.4659070261041793, - "learning_rate": 1.9387283921742417e-06, - "loss": 0.0348, - "step": 6652 - }, - { - "epoch": 2.9549189429269376, - "grad_norm": 0.3521658502328422, - "learning_rate": 1.9371958970994697e-06, - "loss": 0.0264, - "step": 6653 - }, - { - "epoch": 2.955363091272485, - "grad_norm": 0.3744257822368085, - "learning_rate": 1.935663862416181e-06, - "loss": 0.0233, - "step": 6654 - }, - { - "epoch": 2.955807239618032, - "grad_norm": 0.3533483955176666, - "learning_rate": 1.934132288354667e-06, - "loss": 0.0223, - "step": 6655 - }, - { - "epoch": 2.95625138796358, - "grad_norm": 0.48990756810689967, - "learning_rate": 1.9326011751451523e-06, - "loss": 0.0296, - "step": 6656 - }, - { - "epoch": 2.956695536309127, - "grad_norm": 0.4155057113271156, - "learning_rate": 1.9310705230177834e-06, - "loss": 0.032, - "step": 6657 - }, - { - "epoch": 2.957139684654675, - "grad_norm": 0.46467269039209746, - "learning_rate": 1.9295403322026485e-06, - "loss": 0.0302, - "step": 6658 - }, - { - "epoch": 2.957583833000222, - "grad_norm": 0.436904391432258, - "learning_rate": 1.928010602929762e-06, - "loss": 0.0249, - "step": 6659 - }, - { - "epoch": 2.9580279813457695, - "grad_norm": 0.4078298528030376, - "learning_rate": 1.9264813354290635e-06, - "loss": 0.0339, - "step": 6660 - }, - { - "epoch": 2.958472129691317, - "grad_norm": 0.43921352541689956, - "learning_rate": 1.92495252993043e-06, - "loss": 0.0293, - "step": 6661 - }, - { - "epoch": 2.958916278036864, - "grad_norm": 0.47221505355593735, - "learning_rate": 1.9234241866636693e-06, - "loss": 0.0267, - "step": 6662 - }, - { - "epoch": 2.959360426382412, - "grad_norm": 0.3695229541487737, - "learning_rate": 1.9218963058585117e-06, - "loss": 0.0273, - "step": 6663 - }, - { - "epoch": 2.959804574727959, - "grad_norm": 0.3129170332680405, - "learning_rate": 1.9203688877446285e-06, - "loss": 0.0177, - "step": 6664 - }, - { - "epoch": 2.9602487230735064, - "grad_norm": 0.3892558088874507, - "learning_rate": 1.9188419325516177e-06, - "loss": 0.0381, - "step": 6665 - }, - { - "epoch": 2.960692871419054, - "grad_norm": 0.3716168146474431, - "learning_rate": 1.9173154405090024e-06, - "loss": 0.0222, - "step": 6666 - }, - { - "epoch": 2.9611370197646014, - "grad_norm": 0.4147701146563668, - "learning_rate": 1.9157894118462416e-06, - "loss": 0.0307, - "step": 6667 - }, - { - "epoch": 2.9615811681101487, - "grad_norm": 0.39499526563472726, - "learning_rate": 1.9142638467927254e-06, - "loss": 0.0365, - "step": 6668 - }, - { - "epoch": 2.962025316455696, - "grad_norm": 0.3874596806249992, - "learning_rate": 1.9127387455777673e-06, - "loss": 0.0321, - "step": 6669 - }, - { - "epoch": 2.9624694648012437, - "grad_norm": 0.42027304319891273, - "learning_rate": 1.911214108430623e-06, - "loss": 0.033, - "step": 6670 - }, - { - "epoch": 2.962913613146791, - "grad_norm": 0.44739921526779486, - "learning_rate": 1.9096899355804655e-06, - "loss": 0.0275, - "step": 6671 - }, - { - "epoch": 2.9633577614923383, - "grad_norm": 0.37802801428083804, - "learning_rate": 1.9081662272564055e-06, - "loss": 0.0319, - "step": 6672 - }, - { - "epoch": 2.963801909837886, - "grad_norm": 0.38442939573511425, - "learning_rate": 1.9066429836874844e-06, - "loss": 0.0295, - "step": 6673 - }, - { - "epoch": 2.9642460581834333, - "grad_norm": 0.6866561926311151, - "learning_rate": 1.9051202051026669e-06, - "loss": 0.0381, - "step": 6674 - }, - { - "epoch": 2.9646902065289806, - "grad_norm": 0.39471908589991933, - "learning_rate": 1.9035978917308568e-06, - "loss": 0.0282, - "step": 6675 - }, - { - "epoch": 2.965134354874528, - "grad_norm": 0.3683316118105389, - "learning_rate": 1.902076043800884e-06, - "loss": 0.0237, - "step": 6676 - }, - { - "epoch": 2.9655785032200757, - "grad_norm": 0.37626637800602947, - "learning_rate": 1.9005546615415044e-06, - "loss": 0.0241, - "step": 6677 - }, - { - "epoch": 2.966022651565623, - "grad_norm": 0.3496930484588171, - "learning_rate": 1.8990337451814095e-06, - "loss": 0.0286, - "step": 6678 - }, - { - "epoch": 2.9664667999111702, - "grad_norm": 0.41344810193656406, - "learning_rate": 1.897513294949221e-06, - "loss": 0.0273, - "step": 6679 - }, - { - "epoch": 2.966910948256718, - "grad_norm": 0.378120591478157, - "learning_rate": 1.895993311073483e-06, - "loss": 0.0294, - "step": 6680 - }, - { - "epoch": 2.9673550966022653, - "grad_norm": 0.33697535541119333, - "learning_rate": 1.8944737937826813e-06, - "loss": 0.0273, - "step": 6681 - }, - { - "epoch": 2.9677992449478126, - "grad_norm": 0.3663439761387446, - "learning_rate": 1.8929547433052202e-06, - "loss": 0.032, - "step": 6682 - }, - { - "epoch": 2.96824339329336, - "grad_norm": 0.5047952130541636, - "learning_rate": 1.8914361598694408e-06, - "loss": 0.0339, - "step": 6683 - }, - { - "epoch": 2.968687541638907, - "grad_norm": 0.4039062494421022, - "learning_rate": 1.8899180437036119e-06, - "loss": 0.0375, - "step": 6684 - }, - { - "epoch": 2.969131689984455, - "grad_norm": 0.3025200589569862, - "learning_rate": 1.8884003950359337e-06, - "loss": 0.0262, - "step": 6685 - }, - { - "epoch": 2.969575838330002, - "grad_norm": 0.44229043636076315, - "learning_rate": 1.8868832140945297e-06, - "loss": 0.028, - "step": 6686 - }, - { - "epoch": 2.97001998667555, - "grad_norm": 0.4708612100607534, - "learning_rate": 1.8853665011074645e-06, - "loss": 0.027, - "step": 6687 - }, - { - "epoch": 2.970464135021097, - "grad_norm": 0.3722598107484293, - "learning_rate": 1.8838502563027212e-06, - "loss": 0.0253, - "step": 6688 - }, - { - "epoch": 2.9709082833666445, - "grad_norm": 0.6036467003416066, - "learning_rate": 1.8823344799082177e-06, - "loss": 0.0284, - "step": 6689 - }, - { - "epoch": 2.9713524317121918, - "grad_norm": 0.34918427015083503, - "learning_rate": 1.8808191721518043e-06, - "loss": 0.0294, - "step": 6690 - }, - { - "epoch": 2.971796580057739, - "grad_norm": 0.42849383719138034, - "learning_rate": 1.879304333261251e-06, - "loss": 0.0278, - "step": 6691 - }, - { - "epoch": 2.972240728403287, - "grad_norm": 0.48072735895909585, - "learning_rate": 1.87778996346427e-06, - "loss": 0.0417, - "step": 6692 - }, - { - "epoch": 2.972684876748834, - "grad_norm": 0.29126197319559194, - "learning_rate": 1.8762760629884958e-06, - "loss": 0.0243, - "step": 6693 - }, - { - "epoch": 2.9731290250943814, - "grad_norm": 0.3610203458255818, - "learning_rate": 1.8747626320614904e-06, - "loss": 0.0269, - "step": 6694 - }, - { - "epoch": 2.973573173439929, - "grad_norm": 0.4145287000272817, - "learning_rate": 1.87324967091075e-06, - "loss": 0.0461, - "step": 6695 - }, - { - "epoch": 2.9740173217854764, - "grad_norm": 0.3579965634087584, - "learning_rate": 1.8717371797637002e-06, - "loss": 0.0296, - "step": 6696 - }, - { - "epoch": 2.9744614701310237, - "grad_norm": 0.41883841344207373, - "learning_rate": 1.8702251588476889e-06, - "loss": 0.0314, - "step": 6697 - }, - { - "epoch": 2.974905618476571, - "grad_norm": 0.5313573343030277, - "learning_rate": 1.868713608390005e-06, - "loss": 0.0408, - "step": 6698 - }, - { - "epoch": 2.9753497668221187, - "grad_norm": 0.39733118203820383, - "learning_rate": 1.8672025286178546e-06, - "loss": 0.0357, - "step": 6699 - }, - { - "epoch": 2.975793915167666, - "grad_norm": 0.4305571327880009, - "learning_rate": 1.8656919197583816e-06, - "loss": 0.0287, - "step": 6700 - }, - { - "epoch": 2.9762380635132133, - "grad_norm": 0.38007221498778115, - "learning_rate": 1.8641817820386576e-06, - "loss": 0.0256, - "step": 6701 - }, - { - "epoch": 2.976682211858761, - "grad_norm": 0.35438367987841246, - "learning_rate": 1.862672115685678e-06, - "loss": 0.0318, - "step": 6702 - }, - { - "epoch": 2.9771263602043083, - "grad_norm": 0.3521742278005089, - "learning_rate": 1.861162920926372e-06, - "loss": 0.0246, - "step": 6703 - }, - { - "epoch": 2.9775705085498556, - "grad_norm": 0.4587000972835763, - "learning_rate": 1.8596541979876016e-06, - "loss": 0.0403, - "step": 6704 - }, - { - "epoch": 2.978014656895403, - "grad_norm": 0.4391686978547565, - "learning_rate": 1.8581459470961488e-06, - "loss": 0.033, - "step": 6705 - }, - { - "epoch": 2.9784588052409506, - "grad_norm": 0.8681641943800276, - "learning_rate": 1.856638168478731e-06, - "loss": 0.0271, - "step": 6706 - }, - { - "epoch": 2.978902953586498, - "grad_norm": 0.3412875403024399, - "learning_rate": 1.8551308623619945e-06, - "loss": 0.0278, - "step": 6707 - }, - { - "epoch": 2.979347101932045, - "grad_norm": 0.39261569640506333, - "learning_rate": 1.8536240289725078e-06, - "loss": 0.035, - "step": 6708 - }, - { - "epoch": 2.979791250277593, - "grad_norm": 0.3999387719956585, - "learning_rate": 1.8521176685367804e-06, - "loss": 0.0287, - "step": 6709 - }, - { - "epoch": 2.9802353986231402, - "grad_norm": 0.3873753971954603, - "learning_rate": 1.850611781281239e-06, - "loss": 0.0322, - "step": 6710 - }, - { - "epoch": 2.9806795469686875, - "grad_norm": 0.4380866274093617, - "learning_rate": 1.8491063674322457e-06, - "loss": 0.033, - "step": 6711 - }, - { - "epoch": 2.981123695314235, - "grad_norm": 0.4171386719486352, - "learning_rate": 1.8476014272160896e-06, - "loss": 0.0354, - "step": 6712 - }, - { - "epoch": 2.981567843659782, - "grad_norm": 0.37606765896917305, - "learning_rate": 1.8460969608589913e-06, - "loss": 0.0309, - "step": 6713 - }, - { - "epoch": 2.98201199200533, - "grad_norm": 0.33316278037626945, - "learning_rate": 1.8445929685870912e-06, - "loss": 0.0281, - "step": 6714 - }, - { - "epoch": 2.982456140350877, - "grad_norm": 0.4295124088150545, - "learning_rate": 1.8430894506264724e-06, - "loss": 0.0281, - "step": 6715 - }, - { - "epoch": 2.982900288696425, - "grad_norm": 0.6219155251106016, - "learning_rate": 1.8415864072031335e-06, - "loss": 0.0412, - "step": 6716 - }, - { - "epoch": 2.983344437041972, - "grad_norm": 1.0649791746325505, - "learning_rate": 1.8400838385430104e-06, - "loss": 0.0296, - "step": 6717 - }, - { - "epoch": 2.9837885853875195, - "grad_norm": 0.4321630566554828, - "learning_rate": 1.838581744871965e-06, - "loss": 0.0349, - "step": 6718 - }, - { - "epoch": 2.9842327337330667, - "grad_norm": 0.3860040470778094, - "learning_rate": 1.8370801264157857e-06, - "loss": 0.029, - "step": 6719 - }, - { - "epoch": 2.984676882078614, - "grad_norm": 0.38156983645011616, - "learning_rate": 1.8355789834001898e-06, - "loss": 0.0284, - "step": 6720 - }, - { - "epoch": 2.9851210304241618, - "grad_norm": 0.5127281233435997, - "learning_rate": 1.8340783160508297e-06, - "loss": 0.0355, - "step": 6721 - }, - { - "epoch": 2.985565178769709, - "grad_norm": 0.3767235771133534, - "learning_rate": 1.8325781245932772e-06, - "loss": 0.0297, - "step": 6722 - }, - { - "epoch": 2.9860093271152564, - "grad_norm": 0.4559507813085918, - "learning_rate": 1.8310784092530376e-06, - "loss": 0.0362, - "step": 6723 - }, - { - "epoch": 2.986453475460804, - "grad_norm": 0.36189720669347597, - "learning_rate": 1.8295791702555455e-06, - "loss": 0.0294, - "step": 6724 - }, - { - "epoch": 2.9868976238063514, - "grad_norm": 0.3457736093542848, - "learning_rate": 1.8280804078261577e-06, - "loss": 0.0305, - "step": 6725 - }, - { - "epoch": 2.9873417721518987, - "grad_norm": 0.5911976787943448, - "learning_rate": 1.826582122190167e-06, - "loss": 0.0368, - "step": 6726 - }, - { - "epoch": 2.987785920497446, - "grad_norm": 0.4315279094342813, - "learning_rate": 1.8250843135727898e-06, - "loss": 0.0365, - "step": 6727 - }, - { - "epoch": 2.9882300688429937, - "grad_norm": 0.3782115314248128, - "learning_rate": 1.8235869821991726e-06, - "loss": 0.0297, - "step": 6728 - }, - { - "epoch": 2.988674217188541, - "grad_norm": 0.42412903793940915, - "learning_rate": 1.8220901282943915e-06, - "loss": 0.0348, - "step": 6729 - }, - { - "epoch": 2.9891183655340883, - "grad_norm": 0.42286509801094985, - "learning_rate": 1.820593752083446e-06, - "loss": 0.0356, - "step": 6730 - }, - { - "epoch": 2.989562513879636, - "grad_norm": 0.48807857838384455, - "learning_rate": 1.8190978537912662e-06, - "loss": 0.0441, - "step": 6731 - }, - { - "epoch": 2.9900066622251833, - "grad_norm": 0.4660013011382873, - "learning_rate": 1.8176024336427167e-06, - "loss": 0.0356, - "step": 6732 - }, - { - "epoch": 2.9904508105707306, - "grad_norm": 0.3894973608346122, - "learning_rate": 1.8161074918625792e-06, - "loss": 0.0234, - "step": 6733 - }, - { - "epoch": 2.990894958916278, - "grad_norm": 0.43148032392899044, - "learning_rate": 1.8146130286755704e-06, - "loss": 0.0316, - "step": 6734 - }, - { - "epoch": 2.9913391072618256, - "grad_norm": 0.4541609973903636, - "learning_rate": 1.8131190443063357e-06, - "loss": 0.0228, - "step": 6735 - }, - { - "epoch": 2.991783255607373, - "grad_norm": 0.37931640734503375, - "learning_rate": 1.8116255389794418e-06, - "loss": 0.03, - "step": 6736 - }, - { - "epoch": 2.99222740395292, - "grad_norm": 0.38246202047737776, - "learning_rate": 1.8101325129193897e-06, - "loss": 0.0432, - "step": 6737 - }, - { - "epoch": 2.992671552298468, - "grad_norm": 0.42501044694023266, - "learning_rate": 1.8086399663506099e-06, - "loss": 0.0378, - "step": 6738 - }, - { - "epoch": 2.993115700644015, - "grad_norm": 0.4349574825819462, - "learning_rate": 1.8071478994974534e-06, - "loss": 0.022, - "step": 6739 - }, - { - "epoch": 2.9935598489895625, - "grad_norm": 0.36182919079127596, - "learning_rate": 1.8056563125842046e-06, - "loss": 0.0281, - "step": 6740 - }, - { - "epoch": 2.99400399733511, - "grad_norm": 0.3297563936891292, - "learning_rate": 1.8041652058350768e-06, - "loss": 0.0258, - "step": 6741 - }, - { - "epoch": 2.994448145680657, - "grad_norm": 0.7003299863726014, - "learning_rate": 1.802674579474204e-06, - "loss": 0.0351, - "step": 6742 - }, - { - "epoch": 2.994892294026205, - "grad_norm": 0.6139326715543403, - "learning_rate": 1.801184433725655e-06, - "loss": 0.0379, - "step": 6743 - }, - { - "epoch": 2.995336442371752, - "grad_norm": 0.5062057494487118, - "learning_rate": 1.7996947688134241e-06, - "loss": 0.0315, - "step": 6744 - }, - { - "epoch": 2.9957805907173, - "grad_norm": 0.7604714501213958, - "learning_rate": 1.7982055849614327e-06, - "loss": 0.0435, - "step": 6745 - }, - { - "epoch": 2.996224739062847, - "grad_norm": 0.4500952174598657, - "learning_rate": 1.7967168823935333e-06, - "loss": 0.0364, - "step": 6746 - }, - { - "epoch": 2.9966688874083944, - "grad_norm": 0.6302920187503901, - "learning_rate": 1.7952286613334986e-06, - "loss": 0.0319, - "step": 6747 - }, - { - "epoch": 2.9971130357539417, - "grad_norm": 0.36539112186296896, - "learning_rate": 1.793740922005034e-06, - "loss": 0.0308, - "step": 6748 - }, - { - "epoch": 2.997557184099489, - "grad_norm": 0.43381962390876766, - "learning_rate": 1.7922536646317767e-06, - "loss": 0.0377, - "step": 6749 - }, - { - "epoch": 2.9980013324450367, - "grad_norm": 0.33856021759003035, - "learning_rate": 1.7907668894372826e-06, - "loss": 0.0239, - "step": 6750 - }, - { - "epoch": 2.998445480790584, - "grad_norm": 0.37584653604138824, - "learning_rate": 1.78928059664504e-06, - "loss": 0.024, - "step": 6751 - }, - { - "epoch": 2.9988896291361313, - "grad_norm": 0.361912705997716, - "learning_rate": 1.7877947864784662e-06, - "loss": 0.0261, - "step": 6752 - }, - { - "epoch": 2.999333777481679, - "grad_norm": 0.35751593403707377, - "learning_rate": 1.7863094591609003e-06, - "loss": 0.0254, - "step": 6753 - }, - { - "epoch": 2.9997779258272264, - "grad_norm": 0.4724255587714232, - "learning_rate": 1.7848246149156134e-06, - "loss": 0.0421, - "step": 6754 - }, - { - "epoch": 2.9997779258272264, - "eval_loss": 0.03672339767217636, - "eval_runtime": 403.6019, - "eval_samples_per_second": 37.577, - "eval_steps_per_second": 1.174, - "step": 6754 - }, - { - "epoch": 3.0002220741727736, - "grad_norm": 0.3269520644923097, - "learning_rate": 1.783340253965803e-06, - "loss": 0.0215, - "step": 6755 - }, - { - "epoch": 3.000666222518321, - "grad_norm": 0.3360624322653449, - "learning_rate": 1.7818563765345942e-06, - "loss": 0.0284, - "step": 6756 - }, - { - "epoch": 3.0011103708638687, - "grad_norm": 0.3740872446254905, - "learning_rate": 1.7803729828450405e-06, - "loss": 0.0207, - "step": 6757 - }, - { - "epoch": 3.001554519209416, - "grad_norm": 0.29900746203751005, - "learning_rate": 1.7788900731201174e-06, - "loss": 0.0206, - "step": 6758 - }, - { - "epoch": 3.0019986675549633, - "grad_norm": 0.4119590022541271, - "learning_rate": 1.7774076475827335e-06, - "loss": 0.0239, - "step": 6759 - }, - { - "epoch": 3.002442815900511, - "grad_norm": 0.4073542383308494, - "learning_rate": 1.7759257064557229e-06, - "loss": 0.022, - "step": 6760 - }, - { - "epoch": 3.0028869642460583, - "grad_norm": 0.6159273406264103, - "learning_rate": 1.7744442499618453e-06, - "loss": 0.032, - "step": 6761 - }, - { - "epoch": 3.0033311125916056, - "grad_norm": 0.8514717794851285, - "learning_rate": 1.77296327832379e-06, - "loss": 0.0421, - "step": 6762 - }, - { - "epoch": 3.003775260937153, - "grad_norm": 0.3400576483770952, - "learning_rate": 1.7714827917641737e-06, - "loss": 0.0209, - "step": 6763 - }, - { - "epoch": 3.0042194092827006, - "grad_norm": 0.38696151832577724, - "learning_rate": 1.7700027905055344e-06, - "loss": 0.0232, - "step": 6764 - }, - { - "epoch": 3.004663557628248, - "grad_norm": 0.40863063034907293, - "learning_rate": 1.7685232747703424e-06, - "loss": 0.0246, - "step": 6765 - }, - { - "epoch": 3.005107705973795, - "grad_norm": 0.39214643564722806, - "learning_rate": 1.7670442447809989e-06, - "loss": 0.0283, - "step": 6766 - }, - { - "epoch": 3.0055518543193425, - "grad_norm": 0.5937092447872463, - "learning_rate": 1.7655657007598216e-06, - "loss": 0.0269, - "step": 6767 - }, - { - "epoch": 3.00599600266489, - "grad_norm": 0.43120240703534374, - "learning_rate": 1.7640876429290633e-06, - "loss": 0.0238, - "step": 6768 - }, - { - "epoch": 3.0064401510104375, - "grad_norm": 0.405113495539343, - "learning_rate": 1.7626100715109018e-06, - "loss": 0.0206, - "step": 6769 - }, - { - "epoch": 3.006884299355985, - "grad_norm": 0.35078016785761906, - "learning_rate": 1.761132986727439e-06, - "loss": 0.0207, - "step": 6770 - }, - { - "epoch": 3.0073284477015325, - "grad_norm": 0.36832682307040443, - "learning_rate": 1.7596563888007073e-06, - "loss": 0.0246, - "step": 6771 - }, - { - "epoch": 3.00777259604708, - "grad_norm": 0.38464153090067194, - "learning_rate": 1.7581802779526642e-06, - "loss": 0.0187, - "step": 6772 - }, - { - "epoch": 3.008216744392627, - "grad_norm": 0.44328209442836686, - "learning_rate": 1.7567046544051935e-06, - "loss": 0.0254, - "step": 6773 - }, - { - "epoch": 3.0086608927381744, - "grad_norm": 0.4311223508530591, - "learning_rate": 1.7552295183801093e-06, - "loss": 0.0282, - "step": 6774 - }, - { - "epoch": 3.009105041083722, - "grad_norm": 0.4187784150098612, - "learning_rate": 1.7537548700991463e-06, - "loss": 0.026, - "step": 6775 - }, - { - "epoch": 3.0095491894292694, - "grad_norm": 0.49752308362720904, - "learning_rate": 1.75228070978397e-06, - "loss": 0.0333, - "step": 6776 - }, - { - "epoch": 3.0099933377748167, - "grad_norm": 0.3661955272882175, - "learning_rate": 1.750807037656172e-06, - "loss": 0.0263, - "step": 6777 - }, - { - "epoch": 3.010437486120364, - "grad_norm": 0.3951032549450352, - "learning_rate": 1.7493338539372701e-06, - "loss": 0.0245, - "step": 6778 - }, - { - "epoch": 3.0108816344659117, - "grad_norm": 0.4288937494263082, - "learning_rate": 1.7478611588487098e-06, - "loss": 0.0224, - "step": 6779 - }, - { - "epoch": 3.011325782811459, - "grad_norm": 1.0043569602354414, - "learning_rate": 1.7463889526118628e-06, - "loss": 0.0321, - "step": 6780 - }, - { - "epoch": 3.0117699311570063, - "grad_norm": 0.345117052755733, - "learning_rate": 1.7449172354480236e-06, - "loss": 0.0222, - "step": 6781 - }, - { - "epoch": 3.012214079502554, - "grad_norm": 0.37263032859522277, - "learning_rate": 1.7434460075784183e-06, - "loss": 0.0371, - "step": 6782 - }, - { - "epoch": 3.0126582278481013, - "grad_norm": 0.42797870753405914, - "learning_rate": 1.741975269224197e-06, - "loss": 0.0275, - "step": 6783 - }, - { - "epoch": 3.0131023761936486, - "grad_norm": 0.4487558433311281, - "learning_rate": 1.7405050206064372e-06, - "loss": 0.0278, - "step": 6784 - }, - { - "epoch": 3.013546524539196, - "grad_norm": 0.3104547304379723, - "learning_rate": 1.739035261946142e-06, - "loss": 0.0162, - "step": 6785 - }, - { - "epoch": 3.0139906728847436, - "grad_norm": 0.4274226742898415, - "learning_rate": 1.7375659934642425e-06, - "loss": 0.0232, - "step": 6786 - }, - { - "epoch": 3.014434821230291, - "grad_norm": 0.4378508355204737, - "learning_rate": 1.7360972153815919e-06, - "loss": 0.0263, - "step": 6787 - }, - { - "epoch": 3.0148789695758382, - "grad_norm": 0.35914322608698723, - "learning_rate": 1.7346289279189732e-06, - "loss": 0.0236, - "step": 6788 - }, - { - "epoch": 3.015323117921386, - "grad_norm": 0.3717761462102794, - "learning_rate": 1.7331611312970965e-06, - "loss": 0.0223, - "step": 6789 - }, - { - "epoch": 3.0157672662669333, - "grad_norm": 0.49078555792657397, - "learning_rate": 1.7316938257365945e-06, - "loss": 0.0479, - "step": 6790 - }, - { - "epoch": 3.0162114146124805, - "grad_norm": 0.5303389635543914, - "learning_rate": 1.7302270114580316e-06, - "loss": 0.0257, - "step": 6791 - }, - { - "epoch": 3.016655562958028, - "grad_norm": 0.5963610689982617, - "learning_rate": 1.7287606886818914e-06, - "loss": 0.025, - "step": 6792 - }, - { - "epoch": 3.0170997113035756, - "grad_norm": 0.3358156185785528, - "learning_rate": 1.7272948576285874e-06, - "loss": 0.0259, - "step": 6793 - }, - { - "epoch": 3.017543859649123, - "grad_norm": 0.44451083436086486, - "learning_rate": 1.7258295185184604e-06, - "loss": 0.0371, - "step": 6794 - }, - { - "epoch": 3.01798800799467, - "grad_norm": 0.8409978484969116, - "learning_rate": 1.7243646715717754e-06, - "loss": 0.0198, - "step": 6795 - }, - { - "epoch": 3.0184321563402174, - "grad_norm": 0.40598942285053524, - "learning_rate": 1.7229003170087232e-06, - "loss": 0.0253, - "step": 6796 - }, - { - "epoch": 3.018876304685765, - "grad_norm": 0.40045735020677964, - "learning_rate": 1.7214364550494235e-06, - "loss": 0.0305, - "step": 6797 - }, - { - "epoch": 3.0193204530313125, - "grad_norm": 0.4205212377847487, - "learning_rate": 1.7199730859139157e-06, - "loss": 0.0233, - "step": 6798 - }, - { - "epoch": 3.0197646013768598, - "grad_norm": 0.39200127688818526, - "learning_rate": 1.7185102098221713e-06, - "loss": 0.0225, - "step": 6799 - }, - { - "epoch": 3.0202087497224075, - "grad_norm": 0.4741877419032876, - "learning_rate": 1.717047826994085e-06, - "loss": 0.0262, - "step": 6800 - }, - { - "epoch": 3.020652898067955, - "grad_norm": 0.40147810647504373, - "learning_rate": 1.7155859376494776e-06, - "loss": 0.0267, - "step": 6801 - }, - { - "epoch": 3.021097046413502, - "grad_norm": 0.4280335792767891, - "learning_rate": 1.7141245420080982e-06, - "loss": 0.0242, - "step": 6802 - }, - { - "epoch": 3.0215411947590494, - "grad_norm": 0.34267631232424817, - "learning_rate": 1.7126636402896158e-06, - "loss": 0.0228, - "step": 6803 - }, - { - "epoch": 3.021985343104597, - "grad_norm": 0.517542942077693, - "learning_rate": 1.7112032327136296e-06, - "loss": 0.0289, - "step": 6804 - }, - { - "epoch": 3.0224294914501444, - "grad_norm": 0.48842613500647986, - "learning_rate": 1.7097433194996654e-06, - "loss": 0.0362, - "step": 6805 - }, - { - "epoch": 3.0228736397956917, - "grad_norm": 0.5122441524272605, - "learning_rate": 1.7082839008671714e-06, - "loss": 0.0327, - "step": 6806 - }, - { - "epoch": 3.023317788141239, - "grad_norm": 0.3656414000536619, - "learning_rate": 1.706824977035524e-06, - "loss": 0.0239, - "step": 6807 - }, - { - "epoch": 3.0237619364867867, - "grad_norm": 0.35844316810361615, - "learning_rate": 1.705366548224025e-06, - "loss": 0.022, - "step": 6808 - }, - { - "epoch": 3.024206084832334, - "grad_norm": 0.5208246273651607, - "learning_rate": 1.7039086146518986e-06, - "loss": 0.0396, - "step": 6809 - }, - { - "epoch": 3.0246502331778813, - "grad_norm": 0.35888427376762055, - "learning_rate": 1.7024511765382978e-06, - "loss": 0.0218, - "step": 6810 - }, - { - "epoch": 3.025094381523429, - "grad_norm": 0.4996038911163621, - "learning_rate": 1.7009942341023012e-06, - "loss": 0.031, - "step": 6811 - }, - { - "epoch": 3.0255385298689763, - "grad_norm": 0.4715140989011031, - "learning_rate": 1.699537787562911e-06, - "loss": 0.027, - "step": 6812 - }, - { - "epoch": 3.0259826782145236, - "grad_norm": 0.4700851815071929, - "learning_rate": 1.6980818371390567e-06, - "loss": 0.0352, - "step": 6813 - }, - { - "epoch": 3.026426826560071, - "grad_norm": 0.42536546436391975, - "learning_rate": 1.6966263830495939e-06, - "loss": 0.0307, - "step": 6814 - }, - { - "epoch": 3.0268709749056186, - "grad_norm": 0.3928810675523391, - "learning_rate": 1.6951714255132985e-06, - "loss": 0.0317, - "step": 6815 - }, - { - "epoch": 3.027315123251166, - "grad_norm": 0.36328069775398497, - "learning_rate": 1.6937169647488765e-06, - "loss": 0.0225, - "step": 6816 - }, - { - "epoch": 3.027759271596713, - "grad_norm": 0.4176283792585422, - "learning_rate": 1.6922630009749592e-06, - "loss": 0.0335, - "step": 6817 - }, - { - "epoch": 3.0282034199422605, - "grad_norm": 0.45941453677809496, - "learning_rate": 1.6908095344101016e-06, - "loss": 0.0307, - "step": 6818 - }, - { - "epoch": 3.0286475682878082, - "grad_norm": 0.4022633883354449, - "learning_rate": 1.6893565652727857e-06, - "loss": 0.0285, - "step": 6819 - }, - { - "epoch": 3.0290917166333555, - "grad_norm": 0.46606194138773704, - "learning_rate": 1.687904093781414e-06, - "loss": 0.0346, - "step": 6820 - }, - { - "epoch": 3.029535864978903, - "grad_norm": 0.3781526947765934, - "learning_rate": 1.68645212015432e-06, - "loss": 0.0217, - "step": 6821 - }, - { - "epoch": 3.0299800133244505, - "grad_norm": 0.40815913991276614, - "learning_rate": 1.68500064460976e-06, - "loss": 0.0303, - "step": 6822 - }, - { - "epoch": 3.030424161669998, - "grad_norm": 0.42153363035695735, - "learning_rate": 1.6835496673659145e-06, - "loss": 0.0329, - "step": 6823 - }, - { - "epoch": 3.030868310015545, - "grad_norm": 0.5257269502253524, - "learning_rate": 1.6820991886408911e-06, - "loss": 0.0342, - "step": 6824 - }, - { - "epoch": 3.0313124583610924, - "grad_norm": 0.36375569874164343, - "learning_rate": 1.6806492086527226e-06, - "loss": 0.0268, - "step": 6825 - }, - { - "epoch": 3.03175660670664, - "grad_norm": 0.4517518524381332, - "learning_rate": 1.6791997276193623e-06, - "loss": 0.0291, - "step": 6826 - }, - { - "epoch": 3.0322007550521874, - "grad_norm": 0.41358501413398874, - "learning_rate": 1.6777507457586933e-06, - "loss": 0.0263, - "step": 6827 - }, - { - "epoch": 3.0326449033977347, - "grad_norm": 0.4371773593425646, - "learning_rate": 1.6763022632885223e-06, - "loss": 0.026, - "step": 6828 - }, - { - "epoch": 3.0330890517432825, - "grad_norm": 0.41878585193753987, - "learning_rate": 1.674854280426581e-06, - "loss": 0.0289, - "step": 6829 - }, - { - "epoch": 3.0335332000888298, - "grad_norm": 0.6072899498645479, - "learning_rate": 1.6734067973905272e-06, - "loss": 0.046, - "step": 6830 - }, - { - "epoch": 3.033977348434377, - "grad_norm": 0.37768879943900463, - "learning_rate": 1.6719598143979392e-06, - "loss": 0.0175, - "step": 6831 - }, - { - "epoch": 3.0344214967799243, - "grad_norm": 0.42243370237101113, - "learning_rate": 1.6705133316663247e-06, - "loss": 0.0318, - "step": 6832 - }, - { - "epoch": 3.034865645125472, - "grad_norm": 0.4272111914281022, - "learning_rate": 1.6690673494131143e-06, - "loss": 0.0318, - "step": 6833 - }, - { - "epoch": 3.0353097934710194, - "grad_norm": 0.555542458187917, - "learning_rate": 1.6676218678556637e-06, - "loss": 0.0333, - "step": 6834 - }, - { - "epoch": 3.0357539418165667, - "grad_norm": 0.5174806483008364, - "learning_rate": 1.6661768872112544e-06, - "loss": 0.0257, - "step": 6835 - }, - { - "epoch": 3.036198090162114, - "grad_norm": 0.40261446781293897, - "learning_rate": 1.6647324076970917e-06, - "loss": 0.0271, - "step": 6836 - }, - { - "epoch": 3.0366422385076617, - "grad_norm": 0.28515858656757614, - "learning_rate": 1.663288429530303e-06, - "loss": 0.0139, - "step": 6837 - }, - { - "epoch": 3.037086386853209, - "grad_norm": 0.3801745026945722, - "learning_rate": 1.661844952927944e-06, - "loss": 0.0278, - "step": 6838 - }, - { - "epoch": 3.0375305351987563, - "grad_norm": 0.5653418682210891, - "learning_rate": 1.660401978106994e-06, - "loss": 0.0356, - "step": 6839 - }, - { - "epoch": 3.037974683544304, - "grad_norm": 0.4351497414805404, - "learning_rate": 1.6589595052843567e-06, - "loss": 0.0255, - "step": 6840 - }, - { - "epoch": 3.0384188318898513, - "grad_norm": 0.36924524660599933, - "learning_rate": 1.6575175346768597e-06, - "loss": 0.0213, - "step": 6841 - }, - { - "epoch": 3.0388629802353986, - "grad_norm": 0.36560298465093416, - "learning_rate": 1.6560760665012581e-06, - "loss": 0.0291, - "step": 6842 - }, - { - "epoch": 3.039307128580946, - "grad_norm": 0.5231650605635862, - "learning_rate": 1.6546351009742252e-06, - "loss": 0.0416, - "step": 6843 - }, - { - "epoch": 3.0397512769264936, - "grad_norm": 0.40002607438188575, - "learning_rate": 1.6531946383123647e-06, - "loss": 0.0279, - "step": 6844 - }, - { - "epoch": 3.040195425272041, - "grad_norm": 0.3820335905823042, - "learning_rate": 1.6517546787322019e-06, - "loss": 0.0332, - "step": 6845 - }, - { - "epoch": 3.040639573617588, - "grad_norm": 0.4052075541407361, - "learning_rate": 1.6503152224501883e-06, - "loss": 0.0273, - "step": 6846 - }, - { - "epoch": 3.0410837219631355, - "grad_norm": 0.40616006159309487, - "learning_rate": 1.6488762696826992e-06, - "loss": 0.029, - "step": 6847 - }, - { - "epoch": 3.041527870308683, - "grad_norm": 0.41149108296545567, - "learning_rate": 1.6474378206460306e-06, - "loss": 0.0239, - "step": 6848 - }, - { - "epoch": 3.0419720186542305, - "grad_norm": 0.35312483842889825, - "learning_rate": 1.6459998755564078e-06, - "loss": 0.0265, - "step": 6849 - }, - { - "epoch": 3.042416166999778, - "grad_norm": 0.44423084298706683, - "learning_rate": 1.64456243462998e-06, - "loss": 0.0214, - "step": 6850 - }, - { - "epoch": 3.0428603153453255, - "grad_norm": 0.3441262864593607, - "learning_rate": 1.6431254980828137e-06, - "loss": 0.0261, - "step": 6851 - }, - { - "epoch": 3.043304463690873, - "grad_norm": 0.5333130750896956, - "learning_rate": 1.6416890661309098e-06, - "loss": 0.0345, - "step": 6852 - }, - { - "epoch": 3.04374861203642, - "grad_norm": 0.32916273546434566, - "learning_rate": 1.6402531389901894e-06, - "loss": 0.0196, - "step": 6853 - }, - { - "epoch": 3.0441927603819674, - "grad_norm": 0.4575672416898921, - "learning_rate": 1.6388177168764919e-06, - "loss": 0.0316, - "step": 6854 - }, - { - "epoch": 3.044636908727515, - "grad_norm": 0.4231274981211455, - "learning_rate": 1.6373828000055886e-06, - "loss": 0.0267, - "step": 6855 - }, - { - "epoch": 3.0450810570730624, - "grad_norm": 0.35267922234888455, - "learning_rate": 1.6359483885931709e-06, - "loss": 0.0224, - "step": 6856 - }, - { - "epoch": 3.0455252054186097, - "grad_norm": 0.4270485321761482, - "learning_rate": 1.634514482854856e-06, - "loss": 0.0215, - "step": 6857 - }, - { - "epoch": 3.045969353764157, - "grad_norm": 0.45838498705619496, - "learning_rate": 1.6330810830061833e-06, - "loss": 0.0259, - "step": 6858 - }, - { - "epoch": 3.0464135021097047, - "grad_norm": 0.39622697822045655, - "learning_rate": 1.6316481892626202e-06, - "loss": 0.0227, - "step": 6859 - }, - { - "epoch": 3.046857650455252, - "grad_norm": 0.4767421948555728, - "learning_rate": 1.6302158018395504e-06, - "loss": 0.021, - "step": 6860 - }, - { - "epoch": 3.0473017988007993, - "grad_norm": 0.4108572908804955, - "learning_rate": 1.6287839209522883e-06, - "loss": 0.0243, - "step": 6861 - }, - { - "epoch": 3.047745947146347, - "grad_norm": 0.3361055439360816, - "learning_rate": 1.62735254681607e-06, - "loss": 0.016, - "step": 6862 - }, - { - "epoch": 3.0481900954918943, - "grad_norm": 0.4890820081997742, - "learning_rate": 1.6259216796460553e-06, - "loss": 0.0332, - "step": 6863 - }, - { - "epoch": 3.0486342438374416, - "grad_norm": 0.3446651596488856, - "learning_rate": 1.6244913196573291e-06, - "loss": 0.0231, - "step": 6864 - }, - { - "epoch": 3.049078392182989, - "grad_norm": 0.4912264351287514, - "learning_rate": 1.623061467064896e-06, - "loss": 0.0272, - "step": 6865 - }, - { - "epoch": 3.0495225405285367, - "grad_norm": 0.4925885654573531, - "learning_rate": 1.6216321220836885e-06, - "loss": 0.0248, - "step": 6866 - }, - { - "epoch": 3.049966688874084, - "grad_norm": 0.3824243589770029, - "learning_rate": 1.6202032849285626e-06, - "loss": 0.0225, - "step": 6867 - }, - { - "epoch": 3.0504108372196312, - "grad_norm": 0.36019367540666763, - "learning_rate": 1.618774955814293e-06, - "loss": 0.0179, - "step": 6868 - }, - { - "epoch": 3.050854985565179, - "grad_norm": 0.5143104427412662, - "learning_rate": 1.6173471349555858e-06, - "loss": 0.0376, - "step": 6869 - }, - { - "epoch": 3.0512991339107263, - "grad_norm": 0.5934473311861377, - "learning_rate": 1.6159198225670676e-06, - "loss": 0.0448, - "step": 6870 - }, - { - "epoch": 3.0517432822562736, - "grad_norm": 0.45164863726044957, - "learning_rate": 1.6144930188632835e-06, - "loss": 0.0278, - "step": 6871 - }, - { - "epoch": 3.052187430601821, - "grad_norm": 0.5798490797388325, - "learning_rate": 1.6130667240587083e-06, - "loss": 0.0297, - "step": 6872 - }, - { - "epoch": 3.0526315789473686, - "grad_norm": 0.38501590071391395, - "learning_rate": 1.6116409383677383e-06, - "loss": 0.0244, - "step": 6873 - }, - { - "epoch": 3.053075727292916, - "grad_norm": 0.4626822552615348, - "learning_rate": 1.6102156620046937e-06, - "loss": 0.0234, - "step": 6874 - }, - { - "epoch": 3.053519875638463, - "grad_norm": 0.4709097565849538, - "learning_rate": 1.6087908951838193e-06, - "loss": 0.0325, - "step": 6875 - }, - { - "epoch": 3.0539640239840105, - "grad_norm": 0.38512614079510565, - "learning_rate": 1.6073666381192777e-06, - "loss": 0.0185, - "step": 6876 - }, - { - "epoch": 3.054408172329558, - "grad_norm": 0.3981638223693639, - "learning_rate": 1.6059428910251617e-06, - "loss": 0.0297, - "step": 6877 - }, - { - "epoch": 3.0548523206751055, - "grad_norm": 0.40972645998312435, - "learning_rate": 1.604519654115484e-06, - "loss": 0.0217, - "step": 6878 - }, - { - "epoch": 3.0552964690206528, - "grad_norm": 0.44788376402765917, - "learning_rate": 1.6030969276041813e-06, - "loss": 0.0255, - "step": 6879 - }, - { - "epoch": 3.0557406173662005, - "grad_norm": 0.45493435461264287, - "learning_rate": 1.6016747117051135e-06, - "loss": 0.0367, - "step": 6880 - }, - { - "epoch": 3.056184765711748, - "grad_norm": 0.5367527315236188, - "learning_rate": 1.6002530066320659e-06, - "loss": 0.0386, - "step": 6881 - }, - { - "epoch": 3.056628914057295, - "grad_norm": 0.36716772288492844, - "learning_rate": 1.5988318125987412e-06, - "loss": 0.0243, - "step": 6882 - }, - { - "epoch": 3.0570730624028424, - "grad_norm": 0.39700109646152437, - "learning_rate": 1.597411129818771e-06, - "loss": 0.0331, - "step": 6883 - }, - { - "epoch": 3.05751721074839, - "grad_norm": 0.4200578523175907, - "learning_rate": 1.5959909585057099e-06, - "loss": 0.0254, - "step": 6884 - }, - { - "epoch": 3.0579613590939374, - "grad_norm": 0.5911121451638379, - "learning_rate": 1.5945712988730278e-06, - "loss": 0.0418, - "step": 6885 - }, - { - "epoch": 3.0584055074394847, - "grad_norm": 0.3316421396306147, - "learning_rate": 1.5931521511341292e-06, - "loss": 0.0266, - "step": 6886 - }, - { - "epoch": 3.058849655785032, - "grad_norm": 0.38444781154877616, - "learning_rate": 1.5917335155023368e-06, - "loss": 0.018, - "step": 6887 - }, - { - "epoch": 3.0592938041305797, - "grad_norm": 0.38538195654260493, - "learning_rate": 1.590315392190891e-06, - "loss": 0.0223, - "step": 6888 - }, - { - "epoch": 3.059737952476127, - "grad_norm": 0.3188424830635555, - "learning_rate": 1.5888977814129625e-06, - "loss": 0.0162, - "step": 6889 - }, - { - "epoch": 3.0601821008216743, - "grad_norm": 0.5604048534911154, - "learning_rate": 1.5874806833816436e-06, - "loss": 0.0451, - "step": 6890 - }, - { - "epoch": 3.060626249167222, - "grad_norm": 0.4396556865933855, - "learning_rate": 1.5860640983099435e-06, - "loss": 0.027, - "step": 6891 - }, - { - "epoch": 3.0610703975127693, - "grad_norm": 0.5338475568401356, - "learning_rate": 1.584648026410805e-06, - "loss": 0.0428, - "step": 6892 - }, - { - "epoch": 3.0615145458583166, - "grad_norm": 0.41364738868710765, - "learning_rate": 1.583232467897083e-06, - "loss": 0.0255, - "step": 6893 - }, - { - "epoch": 3.061958694203864, - "grad_norm": 0.44128675402771145, - "learning_rate": 1.581817422981562e-06, - "loss": 0.0266, - "step": 6894 - }, - { - "epoch": 3.0624028425494116, - "grad_norm": 0.40811877248427353, - "learning_rate": 1.5804028918769488e-06, - "loss": 0.0202, - "step": 6895 - }, - { - "epoch": 3.062846990894959, - "grad_norm": 0.38420681148774133, - "learning_rate": 1.5789888747958666e-06, - "loss": 0.0299, - "step": 6896 - }, - { - "epoch": 3.0632911392405062, - "grad_norm": 0.3556137019009215, - "learning_rate": 1.5775753719508708e-06, - "loss": 0.021, - "step": 6897 - }, - { - "epoch": 3.063735287586054, - "grad_norm": 0.2882748081505788, - "learning_rate": 1.5761623835544348e-06, - "loss": 0.0258, - "step": 6898 - }, - { - "epoch": 3.0641794359316012, - "grad_norm": 0.3787886197229445, - "learning_rate": 1.5747499098189524e-06, - "loss": 0.0281, - "step": 6899 - }, - { - "epoch": 3.0646235842771485, - "grad_norm": 0.3354175757689207, - "learning_rate": 1.5733379509567426e-06, - "loss": 0.0234, - "step": 6900 - }, - { - "epoch": 3.065067732622696, - "grad_norm": 0.3209071922045546, - "learning_rate": 1.5719265071800498e-06, - "loss": 0.0182, - "step": 6901 - }, - { - "epoch": 3.0655118809682436, - "grad_norm": 0.43501967745626874, - "learning_rate": 1.5705155787010324e-06, - "loss": 0.0353, - "step": 6902 - }, - { - "epoch": 3.065956029313791, - "grad_norm": 0.4929319158649113, - "learning_rate": 1.5691051657317835e-06, - "loss": 0.0271, - "step": 6903 - }, - { - "epoch": 3.066400177659338, - "grad_norm": 0.3584810561885387, - "learning_rate": 1.5676952684843072e-06, - "loss": 0.0258, - "step": 6904 - }, - { - "epoch": 3.0668443260048854, - "grad_norm": 0.4638657269405188, - "learning_rate": 1.5662858871705366e-06, - "loss": 0.0236, - "step": 6905 - }, - { - "epoch": 3.067288474350433, - "grad_norm": 0.5065060680688717, - "learning_rate": 1.5648770220023263e-06, - "loss": 0.0287, - "step": 6906 - }, - { - "epoch": 3.0677326226959805, - "grad_norm": 0.4859275697928332, - "learning_rate": 1.5634686731914533e-06, - "loss": 0.0434, - "step": 6907 - }, - { - "epoch": 3.0681767710415278, - "grad_norm": 0.38551762262809786, - "learning_rate": 1.562060840949612e-06, - "loss": 0.0265, - "step": 6908 - }, - { - "epoch": 3.0686209193870755, - "grad_norm": 0.6596634602131578, - "learning_rate": 1.5606535254884297e-06, - "loss": 0.0343, - "step": 6909 - }, - { - "epoch": 3.0690650677326228, - "grad_norm": 0.40308217569658306, - "learning_rate": 1.5592467270194456e-06, - "loss": 0.0306, - "step": 6910 - }, - { - "epoch": 3.06950921607817, - "grad_norm": 0.6298585513540671, - "learning_rate": 1.5578404457541264e-06, - "loss": 0.0276, - "step": 6911 - }, - { - "epoch": 3.0699533644237174, - "grad_norm": 0.6519358046715888, - "learning_rate": 1.5564346819038616e-06, - "loss": 0.0335, - "step": 6912 - }, - { - "epoch": 3.070397512769265, - "grad_norm": 0.4424893479760814, - "learning_rate": 1.5550294356799573e-06, - "loss": 0.0249, - "step": 6913 - }, - { - "epoch": 3.0708416611148124, - "grad_norm": 0.44065297318246016, - "learning_rate": 1.55362470729365e-06, - "loss": 0.031, - "step": 6914 - }, - { - "epoch": 3.0712858094603597, - "grad_norm": 0.3171014816611921, - "learning_rate": 1.5522204969560945e-06, - "loss": 0.0228, - "step": 6915 - }, - { - "epoch": 3.071729957805907, - "grad_norm": 0.48982852285102907, - "learning_rate": 1.5508168048783645e-06, - "loss": 0.0243, - "step": 6916 - }, - { - "epoch": 3.0721741061514547, - "grad_norm": 0.36638227513318433, - "learning_rate": 1.5494136312714598e-06, - "loss": 0.0209, - "step": 6917 - }, - { - "epoch": 3.072618254497002, - "grad_norm": 0.40243990074588076, - "learning_rate": 1.5480109763463031e-06, - "loss": 0.0323, - "step": 6918 - }, - { - "epoch": 3.0730624028425493, - "grad_norm": 0.6138342320939462, - "learning_rate": 1.5466088403137326e-06, - "loss": 0.0303, - "step": 6919 - }, - { - "epoch": 3.073506551188097, - "grad_norm": 0.41638032443840284, - "learning_rate": 1.5452072233845194e-06, - "loss": 0.0302, - "step": 6920 - }, - { - "epoch": 3.0739506995336443, - "grad_norm": 0.3958771196139635, - "learning_rate": 1.5438061257693459e-06, - "loss": 0.025, - "step": 6921 - }, - { - "epoch": 3.0743948478791916, - "grad_norm": 0.409201101395247, - "learning_rate": 1.5424055476788219e-06, - "loss": 0.0281, - "step": 6922 - }, - { - "epoch": 3.074838996224739, - "grad_norm": 0.36270140865155653, - "learning_rate": 1.54100548932348e-06, - "loss": 0.0211, - "step": 6923 - }, - { - "epoch": 3.0752831445702866, - "grad_norm": 0.3939239641466573, - "learning_rate": 1.5396059509137694e-06, - "loss": 0.0221, - "step": 6924 - }, - { - "epoch": 3.075727292915834, - "grad_norm": 0.3910124202096161, - "learning_rate": 1.5382069326600645e-06, - "loss": 0.0247, - "step": 6925 - }, - { - "epoch": 3.076171441261381, - "grad_norm": 0.4045068362861821, - "learning_rate": 1.536808434772667e-06, - "loss": 0.0252, - "step": 6926 - }, - { - "epoch": 3.076615589606929, - "grad_norm": 0.48842679698944363, - "learning_rate": 1.5354104574617889e-06, - "loss": 0.0272, - "step": 6927 - }, - { - "epoch": 3.0770597379524762, - "grad_norm": 0.5568048405499652, - "learning_rate": 1.5340130009375725e-06, - "loss": 0.0271, - "step": 6928 - }, - { - "epoch": 3.0775038862980235, - "grad_norm": 0.35344319652310796, - "learning_rate": 1.5326160654100803e-06, - "loss": 0.0203, - "step": 6929 - }, - { - "epoch": 3.077948034643571, - "grad_norm": 0.41134278821618964, - "learning_rate": 1.5312196510892907e-06, - "loss": 0.0203, - "step": 6930 - }, - { - "epoch": 3.0783921829891185, - "grad_norm": 0.43439941775477803, - "learning_rate": 1.529823758185115e-06, - "loss": 0.0307, - "step": 6931 - }, - { - "epoch": 3.078836331334666, - "grad_norm": 0.36710103440734276, - "learning_rate": 1.5284283869073753e-06, - "loss": 0.0226, - "step": 6932 - }, - { - "epoch": 3.079280479680213, - "grad_norm": 0.3795190028300577, - "learning_rate": 1.5270335374658202e-06, - "loss": 0.0193, - "step": 6933 - }, - { - "epoch": 3.0797246280257604, - "grad_norm": 0.416086597648837, - "learning_rate": 1.5256392100701201e-06, - "loss": 0.0341, - "step": 6934 - }, - { - "epoch": 3.080168776371308, - "grad_norm": 0.47903570547844193, - "learning_rate": 1.5242454049298672e-06, - "loss": 0.0326, - "step": 6935 - }, - { - "epoch": 3.0806129247168554, - "grad_norm": 0.41924576417637155, - "learning_rate": 1.5228521222545694e-06, - "loss": 0.0232, - "step": 6936 - }, - { - "epoch": 3.0810570730624027, - "grad_norm": 0.4086739351412735, - "learning_rate": 1.5214593622536677e-06, - "loss": 0.0191, - "step": 6937 - }, - { - "epoch": 3.0815012214079505, - "grad_norm": 0.42090655404898863, - "learning_rate": 1.5200671251365118e-06, - "loss": 0.0234, - "step": 6938 - }, - { - "epoch": 3.0819453697534978, - "grad_norm": 0.4491694516637582, - "learning_rate": 1.5186754111123814e-06, - "loss": 0.0309, - "step": 6939 - }, - { - "epoch": 3.082389518099045, - "grad_norm": 0.3593852978556601, - "learning_rate": 1.5172842203904752e-06, - "loss": 0.0219, - "step": 6940 - }, - { - "epoch": 3.0828336664445923, - "grad_norm": 0.4142188627731481, - "learning_rate": 1.5158935531799102e-06, - "loss": 0.0231, - "step": 6941 - }, - { - "epoch": 3.08327781479014, - "grad_norm": 0.3433910778382263, - "learning_rate": 1.5145034096897271e-06, - "loss": 0.0232, - "step": 6942 - }, - { - "epoch": 3.0837219631356874, - "grad_norm": 0.42040065985676595, - "learning_rate": 1.5131137901288928e-06, - "loss": 0.0274, - "step": 6943 - }, - { - "epoch": 3.0841661114812347, - "grad_norm": 0.37811344114245793, - "learning_rate": 1.5117246947062864e-06, - "loss": 0.0278, - "step": 6944 - }, - { - "epoch": 3.084610259826782, - "grad_norm": 0.6332645566079246, - "learning_rate": 1.5103361236307135e-06, - "loss": 0.0199, - "step": 6945 - }, - { - "epoch": 3.0850544081723297, - "grad_norm": 0.3367889244804879, - "learning_rate": 1.5089480771109021e-06, - "loss": 0.0221, - "step": 6946 - }, - { - "epoch": 3.085498556517877, - "grad_norm": 0.45979021485293203, - "learning_rate": 1.507560555355494e-06, - "loss": 0.0259, - "step": 6947 - }, - { - "epoch": 3.0859427048634243, - "grad_norm": 0.3846307657242651, - "learning_rate": 1.5061735585730636e-06, - "loss": 0.0252, - "step": 6948 - }, - { - "epoch": 3.086386853208972, - "grad_norm": 0.36534660664962226, - "learning_rate": 1.504787086972096e-06, - "loss": 0.0173, - "step": 6949 - }, - { - "epoch": 3.0868310015545193, - "grad_norm": 0.6011247419581762, - "learning_rate": 1.5034011407610021e-06, - "loss": 0.0399, - "step": 6950 - }, - { - "epoch": 3.0872751499000666, - "grad_norm": 0.4220887506749395, - "learning_rate": 1.502015720148115e-06, - "loss": 0.0265, - "step": 6951 - }, - { - "epoch": 3.087719298245614, - "grad_norm": 0.383622304930661, - "learning_rate": 1.5006308253416846e-06, - "loss": 0.0235, - "step": 6952 - }, - { - "epoch": 3.0881634465911616, - "grad_norm": 0.48234757617698376, - "learning_rate": 1.4992464565498831e-06, - "loss": 0.0354, - "step": 6953 - }, - { - "epoch": 3.088607594936709, - "grad_norm": 0.4645377835459482, - "learning_rate": 1.4978626139808094e-06, - "loss": 0.0332, - "step": 6954 - }, - { - "epoch": 3.089051743282256, - "grad_norm": 0.4061976190338302, - "learning_rate": 1.4964792978424746e-06, - "loss": 0.0231, - "step": 6955 - }, - { - "epoch": 3.089495891627804, - "grad_norm": 0.5482133012861865, - "learning_rate": 1.495096508342816e-06, - "loss": 0.0369, - "step": 6956 - }, - { - "epoch": 3.089940039973351, - "grad_norm": 0.3661650923331457, - "learning_rate": 1.4937142456896907e-06, - "loss": 0.0231, - "step": 6957 - }, - { - "epoch": 3.0903841883188985, - "grad_norm": 0.4124097316559942, - "learning_rate": 1.4923325100908749e-06, - "loss": 0.026, - "step": 6958 - }, - { - "epoch": 3.090828336664446, - "grad_norm": 0.5506003037235153, - "learning_rate": 1.490951301754066e-06, - "loss": 0.0272, - "step": 6959 - }, - { - "epoch": 3.0912724850099935, - "grad_norm": 0.39939980904986866, - "learning_rate": 1.4895706208868876e-06, - "loss": 0.028, - "step": 6960 - }, - { - "epoch": 3.091716633355541, - "grad_norm": 0.46904750344961466, - "learning_rate": 1.4881904676968756e-06, - "loss": 0.0258, - "step": 6961 - }, - { - "epoch": 3.092160781701088, - "grad_norm": 0.46041949493275547, - "learning_rate": 1.4868108423914913e-06, - "loss": 0.0284, - "step": 6962 - }, - { - "epoch": 3.0926049300466354, - "grad_norm": 0.3681186282877754, - "learning_rate": 1.4854317451781175e-06, - "loss": 0.0232, - "step": 6963 - }, - { - "epoch": 3.093049078392183, - "grad_norm": 0.4723461677240559, - "learning_rate": 1.4840531762640524e-06, - "loss": 0.0277, - "step": 6964 - }, - { - "epoch": 3.0934932267377304, - "grad_norm": 0.38175424189288976, - "learning_rate": 1.4826751358565211e-06, - "loss": 0.019, - "step": 6965 - }, - { - "epoch": 3.0939373750832777, - "grad_norm": 0.41086502374434375, - "learning_rate": 1.4812976241626659e-06, - "loss": 0.0287, - "step": 6966 - }, - { - "epoch": 3.0943815234288254, - "grad_norm": 0.39348852384191785, - "learning_rate": 1.4799206413895494e-06, - "loss": 0.026, - "step": 6967 - }, - { - "epoch": 3.0948256717743727, - "grad_norm": 0.4897251060835583, - "learning_rate": 1.4785441877441587e-06, - "loss": 0.0283, - "step": 6968 - }, - { - "epoch": 3.09526982011992, - "grad_norm": 0.396902884934688, - "learning_rate": 1.4771682634333933e-06, - "loss": 0.0241, - "step": 6969 - }, - { - "epoch": 3.0957139684654673, - "grad_norm": 0.6024718135186303, - "learning_rate": 1.4757928686640788e-06, - "loss": 0.0324, - "step": 6970 - }, - { - "epoch": 3.096158116811015, - "grad_norm": 0.3483981306238304, - "learning_rate": 1.4744180036429656e-06, - "loss": 0.0203, - "step": 6971 - }, - { - "epoch": 3.0966022651565623, - "grad_norm": 0.45268776356014967, - "learning_rate": 1.4730436685767135e-06, - "loss": 0.0302, - "step": 6972 - }, - { - "epoch": 3.0970464135021096, - "grad_norm": 0.5030795577403537, - "learning_rate": 1.4716698636719107e-06, - "loss": 0.0291, - "step": 6973 - }, - { - "epoch": 3.097490561847657, - "grad_norm": 0.4396401120191148, - "learning_rate": 1.470296589135065e-06, - "loss": 0.0342, - "step": 6974 - }, - { - "epoch": 3.0979347101932047, - "grad_norm": 0.3979132301675201, - "learning_rate": 1.4689238451725995e-06, - "loss": 0.0201, - "step": 6975 - }, - { - "epoch": 3.098378858538752, - "grad_norm": 0.3787151125083963, - "learning_rate": 1.4675516319908629e-06, - "loss": 0.0257, - "step": 6976 - }, - { - "epoch": 3.0988230068842992, - "grad_norm": 0.37709012703404926, - "learning_rate": 1.466179949796121e-06, - "loss": 0.031, - "step": 6977 - }, - { - "epoch": 3.099267155229847, - "grad_norm": 0.46313083842641917, - "learning_rate": 1.4648087987945625e-06, - "loss": 0.0187, - "step": 6978 - }, - { - "epoch": 3.0997113035753943, - "grad_norm": 0.41171533054686754, - "learning_rate": 1.4634381791922936e-06, - "loss": 0.0265, - "step": 6979 - }, - { - "epoch": 3.1001554519209416, - "grad_norm": 0.35930958286366377, - "learning_rate": 1.4620680911953433e-06, - "loss": 0.0287, - "step": 6980 - }, - { - "epoch": 3.100599600266489, - "grad_norm": 0.5934414847112055, - "learning_rate": 1.460698535009657e-06, - "loss": 0.021, - "step": 6981 - }, - { - "epoch": 3.1010437486120366, - "grad_norm": 0.4183538172458836, - "learning_rate": 1.4593295108411027e-06, - "loss": 0.0239, - "step": 6982 - }, - { - "epoch": 3.101487896957584, - "grad_norm": 0.3854427898688108, - "learning_rate": 1.4579610188954685e-06, - "loss": 0.0274, - "step": 6983 - }, - { - "epoch": 3.101932045303131, - "grad_norm": 0.4711207189434762, - "learning_rate": 1.4565930593784616e-06, - "loss": 0.0282, - "step": 6984 - }, - { - "epoch": 3.1023761936486784, - "grad_norm": 0.4616599969953312, - "learning_rate": 1.455225632495712e-06, - "loss": 0.029, - "step": 6985 - }, - { - "epoch": 3.102820341994226, - "grad_norm": 0.4076291882386512, - "learning_rate": 1.453858738452763e-06, - "loss": 0.0326, - "step": 6986 - }, - { - "epoch": 3.1032644903397735, - "grad_norm": 0.31683467511767754, - "learning_rate": 1.4524923774550825e-06, - "loss": 0.0176, - "step": 6987 - }, - { - "epoch": 3.1037086386853208, - "grad_norm": 0.39424949712455315, - "learning_rate": 1.4511265497080624e-06, - "loss": 0.0302, - "step": 6988 - }, - { - "epoch": 3.1041527870308685, - "grad_norm": 0.4103257196866521, - "learning_rate": 1.4497612554170054e-06, - "loss": 0.0273, - "step": 6989 - }, - { - "epoch": 3.104596935376416, - "grad_norm": 0.39987357613853897, - "learning_rate": 1.4483964947871392e-06, - "loss": 0.0255, - "step": 6990 - }, - { - "epoch": 3.105041083721963, - "grad_norm": 0.41100905630593115, - "learning_rate": 1.4470322680236132e-06, - "loss": 0.0302, - "step": 6991 - }, - { - "epoch": 3.1054852320675104, - "grad_norm": 0.4415871306778436, - "learning_rate": 1.4456685753314898e-06, - "loss": 0.0248, - "step": 6992 - }, - { - "epoch": 3.105929380413058, - "grad_norm": 0.3985527776293508, - "learning_rate": 1.4443054169157566e-06, - "loss": 0.024, - "step": 6993 - }, - { - "epoch": 3.1063735287586054, - "grad_norm": 0.5025368665991552, - "learning_rate": 1.4429427929813205e-06, - "loss": 0.0327, - "step": 6994 - }, - { - "epoch": 3.1068176771041527, - "grad_norm": 0.543210885126374, - "learning_rate": 1.4415807037330065e-06, - "loss": 0.0244, - "step": 6995 - }, - { - "epoch": 3.1072618254497, - "grad_norm": 0.5795921769157866, - "learning_rate": 1.4402191493755614e-06, - "loss": 0.0319, - "step": 6996 - }, - { - "epoch": 3.1077059737952477, - "grad_norm": 0.33181453971757086, - "learning_rate": 1.4388581301136463e-06, - "loss": 0.0239, - "step": 6997 - }, - { - "epoch": 3.108150122140795, - "grad_norm": 0.35538456672100055, - "learning_rate": 1.4374976461518475e-06, - "loss": 0.0202, - "step": 6998 - }, - { - "epoch": 3.1085942704863423, - "grad_norm": 0.30394370356689626, - "learning_rate": 1.436137697694669e-06, - "loss": 0.0148, - "step": 6999 - }, - { - "epoch": 3.10903841883189, - "grad_norm": 0.5358782537926752, - "learning_rate": 1.4347782849465335e-06, - "loss": 0.0361, - "step": 7000 - }, - { - "epoch": 3.1094825671774373, - "grad_norm": 0.45423657841045967, - "learning_rate": 1.4334194081117853e-06, - "loss": 0.0296, - "step": 7001 - }, - { - "epoch": 3.1099267155229846, - "grad_norm": 0.391451201654507, - "learning_rate": 1.4320610673946862e-06, - "loss": 0.0219, - "step": 7002 - }, - { - "epoch": 3.110370863868532, - "grad_norm": 0.3695407430957425, - "learning_rate": 1.4307032629994162e-06, - "loss": 0.0275, - "step": 7003 - }, - { - "epoch": 3.1108150122140796, - "grad_norm": 0.3522320469665786, - "learning_rate": 1.4293459951300775e-06, - "loss": 0.0191, - "step": 7004 - }, - { - "epoch": 3.111259160559627, - "grad_norm": 0.39441008031342695, - "learning_rate": 1.4279892639906906e-06, - "loss": 0.022, - "step": 7005 - }, - { - "epoch": 3.111703308905174, - "grad_norm": 0.3604951087918798, - "learning_rate": 1.4266330697851955e-06, - "loss": 0.0291, - "step": 7006 - }, - { - "epoch": 3.112147457250722, - "grad_norm": 0.3966852264525251, - "learning_rate": 1.4252774127174502e-06, - "loss": 0.0336, - "step": 7007 - }, - { - "epoch": 3.1125916055962692, - "grad_norm": 0.4920706934740084, - "learning_rate": 1.4239222929912354e-06, - "loss": 0.0339, - "step": 7008 - }, - { - "epoch": 3.1130357539418165, - "grad_norm": 0.452968414032015, - "learning_rate": 1.422567710810246e-06, - "loss": 0.0334, - "step": 7009 - }, - { - "epoch": 3.113479902287364, - "grad_norm": 0.34946018464557227, - "learning_rate": 1.421213666378099e-06, - "loss": 0.0259, - "step": 7010 - }, - { - "epoch": 3.1139240506329116, - "grad_norm": 0.3785739486603564, - "learning_rate": 1.419860159898331e-06, - "loss": 0.0248, - "step": 7011 - }, - { - "epoch": 3.114368198978459, - "grad_norm": 0.48985400850238237, - "learning_rate": 1.418507191574397e-06, - "loss": 0.0245, - "step": 7012 - }, - { - "epoch": 3.114812347324006, - "grad_norm": 0.37256560600518596, - "learning_rate": 1.4171547616096726e-06, - "loss": 0.0263, - "step": 7013 - }, - { - "epoch": 3.1152564956695534, - "grad_norm": 0.4745149660869735, - "learning_rate": 1.4158028702074478e-06, - "loss": 0.0315, - "step": 7014 - }, - { - "epoch": 3.115700644015101, - "grad_norm": 0.4272234241381395, - "learning_rate": 1.4144515175709366e-06, - "loss": 0.0359, - "step": 7015 - }, - { - "epoch": 3.1161447923606485, - "grad_norm": 0.4717714680449159, - "learning_rate": 1.4131007039032702e-06, - "loss": 0.0247, - "step": 7016 - }, - { - "epoch": 3.1165889407061957, - "grad_norm": 0.3661175444111234, - "learning_rate": 1.4117504294074985e-06, - "loss": 0.0199, - "step": 7017 - }, - { - "epoch": 3.1170330890517435, - "grad_norm": 0.39156431702822175, - "learning_rate": 1.4104006942865911e-06, - "loss": 0.0225, - "step": 7018 - }, - { - "epoch": 3.1174772373972908, - "grad_norm": 0.4841602294941045, - "learning_rate": 1.4090514987434372e-06, - "loss": 0.0224, - "step": 7019 - }, - { - "epoch": 3.117921385742838, - "grad_norm": 0.3409883421268121, - "learning_rate": 1.4077028429808415e-06, - "loss": 0.0184, - "step": 7020 - }, - { - "epoch": 3.1183655340883853, - "grad_norm": 0.4571971842074867, - "learning_rate": 1.4063547272015305e-06, - "loss": 0.0334, - "step": 7021 - }, - { - "epoch": 3.118809682433933, - "grad_norm": 0.4859572155144659, - "learning_rate": 1.4050071516081499e-06, - "loss": 0.0236, - "step": 7022 - }, - { - "epoch": 3.1192538307794804, - "grad_norm": 0.36054003070051327, - "learning_rate": 1.4036601164032626e-06, - "loss": 0.0244, - "step": 7023 - }, - { - "epoch": 3.1196979791250277, - "grad_norm": 0.4164627314286096, - "learning_rate": 1.4023136217893518e-06, - "loss": 0.0276, - "step": 7024 - }, - { - "epoch": 3.120142127470575, - "grad_norm": 0.5468105573468193, - "learning_rate": 1.4009676679688167e-06, - "loss": 0.0344, - "step": 7025 - }, - { - "epoch": 3.1205862758161227, - "grad_norm": 0.5699602944440265, - "learning_rate": 1.399622255143978e-06, - "loss": 0.0227, - "step": 7026 - }, - { - "epoch": 3.12103042416167, - "grad_norm": 0.3763296846537518, - "learning_rate": 1.3982773835170738e-06, - "loss": 0.0233, - "step": 7027 - }, - { - "epoch": 3.1214745725072173, - "grad_norm": 0.3809901917507222, - "learning_rate": 1.396933053290262e-06, - "loss": 0.0261, - "step": 7028 - }, - { - "epoch": 3.121918720852765, - "grad_norm": 0.3942636268702358, - "learning_rate": 1.3955892646656172e-06, - "loss": 0.0244, - "step": 7029 - }, - { - "epoch": 3.1223628691983123, - "grad_norm": 0.4034606683568663, - "learning_rate": 1.3942460178451357e-06, - "loss": 0.0267, - "step": 7030 - }, - { - "epoch": 3.1228070175438596, - "grad_norm": 0.4219843417449712, - "learning_rate": 1.3929033130307273e-06, - "loss": 0.0291, - "step": 7031 - }, - { - "epoch": 3.123251165889407, - "grad_norm": 0.39268086756994935, - "learning_rate": 1.3915611504242248e-06, - "loss": 0.0242, - "step": 7032 - }, - { - "epoch": 3.1236953142349546, - "grad_norm": 0.41199677628842846, - "learning_rate": 1.390219530227378e-06, - "loss": 0.0284, - "step": 7033 - }, - { - "epoch": 3.124139462580502, - "grad_norm": 0.39148197640312005, - "learning_rate": 1.3888784526418552e-06, - "loss": 0.0268, - "step": 7034 - }, - { - "epoch": 3.124583610926049, - "grad_norm": 0.6462152373026261, - "learning_rate": 1.3875379178692433e-06, - "loss": 0.042, - "step": 7035 - }, - { - "epoch": 3.125027759271597, - "grad_norm": 0.5120068554721335, - "learning_rate": 1.3861979261110493e-06, - "loss": 0.0352, - "step": 7036 - }, - { - "epoch": 3.125471907617144, - "grad_norm": 0.5096494527748306, - "learning_rate": 1.3848584775686923e-06, - "loss": 0.0329, - "step": 7037 - }, - { - "epoch": 3.1259160559626915, - "grad_norm": 0.49019540876863293, - "learning_rate": 1.3835195724435175e-06, - "loss": 0.0274, - "step": 7038 - }, - { - "epoch": 3.126360204308239, - "grad_norm": 0.4378287851338807, - "learning_rate": 1.3821812109367838e-06, - "loss": 0.0261, - "step": 7039 - }, - { - "epoch": 3.1268043526537865, - "grad_norm": 0.4665401569139363, - "learning_rate": 1.38084339324967e-06, - "loss": 0.0244, - "step": 7040 - }, - { - "epoch": 3.127248500999334, - "grad_norm": 0.4839568704489146, - "learning_rate": 1.3795061195832749e-06, - "loss": 0.0185, - "step": 7041 - }, - { - "epoch": 3.127692649344881, - "grad_norm": 0.38633411841281007, - "learning_rate": 1.3781693901386094e-06, - "loss": 0.0206, - "step": 7042 - }, - { - "epoch": 3.1281367976904284, - "grad_norm": 0.3852498984003089, - "learning_rate": 1.3768332051166089e-06, - "loss": 0.023, - "step": 7043 - }, - { - "epoch": 3.128580946035976, - "grad_norm": 0.3527312929509418, - "learning_rate": 1.3754975647181245e-06, - "loss": 0.0272, - "step": 7044 - }, - { - "epoch": 3.1290250943815234, - "grad_norm": 0.4078162247196801, - "learning_rate": 1.374162469143926e-06, - "loss": 0.0184, - "step": 7045 - }, - { - "epoch": 3.1294692427270707, - "grad_norm": 0.6947202101134006, - "learning_rate": 1.3728279185947002e-06, - "loss": 0.0287, - "step": 7046 - }, - { - "epoch": 3.1299133910726185, - "grad_norm": 0.3650330793591096, - "learning_rate": 1.3714939132710547e-06, - "loss": 0.0203, - "step": 7047 - }, - { - "epoch": 3.1303575394181657, - "grad_norm": 0.3894198333673228, - "learning_rate": 1.3701604533735102e-06, - "loss": 0.0383, - "step": 7048 - }, - { - "epoch": 3.130801687763713, - "grad_norm": 0.3626357108946466, - "learning_rate": 1.3688275391025096e-06, - "loss": 0.019, - "step": 7049 - }, - { - "epoch": 3.1312458361092603, - "grad_norm": 0.421447173128161, - "learning_rate": 1.3674951706584134e-06, - "loss": 0.0182, - "step": 7050 - }, - { - "epoch": 3.131689984454808, - "grad_norm": 0.4627577920703621, - "learning_rate": 1.3661633482414977e-06, - "loss": 0.0265, - "step": 7051 - }, - { - "epoch": 3.1321341328003554, - "grad_norm": 0.43244182202980463, - "learning_rate": 1.3648320720519592e-06, - "loss": 0.0196, - "step": 7052 - }, - { - "epoch": 3.1325782811459026, - "grad_norm": 0.4592125815303857, - "learning_rate": 1.3635013422899124e-06, - "loss": 0.0298, - "step": 7053 - }, - { - "epoch": 3.13302242949145, - "grad_norm": 0.44770730468191844, - "learning_rate": 1.3621711591553854e-06, - "loss": 0.0347, - "step": 7054 - }, - { - "epoch": 3.1334665778369977, - "grad_norm": 0.4417156773267622, - "learning_rate": 1.3608415228483291e-06, - "loss": 0.0286, - "step": 7055 - }, - { - "epoch": 3.133910726182545, - "grad_norm": 0.33340946983600855, - "learning_rate": 1.3595124335686104e-06, - "loss": 0.0239, - "step": 7056 - }, - { - "epoch": 3.1343548745280922, - "grad_norm": 0.45018385116758614, - "learning_rate": 1.3581838915160145e-06, - "loss": 0.0307, - "step": 7057 - }, - { - "epoch": 3.13479902287364, - "grad_norm": 0.49692015647969867, - "learning_rate": 1.3568558968902445e-06, - "loss": 0.0291, - "step": 7058 - }, - { - "epoch": 3.1352431712191873, - "grad_norm": 0.43270232873861747, - "learning_rate": 1.3555284498909183e-06, - "loss": 0.031, - "step": 7059 - }, - { - "epoch": 3.1356873195647346, - "grad_norm": 0.40820462560013654, - "learning_rate": 1.3542015507175743e-06, - "loss": 0.0235, - "step": 7060 - }, - { - "epoch": 3.136131467910282, - "grad_norm": 0.4972923121321135, - "learning_rate": 1.3528751995696688e-06, - "loss": 0.051, - "step": 7061 - }, - { - "epoch": 3.1365756162558296, - "grad_norm": 0.41578377070138484, - "learning_rate": 1.3515493966465743e-06, - "loss": 0.028, - "step": 7062 - }, - { - "epoch": 3.137019764601377, - "grad_norm": 0.4491202264177913, - "learning_rate": 1.350224142147582e-06, - "loss": 0.0172, - "step": 7063 - }, - { - "epoch": 3.137463912946924, - "grad_norm": 0.4586826960329923, - "learning_rate": 1.3488994362719016e-06, - "loss": 0.0244, - "step": 7064 - }, - { - "epoch": 3.137908061292472, - "grad_norm": 0.3366156664630001, - "learning_rate": 1.3475752792186559e-06, - "loss": 0.0177, - "step": 7065 - }, - { - "epoch": 3.138352209638019, - "grad_norm": 0.4038403979608836, - "learning_rate": 1.3462516711868894e-06, - "loss": 0.0283, - "step": 7066 - }, - { - "epoch": 3.1387963579835665, - "grad_norm": 0.36057350319064324, - "learning_rate": 1.3449286123755628e-06, - "loss": 0.0282, - "step": 7067 - }, - { - "epoch": 3.1392405063291138, - "grad_norm": 0.4323404311148623, - "learning_rate": 1.343606102983555e-06, - "loss": 0.0285, - "step": 7068 - }, - { - "epoch": 3.1396846546746615, - "grad_norm": 0.42881049252651987, - "learning_rate": 1.3422841432096623e-06, - "loss": 0.0299, - "step": 7069 - }, - { - "epoch": 3.140128803020209, - "grad_norm": 0.2902742598750208, - "learning_rate": 1.3409627332525954e-06, - "loss": 0.0181, - "step": 7070 - }, - { - "epoch": 3.140572951365756, - "grad_norm": 0.4433992896224678, - "learning_rate": 1.3396418733109856e-06, - "loss": 0.0257, - "step": 7071 - }, - { - "epoch": 3.1410170997113034, - "grad_norm": 0.6894028385625418, - "learning_rate": 1.3383215635833829e-06, - "loss": 0.0304, - "step": 7072 - }, - { - "epoch": 3.141461248056851, - "grad_norm": 0.5245286511325312, - "learning_rate": 1.337001804268247e-06, - "loss": 0.0269, - "step": 7073 - }, - { - "epoch": 3.1419053964023984, - "grad_norm": 0.5000099376315333, - "learning_rate": 1.3356825955639645e-06, - "loss": 0.0308, - "step": 7074 - }, - { - "epoch": 3.1423495447479457, - "grad_norm": 0.6132659503324829, - "learning_rate": 1.3343639376688355e-06, - "loss": 0.032, - "step": 7075 - }, - { - "epoch": 3.1427936930934934, - "grad_norm": 0.4546873288387578, - "learning_rate": 1.3330458307810734e-06, - "loss": 0.0246, - "step": 7076 - }, - { - "epoch": 3.1432378414390407, - "grad_norm": 0.385185952241093, - "learning_rate": 1.3317282750988137e-06, - "loss": 0.0184, - "step": 7077 - }, - { - "epoch": 3.143681989784588, - "grad_norm": 0.39438378600816204, - "learning_rate": 1.3304112708201073e-06, - "loss": 0.0279, - "step": 7078 - }, - { - "epoch": 3.1441261381301353, - "grad_norm": 0.4806029181761233, - "learning_rate": 1.329094818142922e-06, - "loss": 0.0241, - "step": 7079 - }, - { - "epoch": 3.144570286475683, - "grad_norm": 0.5228838911668552, - "learning_rate": 1.327778917265144e-06, - "loss": 0.0274, - "step": 7080 - }, - { - "epoch": 3.1450144348212303, - "grad_norm": 0.46531223651019554, - "learning_rate": 1.3264635683845755e-06, - "loss": 0.0241, - "step": 7081 - }, - { - "epoch": 3.1454585831667776, - "grad_norm": 0.32902026350154623, - "learning_rate": 1.3251487716989341e-06, - "loss": 0.0169, - "step": 7082 - }, - { - "epoch": 3.145902731512325, - "grad_norm": 0.3895325085859838, - "learning_rate": 1.3238345274058572e-06, - "loss": 0.0219, - "step": 7083 - }, - { - "epoch": 3.1463468798578726, - "grad_norm": 0.4481405965406686, - "learning_rate": 1.322520835702898e-06, - "loss": 0.025, - "step": 7084 - }, - { - "epoch": 3.14679102820342, - "grad_norm": 0.5128203909715668, - "learning_rate": 1.3212076967875265e-06, - "loss": 0.0348, - "step": 7085 - }, - { - "epoch": 3.1472351765489672, - "grad_norm": 0.46447445611495575, - "learning_rate": 1.3198951108571312e-06, - "loss": 0.0313, - "step": 7086 - }, - { - "epoch": 3.147679324894515, - "grad_norm": 0.29983798874260054, - "learning_rate": 1.3185830781090136e-06, - "loss": 0.0142, - "step": 7087 - }, - { - "epoch": 3.1481234732400623, - "grad_norm": 0.39996284685169026, - "learning_rate": 1.3172715987403955e-06, - "loss": 0.0279, - "step": 7088 - }, - { - "epoch": 3.1485676215856095, - "grad_norm": 0.44271681407903213, - "learning_rate": 1.3159606729484165e-06, - "loss": 0.0309, - "step": 7089 - }, - { - "epoch": 3.149011769931157, - "grad_norm": 0.39836027232808446, - "learning_rate": 1.3146503009301258e-06, - "loss": 0.0258, - "step": 7090 - }, - { - "epoch": 3.1494559182767046, - "grad_norm": 0.38922372733595656, - "learning_rate": 1.3133404828824998e-06, - "loss": 0.0249, - "step": 7091 - }, - { - "epoch": 3.149900066622252, - "grad_norm": 0.4629939120495913, - "learning_rate": 1.3120312190024265e-06, - "loss": 0.0177, - "step": 7092 - }, - { - "epoch": 3.150344214967799, - "grad_norm": 0.5175669897186087, - "learning_rate": 1.3107225094867066e-06, - "loss": 0.0352, - "step": 7093 - }, - { - "epoch": 3.150788363313347, - "grad_norm": 0.4487683854548024, - "learning_rate": 1.3094143545320636e-06, - "loss": 0.0309, - "step": 7094 - }, - { - "epoch": 3.151232511658894, - "grad_norm": 0.4853986633083335, - "learning_rate": 1.3081067543351351e-06, - "loss": 0.0351, - "step": 7095 - }, - { - "epoch": 3.1516766600044415, - "grad_norm": 0.4346141380035006, - "learning_rate": 1.3067997090924755e-06, - "loss": 0.0275, - "step": 7096 - }, - { - "epoch": 3.1521208083499888, - "grad_norm": 0.7565904341486527, - "learning_rate": 1.305493219000558e-06, - "loss": 0.0331, - "step": 7097 - }, - { - "epoch": 3.1525649566955365, - "grad_norm": 0.4041064888863333, - "learning_rate": 1.3041872842557669e-06, - "loss": 0.0303, - "step": 7098 - }, - { - "epoch": 3.153009105041084, - "grad_norm": 0.39758421079788286, - "learning_rate": 1.3028819050544078e-06, - "loss": 0.02, - "step": 7099 - }, - { - "epoch": 3.153453253386631, - "grad_norm": 0.43003387120599507, - "learning_rate": 1.3015770815927009e-06, - "loss": 0.034, - "step": 7100 - }, - { - "epoch": 3.1538974017321784, - "grad_norm": 0.4278201479573488, - "learning_rate": 1.3002728140667847e-06, - "loss": 0.0233, - "step": 7101 - }, - { - "epoch": 3.154341550077726, - "grad_norm": 0.37161324406542584, - "learning_rate": 1.2989691026727114e-06, - "loss": 0.0197, - "step": 7102 - }, - { - "epoch": 3.1547856984232734, - "grad_norm": 0.49261598275828133, - "learning_rate": 1.2976659476064528e-06, - "loss": 0.0332, - "step": 7103 - }, - { - "epoch": 3.1552298467688207, - "grad_norm": 0.41968097121084996, - "learning_rate": 1.2963633490638927e-06, - "loss": 0.0266, - "step": 7104 - }, - { - "epoch": 3.155673995114368, - "grad_norm": 0.37929111645310976, - "learning_rate": 1.2950613072408352e-06, - "loss": 0.0206, - "step": 7105 - }, - { - "epoch": 3.1561181434599157, - "grad_norm": 0.4490037141777807, - "learning_rate": 1.2937598223330006e-06, - "loss": 0.0335, - "step": 7106 - }, - { - "epoch": 3.156562291805463, - "grad_norm": 0.6757819685260033, - "learning_rate": 1.2924588945360195e-06, - "loss": 0.0387, - "step": 7107 - }, - { - "epoch": 3.1570064401510103, - "grad_norm": 0.38030068064549954, - "learning_rate": 1.2911585240454483e-06, - "loss": 0.0225, - "step": 7108 - }, - { - "epoch": 3.157450588496558, - "grad_norm": 0.3303111905779037, - "learning_rate": 1.2898587110567546e-06, - "loss": 0.0158, - "step": 7109 - }, - { - "epoch": 3.1578947368421053, - "grad_norm": 0.36066815697274596, - "learning_rate": 1.2885594557653197e-06, - "loss": 0.0271, - "step": 7110 - }, - { - "epoch": 3.1583388851876526, - "grad_norm": 0.5116023254049137, - "learning_rate": 1.2872607583664443e-06, - "loss": 0.0332, - "step": 7111 - }, - { - "epoch": 3.1587830335332, - "grad_norm": 0.4098679802743982, - "learning_rate": 1.2859626190553459e-06, - "loss": 0.0206, - "step": 7112 - }, - { - "epoch": 3.1592271818787476, - "grad_norm": 0.3664260571550095, - "learning_rate": 1.2846650380271563e-06, - "loss": 0.0215, - "step": 7113 - }, - { - "epoch": 3.159671330224295, - "grad_norm": 0.4801826025425368, - "learning_rate": 1.283368015476925e-06, - "loss": 0.0207, - "step": 7114 - }, - { - "epoch": 3.160115478569842, - "grad_norm": 0.43450502438961686, - "learning_rate": 1.2820715515996146e-06, - "loss": 0.02, - "step": 7115 - }, - { - "epoch": 3.16055962691539, - "grad_norm": 0.4768531731139893, - "learning_rate": 1.280775646590106e-06, - "loss": 0.0321, - "step": 7116 - }, - { - "epoch": 3.1610037752609372, - "grad_norm": 0.3425693376069199, - "learning_rate": 1.2794803006431984e-06, - "loss": 0.0188, - "step": 7117 - }, - { - "epoch": 3.1614479236064845, - "grad_norm": 0.5214149879054594, - "learning_rate": 1.2781855139535988e-06, - "loss": 0.0262, - "step": 7118 - }, - { - "epoch": 3.161892071952032, - "grad_norm": 0.40187878997324955, - "learning_rate": 1.2768912867159406e-06, - "loss": 0.0361, - "step": 7119 - }, - { - "epoch": 3.1623362202975795, - "grad_norm": 0.5083763707824389, - "learning_rate": 1.2755976191247682e-06, - "loss": 0.021, - "step": 7120 - }, - { - "epoch": 3.162780368643127, - "grad_norm": 0.4055192408506625, - "learning_rate": 1.2743045113745385e-06, - "loss": 0.0227, - "step": 7121 - }, - { - "epoch": 3.163224516988674, - "grad_norm": 0.4094976103066204, - "learning_rate": 1.2730119636596288e-06, - "loss": 0.0281, - "step": 7122 - }, - { - "epoch": 3.163668665334222, - "grad_norm": 0.45282087571650875, - "learning_rate": 1.2717199761743336e-06, - "loss": 0.0251, - "step": 7123 - }, - { - "epoch": 3.164112813679769, - "grad_norm": 0.4299624187324527, - "learning_rate": 1.2704285491128553e-06, - "loss": 0.0283, - "step": 7124 - }, - { - "epoch": 3.1645569620253164, - "grad_norm": 0.4009907594057322, - "learning_rate": 1.2691376826693235e-06, - "loss": 0.0281, - "step": 7125 - }, - { - "epoch": 3.1650011103708637, - "grad_norm": 0.36139049623487757, - "learning_rate": 1.2678473770377726e-06, - "loss": 0.0227, - "step": 7126 - }, - { - "epoch": 3.1654452587164115, - "grad_norm": 0.3911090099172185, - "learning_rate": 1.2665576324121587e-06, - "loss": 0.019, - "step": 7127 - }, - { - "epoch": 3.1658894070619588, - "grad_norm": 0.4537050187193512, - "learning_rate": 1.2652684489863532e-06, - "loss": 0.0171, - "step": 7128 - }, - { - "epoch": 3.166333555407506, - "grad_norm": 0.4737431638279446, - "learning_rate": 1.2639798269541432e-06, - "loss": 0.0262, - "step": 7129 - }, - { - "epoch": 3.1667777037530533, - "grad_norm": 0.5356720104817232, - "learning_rate": 1.2626917665092265e-06, - "loss": 0.0367, - "step": 7130 - }, - { - "epoch": 3.167221852098601, - "grad_norm": 0.41884008779805304, - "learning_rate": 1.2614042678452254e-06, - "loss": 0.0277, - "step": 7131 - }, - { - "epoch": 3.1676660004441484, - "grad_norm": 0.3813088744442865, - "learning_rate": 1.260117331155669e-06, - "loss": 0.0241, - "step": 7132 - }, - { - "epoch": 3.1681101487896957, - "grad_norm": 0.4824659904614872, - "learning_rate": 1.258830956634008e-06, - "loss": 0.0266, - "step": 7133 - }, - { - "epoch": 3.168554297135243, - "grad_norm": 0.3933046865818541, - "learning_rate": 1.2575451444736065e-06, - "loss": 0.0241, - "step": 7134 - }, - { - "epoch": 3.1689984454807907, - "grad_norm": 0.4675792699661695, - "learning_rate": 1.25625989486774e-06, - "loss": 0.0231, - "step": 7135 - }, - { - "epoch": 3.169442593826338, - "grad_norm": 0.47235339239033924, - "learning_rate": 1.2549752080096078e-06, - "loss": 0.0228, - "step": 7136 - }, - { - "epoch": 3.1698867421718853, - "grad_norm": 0.5375969536724705, - "learning_rate": 1.2536910840923205e-06, - "loss": 0.0221, - "step": 7137 - }, - { - "epoch": 3.170330890517433, - "grad_norm": 0.4497621301881044, - "learning_rate": 1.2524075233089e-06, - "loss": 0.0343, - "step": 7138 - }, - { - "epoch": 3.1707750388629803, - "grad_norm": 0.292034357924613, - "learning_rate": 1.251124525852289e-06, - "loss": 0.0126, - "step": 7139 - }, - { - "epoch": 3.1712191872085276, - "grad_norm": 0.35964553866038385, - "learning_rate": 1.2498420919153464e-06, - "loss": 0.0275, - "step": 7140 - }, - { - "epoch": 3.171663335554075, - "grad_norm": 0.35000228782660847, - "learning_rate": 1.2485602216908378e-06, - "loss": 0.0175, - "step": 7141 - }, - { - "epoch": 3.1721074838996226, - "grad_norm": 0.3682428993293255, - "learning_rate": 1.2472789153714572e-06, - "loss": 0.0239, - "step": 7142 - }, - { - "epoch": 3.17255163224517, - "grad_norm": 0.5552689170116238, - "learning_rate": 1.245998173149801e-06, - "loss": 0.0296, - "step": 7143 - }, - { - "epoch": 3.172995780590717, - "grad_norm": 0.4582090480824963, - "learning_rate": 1.244717995218389e-06, - "loss": 0.0331, - "step": 7144 - }, - { - "epoch": 3.173439928936265, - "grad_norm": 0.5212899725680629, - "learning_rate": 1.2434383817696548e-06, - "loss": 0.0258, - "step": 7145 - }, - { - "epoch": 3.173884077281812, - "grad_norm": 0.381695700080771, - "learning_rate": 1.2421593329959437e-06, - "loss": 0.0243, - "step": 7146 - }, - { - "epoch": 3.1743282256273595, - "grad_norm": 0.3974320066485413, - "learning_rate": 1.2408808490895176e-06, - "loss": 0.0296, - "step": 7147 - }, - { - "epoch": 3.174772373972907, - "grad_norm": 0.37908843626651684, - "learning_rate": 1.2396029302425589e-06, - "loss": 0.0233, - "step": 7148 - }, - { - "epoch": 3.1752165223184545, - "grad_norm": 0.3916584808786519, - "learning_rate": 1.2383255766471564e-06, - "loss": 0.0231, - "step": 7149 - }, - { - "epoch": 3.175660670664002, - "grad_norm": 0.4921323330468778, - "learning_rate": 1.2370487884953198e-06, - "loss": 0.0282, - "step": 7150 - }, - { - "epoch": 3.176104819009549, - "grad_norm": 0.4474378513318365, - "learning_rate": 1.2357725659789727e-06, - "loss": 0.0316, - "step": 7151 - }, - { - "epoch": 3.1765489673550964, - "grad_norm": 0.507768145335404, - "learning_rate": 1.234496909289949e-06, - "loss": 0.0314, - "step": 7152 - }, - { - "epoch": 3.176993115700644, - "grad_norm": 0.43929762431880826, - "learning_rate": 1.2332218186200062e-06, - "loss": 0.036, - "step": 7153 - }, - { - "epoch": 3.1774372640461914, - "grad_norm": 0.45050762765015684, - "learning_rate": 1.2319472941608118e-06, - "loss": 0.0243, - "step": 7154 - }, - { - "epoch": 3.1778814123917387, - "grad_norm": 0.35294587274588873, - "learning_rate": 1.2306733361039457e-06, - "loss": 0.0214, - "step": 7155 - }, - { - "epoch": 3.1783255607372864, - "grad_norm": 0.41208302752606807, - "learning_rate": 1.2293999446409067e-06, - "loss": 0.0261, - "step": 7156 - }, - { - "epoch": 3.1787697090828337, - "grad_norm": 0.5041315813475921, - "learning_rate": 1.228127119963109e-06, - "loss": 0.0285, - "step": 7157 - }, - { - "epoch": 3.179213857428381, - "grad_norm": 0.44300292068122016, - "learning_rate": 1.2268548622618753e-06, - "loss": 0.0297, - "step": 7158 - }, - { - "epoch": 3.1796580057739283, - "grad_norm": 0.45740073144316085, - "learning_rate": 1.2255831717284528e-06, - "loss": 0.0279, - "step": 7159 - }, - { - "epoch": 3.180102154119476, - "grad_norm": 0.3604803514556297, - "learning_rate": 1.2243120485539944e-06, - "loss": 0.0223, - "step": 7160 - }, - { - "epoch": 3.1805463024650233, - "grad_norm": 0.3321847278445319, - "learning_rate": 1.223041492929573e-06, - "loss": 0.0165, - "step": 7161 - }, - { - "epoch": 3.1809904508105706, - "grad_norm": 0.35593201085972326, - "learning_rate": 1.221771505046176e-06, - "loss": 0.0214, - "step": 7162 - }, - { - "epoch": 3.181434599156118, - "grad_norm": 0.48430071345607734, - "learning_rate": 1.2205020850947009e-06, - "loss": 0.0324, - "step": 7163 - }, - { - "epoch": 3.1818787475016657, - "grad_norm": 0.3786369491408754, - "learning_rate": 1.219233233265964e-06, - "loss": 0.0196, - "step": 7164 - }, - { - "epoch": 3.182322895847213, - "grad_norm": 0.422396003313245, - "learning_rate": 1.2179649497506984e-06, - "loss": 0.0235, - "step": 7165 - }, - { - "epoch": 3.1827670441927602, - "grad_norm": 0.5088819210251279, - "learning_rate": 1.216697234739545e-06, - "loss": 0.0245, - "step": 7166 - }, - { - "epoch": 3.183211192538308, - "grad_norm": 0.44593024209496107, - "learning_rate": 1.2154300884230647e-06, - "loss": 0.0248, - "step": 7167 - }, - { - "epoch": 3.1836553408838553, - "grad_norm": 0.44151282819218973, - "learning_rate": 1.2141635109917322e-06, - "loss": 0.0243, - "step": 7168 - }, - { - "epoch": 3.1840994892294026, - "grad_norm": 0.5179310919609202, - "learning_rate": 1.2128975026359308e-06, - "loss": 0.0303, - "step": 7169 - }, - { - "epoch": 3.18454363757495, - "grad_norm": 0.47378558079444577, - "learning_rate": 1.2116320635459694e-06, - "loss": 0.0308, - "step": 7170 - }, - { - "epoch": 3.1849877859204976, - "grad_norm": 0.3483756788167636, - "learning_rate": 1.2103671939120603e-06, - "loss": 0.0205, - "step": 7171 - }, - { - "epoch": 3.185431934266045, - "grad_norm": 0.43627278086001237, - "learning_rate": 1.2091028939243372e-06, - "loss": 0.0348, - "step": 7172 - }, - { - "epoch": 3.185876082611592, - "grad_norm": 0.49521087150532456, - "learning_rate": 1.207839163772845e-06, - "loss": 0.0312, - "step": 7173 - }, - { - "epoch": 3.18632023095714, - "grad_norm": 0.4856268760629859, - "learning_rate": 1.206576003647545e-06, - "loss": 0.02, - "step": 7174 - }, - { - "epoch": 3.186764379302687, - "grad_norm": 0.39637861480749204, - "learning_rate": 1.2053134137383082e-06, - "loss": 0.0255, - "step": 7175 - }, - { - "epoch": 3.1872085276482345, - "grad_norm": 0.43819489132093886, - "learning_rate": 1.2040513942349285e-06, - "loss": 0.0197, - "step": 7176 - }, - { - "epoch": 3.1876526759937818, - "grad_norm": 0.4216663259343465, - "learning_rate": 1.2027899453271046e-06, - "loss": 0.023, - "step": 7177 - }, - { - "epoch": 3.1880968243393295, - "grad_norm": 0.36509628207338957, - "learning_rate": 1.2015290672044555e-06, - "loss": 0.0213, - "step": 7178 - }, - { - "epoch": 3.188540972684877, - "grad_norm": 0.5214855632257979, - "learning_rate": 1.2002687600565138e-06, - "loss": 0.0326, - "step": 7179 - }, - { - "epoch": 3.188985121030424, - "grad_norm": 0.4141851354865393, - "learning_rate": 1.199009024072722e-06, - "loss": 0.0224, - "step": 7180 - }, - { - "epoch": 3.1894292693759714, - "grad_norm": 0.33059616966272004, - "learning_rate": 1.1977498594424404e-06, - "loss": 0.0197, - "step": 7181 - }, - { - "epoch": 3.189873417721519, - "grad_norm": 0.5492371676948207, - "learning_rate": 1.196491266354946e-06, - "loss": 0.0251, - "step": 7182 - }, - { - "epoch": 3.1903175660670664, - "grad_norm": 0.4321665285793292, - "learning_rate": 1.1952332449994236e-06, - "loss": 0.0254, - "step": 7183 - }, - { - "epoch": 3.1907617144126137, - "grad_norm": 0.5421889351399802, - "learning_rate": 1.1939757955649762e-06, - "loss": 0.0205, - "step": 7184 - }, - { - "epoch": 3.1912058627581614, - "grad_norm": 0.3637124855912809, - "learning_rate": 1.1927189182406207e-06, - "loss": 0.028, - "step": 7185 - }, - { - "epoch": 3.1916500111037087, - "grad_norm": 0.4272124427935157, - "learning_rate": 1.191462613215284e-06, - "loss": 0.0245, - "step": 7186 - }, - { - "epoch": 3.192094159449256, - "grad_norm": 0.5695918619211989, - "learning_rate": 1.190206880677815e-06, - "loss": 0.0275, - "step": 7187 - }, - { - "epoch": 3.1925383077948033, - "grad_norm": 0.47921802084665843, - "learning_rate": 1.188951720816967e-06, - "loss": 0.0327, - "step": 7188 - }, - { - "epoch": 3.192982456140351, - "grad_norm": 0.47676512936038173, - "learning_rate": 1.1876971338214144e-06, - "loss": 0.0237, - "step": 7189 - }, - { - "epoch": 3.1934266044858983, - "grad_norm": 0.32414238199829704, - "learning_rate": 1.1864431198797433e-06, - "loss": 0.0223, - "step": 7190 - }, - { - "epoch": 3.1938707528314456, - "grad_norm": 0.37243806607221175, - "learning_rate": 1.1851896791804507e-06, - "loss": 0.0226, - "step": 7191 - }, - { - "epoch": 3.194314901176993, - "grad_norm": 0.45790234199301505, - "learning_rate": 1.1839368119119504e-06, - "loss": 0.0366, - "step": 7192 - }, - { - "epoch": 3.1947590495225406, - "grad_norm": 0.3980186449229834, - "learning_rate": 1.182684518262574e-06, - "loss": 0.0285, - "step": 7193 - }, - { - "epoch": 3.195203197868088, - "grad_norm": 0.37521542650727085, - "learning_rate": 1.1814327984205576e-06, - "loss": 0.0262, - "step": 7194 - }, - { - "epoch": 3.195647346213635, - "grad_norm": 0.5161650488172221, - "learning_rate": 1.1801816525740578e-06, - "loss": 0.0314, - "step": 7195 - }, - { - "epoch": 3.196091494559183, - "grad_norm": 0.4308857816197324, - "learning_rate": 1.1789310809111444e-06, - "loss": 0.0214, - "step": 7196 - }, - { - "epoch": 3.1965356429047302, - "grad_norm": 0.34195463841205975, - "learning_rate": 1.1776810836197965e-06, - "loss": 0.0148, - "step": 7197 - }, - { - "epoch": 3.1969797912502775, - "grad_norm": 0.40193187911181305, - "learning_rate": 1.1764316608879122e-06, - "loss": 0.0246, - "step": 7198 - }, - { - "epoch": 3.197423939595825, - "grad_norm": 0.45090018760439604, - "learning_rate": 1.1751828129033e-06, - "loss": 0.0332, - "step": 7199 - }, - { - "epoch": 3.1978680879413726, - "grad_norm": 0.45570245011519855, - "learning_rate": 1.1739345398536834e-06, - "loss": 0.0287, - "step": 7200 - }, - { - "epoch": 3.19831223628692, - "grad_norm": 0.3951478023748843, - "learning_rate": 1.1726868419266985e-06, - "loss": 0.022, - "step": 7201 - }, - { - "epoch": 3.198756384632467, - "grad_norm": 0.45157628259237137, - "learning_rate": 1.1714397193098975e-06, - "loss": 0.0254, - "step": 7202 - }, - { - "epoch": 3.199200532978015, - "grad_norm": 0.577449890397031, - "learning_rate": 1.1701931721907417e-06, - "loss": 0.0475, - "step": 7203 - }, - { - "epoch": 3.199644681323562, - "grad_norm": 0.3377142483011571, - "learning_rate": 1.1689472007566082e-06, - "loss": 0.0233, - "step": 7204 - }, - { - "epoch": 3.2000888296691095, - "grad_norm": 0.5632120326062124, - "learning_rate": 1.1677018051947898e-06, - "loss": 0.0306, - "step": 7205 - }, - { - "epoch": 3.2005329780146567, - "grad_norm": 0.3154600517639766, - "learning_rate": 1.1664569856924885e-06, - "loss": 0.0185, - "step": 7206 - }, - { - "epoch": 3.2009771263602045, - "grad_norm": 0.3451810701202185, - "learning_rate": 1.1652127424368248e-06, - "loss": 0.0188, - "step": 7207 - }, - { - "epoch": 3.2014212747057518, - "grad_norm": 0.4315345094850551, - "learning_rate": 1.1639690756148258e-06, - "loss": 0.0247, - "step": 7208 - }, - { - "epoch": 3.201865423051299, - "grad_norm": 0.46106139760465975, - "learning_rate": 1.162725985413436e-06, - "loss": 0.0373, - "step": 7209 - }, - { - "epoch": 3.2023095713968464, - "grad_norm": 0.35725860550074096, - "learning_rate": 1.1614834720195173e-06, - "loss": 0.0225, - "step": 7210 - }, - { - "epoch": 3.202753719742394, - "grad_norm": 0.4457350062298839, - "learning_rate": 1.1602415356198366e-06, - "loss": 0.0257, - "step": 7211 - }, - { - "epoch": 3.2031978680879414, - "grad_norm": 0.414436238208724, - "learning_rate": 1.1590001764010795e-06, - "loss": 0.0233, - "step": 7212 - }, - { - "epoch": 3.2036420164334887, - "grad_norm": 0.5552444496185882, - "learning_rate": 1.1577593945498439e-06, - "loss": 0.0267, - "step": 7213 - }, - { - "epoch": 3.2040861647790364, - "grad_norm": 0.9572529892460224, - "learning_rate": 1.156519190252638e-06, - "loss": 0.0208, - "step": 7214 - }, - { - "epoch": 3.2045303131245837, - "grad_norm": 0.4000797957812149, - "learning_rate": 1.1552795636958874e-06, - "loss": 0.021, - "step": 7215 - }, - { - "epoch": 3.204974461470131, - "grad_norm": 0.44923949008034364, - "learning_rate": 1.154040515065929e-06, - "loss": 0.0257, - "step": 7216 - }, - { - "epoch": 3.2054186098156783, - "grad_norm": 0.3620392807393453, - "learning_rate": 1.1528020445490122e-06, - "loss": 0.0168, - "step": 7217 - }, - { - "epoch": 3.205862758161226, - "grad_norm": 0.40007534369795833, - "learning_rate": 1.1515641523313026e-06, - "loss": 0.019, - "step": 7218 - }, - { - "epoch": 3.2063069065067733, - "grad_norm": 0.48298838416688933, - "learning_rate": 1.1503268385988726e-06, - "loss": 0.0289, - "step": 7219 - }, - { - "epoch": 3.2067510548523206, - "grad_norm": 0.42002715971416216, - "learning_rate": 1.1490901035377127e-06, - "loss": 0.0218, - "step": 7220 - }, - { - "epoch": 3.207195203197868, - "grad_norm": 0.3286114150705751, - "learning_rate": 1.147853947333727e-06, - "loss": 0.0197, - "step": 7221 - }, - { - "epoch": 3.2076393515434156, - "grad_norm": 0.4609222221897965, - "learning_rate": 1.1466183701727285e-06, - "loss": 0.0324, - "step": 7222 - }, - { - "epoch": 3.208083499888963, - "grad_norm": 0.685447809015932, - "learning_rate": 1.1453833722404467e-06, - "loss": 0.0324, - "step": 7223 - }, - { - "epoch": 3.20852764823451, - "grad_norm": 0.3436945045836064, - "learning_rate": 1.1441489537225242e-06, - "loss": 0.0157, - "step": 7224 - }, - { - "epoch": 3.208971796580058, - "grad_norm": 0.5032138454546828, - "learning_rate": 1.142915114804512e-06, - "loss": 0.0305, - "step": 7225 - }, - { - "epoch": 3.2094159449256052, - "grad_norm": 0.4731526252440794, - "learning_rate": 1.1416818556718766e-06, - "loss": 0.0213, - "step": 7226 - }, - { - "epoch": 3.2098600932711525, - "grad_norm": 0.4103989387772799, - "learning_rate": 1.1404491765100028e-06, - "loss": 0.0233, - "step": 7227 - }, - { - "epoch": 3.2103042416167, - "grad_norm": 0.4494816164510247, - "learning_rate": 1.1392170775041788e-06, - "loss": 0.0297, - "step": 7228 - }, - { - "epoch": 3.2107483899622475, - "grad_norm": 0.49371762049111106, - "learning_rate": 1.1379855588396111e-06, - "loss": 0.0352, - "step": 7229 - }, - { - "epoch": 3.211192538307795, - "grad_norm": 0.5061710158808476, - "learning_rate": 1.1367546207014197e-06, - "loss": 0.029, - "step": 7230 - }, - { - "epoch": 3.211636686653342, - "grad_norm": 0.8145152873713564, - "learning_rate": 1.1355242632746322e-06, - "loss": 0.0262, - "step": 7231 - }, - { - "epoch": 3.21208083499889, - "grad_norm": 0.3714708449077717, - "learning_rate": 1.134294486744194e-06, - "loss": 0.0195, - "step": 7232 - }, - { - "epoch": 3.212524983344437, - "grad_norm": 0.3746783694058875, - "learning_rate": 1.1330652912949614e-06, - "loss": 0.0315, - "step": 7233 - }, - { - "epoch": 3.2129691316899844, - "grad_norm": 0.3552761658812224, - "learning_rate": 1.131836677111703e-06, - "loss": 0.0195, - "step": 7234 - }, - { - "epoch": 3.2134132800355317, - "grad_norm": 0.425007652814607, - "learning_rate": 1.130608644379102e-06, - "loss": 0.025, - "step": 7235 - }, - { - "epoch": 3.2138574283810795, - "grad_norm": 0.43005151453900625, - "learning_rate": 1.12938119328175e-06, - "loss": 0.0209, - "step": 7236 - }, - { - "epoch": 3.2143015767266268, - "grad_norm": 0.4595931551885754, - "learning_rate": 1.1281543240041553e-06, - "loss": 0.0299, - "step": 7237 - }, - { - "epoch": 3.214745725072174, - "grad_norm": 0.46371024210057155, - "learning_rate": 1.1269280367307366e-06, - "loss": 0.0323, - "step": 7238 - }, - { - "epoch": 3.2151898734177213, - "grad_norm": 0.6313936018053397, - "learning_rate": 1.125702331645826e-06, - "loss": 0.045, - "step": 7239 - }, - { - "epoch": 3.215634021763269, - "grad_norm": 0.4072252776090633, - "learning_rate": 1.1244772089336676e-06, - "loss": 0.0201, - "step": 7240 - }, - { - "epoch": 3.2160781701088164, - "grad_norm": 0.4916818117136901, - "learning_rate": 1.1232526687784196e-06, - "loss": 0.0321, - "step": 7241 - }, - { - "epoch": 3.2165223184543636, - "grad_norm": 0.4020576743291894, - "learning_rate": 1.1220287113641487e-06, - "loss": 0.0235, - "step": 7242 - }, - { - "epoch": 3.216966466799911, - "grad_norm": 0.3816616945761622, - "learning_rate": 1.1208053368748379e-06, - "loss": 0.0195, - "step": 7243 - }, - { - "epoch": 3.2174106151454587, - "grad_norm": 0.7096021175300753, - "learning_rate": 1.1195825454943805e-06, - "loss": 0.0277, - "step": 7244 - }, - { - "epoch": 3.217854763491006, - "grad_norm": 0.4537143505718799, - "learning_rate": 1.1183603374065832e-06, - "loss": 0.0317, - "step": 7245 - }, - { - "epoch": 3.2182989118365533, - "grad_norm": 0.3124829853038914, - "learning_rate": 1.1171387127951667e-06, - "loss": 0.0134, - "step": 7246 - }, - { - "epoch": 3.218743060182101, - "grad_norm": 0.3523041765470259, - "learning_rate": 1.1159176718437581e-06, - "loss": 0.0197, - "step": 7247 - }, - { - "epoch": 3.2191872085276483, - "grad_norm": 0.46781406748141774, - "learning_rate": 1.114697214735903e-06, - "loss": 0.03, - "step": 7248 - }, - { - "epoch": 3.2196313568731956, - "grad_norm": 0.45497498562705085, - "learning_rate": 1.113477341655056e-06, - "loss": 0.0326, - "step": 7249 - }, - { - "epoch": 3.220075505218743, - "grad_norm": 0.34355725383797536, - "learning_rate": 1.1122580527845844e-06, - "loss": 0.0199, - "step": 7250 - }, - { - "epoch": 3.2205196535642906, - "grad_norm": 0.4222581331896639, - "learning_rate": 1.1110393483077697e-06, - "loss": 0.0248, - "step": 7251 - }, - { - "epoch": 3.220963801909838, - "grad_norm": 0.3466800241459592, - "learning_rate": 1.1098212284078037e-06, - "loss": 0.0262, - "step": 7252 - }, - { - "epoch": 3.221407950255385, - "grad_norm": 0.4157855377746249, - "learning_rate": 1.108603693267788e-06, - "loss": 0.0256, - "step": 7253 - }, - { - "epoch": 3.221852098600933, - "grad_norm": 0.383389548238552, - "learning_rate": 1.1073867430707409e-06, - "loss": 0.024, - "step": 7254 - }, - { - "epoch": 3.22229624694648, - "grad_norm": 0.30999218839566933, - "learning_rate": 1.1061703779995903e-06, - "loss": 0.0132, - "step": 7255 - }, - { - "epoch": 3.2227403952920275, - "grad_norm": 0.4028830849965787, - "learning_rate": 1.1049545982371763e-06, - "loss": 0.0237, - "step": 7256 - }, - { - "epoch": 3.223184543637575, - "grad_norm": 0.399211499707528, - "learning_rate": 1.1037394039662514e-06, - "loss": 0.025, - "step": 7257 - }, - { - "epoch": 3.2236286919831225, - "grad_norm": 0.4718629067386526, - "learning_rate": 1.1025247953694812e-06, - "loss": 0.022, - "step": 7258 - }, - { - "epoch": 3.22407284032867, - "grad_norm": 0.37912161530644367, - "learning_rate": 1.1013107726294398e-06, - "loss": 0.0218, - "step": 7259 - }, - { - "epoch": 3.224516988674217, - "grad_norm": 0.34533958399869225, - "learning_rate": 1.100097335928616e-06, - "loss": 0.0165, - "step": 7260 - }, - { - "epoch": 3.224961137019765, - "grad_norm": 0.5551950864778356, - "learning_rate": 1.0988844854494108e-06, - "loss": 0.0351, - "step": 7261 - }, - { - "epoch": 3.225405285365312, - "grad_norm": 0.5024454449508828, - "learning_rate": 1.0976722213741353e-06, - "loss": 0.0264, - "step": 7262 - }, - { - "epoch": 3.2258494337108594, - "grad_norm": 0.44064209832820683, - "learning_rate": 1.0964605438850157e-06, - "loss": 0.0277, - "step": 7263 - }, - { - "epoch": 3.2262935820564067, - "grad_norm": 0.3886834550548472, - "learning_rate": 1.0952494531641845e-06, - "loss": 0.0254, - "step": 7264 - }, - { - "epoch": 3.2267377304019544, - "grad_norm": 0.44369145821666817, - "learning_rate": 1.0940389493936903e-06, - "loss": 0.0305, - "step": 7265 - }, - { - "epoch": 3.2271818787475017, - "grad_norm": 0.5083410379311883, - "learning_rate": 1.092829032755493e-06, - "loss": 0.0234, - "step": 7266 - }, - { - "epoch": 3.227626027093049, - "grad_norm": 0.5641232827648482, - "learning_rate": 1.091619703431463e-06, - "loss": 0.033, - "step": 7267 - }, - { - "epoch": 3.2280701754385963, - "grad_norm": 0.49450571188792775, - "learning_rate": 1.0904109616033837e-06, - "loss": 0.0259, - "step": 7268 - }, - { - "epoch": 3.228514323784144, - "grad_norm": 0.40475539158874035, - "learning_rate": 1.0892028074529504e-06, - "loss": 0.0304, - "step": 7269 - }, - { - "epoch": 3.2289584721296913, - "grad_norm": 0.3822868070952512, - "learning_rate": 1.0879952411617668e-06, - "loss": 0.0258, - "step": 7270 - }, - { - "epoch": 3.2294026204752386, - "grad_norm": 0.32534348009223135, - "learning_rate": 1.0867882629113512e-06, - "loss": 0.0165, - "step": 7271 - }, - { - "epoch": 3.229846768820786, - "grad_norm": 0.44997009638741614, - "learning_rate": 1.085581872883134e-06, - "loss": 0.0288, - "step": 7272 - }, - { - "epoch": 3.2302909171663337, - "grad_norm": 0.3775740023239124, - "learning_rate": 1.0843760712584557e-06, - "loss": 0.0191, - "step": 7273 - }, - { - "epoch": 3.230735065511881, - "grad_norm": 0.36873567439006616, - "learning_rate": 1.0831708582185684e-06, - "loss": 0.0215, - "step": 7274 - }, - { - "epoch": 3.2311792138574282, - "grad_norm": 0.41830159166514674, - "learning_rate": 1.081966233944638e-06, - "loss": 0.0349, - "step": 7275 - }, - { - "epoch": 3.231623362202976, - "grad_norm": 0.3474261761721994, - "learning_rate": 1.0807621986177369e-06, - "loss": 0.0197, - "step": 7276 - }, - { - "epoch": 3.2320675105485233, - "grad_norm": 0.4726466217876324, - "learning_rate": 1.0795587524188532e-06, - "loss": 0.0257, - "step": 7277 - }, - { - "epoch": 3.2325116588940705, - "grad_norm": 0.5853092492971105, - "learning_rate": 1.0783558955288864e-06, - "loss": 0.033, - "step": 7278 - }, - { - "epoch": 3.232955807239618, - "grad_norm": 0.4076799037255477, - "learning_rate": 1.0771536281286454e-06, - "loss": 0.0251, - "step": 7279 - }, - { - "epoch": 3.2333999555851656, - "grad_norm": 0.3975030901670652, - "learning_rate": 1.0759519503988525e-06, - "loss": 0.0223, - "step": 7280 - }, - { - "epoch": 3.233844103930713, - "grad_norm": 0.40051231781743457, - "learning_rate": 1.0747508625201387e-06, - "loss": 0.0247, - "step": 7281 - }, - { - "epoch": 3.23428825227626, - "grad_norm": 0.44390849962202206, - "learning_rate": 1.0735503646730483e-06, - "loss": 0.0281, - "step": 7282 - }, - { - "epoch": 3.234732400621808, - "grad_norm": 0.4052200206226338, - "learning_rate": 1.0723504570380367e-06, - "loss": 0.0286, - "step": 7283 - }, - { - "epoch": 3.235176548967355, - "grad_norm": 0.37902885647278484, - "learning_rate": 1.0711511397954706e-06, - "loss": 0.0205, - "step": 7284 - }, - { - "epoch": 3.2356206973129025, - "grad_norm": 0.3395460769637531, - "learning_rate": 1.0699524131256273e-06, - "loss": 0.0201, - "step": 7285 - }, - { - "epoch": 3.2360648456584498, - "grad_norm": 0.3790836643740825, - "learning_rate": 1.0687542772086978e-06, - "loss": 0.0213, - "step": 7286 - }, - { - "epoch": 3.2365089940039975, - "grad_norm": 0.3344270224539923, - "learning_rate": 1.0675567322247794e-06, - "loss": 0.0188, - "step": 7287 - }, - { - "epoch": 3.236953142349545, - "grad_norm": 0.49193821672434174, - "learning_rate": 1.0663597783538843e-06, - "loss": 0.0258, - "step": 7288 - }, - { - "epoch": 3.237397290695092, - "grad_norm": 0.40685832395170973, - "learning_rate": 1.0651634157759361e-06, - "loss": 0.0266, - "step": 7289 - }, - { - "epoch": 3.2378414390406394, - "grad_norm": 0.4090407200808684, - "learning_rate": 1.063967644670767e-06, - "loss": 0.0264, - "step": 7290 - }, - { - "epoch": 3.238285587386187, - "grad_norm": 0.3883951968082906, - "learning_rate": 1.0627724652181237e-06, - "loss": 0.033, - "step": 7291 - }, - { - "epoch": 3.2387297357317344, - "grad_norm": 0.43888578494036506, - "learning_rate": 1.06157787759766e-06, - "loss": 0.0291, - "step": 7292 - }, - { - "epoch": 3.2391738840772817, - "grad_norm": 0.5336092270346524, - "learning_rate": 1.0603838819889429e-06, - "loss": 0.031, - "step": 7293 - }, - { - "epoch": 3.2396180324228294, - "grad_norm": 0.4556708038719475, - "learning_rate": 1.0591904785714507e-06, - "loss": 0.0189, - "step": 7294 - }, - { - "epoch": 3.2400621807683767, - "grad_norm": 0.42245566909222265, - "learning_rate": 1.0579976675245724e-06, - "loss": 0.0241, - "step": 7295 - }, - { - "epoch": 3.240506329113924, - "grad_norm": 0.4528542240509129, - "learning_rate": 1.0568054490276075e-06, - "loss": 0.0251, - "step": 7296 - }, - { - "epoch": 3.2409504774594713, - "grad_norm": 0.4541061461339724, - "learning_rate": 1.0556138232597684e-06, - "loss": 0.0201, - "step": 7297 - }, - { - "epoch": 3.241394625805019, - "grad_norm": 0.3790673698790023, - "learning_rate": 1.054422790400173e-06, - "loss": 0.019, - "step": 7298 - }, - { - "epoch": 3.2418387741505663, - "grad_norm": 0.41427572592093276, - "learning_rate": 1.0532323506278564e-06, - "loss": 0.0302, - "step": 7299 - }, - { - "epoch": 3.2422829224961136, - "grad_norm": 0.4164805864455333, - "learning_rate": 1.0520425041217613e-06, - "loss": 0.0374, - "step": 7300 - }, - { - "epoch": 3.242727070841661, - "grad_norm": 0.5061586285788595, - "learning_rate": 1.0508532510607421e-06, - "loss": 0.0369, - "step": 7301 - }, - { - "epoch": 3.2431712191872086, - "grad_norm": 0.51039712429684, - "learning_rate": 1.049664591623563e-06, - "loss": 0.0256, - "step": 7302 - }, - { - "epoch": 3.243615367532756, - "grad_norm": 0.3147661792422783, - "learning_rate": 1.0484765259889024e-06, - "loss": 0.0178, - "step": 7303 - }, - { - "epoch": 3.244059515878303, - "grad_norm": 0.34967275161446504, - "learning_rate": 1.0472890543353425e-06, - "loss": 0.022, - "step": 7304 - }, - { - "epoch": 3.244503664223851, - "grad_norm": 0.425569161792133, - "learning_rate": 1.0461021768413827e-06, - "loss": 0.0327, - "step": 7305 - }, - { - "epoch": 3.2449478125693982, - "grad_norm": 0.3260603432721134, - "learning_rate": 1.0449158936854308e-06, - "loss": 0.019, - "step": 7306 - }, - { - "epoch": 3.2453919609149455, - "grad_norm": 0.40232295144807395, - "learning_rate": 1.0437302050458053e-06, - "loss": 0.0343, - "step": 7307 - }, - { - "epoch": 3.245836109260493, - "grad_norm": 0.435190410566104, - "learning_rate": 1.0425451111007368e-06, - "loss": 0.0371, - "step": 7308 - }, - { - "epoch": 3.2462802576060406, - "grad_norm": 0.36886684345005827, - "learning_rate": 1.0413606120283616e-06, - "loss": 0.0159, - "step": 7309 - }, - { - "epoch": 3.246724405951588, - "grad_norm": 0.6185604106742186, - "learning_rate": 1.040176708006732e-06, - "loss": 0.0317, - "step": 7310 - }, - { - "epoch": 3.247168554297135, - "grad_norm": 0.4287929879516384, - "learning_rate": 1.0389933992138106e-06, - "loss": 0.0285, - "step": 7311 - }, - { - "epoch": 3.247612702642683, - "grad_norm": 0.5172218558312743, - "learning_rate": 1.0378106858274639e-06, - "loss": 0.0269, - "step": 7312 - }, - { - "epoch": 3.24805685098823, - "grad_norm": 0.47500334875701583, - "learning_rate": 1.036628568025479e-06, - "loss": 0.0291, - "step": 7313 - }, - { - "epoch": 3.2485009993337775, - "grad_norm": 0.36315045000352464, - "learning_rate": 1.035447045985547e-06, - "loss": 0.0205, - "step": 7314 - }, - { - "epoch": 3.2489451476793247, - "grad_norm": 0.3702839335927883, - "learning_rate": 1.0342661198852689e-06, - "loss": 0.0272, - "step": 7315 - }, - { - "epoch": 3.2493892960248725, - "grad_norm": 0.4172702979013527, - "learning_rate": 1.0330857899021584e-06, - "loss": 0.0288, - "step": 7316 - }, - { - "epoch": 3.2498334443704198, - "grad_norm": 0.4003396546345466, - "learning_rate": 1.03190605621364e-06, - "loss": 0.0249, - "step": 7317 - }, - { - "epoch": 3.250277592715967, - "grad_norm": 0.36419349273758006, - "learning_rate": 1.0307269189970482e-06, - "loss": 0.0227, - "step": 7318 - }, - { - "epoch": 3.250721741061515, - "grad_norm": 0.46337961532186456, - "learning_rate": 1.0295483784296274e-06, - "loss": 0.0292, - "step": 7319 - }, - { - "epoch": 3.251165889407062, - "grad_norm": 0.4844395758839405, - "learning_rate": 1.0283704346885303e-06, - "loss": 0.0315, - "step": 7320 - }, - { - "epoch": 3.2516100377526094, - "grad_norm": 0.43388879742046443, - "learning_rate": 1.027193087950823e-06, - "loss": 0.0253, - "step": 7321 - }, - { - "epoch": 3.2520541860981567, - "grad_norm": 0.43253571684648157, - "learning_rate": 1.0260163383934807e-06, - "loss": 0.0292, - "step": 7322 - }, - { - "epoch": 3.252498334443704, - "grad_norm": 0.37106610809103213, - "learning_rate": 1.0248401861933888e-06, - "loss": 0.0208, - "step": 7323 - }, - { - "epoch": 3.2529424827892517, - "grad_norm": 0.35259048739158694, - "learning_rate": 1.0236646315273436e-06, - "loss": 0.0223, - "step": 7324 - }, - { - "epoch": 3.253386631134799, - "grad_norm": 0.5151899180549023, - "learning_rate": 1.0224896745720513e-06, - "loss": 0.0313, - "step": 7325 - }, - { - "epoch": 3.2538307794803463, - "grad_norm": 0.4063218873557718, - "learning_rate": 1.0213153155041255e-06, - "loss": 0.0291, - "step": 7326 - }, - { - "epoch": 3.254274927825894, - "grad_norm": 0.6251543341151611, - "learning_rate": 1.0201415545000941e-06, - "loss": 0.0267, - "step": 7327 - }, - { - "epoch": 3.2547190761714413, - "grad_norm": 0.4171212598426182, - "learning_rate": 1.0189683917363947e-06, - "loss": 0.0227, - "step": 7328 - }, - { - "epoch": 3.2551632245169886, - "grad_norm": 0.49598266444833977, - "learning_rate": 1.0177958273893684e-06, - "loss": 0.0239, - "step": 7329 - }, - { - "epoch": 3.255607372862536, - "grad_norm": 0.3899965486282789, - "learning_rate": 1.016623861635277e-06, - "loss": 0.0209, - "step": 7330 - }, - { - "epoch": 3.2560515212080836, - "grad_norm": 0.43687191924955027, - "learning_rate": 1.0154524946502864e-06, - "loss": 0.0272, - "step": 7331 - }, - { - "epoch": 3.256495669553631, - "grad_norm": 0.4063747702825569, - "learning_rate": 1.01428172661047e-06, - "loss": 0.0255, - "step": 7332 - }, - { - "epoch": 3.256939817899178, - "grad_norm": 0.48223387404929335, - "learning_rate": 1.0131115576918154e-06, - "loss": 0.031, - "step": 7333 - }, - { - "epoch": 3.257383966244726, - "grad_norm": 0.39589428565081675, - "learning_rate": 1.011941988070219e-06, - "loss": 0.0222, - "step": 7334 - }, - { - "epoch": 3.257828114590273, - "grad_norm": 0.4100082037653643, - "learning_rate": 1.0107730179214875e-06, - "loss": 0.0359, - "step": 7335 - }, - { - "epoch": 3.2582722629358205, - "grad_norm": 0.7048819567321716, - "learning_rate": 1.0096046474213378e-06, - "loss": 0.0239, - "step": 7336 - }, - { - "epoch": 3.258716411281368, - "grad_norm": 0.4205899532427439, - "learning_rate": 1.008436876745393e-06, - "loss": 0.0329, - "step": 7337 - }, - { - "epoch": 3.2591605596269155, - "grad_norm": 0.3747110714323454, - "learning_rate": 1.00726970606919e-06, - "loss": 0.0217, - "step": 7338 - }, - { - "epoch": 3.259604707972463, - "grad_norm": 0.4970634544493451, - "learning_rate": 1.0061031355681766e-06, - "loss": 0.029, - "step": 7339 - }, - { - "epoch": 3.26004885631801, - "grad_norm": 0.35455430411205807, - "learning_rate": 1.0049371654177036e-06, - "loss": 0.0241, - "step": 7340 - }, - { - "epoch": 3.260493004663558, - "grad_norm": 0.38458418543297923, - "learning_rate": 1.0037717957930404e-06, - "loss": 0.0264, - "step": 7341 - }, - { - "epoch": 3.260937153009105, - "grad_norm": 0.44567315711458705, - "learning_rate": 1.0026070268693616e-06, - "loss": 0.033, - "step": 7342 - }, - { - "epoch": 3.2613813013546524, - "grad_norm": 0.6191592999982571, - "learning_rate": 1.0014428588217495e-06, - "loss": 0.0433, - "step": 7343 - }, - { - "epoch": 3.2618254497001997, - "grad_norm": 0.31811859198013076, - "learning_rate": 1.0002792918251991e-06, - "loss": 0.0178, - "step": 7344 - }, - { - "epoch": 3.2622695980457475, - "grad_norm": 0.4500830212917085, - "learning_rate": 9.991163260546154e-07, - "loss": 0.0283, - "step": 7345 - }, - { - "epoch": 3.2627137463912947, - "grad_norm": 0.45351163208898665, - "learning_rate": 9.979539616848088e-07, - "loss": 0.0347, - "step": 7346 - }, - { - "epoch": 3.263157894736842, - "grad_norm": 0.35335672660967254, - "learning_rate": 9.96792198890506e-07, - "loss": 0.0188, - "step": 7347 - }, - { - "epoch": 3.2636020430823893, - "grad_norm": 0.44507187430087025, - "learning_rate": 9.956310378463397e-07, - "loss": 0.028, - "step": 7348 - }, - { - "epoch": 3.264046191427937, - "grad_norm": 0.5621390992373146, - "learning_rate": 9.94470478726849e-07, - "loss": 0.0318, - "step": 7349 - }, - { - "epoch": 3.2644903397734844, - "grad_norm": 0.4752468135015428, - "learning_rate": 9.933105217064876e-07, - "loss": 0.0301, - "step": 7350 - }, - { - "epoch": 3.2649344881190316, - "grad_norm": 0.4866383052323718, - "learning_rate": 9.921511669596169e-07, - "loss": 0.0258, - "step": 7351 - }, - { - "epoch": 3.265378636464579, - "grad_norm": 0.4858680526762395, - "learning_rate": 9.909924146605065e-07, - "loss": 0.0337, - "step": 7352 - }, - { - "epoch": 3.2658227848101267, - "grad_norm": 0.39515608234465965, - "learning_rate": 9.898342649833392e-07, - "loss": 0.0192, - "step": 7353 - }, - { - "epoch": 3.266266933155674, - "grad_norm": 0.36506881604203717, - "learning_rate": 9.88676718102201e-07, - "loss": 0.021, - "step": 7354 - }, - { - "epoch": 3.2667110815012212, - "grad_norm": 0.4187792232437751, - "learning_rate": 9.87519774191093e-07, - "loss": 0.0331, - "step": 7355 - }, - { - "epoch": 3.267155229846769, - "grad_norm": 0.46579982125230246, - "learning_rate": 9.863634334239241e-07, - "loss": 0.0231, - "step": 7356 - }, - { - "epoch": 3.2675993781923163, - "grad_norm": 0.38694241705258164, - "learning_rate": 9.852076959745082e-07, - "loss": 0.0259, - "step": 7357 - }, - { - "epoch": 3.2680435265378636, - "grad_norm": 0.46212859105639015, - "learning_rate": 9.840525620165763e-07, - "loss": 0.0317, - "step": 7358 - }, - { - "epoch": 3.268487674883411, - "grad_norm": 0.5473555001130638, - "learning_rate": 9.828980317237652e-07, - "loss": 0.0396, - "step": 7359 - }, - { - "epoch": 3.2689318232289586, - "grad_norm": 0.36341029826160076, - "learning_rate": 9.817441052696164e-07, - "loss": 0.0185, - "step": 7360 - }, - { - "epoch": 3.269375971574506, - "grad_norm": 0.44926471264353585, - "learning_rate": 9.805907828275874e-07, - "loss": 0.0315, - "step": 7361 - }, - { - "epoch": 3.269820119920053, - "grad_norm": 0.3017207630293574, - "learning_rate": 9.794380645710428e-07, - "loss": 0.017, - "step": 7362 - }, - { - "epoch": 3.270264268265601, - "grad_norm": 0.4066309090272054, - "learning_rate": 9.782859506732517e-07, - "loss": 0.0252, - "step": 7363 - }, - { - "epoch": 3.270708416611148, - "grad_norm": 0.45822398666676095, - "learning_rate": 9.771344413074018e-07, - "loss": 0.0289, - "step": 7364 - }, - { - "epoch": 3.2711525649566955, - "grad_norm": 0.45754692936117375, - "learning_rate": 9.75983536646581e-07, - "loss": 0.0204, - "step": 7365 - }, - { - "epoch": 3.2715967133022428, - "grad_norm": 0.4003872658949929, - "learning_rate": 9.748332368637903e-07, - "loss": 0.0239, - "step": 7366 - }, - { - "epoch": 3.2720408616477905, - "grad_norm": 0.40491759770498775, - "learning_rate": 9.736835421319397e-07, - "loss": 0.016, - "step": 7367 - }, - { - "epoch": 3.272485009993338, - "grad_norm": 0.40672999266918314, - "learning_rate": 9.725344526238495e-07, - "loss": 0.0275, - "step": 7368 - }, - { - "epoch": 3.272929158338885, - "grad_norm": 0.4720762654089572, - "learning_rate": 9.713859685122428e-07, - "loss": 0.0267, - "step": 7369 - }, - { - "epoch": 3.273373306684433, - "grad_norm": 0.41020011208140905, - "learning_rate": 9.702380899697621e-07, - "loss": 0.0231, - "step": 7370 - }, - { - "epoch": 3.27381745502998, - "grad_norm": 0.51306227756855, - "learning_rate": 9.69090817168949e-07, - "loss": 0.0213, - "step": 7371 - }, - { - "epoch": 3.2742616033755274, - "grad_norm": 0.5514386921409069, - "learning_rate": 9.67944150282259e-07, - "loss": 0.0388, - "step": 7372 - }, - { - "epoch": 3.2747057517210747, - "grad_norm": 0.47876201361463355, - "learning_rate": 9.667980894820572e-07, - "loss": 0.0402, - "step": 7373 - }, - { - "epoch": 3.2751499000666224, - "grad_norm": 0.35596410172080417, - "learning_rate": 9.65652634940612e-07, - "loss": 0.0211, - "step": 7374 - }, - { - "epoch": 3.2755940484121697, - "grad_norm": 0.5330920908251411, - "learning_rate": 9.64507786830109e-07, - "loss": 0.0312, - "step": 7375 - }, - { - "epoch": 3.276038196757717, - "grad_norm": 0.44331071434194597, - "learning_rate": 9.633635453226376e-07, - "loss": 0.0241, - "step": 7376 - }, - { - "epoch": 3.2764823451032643, - "grad_norm": 0.4178851244614469, - "learning_rate": 9.622199105901947e-07, - "loss": 0.0251, - "step": 7377 - }, - { - "epoch": 3.276926493448812, - "grad_norm": 0.4925055411838903, - "learning_rate": 9.610768828046891e-07, - "loss": 0.0312, - "step": 7378 - }, - { - "epoch": 3.2773706417943593, - "grad_norm": 0.3674042441682116, - "learning_rate": 9.59934462137938e-07, - "loss": 0.0176, - "step": 7379 - }, - { - "epoch": 3.2778147901399066, - "grad_norm": 0.41837211074854475, - "learning_rate": 9.58792648761664e-07, - "loss": 0.0272, - "step": 7380 - }, - { - "epoch": 3.278258938485454, - "grad_norm": 0.35983457178846384, - "learning_rate": 9.576514428475058e-07, - "loss": 0.0182, - "step": 7381 - }, - { - "epoch": 3.2787030868310016, - "grad_norm": 0.5213483263717448, - "learning_rate": 9.565108445670013e-07, - "loss": 0.0285, - "step": 7382 - }, - { - "epoch": 3.279147235176549, - "grad_norm": 0.3798413510246776, - "learning_rate": 9.55370854091604e-07, - "loss": 0.0199, - "step": 7383 - }, - { - "epoch": 3.2795913835220962, - "grad_norm": 0.40316029556208016, - "learning_rate": 9.542314715926753e-07, - "loss": 0.023, - "step": 7384 - }, - { - "epoch": 3.280035531867644, - "grad_norm": 0.3279127936611918, - "learning_rate": 9.5309269724148e-07, - "loss": 0.0197, - "step": 7385 - }, - { - "epoch": 3.2804796802131913, - "grad_norm": 0.3077370020937381, - "learning_rate": 9.519545312091966e-07, - "loss": 0.0186, - "step": 7386 - }, - { - "epoch": 3.2809238285587385, - "grad_norm": 0.29637415446133053, - "learning_rate": 9.508169736669137e-07, - "loss": 0.0149, - "step": 7387 - }, - { - "epoch": 3.281367976904286, - "grad_norm": 0.36806133901057364, - "learning_rate": 9.496800247856219e-07, - "loss": 0.0255, - "step": 7388 - }, - { - "epoch": 3.2818121252498336, - "grad_norm": 0.42318209672526275, - "learning_rate": 9.485436847362257e-07, - "loss": 0.0368, - "step": 7389 - }, - { - "epoch": 3.282256273595381, - "grad_norm": 0.4481117630581104, - "learning_rate": 9.474079536895365e-07, - "loss": 0.0323, - "step": 7390 - }, - { - "epoch": 3.282700421940928, - "grad_norm": 0.4094546536057865, - "learning_rate": 9.462728318162712e-07, - "loss": 0.0216, - "step": 7391 - }, - { - "epoch": 3.283144570286476, - "grad_norm": 0.4833214117317346, - "learning_rate": 9.451383192870623e-07, - "loss": 0.0314, - "step": 7392 - }, - { - "epoch": 3.283588718632023, - "grad_norm": 0.49172490663115703, - "learning_rate": 9.440044162724432e-07, - "loss": 0.0229, - "step": 7393 - }, - { - "epoch": 3.2840328669775705, - "grad_norm": 0.3837206768061979, - "learning_rate": 9.428711229428594e-07, - "loss": 0.0281, - "step": 7394 - }, - { - "epoch": 3.2844770153231178, - "grad_norm": 0.38763854772417794, - "learning_rate": 9.417384394686646e-07, - "loss": 0.0256, - "step": 7395 - }, - { - "epoch": 3.2849211636686655, - "grad_norm": 0.4292993654748122, - "learning_rate": 9.406063660201214e-07, - "loss": 0.0178, - "step": 7396 - }, - { - "epoch": 3.285365312014213, - "grad_norm": 0.4608324783734576, - "learning_rate": 9.394749027673955e-07, - "loss": 0.03, - "step": 7397 - }, - { - "epoch": 3.28580946035976, - "grad_norm": 0.4116770755294882, - "learning_rate": 9.383440498805712e-07, - "loss": 0.0266, - "step": 7398 - }, - { - "epoch": 3.286253608705308, - "grad_norm": 0.4151574100082846, - "learning_rate": 9.3721380752963e-07, - "loss": 0.0316, - "step": 7399 - }, - { - "epoch": 3.286697757050855, - "grad_norm": 0.5201578859027275, - "learning_rate": 9.36084175884468e-07, - "loss": 0.0273, - "step": 7400 - }, - { - "epoch": 3.2871419053964024, - "grad_norm": 0.517408046194756, - "learning_rate": 9.3495515511489e-07, - "loss": 0.039, - "step": 7401 - }, - { - "epoch": 3.2875860537419497, - "grad_norm": 0.4065347285711147, - "learning_rate": 9.338267453906036e-07, - "loss": 0.0193, - "step": 7402 - }, - { - "epoch": 3.2880302020874974, - "grad_norm": 0.35637207302672474, - "learning_rate": 9.326989468812281e-07, - "loss": 0.0252, - "step": 7403 - }, - { - "epoch": 3.2884743504330447, - "grad_norm": 0.3942806613898594, - "learning_rate": 9.315717597562951e-07, - "loss": 0.0224, - "step": 7404 - }, - { - "epoch": 3.288918498778592, - "grad_norm": 0.38601773480071766, - "learning_rate": 9.304451841852358e-07, - "loss": 0.0291, - "step": 7405 - }, - { - "epoch": 3.2893626471241393, - "grad_norm": 0.4412874940460958, - "learning_rate": 9.293192203373952e-07, - "loss": 0.0223, - "step": 7406 - }, - { - "epoch": 3.289806795469687, - "grad_norm": 0.36047506716707345, - "learning_rate": 9.281938683820258e-07, - "loss": 0.0196, - "step": 7407 - }, - { - "epoch": 3.2902509438152343, - "grad_norm": 0.38105181354876544, - "learning_rate": 9.270691284882826e-07, - "loss": 0.0215, - "step": 7408 - }, - { - "epoch": 3.2906950921607816, - "grad_norm": 0.5134105812079635, - "learning_rate": 9.259450008252396e-07, - "loss": 0.0407, - "step": 7409 - }, - { - "epoch": 3.291139240506329, - "grad_norm": 0.3562398576839381, - "learning_rate": 9.248214855618676e-07, - "loss": 0.0267, - "step": 7410 - }, - { - "epoch": 3.2915833888518766, - "grad_norm": 0.40663195402797286, - "learning_rate": 9.236985828670519e-07, - "loss": 0.0187, - "step": 7411 - }, - { - "epoch": 3.292027537197424, - "grad_norm": 0.7855995128015365, - "learning_rate": 9.225762929095844e-07, - "loss": 0.0372, - "step": 7412 - }, - { - "epoch": 3.292471685542971, - "grad_norm": 0.4097302898181189, - "learning_rate": 9.214546158581622e-07, - "loss": 0.0252, - "step": 7413 - }, - { - "epoch": 3.292915833888519, - "grad_norm": 0.4179153487359494, - "learning_rate": 9.203335518813922e-07, - "loss": 0.0285, - "step": 7414 - }, - { - "epoch": 3.2933599822340662, - "grad_norm": 0.5431971211223403, - "learning_rate": 9.192131011477934e-07, - "loss": 0.0303, - "step": 7415 - }, - { - "epoch": 3.2938041305796135, - "grad_norm": 0.5065980372967815, - "learning_rate": 9.180932638257845e-07, - "loss": 0.028, - "step": 7416 - }, - { - "epoch": 3.294248278925161, - "grad_norm": 0.38822134135830527, - "learning_rate": 9.169740400836974e-07, - "loss": 0.029, - "step": 7417 - }, - { - "epoch": 3.2946924272707085, - "grad_norm": 0.3332433458833639, - "learning_rate": 9.158554300897727e-07, - "loss": 0.0174, - "step": 7418 - }, - { - "epoch": 3.295136575616256, - "grad_norm": 0.3544713822711631, - "learning_rate": 9.147374340121523e-07, - "loss": 0.022, - "step": 7419 - }, - { - "epoch": 3.295580723961803, - "grad_norm": 0.32602497933389224, - "learning_rate": 9.13620052018892e-07, - "loss": 0.0205, - "step": 7420 - }, - { - "epoch": 3.296024872307351, - "grad_norm": 0.45839560888184433, - "learning_rate": 9.125032842779535e-07, - "loss": 0.0285, - "step": 7421 - }, - { - "epoch": 3.296469020652898, - "grad_norm": 0.4451229775575657, - "learning_rate": 9.113871309572059e-07, - "loss": 0.0314, - "step": 7422 - }, - { - "epoch": 3.2969131689984454, - "grad_norm": 0.4294843373554615, - "learning_rate": 9.10271592224426e-07, - "loss": 0.0279, - "step": 7423 - }, - { - "epoch": 3.2973573173439927, - "grad_norm": 0.49025642020365773, - "learning_rate": 9.091566682472991e-07, - "loss": 0.0319, - "step": 7424 - }, - { - "epoch": 3.2978014656895405, - "grad_norm": 0.39446663575000246, - "learning_rate": 9.08042359193414e-07, - "loss": 0.0264, - "step": 7425 - }, - { - "epoch": 3.2982456140350878, - "grad_norm": 0.3073338148065069, - "learning_rate": 9.06928665230275e-07, - "loss": 0.022, - "step": 7426 - }, - { - "epoch": 3.298689762380635, - "grad_norm": 0.5363424487989449, - "learning_rate": 9.058155865252854e-07, - "loss": 0.0369, - "step": 7427 - }, - { - "epoch": 3.299133910726183, - "grad_norm": 0.4241449124561232, - "learning_rate": 9.047031232457609e-07, - "loss": 0.0284, - "step": 7428 - }, - { - "epoch": 3.29957805907173, - "grad_norm": 0.5039477021882938, - "learning_rate": 9.035912755589254e-07, - "loss": 0.0296, - "step": 7429 - }, - { - "epoch": 3.3000222074172774, - "grad_norm": 0.6325313716948877, - "learning_rate": 9.024800436319059e-07, - "loss": 0.0356, - "step": 7430 - }, - { - "epoch": 3.3004663557628247, - "grad_norm": 0.35326550695737485, - "learning_rate": 9.013694276317392e-07, - "loss": 0.0293, - "step": 7431 - }, - { - "epoch": 3.3009105041083724, - "grad_norm": 0.3943172052588995, - "learning_rate": 9.002594277253735e-07, - "loss": 0.0256, - "step": 7432 - }, - { - "epoch": 3.3013546524539197, - "grad_norm": 0.3559437962740476, - "learning_rate": 8.991500440796569e-07, - "loss": 0.0261, - "step": 7433 - }, - { - "epoch": 3.301798800799467, - "grad_norm": 0.3905214750787488, - "learning_rate": 8.9804127686135e-07, - "loss": 0.0166, - "step": 7434 - }, - { - "epoch": 3.3022429491450143, - "grad_norm": 0.5100939910295724, - "learning_rate": 8.969331262371206e-07, - "loss": 0.0305, - "step": 7435 - }, - { - "epoch": 3.302687097490562, - "grad_norm": 0.4081830294338292, - "learning_rate": 8.958255923735404e-07, - "loss": 0.0229, - "step": 7436 - }, - { - "epoch": 3.3031312458361093, - "grad_norm": 0.41704083656049834, - "learning_rate": 8.947186754370907e-07, - "loss": 0.0281, - "step": 7437 - }, - { - "epoch": 3.3035753941816566, - "grad_norm": 0.4789522896492798, - "learning_rate": 8.936123755941611e-07, - "loss": 0.0309, - "step": 7438 - }, - { - "epoch": 3.304019542527204, - "grad_norm": 0.5252768558443547, - "learning_rate": 8.925066930110465e-07, - "loss": 0.0265, - "step": 7439 - }, - { - "epoch": 3.3044636908727516, - "grad_norm": 0.34510316412181224, - "learning_rate": 8.914016278539516e-07, - "loss": 0.019, - "step": 7440 - }, - { - "epoch": 3.304907839218299, - "grad_norm": 0.5857701871825775, - "learning_rate": 8.902971802889832e-07, - "loss": 0.0387, - "step": 7441 - }, - { - "epoch": 3.305351987563846, - "grad_norm": 0.5033019496461394, - "learning_rate": 8.891933504821604e-07, - "loss": 0.0275, - "step": 7442 - }, - { - "epoch": 3.305796135909394, - "grad_norm": 0.3999151889282477, - "learning_rate": 8.880901385994079e-07, - "loss": 0.0233, - "step": 7443 - }, - { - "epoch": 3.306240284254941, - "grad_norm": 0.45097536087731294, - "learning_rate": 8.869875448065563e-07, - "loss": 0.0281, - "step": 7444 - }, - { - "epoch": 3.3066844326004885, - "grad_norm": 0.4146176937666059, - "learning_rate": 8.858855692693446e-07, - "loss": 0.0294, - "step": 7445 - }, - { - "epoch": 3.307128580946036, - "grad_norm": 0.32341587117342246, - "learning_rate": 8.847842121534195e-07, - "loss": 0.0254, - "step": 7446 - }, - { - "epoch": 3.3075727292915835, - "grad_norm": 0.3794842748021583, - "learning_rate": 8.836834736243316e-07, - "loss": 0.0219, - "step": 7447 - }, - { - "epoch": 3.308016877637131, - "grad_norm": 0.5092740919087244, - "learning_rate": 8.825833538475403e-07, - "loss": 0.0279, - "step": 7448 - }, - { - "epoch": 3.308461025982678, - "grad_norm": 0.3833821462873475, - "learning_rate": 8.814838529884162e-07, - "loss": 0.0263, - "step": 7449 - }, - { - "epoch": 3.308905174328226, - "grad_norm": 0.7887453397226605, - "learning_rate": 8.803849712122292e-07, - "loss": 0.0208, - "step": 7450 - }, - { - "epoch": 3.309349322673773, - "grad_norm": 0.3907962831950114, - "learning_rate": 8.792867086841605e-07, - "loss": 0.0206, - "step": 7451 - }, - { - "epoch": 3.3097934710193204, - "grad_norm": 0.3760419094116477, - "learning_rate": 8.781890655692998e-07, - "loss": 0.022, - "step": 7452 - }, - { - "epoch": 3.3102376193648677, - "grad_norm": 0.32255052704787457, - "learning_rate": 8.770920420326384e-07, - "loss": 0.0181, - "step": 7453 - }, - { - "epoch": 3.3106817677104154, - "grad_norm": 0.6578667511681319, - "learning_rate": 8.759956382390794e-07, - "loss": 0.0254, - "step": 7454 - }, - { - "epoch": 3.3111259160559627, - "grad_norm": 0.42423201463173366, - "learning_rate": 8.748998543534304e-07, - "loss": 0.0331, - "step": 7455 - }, - { - "epoch": 3.31157006440151, - "grad_norm": 0.4695388522211957, - "learning_rate": 8.738046905404069e-07, - "loss": 0.04, - "step": 7456 - }, - { - "epoch": 3.3120142127470578, - "grad_norm": 0.488606567568898, - "learning_rate": 8.72710146964631e-07, - "loss": 0.0274, - "step": 7457 - }, - { - "epoch": 3.312458361092605, - "grad_norm": 0.5406502377024667, - "learning_rate": 8.716162237906289e-07, - "loss": 0.028, - "step": 7458 - }, - { - "epoch": 3.3129025094381523, - "grad_norm": 0.5699097094725243, - "learning_rate": 8.705229211828376e-07, - "loss": 0.03, - "step": 7459 - }, - { - "epoch": 3.3133466577836996, - "grad_norm": 0.44717354800350295, - "learning_rate": 8.694302393055992e-07, - "loss": 0.0208, - "step": 7460 - }, - { - "epoch": 3.313790806129247, - "grad_norm": 0.4253321381991377, - "learning_rate": 8.683381783231615e-07, - "loss": 0.0234, - "step": 7461 - }, - { - "epoch": 3.3142349544747947, - "grad_norm": 0.3858599803822746, - "learning_rate": 8.672467383996802e-07, - "loss": 0.0281, - "step": 7462 - }, - { - "epoch": 3.314679102820342, - "grad_norm": 1.0071452454502907, - "learning_rate": 8.661559196992186e-07, - "loss": 0.029, - "step": 7463 - }, - { - "epoch": 3.3151232511658892, - "grad_norm": 0.5059569170522437, - "learning_rate": 8.650657223857428e-07, - "loss": 0.0304, - "step": 7464 - }, - { - "epoch": 3.315567399511437, - "grad_norm": 0.4605836239110675, - "learning_rate": 8.639761466231294e-07, - "loss": 0.0374, - "step": 7465 - }, - { - "epoch": 3.3160115478569843, - "grad_norm": 0.5663222302112452, - "learning_rate": 8.628871925751598e-07, - "loss": 0.029, - "step": 7466 - }, - { - "epoch": 3.3164556962025316, - "grad_norm": 0.4770838107638845, - "learning_rate": 8.617988604055222e-07, - "loss": 0.029, - "step": 7467 - }, - { - "epoch": 3.316899844548079, - "grad_norm": 0.5186866939183644, - "learning_rate": 8.607111502778121e-07, - "loss": 0.0367, - "step": 7468 - }, - { - "epoch": 3.3173439928936266, - "grad_norm": 0.43011652502356673, - "learning_rate": 8.596240623555313e-07, - "loss": 0.0276, - "step": 7469 - }, - { - "epoch": 3.317788141239174, - "grad_norm": 0.39270558043084164, - "learning_rate": 8.585375968020854e-07, - "loss": 0.0244, - "step": 7470 - }, - { - "epoch": 3.318232289584721, - "grad_norm": 0.3922390336468883, - "learning_rate": 8.574517537807897e-07, - "loss": 0.0247, - "step": 7471 - }, - { - "epoch": 3.318676437930269, - "grad_norm": 0.4905605263072924, - "learning_rate": 8.563665334548654e-07, - "loss": 0.0281, - "step": 7472 - }, - { - "epoch": 3.319120586275816, - "grad_norm": 0.4695814471855208, - "learning_rate": 8.552819359874387e-07, - "loss": 0.0222, - "step": 7473 - }, - { - "epoch": 3.3195647346213635, - "grad_norm": 0.49486185686147216, - "learning_rate": 8.541979615415446e-07, - "loss": 0.0304, - "step": 7474 - }, - { - "epoch": 3.3200088829669108, - "grad_norm": 0.40290781496927697, - "learning_rate": 8.531146102801208e-07, - "loss": 0.0221, - "step": 7475 - }, - { - "epoch": 3.3204530313124585, - "grad_norm": 0.45538047230870293, - "learning_rate": 8.520318823660146e-07, - "loss": 0.0257, - "step": 7476 - }, - { - "epoch": 3.320897179658006, - "grad_norm": 0.4425830829496296, - "learning_rate": 8.50949777961978e-07, - "loss": 0.027, - "step": 7477 - }, - { - "epoch": 3.321341328003553, - "grad_norm": 0.32164145024703766, - "learning_rate": 8.498682972306693e-07, - "loss": 0.0171, - "step": 7478 - }, - { - "epoch": 3.321785476349101, - "grad_norm": 0.33243355641729805, - "learning_rate": 8.487874403346547e-07, - "loss": 0.0171, - "step": 7479 - }, - { - "epoch": 3.322229624694648, - "grad_norm": 0.4774020513789674, - "learning_rate": 8.477072074364051e-07, - "loss": 0.0242, - "step": 7480 - }, - { - "epoch": 3.3226737730401954, - "grad_norm": 0.7842160045286404, - "learning_rate": 8.466275986982963e-07, - "loss": 0.0482, - "step": 7481 - }, - { - "epoch": 3.3231179213857427, - "grad_norm": 0.4462293368851325, - "learning_rate": 8.455486142826135e-07, - "loss": 0.0206, - "step": 7482 - }, - { - "epoch": 3.3235620697312904, - "grad_norm": 0.3371098785501344, - "learning_rate": 8.444702543515454e-07, - "loss": 0.016, - "step": 7483 - }, - { - "epoch": 3.3240062180768377, - "grad_norm": 0.4689818865427122, - "learning_rate": 8.433925190671876e-07, - "loss": 0.0294, - "step": 7484 - }, - { - "epoch": 3.324450366422385, - "grad_norm": 0.46126294870782475, - "learning_rate": 8.423154085915447e-07, - "loss": 0.0405, - "step": 7485 - }, - { - "epoch": 3.3248945147679323, - "grad_norm": 0.4269127171229397, - "learning_rate": 8.412389230865209e-07, - "loss": 0.0215, - "step": 7486 - }, - { - "epoch": 3.32533866311348, - "grad_norm": 0.3601691103633931, - "learning_rate": 8.401630627139317e-07, - "loss": 0.0283, - "step": 7487 - }, - { - "epoch": 3.3257828114590273, - "grad_norm": 0.3308167717075761, - "learning_rate": 8.39087827635498e-07, - "loss": 0.0203, - "step": 7488 - }, - { - "epoch": 3.3262269598045746, - "grad_norm": 0.4504744849786426, - "learning_rate": 8.380132180128453e-07, - "loss": 0.0308, - "step": 7489 - }, - { - "epoch": 3.326671108150122, - "grad_norm": 0.43274682572551426, - "learning_rate": 8.369392340075056e-07, - "loss": 0.0192, - "step": 7490 - }, - { - "epoch": 3.3271152564956696, - "grad_norm": 0.38752988514350534, - "learning_rate": 8.358658757809179e-07, - "loss": 0.0326, - "step": 7491 - }, - { - "epoch": 3.327559404841217, - "grad_norm": 0.3966193669552188, - "learning_rate": 8.347931434944245e-07, - "loss": 0.0218, - "step": 7492 - }, - { - "epoch": 3.328003553186764, - "grad_norm": 0.37005902190632034, - "learning_rate": 8.337210373092763e-07, - "loss": 0.02, - "step": 7493 - }, - { - "epoch": 3.328447701532312, - "grad_norm": 0.3982123833521082, - "learning_rate": 8.326495573866284e-07, - "loss": 0.0216, - "step": 7494 - }, - { - "epoch": 3.3288918498778592, - "grad_norm": 0.41678676657338065, - "learning_rate": 8.315787038875434e-07, - "loss": 0.0284, - "step": 7495 - }, - { - "epoch": 3.3293359982234065, - "grad_norm": 0.36841312927749376, - "learning_rate": 8.305084769729882e-07, - "loss": 0.0294, - "step": 7496 - }, - { - "epoch": 3.329780146568954, - "grad_norm": 0.42365099250088206, - "learning_rate": 8.294388768038375e-07, - "loss": 0.0294, - "step": 7497 - }, - { - "epoch": 3.3302242949145016, - "grad_norm": 0.40673867597662916, - "learning_rate": 8.283699035408677e-07, - "loss": 0.0235, - "step": 7498 - }, - { - "epoch": 3.330668443260049, - "grad_norm": 0.40817718954362553, - "learning_rate": 8.273015573447646e-07, - "loss": 0.0229, - "step": 7499 - }, - { - "epoch": 3.331112591605596, - "grad_norm": 0.47791402955813106, - "learning_rate": 8.262338383761199e-07, - "loss": 0.0306, - "step": 7500 - }, - { - "epoch": 3.331556739951144, - "grad_norm": 0.3730312585666912, - "learning_rate": 8.251667467954289e-07, - "loss": 0.0241, - "step": 7501 - }, - { - "epoch": 3.332000888296691, - "grad_norm": 0.5006016285563061, - "learning_rate": 8.241002827630945e-07, - "loss": 0.0284, - "step": 7502 - }, - { - "epoch": 3.3324450366422385, - "grad_norm": 0.5039257261200133, - "learning_rate": 8.230344464394236e-07, - "loss": 0.0234, - "step": 7503 - }, - { - "epoch": 3.3328891849877857, - "grad_norm": 0.38046904572274926, - "learning_rate": 8.219692379846289e-07, - "loss": 0.033, - "step": 7504 - }, - { - "epoch": 3.3333333333333335, - "grad_norm": 0.3757296330009624, - "learning_rate": 8.209046575588303e-07, - "loss": 0.0291, - "step": 7505 - }, - { - "epoch": 3.3337774816788808, - "grad_norm": 0.380415197644764, - "learning_rate": 8.198407053220519e-07, - "loss": 0.0219, - "step": 7506 - }, - { - "epoch": 3.334221630024428, - "grad_norm": 0.39647003849848267, - "learning_rate": 8.187773814342242e-07, - "loss": 0.0307, - "step": 7507 - }, - { - "epoch": 3.334665778369976, - "grad_norm": 0.48671927289549416, - "learning_rate": 8.177146860551838e-07, - "loss": 0.0289, - "step": 7508 - }, - { - "epoch": 3.335109926715523, - "grad_norm": 0.4169191245545791, - "learning_rate": 8.166526193446695e-07, - "loss": 0.0286, - "step": 7509 - }, - { - "epoch": 3.3355540750610704, - "grad_norm": 0.6209631358998482, - "learning_rate": 8.155911814623291e-07, - "loss": 0.0268, - "step": 7510 - }, - { - "epoch": 3.3359982234066177, - "grad_norm": 0.4569957343588699, - "learning_rate": 8.145303725677145e-07, - "loss": 0.025, - "step": 7511 - }, - { - "epoch": 3.3364423717521654, - "grad_norm": 0.45123275529280976, - "learning_rate": 8.134701928202843e-07, - "loss": 0.0207, - "step": 7512 - }, - { - "epoch": 3.3368865200977127, - "grad_norm": 0.3644402406540846, - "learning_rate": 8.124106423794015e-07, - "loss": 0.0156, - "step": 7513 - }, - { - "epoch": 3.33733066844326, - "grad_norm": 0.6935267382582291, - "learning_rate": 8.113517214043326e-07, - "loss": 0.042, - "step": 7514 - }, - { - "epoch": 3.3377748167888073, - "grad_norm": 0.40069923978491, - "learning_rate": 8.102934300542531e-07, - "loss": 0.0282, - "step": 7515 - }, - { - "epoch": 3.338218965134355, - "grad_norm": 0.4259846167113701, - "learning_rate": 8.092357684882413e-07, - "loss": 0.0289, - "step": 7516 - }, - { - "epoch": 3.3386631134799023, - "grad_norm": 0.4192011063222261, - "learning_rate": 8.081787368652822e-07, - "loss": 0.023, - "step": 7517 - }, - { - "epoch": 3.3391072618254496, - "grad_norm": 0.35804521930200645, - "learning_rate": 8.071223353442658e-07, - "loss": 0.0294, - "step": 7518 - }, - { - "epoch": 3.339551410170997, - "grad_norm": 0.3873707899082061, - "learning_rate": 8.060665640839882e-07, - "loss": 0.0228, - "step": 7519 - }, - { - "epoch": 3.3399955585165446, - "grad_norm": 0.33653268211511883, - "learning_rate": 8.050114232431472e-07, - "loss": 0.0229, - "step": 7520 - }, - { - "epoch": 3.340439706862092, - "grad_norm": 0.4424670871666316, - "learning_rate": 8.039569129803493e-07, - "loss": 0.0273, - "step": 7521 - }, - { - "epoch": 3.340883855207639, - "grad_norm": 0.36754810698365975, - "learning_rate": 8.029030334541061e-07, - "loss": 0.0212, - "step": 7522 - }, - { - "epoch": 3.341328003553187, - "grad_norm": 0.40011975008051315, - "learning_rate": 8.01849784822833e-07, - "loss": 0.0213, - "step": 7523 - }, - { - "epoch": 3.3417721518987342, - "grad_norm": 0.4091440045335287, - "learning_rate": 8.007971672448511e-07, - "loss": 0.0326, - "step": 7524 - }, - { - "epoch": 3.3422163002442815, - "grad_norm": 0.3979067624482782, - "learning_rate": 7.997451808783884e-07, - "loss": 0.0292, - "step": 7525 - }, - { - "epoch": 3.342660448589829, - "grad_norm": 0.4350141704240594, - "learning_rate": 7.986938258815741e-07, - "loss": 0.0294, - "step": 7526 - }, - { - "epoch": 3.3431045969353765, - "grad_norm": 0.3288238940788284, - "learning_rate": 7.976431024124448e-07, - "loss": 0.02, - "step": 7527 - }, - { - "epoch": 3.343548745280924, - "grad_norm": 0.40499639306856394, - "learning_rate": 7.965930106289432e-07, - "loss": 0.0249, - "step": 7528 - }, - { - "epoch": 3.343992893626471, - "grad_norm": 0.4511936733531736, - "learning_rate": 7.955435506889154e-07, - "loss": 0.0225, - "step": 7529 - }, - { - "epoch": 3.344437041972019, - "grad_norm": 0.32996474391647085, - "learning_rate": 7.944947227501143e-07, - "loss": 0.019, - "step": 7530 - }, - { - "epoch": 3.344881190317566, - "grad_norm": 0.33989523301533825, - "learning_rate": 7.934465269701941e-07, - "loss": 0.0281, - "step": 7531 - }, - { - "epoch": 3.3453253386631134, - "grad_norm": 0.5424696702981813, - "learning_rate": 7.923989635067181e-07, - "loss": 0.0341, - "step": 7532 - }, - { - "epoch": 3.3457694870086607, - "grad_norm": 0.5354455170131127, - "learning_rate": 7.913520325171537e-07, - "loss": 0.0332, - "step": 7533 - }, - { - "epoch": 3.3462136353542085, - "grad_norm": 0.5188210769342587, - "learning_rate": 7.903057341588683e-07, - "loss": 0.0346, - "step": 7534 - }, - { - "epoch": 3.3466577836997558, - "grad_norm": 0.32324444187136564, - "learning_rate": 7.892600685891433e-07, - "loss": 0.0202, - "step": 7535 - }, - { - "epoch": 3.347101932045303, - "grad_norm": 0.38834278141098566, - "learning_rate": 7.882150359651586e-07, - "loss": 0.0233, - "step": 7536 - }, - { - "epoch": 3.3475460803908508, - "grad_norm": 0.44587329880583815, - "learning_rate": 7.871706364439985e-07, - "loss": 0.0285, - "step": 7537 - }, - { - "epoch": 3.347990228736398, - "grad_norm": 0.38089139300391106, - "learning_rate": 7.861268701826552e-07, - "loss": 0.0264, - "step": 7538 - }, - { - "epoch": 3.3484343770819454, - "grad_norm": 0.3825436500451749, - "learning_rate": 7.850837373380244e-07, - "loss": 0.0206, - "step": 7539 - }, - { - "epoch": 3.3488785254274926, - "grad_norm": 0.3827903172923659, - "learning_rate": 7.840412380669071e-07, - "loss": 0.0221, - "step": 7540 - }, - { - "epoch": 3.3493226737730404, - "grad_norm": 0.38625911378052546, - "learning_rate": 7.829993725260082e-07, - "loss": 0.0192, - "step": 7541 - }, - { - "epoch": 3.3497668221185877, - "grad_norm": 0.37721835784709495, - "learning_rate": 7.81958140871939e-07, - "loss": 0.0291, - "step": 7542 - }, - { - "epoch": 3.350210970464135, - "grad_norm": 0.39810907293172026, - "learning_rate": 7.809175432612126e-07, - "loss": 0.0239, - "step": 7543 - }, - { - "epoch": 3.3506551188096823, - "grad_norm": 1.1010267945006187, - "learning_rate": 7.798775798502484e-07, - "loss": 0.0314, - "step": 7544 - }, - { - "epoch": 3.35109926715523, - "grad_norm": 0.4353628491116733, - "learning_rate": 7.788382507953718e-07, - "loss": 0.022, - "step": 7545 - }, - { - "epoch": 3.3515434155007773, - "grad_norm": 0.392235080799356, - "learning_rate": 7.777995562528107e-07, - "loss": 0.0187, - "step": 7546 - }, - { - "epoch": 3.3519875638463246, - "grad_norm": 0.4931466449389555, - "learning_rate": 7.767614963787007e-07, - "loss": 0.0176, - "step": 7547 - }, - { - "epoch": 3.352431712191872, - "grad_norm": 0.39853671183474193, - "learning_rate": 7.757240713290764e-07, - "loss": 0.0259, - "step": 7548 - }, - { - "epoch": 3.3528758605374196, - "grad_norm": 0.4566981980981673, - "learning_rate": 7.746872812598821e-07, - "loss": 0.0255, - "step": 7549 - }, - { - "epoch": 3.353320008882967, - "grad_norm": 0.4075307870306547, - "learning_rate": 7.736511263269664e-07, - "loss": 0.0259, - "step": 7550 - }, - { - "epoch": 3.353764157228514, - "grad_norm": 0.44508580022829525, - "learning_rate": 7.726156066860769e-07, - "loss": 0.0221, - "step": 7551 - }, - { - "epoch": 3.354208305574062, - "grad_norm": 0.5633393812829065, - "learning_rate": 7.715807224928734e-07, - "loss": 0.0295, - "step": 7552 - }, - { - "epoch": 3.354652453919609, - "grad_norm": 0.4540973249501273, - "learning_rate": 7.705464739029172e-07, - "loss": 0.0262, - "step": 7553 - }, - { - "epoch": 3.3550966022651565, - "grad_norm": 0.5423667462933897, - "learning_rate": 7.695128610716707e-07, - "loss": 0.0373, - "step": 7554 - }, - { - "epoch": 3.355540750610704, - "grad_norm": 0.41533591822222976, - "learning_rate": 7.684798841545043e-07, - "loss": 0.02, - "step": 7555 - }, - { - "epoch": 3.3559848989562515, - "grad_norm": 0.37206716629030406, - "learning_rate": 7.674475433066925e-07, - "loss": 0.027, - "step": 7556 - }, - { - "epoch": 3.356429047301799, - "grad_norm": 0.489116896622887, - "learning_rate": 7.664158386834131e-07, - "loss": 0.0276, - "step": 7557 - }, - { - "epoch": 3.356873195647346, - "grad_norm": 0.3415873437320046, - "learning_rate": 7.653847704397504e-07, - "loss": 0.0199, - "step": 7558 - }, - { - "epoch": 3.357317343992894, - "grad_norm": 0.4918207209258021, - "learning_rate": 7.643543387306896e-07, - "loss": 0.0232, - "step": 7559 - }, - { - "epoch": 3.357761492338441, - "grad_norm": 0.4284932846117368, - "learning_rate": 7.63324543711122e-07, - "loss": 0.0304, - "step": 7560 - }, - { - "epoch": 3.3582056406839884, - "grad_norm": 0.3697104694086724, - "learning_rate": 7.622953855358456e-07, - "loss": 0.0206, - "step": 7561 - }, - { - "epoch": 3.3586497890295357, - "grad_norm": 0.3548831840036858, - "learning_rate": 7.612668643595561e-07, - "loss": 0.0241, - "step": 7562 - }, - { - "epoch": 3.3590939373750834, - "grad_norm": 0.3814303904037155, - "learning_rate": 7.60238980336862e-07, - "loss": 0.0285, - "step": 7563 - }, - { - "epoch": 3.3595380857206307, - "grad_norm": 0.40972342736425, - "learning_rate": 7.592117336222709e-07, - "loss": 0.0231, - "step": 7564 - }, - { - "epoch": 3.359982234066178, - "grad_norm": 0.4313904313100148, - "learning_rate": 7.581851243701938e-07, - "loss": 0.0272, - "step": 7565 - }, - { - "epoch": 3.3604263824117258, - "grad_norm": 0.42249063117216307, - "learning_rate": 7.571591527349481e-07, - "loss": 0.0245, - "step": 7566 - }, - { - "epoch": 3.360870530757273, - "grad_norm": 0.6077788918648797, - "learning_rate": 7.561338188707562e-07, - "loss": 0.0245, - "step": 7567 - }, - { - "epoch": 3.3613146791028203, - "grad_norm": 0.3504403283983759, - "learning_rate": 7.551091229317398e-07, - "loss": 0.0233, - "step": 7568 - }, - { - "epoch": 3.3617588274483676, - "grad_norm": 0.5216233787292206, - "learning_rate": 7.540850650719317e-07, - "loss": 0.0323, - "step": 7569 - }, - { - "epoch": 3.3622029757939154, - "grad_norm": 0.350140151698931, - "learning_rate": 7.530616454452644e-07, - "loss": 0.0177, - "step": 7570 - }, - { - "epoch": 3.3626471241394627, - "grad_norm": 0.4407132954191557, - "learning_rate": 7.520388642055737e-07, - "loss": 0.0315, - "step": 7571 - }, - { - "epoch": 3.36309127248501, - "grad_norm": 0.39660867426600405, - "learning_rate": 7.510167215066022e-07, - "loss": 0.0219, - "step": 7572 - }, - { - "epoch": 3.3635354208305572, - "grad_norm": 0.4580507155342704, - "learning_rate": 7.499952175019947e-07, - "loss": 0.0289, - "step": 7573 - }, - { - "epoch": 3.363979569176105, - "grad_norm": 0.3682289326212527, - "learning_rate": 7.489743523453013e-07, - "loss": 0.0239, - "step": 7574 - }, - { - "epoch": 3.3644237175216523, - "grad_norm": 0.488168671743432, - "learning_rate": 7.479541261899758e-07, - "loss": 0.0257, - "step": 7575 - }, - { - "epoch": 3.3648678658671995, - "grad_norm": 0.5131286442085937, - "learning_rate": 7.469345391893739e-07, - "loss": 0.0251, - "step": 7576 - }, - { - "epoch": 3.365312014212747, - "grad_norm": 0.5086234654496984, - "learning_rate": 7.459155914967581e-07, - "loss": 0.0312, - "step": 7577 - }, - { - "epoch": 3.3657561625582946, - "grad_norm": 1.2159009232304825, - "learning_rate": 7.448972832652939e-07, - "loss": 0.034, - "step": 7578 - }, - { - "epoch": 3.366200310903842, - "grad_norm": 0.42264429800124254, - "learning_rate": 7.438796146480471e-07, - "loss": 0.0366, - "step": 7579 - }, - { - "epoch": 3.366644459249389, - "grad_norm": 0.474511735916579, - "learning_rate": 7.428625857979943e-07, - "loss": 0.0282, - "step": 7580 - }, - { - "epoch": 3.367088607594937, - "grad_norm": 0.48689953790784196, - "learning_rate": 7.418461968680124e-07, - "loss": 0.0303, - "step": 7581 - }, - { - "epoch": 3.367532755940484, - "grad_norm": 0.34637453409423774, - "learning_rate": 7.408304480108791e-07, - "loss": 0.0213, - "step": 7582 - }, - { - "epoch": 3.3679769042860315, - "grad_norm": 0.4489470189672078, - "learning_rate": 7.398153393792801e-07, - "loss": 0.0314, - "step": 7583 - }, - { - "epoch": 3.3684210526315788, - "grad_norm": 0.38625164102595216, - "learning_rate": 7.388008711258049e-07, - "loss": 0.0205, - "step": 7584 - }, - { - "epoch": 3.3688652009771265, - "grad_norm": 0.38974030926942727, - "learning_rate": 7.37787043402941e-07, - "loss": 0.0263, - "step": 7585 - }, - { - "epoch": 3.369309349322674, - "grad_norm": 0.43774082705869743, - "learning_rate": 7.367738563630894e-07, - "loss": 0.0218, - "step": 7586 - }, - { - "epoch": 3.369753497668221, - "grad_norm": 0.6108985899563969, - "learning_rate": 7.357613101585459e-07, - "loss": 0.0311, - "step": 7587 - }, - { - "epoch": 3.370197646013769, - "grad_norm": 0.46815029644071443, - "learning_rate": 7.347494049415139e-07, - "loss": 0.031, - "step": 7588 - }, - { - "epoch": 3.370641794359316, - "grad_norm": 0.38759059737412377, - "learning_rate": 7.337381408641004e-07, - "loss": 0.0201, - "step": 7589 - }, - { - "epoch": 3.3710859427048634, - "grad_norm": 0.44352496378452017, - "learning_rate": 7.327275180783156e-07, - "loss": 0.0321, - "step": 7590 - }, - { - "epoch": 3.3715300910504107, - "grad_norm": 0.48865244874942404, - "learning_rate": 7.317175367360729e-07, - "loss": 0.024, - "step": 7591 - }, - { - "epoch": 3.3719742393959584, - "grad_norm": 0.4173965077203295, - "learning_rate": 7.30708196989191e-07, - "loss": 0.0208, - "step": 7592 - }, - { - "epoch": 3.3724183877415057, - "grad_norm": 0.4555470735195906, - "learning_rate": 7.296994989893885e-07, - "loss": 0.0217, - "step": 7593 - }, - { - "epoch": 3.372862536087053, - "grad_norm": 0.3350515129151666, - "learning_rate": 7.286914428882913e-07, - "loss": 0.021, - "step": 7594 - }, - { - "epoch": 3.3733066844326007, - "grad_norm": 0.310648697981219, - "learning_rate": 7.276840288374281e-07, - "loss": 0.0257, - "step": 7595 - }, - { - "epoch": 3.373750832778148, - "grad_norm": 0.5863731801123462, - "learning_rate": 7.266772569882269e-07, - "loss": 0.0311, - "step": 7596 - }, - { - "epoch": 3.3741949811236953, - "grad_norm": 0.31868586904538343, - "learning_rate": 7.256711274920264e-07, - "loss": 0.0194, - "step": 7597 - }, - { - "epoch": 3.3746391294692426, - "grad_norm": 0.3360320574193569, - "learning_rate": 7.246656405000646e-07, - "loss": 0.0219, - "step": 7598 - }, - { - "epoch": 3.37508327781479, - "grad_norm": 0.4187991374923001, - "learning_rate": 7.236607961634812e-07, - "loss": 0.0266, - "step": 7599 - }, - { - "epoch": 3.3755274261603376, - "grad_norm": 0.3828524070852837, - "learning_rate": 7.22656594633322e-07, - "loss": 0.026, - "step": 7600 - }, - { - "epoch": 3.375971574505885, - "grad_norm": 0.4895737837652768, - "learning_rate": 7.216530360605379e-07, - "loss": 0.0413, - "step": 7601 - }, - { - "epoch": 3.376415722851432, - "grad_norm": 0.5857587373369051, - "learning_rate": 7.206501205959759e-07, - "loss": 0.0219, - "step": 7602 - }, - { - "epoch": 3.37685987119698, - "grad_norm": 0.383441583877617, - "learning_rate": 7.196478483903968e-07, - "loss": 0.0196, - "step": 7603 - }, - { - "epoch": 3.3773040195425272, - "grad_norm": 0.33141940449265894, - "learning_rate": 7.186462195944555e-07, - "loss": 0.0217, - "step": 7604 - }, - { - "epoch": 3.3777481678880745, - "grad_norm": 0.37300245379755964, - "learning_rate": 7.176452343587148e-07, - "loss": 0.0216, - "step": 7605 - }, - { - "epoch": 3.378192316233622, - "grad_norm": 0.33936395007321735, - "learning_rate": 7.166448928336411e-07, - "loss": 0.0222, - "step": 7606 - }, - { - "epoch": 3.3786364645791696, - "grad_norm": 0.5246214544898155, - "learning_rate": 7.156451951696003e-07, - "loss": 0.0235, - "step": 7607 - }, - { - "epoch": 3.379080612924717, - "grad_norm": 0.3948135131762046, - "learning_rate": 7.146461415168637e-07, - "loss": 0.0231, - "step": 7608 - }, - { - "epoch": 3.379524761270264, - "grad_norm": 0.37493268321499074, - "learning_rate": 7.136477320256102e-07, - "loss": 0.0328, - "step": 7609 - }, - { - "epoch": 3.379968909615812, - "grad_norm": 0.38860406022862326, - "learning_rate": 7.126499668459135e-07, - "loss": 0.0215, - "step": 7610 - }, - { - "epoch": 3.380413057961359, - "grad_norm": 0.443708157737675, - "learning_rate": 7.116528461277561e-07, - "loss": 0.0272, - "step": 7611 - }, - { - "epoch": 3.3808572063069064, - "grad_norm": 0.4409205615918148, - "learning_rate": 7.106563700210234e-07, - "loss": 0.0199, - "step": 7612 - }, - { - "epoch": 3.3813013546524537, - "grad_norm": 0.37530972086273984, - "learning_rate": 7.096605386754995e-07, - "loss": 0.0291, - "step": 7613 - }, - { - "epoch": 3.3817455029980015, - "grad_norm": 0.4774461806256775, - "learning_rate": 7.086653522408788e-07, - "loss": 0.0274, - "step": 7614 - }, - { - "epoch": 3.3821896513435488, - "grad_norm": 0.4294747376572903, - "learning_rate": 7.076708108667512e-07, - "loss": 0.024, - "step": 7615 - }, - { - "epoch": 3.382633799689096, - "grad_norm": 0.41100245317627226, - "learning_rate": 7.066769147026154e-07, - "loss": 0.0287, - "step": 7616 - }, - { - "epoch": 3.383077948034644, - "grad_norm": 0.3045061371856552, - "learning_rate": 7.056836638978698e-07, - "loss": 0.0198, - "step": 7617 - }, - { - "epoch": 3.383522096380191, - "grad_norm": 0.3136165513411521, - "learning_rate": 7.046910586018186e-07, - "loss": 0.0145, - "step": 7618 - }, - { - "epoch": 3.3839662447257384, - "grad_norm": 0.443848462635569, - "learning_rate": 7.036990989636628e-07, - "loss": 0.0296, - "step": 7619 - }, - { - "epoch": 3.3844103930712857, - "grad_norm": 0.4329635290692527, - "learning_rate": 7.027077851325164e-07, - "loss": 0.0286, - "step": 7620 - }, - { - "epoch": 3.3848545414168334, - "grad_norm": 0.4425986943763542, - "learning_rate": 7.017171172573872e-07, - "loss": 0.0319, - "step": 7621 - }, - { - "epoch": 3.3852986897623807, - "grad_norm": 0.3486098761277207, - "learning_rate": 7.007270954871903e-07, - "loss": 0.0269, - "step": 7622 - }, - { - "epoch": 3.385742838107928, - "grad_norm": 0.35813998328060914, - "learning_rate": 6.997377199707439e-07, - "loss": 0.0212, - "step": 7623 - }, - { - "epoch": 3.3861869864534753, - "grad_norm": 0.38415273327037347, - "learning_rate": 6.987489908567663e-07, - "loss": 0.0283, - "step": 7624 - }, - { - "epoch": 3.386631134799023, - "grad_norm": 0.42667107713144753, - "learning_rate": 6.977609082938791e-07, - "loss": 0.0364, - "step": 7625 - }, - { - "epoch": 3.3870752831445703, - "grad_norm": 0.4255004286761174, - "learning_rate": 6.967734724306119e-07, - "loss": 0.0374, - "step": 7626 - }, - { - "epoch": 3.3875194314901176, - "grad_norm": 0.3765149927746781, - "learning_rate": 6.957866834153898e-07, - "loss": 0.028, - "step": 7627 - }, - { - "epoch": 3.387963579835665, - "grad_norm": 0.5330001693156503, - "learning_rate": 6.948005413965448e-07, - "loss": 0.0379, - "step": 7628 - }, - { - "epoch": 3.3884077281812126, - "grad_norm": 0.5059080740347456, - "learning_rate": 6.938150465223126e-07, - "loss": 0.0244, - "step": 7629 - }, - { - "epoch": 3.38885187652676, - "grad_norm": 0.43454836222929405, - "learning_rate": 6.928301989408253e-07, - "loss": 0.0323, - "step": 7630 - }, - { - "epoch": 3.389296024872307, - "grad_norm": 0.3908471485523718, - "learning_rate": 6.918459988001281e-07, - "loss": 0.0312, - "step": 7631 - }, - { - "epoch": 3.389740173217855, - "grad_norm": 0.4420676727706239, - "learning_rate": 6.908624462481584e-07, - "loss": 0.026, - "step": 7632 - }, - { - "epoch": 3.390184321563402, - "grad_norm": 0.35748857241333987, - "learning_rate": 6.898795414327624e-07, - "loss": 0.0212, - "step": 7633 - }, - { - "epoch": 3.3906284699089495, - "grad_norm": 0.3032775147821817, - "learning_rate": 6.888972845016889e-07, - "loss": 0.0255, - "step": 7634 - }, - { - "epoch": 3.391072618254497, - "grad_norm": 0.3982622931840567, - "learning_rate": 6.879156756025851e-07, - "loss": 0.0281, - "step": 7635 - }, - { - "epoch": 3.3915167666000445, - "grad_norm": 0.5203243077622092, - "learning_rate": 6.869347148830035e-07, - "loss": 0.0249, - "step": 7636 - }, - { - "epoch": 3.391960914945592, - "grad_norm": 0.5073777888896281, - "learning_rate": 6.85954402490403e-07, - "loss": 0.0408, - "step": 7637 - }, - { - "epoch": 3.392405063291139, - "grad_norm": 0.6112073451304307, - "learning_rate": 6.849747385721373e-07, - "loss": 0.0262, - "step": 7638 - }, - { - "epoch": 3.392849211636687, - "grad_norm": 0.4334382118989557, - "learning_rate": 6.839957232754679e-07, - "loss": 0.0247, - "step": 7639 - }, - { - "epoch": 3.393293359982234, - "grad_norm": 0.37151198090225157, - "learning_rate": 6.830173567475584e-07, - "loss": 0.0251, - "step": 7640 - }, - { - "epoch": 3.3937375083277814, - "grad_norm": 0.47432508657043243, - "learning_rate": 6.820396391354722e-07, - "loss": 0.0361, - "step": 7641 - }, - { - "epoch": 3.3941816566733287, - "grad_norm": 0.39488273115178324, - "learning_rate": 6.810625705861762e-07, - "loss": 0.0226, - "step": 7642 - }, - { - "epoch": 3.3946258050188765, - "grad_norm": 0.5607339834034196, - "learning_rate": 6.80086151246544e-07, - "loss": 0.0403, - "step": 7643 - }, - { - "epoch": 3.3950699533644237, - "grad_norm": 0.4287176433981967, - "learning_rate": 6.791103812633443e-07, - "loss": 0.026, - "step": 7644 - }, - { - "epoch": 3.395514101709971, - "grad_norm": 0.4160527081898972, - "learning_rate": 6.781352607832536e-07, - "loss": 0.0248, - "step": 7645 - }, - { - "epoch": 3.3959582500555188, - "grad_norm": 0.31792810620229006, - "learning_rate": 6.771607899528504e-07, - "loss": 0.0179, - "step": 7646 - }, - { - "epoch": 3.396402398401066, - "grad_norm": 0.5611086221445625, - "learning_rate": 6.761869689186101e-07, - "loss": 0.0266, - "step": 7647 - }, - { - "epoch": 3.3968465467466133, - "grad_norm": 0.5050890369850448, - "learning_rate": 6.752137978269191e-07, - "loss": 0.0362, - "step": 7648 - }, - { - "epoch": 3.3972906950921606, - "grad_norm": 0.3650794330064718, - "learning_rate": 6.742412768240586e-07, - "loss": 0.0259, - "step": 7649 - }, - { - "epoch": 3.3977348434377084, - "grad_norm": 0.38912734986088593, - "learning_rate": 6.732694060562162e-07, - "loss": 0.0222, - "step": 7650 - }, - { - "epoch": 3.3981789917832557, - "grad_norm": 0.3696530901709022, - "learning_rate": 6.722981856694811e-07, - "loss": 0.0242, - "step": 7651 - }, - { - "epoch": 3.398623140128803, - "grad_norm": 0.35581748069826435, - "learning_rate": 6.713276158098425e-07, - "loss": 0.0203, - "step": 7652 - }, - { - "epoch": 3.3990672884743502, - "grad_norm": 0.3267819000046619, - "learning_rate": 6.703576966231939e-07, - "loss": 0.0177, - "step": 7653 - }, - { - "epoch": 3.399511436819898, - "grad_norm": 1.0254709261428898, - "learning_rate": 6.693884282553332e-07, - "loss": 0.0272, - "step": 7654 - }, - { - "epoch": 3.3999555851654453, - "grad_norm": 0.5090905796653779, - "learning_rate": 6.684198108519546e-07, - "loss": 0.026, - "step": 7655 - }, - { - "epoch": 3.4003997335109926, - "grad_norm": 0.47438772486404496, - "learning_rate": 6.674518445586592e-07, - "loss": 0.0293, - "step": 7656 - }, - { - "epoch": 3.40084388185654, - "grad_norm": 0.508053594793761, - "learning_rate": 6.664845295209499e-07, - "loss": 0.0397, - "step": 7657 - }, - { - "epoch": 3.4012880302020876, - "grad_norm": 0.361105004393522, - "learning_rate": 6.655178658842282e-07, - "loss": 0.0197, - "step": 7658 - }, - { - "epoch": 3.401732178547635, - "grad_norm": 0.36194311552740505, - "learning_rate": 6.645518537938012e-07, - "loss": 0.0233, - "step": 7659 - }, - { - "epoch": 3.402176326893182, - "grad_norm": 0.4835513612825343, - "learning_rate": 6.635864933948771e-07, - "loss": 0.0243, - "step": 7660 - }, - { - "epoch": 3.40262047523873, - "grad_norm": 0.40734978570499325, - "learning_rate": 6.626217848325656e-07, - "loss": 0.0235, - "step": 7661 - }, - { - "epoch": 3.403064623584277, - "grad_norm": 0.48301835002180094, - "learning_rate": 6.616577282518794e-07, - "loss": 0.0307, - "step": 7662 - }, - { - "epoch": 3.4035087719298245, - "grad_norm": 0.4251997392123568, - "learning_rate": 6.606943237977331e-07, - "loss": 0.0193, - "step": 7663 - }, - { - "epoch": 3.4039529202753718, - "grad_norm": 0.45591767603385597, - "learning_rate": 6.597315716149394e-07, - "loss": 0.0348, - "step": 7664 - }, - { - "epoch": 3.4043970686209195, - "grad_norm": 0.38978320299224184, - "learning_rate": 6.587694718482213e-07, - "loss": 0.024, - "step": 7665 - }, - { - "epoch": 3.404841216966467, - "grad_norm": 0.36098640085183586, - "learning_rate": 6.578080246421947e-07, - "loss": 0.0231, - "step": 7666 - }, - { - "epoch": 3.405285365312014, - "grad_norm": 0.42381127870689095, - "learning_rate": 6.568472301413836e-07, - "loss": 0.0293, - "step": 7667 - }, - { - "epoch": 3.405729513657562, - "grad_norm": 0.36978104460309597, - "learning_rate": 6.558870884902119e-07, - "loss": 0.0228, - "step": 7668 - }, - { - "epoch": 3.406173662003109, - "grad_norm": 0.5467829172657439, - "learning_rate": 6.549275998330029e-07, - "loss": 0.027, - "step": 7669 - }, - { - "epoch": 3.4066178103486564, - "grad_norm": 0.42439207241368543, - "learning_rate": 6.539687643139847e-07, - "loss": 0.0243, - "step": 7670 - }, - { - "epoch": 3.4070619586942037, - "grad_norm": 0.34272251460913966, - "learning_rate": 6.530105820772897e-07, - "loss": 0.0151, - "step": 7671 - }, - { - "epoch": 3.4075061070397514, - "grad_norm": 0.4182566186357329, - "learning_rate": 6.52053053266945e-07, - "loss": 0.0205, - "step": 7672 - }, - { - "epoch": 3.4079502553852987, - "grad_norm": 0.350434375510393, - "learning_rate": 6.51096178026886e-07, - "loss": 0.0237, - "step": 7673 - }, - { - "epoch": 3.408394403730846, - "grad_norm": 0.36842396127077415, - "learning_rate": 6.50139956500947e-07, - "loss": 0.0248, - "step": 7674 - }, - { - "epoch": 3.4088385520763937, - "grad_norm": 0.3776577384417059, - "learning_rate": 6.491843888328625e-07, - "loss": 0.026, - "step": 7675 - }, - { - "epoch": 3.409282700421941, - "grad_norm": 0.4739319012198071, - "learning_rate": 6.482294751662721e-07, - "loss": 0.0353, - "step": 7676 - }, - { - "epoch": 3.4097268487674883, - "grad_norm": 0.35847335034757755, - "learning_rate": 6.472752156447148e-07, - "loss": 0.0198, - "step": 7677 - }, - { - "epoch": 3.4101709971130356, - "grad_norm": 0.41470003345548434, - "learning_rate": 6.463216104116327e-07, - "loss": 0.0206, - "step": 7678 - }, - { - "epoch": 3.4106151454585834, - "grad_norm": 0.41774456358583356, - "learning_rate": 6.453686596103697e-07, - "loss": 0.0296, - "step": 7679 - }, - { - "epoch": 3.4110592938041306, - "grad_norm": 0.4014386567309541, - "learning_rate": 6.444163633841688e-07, - "loss": 0.0283, - "step": 7680 - }, - { - "epoch": 3.411503442149678, - "grad_norm": 0.5677232388670659, - "learning_rate": 6.434647218761764e-07, - "loss": 0.0399, - "step": 7681 - }, - { - "epoch": 3.4119475904952252, - "grad_norm": 0.38057897363042437, - "learning_rate": 6.425137352294408e-07, - "loss": 0.0241, - "step": 7682 - }, - { - "epoch": 3.412391738840773, - "grad_norm": 0.4520213263061807, - "learning_rate": 6.415634035869117e-07, - "loss": 0.0242, - "step": 7683 - }, - { - "epoch": 3.4128358871863202, - "grad_norm": 0.4445568386773207, - "learning_rate": 6.406137270914404e-07, - "loss": 0.0327, - "step": 7684 - }, - { - "epoch": 3.4132800355318675, - "grad_norm": 0.48242749215653374, - "learning_rate": 6.396647058857792e-07, - "loss": 0.031, - "step": 7685 - }, - { - "epoch": 3.413724183877415, - "grad_norm": 0.3708344900625483, - "learning_rate": 6.387163401125812e-07, - "loss": 0.0233, - "step": 7686 - }, - { - "epoch": 3.4141683322229626, - "grad_norm": 0.3782593306434555, - "learning_rate": 6.377686299144025e-07, - "loss": 0.0269, - "step": 7687 - }, - { - "epoch": 3.41461248056851, - "grad_norm": 0.43908345913471114, - "learning_rate": 6.368215754337004e-07, - "loss": 0.0307, - "step": 7688 - }, - { - "epoch": 3.415056628914057, - "grad_norm": 0.5407716737755903, - "learning_rate": 6.358751768128324e-07, - "loss": 0.0259, - "step": 7689 - }, - { - "epoch": 3.415500777259605, - "grad_norm": 0.3714076929525467, - "learning_rate": 6.349294341940593e-07, - "loss": 0.0182, - "step": 7690 - }, - { - "epoch": 3.415944925605152, - "grad_norm": 0.5168319772596983, - "learning_rate": 6.339843477195423e-07, - "loss": 0.0405, - "step": 7691 - }, - { - "epoch": 3.4163890739506995, - "grad_norm": 0.470888647389716, - "learning_rate": 6.330399175313429e-07, - "loss": 0.0326, - "step": 7692 - }, - { - "epoch": 3.4168332222962468, - "grad_norm": 0.3446195227420253, - "learning_rate": 6.320961437714257e-07, - "loss": 0.0136, - "step": 7693 - }, - { - "epoch": 3.4172773706417945, - "grad_norm": 0.37511059576115724, - "learning_rate": 6.311530265816551e-07, - "loss": 0.0336, - "step": 7694 - }, - { - "epoch": 3.4177215189873418, - "grad_norm": 0.362187597907496, - "learning_rate": 6.302105661037988e-07, - "loss": 0.0212, - "step": 7695 - }, - { - "epoch": 3.418165667332889, - "grad_norm": 0.4435954280766343, - "learning_rate": 6.292687624795257e-07, - "loss": 0.0315, - "step": 7696 - }, - { - "epoch": 3.418609815678437, - "grad_norm": 0.36818994844478536, - "learning_rate": 6.283276158504015e-07, - "loss": 0.0226, - "step": 7697 - }, - { - "epoch": 3.419053964023984, - "grad_norm": 0.35373090512917094, - "learning_rate": 6.27387126357899e-07, - "loss": 0.0165, - "step": 7698 - }, - { - "epoch": 3.4194981123695314, - "grad_norm": 0.364744423870996, - "learning_rate": 6.264472941433886e-07, - "loss": 0.0159, - "step": 7699 - }, - { - "epoch": 3.4199422607150787, - "grad_norm": 0.43254863668472154, - "learning_rate": 6.255081193481438e-07, - "loss": 0.0269, - "step": 7700 - }, - { - "epoch": 3.4203864090606264, - "grad_norm": 0.3606692968436507, - "learning_rate": 6.24569602113338e-07, - "loss": 0.0304, - "step": 7701 - }, - { - "epoch": 3.4208305574061737, - "grad_norm": 0.5007533786139542, - "learning_rate": 6.236317425800481e-07, - "loss": 0.0287, - "step": 7702 - }, - { - "epoch": 3.421274705751721, - "grad_norm": 0.41340230947723855, - "learning_rate": 6.226945408892477e-07, - "loss": 0.0199, - "step": 7703 - }, - { - "epoch": 3.4217188540972687, - "grad_norm": 0.365085912872857, - "learning_rate": 6.21757997181815e-07, - "loss": 0.021, - "step": 7704 - }, - { - "epoch": 3.422163002442816, - "grad_norm": 0.5548854003945028, - "learning_rate": 6.208221115985285e-07, - "loss": 0.0336, - "step": 7705 - }, - { - "epoch": 3.4226071507883633, - "grad_norm": 0.5898794387281542, - "learning_rate": 6.198868842800681e-07, - "loss": 0.0329, - "step": 7706 - }, - { - "epoch": 3.4230512991339106, - "grad_norm": 0.670696139265725, - "learning_rate": 6.189523153670152e-07, - "loss": 0.0411, - "step": 7707 - }, - { - "epoch": 3.4234954474794583, - "grad_norm": 0.44881661557818314, - "learning_rate": 6.180184049998489e-07, - "loss": 0.0294, - "step": 7708 - }, - { - "epoch": 3.4239395958250056, - "grad_norm": 0.41059208128468105, - "learning_rate": 6.170851533189537e-07, - "loss": 0.0221, - "step": 7709 - }, - { - "epoch": 3.424383744170553, - "grad_norm": 0.4264078976239932, - "learning_rate": 6.161525604646124e-07, - "loss": 0.0247, - "step": 7710 - }, - { - "epoch": 3.4248278925161, - "grad_norm": 0.4696315553933537, - "learning_rate": 6.152206265770095e-07, - "loss": 0.026, - "step": 7711 - }, - { - "epoch": 3.425272040861648, - "grad_norm": 0.45378823926056083, - "learning_rate": 6.142893517962312e-07, - "loss": 0.0235, - "step": 7712 - }, - { - "epoch": 3.4257161892071952, - "grad_norm": 0.4121833782140256, - "learning_rate": 6.133587362622645e-07, - "loss": 0.0272, - "step": 7713 - }, - { - "epoch": 3.4261603375527425, - "grad_norm": 0.3934369295423382, - "learning_rate": 6.124287801149942e-07, - "loss": 0.0254, - "step": 7714 - }, - { - "epoch": 3.42660448589829, - "grad_norm": 0.5182408476406597, - "learning_rate": 6.114994834942106e-07, - "loss": 0.0311, - "step": 7715 - }, - { - "epoch": 3.4270486342438375, - "grad_norm": 0.443783825192744, - "learning_rate": 6.105708465396021e-07, - "loss": 0.0245, - "step": 7716 - }, - { - "epoch": 3.427492782589385, - "grad_norm": 0.47129625335261466, - "learning_rate": 6.096428693907591e-07, - "loss": 0.0254, - "step": 7717 - }, - { - "epoch": 3.427936930934932, - "grad_norm": 0.38995292895347056, - "learning_rate": 6.087155521871713e-07, - "loss": 0.028, - "step": 7718 - }, - { - "epoch": 3.42838107928048, - "grad_norm": 0.38715826339897036, - "learning_rate": 6.077888950682326e-07, - "loss": 0.0228, - "step": 7719 - }, - { - "epoch": 3.428825227626027, - "grad_norm": 0.4108315391155564, - "learning_rate": 6.068628981732322e-07, - "loss": 0.0235, - "step": 7720 - }, - { - "epoch": 3.4292693759715744, - "grad_norm": 0.3702934691579192, - "learning_rate": 6.059375616413643e-07, - "loss": 0.0211, - "step": 7721 - }, - { - "epoch": 3.4297135243171217, - "grad_norm": 0.3963620092994827, - "learning_rate": 6.050128856117232e-07, - "loss": 0.019, - "step": 7722 - }, - { - "epoch": 3.4301576726626695, - "grad_norm": 0.37150524249459593, - "learning_rate": 6.040888702233033e-07, - "loss": 0.0224, - "step": 7723 - }, - { - "epoch": 3.4306018210082168, - "grad_norm": 0.44821126190431915, - "learning_rate": 6.031655156150007e-07, - "loss": 0.0305, - "step": 7724 - }, - { - "epoch": 3.431045969353764, - "grad_norm": 0.35283254586034607, - "learning_rate": 6.022428219256087e-07, - "loss": 0.024, - "step": 7725 - }, - { - "epoch": 3.431490117699312, - "grad_norm": 0.4935930259013348, - "learning_rate": 6.013207892938261e-07, - "loss": 0.0457, - "step": 7726 - }, - { - "epoch": 3.431934266044859, - "grad_norm": 0.45369029101077796, - "learning_rate": 6.003994178582489e-07, - "loss": 0.0243, - "step": 7727 - }, - { - "epoch": 3.4323784143904064, - "grad_norm": 0.48244218877941225, - "learning_rate": 5.994787077573754e-07, - "loss": 0.0231, - "step": 7728 - }, - { - "epoch": 3.4328225627359537, - "grad_norm": 0.3599007846172956, - "learning_rate": 5.985586591296044e-07, - "loss": 0.0182, - "step": 7729 - }, - { - "epoch": 3.4332667110815014, - "grad_norm": 0.40382161309843, - "learning_rate": 5.976392721132351e-07, - "loss": 0.0152, - "step": 7730 - }, - { - "epoch": 3.4337108594270487, - "grad_norm": 0.44806877408277146, - "learning_rate": 5.967205468464648e-07, - "loss": 0.026, - "step": 7731 - }, - { - "epoch": 3.434155007772596, - "grad_norm": 0.3566806457323869, - "learning_rate": 5.958024834673953e-07, - "loss": 0.0192, - "step": 7732 - }, - { - "epoch": 3.4345991561181437, - "grad_norm": 0.4251189473202698, - "learning_rate": 5.948850821140267e-07, - "loss": 0.0218, - "step": 7733 - }, - { - "epoch": 3.435043304463691, - "grad_norm": 0.38225003080929293, - "learning_rate": 5.939683429242604e-07, - "loss": 0.0217, - "step": 7734 - }, - { - "epoch": 3.4354874528092383, - "grad_norm": 0.34970498765009345, - "learning_rate": 5.930522660358973e-07, - "loss": 0.0167, - "step": 7735 - }, - { - "epoch": 3.4359316011547856, - "grad_norm": 0.41748998353130423, - "learning_rate": 5.921368515866405e-07, - "loss": 0.0314, - "step": 7736 - }, - { - "epoch": 3.436375749500333, - "grad_norm": 0.5166214998053038, - "learning_rate": 5.912220997140905e-07, - "loss": 0.0327, - "step": 7737 - }, - { - "epoch": 3.4368198978458806, - "grad_norm": 0.4503364511144795, - "learning_rate": 5.903080105557507e-07, - "loss": 0.0305, - "step": 7738 - }, - { - "epoch": 3.437264046191428, - "grad_norm": 0.38794068759388, - "learning_rate": 5.893945842490245e-07, - "loss": 0.0234, - "step": 7739 - }, - { - "epoch": 3.437708194536975, - "grad_norm": 0.4805056088393158, - "learning_rate": 5.884818209312159e-07, - "loss": 0.0298, - "step": 7740 - }, - { - "epoch": 3.438152342882523, - "grad_norm": 0.5076559758304271, - "learning_rate": 5.875697207395286e-07, - "loss": 0.035, - "step": 7741 - }, - { - "epoch": 3.43859649122807, - "grad_norm": 0.42338511678046004, - "learning_rate": 5.866582838110657e-07, - "loss": 0.0363, - "step": 7742 - }, - { - "epoch": 3.4390406395736175, - "grad_norm": 0.36808395417139134, - "learning_rate": 5.857475102828325e-07, - "loss": 0.0188, - "step": 7743 - }, - { - "epoch": 3.439484787919165, - "grad_norm": 0.3177380221983695, - "learning_rate": 5.848374002917329e-07, - "loss": 0.0193, - "step": 7744 - }, - { - "epoch": 3.4399289362647125, - "grad_norm": 0.3914364144553906, - "learning_rate": 5.839279539745729e-07, - "loss": 0.0223, - "step": 7745 - }, - { - "epoch": 3.44037308461026, - "grad_norm": 0.402651938874195, - "learning_rate": 5.830191714680578e-07, - "loss": 0.0199, - "step": 7746 - }, - { - "epoch": 3.440817232955807, - "grad_norm": 0.3522412455115687, - "learning_rate": 5.821110529087932e-07, - "loss": 0.0247, - "step": 7747 - }, - { - "epoch": 3.441261381301355, - "grad_norm": 0.4021635163074727, - "learning_rate": 5.812035984332832e-07, - "loss": 0.0229, - "step": 7748 - }, - { - "epoch": 3.441705529646902, - "grad_norm": 0.4251094753016676, - "learning_rate": 5.802968081779342e-07, - "loss": 0.024, - "step": 7749 - }, - { - "epoch": 3.4421496779924494, - "grad_norm": 0.3662362137440131, - "learning_rate": 5.79390682279053e-07, - "loss": 0.0247, - "step": 7750 - }, - { - "epoch": 3.4425938263379967, - "grad_norm": 0.4585215842279442, - "learning_rate": 5.784852208728453e-07, - "loss": 0.0295, - "step": 7751 - }, - { - "epoch": 3.4430379746835444, - "grad_norm": 0.5141537675737569, - "learning_rate": 5.775804240954181e-07, - "loss": 0.0318, - "step": 7752 - }, - { - "epoch": 3.4434821230290917, - "grad_norm": 0.5180769012624266, - "learning_rate": 5.766762920827762e-07, - "loss": 0.0255, - "step": 7753 - }, - { - "epoch": 3.443926271374639, - "grad_norm": 0.4003620998147694, - "learning_rate": 5.757728249708261e-07, - "loss": 0.0271, - "step": 7754 - }, - { - "epoch": 3.4443704197201868, - "grad_norm": 0.3418440737923465, - "learning_rate": 5.748700228953758e-07, - "loss": 0.0184, - "step": 7755 - }, - { - "epoch": 3.444814568065734, - "grad_norm": 0.687451428270991, - "learning_rate": 5.739678859921299e-07, - "loss": 0.0362, - "step": 7756 - }, - { - "epoch": 3.4452587164112813, - "grad_norm": 0.5024182127797775, - "learning_rate": 5.730664143966969e-07, - "loss": 0.0278, - "step": 7757 - }, - { - "epoch": 3.4457028647568286, - "grad_norm": 0.38303277714696055, - "learning_rate": 5.721656082445825e-07, - "loss": 0.0193, - "step": 7758 - }, - { - "epoch": 3.4461470131023764, - "grad_norm": 0.47510936774303797, - "learning_rate": 5.712654676711921e-07, - "loss": 0.0246, - "step": 7759 - }, - { - "epoch": 3.4465911614479237, - "grad_norm": 0.3741311721785513, - "learning_rate": 5.703659928118333e-07, - "loss": 0.0181, - "step": 7760 - }, - { - "epoch": 3.447035309793471, - "grad_norm": 0.4576763768736328, - "learning_rate": 5.694671838017119e-07, - "loss": 0.0274, - "step": 7761 - }, - { - "epoch": 3.4474794581390182, - "grad_norm": 0.36322150440607, - "learning_rate": 5.685690407759342e-07, - "loss": 0.0198, - "step": 7762 - }, - { - "epoch": 3.447923606484566, - "grad_norm": 0.5039426250866698, - "learning_rate": 5.676715638695063e-07, - "loss": 0.0299, - "step": 7763 - }, - { - "epoch": 3.4483677548301133, - "grad_norm": 0.499760373842697, - "learning_rate": 5.667747532173362e-07, - "loss": 0.0244, - "step": 7764 - }, - { - "epoch": 3.4488119031756606, - "grad_norm": 0.42349126436586376, - "learning_rate": 5.658786089542262e-07, - "loss": 0.0265, - "step": 7765 - }, - { - "epoch": 3.449256051521208, - "grad_norm": 0.5133192715069692, - "learning_rate": 5.649831312148845e-07, - "loss": 0.0381, - "step": 7766 - }, - { - "epoch": 3.4497001998667556, - "grad_norm": 0.4265674001652618, - "learning_rate": 5.640883201339154e-07, - "loss": 0.0254, - "step": 7767 - }, - { - "epoch": 3.450144348212303, - "grad_norm": 0.5615805590965591, - "learning_rate": 5.631941758458254e-07, - "loss": 0.028, - "step": 7768 - }, - { - "epoch": 3.45058849655785, - "grad_norm": 0.6650175323233352, - "learning_rate": 5.623006984850193e-07, - "loss": 0.0313, - "step": 7769 - }, - { - "epoch": 3.451032644903398, - "grad_norm": 0.4291032020769662, - "learning_rate": 5.61407888185801e-07, - "loss": 0.031, - "step": 7770 - }, - { - "epoch": 3.451476793248945, - "grad_norm": 0.438818710394981, - "learning_rate": 5.60515745082375e-07, - "loss": 0.0271, - "step": 7771 - }, - { - "epoch": 3.4519209415944925, - "grad_norm": 0.4109123087663869, - "learning_rate": 5.596242693088478e-07, - "loss": 0.022, - "step": 7772 - }, - { - "epoch": 3.4523650899400398, - "grad_norm": 0.5366106862877935, - "learning_rate": 5.587334609992195e-07, - "loss": 0.04, - "step": 7773 - }, - { - "epoch": 3.4528092382855875, - "grad_norm": 0.37769149540290886, - "learning_rate": 5.578433202873967e-07, - "loss": 0.0292, - "step": 7774 - }, - { - "epoch": 3.453253386631135, - "grad_norm": 0.4649378802579783, - "learning_rate": 5.569538473071834e-07, - "loss": 0.0227, - "step": 7775 - }, - { - "epoch": 3.453697534976682, - "grad_norm": 0.48743583603374796, - "learning_rate": 5.560650421922798e-07, - "loss": 0.0315, - "step": 7776 - }, - { - "epoch": 3.45414168332223, - "grad_norm": 0.5101610442676535, - "learning_rate": 5.551769050762895e-07, - "loss": 0.03, - "step": 7777 - }, - { - "epoch": 3.454585831667777, - "grad_norm": 0.3279417409470981, - "learning_rate": 5.542894360927148e-07, - "loss": 0.0171, - "step": 7778 - }, - { - "epoch": 3.4550299800133244, - "grad_norm": 0.36215562405473767, - "learning_rate": 5.534026353749572e-07, - "loss": 0.0249, - "step": 7779 - }, - { - "epoch": 3.4554741283588717, - "grad_norm": 0.33716482541449544, - "learning_rate": 5.52516503056319e-07, - "loss": 0.0205, - "step": 7780 - }, - { - "epoch": 3.4559182767044194, - "grad_norm": 0.3903797663782476, - "learning_rate": 5.516310392699991e-07, - "loss": 0.0192, - "step": 7781 - }, - { - "epoch": 3.4563624250499667, - "grad_norm": 0.4369763600207604, - "learning_rate": 5.507462441490985e-07, - "loss": 0.0254, - "step": 7782 - }, - { - "epoch": 3.456806573395514, - "grad_norm": 0.4196928785980343, - "learning_rate": 5.498621178266167e-07, - "loss": 0.0242, - "step": 7783 - }, - { - "epoch": 3.4572507217410617, - "grad_norm": 0.36290486329076327, - "learning_rate": 5.489786604354535e-07, - "loss": 0.0284, - "step": 7784 - }, - { - "epoch": 3.457694870086609, - "grad_norm": 0.4926206504376682, - "learning_rate": 5.480958721084074e-07, - "loss": 0.0281, - "step": 7785 - }, - { - "epoch": 3.4581390184321563, - "grad_norm": 0.4384128683223243, - "learning_rate": 5.472137529781768e-07, - "loss": 0.0281, - "step": 7786 - }, - { - "epoch": 3.4585831667777036, - "grad_norm": 0.42664316505284405, - "learning_rate": 5.463323031773581e-07, - "loss": 0.0286, - "step": 7787 - }, - { - "epoch": 3.4590273151232513, - "grad_norm": 0.47490376622622354, - "learning_rate": 5.454515228384493e-07, - "loss": 0.0208, - "step": 7788 - }, - { - "epoch": 3.4594714634687986, - "grad_norm": 0.40714987915665685, - "learning_rate": 5.445714120938467e-07, - "loss": 0.0321, - "step": 7789 - }, - { - "epoch": 3.459915611814346, - "grad_norm": 0.3507834802550716, - "learning_rate": 5.436919710758432e-07, - "loss": 0.0225, - "step": 7790 - }, - { - "epoch": 3.460359760159893, - "grad_norm": 0.3642385320392474, - "learning_rate": 5.42813199916637e-07, - "loss": 0.025, - "step": 7791 - }, - { - "epoch": 3.460803908505441, - "grad_norm": 0.3676585444584738, - "learning_rate": 5.419350987483224e-07, - "loss": 0.026, - "step": 7792 - }, - { - "epoch": 3.4612480568509882, - "grad_norm": 0.42929474369532705, - "learning_rate": 5.410576677028906e-07, - "loss": 0.0316, - "step": 7793 - }, - { - "epoch": 3.4616922051965355, - "grad_norm": 0.45518167323880976, - "learning_rate": 5.401809069122354e-07, - "loss": 0.0284, - "step": 7794 - }, - { - "epoch": 3.462136353542083, - "grad_norm": 0.4983281791130265, - "learning_rate": 5.393048165081493e-07, - "loss": 0.0411, - "step": 7795 - }, - { - "epoch": 3.4625805018876306, - "grad_norm": 0.419339473620287, - "learning_rate": 5.384293966223231e-07, - "loss": 0.0215, - "step": 7796 - }, - { - "epoch": 3.463024650233178, - "grad_norm": 0.3599252115295799, - "learning_rate": 5.37554647386348e-07, - "loss": 0.0254, - "step": 7797 - }, - { - "epoch": 3.463468798578725, - "grad_norm": 0.35977012849103657, - "learning_rate": 5.366805689317129e-07, - "loss": 0.0293, - "step": 7798 - }, - { - "epoch": 3.463912946924273, - "grad_norm": 0.5206144817249008, - "learning_rate": 5.358071613898064e-07, - "loss": 0.0272, - "step": 7799 - }, - { - "epoch": 3.46435709526982, - "grad_norm": 0.2890681249275682, - "learning_rate": 5.349344248919175e-07, - "loss": 0.0199, - "step": 7800 - }, - { - "epoch": 3.4648012436153675, - "grad_norm": 0.4535682999458182, - "learning_rate": 5.340623595692313e-07, - "loss": 0.0312, - "step": 7801 - }, - { - "epoch": 3.4652453919609147, - "grad_norm": 0.4712735624226538, - "learning_rate": 5.331909655528361e-07, - "loss": 0.0294, - "step": 7802 - }, - { - "epoch": 3.4656895403064625, - "grad_norm": 0.39969281186884054, - "learning_rate": 5.323202429737179e-07, - "loss": 0.0214, - "step": 7803 - }, - { - "epoch": 3.4661336886520098, - "grad_norm": 0.40892930316739623, - "learning_rate": 5.31450191962759e-07, - "loss": 0.0273, - "step": 7804 - }, - { - "epoch": 3.466577836997557, - "grad_norm": 0.40420963258096243, - "learning_rate": 5.305808126507433e-07, - "loss": 0.0257, - "step": 7805 - }, - { - "epoch": 3.467021985343105, - "grad_norm": 0.4298719054004784, - "learning_rate": 5.297121051683546e-07, - "loss": 0.0202, - "step": 7806 - }, - { - "epoch": 3.467466133688652, - "grad_norm": 0.4156335033725289, - "learning_rate": 5.288440696461716e-07, - "loss": 0.0206, - "step": 7807 - }, - { - "epoch": 3.4679102820341994, - "grad_norm": 0.6442580410348813, - "learning_rate": 5.279767062146784e-07, - "loss": 0.0335, - "step": 7808 - }, - { - "epoch": 3.4683544303797467, - "grad_norm": 0.42219725211126974, - "learning_rate": 5.271100150042518e-07, - "loss": 0.0263, - "step": 7809 - }, - { - "epoch": 3.4687985787252944, - "grad_norm": 0.39548539897151663, - "learning_rate": 5.262439961451709e-07, - "loss": 0.0249, - "step": 7810 - }, - { - "epoch": 3.4692427270708417, - "grad_norm": 0.3080627076539832, - "learning_rate": 5.253786497676134e-07, - "loss": 0.0189, - "step": 7811 - }, - { - "epoch": 3.469686875416389, - "grad_norm": 0.435674321335036, - "learning_rate": 5.245139760016549e-07, - "loss": 0.026, - "step": 7812 - }, - { - "epoch": 3.4701310237619367, - "grad_norm": 0.3308167388953746, - "learning_rate": 5.236499749772716e-07, - "loss": 0.0192, - "step": 7813 - }, - { - "epoch": 3.470575172107484, - "grad_norm": 0.411730156267874, - "learning_rate": 5.227866468243376e-07, - "loss": 0.0278, - "step": 7814 - }, - { - "epoch": 3.4710193204530313, - "grad_norm": 0.4770932092793088, - "learning_rate": 5.219239916726243e-07, - "loss": 0.0356, - "step": 7815 - }, - { - "epoch": 3.4714634687985786, - "grad_norm": 0.5082942621753359, - "learning_rate": 5.210620096518044e-07, - "loss": 0.0335, - "step": 7816 - }, - { - "epoch": 3.4719076171441263, - "grad_norm": 0.5849666623331924, - "learning_rate": 5.202007008914489e-07, - "loss": 0.0298, - "step": 7817 - }, - { - "epoch": 3.4723517654896736, - "grad_norm": 0.5043080758971297, - "learning_rate": 5.193400655210251e-07, - "loss": 0.0257, - "step": 7818 - }, - { - "epoch": 3.472795913835221, - "grad_norm": 0.35362037908035154, - "learning_rate": 5.184801036699033e-07, - "loss": 0.0195, - "step": 7819 - }, - { - "epoch": 3.473240062180768, - "grad_norm": 0.3793466003982311, - "learning_rate": 5.176208154673502e-07, - "loss": 0.0198, - "step": 7820 - }, - { - "epoch": 3.473684210526316, - "grad_norm": 0.3602963289642836, - "learning_rate": 5.167622010425305e-07, - "loss": 0.0241, - "step": 7821 - }, - { - "epoch": 3.474128358871863, - "grad_norm": 0.4045415464931093, - "learning_rate": 5.159042605245085e-07, - "loss": 0.0308, - "step": 7822 - }, - { - "epoch": 3.4745725072174105, - "grad_norm": 0.3631349056889571, - "learning_rate": 5.150469940422487e-07, - "loss": 0.0199, - "step": 7823 - }, - { - "epoch": 3.475016655562958, - "grad_norm": 0.47233718849299045, - "learning_rate": 5.141904017246097e-07, - "loss": 0.0192, - "step": 7824 - }, - { - "epoch": 3.4754608039085055, - "grad_norm": 0.4948999022011918, - "learning_rate": 5.133344837003557e-07, - "loss": 0.0354, - "step": 7825 - }, - { - "epoch": 3.475904952254053, - "grad_norm": 0.37036897500750726, - "learning_rate": 5.124792400981432e-07, - "loss": 0.0236, - "step": 7826 - }, - { - "epoch": 3.4763491005996, - "grad_norm": 0.33159665048430614, - "learning_rate": 5.116246710465306e-07, - "loss": 0.0243, - "step": 7827 - }, - { - "epoch": 3.476793248945148, - "grad_norm": 0.4934189727153532, - "learning_rate": 5.10770776673975e-07, - "loss": 0.0221, - "step": 7828 - }, - { - "epoch": 3.477237397290695, - "grad_norm": 0.40096875281363703, - "learning_rate": 5.099175571088283e-07, - "loss": 0.0224, - "step": 7829 - }, - { - "epoch": 3.4776815456362424, - "grad_norm": 0.5694968604223205, - "learning_rate": 5.090650124793472e-07, - "loss": 0.0344, - "step": 7830 - }, - { - "epoch": 3.4781256939817897, - "grad_norm": 0.347016380477264, - "learning_rate": 5.082131429136833e-07, - "loss": 0.0246, - "step": 7831 - }, - { - "epoch": 3.4785698423273375, - "grad_norm": 0.3939110962708003, - "learning_rate": 5.073619485398845e-07, - "loss": 0.0216, - "step": 7832 - }, - { - "epoch": 3.4790139906728847, - "grad_norm": 0.5312038258908143, - "learning_rate": 5.065114294859019e-07, - "loss": 0.026, - "step": 7833 - }, - { - "epoch": 3.479458139018432, - "grad_norm": 0.3820612120014884, - "learning_rate": 5.056615858795838e-07, - "loss": 0.0231, - "step": 7834 - }, - { - "epoch": 3.4799022873639798, - "grad_norm": 0.5500948375085558, - "learning_rate": 5.048124178486724e-07, - "loss": 0.0371, - "step": 7835 - }, - { - "epoch": 3.480346435709527, - "grad_norm": 0.4385367139000315, - "learning_rate": 5.039639255208156e-07, - "loss": 0.032, - "step": 7836 - }, - { - "epoch": 3.4807905840550744, - "grad_norm": 0.3825085947680331, - "learning_rate": 5.031161090235559e-07, - "loss": 0.0214, - "step": 7837 - }, - { - "epoch": 3.4812347324006216, - "grad_norm": 0.42377290578412, - "learning_rate": 5.022689684843329e-07, - "loss": 0.0253, - "step": 7838 - }, - { - "epoch": 3.4816788807461694, - "grad_norm": 0.3200977768252115, - "learning_rate": 5.014225040304871e-07, - "loss": 0.0172, - "step": 7839 - }, - { - "epoch": 3.4821230290917167, - "grad_norm": 0.4075927728120571, - "learning_rate": 5.005767157892572e-07, - "loss": 0.0234, - "step": 7840 - }, - { - "epoch": 3.482567177437264, - "grad_norm": 0.494712633880696, - "learning_rate": 4.99731603887777e-07, - "loss": 0.0308, - "step": 7841 - }, - { - "epoch": 3.4830113257828117, - "grad_norm": 0.3951757300545286, - "learning_rate": 4.98887168453085e-07, - "loss": 0.0249, - "step": 7842 - }, - { - "epoch": 3.483455474128359, - "grad_norm": 0.3887321324351885, - "learning_rate": 4.980434096121106e-07, - "loss": 0.0299, - "step": 7843 - }, - { - "epoch": 3.4838996224739063, - "grad_norm": 0.403812873928378, - "learning_rate": 4.97200327491687e-07, - "loss": 0.0219, - "step": 7844 - }, - { - "epoch": 3.4843437708194536, - "grad_norm": 0.4454713027127471, - "learning_rate": 4.963579222185444e-07, - "loss": 0.0247, - "step": 7845 - }, - { - "epoch": 3.4847879191650013, - "grad_norm": 0.41374791040066133, - "learning_rate": 4.955161939193087e-07, - "loss": 0.0213, - "step": 7846 - }, - { - "epoch": 3.4852320675105486, - "grad_norm": 0.460381233413127, - "learning_rate": 4.946751427205054e-07, - "loss": 0.0359, - "step": 7847 - }, - { - "epoch": 3.485676215856096, - "grad_norm": 0.5274863598727876, - "learning_rate": 4.938347687485629e-07, - "loss": 0.0249, - "step": 7848 - }, - { - "epoch": 3.486120364201643, - "grad_norm": 0.3376353042657825, - "learning_rate": 4.929950721297993e-07, - "loss": 0.0197, - "step": 7849 - }, - { - "epoch": 3.486564512547191, - "grad_norm": 0.4896159029658331, - "learning_rate": 4.921560529904374e-07, - "loss": 0.0291, - "step": 7850 - }, - { - "epoch": 3.487008660892738, - "grad_norm": 0.36649328899036365, - "learning_rate": 4.913177114565964e-07, - "loss": 0.0198, - "step": 7851 - }, - { - "epoch": 3.4874528092382855, - "grad_norm": 0.46757175677374957, - "learning_rate": 4.90480047654291e-07, - "loss": 0.0359, - "step": 7852 - }, - { - "epoch": 3.487896957583833, - "grad_norm": 0.4027846982490894, - "learning_rate": 4.896430617094389e-07, - "loss": 0.0284, - "step": 7853 - }, - { - "epoch": 3.4883411059293805, - "grad_norm": 0.41506130883345627, - "learning_rate": 4.888067537478519e-07, - "loss": 0.0253, - "step": 7854 - }, - { - "epoch": 3.488785254274928, - "grad_norm": 0.4526519663181836, - "learning_rate": 4.879711238952412e-07, - "loss": 0.0327, - "step": 7855 - }, - { - "epoch": 3.489229402620475, - "grad_norm": 0.3779652676361818, - "learning_rate": 4.871361722772166e-07, - "loss": 0.0336, - "step": 7856 - }, - { - "epoch": 3.489673550966023, - "grad_norm": 0.6066583572691198, - "learning_rate": 4.86301899019287e-07, - "loss": 0.0277, - "step": 7857 - }, - { - "epoch": 3.49011769931157, - "grad_norm": 0.516592567980747, - "learning_rate": 4.854683042468538e-07, - "loss": 0.0345, - "step": 7858 - }, - { - "epoch": 3.4905618476571174, - "grad_norm": 0.3300726184397244, - "learning_rate": 4.84635388085225e-07, - "loss": 0.0208, - "step": 7859 - }, - { - "epoch": 3.4910059960026647, - "grad_norm": 0.4075507938842562, - "learning_rate": 4.838031506595992e-07, - "loss": 0.0253, - "step": 7860 - }, - { - "epoch": 3.4914501443482124, - "grad_norm": 0.41601787060330486, - "learning_rate": 4.829715920950761e-07, - "loss": 0.0237, - "step": 7861 - }, - { - "epoch": 3.4918942926937597, - "grad_norm": 0.5863384888723406, - "learning_rate": 4.821407125166549e-07, - "loss": 0.0264, - "step": 7862 - }, - { - "epoch": 3.492338441039307, - "grad_norm": 0.47433808650477144, - "learning_rate": 4.81310512049229e-07, - "loss": 0.0285, - "step": 7863 - }, - { - "epoch": 3.4927825893848548, - "grad_norm": 0.4906763276486267, - "learning_rate": 4.804809908175911e-07, - "loss": 0.0251, - "step": 7864 - }, - { - "epoch": 3.493226737730402, - "grad_norm": 0.46826469445803337, - "learning_rate": 4.796521489464351e-07, - "loss": 0.0269, - "step": 7865 - }, - { - "epoch": 3.4936708860759493, - "grad_norm": 0.3869063106968133, - "learning_rate": 4.788239865603478e-07, - "loss": 0.0304, - "step": 7866 - }, - { - "epoch": 3.4941150344214966, - "grad_norm": 0.4155671863362175, - "learning_rate": 4.779965037838164e-07, - "loss": 0.0222, - "step": 7867 - }, - { - "epoch": 3.4945591827670444, - "grad_norm": 0.40399166433294564, - "learning_rate": 4.771697007412268e-07, - "loss": 0.0296, - "step": 7868 - }, - { - "epoch": 3.4950033311125916, - "grad_norm": 0.4132725467300106, - "learning_rate": 4.763435775568592e-07, - "loss": 0.0263, - "step": 7869 - }, - { - "epoch": 3.495447479458139, - "grad_norm": 0.40246185290766967, - "learning_rate": 4.7551813435489703e-07, - "loss": 0.0256, - "step": 7870 - }, - { - "epoch": 3.4958916278036867, - "grad_norm": 0.4363439376254432, - "learning_rate": 4.746933712594154e-07, - "loss": 0.0303, - "step": 7871 - }, - { - "epoch": 3.496335776149234, - "grad_norm": 0.346578595904209, - "learning_rate": 4.7386928839439183e-07, - "loss": 0.0216, - "step": 7872 - }, - { - "epoch": 3.4967799244947813, - "grad_norm": 0.40218292971261804, - "learning_rate": 4.7304588588370113e-07, - "loss": 0.022, - "step": 7873 - }, - { - "epoch": 3.4972240728403285, - "grad_norm": 0.3613752874610863, - "learning_rate": 4.722231638511121e-07, - "loss": 0.0188, - "step": 7874 - }, - { - "epoch": 3.497668221185876, - "grad_norm": 0.4305489342659995, - "learning_rate": 4.7140112242029356e-07, - "loss": 0.0244, - "step": 7875 - }, - { - "epoch": 3.4981123695314236, - "grad_norm": 0.3319846218251371, - "learning_rate": 4.7057976171481614e-07, - "loss": 0.0159, - "step": 7876 - }, - { - "epoch": 3.498556517876971, - "grad_norm": 0.3685184737755979, - "learning_rate": 4.69759081858141e-07, - "loss": 0.0248, - "step": 7877 - }, - { - "epoch": 3.499000666222518, - "grad_norm": 0.5404043718315673, - "learning_rate": 4.689390829736312e-07, - "loss": 0.031, - "step": 7878 - }, - { - "epoch": 3.499444814568066, - "grad_norm": 0.3833802780855093, - "learning_rate": 4.681197651845476e-07, - "loss": 0.0156, - "step": 7879 - }, - { - "epoch": 3.499888962913613, - "grad_norm": 0.387890807698906, - "learning_rate": 4.6730112861404497e-07, - "loss": 0.028, - "step": 7880 - }, - { - "epoch": 3.5003331112591605, - "grad_norm": 0.5278897289398781, - "learning_rate": 4.6648317338518045e-07, - "loss": 0.0305, - "step": 7881 - }, - { - "epoch": 3.5007772596047078, - "grad_norm": 0.429242176272356, - "learning_rate": 4.656658996209057e-07, - "loss": 0.0299, - "step": 7882 - }, - { - "epoch": 3.5012214079502555, - "grad_norm": 0.4369670295816076, - "learning_rate": 4.6484930744407074e-07, - "loss": 0.0327, - "step": 7883 - }, - { - "epoch": 3.501665556295803, - "grad_norm": 0.4901187441061219, - "learning_rate": 4.6403339697742413e-07, - "loss": 0.0228, - "step": 7884 - }, - { - "epoch": 3.50210970464135, - "grad_norm": 0.3420486455934448, - "learning_rate": 4.63218168343611e-07, - "loss": 0.0216, - "step": 7885 - }, - { - "epoch": 3.502553852986898, - "grad_norm": 0.40939983156980164, - "learning_rate": 4.624036216651723e-07, - "loss": 0.0263, - "step": 7886 - }, - { - "epoch": 3.502998001332445, - "grad_norm": 0.3910656718501315, - "learning_rate": 4.615897570645511e-07, - "loss": 0.0183, - "step": 7887 - }, - { - "epoch": 3.5034421496779924, - "grad_norm": 0.4314655959506559, - "learning_rate": 4.6077657466408245e-07, - "loss": 0.0312, - "step": 7888 - }, - { - "epoch": 3.5038862980235397, - "grad_norm": 0.3568409941841216, - "learning_rate": 4.599640745860029e-07, - "loss": 0.0271, - "step": 7889 - }, - { - "epoch": 3.5043304463690874, - "grad_norm": 0.4093953752586413, - "learning_rate": 4.5915225695244536e-07, - "loss": 0.0241, - "step": 7890 - }, - { - "epoch": 3.5047745947146347, - "grad_norm": 0.3977398657560647, - "learning_rate": 4.583411218854383e-07, - "loss": 0.0237, - "step": 7891 - }, - { - "epoch": 3.505218743060182, - "grad_norm": 0.4275747909847334, - "learning_rate": 4.575306695069087e-07, - "loss": 0.0301, - "step": 7892 - }, - { - "epoch": 3.5056628914057297, - "grad_norm": 0.5388719609229746, - "learning_rate": 4.567208999386852e-07, - "loss": 0.035, - "step": 7893 - }, - { - "epoch": 3.506107039751277, - "grad_norm": 0.36096697143744105, - "learning_rate": 4.5591181330248534e-07, - "loss": 0.0207, - "step": 7894 - }, - { - "epoch": 3.5065511880968243, - "grad_norm": 0.4142429440699439, - "learning_rate": 4.5510340971993086e-07, - "loss": 0.0186, - "step": 7895 - }, - { - "epoch": 3.5069953364423716, - "grad_norm": 0.4393049642057595, - "learning_rate": 4.542956893125394e-07, - "loss": 0.025, - "step": 7896 - }, - { - "epoch": 3.507439484787919, - "grad_norm": 0.6384180216184727, - "learning_rate": 4.534886522017229e-07, - "loss": 0.0266, - "step": 7897 - }, - { - "epoch": 3.5078836331334666, - "grad_norm": 0.43433435288410693, - "learning_rate": 4.526822985087931e-07, - "loss": 0.031, - "step": 7898 - }, - { - "epoch": 3.508327781479014, - "grad_norm": 0.4028630240642223, - "learning_rate": 4.5187662835495974e-07, - "loss": 0.0225, - "step": 7899 - }, - { - "epoch": 3.5087719298245617, - "grad_norm": 0.3939689206368112, - "learning_rate": 4.510716418613281e-07, - "loss": 0.0224, - "step": 7900 - }, - { - "epoch": 3.509216078170109, - "grad_norm": 0.515126016444997, - "learning_rate": 4.502673391489026e-07, - "loss": 0.029, - "step": 7901 - }, - { - "epoch": 3.5096602265156562, - "grad_norm": 0.4945699098085951, - "learning_rate": 4.4946372033858157e-07, - "loss": 0.028, - "step": 7902 - }, - { - "epoch": 3.5101043748612035, - "grad_norm": 0.43373798757249604, - "learning_rate": 4.486607855511627e-07, - "loss": 0.0272, - "step": 7903 - }, - { - "epoch": 3.510548523206751, - "grad_norm": 0.40314045697996204, - "learning_rate": 4.4785853490734277e-07, - "loss": 0.027, - "step": 7904 - }, - { - "epoch": 3.5109926715522985, - "grad_norm": 0.4288828381204982, - "learning_rate": 4.470569685277115e-07, - "loss": 0.021, - "step": 7905 - }, - { - "epoch": 3.511436819897846, - "grad_norm": 0.47380514557902603, - "learning_rate": 4.462560865327592e-07, - "loss": 0.0352, - "step": 7906 - }, - { - "epoch": 3.511880968243393, - "grad_norm": 0.6718664840187395, - "learning_rate": 4.454558890428728e-07, - "loss": 0.037, - "step": 7907 - }, - { - "epoch": 3.512325116588941, - "grad_norm": 0.42187384792878063, - "learning_rate": 4.446563761783329e-07, - "loss": 0.0265, - "step": 7908 - }, - { - "epoch": 3.512769264934488, - "grad_norm": 0.4115565734686187, - "learning_rate": 4.43857548059321e-07, - "loss": 0.0292, - "step": 7909 - }, - { - "epoch": 3.5132134132800354, - "grad_norm": 0.5047831437385433, - "learning_rate": 4.430594048059167e-07, - "loss": 0.0283, - "step": 7910 - }, - { - "epoch": 3.5136575616255827, - "grad_norm": 0.4497401686326858, - "learning_rate": 4.422619465380917e-07, - "loss": 0.0257, - "step": 7911 - }, - { - "epoch": 3.5141017099711305, - "grad_norm": 0.41364265915281817, - "learning_rate": 4.4146517337571857e-07, - "loss": 0.0219, - "step": 7912 - }, - { - "epoch": 3.5145458583166778, - "grad_norm": 0.5685124202462428, - "learning_rate": 4.4066908543856704e-07, - "loss": 0.0421, - "step": 7913 - }, - { - "epoch": 3.514990006662225, - "grad_norm": 0.31669420534645987, - "learning_rate": 4.3987368284630015e-07, - "loss": 0.0161, - "step": 7914 - }, - { - "epoch": 3.515434155007773, - "grad_norm": 0.3911190961806719, - "learning_rate": 4.3907896571848187e-07, - "loss": 0.0295, - "step": 7915 - }, - { - "epoch": 3.51587830335332, - "grad_norm": 0.49151230231883697, - "learning_rate": 4.382849341745715e-07, - "loss": 0.0323, - "step": 7916 - }, - { - "epoch": 3.5163224516988674, - "grad_norm": 0.3734463542020498, - "learning_rate": 4.3749158833392535e-07, - "loss": 0.0233, - "step": 7917 - }, - { - "epoch": 3.5167666000444147, - "grad_norm": 0.4234162479441925, - "learning_rate": 4.366989283157985e-07, - "loss": 0.0221, - "step": 7918 - }, - { - "epoch": 3.5172107483899624, - "grad_norm": 0.36581058726451665, - "learning_rate": 4.3590695423933795e-07, - "loss": 0.0232, - "step": 7919 - }, - { - "epoch": 3.5176548967355097, - "grad_norm": 0.41877616966330095, - "learning_rate": 4.3511566622359224e-07, - "loss": 0.032, - "step": 7920 - }, - { - "epoch": 3.518099045081057, - "grad_norm": 0.381478457217342, - "learning_rate": 4.3432506438750745e-07, - "loss": 0.0198, - "step": 7921 - }, - { - "epoch": 3.5185431934266047, - "grad_norm": 0.5312067893718723, - "learning_rate": 4.335351488499218e-07, - "loss": 0.0341, - "step": 7922 - }, - { - "epoch": 3.518987341772152, - "grad_norm": 0.32613641832900486, - "learning_rate": 4.327459197295736e-07, - "loss": 0.0153, - "step": 7923 - }, - { - "epoch": 3.5194314901176993, - "grad_norm": 0.4186436124064849, - "learning_rate": 4.319573771450991e-07, - "loss": 0.0239, - "step": 7924 - }, - { - "epoch": 3.5198756384632466, - "grad_norm": 0.5213492953580083, - "learning_rate": 4.3116952121502686e-07, - "loss": 0.0245, - "step": 7925 - }, - { - "epoch": 3.520319786808794, - "grad_norm": 0.4470067109711501, - "learning_rate": 4.303823520577871e-07, - "loss": 0.029, - "step": 7926 - }, - { - "epoch": 3.5207639351543416, - "grad_norm": 0.44926277605141984, - "learning_rate": 4.295958697917035e-07, - "loss": 0.0251, - "step": 7927 - }, - { - "epoch": 3.521208083499889, - "grad_norm": 0.4428984780991236, - "learning_rate": 4.288100745349988e-07, - "loss": 0.0283, - "step": 7928 - }, - { - "epoch": 3.5216522318454366, - "grad_norm": 0.4649455292363394, - "learning_rate": 4.2802496640579115e-07, - "loss": 0.0261, - "step": 7929 - }, - { - "epoch": 3.522096380190984, - "grad_norm": 0.4546600325025468, - "learning_rate": 4.2724054552209515e-07, - "loss": 0.0272, - "step": 7930 - }, - { - "epoch": 3.522540528536531, - "grad_norm": 0.44460414877352883, - "learning_rate": 4.2645681200182197e-07, - "loss": 0.02, - "step": 7931 - }, - { - "epoch": 3.5229846768820785, - "grad_norm": 0.3712603269325949, - "learning_rate": 4.256737659627813e-07, - "loss": 0.0252, - "step": 7932 - }, - { - "epoch": 3.523428825227626, - "grad_norm": 0.4419446907695471, - "learning_rate": 4.248914075226779e-07, - "loss": 0.0296, - "step": 7933 - }, - { - "epoch": 3.5238729735731735, - "grad_norm": 0.4371622849898781, - "learning_rate": 4.2410973679911317e-07, - "loss": 0.0193, - "step": 7934 - }, - { - "epoch": 3.524317121918721, - "grad_norm": 0.42263243970502656, - "learning_rate": 4.2332875390958707e-07, - "loss": 0.0284, - "step": 7935 - }, - { - "epoch": 3.524761270264268, - "grad_norm": 0.4360454559382049, - "learning_rate": 4.225484589714918e-07, - "loss": 0.0267, - "step": 7936 - }, - { - "epoch": 3.525205418609816, - "grad_norm": 0.4221730269522906, - "learning_rate": 4.2176885210212127e-07, - "loss": 0.0283, - "step": 7937 - }, - { - "epoch": 3.525649566955363, - "grad_norm": 0.34554558425959747, - "learning_rate": 4.209899334186623e-07, - "loss": 0.015, - "step": 7938 - }, - { - "epoch": 3.5260937153009104, - "grad_norm": 0.3944057611136553, - "learning_rate": 4.2021170303820025e-07, - "loss": 0.0247, - "step": 7939 - }, - { - "epoch": 3.5265378636464577, - "grad_norm": 0.3950267732155638, - "learning_rate": 4.1943416107771585e-07, - "loss": 0.0237, - "step": 7940 - }, - { - "epoch": 3.5269820119920055, - "grad_norm": 0.5276030208132706, - "learning_rate": 4.186573076540884e-07, - "loss": 0.0293, - "step": 7941 - }, - { - "epoch": 3.5274261603375527, - "grad_norm": 0.42130456420073215, - "learning_rate": 4.178811428840901e-07, - "loss": 0.0221, - "step": 7942 - }, - { - "epoch": 3.5278703086831, - "grad_norm": 0.4967582028112826, - "learning_rate": 4.1710566688439314e-07, - "loss": 0.0264, - "step": 7943 - }, - { - "epoch": 3.5283144570286478, - "grad_norm": 0.38558337467937864, - "learning_rate": 4.163308797715637e-07, - "loss": 0.0276, - "step": 7944 - }, - { - "epoch": 3.528758605374195, - "grad_norm": 0.5044853945425457, - "learning_rate": 4.155567816620659e-07, - "loss": 0.0204, - "step": 7945 - }, - { - "epoch": 3.5292027537197423, - "grad_norm": 0.8869805733234495, - "learning_rate": 4.147833726722611e-07, - "loss": 0.0291, - "step": 7946 - }, - { - "epoch": 3.5296469020652896, - "grad_norm": 0.3536249761317266, - "learning_rate": 4.140106529184035e-07, - "loss": 0.0235, - "step": 7947 - }, - { - "epoch": 3.5300910504108374, - "grad_norm": 0.4154592460581955, - "learning_rate": 4.1323862251664684e-07, - "loss": 0.0233, - "step": 7948 - }, - { - "epoch": 3.5305351987563847, - "grad_norm": 0.3504099993535625, - "learning_rate": 4.1246728158304107e-07, - "loss": 0.0187, - "step": 7949 - }, - { - "epoch": 3.530979347101932, - "grad_norm": 0.4437614260756121, - "learning_rate": 4.1169663023353124e-07, - "loss": 0.0262, - "step": 7950 - }, - { - "epoch": 3.5314234954474797, - "grad_norm": 0.3600199231026137, - "learning_rate": 4.109266685839597e-07, - "loss": 0.02, - "step": 7951 - }, - { - "epoch": 3.531867643793027, - "grad_norm": 0.3877433229131516, - "learning_rate": 4.101573967500655e-07, - "loss": 0.0236, - "step": 7952 - }, - { - "epoch": 3.5323117921385743, - "grad_norm": 0.39474586031469927, - "learning_rate": 4.0938881484748116e-07, - "loss": 0.0261, - "step": 7953 - }, - { - "epoch": 3.5327559404841216, - "grad_norm": 0.4148159595077121, - "learning_rate": 4.086209229917387e-07, - "loss": 0.0255, - "step": 7954 - }, - { - "epoch": 3.533200088829669, - "grad_norm": 0.47070012710839254, - "learning_rate": 4.0785372129826586e-07, - "loss": 0.0318, - "step": 7955 - }, - { - "epoch": 3.5336442371752166, - "grad_norm": 0.3680844624737353, - "learning_rate": 4.0708720988238584e-07, - "loss": 0.0166, - "step": 7956 - }, - { - "epoch": 3.534088385520764, - "grad_norm": 0.3875786535924789, - "learning_rate": 4.063213888593176e-07, - "loss": 0.0256, - "step": 7957 - }, - { - "epoch": 3.5345325338663116, - "grad_norm": 0.39292909474931037, - "learning_rate": 4.0555625834417857e-07, - "loss": 0.0244, - "step": 7958 - }, - { - "epoch": 3.534976682211859, - "grad_norm": 0.4238019487678821, - "learning_rate": 4.047918184519789e-07, - "loss": 0.0265, - "step": 7959 - }, - { - "epoch": 3.535420830557406, - "grad_norm": 0.3868503914395472, - "learning_rate": 4.040280692976278e-07, - "loss": 0.025, - "step": 7960 - }, - { - "epoch": 3.5358649789029535, - "grad_norm": 0.5061016125162293, - "learning_rate": 4.032650109959302e-07, - "loss": 0.0305, - "step": 7961 - }, - { - "epoch": 3.5363091272485008, - "grad_norm": 0.44915195593836177, - "learning_rate": 4.0250264366158643e-07, - "loss": 0.0227, - "step": 7962 - }, - { - "epoch": 3.5367532755940485, - "grad_norm": 0.3701090299363028, - "learning_rate": 4.017409674091932e-07, - "loss": 0.0275, - "step": 7963 - }, - { - "epoch": 3.537197423939596, - "grad_norm": 0.33071866294982566, - "learning_rate": 4.009799823532434e-07, - "loss": 0.0166, - "step": 7964 - }, - { - "epoch": 3.537641572285143, - "grad_norm": 0.3869700046276894, - "learning_rate": 4.0021968860812556e-07, - "loss": 0.025, - "step": 7965 - }, - { - "epoch": 3.538085720630691, - "grad_norm": 0.34946266928394254, - "learning_rate": 3.994600862881248e-07, - "loss": 0.0205, - "step": 7966 - }, - { - "epoch": 3.538529868976238, - "grad_norm": 0.391173218273872, - "learning_rate": 3.9870117550742273e-07, - "loss": 0.0263, - "step": 7967 - }, - { - "epoch": 3.5389740173217854, - "grad_norm": 0.3665430028174577, - "learning_rate": 3.9794295638009683e-07, - "loss": 0.022, - "step": 7968 - }, - { - "epoch": 3.5394181656673327, - "grad_norm": 0.3931083199354571, - "learning_rate": 3.971854290201205e-07, - "loss": 0.0233, - "step": 7969 - }, - { - "epoch": 3.5398623140128804, - "grad_norm": 0.4886210610765913, - "learning_rate": 3.964285935413609e-07, - "loss": 0.0213, - "step": 7970 - }, - { - "epoch": 3.5403064623584277, - "grad_norm": 0.3576029073891739, - "learning_rate": 3.9567245005758537e-07, - "loss": 0.023, - "step": 7971 - }, - { - "epoch": 3.540750610703975, - "grad_norm": 0.4994177352645637, - "learning_rate": 3.9491699868245414e-07, - "loss": 0.0419, - "step": 7972 - }, - { - "epoch": 3.5411947590495227, - "grad_norm": 0.43622296138249816, - "learning_rate": 3.941622395295247e-07, - "loss": 0.0318, - "step": 7973 - }, - { - "epoch": 3.54163890739507, - "grad_norm": 0.4309663417901952, - "learning_rate": 3.934081727122513e-07, - "loss": 0.0342, - "step": 7974 - }, - { - "epoch": 3.5420830557406173, - "grad_norm": 0.4349122435505889, - "learning_rate": 3.9265479834398103e-07, - "loss": 0.0286, - "step": 7975 - }, - { - "epoch": 3.5425272040861646, - "grad_norm": 0.4420907520326438, - "learning_rate": 3.919021165379594e-07, - "loss": 0.0192, - "step": 7976 - }, - { - "epoch": 3.5429713524317124, - "grad_norm": 0.40134602793452784, - "learning_rate": 3.911501274073276e-07, - "loss": 0.0222, - "step": 7977 - }, - { - "epoch": 3.5434155007772596, - "grad_norm": 0.4162128270891882, - "learning_rate": 3.9039883106512243e-07, - "loss": 0.0263, - "step": 7978 - }, - { - "epoch": 3.543859649122807, - "grad_norm": 0.43122424865043063, - "learning_rate": 3.8964822762427633e-07, - "loss": 0.0251, - "step": 7979 - }, - { - "epoch": 3.5443037974683547, - "grad_norm": 0.351905898436228, - "learning_rate": 3.888983171976185e-07, - "loss": 0.0225, - "step": 7980 - }, - { - "epoch": 3.544747945813902, - "grad_norm": 0.37615773316072754, - "learning_rate": 3.8814909989787155e-07, - "loss": 0.02, - "step": 7981 - }, - { - "epoch": 3.5451920941594492, - "grad_norm": 0.42395585895532933, - "learning_rate": 3.87400575837657e-07, - "loss": 0.0202, - "step": 7982 - }, - { - "epoch": 3.5456362425049965, - "grad_norm": 0.3990970301083849, - "learning_rate": 3.8665274512948994e-07, - "loss": 0.0201, - "step": 7983 - }, - { - "epoch": 3.546080390850544, - "grad_norm": 0.5764101771289384, - "learning_rate": 3.859056078857826e-07, - "loss": 0.0381, - "step": 7984 - }, - { - "epoch": 3.5465245391960916, - "grad_norm": 0.3820488932414435, - "learning_rate": 3.851591642188418e-07, - "loss": 0.0242, - "step": 7985 - }, - { - "epoch": 3.546968687541639, - "grad_norm": 0.3459706819898099, - "learning_rate": 3.8441341424087233e-07, - "loss": 0.0238, - "step": 7986 - }, - { - "epoch": 3.5474128358871866, - "grad_norm": 0.4227398045664523, - "learning_rate": 3.836683580639705e-07, - "loss": 0.0244, - "step": 7987 - }, - { - "epoch": 3.547856984232734, - "grad_norm": 0.4309060643690936, - "learning_rate": 3.829239958001324e-07, - "loss": 0.0194, - "step": 7988 - }, - { - "epoch": 3.548301132578281, - "grad_norm": 0.42116836328880336, - "learning_rate": 3.8218032756124844e-07, - "loss": 0.0204, - "step": 7989 - }, - { - "epoch": 3.5487452809238285, - "grad_norm": 0.37821657931686165, - "learning_rate": 3.814373534591037e-07, - "loss": 0.0194, - "step": 7990 - }, - { - "epoch": 3.5491894292693758, - "grad_norm": 0.42284081107970184, - "learning_rate": 3.8069507360538163e-07, - "loss": 0.0221, - "step": 7991 - }, - { - "epoch": 3.5496335776149235, - "grad_norm": 0.4662610237314549, - "learning_rate": 3.799534881116573e-07, - "loss": 0.0276, - "step": 7992 - }, - { - "epoch": 3.5500777259604708, - "grad_norm": 0.45003555474688933, - "learning_rate": 3.7921259708940503e-07, - "loss": 0.0274, - "step": 7993 - }, - { - "epoch": 3.550521874306018, - "grad_norm": 0.5571173920439147, - "learning_rate": 3.7847240064999233e-07, - "loss": 0.0294, - "step": 7994 - }, - { - "epoch": 3.550966022651566, - "grad_norm": 0.4728510369440864, - "learning_rate": 3.7773289890468414e-07, - "loss": 0.026, - "step": 7995 - }, - { - "epoch": 3.551410170997113, - "grad_norm": 0.3553733406102519, - "learning_rate": 3.7699409196463977e-07, - "loss": 0.0175, - "step": 7996 - }, - { - "epoch": 3.5518543193426604, - "grad_norm": 0.4021957600650995, - "learning_rate": 3.762559799409149e-07, - "loss": 0.0268, - "step": 7997 - }, - { - "epoch": 3.5522984676882077, - "grad_norm": 0.5246882857506893, - "learning_rate": 3.7551856294445967e-07, - "loss": 0.0243, - "step": 7998 - }, - { - "epoch": 3.5527426160337554, - "grad_norm": 0.4805020243101749, - "learning_rate": 3.7478184108612036e-07, - "loss": 0.0185, - "step": 7999 - }, - { - "epoch": 3.5531867643793027, - "grad_norm": 0.49276293248029474, - "learning_rate": 3.74045814476639e-07, - "loss": 0.0322, - "step": 8000 - }, - { - "epoch": 3.55363091272485, - "grad_norm": 0.5382419991792012, - "learning_rate": 3.733104832266532e-07, - "loss": 0.0273, - "step": 8001 - }, - { - "epoch": 3.5540750610703977, - "grad_norm": 0.4236743982252115, - "learning_rate": 3.7257584744669615e-07, - "loss": 0.024, - "step": 8002 - }, - { - "epoch": 3.554519209415945, - "grad_norm": 0.3569656738216824, - "learning_rate": 3.718419072471946e-07, - "loss": 0.0206, - "step": 8003 - }, - { - "epoch": 3.5549633577614923, - "grad_norm": 0.6957355605647527, - "learning_rate": 3.7110866273847356e-07, - "loss": 0.0286, - "step": 8004 - }, - { - "epoch": 3.5554075061070396, - "grad_norm": 0.39146983502282784, - "learning_rate": 3.70376114030751e-07, - "loss": 0.0211, - "step": 8005 - }, - { - "epoch": 3.5558516544525873, - "grad_norm": 0.4183758252296795, - "learning_rate": 3.696442612341422e-07, - "loss": 0.023, - "step": 8006 - }, - { - "epoch": 3.5562958027981346, - "grad_norm": 0.4440464207564376, - "learning_rate": 3.6891310445865693e-07, - "loss": 0.0284, - "step": 8007 - }, - { - "epoch": 3.556739951143682, - "grad_norm": 0.3721283097773306, - "learning_rate": 3.681826438142011e-07, - "loss": 0.0228, - "step": 8008 - }, - { - "epoch": 3.5571840994892296, - "grad_norm": 0.4139366078303485, - "learning_rate": 3.6745287941057417e-07, - "loss": 0.0229, - "step": 8009 - }, - { - "epoch": 3.557628247834777, - "grad_norm": 0.5055064614278328, - "learning_rate": 3.6672381135747284e-07, - "loss": 0.0278, - "step": 8010 - }, - { - "epoch": 3.5580723961803242, - "grad_norm": 0.44529281390449127, - "learning_rate": 3.6599543976448884e-07, - "loss": 0.0261, - "step": 8011 - }, - { - "epoch": 3.5585165445258715, - "grad_norm": 0.4907105184886079, - "learning_rate": 3.6526776474110627e-07, - "loss": 0.0244, - "step": 8012 - }, - { - "epoch": 3.558960692871419, - "grad_norm": 0.4951645075676849, - "learning_rate": 3.645407863967104e-07, - "loss": 0.037, - "step": 8013 - }, - { - "epoch": 3.5594048412169665, - "grad_norm": 0.37863500443452736, - "learning_rate": 3.6381450484057777e-07, - "loss": 0.0252, - "step": 8014 - }, - { - "epoch": 3.559848989562514, - "grad_norm": 0.39735984529923846, - "learning_rate": 3.630889201818788e-07, - "loss": 0.0206, - "step": 8015 - }, - { - "epoch": 3.5602931379080616, - "grad_norm": 0.4346987177337589, - "learning_rate": 3.623640325296829e-07, - "loss": 0.0257, - "step": 8016 - }, - { - "epoch": 3.560737286253609, - "grad_norm": 0.33730436829577654, - "learning_rate": 3.616398419929523e-07, - "loss": 0.0189, - "step": 8017 - }, - { - "epoch": 3.561181434599156, - "grad_norm": 0.3665779162327361, - "learning_rate": 3.6091634868054557e-07, - "loss": 0.0189, - "step": 8018 - }, - { - "epoch": 3.5616255829447034, - "grad_norm": 0.3886589674388881, - "learning_rate": 3.601935527012168e-07, - "loss": 0.0225, - "step": 8019 - }, - { - "epoch": 3.5620697312902507, - "grad_norm": 0.46005381668223383, - "learning_rate": 3.594714541636124e-07, - "loss": 0.0251, - "step": 8020 - }, - { - "epoch": 3.5625138796357985, - "grad_norm": 0.437136127998239, - "learning_rate": 3.5875005317627776e-07, - "loss": 0.0336, - "step": 8021 - }, - { - "epoch": 3.5629580279813458, - "grad_norm": 0.41144565522854637, - "learning_rate": 3.580293498476517e-07, - "loss": 0.0321, - "step": 8022 - }, - { - "epoch": 3.563402176326893, - "grad_norm": 0.4402714548710384, - "learning_rate": 3.573093442860659e-07, - "loss": 0.0218, - "step": 8023 - }, - { - "epoch": 3.563846324672441, - "grad_norm": 0.46313910275690856, - "learning_rate": 3.565900365997521e-07, - "loss": 0.038, - "step": 8024 - }, - { - "epoch": 3.564290473017988, - "grad_norm": 0.4585923557925007, - "learning_rate": 3.558714268968344e-07, - "loss": 0.0242, - "step": 8025 - }, - { - "epoch": 3.5647346213635354, - "grad_norm": 0.40511430211397, - "learning_rate": 3.5515351528533024e-07, - "loss": 0.0217, - "step": 8026 - }, - { - "epoch": 3.5651787697090827, - "grad_norm": 0.4897536707816401, - "learning_rate": 3.5443630187315504e-07, - "loss": 0.0293, - "step": 8027 - }, - { - "epoch": 3.5656229180546304, - "grad_norm": 0.3947409838204125, - "learning_rate": 3.537197867681191e-07, - "loss": 0.0231, - "step": 8028 - }, - { - "epoch": 3.5660670664001777, - "grad_norm": 0.42936961733669526, - "learning_rate": 3.5300397007792364e-07, - "loss": 0.0266, - "step": 8029 - }, - { - "epoch": 3.566511214745725, - "grad_norm": 0.3453771987941547, - "learning_rate": 3.5228885191017084e-07, - "loss": 0.0181, - "step": 8030 - }, - { - "epoch": 3.5669553630912727, - "grad_norm": 0.38903220671574995, - "learning_rate": 3.515744323723558e-07, - "loss": 0.0254, - "step": 8031 - }, - { - "epoch": 3.56739951143682, - "grad_norm": 0.39781787732509344, - "learning_rate": 3.508607115718654e-07, - "loss": 0.0181, - "step": 8032 - }, - { - "epoch": 3.5678436597823673, - "grad_norm": 0.42441099532371085, - "learning_rate": 3.50147689615985e-07, - "loss": 0.0222, - "step": 8033 - }, - { - "epoch": 3.5682878081279146, - "grad_norm": 0.38842537901922425, - "learning_rate": 3.494353666118938e-07, - "loss": 0.0378, - "step": 8034 - }, - { - "epoch": 3.568731956473462, - "grad_norm": 0.35803044170961684, - "learning_rate": 3.4872374266666674e-07, - "loss": 0.0241, - "step": 8035 - }, - { - "epoch": 3.5691761048190096, - "grad_norm": 0.41680398082335124, - "learning_rate": 3.4801281788727326e-07, - "loss": 0.0226, - "step": 8036 - }, - { - "epoch": 3.569620253164557, - "grad_norm": 0.4978684696151685, - "learning_rate": 3.4730259238057563e-07, - "loss": 0.0265, - "step": 8037 - }, - { - "epoch": 3.5700644015101046, - "grad_norm": 0.526050521323082, - "learning_rate": 3.46593066253334e-07, - "loss": 0.0241, - "step": 8038 - }, - { - "epoch": 3.570508549855652, - "grad_norm": 0.4100317457053481, - "learning_rate": 3.4588423961220306e-07, - "loss": 0.0244, - "step": 8039 - }, - { - "epoch": 3.570952698201199, - "grad_norm": 0.4007810456784438, - "learning_rate": 3.4517611256372875e-07, - "loss": 0.0207, - "step": 8040 - }, - { - "epoch": 3.5713968465467465, - "grad_norm": 0.44507883967406275, - "learning_rate": 3.444686852143575e-07, - "loss": 0.0203, - "step": 8041 - }, - { - "epoch": 3.571840994892294, - "grad_norm": 0.4110302975726491, - "learning_rate": 3.4376195767042706e-07, - "loss": 0.0196, - "step": 8042 - }, - { - "epoch": 3.5722851432378415, - "grad_norm": 0.42428375307760136, - "learning_rate": 3.4305593003816917e-07, - "loss": 0.0239, - "step": 8043 - }, - { - "epoch": 3.572729291583389, - "grad_norm": 0.4068665526914318, - "learning_rate": 3.423506024237122e-07, - "loss": 0.0251, - "step": 8044 - }, - { - "epoch": 3.573173439928936, - "grad_norm": 0.5621905349071596, - "learning_rate": 3.416459749330808e-07, - "loss": 0.0436, - "step": 8045 - }, - { - "epoch": 3.573617588274484, - "grad_norm": 0.4013053666322752, - "learning_rate": 3.409420476721892e-07, - "loss": 0.0233, - "step": 8046 - }, - { - "epoch": 3.574061736620031, - "grad_norm": 0.40409509056363074, - "learning_rate": 3.4023882074685266e-07, - "loss": 0.0196, - "step": 8047 - }, - { - "epoch": 3.5745058849655784, - "grad_norm": 0.401583653285129, - "learning_rate": 3.3953629426277666e-07, - "loss": 0.0254, - "step": 8048 - }, - { - "epoch": 3.5749500333111257, - "grad_norm": 0.39247780188844755, - "learning_rate": 3.3883446832556286e-07, - "loss": 0.0226, - "step": 8049 - }, - { - "epoch": 3.5753941816566734, - "grad_norm": 0.4595707850818669, - "learning_rate": 3.381333430407074e-07, - "loss": 0.0331, - "step": 8050 - }, - { - "epoch": 3.5758383300022207, - "grad_norm": 0.38262273059202667, - "learning_rate": 3.3743291851360215e-07, - "loss": 0.032, - "step": 8051 - }, - { - "epoch": 3.576282478347768, - "grad_norm": 0.5184355648640612, - "learning_rate": 3.3673319484953224e-07, - "loss": 0.0535, - "step": 8052 - }, - { - "epoch": 3.5767266266933158, - "grad_norm": 0.4837920320850102, - "learning_rate": 3.3603417215367916e-07, - "loss": 0.028, - "step": 8053 - }, - { - "epoch": 3.577170775038863, - "grad_norm": 0.4493839583313447, - "learning_rate": 3.3533585053111604e-07, - "loss": 0.0239, - "step": 8054 - }, - { - "epoch": 3.5776149233844103, - "grad_norm": 0.45676379041549287, - "learning_rate": 3.346382300868134e-07, - "loss": 0.0183, - "step": 8055 - }, - { - "epoch": 3.5780590717299576, - "grad_norm": 0.42408641826733806, - "learning_rate": 3.339413109256362e-07, - "loss": 0.019, - "step": 8056 - }, - { - "epoch": 3.5785032200755054, - "grad_norm": 0.41032911192121174, - "learning_rate": 3.3324509315234066e-07, - "loss": 0.0243, - "step": 8057 - }, - { - "epoch": 3.5789473684210527, - "grad_norm": 0.3781126839349984, - "learning_rate": 3.325495768715831e-07, - "loss": 0.023, - "step": 8058 - }, - { - "epoch": 3.5793915167666, - "grad_norm": 0.48422608200816336, - "learning_rate": 3.318547621879109e-07, - "loss": 0.0209, - "step": 8059 - }, - { - "epoch": 3.5798356651121477, - "grad_norm": 0.5803863155007879, - "learning_rate": 3.311606492057651e-07, - "loss": 0.03, - "step": 8060 - }, - { - "epoch": 3.580279813457695, - "grad_norm": 0.4295397381772821, - "learning_rate": 3.304672380294832e-07, - "loss": 0.0276, - "step": 8061 - }, - { - "epoch": 3.5807239618032423, - "grad_norm": 0.4359738260640305, - "learning_rate": 3.2977452876329806e-07, - "loss": 0.0226, - "step": 8062 - }, - { - "epoch": 3.5811681101487896, - "grad_norm": 0.6578716400312555, - "learning_rate": 3.290825215113325e-07, - "loss": 0.0317, - "step": 8063 - }, - { - "epoch": 3.581612258494337, - "grad_norm": 0.31354942464141744, - "learning_rate": 3.2839121637761095e-07, - "loss": 0.021, - "step": 8064 - }, - { - "epoch": 3.5820564068398846, - "grad_norm": 0.3962803838449033, - "learning_rate": 3.277006134660454e-07, - "loss": 0.0279, - "step": 8065 - }, - { - "epoch": 3.582500555185432, - "grad_norm": 0.4214624266956511, - "learning_rate": 3.270107128804462e-07, - "loss": 0.0344, - "step": 8066 - }, - { - "epoch": 3.5829447035309796, - "grad_norm": 0.3602571631127595, - "learning_rate": 3.26321514724518e-07, - "loss": 0.0228, - "step": 8067 - }, - { - "epoch": 3.583388851876527, - "grad_norm": 0.37827106337184596, - "learning_rate": 3.2563301910185585e-07, - "loss": 0.0229, - "step": 8068 - }, - { - "epoch": 3.583833000222074, - "grad_norm": 0.3969145699088532, - "learning_rate": 3.249452261159558e-07, - "loss": 0.0216, - "step": 8069 - }, - { - "epoch": 3.5842771485676215, - "grad_norm": 0.3539428309291324, - "learning_rate": 3.242581358702046e-07, - "loss": 0.0219, - "step": 8070 - }, - { - "epoch": 3.5847212969131688, - "grad_norm": 0.46162564562225794, - "learning_rate": 3.235717484678813e-07, - "loss": 0.0219, - "step": 8071 - }, - { - "epoch": 3.5851654452587165, - "grad_norm": 0.395257543151881, - "learning_rate": 3.2288606401216283e-07, - "loss": 0.0283, - "step": 8072 - }, - { - "epoch": 3.585609593604264, - "grad_norm": 0.3543800684377955, - "learning_rate": 3.2220108260612e-07, - "loss": 0.022, - "step": 8073 - }, - { - "epoch": 3.586053741949811, - "grad_norm": 0.3956501467663245, - "learning_rate": 3.2151680435271504e-07, - "loss": 0.0227, - "step": 8074 - }, - { - "epoch": 3.586497890295359, - "grad_norm": 0.6908876097601867, - "learning_rate": 3.208332293548094e-07, - "loss": 0.0402, - "step": 8075 - }, - { - "epoch": 3.586942038640906, - "grad_norm": 0.41210563747436485, - "learning_rate": 3.2015035771515377e-07, - "loss": 0.0207, - "step": 8076 - }, - { - "epoch": 3.5873861869864534, - "grad_norm": 0.4210761069284605, - "learning_rate": 3.1946818953639604e-07, - "loss": 0.0199, - "step": 8077 - }, - { - "epoch": 3.5878303353320007, - "grad_norm": 0.562680504099952, - "learning_rate": 3.1878672492107796e-07, - "loss": 0.0333, - "step": 8078 - }, - { - "epoch": 3.5882744836775484, - "grad_norm": 0.45181012070367105, - "learning_rate": 3.181059639716355e-07, - "loss": 0.0292, - "step": 8079 - }, - { - "epoch": 3.5887186320230957, - "grad_norm": 0.3560150149469517, - "learning_rate": 3.1742590679039675e-07, - "loss": 0.0263, - "step": 8080 - }, - { - "epoch": 3.589162780368643, - "grad_norm": 0.2727495861254254, - "learning_rate": 3.167465534795888e-07, - "loss": 0.0142, - "step": 8081 - }, - { - "epoch": 3.5896069287141907, - "grad_norm": 0.3723555834846713, - "learning_rate": 3.1606790414132784e-07, - "loss": 0.0204, - "step": 8082 - }, - { - "epoch": 3.590051077059738, - "grad_norm": 0.3414027162319298, - "learning_rate": 3.153899588776266e-07, - "loss": 0.0229, - "step": 8083 - }, - { - "epoch": 3.5904952254052853, - "grad_norm": 0.42338168106939433, - "learning_rate": 3.147127177903936e-07, - "loss": 0.0232, - "step": 8084 - }, - { - "epoch": 3.5909393737508326, - "grad_norm": 0.41145971331036213, - "learning_rate": 3.1403618098142683e-07, - "loss": 0.0196, - "step": 8085 - }, - { - "epoch": 3.5913835220963803, - "grad_norm": 0.4400619534919001, - "learning_rate": 3.133603485524217e-07, - "loss": 0.0305, - "step": 8086 - }, - { - "epoch": 3.5918276704419276, - "grad_norm": 0.3445253112185815, - "learning_rate": 3.126852206049702e-07, - "loss": 0.0184, - "step": 8087 - }, - { - "epoch": 3.592271818787475, - "grad_norm": 0.4780681637104892, - "learning_rate": 3.1201079724055284e-07, - "loss": 0.0217, - "step": 8088 - }, - { - "epoch": 3.5927159671330227, - "grad_norm": 0.3835860710733381, - "learning_rate": 3.113370785605474e-07, - "loss": 0.0177, - "step": 8089 - }, - { - "epoch": 3.59316011547857, - "grad_norm": 0.36022249138379353, - "learning_rate": 3.106640646662268e-07, - "loss": 0.0229, - "step": 8090 - }, - { - "epoch": 3.5936042638241172, - "grad_norm": 0.3513194805512465, - "learning_rate": 3.099917556587534e-07, - "loss": 0.0183, - "step": 8091 - }, - { - "epoch": 3.5940484121696645, - "grad_norm": 0.44275629565228397, - "learning_rate": 3.0932015163918973e-07, - "loss": 0.0295, - "step": 8092 - }, - { - "epoch": 3.594492560515212, - "grad_norm": 0.4202067622758764, - "learning_rate": 3.0864925270848725e-07, - "loss": 0.022, - "step": 8093 - }, - { - "epoch": 3.5949367088607596, - "grad_norm": 0.35964770608777313, - "learning_rate": 3.079790589674947e-07, - "loss": 0.019, - "step": 8094 - }, - { - "epoch": 3.595380857206307, - "grad_norm": 0.33143293773682697, - "learning_rate": 3.073095705169532e-07, - "loss": 0.0174, - "step": 8095 - }, - { - "epoch": 3.5958250055518546, - "grad_norm": 0.35370082577225564, - "learning_rate": 3.066407874574978e-07, - "loss": 0.0245, - "step": 8096 - }, - { - "epoch": 3.596269153897402, - "grad_norm": 0.46712833979581136, - "learning_rate": 3.05972709889657e-07, - "loss": 0.0248, - "step": 8097 - }, - { - "epoch": 3.596713302242949, - "grad_norm": 0.38025100976241183, - "learning_rate": 3.0530533791385765e-07, - "loss": 0.0244, - "step": 8098 - }, - { - "epoch": 3.5971574505884965, - "grad_norm": 0.342220120184616, - "learning_rate": 3.0463867163041396e-07, - "loss": 0.0155, - "step": 8099 - }, - { - "epoch": 3.5976015989340437, - "grad_norm": 0.36933627653900136, - "learning_rate": 3.0397271113953796e-07, - "loss": 0.0209, - "step": 8100 - }, - { - "epoch": 3.5980457472795915, - "grad_norm": 0.3882041575523787, - "learning_rate": 3.0330745654133576e-07, - "loss": 0.0289, - "step": 8101 - }, - { - "epoch": 3.5984898956251388, - "grad_norm": 0.39071440323579304, - "learning_rate": 3.026429079358051e-07, - "loss": 0.0225, - "step": 8102 - }, - { - "epoch": 3.598934043970686, - "grad_norm": 0.5224321778667499, - "learning_rate": 3.0197906542283996e-07, - "loss": 0.0366, - "step": 8103 - }, - { - "epoch": 3.599378192316234, - "grad_norm": 0.463268142143758, - "learning_rate": 3.013159291022261e-07, - "loss": 0.0254, - "step": 8104 - }, - { - "epoch": 3.599822340661781, - "grad_norm": 0.4758792312691063, - "learning_rate": 3.006534990736448e-07, - "loss": 0.0177, - "step": 8105 - }, - { - "epoch": 3.6002664890073284, - "grad_norm": 0.3801891704764608, - "learning_rate": 2.99991775436671e-07, - "loss": 0.0285, - "step": 8106 - }, - { - "epoch": 3.6007106373528757, - "grad_norm": 0.47832635752425606, - "learning_rate": 2.993307582907728e-07, - "loss": 0.0393, - "step": 8107 - }, - { - "epoch": 3.6011547856984234, - "grad_norm": 0.4174887456808177, - "learning_rate": 2.9867044773531083e-07, - "loss": 0.0304, - "step": 8108 - }, - { - "epoch": 3.6015989340439707, - "grad_norm": 0.4160783686882648, - "learning_rate": 2.9801084386954337e-07, - "loss": 0.0224, - "step": 8109 - }, - { - "epoch": 3.602043082389518, - "grad_norm": 0.3728837062303793, - "learning_rate": 2.9735194679261835e-07, - "loss": 0.0212, - "step": 8110 - }, - { - "epoch": 3.6024872307350657, - "grad_norm": 0.7022594125847725, - "learning_rate": 2.966937566035799e-07, - "loss": 0.0301, - "step": 8111 - }, - { - "epoch": 3.602931379080613, - "grad_norm": 0.4052906698509884, - "learning_rate": 2.9603627340136553e-07, - "loss": 0.0293, - "step": 8112 - }, - { - "epoch": 3.6033755274261603, - "grad_norm": 0.3921299918104816, - "learning_rate": 2.953794972848051e-07, - "loss": 0.025, - "step": 8113 - }, - { - "epoch": 3.6038196757717076, - "grad_norm": 0.406918068376974, - "learning_rate": 2.947234283526229e-07, - "loss": 0.0278, - "step": 8114 - }, - { - "epoch": 3.6042638241172553, - "grad_norm": 0.4175532634627423, - "learning_rate": 2.940680667034396e-07, - "loss": 0.0318, - "step": 8115 - }, - { - "epoch": 3.6047079724628026, - "grad_norm": 0.43540375334253767, - "learning_rate": 2.934134124357646e-07, - "loss": 0.0268, - "step": 8116 - }, - { - "epoch": 3.60515212080835, - "grad_norm": 0.4718399188563787, - "learning_rate": 2.927594656480054e-07, - "loss": 0.0319, - "step": 8117 - }, - { - "epoch": 3.6055962691538976, - "grad_norm": 0.4746986163908478, - "learning_rate": 2.921062264384605e-07, - "loss": 0.0239, - "step": 8118 - }, - { - "epoch": 3.606040417499445, - "grad_norm": 0.43178883447784155, - "learning_rate": 2.914536949053226e-07, - "loss": 0.0252, - "step": 8119 - }, - { - "epoch": 3.606484565844992, - "grad_norm": 0.44193392412448756, - "learning_rate": 2.908018711466787e-07, - "loss": 0.0293, - "step": 8120 - }, - { - "epoch": 3.6069287141905395, - "grad_norm": 0.361976350777308, - "learning_rate": 2.901507552605087e-07, - "loss": 0.0155, - "step": 8121 - }, - { - "epoch": 3.607372862536087, - "grad_norm": 0.4277695581236822, - "learning_rate": 2.895003473446861e-07, - "loss": 0.0286, - "step": 8122 - }, - { - "epoch": 3.6078170108816345, - "grad_norm": 0.4459278029555017, - "learning_rate": 2.8885064749697987e-07, - "loss": 0.028, - "step": 8123 - }, - { - "epoch": 3.608261159227182, - "grad_norm": 0.566646999620044, - "learning_rate": 2.882016558150491e-07, - "loss": 0.0364, - "step": 8124 - }, - { - "epoch": 3.6087053075727296, - "grad_norm": 0.4313671970565694, - "learning_rate": 2.87553372396448e-07, - "loss": 0.031, - "step": 8125 - }, - { - "epoch": 3.609149455918277, - "grad_norm": 0.40292145142350155, - "learning_rate": 2.869057973386269e-07, - "loss": 0.0272, - "step": 8126 - }, - { - "epoch": 3.609593604263824, - "grad_norm": 0.41179464266273536, - "learning_rate": 2.8625893073892577e-07, - "loss": 0.0208, - "step": 8127 - }, - { - "epoch": 3.6100377526093714, - "grad_norm": 0.4344939010412826, - "learning_rate": 2.85612772694579e-07, - "loss": 0.0246, - "step": 8128 - }, - { - "epoch": 3.6104819009549187, - "grad_norm": 0.4503615177393324, - "learning_rate": 2.8496732330271726e-07, - "loss": 0.0318, - "step": 8129 - }, - { - "epoch": 3.6109260493004665, - "grad_norm": 0.49743560082851446, - "learning_rate": 2.8432258266036016e-07, - "loss": 0.0242, - "step": 8130 - }, - { - "epoch": 3.6113701976460137, - "grad_norm": 0.418557316697792, - "learning_rate": 2.8367855086442353e-07, - "loss": 0.0279, - "step": 8131 - }, - { - "epoch": 3.611814345991561, - "grad_norm": 0.3450331986237824, - "learning_rate": 2.830352280117188e-07, - "loss": 0.022, - "step": 8132 - }, - { - "epoch": 3.6122584943371088, - "grad_norm": 0.44492026164748794, - "learning_rate": 2.8239261419894526e-07, - "loss": 0.0323, - "step": 8133 - }, - { - "epoch": 3.612702642682656, - "grad_norm": 0.39841298833512895, - "learning_rate": 2.8175070952270014e-07, - "loss": 0.0285, - "step": 8134 - }, - { - "epoch": 3.6131467910282034, - "grad_norm": 0.43301418955026805, - "learning_rate": 2.811095140794734e-07, - "loss": 0.0274, - "step": 8135 - }, - { - "epoch": 3.6135909393737506, - "grad_norm": 0.5096476202086357, - "learning_rate": 2.804690279656458e-07, - "loss": 0.051, - "step": 8136 - }, - { - "epoch": 3.6140350877192984, - "grad_norm": 0.3293367472926153, - "learning_rate": 2.7982925127749416e-07, - "loss": 0.0227, - "step": 8137 - }, - { - "epoch": 3.6144792360648457, - "grad_norm": 0.37776627801259316, - "learning_rate": 2.791901841111877e-07, - "loss": 0.0254, - "step": 8138 - }, - { - "epoch": 3.614923384410393, - "grad_norm": 0.40922593503138716, - "learning_rate": 2.78551826562789e-07, - "loss": 0.045, - "step": 8139 - }, - { - "epoch": 3.6153675327559407, - "grad_norm": 0.42201659981374173, - "learning_rate": 2.779141787282547e-07, - "loss": 0.0251, - "step": 8140 - }, - { - "epoch": 3.615811681101488, - "grad_norm": 0.4053495089948873, - "learning_rate": 2.7727724070343296e-07, - "loss": 0.023, - "step": 8141 - }, - { - "epoch": 3.6162558294470353, - "grad_norm": 0.4556874573880963, - "learning_rate": 2.7664101258406626e-07, - "loss": 0.0227, - "step": 8142 - }, - { - "epoch": 3.6166999777925826, - "grad_norm": 0.3450822468016936, - "learning_rate": 2.7600549446579306e-07, - "loss": 0.018, - "step": 8143 - }, - { - "epoch": 3.6171441261381303, - "grad_norm": 0.35001380471776244, - "learning_rate": 2.753706864441391e-07, - "loss": 0.0232, - "step": 8144 - }, - { - "epoch": 3.6175882744836776, - "grad_norm": 0.364015851517707, - "learning_rate": 2.7473658861452923e-07, - "loss": 0.0185, - "step": 8145 - }, - { - "epoch": 3.618032422829225, - "grad_norm": 0.4713893596705623, - "learning_rate": 2.741032010722788e-07, - "loss": 0.0337, - "step": 8146 - }, - { - "epoch": 3.6184765711747726, - "grad_norm": 0.3440041200074425, - "learning_rate": 2.734705239125951e-07, - "loss": 0.0197, - "step": 8147 - }, - { - "epoch": 3.61892071952032, - "grad_norm": 0.41811195093373094, - "learning_rate": 2.728385572305814e-07, - "loss": 0.0278, - "step": 8148 - }, - { - "epoch": 3.619364867865867, - "grad_norm": 0.339333589639759, - "learning_rate": 2.7220730112123337e-07, - "loss": 0.0187, - "step": 8149 - }, - { - "epoch": 3.6198090162114145, - "grad_norm": 0.7462572452071613, - "learning_rate": 2.715767556794391e-07, - "loss": 0.0425, - "step": 8150 - }, - { - "epoch": 3.620253164556962, - "grad_norm": 0.5298727144349012, - "learning_rate": 2.7094692099997986e-07, - "loss": 0.0308, - "step": 8151 - }, - { - "epoch": 3.6206973129025095, - "grad_norm": 0.411102053477688, - "learning_rate": 2.7031779717753223e-07, - "loss": 0.0249, - "step": 8152 - }, - { - "epoch": 3.621141461248057, - "grad_norm": 0.4354411151955604, - "learning_rate": 2.696893843066617e-07, - "loss": 0.0307, - "step": 8153 - }, - { - "epoch": 3.6215856095936045, - "grad_norm": 0.3792115584655087, - "learning_rate": 2.6906168248183095e-07, - "loss": 0.0248, - "step": 8154 - }, - { - "epoch": 3.622029757939152, - "grad_norm": 0.3457882220586487, - "learning_rate": 2.68434691797394e-07, - "loss": 0.0197, - "step": 8155 - }, - { - "epoch": 3.622473906284699, - "grad_norm": 0.5361251704803487, - "learning_rate": 2.6780841234759826e-07, - "loss": 0.0382, - "step": 8156 - }, - { - "epoch": 3.6229180546302464, - "grad_norm": 0.45121861235515354, - "learning_rate": 2.6718284422658447e-07, - "loss": 0.0226, - "step": 8157 - }, - { - "epoch": 3.6233622029757937, - "grad_norm": 0.3726443210546779, - "learning_rate": 2.665579875283847e-07, - "loss": 0.0223, - "step": 8158 - }, - { - "epoch": 3.6238063513213414, - "grad_norm": 0.39308466518304386, - "learning_rate": 2.6593384234692597e-07, - "loss": 0.0259, - "step": 8159 - }, - { - "epoch": 3.6242504996668887, - "grad_norm": 0.3787552610454571, - "learning_rate": 2.6531040877602997e-07, - "loss": 0.0194, - "step": 8160 - }, - { - "epoch": 3.624694648012436, - "grad_norm": 0.41262964342629177, - "learning_rate": 2.646876869094073e-07, - "loss": 0.0197, - "step": 8161 - }, - { - "epoch": 3.6251387963579838, - "grad_norm": 0.34296275500584317, - "learning_rate": 2.640656768406641e-07, - "loss": 0.0215, - "step": 8162 - }, - { - "epoch": 3.625582944703531, - "grad_norm": 0.37447434249834716, - "learning_rate": 2.634443786632995e-07, - "loss": 0.0251, - "step": 8163 - }, - { - "epoch": 3.6260270930490783, - "grad_norm": 0.44671265535557425, - "learning_rate": 2.628237924707044e-07, - "loss": 0.0251, - "step": 8164 - }, - { - "epoch": 3.6264712413946256, - "grad_norm": 0.39092522082367537, - "learning_rate": 2.622039183561642e-07, - "loss": 0.0245, - "step": 8165 - }, - { - "epoch": 3.6269153897401734, - "grad_norm": 0.3971038919445469, - "learning_rate": 2.6158475641285544e-07, - "loss": 0.0297, - "step": 8166 - }, - { - "epoch": 3.6273595380857206, - "grad_norm": 0.7481153045495028, - "learning_rate": 2.609663067338497e-07, - "loss": 0.0379, - "step": 8167 - }, - { - "epoch": 3.627803686431268, - "grad_norm": 0.3705770719942717, - "learning_rate": 2.6034856941211104e-07, - "loss": 0.0238, - "step": 8168 - }, - { - "epoch": 3.6282478347768157, - "grad_norm": 0.42989268956281285, - "learning_rate": 2.597315445404941e-07, - "loss": 0.0264, - "step": 8169 - }, - { - "epoch": 3.628691983122363, - "grad_norm": 0.3871931353713456, - "learning_rate": 2.5911523221174963e-07, - "loss": 0.024, - "step": 8170 - }, - { - "epoch": 3.6291361314679103, - "grad_norm": 0.5499381201238311, - "learning_rate": 2.584996325185185e-07, - "loss": 0.036, - "step": 8171 - }, - { - "epoch": 3.6295802798134575, - "grad_norm": 0.42718459557037247, - "learning_rate": 2.5788474555333675e-07, - "loss": 0.0284, - "step": 8172 - }, - { - "epoch": 3.630024428159005, - "grad_norm": 0.45150263461678425, - "learning_rate": 2.5727057140863266e-07, - "loss": 0.0424, - "step": 8173 - }, - { - "epoch": 3.6304685765045526, - "grad_norm": 0.4604751454319313, - "learning_rate": 2.566571101767268e-07, - "loss": 0.0229, - "step": 8174 - }, - { - "epoch": 3.6309127248501, - "grad_norm": 0.40368285144270716, - "learning_rate": 2.5604436194983204e-07, - "loss": 0.0241, - "step": 8175 - }, - { - "epoch": 3.6313568731956476, - "grad_norm": 0.35454391047270123, - "learning_rate": 2.554323268200559e-07, - "loss": 0.0194, - "step": 8176 - }, - { - "epoch": 3.631801021541195, - "grad_norm": 0.4841769515138133, - "learning_rate": 2.548210048793964e-07, - "loss": 0.0253, - "step": 8177 - }, - { - "epoch": 3.632245169886742, - "grad_norm": 0.34838665066054253, - "learning_rate": 2.5421039621974677e-07, - "loss": 0.0219, - "step": 8178 - }, - { - "epoch": 3.6326893182322895, - "grad_norm": 0.40000850001012683, - "learning_rate": 2.5360050093289123e-07, - "loss": 0.0281, - "step": 8179 - }, - { - "epoch": 3.6331334665778368, - "grad_norm": 0.3910550154100924, - "learning_rate": 2.529913191105088e-07, - "loss": 0.0223, - "step": 8180 - }, - { - "epoch": 3.6335776149233845, - "grad_norm": 0.48803942739172496, - "learning_rate": 2.523828508441672e-07, - "loss": 0.025, - "step": 8181 - }, - { - "epoch": 3.634021763268932, - "grad_norm": 0.5754863723730677, - "learning_rate": 2.5177509622533183e-07, - "loss": 0.0406, - "step": 8182 - }, - { - "epoch": 3.634465911614479, - "grad_norm": 0.4286654639108965, - "learning_rate": 2.511680553453572e-07, - "loss": 0.0216, - "step": 8183 - }, - { - "epoch": 3.634910059960027, - "grad_norm": 0.35950293556792007, - "learning_rate": 2.5056172829549254e-07, - "loss": 0.0331, - "step": 8184 - }, - { - "epoch": 3.635354208305574, - "grad_norm": 0.40214970790890675, - "learning_rate": 2.4995611516688003e-07, - "loss": 0.0233, - "step": 8185 - }, - { - "epoch": 3.6357983566511214, - "grad_norm": 0.36548024593908557, - "learning_rate": 2.4935121605055125e-07, - "loss": 0.0261, - "step": 8186 - }, - { - "epoch": 3.6362425049966687, - "grad_norm": 0.5386358859158857, - "learning_rate": 2.487470310374346e-07, - "loss": 0.0215, - "step": 8187 - }, - { - "epoch": 3.6366866533422164, - "grad_norm": 0.38408368355582256, - "learning_rate": 2.481435602183485e-07, - "loss": 0.0227, - "step": 8188 - }, - { - "epoch": 3.6371308016877637, - "grad_norm": 0.3671311848219369, - "learning_rate": 2.475408036840055e-07, - "loss": 0.0203, - "step": 8189 - }, - { - "epoch": 3.637574950033311, - "grad_norm": 0.46572660180603004, - "learning_rate": 2.469387615250096e-07, - "loss": 0.0274, - "step": 8190 - }, - { - "epoch": 3.6380190983788587, - "grad_norm": 0.4911123345635677, - "learning_rate": 2.4633743383185917e-07, - "loss": 0.0252, - "step": 8191 - }, - { - "epoch": 3.638463246724406, - "grad_norm": 0.3574418971645557, - "learning_rate": 2.4573682069494234e-07, - "loss": 0.0205, - "step": 8192 - }, - { - "epoch": 3.6389073950699533, - "grad_norm": 0.4154556222921757, - "learning_rate": 2.451369222045419e-07, - "loss": 0.0248, - "step": 8193 - }, - { - "epoch": 3.6393515434155006, - "grad_norm": 0.3834951658389965, - "learning_rate": 2.445377384508335e-07, - "loss": 0.0228, - "step": 8194 - }, - { - "epoch": 3.6397956917610483, - "grad_norm": 0.3958357851929866, - "learning_rate": 2.4393926952388405e-07, - "loss": 0.0195, - "step": 8195 - }, - { - "epoch": 3.6402398401065956, - "grad_norm": 0.481708316236042, - "learning_rate": 2.433415155136543e-07, - "loss": 0.0245, - "step": 8196 - }, - { - "epoch": 3.640683988452143, - "grad_norm": 0.45974860157380204, - "learning_rate": 2.427444765099951e-07, - "loss": 0.0228, - "step": 8197 - }, - { - "epoch": 3.6411281367976907, - "grad_norm": 0.4731819261447121, - "learning_rate": 2.4214815260265367e-07, - "loss": 0.0498, - "step": 8198 - }, - { - "epoch": 3.641572285143238, - "grad_norm": 0.40140095766392536, - "learning_rate": 2.4155254388126605e-07, - "loss": 0.0309, - "step": 8199 - }, - { - "epoch": 3.6420164334887852, - "grad_norm": 0.34222283638473855, - "learning_rate": 2.4095765043536335e-07, - "loss": 0.0196, - "step": 8200 - }, - { - "epoch": 3.6424605818343325, - "grad_norm": 0.4008139911989502, - "learning_rate": 2.403634723543674e-07, - "loss": 0.0163, - "step": 8201 - }, - { - "epoch": 3.64290473017988, - "grad_norm": 0.4485823633640726, - "learning_rate": 2.3977000972759454e-07, - "loss": 0.0247, - "step": 8202 - }, - { - "epoch": 3.6433488785254275, - "grad_norm": 0.32021601023059565, - "learning_rate": 2.391772626442507e-07, - "loss": 0.0162, - "step": 8203 - }, - { - "epoch": 3.643793026870975, - "grad_norm": 0.43897160516812744, - "learning_rate": 2.385852311934367e-07, - "loss": 0.0192, - "step": 8204 - }, - { - "epoch": 3.6442371752165226, - "grad_norm": 0.35383635594441887, - "learning_rate": 2.379939154641442e-07, - "loss": 0.0224, - "step": 8205 - }, - { - "epoch": 3.64468132356207, - "grad_norm": 0.4656360521009951, - "learning_rate": 2.3740331554525875e-07, - "loss": 0.0281, - "step": 8206 - }, - { - "epoch": 3.645125471907617, - "grad_norm": 0.3752654330901412, - "learning_rate": 2.3681343152555768e-07, - "loss": 0.021, - "step": 8207 - }, - { - "epoch": 3.6455696202531644, - "grad_norm": 0.43380693965119627, - "learning_rate": 2.3622426349371064e-07, - "loss": 0.0233, - "step": 8208 - }, - { - "epoch": 3.6460137685987117, - "grad_norm": 0.3468026081865424, - "learning_rate": 2.3563581153827897e-07, - "loss": 0.0208, - "step": 8209 - }, - { - "epoch": 3.6464579169442595, - "grad_norm": 0.3963696177580075, - "learning_rate": 2.3504807574771638e-07, - "loss": 0.0234, - "step": 8210 - }, - { - "epoch": 3.6469020652898068, - "grad_norm": 0.3493278085805506, - "learning_rate": 2.3446105621037108e-07, - "loss": 0.018, - "step": 8211 - }, - { - "epoch": 3.647346213635354, - "grad_norm": 0.448389498368257, - "learning_rate": 2.3387475301448138e-07, - "loss": 0.0262, - "step": 8212 - }, - { - "epoch": 3.647790361980902, - "grad_norm": 0.4639744403941518, - "learning_rate": 2.33289166248179e-07, - "loss": 0.0299, - "step": 8213 - }, - { - "epoch": 3.648234510326449, - "grad_norm": 0.34846046299317557, - "learning_rate": 2.327042959994863e-07, - "loss": 0.0197, - "step": 8214 - }, - { - "epoch": 3.6486786586719964, - "grad_norm": 0.35001828631869614, - "learning_rate": 2.3212014235632074e-07, - "loss": 0.0184, - "step": 8215 - }, - { - "epoch": 3.6491228070175437, - "grad_norm": 0.3539191157147139, - "learning_rate": 2.3153670540648932e-07, - "loss": 0.0202, - "step": 8216 - }, - { - "epoch": 3.6495669553630914, - "grad_norm": 0.39166688802077126, - "learning_rate": 2.3095398523769353e-07, - "loss": 0.0237, - "step": 8217 - }, - { - "epoch": 3.6500111037086387, - "grad_norm": 0.41163896619179474, - "learning_rate": 2.3037198193752553e-07, - "loss": 0.0254, - "step": 8218 - }, - { - "epoch": 3.650455252054186, - "grad_norm": 0.43685004953046314, - "learning_rate": 2.2979069559347088e-07, - "loss": 0.0237, - "step": 8219 - }, - { - "epoch": 3.6508994003997337, - "grad_norm": 0.5309766077496497, - "learning_rate": 2.292101262929064e-07, - "loss": 0.0292, - "step": 8220 - }, - { - "epoch": 3.651343548745281, - "grad_norm": 0.41452507271756617, - "learning_rate": 2.2863027412310056e-07, - "loss": 0.0292, - "step": 8221 - }, - { - "epoch": 3.6517876970908283, - "grad_norm": 0.44136007566907587, - "learning_rate": 2.2805113917121647e-07, - "loss": 0.0284, - "step": 8222 - }, - { - "epoch": 3.6522318454363756, - "grad_norm": 0.3476979126592021, - "learning_rate": 2.274727215243072e-07, - "loss": 0.0197, - "step": 8223 - }, - { - "epoch": 3.6526759937819233, - "grad_norm": 0.38817929599514056, - "learning_rate": 2.2689502126931938e-07, - "loss": 0.0224, - "step": 8224 - }, - { - "epoch": 3.6531201421274706, - "grad_norm": 0.4746782142221425, - "learning_rate": 2.2631803849309076e-07, - "loss": 0.0352, - "step": 8225 - }, - { - "epoch": 3.653564290473018, - "grad_norm": 0.40968095773887653, - "learning_rate": 2.2574177328235137e-07, - "loss": 0.0276, - "step": 8226 - }, - { - "epoch": 3.6540084388185656, - "grad_norm": 0.41401682518721233, - "learning_rate": 2.2516622572372416e-07, - "loss": 0.0283, - "step": 8227 - }, - { - "epoch": 3.654452587164113, - "grad_norm": 0.4117127298389683, - "learning_rate": 2.2459139590372325e-07, - "loss": 0.0228, - "step": 8228 - }, - { - "epoch": 3.65489673550966, - "grad_norm": 0.4311069896164078, - "learning_rate": 2.240172839087551e-07, - "loss": 0.0281, - "step": 8229 - }, - { - "epoch": 3.6553408838552075, - "grad_norm": 0.4327836235259352, - "learning_rate": 2.2344388982512012e-07, - "loss": 0.0228, - "step": 8230 - }, - { - "epoch": 3.655785032200755, - "grad_norm": 0.4573606402349546, - "learning_rate": 2.2287121373900712e-07, - "loss": 0.0319, - "step": 8231 - }, - { - "epoch": 3.6562291805463025, - "grad_norm": 0.4345901205357684, - "learning_rate": 2.2229925573650001e-07, - "loss": 0.0269, - "step": 8232 - }, - { - "epoch": 3.65667332889185, - "grad_norm": 0.3825914316334593, - "learning_rate": 2.2172801590357395e-07, - "loss": 0.0282, - "step": 8233 - }, - { - "epoch": 3.6571174772373976, - "grad_norm": 0.31366037192320456, - "learning_rate": 2.2115749432609524e-07, - "loss": 0.0167, - "step": 8234 - }, - { - "epoch": 3.657561625582945, - "grad_norm": 0.3788275375636668, - "learning_rate": 2.205876910898236e-07, - "loss": 0.0306, - "step": 8235 - }, - { - "epoch": 3.658005773928492, - "grad_norm": 0.38419523963619157, - "learning_rate": 2.2001860628041106e-07, - "loss": 0.0299, - "step": 8236 - }, - { - "epoch": 3.6584499222740394, - "grad_norm": 0.4550088476883181, - "learning_rate": 2.1945023998339865e-07, - "loss": 0.0347, - "step": 8237 - }, - { - "epoch": 3.6588940706195867, - "grad_norm": 0.597248639500098, - "learning_rate": 2.1888259228422248e-07, - "loss": 0.0194, - "step": 8238 - }, - { - "epoch": 3.6593382189651344, - "grad_norm": 0.5188207400851287, - "learning_rate": 2.1831566326820986e-07, - "loss": 0.0276, - "step": 8239 - }, - { - "epoch": 3.6597823673106817, - "grad_norm": 0.32831002856910196, - "learning_rate": 2.177494530205798e-07, - "loss": 0.0245, - "step": 8240 - }, - { - "epoch": 3.660226515656229, - "grad_norm": 0.3836254147289484, - "learning_rate": 2.1718396162644319e-07, - "loss": 0.0212, - "step": 8241 - }, - { - "epoch": 3.6606706640017768, - "grad_norm": 0.4548723138202921, - "learning_rate": 2.1661918917080304e-07, - "loss": 0.0253, - "step": 8242 - }, - { - "epoch": 3.661114812347324, - "grad_norm": 0.3684605663304441, - "learning_rate": 2.1605513573855375e-07, - "loss": 0.0178, - "step": 8243 - }, - { - "epoch": 3.6615589606928713, - "grad_norm": 0.3733701839690256, - "learning_rate": 2.1549180141448356e-07, - "loss": 0.0194, - "step": 8244 - }, - { - "epoch": 3.6620031090384186, - "grad_norm": 0.4105795750947663, - "learning_rate": 2.1492918628326864e-07, - "loss": 0.0148, - "step": 8245 - }, - { - "epoch": 3.6624472573839664, - "grad_norm": 0.41761384153375, - "learning_rate": 2.143672904294819e-07, - "loss": 0.0293, - "step": 8246 - }, - { - "epoch": 3.6628914057295137, - "grad_norm": 0.3728653241577783, - "learning_rate": 2.1380611393758576e-07, - "loss": 0.0223, - "step": 8247 - }, - { - "epoch": 3.663335554075061, - "grad_norm": 0.3955291650313776, - "learning_rate": 2.1324565689193332e-07, - "loss": 0.025, - "step": 8248 - }, - { - "epoch": 3.6637797024206087, - "grad_norm": 0.5976258676447095, - "learning_rate": 2.1268591937677164e-07, - "loss": 0.0328, - "step": 8249 - }, - { - "epoch": 3.664223850766156, - "grad_norm": 0.7950146359972061, - "learning_rate": 2.1212690147623894e-07, - "loss": 0.0295, - "step": 8250 - }, - { - "epoch": 3.6646679991117033, - "grad_norm": 0.5530704513836777, - "learning_rate": 2.1156860327436302e-07, - "loss": 0.0302, - "step": 8251 - }, - { - "epoch": 3.6651121474572506, - "grad_norm": 0.29807912775738393, - "learning_rate": 2.1101102485506842e-07, - "loss": 0.0178, - "step": 8252 - }, - { - "epoch": 3.6655562958027983, - "grad_norm": 0.4517710106710957, - "learning_rate": 2.1045416630216808e-07, - "loss": 0.0242, - "step": 8253 - }, - { - "epoch": 3.6660004441483456, - "grad_norm": 0.4254157446074955, - "learning_rate": 2.0989802769936563e-07, - "loss": 0.0268, - "step": 8254 - }, - { - "epoch": 3.666444592493893, - "grad_norm": 0.3693301648290188, - "learning_rate": 2.0934260913025973e-07, - "loss": 0.0266, - "step": 8255 - }, - { - "epoch": 3.6668887408394406, - "grad_norm": 0.4631155691374193, - "learning_rate": 2.0878791067833805e-07, - "loss": 0.0293, - "step": 8256 - }, - { - "epoch": 3.667332889184988, - "grad_norm": 0.3868441750288522, - "learning_rate": 2.0823393242698275e-07, - "loss": 0.0261, - "step": 8257 - }, - { - "epoch": 3.667777037530535, - "grad_norm": 0.430361544922243, - "learning_rate": 2.0768067445946506e-07, - "loss": 0.0247, - "step": 8258 - }, - { - "epoch": 3.6682211858760825, - "grad_norm": 0.4116233802052011, - "learning_rate": 2.0712813685894894e-07, - "loss": 0.0315, - "step": 8259 - }, - { - "epoch": 3.6686653342216298, - "grad_norm": 0.3788192226900461, - "learning_rate": 2.0657631970849078e-07, - "loss": 0.0229, - "step": 8260 - }, - { - "epoch": 3.6691094825671775, - "grad_norm": 0.37557385002997257, - "learning_rate": 2.0602522309103813e-07, - "loss": 0.0236, - "step": 8261 - }, - { - "epoch": 3.669553630912725, - "grad_norm": 0.3805674438167674, - "learning_rate": 2.054748470894291e-07, - "loss": 0.0252, - "step": 8262 - }, - { - "epoch": 3.6699977792582725, - "grad_norm": 0.3690281782315337, - "learning_rate": 2.0492519178639536e-07, - "loss": 0.0161, - "step": 8263 - }, - { - "epoch": 3.67044192760382, - "grad_norm": 0.7116450161368049, - "learning_rate": 2.0437625726456024e-07, - "loss": 0.0292, - "step": 8264 - }, - { - "epoch": 3.670886075949367, - "grad_norm": 0.5939366820148858, - "learning_rate": 2.0382804360643603e-07, - "loss": 0.0382, - "step": 8265 - }, - { - "epoch": 3.6713302242949144, - "grad_norm": 0.4269858268062977, - "learning_rate": 2.0328055089443023e-07, - "loss": 0.0236, - "step": 8266 - }, - { - "epoch": 3.6717743726404617, - "grad_norm": 0.43163831702859096, - "learning_rate": 2.027337792108397e-07, - "loss": 0.0244, - "step": 8267 - }, - { - "epoch": 3.6722185209860094, - "grad_norm": 0.5025109334901467, - "learning_rate": 2.0218772863785263e-07, - "loss": 0.0296, - "step": 8268 - }, - { - "epoch": 3.6726626693315567, - "grad_norm": 0.6343391256263229, - "learning_rate": 2.016423992575517e-07, - "loss": 0.0248, - "step": 8269 - }, - { - "epoch": 3.673106817677104, - "grad_norm": 0.4000275239829487, - "learning_rate": 2.0109779115190742e-07, - "loss": 0.0198, - "step": 8270 - }, - { - "epoch": 3.6735509660226517, - "grad_norm": 0.3303791647606292, - "learning_rate": 2.0055390440278376e-07, - "loss": 0.0258, - "step": 8271 - }, - { - "epoch": 3.673995114368199, - "grad_norm": 0.36510639486980745, - "learning_rate": 2.0001073909193702e-07, - "loss": 0.0173, - "step": 8272 - }, - { - "epoch": 3.6744392627137463, - "grad_norm": 0.4445660217933993, - "learning_rate": 1.9946829530101408e-07, - "loss": 0.0279, - "step": 8273 - }, - { - "epoch": 3.6748834110592936, - "grad_norm": 0.3323295635321309, - "learning_rate": 1.989265731115525e-07, - "loss": 0.0211, - "step": 8274 - }, - { - "epoch": 3.6753275594048413, - "grad_norm": 0.4082377088884518, - "learning_rate": 1.983855726049838e-07, - "loss": 0.0281, - "step": 8275 - }, - { - "epoch": 3.6757717077503886, - "grad_norm": 0.5538219755733254, - "learning_rate": 1.9784529386262798e-07, - "loss": 0.0375, - "step": 8276 - }, - { - "epoch": 3.676215856095936, - "grad_norm": 0.4054313962604177, - "learning_rate": 1.9730573696569888e-07, - "loss": 0.0221, - "step": 8277 - }, - { - "epoch": 3.6766600044414837, - "grad_norm": 0.338502488617252, - "learning_rate": 1.9676690199530169e-07, - "loss": 0.0231, - "step": 8278 - }, - { - "epoch": 3.677104152787031, - "grad_norm": 0.4016411596672554, - "learning_rate": 1.9622878903243104e-07, - "loss": 0.024, - "step": 8279 - }, - { - "epoch": 3.6775483011325782, - "grad_norm": 0.4045075471165877, - "learning_rate": 1.95691398157975e-07, - "loss": 0.0179, - "step": 8280 - }, - { - "epoch": 3.6779924494781255, - "grad_norm": 0.40898342731485937, - "learning_rate": 1.9515472945271396e-07, - "loss": 0.0337, - "step": 8281 - }, - { - "epoch": 3.6784365978236733, - "grad_norm": 0.3742680249776215, - "learning_rate": 1.946187829973162e-07, - "loss": 0.0181, - "step": 8282 - }, - { - "epoch": 3.6788807461692206, - "grad_norm": 0.38742125913942227, - "learning_rate": 1.9408355887234443e-07, - "loss": 0.0267, - "step": 8283 - }, - { - "epoch": 3.679324894514768, - "grad_norm": 0.31803662853945275, - "learning_rate": 1.9354905715825323e-07, - "loss": 0.0106, - "step": 8284 - }, - { - "epoch": 3.6797690428603156, - "grad_norm": 0.3842318920408417, - "learning_rate": 1.9301527793538445e-07, - "loss": 0.0208, - "step": 8285 - }, - { - "epoch": 3.680213191205863, - "grad_norm": 0.34972059951342765, - "learning_rate": 1.9248222128397663e-07, - "loss": 0.0209, - "step": 8286 - }, - { - "epoch": 3.68065733955141, - "grad_norm": 0.4945754865360583, - "learning_rate": 1.9194988728415632e-07, - "loss": 0.0285, - "step": 8287 - }, - { - "epoch": 3.6811014878969575, - "grad_norm": 0.4665722830251576, - "learning_rate": 1.9141827601594221e-07, - "loss": 0.0228, - "step": 8288 - }, - { - "epoch": 3.6815456362425047, - "grad_norm": 0.33231118424653433, - "learning_rate": 1.908873875592454e-07, - "loss": 0.0215, - "step": 8289 - }, - { - "epoch": 3.6819897845880525, - "grad_norm": 0.41202362800291603, - "learning_rate": 1.9035722199386542e-07, - "loss": 0.0219, - "step": 8290 - }, - { - "epoch": 3.6824339329335998, - "grad_norm": 0.4209126047122881, - "learning_rate": 1.8982777939949736e-07, - "loss": 0.0356, - "step": 8291 - }, - { - "epoch": 3.6828780812791475, - "grad_norm": 0.35749851244891667, - "learning_rate": 1.8929905985572484e-07, - "loss": 0.0194, - "step": 8292 - }, - { - "epoch": 3.683322229624695, - "grad_norm": 0.38704831620698, - "learning_rate": 1.8877106344202312e-07, - "loss": 0.0329, - "step": 8293 - }, - { - "epoch": 3.683766377970242, - "grad_norm": 0.526720346124437, - "learning_rate": 1.8824379023775874e-07, - "loss": 0.0357, - "step": 8294 - }, - { - "epoch": 3.6842105263157894, - "grad_norm": 0.42113199922195677, - "learning_rate": 1.877172403221905e-07, - "loss": 0.0315, - "step": 8295 - }, - { - "epoch": 3.6846546746613367, - "grad_norm": 0.3078574734152821, - "learning_rate": 1.871914137744668e-07, - "loss": 0.0165, - "step": 8296 - }, - { - "epoch": 3.6850988230068844, - "grad_norm": 0.4123152188226272, - "learning_rate": 1.866663106736294e-07, - "loss": 0.0401, - "step": 8297 - }, - { - "epoch": 3.6855429713524317, - "grad_norm": 0.3318217390451302, - "learning_rate": 1.8614193109860955e-07, - "loss": 0.0228, - "step": 8298 - }, - { - "epoch": 3.685987119697979, - "grad_norm": 0.4377759250920508, - "learning_rate": 1.8561827512823095e-07, - "loss": 0.0271, - "step": 8299 - }, - { - "epoch": 3.6864312680435267, - "grad_norm": 0.8207219751826093, - "learning_rate": 1.8509534284120721e-07, - "loss": 0.0344, - "step": 8300 - }, - { - "epoch": 3.686875416389074, - "grad_norm": 0.8375758066775824, - "learning_rate": 1.84573134316145e-07, - "loss": 0.0367, - "step": 8301 - }, - { - "epoch": 3.6873195647346213, - "grad_norm": 0.36428841642403553, - "learning_rate": 1.840516496315392e-07, - "loss": 0.0217, - "step": 8302 - }, - { - "epoch": 3.6877637130801686, - "grad_norm": 0.5076188683241221, - "learning_rate": 1.8353088886578053e-07, - "loss": 0.0294, - "step": 8303 - }, - { - "epoch": 3.6882078614257163, - "grad_norm": 0.6798347292105588, - "learning_rate": 1.830108520971463e-07, - "loss": 0.0405, - "step": 8304 - }, - { - "epoch": 3.6886520097712636, - "grad_norm": 0.3568052667311966, - "learning_rate": 1.8249153940380738e-07, - "loss": 0.0279, - "step": 8305 - }, - { - "epoch": 3.689096158116811, - "grad_norm": 0.4023381862191607, - "learning_rate": 1.8197295086382515e-07, - "loss": 0.0271, - "step": 8306 - }, - { - "epoch": 3.6895403064623586, - "grad_norm": 0.3720480087573231, - "learning_rate": 1.8145508655515177e-07, - "loss": 0.0199, - "step": 8307 - }, - { - "epoch": 3.689984454807906, - "grad_norm": 0.4144873947838932, - "learning_rate": 1.8093794655563214e-07, - "loss": 0.0215, - "step": 8308 - }, - { - "epoch": 3.6904286031534532, - "grad_norm": 0.39511282629465616, - "learning_rate": 1.804215309430013e-07, - "loss": 0.0175, - "step": 8309 - }, - { - "epoch": 3.6908727514990005, - "grad_norm": 0.5527099360678772, - "learning_rate": 1.799058397948844e-07, - "loss": 0.0381, - "step": 8310 - }, - { - "epoch": 3.691316899844548, - "grad_norm": 0.46007077628363413, - "learning_rate": 1.7939087318879833e-07, - "loss": 0.0333, - "step": 8311 - }, - { - "epoch": 3.6917610481900955, - "grad_norm": 0.43813513461840425, - "learning_rate": 1.788766312021528e-07, - "loss": 0.0298, - "step": 8312 - }, - { - "epoch": 3.692205196535643, - "grad_norm": 0.4101350408780889, - "learning_rate": 1.7836311391224494e-07, - "loss": 0.0259, - "step": 8313 - }, - { - "epoch": 3.6926493448811906, - "grad_norm": 0.2992536206219798, - "learning_rate": 1.7785032139626734e-07, - "loss": 0.0185, - "step": 8314 - }, - { - "epoch": 3.693093493226738, - "grad_norm": 0.365121204268502, - "learning_rate": 1.7733825373129954e-07, - "loss": 0.0216, - "step": 8315 - }, - { - "epoch": 3.693537641572285, - "grad_norm": 0.416511886265595, - "learning_rate": 1.7682691099431548e-07, - "loss": 0.0372, - "step": 8316 - }, - { - "epoch": 3.6939817899178324, - "grad_norm": 0.3845252122577115, - "learning_rate": 1.763162932621787e-07, - "loss": 0.0257, - "step": 8317 - }, - { - "epoch": 3.6944259382633797, - "grad_norm": 0.4233868381729098, - "learning_rate": 1.7580640061164223e-07, - "loss": 0.0294, - "step": 8318 - }, - { - "epoch": 3.6948700866089275, - "grad_norm": 0.462463687883106, - "learning_rate": 1.7529723311935198e-07, - "loss": 0.0364, - "step": 8319 - }, - { - "epoch": 3.6953142349544748, - "grad_norm": 0.43549161369245626, - "learning_rate": 1.7478879086184564e-07, - "loss": 0.0263, - "step": 8320 - }, - { - "epoch": 3.695758383300022, - "grad_norm": 0.39333187210295867, - "learning_rate": 1.742810739155504e-07, - "loss": 0.0246, - "step": 8321 - }, - { - "epoch": 3.6962025316455698, - "grad_norm": 0.35014124457318635, - "learning_rate": 1.737740823567835e-07, - "loss": 0.0218, - "step": 8322 - }, - { - "epoch": 3.696646679991117, - "grad_norm": 0.42978622961517393, - "learning_rate": 1.7326781626175627e-07, - "loss": 0.0257, - "step": 8323 - }, - { - "epoch": 3.6970908283366644, - "grad_norm": 0.4118470726335261, - "learning_rate": 1.727622757065678e-07, - "loss": 0.0261, - "step": 8324 - }, - { - "epoch": 3.6975349766822116, - "grad_norm": 0.471980787223015, - "learning_rate": 1.7225746076720894e-07, - "loss": 0.0226, - "step": 8325 - }, - { - "epoch": 3.6979791250277594, - "grad_norm": 0.4565879663220365, - "learning_rate": 1.717533715195635e-07, - "loss": 0.0248, - "step": 8326 - }, - { - "epoch": 3.6984232733733067, - "grad_norm": 0.3976292965837061, - "learning_rate": 1.712500080394036e-07, - "loss": 0.0324, - "step": 8327 - }, - { - "epoch": 3.698867421718854, - "grad_norm": 0.4111827692476157, - "learning_rate": 1.7074737040239375e-07, - "loss": 0.0248, - "step": 8328 - }, - { - "epoch": 3.6993115700644017, - "grad_norm": 0.36585319212766676, - "learning_rate": 1.7024545868408903e-07, - "loss": 0.0209, - "step": 8329 - }, - { - "epoch": 3.699755718409949, - "grad_norm": 0.47005920699211934, - "learning_rate": 1.6974427295993412e-07, - "loss": 0.0203, - "step": 8330 - }, - { - "epoch": 3.7001998667554963, - "grad_norm": 0.36932538349452104, - "learning_rate": 1.6924381330526817e-07, - "loss": 0.0212, - "step": 8331 - }, - { - "epoch": 3.7006440151010436, - "grad_norm": 0.446723580160887, - "learning_rate": 1.6874407979531604e-07, - "loss": 0.0247, - "step": 8332 - }, - { - "epoch": 3.7010881634465913, - "grad_norm": 0.445687716290222, - "learning_rate": 1.682450725051976e-07, - "loss": 0.0251, - "step": 8333 - }, - { - "epoch": 3.7015323117921386, - "grad_norm": 0.3855963805554051, - "learning_rate": 1.677467915099229e-07, - "loss": 0.0161, - "step": 8334 - }, - { - "epoch": 3.701976460137686, - "grad_norm": 0.39452015734075696, - "learning_rate": 1.6724923688439033e-07, - "loss": 0.0238, - "step": 8335 - }, - { - "epoch": 3.7024206084832336, - "grad_norm": 0.45003118141173504, - "learning_rate": 1.667524087033906e-07, - "loss": 0.0264, - "step": 8336 - }, - { - "epoch": 3.702864756828781, - "grad_norm": 0.42598757489225364, - "learning_rate": 1.6625630704160788e-07, - "loss": 0.0336, - "step": 8337 - }, - { - "epoch": 3.703308905174328, - "grad_norm": 0.38079855634441273, - "learning_rate": 1.6576093197361253e-07, - "loss": 0.0286, - "step": 8338 - }, - { - "epoch": 3.7037530535198755, - "grad_norm": 0.3983365488271041, - "learning_rate": 1.652662835738683e-07, - "loss": 0.0205, - "step": 8339 - }, - { - "epoch": 3.704197201865423, - "grad_norm": 0.29963760935621175, - "learning_rate": 1.6477236191673018e-07, - "loss": 0.0189, - "step": 8340 - }, - { - "epoch": 3.7046413502109705, - "grad_norm": 0.771557737936414, - "learning_rate": 1.6427916707644153e-07, - "loss": 0.0319, - "step": 8341 - }, - { - "epoch": 3.705085498556518, - "grad_norm": 0.4894556058318927, - "learning_rate": 1.6378669912713862e-07, - "loss": 0.0316, - "step": 8342 - }, - { - "epoch": 3.7055296469020655, - "grad_norm": 0.3795606721666376, - "learning_rate": 1.6329495814284778e-07, - "loss": 0.0196, - "step": 8343 - }, - { - "epoch": 3.705973795247613, - "grad_norm": 0.3778626203357572, - "learning_rate": 1.62803944197486e-07, - "loss": 0.0266, - "step": 8344 - }, - { - "epoch": 3.70641794359316, - "grad_norm": 0.4426801167498142, - "learning_rate": 1.6231365736486093e-07, - "loss": 0.0288, - "step": 8345 - }, - { - "epoch": 3.7068620919387074, - "grad_norm": 0.35264417606088994, - "learning_rate": 1.6182409771867137e-07, - "loss": 0.0189, - "step": 8346 - }, - { - "epoch": 3.7073062402842547, - "grad_norm": 0.38892860903541526, - "learning_rate": 1.6133526533250566e-07, - "loss": 0.0318, - "step": 8347 - }, - { - "epoch": 3.7077503886298024, - "grad_norm": 0.3867272658310112, - "learning_rate": 1.6084716027984503e-07, - "loss": 0.0246, - "step": 8348 - }, - { - "epoch": 3.7081945369753497, - "grad_norm": 0.6221413345427776, - "learning_rate": 1.6035978263405804e-07, - "loss": 0.0333, - "step": 8349 - }, - { - "epoch": 3.708638685320897, - "grad_norm": 0.5154361170774239, - "learning_rate": 1.5987313246840718e-07, - "loss": 0.0342, - "step": 8350 - }, - { - "epoch": 3.7090828336664448, - "grad_norm": 0.3524314183478963, - "learning_rate": 1.593872098560445e-07, - "loss": 0.0211, - "step": 8351 - }, - { - "epoch": 3.709526982011992, - "grad_norm": 0.38476894824992575, - "learning_rate": 1.58902014870011e-07, - "loss": 0.0211, - "step": 8352 - }, - { - "epoch": 3.7099711303575393, - "grad_norm": 0.4245656298307411, - "learning_rate": 1.5841754758324058e-07, - "loss": 0.0209, - "step": 8353 - }, - { - "epoch": 3.7104152787030866, - "grad_norm": 0.3820971674731869, - "learning_rate": 1.579338080685572e-07, - "loss": 0.0221, - "step": 8354 - }, - { - "epoch": 3.7108594270486344, - "grad_norm": 0.37340680941918303, - "learning_rate": 1.5745079639867488e-07, - "loss": 0.0297, - "step": 8355 - }, - { - "epoch": 3.7113035753941817, - "grad_norm": 0.29722253097229934, - "learning_rate": 1.5696851264619785e-07, - "loss": 0.0178, - "step": 8356 - }, - { - "epoch": 3.711747723739729, - "grad_norm": 0.4252952177272931, - "learning_rate": 1.5648695688362304e-07, - "loss": 0.0257, - "step": 8357 - }, - { - "epoch": 3.7121918720852767, - "grad_norm": 0.49778617697788585, - "learning_rate": 1.560061291833348e-07, - "loss": 0.0377, - "step": 8358 - }, - { - "epoch": 3.712636020430824, - "grad_norm": 0.45908962801821235, - "learning_rate": 1.5552602961761033e-07, - "loss": 0.0236, - "step": 8359 - }, - { - "epoch": 3.7130801687763713, - "grad_norm": 0.35236790788170025, - "learning_rate": 1.5504665825861687e-07, - "loss": 0.018, - "step": 8360 - }, - { - "epoch": 3.7135243171219185, - "grad_norm": 0.516154033831261, - "learning_rate": 1.5456801517841236e-07, - "loss": 0.0338, - "step": 8361 - }, - { - "epoch": 3.7139684654674663, - "grad_norm": 0.2906873978074194, - "learning_rate": 1.540901004489448e-07, - "loss": 0.0257, - "step": 8362 - }, - { - "epoch": 3.7144126138130136, - "grad_norm": 0.363688530548789, - "learning_rate": 1.5361291414205226e-07, - "loss": 0.0175, - "step": 8363 - }, - { - "epoch": 3.714856762158561, - "grad_norm": 0.4720314053715725, - "learning_rate": 1.5313645632946407e-07, - "loss": 0.0406, - "step": 8364 - }, - { - "epoch": 3.7153009105041086, - "grad_norm": 0.36832914963768065, - "learning_rate": 1.5266072708280177e-07, - "loss": 0.0232, - "step": 8365 - }, - { - "epoch": 3.715745058849656, - "grad_norm": 0.40219479844398387, - "learning_rate": 1.5218572647357265e-07, - "loss": 0.0325, - "step": 8366 - }, - { - "epoch": 3.716189207195203, - "grad_norm": 0.3686119581029194, - "learning_rate": 1.517114545731796e-07, - "loss": 0.019, - "step": 8367 - }, - { - "epoch": 3.7166333555407505, - "grad_norm": 0.4442003891466178, - "learning_rate": 1.5123791145291332e-07, - "loss": 0.0275, - "step": 8368 - }, - { - "epoch": 3.7170775038862978, - "grad_norm": 0.36490090535494746, - "learning_rate": 1.5076509718395416e-07, - "loss": 0.0155, - "step": 8369 - }, - { - "epoch": 3.7175216522318455, - "grad_norm": 0.4009221914723727, - "learning_rate": 1.502930118373752e-07, - "loss": 0.0223, - "step": 8370 - }, - { - "epoch": 3.717965800577393, - "grad_norm": 0.43768126067136603, - "learning_rate": 1.4982165548413862e-07, - "loss": 0.0192, - "step": 8371 - }, - { - "epoch": 3.7184099489229405, - "grad_norm": 0.5261889966594541, - "learning_rate": 1.4935102819509717e-07, - "loss": 0.0353, - "step": 8372 - }, - { - "epoch": 3.718854097268488, - "grad_norm": 0.34594837099051023, - "learning_rate": 1.488811300409948e-07, - "loss": 0.0188, - "step": 8373 - }, - { - "epoch": 3.719298245614035, - "grad_norm": 0.5250244707730272, - "learning_rate": 1.4841196109246448e-07, - "loss": 0.0355, - "step": 8374 - }, - { - "epoch": 3.7197423939595824, - "grad_norm": 0.3336300196833185, - "learning_rate": 1.4794352142003088e-07, - "loss": 0.0199, - "step": 8375 - }, - { - "epoch": 3.7201865423051297, - "grad_norm": 0.37689522491294797, - "learning_rate": 1.4747581109410713e-07, - "loss": 0.0174, - "step": 8376 - }, - { - "epoch": 3.7206306906506774, - "grad_norm": 0.48406735678739193, - "learning_rate": 1.4700883018499979e-07, - "loss": 0.025, - "step": 8377 - }, - { - "epoch": 3.7210748389962247, - "grad_norm": 0.45821951845296044, - "learning_rate": 1.4654257876290267e-07, - "loss": 0.0392, - "step": 8378 - }, - { - "epoch": 3.721518987341772, - "grad_norm": 0.43841923148082784, - "learning_rate": 1.4607705689790197e-07, - "loss": 0.0213, - "step": 8379 - }, - { - "epoch": 3.7219631356873197, - "grad_norm": 0.38180624149856224, - "learning_rate": 1.4561226465997337e-07, - "loss": 0.0244, - "step": 8380 - }, - { - "epoch": 3.722407284032867, - "grad_norm": 0.38238991365264746, - "learning_rate": 1.4514820211898263e-07, - "loss": 0.0235, - "step": 8381 - }, - { - "epoch": 3.7228514323784143, - "grad_norm": 0.4147995047117634, - "learning_rate": 1.4468486934468728e-07, - "loss": 0.0244, - "step": 8382 - }, - { - "epoch": 3.7232955807239616, - "grad_norm": 0.37825138010304116, - "learning_rate": 1.442222664067333e-07, - "loss": 0.0204, - "step": 8383 - }, - { - "epoch": 3.7237397290695093, - "grad_norm": 0.43713370136688856, - "learning_rate": 1.437603933746573e-07, - "loss": 0.0283, - "step": 8384 - }, - { - "epoch": 3.7241838774150566, - "grad_norm": 0.3959829514177559, - "learning_rate": 1.4329925031788815e-07, - "loss": 0.024, - "step": 8385 - }, - { - "epoch": 3.724628025760604, - "grad_norm": 0.36795641365580717, - "learning_rate": 1.4283883730574212e-07, - "loss": 0.0242, - "step": 8386 - }, - { - "epoch": 3.7250721741061517, - "grad_norm": 0.33923664126749403, - "learning_rate": 1.4237915440742768e-07, - "loss": 0.0259, - "step": 8387 - }, - { - "epoch": 3.725516322451699, - "grad_norm": 0.43212191542717354, - "learning_rate": 1.4192020169204292e-07, - "loss": 0.0205, - "step": 8388 - }, - { - "epoch": 3.7259604707972462, - "grad_norm": 0.5186925399242536, - "learning_rate": 1.4146197922857597e-07, - "loss": 0.032, - "step": 8389 - }, - { - "epoch": 3.7264046191427935, - "grad_norm": 0.3400385668731632, - "learning_rate": 1.410044870859062e-07, - "loss": 0.0224, - "step": 8390 - }, - { - "epoch": 3.7268487674883413, - "grad_norm": 0.4227136704820241, - "learning_rate": 1.4054772533280137e-07, - "loss": 0.0217, - "step": 8391 - }, - { - "epoch": 3.7272929158338886, - "grad_norm": 0.4011138415574072, - "learning_rate": 1.4009169403792154e-07, - "loss": 0.025, - "step": 8392 - }, - { - "epoch": 3.727737064179436, - "grad_norm": 0.3003293847098222, - "learning_rate": 1.396363932698147e-07, - "loss": 0.0174, - "step": 8393 - }, - { - "epoch": 3.7281812125249836, - "grad_norm": 0.4465399668144551, - "learning_rate": 1.3918182309692164e-07, - "loss": 0.0361, - "step": 8394 - }, - { - "epoch": 3.728625360870531, - "grad_norm": 0.3571377988241023, - "learning_rate": 1.3872798358757155e-07, - "loss": 0.0186, - "step": 8395 - }, - { - "epoch": 3.729069509216078, - "grad_norm": 0.3618570693279145, - "learning_rate": 1.3827487480998437e-07, - "loss": 0.0204, - "step": 8396 - }, - { - "epoch": 3.7295136575616255, - "grad_norm": 0.4415246137854366, - "learning_rate": 1.3782249683226946e-07, - "loss": 0.0215, - "step": 8397 - }, - { - "epoch": 3.7299578059071727, - "grad_norm": 0.3988226147588891, - "learning_rate": 1.373708497224263e-07, - "loss": 0.025, - "step": 8398 - }, - { - "epoch": 3.7304019542527205, - "grad_norm": 0.3588668524798711, - "learning_rate": 1.3691993354834733e-07, - "loss": 0.017, - "step": 8399 - }, - { - "epoch": 3.7308461025982678, - "grad_norm": 0.4766338898375096, - "learning_rate": 1.3646974837781102e-07, - "loss": 0.0246, - "step": 8400 - }, - { - "epoch": 3.7312902509438155, - "grad_norm": 0.4346113992522893, - "learning_rate": 1.3602029427848885e-07, - "loss": 0.0224, - "step": 8401 - }, - { - "epoch": 3.731734399289363, - "grad_norm": 0.4308704837627075, - "learning_rate": 1.355715713179412e-07, - "loss": 0.0228, - "step": 8402 - }, - { - "epoch": 3.73217854763491, - "grad_norm": 0.3138469968218578, - "learning_rate": 1.35123579563618e-07, - "loss": 0.0211, - "step": 8403 - }, - { - "epoch": 3.7326226959804574, - "grad_norm": 0.3603696166551371, - "learning_rate": 1.346763190828604e-07, - "loss": 0.0202, - "step": 8404 - }, - { - "epoch": 3.7330668443260047, - "grad_norm": 0.472833085205998, - "learning_rate": 1.3422978994290014e-07, - "loss": 0.032, - "step": 8405 - }, - { - "epoch": 3.7335109926715524, - "grad_norm": 0.5373078797879084, - "learning_rate": 1.3378399221085691e-07, - "loss": 0.0331, - "step": 8406 - }, - { - "epoch": 3.7339551410170997, - "grad_norm": 0.32376977842916665, - "learning_rate": 1.3333892595374265e-07, - "loss": 0.0193, - "step": 8407 - }, - { - "epoch": 3.734399289362647, - "grad_norm": 0.5384681977899171, - "learning_rate": 1.3289459123845772e-07, - "loss": 0.0439, - "step": 8408 - }, - { - "epoch": 3.7348434377081947, - "grad_norm": 0.360367652411676, - "learning_rate": 1.3245098813179315e-07, - "loss": 0.0259, - "step": 8409 - }, - { - "epoch": 3.735287586053742, - "grad_norm": 0.34043662992938706, - "learning_rate": 1.3200811670043057e-07, - "loss": 0.0156, - "step": 8410 - }, - { - "epoch": 3.7357317343992893, - "grad_norm": 0.46845973568867966, - "learning_rate": 1.3156597701094065e-07, - "loss": 0.0223, - "step": 8411 - }, - { - "epoch": 3.7361758827448366, - "grad_norm": 0.4170724326037999, - "learning_rate": 1.3112456912978467e-07, - "loss": 0.0183, - "step": 8412 - }, - { - "epoch": 3.7366200310903843, - "grad_norm": 0.6062425923257145, - "learning_rate": 1.3068389312331398e-07, - "loss": 0.0326, - "step": 8413 - }, - { - "epoch": 3.7370641794359316, - "grad_norm": 0.32063888344376806, - "learning_rate": 1.3024394905776893e-07, - "loss": 0.0155, - "step": 8414 - }, - { - "epoch": 3.737508327781479, - "grad_norm": 0.4839068579509274, - "learning_rate": 1.298047369992811e-07, - "loss": 0.0285, - "step": 8415 - }, - { - "epoch": 3.7379524761270266, - "grad_norm": 0.4686148975397421, - "learning_rate": 1.2936625701387152e-07, - "loss": 0.0291, - "step": 8416 - }, - { - "epoch": 3.738396624472574, - "grad_norm": 0.40247010635972097, - "learning_rate": 1.289285091674508e-07, - "loss": 0.0248, - "step": 8417 - }, - { - "epoch": 3.738840772818121, - "grad_norm": 0.3788928148883821, - "learning_rate": 1.2849149352582135e-07, - "loss": 0.0277, - "step": 8418 - }, - { - "epoch": 3.7392849211636685, - "grad_norm": 0.45807438580819443, - "learning_rate": 1.280552101546717e-07, - "loss": 0.0311, - "step": 8419 - }, - { - "epoch": 3.7397290695092162, - "grad_norm": 0.513273904365022, - "learning_rate": 1.2761965911958385e-07, - "loss": 0.0245, - "step": 8420 - }, - { - "epoch": 3.7401732178547635, - "grad_norm": 0.39246829057407184, - "learning_rate": 1.2718484048602876e-07, - "loss": 0.0231, - "step": 8421 - }, - { - "epoch": 3.740617366200311, - "grad_norm": 0.41649329925637596, - "learning_rate": 1.267507543193669e-07, - "loss": 0.0199, - "step": 8422 - }, - { - "epoch": 3.7410615145458586, - "grad_norm": 0.42862616530797626, - "learning_rate": 1.2631740068484888e-07, - "loss": 0.0307, - "step": 8423 - }, - { - "epoch": 3.741505662891406, - "grad_norm": 0.35644680831072795, - "learning_rate": 1.258847796476148e-07, - "loss": 0.0241, - "step": 8424 - }, - { - "epoch": 3.741949811236953, - "grad_norm": 0.32721758016106417, - "learning_rate": 1.2545289127269488e-07, - "loss": 0.0235, - "step": 8425 - }, - { - "epoch": 3.7423939595825004, - "grad_norm": 0.40082921797531834, - "learning_rate": 1.2502173562500995e-07, - "loss": 0.021, - "step": 8426 - }, - { - "epoch": 3.7428381079280477, - "grad_norm": 0.39626883986681716, - "learning_rate": 1.2459131276936876e-07, - "loss": 0.0242, - "step": 8427 - }, - { - "epoch": 3.7432822562735955, - "grad_norm": 0.5073039426598518, - "learning_rate": 1.241616227704723e-07, - "loss": 0.0278, - "step": 8428 - }, - { - "epoch": 3.7437264046191427, - "grad_norm": 0.3603567472819875, - "learning_rate": 1.2373266569290997e-07, - "loss": 0.032, - "step": 8429 - }, - { - "epoch": 3.7441705529646905, - "grad_norm": 0.4679950484890861, - "learning_rate": 1.2330444160116196e-07, - "loss": 0.0208, - "step": 8430 - }, - { - "epoch": 3.7446147013102378, - "grad_norm": 0.468095341335435, - "learning_rate": 1.2287695055959615e-07, - "loss": 0.0223, - "step": 8431 - }, - { - "epoch": 3.745058849655785, - "grad_norm": 0.4819559753911326, - "learning_rate": 1.2245019263247283e-07, - "loss": 0.0262, - "step": 8432 - }, - { - "epoch": 3.7455029980013324, - "grad_norm": 0.4954170402461803, - "learning_rate": 1.2202416788394067e-07, - "loss": 0.0333, - "step": 8433 - }, - { - "epoch": 3.7459471463468796, - "grad_norm": 0.6174858065052228, - "learning_rate": 1.215988763780379e-07, - "loss": 0.0313, - "step": 8434 - }, - { - "epoch": 3.7463912946924274, - "grad_norm": 0.42561144433663495, - "learning_rate": 1.2117431817869453e-07, - "loss": 0.0277, - "step": 8435 - }, - { - "epoch": 3.7468354430379747, - "grad_norm": 0.3709807838740829, - "learning_rate": 1.207504933497272e-07, - "loss": 0.0243, - "step": 8436 - }, - { - "epoch": 3.747279591383522, - "grad_norm": 0.3000902649747397, - "learning_rate": 1.2032740195484448e-07, - "loss": 0.0147, - "step": 8437 - }, - { - "epoch": 3.7477237397290697, - "grad_norm": 0.5581005324860493, - "learning_rate": 1.1990504405764492e-07, - "loss": 0.0302, - "step": 8438 - }, - { - "epoch": 3.748167888074617, - "grad_norm": 0.5633312132178465, - "learning_rate": 1.1948341972161492e-07, - "loss": 0.0309, - "step": 8439 - }, - { - "epoch": 3.7486120364201643, - "grad_norm": 0.36629488791856035, - "learning_rate": 1.1906252901013271e-07, - "loss": 0.0238, - "step": 8440 - }, - { - "epoch": 3.7490561847657116, - "grad_norm": 0.49491041444267353, - "learning_rate": 1.1864237198646544e-07, - "loss": 0.0298, - "step": 8441 - }, - { - "epoch": 3.7495003331112593, - "grad_norm": 0.564678297749189, - "learning_rate": 1.1822294871376928e-07, - "loss": 0.0305, - "step": 8442 - }, - { - "epoch": 3.7499444814568066, - "grad_norm": 0.40055447684951356, - "learning_rate": 1.1780425925509043e-07, - "loss": 0.0272, - "step": 8443 - }, - { - "epoch": 3.750388629802354, - "grad_norm": 0.470018676790432, - "learning_rate": 1.1738630367336579e-07, - "loss": 0.032, - "step": 8444 - }, - { - "epoch": 3.7508327781479016, - "grad_norm": 0.4384932264169123, - "learning_rate": 1.1696908203142066e-07, - "loss": 0.0256, - "step": 8445 - }, - { - "epoch": 3.751276926493449, - "grad_norm": 0.4653823127550869, - "learning_rate": 1.1655259439197042e-07, - "loss": 0.03, - "step": 8446 - }, - { - "epoch": 3.751721074838996, - "grad_norm": 0.41947700796617987, - "learning_rate": 1.1613684081762111e-07, - "loss": 0.0236, - "step": 8447 - }, - { - "epoch": 3.7521652231845435, - "grad_norm": 0.38536838427097736, - "learning_rate": 1.1572182137086662e-07, - "loss": 0.0242, - "step": 8448 - }, - { - "epoch": 3.7526093715300908, - "grad_norm": 0.341291576535971, - "learning_rate": 1.1530753611409151e-07, - "loss": 0.0229, - "step": 8449 - }, - { - "epoch": 3.7530535198756385, - "grad_norm": 0.3792987155782225, - "learning_rate": 1.1489398510957039e-07, - "loss": 0.0237, - "step": 8450 - }, - { - "epoch": 3.753497668221186, - "grad_norm": 0.37854108977812184, - "learning_rate": 1.1448116841946688e-07, - "loss": 0.0287, - "step": 8451 - }, - { - "epoch": 3.7539418165667335, - "grad_norm": 0.46875753276752063, - "learning_rate": 1.1406908610583467e-07, - "loss": 0.0311, - "step": 8452 - }, - { - "epoch": 3.754385964912281, - "grad_norm": 0.4064331207182262, - "learning_rate": 1.1365773823061532e-07, - "loss": 0.0283, - "step": 8453 - }, - { - "epoch": 3.754830113257828, - "grad_norm": 0.4002763856777381, - "learning_rate": 1.1324712485564271e-07, - "loss": 0.024, - "step": 8454 - }, - { - "epoch": 3.7552742616033754, - "grad_norm": 0.45827533835175305, - "learning_rate": 1.1283724604263857e-07, - "loss": 0.0246, - "step": 8455 - }, - { - "epoch": 3.7557184099489227, - "grad_norm": 0.39394686171582316, - "learning_rate": 1.1242810185321473e-07, - "loss": 0.03, - "step": 8456 - }, - { - "epoch": 3.7561625582944704, - "grad_norm": 0.42301627464562236, - "learning_rate": 1.1201969234887256e-07, - "loss": 0.0321, - "step": 8457 - }, - { - "epoch": 3.7566067066400177, - "grad_norm": 0.5331607328416476, - "learning_rate": 1.1161201759100349e-07, - "loss": 0.0279, - "step": 8458 - }, - { - "epoch": 3.757050854985565, - "grad_norm": 0.44317622469926854, - "learning_rate": 1.1120507764088684e-07, - "loss": 0.0449, - "step": 8459 - }, - { - "epoch": 3.7574950033311127, - "grad_norm": 0.3625167956575573, - "learning_rate": 1.1079887255969257e-07, - "loss": 0.0194, - "step": 8460 - }, - { - "epoch": 3.75793915167666, - "grad_norm": 0.38235614382527433, - "learning_rate": 1.1039340240848129e-07, - "loss": 0.0286, - "step": 8461 - }, - { - "epoch": 3.7583833000222073, - "grad_norm": 0.4781732717681245, - "learning_rate": 1.0998866724820145e-07, - "loss": 0.0275, - "step": 8462 - }, - { - "epoch": 3.7588274483677546, - "grad_norm": 0.36277030824808554, - "learning_rate": 1.0958466713969218e-07, - "loss": 0.02, - "step": 8463 - }, - { - "epoch": 3.7592715967133024, - "grad_norm": 0.4310996548128226, - "learning_rate": 1.09181402143681e-07, - "loss": 0.0271, - "step": 8464 - }, - { - "epoch": 3.7597157450588496, - "grad_norm": 0.44245437066064086, - "learning_rate": 1.0877887232078499e-07, - "loss": 0.0221, - "step": 8465 - }, - { - "epoch": 3.760159893404397, - "grad_norm": 0.45765152233325074, - "learning_rate": 1.0837707773151185e-07, - "loss": 0.0229, - "step": 8466 - }, - { - "epoch": 3.7606040417499447, - "grad_norm": 0.5026222951099458, - "learning_rate": 1.0797601843625827e-07, - "loss": 0.0253, - "step": 8467 - }, - { - "epoch": 3.761048190095492, - "grad_norm": 0.5808566690208385, - "learning_rate": 1.0757569449530991e-07, - "loss": 0.0229, - "step": 8468 - }, - { - "epoch": 3.7614923384410393, - "grad_norm": 0.36877164338905205, - "learning_rate": 1.0717610596884309e-07, - "loss": 0.0285, - "step": 8469 - }, - { - "epoch": 3.7619364867865865, - "grad_norm": 0.3700848691513958, - "learning_rate": 1.0677725291692143e-07, - "loss": 0.0204, - "step": 8470 - }, - { - "epoch": 3.7623806351321343, - "grad_norm": 0.3125009535356397, - "learning_rate": 1.0637913539950029e-07, - "loss": 0.0181, - "step": 8471 - }, - { - "epoch": 3.7628247834776816, - "grad_norm": 0.4808509442055829, - "learning_rate": 1.0598175347642293e-07, - "loss": 0.028, - "step": 8472 - }, - { - "epoch": 3.763268931823229, - "grad_norm": 0.4442187709817902, - "learning_rate": 1.0558510720742265e-07, - "loss": 0.0316, - "step": 8473 - }, - { - "epoch": 3.7637130801687766, - "grad_norm": 0.5324409399106215, - "learning_rate": 1.0518919665212235e-07, - "loss": 0.0319, - "step": 8474 - }, - { - "epoch": 3.764157228514324, - "grad_norm": 0.42314697759763437, - "learning_rate": 1.0479402187003496e-07, - "loss": 0.0222, - "step": 8475 - }, - { - "epoch": 3.764601376859871, - "grad_norm": 0.378666602483765, - "learning_rate": 1.0439958292056074e-07, - "loss": 0.029, - "step": 8476 - }, - { - "epoch": 3.7650455252054185, - "grad_norm": 0.5003419916114011, - "learning_rate": 1.040058798629906e-07, - "loss": 0.0259, - "step": 8477 - }, - { - "epoch": 3.7654896735509658, - "grad_norm": 0.5187802895889871, - "learning_rate": 1.0361291275650498e-07, - "loss": 0.0412, - "step": 8478 - }, - { - "epoch": 3.7659338218965135, - "grad_norm": 0.41358816649715524, - "learning_rate": 1.0322068166017386e-07, - "loss": 0.0325, - "step": 8479 - }, - { - "epoch": 3.766377970242061, - "grad_norm": 0.3554104231788337, - "learning_rate": 1.0282918663295616e-07, - "loss": 0.0266, - "step": 8480 - }, - { - "epoch": 3.7668221185876085, - "grad_norm": 0.433977374572574, - "learning_rate": 1.0243842773369983e-07, - "loss": 0.0262, - "step": 8481 - }, - { - "epoch": 3.767266266933156, - "grad_norm": 0.3050089012609615, - "learning_rate": 1.0204840502114288e-07, - "loss": 0.016, - "step": 8482 - }, - { - "epoch": 3.767710415278703, - "grad_norm": 0.35035155995481493, - "learning_rate": 1.0165911855391286e-07, - "loss": 0.022, - "step": 8483 - }, - { - "epoch": 3.7681545636242504, - "grad_norm": 0.3852773325944005, - "learning_rate": 1.0127056839052462e-07, - "loss": 0.017, - "step": 8484 - }, - { - "epoch": 3.7685987119697977, - "grad_norm": 0.43026798549784784, - "learning_rate": 1.0088275458938535e-07, - "loss": 0.0246, - "step": 8485 - }, - { - "epoch": 3.7690428603153454, - "grad_norm": 0.49903822216529625, - "learning_rate": 1.004956772087895e-07, - "loss": 0.0278, - "step": 8486 - }, - { - "epoch": 3.7694870086608927, - "grad_norm": 0.30382121537402157, - "learning_rate": 1.0010933630692166e-07, - "loss": 0.0168, - "step": 8487 - }, - { - "epoch": 3.76993115700644, - "grad_norm": 0.41575029483477577, - "learning_rate": 9.972373194185481e-08, - "loss": 0.0273, - "step": 8488 - }, - { - "epoch": 3.7703753053519877, - "grad_norm": 0.425652535729631, - "learning_rate": 9.933886417155258e-08, - "loss": 0.0258, - "step": 8489 - }, - { - "epoch": 3.770819453697535, - "grad_norm": 0.4620847686387116, - "learning_rate": 9.895473305386593e-08, - "loss": 0.024, - "step": 8490 - }, - { - "epoch": 3.7712636020430823, - "grad_norm": 0.4272116851275043, - "learning_rate": 9.857133864653812e-08, - "loss": 0.0246, - "step": 8491 - }, - { - "epoch": 3.7717077503886296, - "grad_norm": 0.3869470039777947, - "learning_rate": 9.818868100719803e-08, - "loss": 0.0266, - "step": 8492 - }, - { - "epoch": 3.7721518987341773, - "grad_norm": 0.38019015167069964, - "learning_rate": 9.780676019336632e-08, - "loss": 0.0235, - "step": 8493 - }, - { - "epoch": 3.7725960470797246, - "grad_norm": 0.6659937799546494, - "learning_rate": 9.742557626245264e-08, - "loss": 0.0336, - "step": 8494 - }, - { - "epoch": 3.773040195425272, - "grad_norm": 0.5490763929102886, - "learning_rate": 9.704512927175502e-08, - "loss": 0.0325, - "step": 8495 - }, - { - "epoch": 3.7734843437708196, - "grad_norm": 0.5157099798547079, - "learning_rate": 9.666541927846107e-08, - "loss": 0.0408, - "step": 8496 - }, - { - "epoch": 3.773928492116367, - "grad_norm": 0.6662930057806212, - "learning_rate": 9.62864463396479e-08, - "loss": 0.0181, - "step": 8497 - }, - { - "epoch": 3.7743726404619142, - "grad_norm": 0.36908564910307085, - "learning_rate": 9.590821051228105e-08, - "loss": 0.0196, - "step": 8498 - }, - { - "epoch": 3.7748167888074615, - "grad_norm": 0.34743147522098267, - "learning_rate": 9.553071185321616e-08, - "loss": 0.0177, - "step": 8499 - }, - { - "epoch": 3.7752609371530093, - "grad_norm": 0.4868308303940559, - "learning_rate": 9.515395041919839e-08, - "loss": 0.0292, - "step": 8500 - }, - { - "epoch": 3.7757050854985565, - "grad_norm": 0.4939860503734759, - "learning_rate": 9.477792626685966e-08, - "loss": 0.026, - "step": 8501 - }, - { - "epoch": 3.776149233844104, - "grad_norm": 0.31320876766724953, - "learning_rate": 9.440263945272365e-08, - "loss": 0.018, - "step": 8502 - }, - { - "epoch": 3.7765933821896516, - "grad_norm": 0.36925404461957756, - "learning_rate": 9.402809003320357e-08, - "loss": 0.022, - "step": 8503 - }, - { - "epoch": 3.777037530535199, - "grad_norm": 0.5099202980127734, - "learning_rate": 9.365427806459826e-08, - "loss": 0.0219, - "step": 8504 - }, - { - "epoch": 3.777481678880746, - "grad_norm": 0.4048961589102498, - "learning_rate": 9.32812036031e-08, - "loss": 0.02, - "step": 8505 - }, - { - "epoch": 3.7779258272262934, - "grad_norm": 0.3828893218593422, - "learning_rate": 9.290886670478727e-08, - "loss": 0.0329, - "step": 8506 - }, - { - "epoch": 3.7783699755718407, - "grad_norm": 0.47619859186660657, - "learning_rate": 9.253726742562808e-08, - "loss": 0.0339, - "step": 8507 - }, - { - "epoch": 3.7788141239173885, - "grad_norm": 0.3817560157892672, - "learning_rate": 9.216640582148218e-08, - "loss": 0.0245, - "step": 8508 - }, - { - "epoch": 3.7792582722629358, - "grad_norm": 0.44955998588034224, - "learning_rate": 9.179628194809387e-08, - "loss": 0.036, - "step": 8509 - }, - { - "epoch": 3.7797024206084835, - "grad_norm": 0.39409371816193695, - "learning_rate": 9.142689586110032e-08, - "loss": 0.0243, - "step": 8510 - }, - { - "epoch": 3.780146568954031, - "grad_norm": 0.3329695501761576, - "learning_rate": 9.105824761602711e-08, - "loss": 0.0177, - "step": 8511 - }, - { - "epoch": 3.780590717299578, - "grad_norm": 0.41637622163142424, - "learning_rate": 9.069033726828657e-08, - "loss": 0.0243, - "step": 8512 - }, - { - "epoch": 3.7810348656451254, - "grad_norm": 0.513878740141547, - "learning_rate": 9.032316487318338e-08, - "loss": 0.0422, - "step": 8513 - }, - { - "epoch": 3.7814790139906727, - "grad_norm": 0.3779536127482641, - "learning_rate": 8.995673048591002e-08, - "loss": 0.0243, - "step": 8514 - }, - { - "epoch": 3.7819231623362204, - "grad_norm": 0.40656693503772895, - "learning_rate": 8.959103416154635e-08, - "loss": 0.0204, - "step": 8515 - }, - { - "epoch": 3.7823673106817677, - "grad_norm": 0.37125663642968887, - "learning_rate": 8.922607595506339e-08, - "loss": 0.0222, - "step": 8516 - }, - { - "epoch": 3.782811459027315, - "grad_norm": 0.4060879405476042, - "learning_rate": 8.886185592132113e-08, - "loss": 0.0213, - "step": 8517 - }, - { - "epoch": 3.7832556073728627, - "grad_norm": 0.4488358277812707, - "learning_rate": 8.849837411506745e-08, - "loss": 0.0344, - "step": 8518 - }, - { - "epoch": 3.78369975571841, - "grad_norm": 0.4066455322610377, - "learning_rate": 8.813563059093977e-08, - "loss": 0.0242, - "step": 8519 - }, - { - "epoch": 3.7841439040639573, - "grad_norm": 0.4551394122387141, - "learning_rate": 8.777362540346501e-08, - "loss": 0.0293, - "step": 8520 - }, - { - "epoch": 3.7845880524095046, - "grad_norm": 0.42457221786596655, - "learning_rate": 8.741235860705855e-08, - "loss": 0.0259, - "step": 8521 - }, - { - "epoch": 3.7850322007550523, - "grad_norm": 0.3043935300931366, - "learning_rate": 8.70518302560247e-08, - "loss": 0.0182, - "step": 8522 - }, - { - "epoch": 3.7854763491005996, - "grad_norm": 0.4462173753124753, - "learning_rate": 8.669204040455737e-08, - "loss": 0.0265, - "step": 8523 - }, - { - "epoch": 3.785920497446147, - "grad_norm": 0.41601964127930957, - "learning_rate": 8.633298910673826e-08, - "loss": 0.0288, - "step": 8524 - }, - { - "epoch": 3.7863646457916946, - "grad_norm": 0.2813068890343098, - "learning_rate": 8.597467641654034e-08, - "loss": 0.0212, - "step": 8525 - }, - { - "epoch": 3.786808794137242, - "grad_norm": 0.43623034836188324, - "learning_rate": 8.561710238782272e-08, - "loss": 0.0249, - "step": 8526 - }, - { - "epoch": 3.787252942482789, - "grad_norm": 0.5751855788252586, - "learning_rate": 8.526026707433577e-08, - "loss": 0.0413, - "step": 8527 - }, - { - "epoch": 3.7876970908283365, - "grad_norm": 0.3868182612344314, - "learning_rate": 8.490417052971766e-08, - "loss": 0.0281, - "step": 8528 - }, - { - "epoch": 3.7881412391738842, - "grad_norm": 0.356372518499826, - "learning_rate": 8.45488128074945e-08, - "loss": 0.022, - "step": 8529 - }, - { - "epoch": 3.7885853875194315, - "grad_norm": 0.32262084504669236, - "learning_rate": 8.419419396108464e-08, - "loss": 0.0168, - "step": 8530 - }, - { - "epoch": 3.789029535864979, - "grad_norm": 0.4043798709101995, - "learning_rate": 8.384031404379211e-08, - "loss": 0.024, - "step": 8531 - }, - { - "epoch": 3.7894736842105265, - "grad_norm": 0.41478103237109665, - "learning_rate": 8.34871731088116e-08, - "loss": 0.027, - "step": 8532 - }, - { - "epoch": 3.789917832556074, - "grad_norm": 0.4196449951874737, - "learning_rate": 8.313477120922563e-08, - "loss": 0.0209, - "step": 8533 - }, - { - "epoch": 3.790361980901621, - "grad_norm": 0.4317330818971433, - "learning_rate": 8.278310839800685e-08, - "loss": 0.0284, - "step": 8534 - }, - { - "epoch": 3.7908061292471684, - "grad_norm": 0.44713201608661013, - "learning_rate": 8.243218472801461e-08, - "loss": 0.0438, - "step": 8535 - }, - { - "epoch": 3.7912502775927157, - "grad_norm": 0.4371441171214935, - "learning_rate": 8.208200025200119e-08, - "loss": 0.0201, - "step": 8536 - }, - { - "epoch": 3.7916944259382634, - "grad_norm": 0.5043700394965174, - "learning_rate": 8.173255502260336e-08, - "loss": 0.0285, - "step": 8537 - }, - { - "epoch": 3.7921385742838107, - "grad_norm": 0.3893482313023411, - "learning_rate": 8.138384909234964e-08, - "loss": 0.0258, - "step": 8538 - }, - { - "epoch": 3.7925827226293585, - "grad_norm": 0.3403931667558864, - "learning_rate": 8.103588251365534e-08, - "loss": 0.0199, - "step": 8539 - }, - { - "epoch": 3.7930268709749058, - "grad_norm": 0.4249904672408077, - "learning_rate": 8.068865533882752e-08, - "loss": 0.0221, - "step": 8540 - }, - { - "epoch": 3.793471019320453, - "grad_norm": 0.4067407706983934, - "learning_rate": 8.034216762005831e-08, - "loss": 0.0374, - "step": 8541 - }, - { - "epoch": 3.7939151676660003, - "grad_norm": 0.41570749972762994, - "learning_rate": 7.99964194094327e-08, - "loss": 0.024, - "step": 8542 - }, - { - "epoch": 3.7943593160115476, - "grad_norm": 0.48499039450946113, - "learning_rate": 7.96514107589208e-08, - "loss": 0.0341, - "step": 8543 - }, - { - "epoch": 3.7948034643570954, - "grad_norm": 0.40373614464609714, - "learning_rate": 7.93071417203839e-08, - "loss": 0.0268, - "step": 8544 - }, - { - "epoch": 3.7952476127026427, - "grad_norm": 0.35452268236632206, - "learning_rate": 7.896361234557226e-08, - "loss": 0.0192, - "step": 8545 - }, - { - "epoch": 3.79569176104819, - "grad_norm": 0.4666753180876556, - "learning_rate": 7.862082268612237e-08, - "loss": 0.0342, - "step": 8546 - }, - { - "epoch": 3.7961359093937377, - "grad_norm": 0.39409303578975985, - "learning_rate": 7.8278772793563e-08, - "loss": 0.0244, - "step": 8547 - }, - { - "epoch": 3.796580057739285, - "grad_norm": 0.543942317931089, - "learning_rate": 7.793746271930968e-08, - "loss": 0.0232, - "step": 8548 - }, - { - "epoch": 3.7970242060848323, - "grad_norm": 0.4331154507830711, - "learning_rate": 7.759689251466695e-08, - "loss": 0.0314, - "step": 8549 - }, - { - "epoch": 3.7974683544303796, - "grad_norm": 0.4001739441704062, - "learning_rate": 7.72570622308283e-08, - "loss": 0.0201, - "step": 8550 - }, - { - "epoch": 3.7979125027759273, - "grad_norm": 0.4891526654283636, - "learning_rate": 7.691797191887618e-08, - "loss": 0.027, - "step": 8551 - }, - { - "epoch": 3.7983566511214746, - "grad_norm": 0.40920185175638346, - "learning_rate": 7.657962162978038e-08, - "loss": 0.0264, - "step": 8552 - }, - { - "epoch": 3.798800799467022, - "grad_norm": 0.4710270896413282, - "learning_rate": 7.624201141440301e-08, - "loss": 0.0295, - "step": 8553 - }, - { - "epoch": 3.7992449478125696, - "grad_norm": 0.3793884463205298, - "learning_rate": 7.59051413234907e-08, - "loss": 0.0162, - "step": 8554 - }, - { - "epoch": 3.799689096158117, - "grad_norm": 0.2877108795069413, - "learning_rate": 7.556901140768125e-08, - "loss": 0.0134, - "step": 8555 - }, - { - "epoch": 3.800133244503664, - "grad_norm": 0.3350085177589909, - "learning_rate": 7.523362171750148e-08, - "loss": 0.0139, - "step": 8556 - }, - { - "epoch": 3.8005773928492115, - "grad_norm": 0.4349812650446206, - "learning_rate": 7.489897230336496e-08, - "loss": 0.0276, - "step": 8557 - }, - { - "epoch": 3.801021541194759, - "grad_norm": 0.3673667656791951, - "learning_rate": 7.456506321557533e-08, - "loss": 0.0212, - "step": 8558 - }, - { - "epoch": 3.8014656895403065, - "grad_norm": 0.4327339896844779, - "learning_rate": 7.423189450432633e-08, - "loss": 0.0234, - "step": 8559 - }, - { - "epoch": 3.801909837885854, - "grad_norm": 0.3414463099110009, - "learning_rate": 7.389946621969679e-08, - "loss": 0.02, - "step": 8560 - }, - { - "epoch": 3.8023539862314015, - "grad_norm": 0.35401161038931783, - "learning_rate": 7.356777841165786e-08, - "loss": 0.0236, - "step": 8561 - }, - { - "epoch": 3.802798134576949, - "grad_norm": 0.3318458174565355, - "learning_rate": 7.32368311300674e-08, - "loss": 0.0228, - "step": 8562 - }, - { - "epoch": 3.803242282922496, - "grad_norm": 0.4407338624172669, - "learning_rate": 7.290662442467178e-08, - "loss": 0.0243, - "step": 8563 - }, - { - "epoch": 3.8036864312680434, - "grad_norm": 0.3688164392706434, - "learning_rate": 7.257715834510737e-08, - "loss": 0.0194, - "step": 8564 - }, - { - "epoch": 3.8041305796135907, - "grad_norm": 0.4497260712320215, - "learning_rate": 7.224843294089844e-08, - "loss": 0.0287, - "step": 8565 - }, - { - "epoch": 3.8045747279591384, - "grad_norm": 0.33236407987313205, - "learning_rate": 7.192044826145772e-08, - "loss": 0.0191, - "step": 8566 - }, - { - "epoch": 3.8050188763046857, - "grad_norm": 0.37564613529503493, - "learning_rate": 7.159320435608741e-08, - "loss": 0.0161, - "step": 8567 - }, - { - "epoch": 3.8054630246502335, - "grad_norm": 0.3522232268407622, - "learning_rate": 7.126670127397705e-08, - "loss": 0.0215, - "step": 8568 - }, - { - "epoch": 3.8059071729957807, - "grad_norm": 0.429595537532537, - "learning_rate": 7.094093906420629e-08, - "loss": 0.0288, - "step": 8569 - }, - { - "epoch": 3.806351321341328, - "grad_norm": 0.4178495453889158, - "learning_rate": 7.061591777574261e-08, - "loss": 0.0273, - "step": 8570 - }, - { - "epoch": 3.8067954696868753, - "grad_norm": 0.34370415504576896, - "learning_rate": 7.029163745744194e-08, - "loss": 0.0198, - "step": 8571 - }, - { - "epoch": 3.8072396180324226, - "grad_norm": 0.6738541750686097, - "learning_rate": 6.996809815804917e-08, - "loss": 0.0216, - "step": 8572 - }, - { - "epoch": 3.8076837663779703, - "grad_norm": 0.3104036826233125, - "learning_rate": 6.964529992619817e-08, - "loss": 0.0158, - "step": 8573 - }, - { - "epoch": 3.8081279147235176, - "grad_norm": 0.3875694214420725, - "learning_rate": 6.932324281041014e-08, - "loss": 0.0282, - "step": 8574 - }, - { - "epoch": 3.808572063069065, - "grad_norm": 0.7370247616254277, - "learning_rate": 6.900192685909635e-08, - "loss": 0.0427, - "step": 8575 - }, - { - "epoch": 3.8090162114146127, - "grad_norm": 0.3713027373129603, - "learning_rate": 6.868135212055649e-08, - "loss": 0.0273, - "step": 8576 - }, - { - "epoch": 3.80946035976016, - "grad_norm": 0.4118005174789893, - "learning_rate": 6.836151864297702e-08, - "loss": 0.0256, - "step": 8577 - }, - { - "epoch": 3.8099045081057072, - "grad_norm": 0.47276389861446366, - "learning_rate": 6.80424264744356e-08, - "loss": 0.0345, - "step": 8578 - }, - { - "epoch": 3.8103486564512545, - "grad_norm": 0.49450924911427574, - "learning_rate": 6.772407566289718e-08, - "loss": 0.0299, - "step": 8579 - }, - { - "epoch": 3.8107928047968023, - "grad_norm": 0.3914626594412097, - "learning_rate": 6.740646625621461e-08, - "loss": 0.0244, - "step": 8580 - }, - { - "epoch": 3.8112369531423496, - "grad_norm": 0.3930999395745575, - "learning_rate": 6.708959830213024e-08, - "loss": 0.0359, - "step": 8581 - }, - { - "epoch": 3.811681101487897, - "grad_norm": 0.49573525692180653, - "learning_rate": 6.677347184827487e-08, - "loss": 0.0319, - "step": 8582 - }, - { - "epoch": 3.8121252498334446, - "grad_norm": 0.49861370512502395, - "learning_rate": 6.645808694216715e-08, - "loss": 0.0256, - "step": 8583 - }, - { - "epoch": 3.812569398178992, - "grad_norm": 0.4401777666781036, - "learning_rate": 6.614344363121583e-08, - "loss": 0.0223, - "step": 8584 - }, - { - "epoch": 3.813013546524539, - "grad_norm": 0.36977940669747483, - "learning_rate": 6.582954196271641e-08, - "loss": 0.0214, - "step": 8585 - }, - { - "epoch": 3.8134576948700865, - "grad_norm": 0.4927485327079211, - "learning_rate": 6.55163819838528e-08, - "loss": 0.0287, - "step": 8586 - }, - { - "epoch": 3.8139018432156337, - "grad_norm": 0.37546001627014713, - "learning_rate": 6.520396374170013e-08, - "loss": 0.0229, - "step": 8587 - }, - { - "epoch": 3.8143459915611815, - "grad_norm": 0.4196889003684699, - "learning_rate": 6.489228728321917e-08, - "loss": 0.0276, - "step": 8588 - }, - { - "epoch": 3.8147901399067288, - "grad_norm": 0.4820349858082287, - "learning_rate": 6.458135265525967e-08, - "loss": 0.0274, - "step": 8589 - }, - { - "epoch": 3.8152342882522765, - "grad_norm": 0.3797443254174065, - "learning_rate": 6.427115990456201e-08, - "loss": 0.0237, - "step": 8590 - }, - { - "epoch": 3.815678436597824, - "grad_norm": 0.4390761052826011, - "learning_rate": 6.396170907775167e-08, - "loss": 0.03, - "step": 8591 - }, - { - "epoch": 3.816122584943371, - "grad_norm": 0.32997954286919473, - "learning_rate": 6.365300022134479e-08, - "loss": 0.0242, - "step": 8592 - }, - { - "epoch": 3.8165667332889184, - "grad_norm": 0.45875132604087615, - "learning_rate": 6.334503338174646e-08, - "loss": 0.0269, - "step": 8593 - }, - { - "epoch": 3.8170108816344657, - "grad_norm": 0.48148563808244804, - "learning_rate": 6.303780860524855e-08, - "loss": 0.0273, - "step": 8594 - }, - { - "epoch": 3.8174550299800134, - "grad_norm": 0.4353447524714429, - "learning_rate": 6.273132593803189e-08, - "loss": 0.0195, - "step": 8595 - }, - { - "epoch": 3.8178991783255607, - "grad_norm": 0.38567398492319127, - "learning_rate": 6.242558542616739e-08, - "loss": 0.0193, - "step": 8596 - }, - { - "epoch": 3.818343326671108, - "grad_norm": 0.4173237164922603, - "learning_rate": 6.212058711561165e-08, - "loss": 0.0278, - "step": 8597 - }, - { - "epoch": 3.8187874750166557, - "grad_norm": 0.3154210166282003, - "learning_rate": 6.18163310522113e-08, - "loss": 0.0189, - "step": 8598 - }, - { - "epoch": 3.819231623362203, - "grad_norm": 0.5134503050749415, - "learning_rate": 6.151281728170144e-08, - "loss": 0.0289, - "step": 8599 - }, - { - "epoch": 3.8196757717077503, - "grad_norm": 0.46640864477037924, - "learning_rate": 6.121004584970558e-08, - "loss": 0.0308, - "step": 8600 - }, - { - "epoch": 3.8201199200532976, - "grad_norm": 0.36566694826665086, - "learning_rate": 6.090801680173563e-08, - "loss": 0.022, - "step": 8601 - }, - { - "epoch": 3.8205640683988453, - "grad_norm": 0.39260388522181044, - "learning_rate": 6.060673018319085e-08, - "loss": 0.033, - "step": 8602 - }, - { - "epoch": 3.8210082167443926, - "grad_norm": 0.32628501291346773, - "learning_rate": 6.030618603935945e-08, - "loss": 0.0195, - "step": 8603 - }, - { - "epoch": 3.82145236508994, - "grad_norm": 0.5032358609244834, - "learning_rate": 6.000638441542029e-08, - "loss": 0.0367, - "step": 8604 - }, - { - "epoch": 3.8218965134354876, - "grad_norm": 0.49029558220343267, - "learning_rate": 5.970732535643675e-08, - "loss": 0.0357, - "step": 8605 - }, - { - "epoch": 3.822340661781035, - "grad_norm": 0.4041938487036102, - "learning_rate": 5.94090089073629e-08, - "loss": 0.0192, - "step": 8606 - }, - { - "epoch": 3.8227848101265822, - "grad_norm": 0.35662580614791967, - "learning_rate": 5.911143511304174e-08, - "loss": 0.0223, - "step": 8607 - }, - { - "epoch": 3.8232289584721295, - "grad_norm": 0.42334321858420404, - "learning_rate": 5.8814604018202494e-08, - "loss": 0.0333, - "step": 8608 - }, - { - "epoch": 3.8236731068176772, - "grad_norm": 0.3454601103621082, - "learning_rate": 5.851851566746392e-08, - "loss": 0.0184, - "step": 8609 - }, - { - "epoch": 3.8241172551632245, - "grad_norm": 0.39337857078220745, - "learning_rate": 5.8223170105333734e-08, - "loss": 0.0228, - "step": 8610 - }, - { - "epoch": 3.824561403508772, - "grad_norm": 0.3888924660914583, - "learning_rate": 5.792856737620756e-08, - "loss": 0.0222, - "step": 8611 - }, - { - "epoch": 3.8250055518543196, - "grad_norm": 0.3633339990171769, - "learning_rate": 5.763470752436884e-08, - "loss": 0.0178, - "step": 8612 - }, - { - "epoch": 3.825449700199867, - "grad_norm": 0.5069652365071539, - "learning_rate": 5.734159059398947e-08, - "loss": 0.0311, - "step": 8613 - }, - { - "epoch": 3.825893848545414, - "grad_norm": 0.36481775204860845, - "learning_rate": 5.7049216629129764e-08, - "loss": 0.0214, - "step": 8614 - }, - { - "epoch": 3.8263379968909614, - "grad_norm": 0.4066225715814007, - "learning_rate": 5.6757585673739014e-08, - "loss": 0.0196, - "step": 8615 - }, - { - "epoch": 3.8267821452365087, - "grad_norm": 0.38508300233088066, - "learning_rate": 5.6466697771654365e-08, - "loss": 0.0359, - "step": 8616 - }, - { - "epoch": 3.8272262935820565, - "grad_norm": 0.3857136200508219, - "learning_rate": 5.617655296660085e-08, - "loss": 0.0295, - "step": 8617 - }, - { - "epoch": 3.8276704419276038, - "grad_norm": 0.38361748458889544, - "learning_rate": 5.5887151302192465e-08, - "loss": 0.0225, - "step": 8618 - }, - { - "epoch": 3.8281145902731515, - "grad_norm": 0.4136696628077852, - "learning_rate": 5.5598492821931083e-08, - "loss": 0.0259, - "step": 8619 - }, - { - "epoch": 3.8285587386186988, - "grad_norm": 0.4157902458106096, - "learning_rate": 5.531057756920644e-08, - "loss": 0.0299, - "step": 8620 - }, - { - "epoch": 3.829002886964246, - "grad_norm": 0.41824752196070275, - "learning_rate": 5.502340558729835e-08, - "loss": 0.0276, - "step": 8621 - }, - { - "epoch": 3.8294470353097934, - "grad_norm": 0.3542732476447781, - "learning_rate": 5.4736976919372295e-08, - "loss": 0.0214, - "step": 8622 - }, - { - "epoch": 3.8298911836553406, - "grad_norm": 0.47614032589088634, - "learning_rate": 5.445129160848384e-08, - "loss": 0.0271, - "step": 8623 - }, - { - "epoch": 3.8303353320008884, - "grad_norm": 0.38508281181821147, - "learning_rate": 5.416634969757695e-08, - "loss": 0.0184, - "step": 8624 - }, - { - "epoch": 3.8307794803464357, - "grad_norm": 0.3910260501124829, - "learning_rate": 5.388215122948237e-08, - "loss": 0.0259, - "step": 8625 - }, - { - "epoch": 3.831223628691983, - "grad_norm": 0.31939094873163437, - "learning_rate": 5.359869624692038e-08, - "loss": 0.0197, - "step": 8626 - }, - { - "epoch": 3.8316677770375307, - "grad_norm": 0.30534186293474436, - "learning_rate": 5.331598479249911e-08, - "loss": 0.0201, - "step": 8627 - }, - { - "epoch": 3.832111925383078, - "grad_norm": 0.33277037895962464, - "learning_rate": 5.303401690871457e-08, - "loss": 0.0203, - "step": 8628 - }, - { - "epoch": 3.8325560737286253, - "grad_norm": 0.46652287015112265, - "learning_rate": 5.275279263795175e-08, - "loss": 0.0282, - "step": 8629 - }, - { - "epoch": 3.8330002220741726, - "grad_norm": 0.3848369302003942, - "learning_rate": 5.2472312022483486e-08, - "loss": 0.0258, - "step": 8630 - }, - { - "epoch": 3.8334443704197203, - "grad_norm": 0.4448218074585927, - "learning_rate": 5.2192575104469956e-08, - "loss": 0.0254, - "step": 8631 - }, - { - "epoch": 3.8338885187652676, - "grad_norm": 0.384660032948373, - "learning_rate": 5.1913581925960853e-08, - "loss": 0.0298, - "step": 8632 - }, - { - "epoch": 3.834332667110815, - "grad_norm": 0.4204621984497172, - "learning_rate": 5.16353325288943e-08, - "loss": 0.0242, - "step": 8633 - }, - { - "epoch": 3.8347768154563626, - "grad_norm": 0.46188483366801475, - "learning_rate": 5.135782695509461e-08, - "loss": 0.0231, - "step": 8634 - }, - { - "epoch": 3.83522096380191, - "grad_norm": 0.31629549729913453, - "learning_rate": 5.1081065246277314e-08, - "loss": 0.0158, - "step": 8635 - }, - { - "epoch": 3.835665112147457, - "grad_norm": 0.4353107611550551, - "learning_rate": 5.0805047444042467e-08, - "loss": 0.0287, - "step": 8636 - }, - { - "epoch": 3.8361092604930045, - "grad_norm": 0.40299033179334315, - "learning_rate": 5.0529773589881315e-08, - "loss": 0.0195, - "step": 8637 - }, - { - "epoch": 3.8365534088385522, - "grad_norm": 0.39085359021822447, - "learning_rate": 5.0255243725171876e-08, - "loss": 0.0279, - "step": 8638 - }, - { - "epoch": 3.8369975571840995, - "grad_norm": 0.48519972906519826, - "learning_rate": 4.998145789118114e-08, - "loss": 0.0333, - "step": 8639 - }, - { - "epoch": 3.837441705529647, - "grad_norm": 0.3517351658799901, - "learning_rate": 4.970841612906285e-08, - "loss": 0.0224, - "step": 8640 - }, - { - "epoch": 3.8378858538751945, - "grad_norm": 0.4190846054803923, - "learning_rate": 4.943611847986085e-08, - "loss": 0.0234, - "step": 8641 - }, - { - "epoch": 3.838330002220742, - "grad_norm": 0.35500926856677334, - "learning_rate": 4.9164564984505723e-08, - "loss": 0.0198, - "step": 8642 - }, - { - "epoch": 3.838774150566289, - "grad_norm": 0.34433663873966086, - "learning_rate": 4.889375568381594e-08, - "loss": 0.0174, - "step": 8643 - }, - { - "epoch": 3.8392182989118364, - "grad_norm": 0.38443528004632466, - "learning_rate": 4.8623690618499474e-08, - "loss": 0.0229, - "step": 8644 - }, - { - "epoch": 3.8396624472573837, - "grad_norm": 0.6274521829246321, - "learning_rate": 4.835436982915165e-08, - "loss": 0.0344, - "step": 8645 - }, - { - "epoch": 3.8401065956029314, - "grad_norm": 0.6266172275143739, - "learning_rate": 4.808579335625563e-08, - "loss": 0.0327, - "step": 8646 - }, - { - "epoch": 3.8405507439484787, - "grad_norm": 0.34058549807195543, - "learning_rate": 4.7817961240183567e-08, - "loss": 0.0224, - "step": 8647 - }, - { - "epoch": 3.8409948922940265, - "grad_norm": 0.4390835521149124, - "learning_rate": 4.7550873521194364e-08, - "loss": 0.0195, - "step": 8648 - }, - { - "epoch": 3.8414390406395738, - "grad_norm": 0.3842688631078936, - "learning_rate": 4.728453023943591e-08, - "loss": 0.0231, - "step": 8649 - }, - { - "epoch": 3.841883188985121, - "grad_norm": 0.4692045722646363, - "learning_rate": 4.701893143494507e-08, - "loss": 0.0324, - "step": 8650 - }, - { - "epoch": 3.8423273373306683, - "grad_norm": 0.45747006524072764, - "learning_rate": 4.675407714764491e-08, - "loss": 0.0308, - "step": 8651 - }, - { - "epoch": 3.8427714856762156, - "grad_norm": 0.3514460328352755, - "learning_rate": 4.648996741734857e-08, - "loss": 0.0207, - "step": 8652 - }, - { - "epoch": 3.8432156340217634, - "grad_norm": 0.3912542737417533, - "learning_rate": 4.622660228375486e-08, - "loss": 0.0219, - "step": 8653 - }, - { - "epoch": 3.8436597823673107, - "grad_norm": 0.3136962524397564, - "learning_rate": 4.596398178645323e-08, - "loss": 0.0178, - "step": 8654 - }, - { - "epoch": 3.844103930712858, - "grad_norm": 0.35110936427287714, - "learning_rate": 4.5702105964919305e-08, - "loss": 0.0163, - "step": 8655 - }, - { - "epoch": 3.8445480790584057, - "grad_norm": 0.3924180108841284, - "learning_rate": 4.5440974858517174e-08, - "loss": 0.0185, - "step": 8656 - }, - { - "epoch": 3.844992227403953, - "grad_norm": 0.41446900223517336, - "learning_rate": 4.5180588506500424e-08, - "loss": 0.0294, - "step": 8657 - }, - { - "epoch": 3.8454363757495003, - "grad_norm": 0.3695057701384476, - "learning_rate": 4.492094694800886e-08, - "loss": 0.0269, - "step": 8658 - }, - { - "epoch": 3.8458805240950475, - "grad_norm": 0.3819021189729295, - "learning_rate": 4.4662050222070707e-08, - "loss": 0.0242, - "step": 8659 - }, - { - "epoch": 3.8463246724405953, - "grad_norm": 0.4011303876812642, - "learning_rate": 4.440389836760317e-08, - "loss": 0.028, - "step": 8660 - }, - { - "epoch": 3.8467688207861426, - "grad_norm": 0.42320733559927765, - "learning_rate": 4.414649142341021e-08, - "loss": 0.023, - "step": 8661 - }, - { - "epoch": 3.84721296913169, - "grad_norm": 0.4205022645129385, - "learning_rate": 4.388982942818476e-08, - "loss": 0.0261, - "step": 8662 - }, - { - "epoch": 3.8476571174772376, - "grad_norm": 0.3312900714579485, - "learning_rate": 4.363391242050819e-08, - "loss": 0.0191, - "step": 8663 - }, - { - "epoch": 3.848101265822785, - "grad_norm": 0.40420539853558396, - "learning_rate": 4.3378740438848045e-08, - "loss": 0.0317, - "step": 8664 - }, - { - "epoch": 3.848545414168332, - "grad_norm": 0.41550220010293626, - "learning_rate": 4.312431352156143e-08, - "loss": 0.0255, - "step": 8665 - }, - { - "epoch": 3.8489895625138795, - "grad_norm": 0.34141480859605966, - "learning_rate": 4.287063170689332e-08, - "loss": 0.0219, - "step": 8666 - }, - { - "epoch": 3.849433710859427, - "grad_norm": 0.38457339309127786, - "learning_rate": 4.261769503297597e-08, - "loss": 0.0241, - "step": 8667 - }, - { - "epoch": 3.8498778592049745, - "grad_norm": 0.3865401623267944, - "learning_rate": 4.236550353783009e-08, - "loss": 0.0177, - "step": 8668 - }, - { - "epoch": 3.850322007550522, - "grad_norm": 0.40767308229409815, - "learning_rate": 4.211405725936535e-08, - "loss": 0.0255, - "step": 8669 - }, - { - "epoch": 3.8507661558960695, - "grad_norm": 0.3151055767858847, - "learning_rate": 4.186335623537707e-08, - "loss": 0.0233, - "step": 8670 - }, - { - "epoch": 3.851210304241617, - "grad_norm": 0.3928367577202992, - "learning_rate": 4.1613400503550114e-08, - "loss": 0.0212, - "step": 8671 - }, - { - "epoch": 3.851654452587164, - "grad_norm": 0.4221600614877391, - "learning_rate": 4.13641901014572e-08, - "loss": 0.034, - "step": 8672 - }, - { - "epoch": 3.8520986009327114, - "grad_norm": 0.3485554952889989, - "learning_rate": 4.1115725066559476e-08, - "loss": 0.0189, - "step": 8673 - }, - { - "epoch": 3.8525427492782587, - "grad_norm": 0.4184669816003682, - "learning_rate": 4.086800543620484e-08, - "loss": 0.0265, - "step": 8674 - }, - { - "epoch": 3.8529868976238064, - "grad_norm": 0.40633919115050987, - "learning_rate": 4.062103124763017e-08, - "loss": 0.0178, - "step": 8675 - }, - { - "epoch": 3.8534310459693537, - "grad_norm": 0.5255893189755798, - "learning_rate": 4.0374802537959114e-08, - "loss": 0.0255, - "step": 8676 - }, - { - "epoch": 3.8538751943149014, - "grad_norm": 0.4396123179486424, - "learning_rate": 4.012931934420483e-08, - "loss": 0.0317, - "step": 8677 - }, - { - "epoch": 3.8543193426604487, - "grad_norm": 0.3657385642630762, - "learning_rate": 3.9884581703267254e-08, - "loss": 0.022, - "step": 8678 - }, - { - "epoch": 3.854763491005996, - "grad_norm": 0.34626474417977715, - "learning_rate": 3.964058965193473e-08, - "loss": 0.022, - "step": 8679 - }, - { - "epoch": 3.8552076393515433, - "grad_norm": 0.38562426634481894, - "learning_rate": 3.939734322688349e-08, - "loss": 0.0338, - "step": 8680 - }, - { - "epoch": 3.8556517876970906, - "grad_norm": 0.4035987782299242, - "learning_rate": 3.9154842464677045e-08, - "loss": 0.0236, - "step": 8681 - }, - { - "epoch": 3.8560959360426383, - "grad_norm": 0.4159647047153014, - "learning_rate": 3.8913087401767914e-08, - "loss": 0.024, - "step": 8682 - }, - { - "epoch": 3.8565400843881856, - "grad_norm": 0.5090881621984651, - "learning_rate": 3.867207807449591e-08, - "loss": 0.03, - "step": 8683 - }, - { - "epoch": 3.856984232733733, - "grad_norm": 0.3724391004939136, - "learning_rate": 3.843181451908928e-08, - "loss": 0.0222, - "step": 8684 - }, - { - "epoch": 3.8574283810792807, - "grad_norm": 0.3994107927570375, - "learning_rate": 3.8192296771663026e-08, - "loss": 0.0365, - "step": 8685 - }, - { - "epoch": 3.857872529424828, - "grad_norm": 0.534388923736043, - "learning_rate": 3.795352486822057e-08, - "loss": 0.0286, - "step": 8686 - }, - { - "epoch": 3.8583166777703752, - "grad_norm": 0.3902311138190663, - "learning_rate": 3.7715498844653755e-08, - "loss": 0.0332, - "step": 8687 - }, - { - "epoch": 3.8587608261159225, - "grad_norm": 0.6121843349896643, - "learning_rate": 3.7478218736742286e-08, - "loss": 0.0354, - "step": 8688 - }, - { - "epoch": 3.8592049744614703, - "grad_norm": 0.3370534483485381, - "learning_rate": 3.724168458015265e-08, - "loss": 0.0221, - "step": 8689 - }, - { - "epoch": 3.8596491228070176, - "grad_norm": 0.450573063405829, - "learning_rate": 3.700589641044083e-08, - "loss": 0.0225, - "step": 8690 - }, - { - "epoch": 3.860093271152565, - "grad_norm": 0.49278309629574163, - "learning_rate": 3.677085426304905e-08, - "loss": 0.029, - "step": 8691 - }, - { - "epoch": 3.8605374194981126, - "grad_norm": 0.4055229539649227, - "learning_rate": 3.6536558173308476e-08, - "loss": 0.025, - "step": 8692 - }, - { - "epoch": 3.86098156784366, - "grad_norm": 0.3771030172546445, - "learning_rate": 3.630300817643762e-08, - "loss": 0.0196, - "step": 8693 - }, - { - "epoch": 3.861425716189207, - "grad_norm": 0.4460409041626926, - "learning_rate": 3.607020430754338e-08, - "loss": 0.0263, - "step": 8694 - }, - { - "epoch": 3.8618698645347544, - "grad_norm": 0.36455946204318906, - "learning_rate": 3.583814660161944e-08, - "loss": 0.0254, - "step": 8695 - }, - { - "epoch": 3.862314012880302, - "grad_norm": 0.45869268016899095, - "learning_rate": 3.5606835093548456e-08, - "loss": 0.0364, - "step": 8696 - }, - { - "epoch": 3.8627581612258495, - "grad_norm": 0.3785302300726189, - "learning_rate": 3.537626981810094e-08, - "loss": 0.0203, - "step": 8697 - }, - { - "epoch": 3.8632023095713968, - "grad_norm": 0.41102132869975794, - "learning_rate": 3.514645080993362e-08, - "loss": 0.0304, - "step": 8698 - }, - { - "epoch": 3.8636464579169445, - "grad_norm": 0.3842024608267308, - "learning_rate": 3.49173781035933e-08, - "loss": 0.0217, - "step": 8699 - }, - { - "epoch": 3.864090606262492, - "grad_norm": 0.5453458258267285, - "learning_rate": 3.4689051733513e-08, - "loss": 0.0423, - "step": 8700 - }, - { - "epoch": 3.864534754608039, - "grad_norm": 0.4282646993236422, - "learning_rate": 3.446147173401415e-08, - "loss": 0.0283, - "step": 8701 - }, - { - "epoch": 3.8649789029535864, - "grad_norm": 0.3536636171621975, - "learning_rate": 3.4234638139306055e-08, - "loss": 0.0239, - "step": 8702 - }, - { - "epoch": 3.8654230512991337, - "grad_norm": 0.32067352971181584, - "learning_rate": 3.4008550983484766e-08, - "loss": 0.019, - "step": 8703 - }, - { - "epoch": 3.8658671996446814, - "grad_norm": 0.42623278208292326, - "learning_rate": 3.378321030053644e-08, - "loss": 0.0235, - "step": 8704 - }, - { - "epoch": 3.8663113479902287, - "grad_norm": 0.4469975288314765, - "learning_rate": 3.355861612433231e-08, - "loss": 0.0309, - "step": 8705 - }, - { - "epoch": 3.8667554963357764, - "grad_norm": 0.4959636073318917, - "learning_rate": 3.3334768488633706e-08, - "loss": 0.0284, - "step": 8706 - }, - { - "epoch": 3.8671996446813237, - "grad_norm": 0.503087622712729, - "learning_rate": 3.31116674270876e-08, - "loss": 0.0256, - "step": 8707 - }, - { - "epoch": 3.867643793026871, - "grad_norm": 0.36447403400661404, - "learning_rate": 3.2889312973231616e-08, - "loss": 0.0187, - "step": 8708 - }, - { - "epoch": 3.8680879413724183, - "grad_norm": 0.39131073302761393, - "learning_rate": 3.266770516048734e-08, - "loss": 0.0227, - "step": 8709 - }, - { - "epoch": 3.8685320897179656, - "grad_norm": 0.39335266998944496, - "learning_rate": 3.2446844022167576e-08, - "loss": 0.0302, - "step": 8710 - }, - { - "epoch": 3.8689762380635133, - "grad_norm": 0.3587677625718898, - "learning_rate": 3.2226729591471326e-08, - "loss": 0.0196, - "step": 8711 - }, - { - "epoch": 3.8694203864090606, - "grad_norm": 0.360223764784636, - "learning_rate": 3.2007361901485455e-08, - "loss": 0.0202, - "step": 8712 - }, - { - "epoch": 3.869864534754608, - "grad_norm": 0.45416360753066903, - "learning_rate": 3.1788740985184144e-08, - "loss": 0.0296, - "step": 8713 - }, - { - "epoch": 3.8703086831001556, - "grad_norm": 0.4540255704066019, - "learning_rate": 3.1570866875430536e-08, - "loss": 0.03, - "step": 8714 - }, - { - "epoch": 3.870752831445703, - "grad_norm": 0.39674557183309156, - "learning_rate": 3.135373960497401e-08, - "loss": 0.0246, - "step": 8715 - }, - { - "epoch": 3.87119697979125, - "grad_norm": 0.4615034431538654, - "learning_rate": 3.113735920645344e-08, - "loss": 0.0209, - "step": 8716 - }, - { - "epoch": 3.8716411281367975, - "grad_norm": 0.40129516372174695, - "learning_rate": 3.092172571239338e-08, - "loss": 0.0239, - "step": 8717 - }, - { - "epoch": 3.8720852764823452, - "grad_norm": 0.3795932466717671, - "learning_rate": 3.070683915520845e-08, - "loss": 0.0275, - "step": 8718 - }, - { - "epoch": 3.8725294248278925, - "grad_norm": 0.5210559447705239, - "learning_rate": 3.049269956719891e-08, - "loss": 0.0261, - "step": 8719 - }, - { - "epoch": 3.87297357317344, - "grad_norm": 0.4437613175714331, - "learning_rate": 3.0279306980554034e-08, - "loss": 0.0295, - "step": 8720 - }, - { - "epoch": 3.8734177215189876, - "grad_norm": 0.3896516822182538, - "learning_rate": 3.006666142734982e-08, - "loss": 0.0246, - "step": 8721 - }, - { - "epoch": 3.873861869864535, - "grad_norm": 0.43115789241542724, - "learning_rate": 2.9854762939551254e-08, - "loss": 0.0314, - "step": 8722 - }, - { - "epoch": 3.874306018210082, - "grad_norm": 0.34436582503878505, - "learning_rate": 2.9643611549008967e-08, - "loss": 0.0219, - "step": 8723 - }, - { - "epoch": 3.8747501665556294, - "grad_norm": 0.430010546023126, - "learning_rate": 2.9433207287464238e-08, - "loss": 0.0267, - "step": 8724 - }, - { - "epoch": 3.8751943149011767, - "grad_norm": 0.28855368882245647, - "learning_rate": 2.9223550186543435e-08, - "loss": 0.0137, - "step": 8725 - }, - { - "epoch": 3.8756384632467245, - "grad_norm": 0.3534494715975773, - "learning_rate": 2.9014640277761353e-08, - "loss": 0.0274, - "step": 8726 - }, - { - "epoch": 3.8760826115922717, - "grad_norm": 0.3093760532300058, - "learning_rate": 2.8806477592521755e-08, - "loss": 0.015, - "step": 8727 - }, - { - "epoch": 3.8765267599378195, - "grad_norm": 0.6497895297842237, - "learning_rate": 2.8599062162114056e-08, - "loss": 0.037, - "step": 8728 - }, - { - "epoch": 3.8769709082833668, - "grad_norm": 0.5014893102635929, - "learning_rate": 2.8392394017716095e-08, - "loss": 0.0344, - "step": 8729 - }, - { - "epoch": 3.877415056628914, - "grad_norm": 0.3936192272437833, - "learning_rate": 2.8186473190395246e-08, - "loss": 0.0303, - "step": 8730 - }, - { - "epoch": 3.8778592049744613, - "grad_norm": 0.4010118354626057, - "learning_rate": 2.798129971110286e-08, - "loss": 0.0307, - "step": 8731 - }, - { - "epoch": 3.8783033533200086, - "grad_norm": 0.3784503093468852, - "learning_rate": 2.7776873610681486e-08, - "loss": 0.02, - "step": 8732 - }, - { - "epoch": 3.8787475016655564, - "grad_norm": 0.4230263125616594, - "learning_rate": 2.7573194919859325e-08, - "loss": 0.0224, - "step": 8733 - }, - { - "epoch": 3.8791916500111037, - "grad_norm": 0.4140738675854264, - "learning_rate": 2.737026366925244e-08, - "loss": 0.0258, - "step": 8734 - }, - { - "epoch": 3.879635798356651, - "grad_norm": 0.42420423811982005, - "learning_rate": 2.716807988936532e-08, - "loss": 0.022, - "step": 8735 - }, - { - "epoch": 3.8800799467021987, - "grad_norm": 0.3980720494346127, - "learning_rate": 2.696664361058976e-08, - "loss": 0.0269, - "step": 8736 - }, - { - "epoch": 3.880524095047746, - "grad_norm": 0.3946254166839832, - "learning_rate": 2.6765954863204323e-08, - "loss": 0.0288, - "step": 8737 - }, - { - "epoch": 3.8809682433932933, - "grad_norm": 0.3788227216815116, - "learning_rate": 2.6566013677376545e-08, - "loss": 0.0238, - "step": 8738 - }, - { - "epoch": 3.8814123917388406, - "grad_norm": 0.34261939630726157, - "learning_rate": 2.6366820083160715e-08, - "loss": 0.0145, - "step": 8739 - }, - { - "epoch": 3.8818565400843883, - "grad_norm": 0.42749605488948866, - "learning_rate": 2.6168374110498995e-08, - "loss": 0.0257, - "step": 8740 - }, - { - "epoch": 3.8823006884299356, - "grad_norm": 0.4042334016798025, - "learning_rate": 2.5970675789220855e-08, - "loss": 0.0186, - "step": 8741 - }, - { - "epoch": 3.882744836775483, - "grad_norm": 0.39831255423745965, - "learning_rate": 2.577372514904475e-08, - "loss": 0.0209, - "step": 8742 - }, - { - "epoch": 3.8831889851210306, - "grad_norm": 0.42360207246789, - "learning_rate": 2.5577522219575324e-08, - "loss": 0.0275, - "step": 8743 - }, - { - "epoch": 3.883633133466578, - "grad_norm": 0.48707455874974, - "learning_rate": 2.5382067030304546e-08, - "loss": 0.0282, - "step": 8744 - }, - { - "epoch": 3.884077281812125, - "grad_norm": 0.4723400265193081, - "learning_rate": 2.5187359610612805e-08, - "loss": 0.0382, - "step": 8745 - }, - { - "epoch": 3.8845214301576725, - "grad_norm": 0.6222325293211652, - "learning_rate": 2.499339998976835e-08, - "loss": 0.0342, - "step": 8746 - }, - { - "epoch": 3.88496557850322, - "grad_norm": 0.35612361294226674, - "learning_rate": 2.4800188196926757e-08, - "loss": 0.0183, - "step": 8747 - }, - { - "epoch": 3.8854097268487675, - "grad_norm": 0.42945931063185067, - "learning_rate": 2.4607724261130893e-08, - "loss": 0.0249, - "step": 8748 - }, - { - "epoch": 3.885853875194315, - "grad_norm": 0.3845905954635827, - "learning_rate": 2.441600821131096e-08, - "loss": 0.0219, - "step": 8749 - }, - { - "epoch": 3.8862980235398625, - "grad_norm": 0.3836148595852599, - "learning_rate": 2.422504007628501e-08, - "loss": 0.0207, - "step": 8750 - }, - { - "epoch": 3.88674217188541, - "grad_norm": 0.3404491277084874, - "learning_rate": 2.4034819884759532e-08, - "loss": 0.0198, - "step": 8751 - }, - { - "epoch": 3.887186320230957, - "grad_norm": 0.3619930083510835, - "learning_rate": 2.3845347665327202e-08, - "loss": 0.0191, - "step": 8752 - }, - { - "epoch": 3.8876304685765044, - "grad_norm": 0.4286874704038195, - "learning_rate": 2.3656623446469684e-08, - "loss": 0.0299, - "step": 8753 - }, - { - "epoch": 3.8880746169220517, - "grad_norm": 0.3418186033416821, - "learning_rate": 2.3468647256554845e-08, - "loss": 0.0201, - "step": 8754 - }, - { - "epoch": 3.8885187652675994, - "grad_norm": 0.4273211578376309, - "learning_rate": 2.3281419123838966e-08, - "loss": 0.0254, - "step": 8755 - }, - { - "epoch": 3.8889629136131467, - "grad_norm": 0.46117222518267137, - "learning_rate": 2.3094939076465095e-08, - "loss": 0.0235, - "step": 8756 - }, - { - "epoch": 3.8894070619586945, - "grad_norm": 0.30961316233144026, - "learning_rate": 2.2909207142464695e-08, - "loss": 0.0178, - "step": 8757 - }, - { - "epoch": 3.8898512103042417, - "grad_norm": 0.4313089227592287, - "learning_rate": 2.2724223349756547e-08, - "loss": 0.0334, - "step": 8758 - }, - { - "epoch": 3.890295358649789, - "grad_norm": 0.45892558729550764, - "learning_rate": 2.253998772614674e-08, - "loss": 0.0358, - "step": 8759 - }, - { - "epoch": 3.8907395069953363, - "grad_norm": 0.4320146677307238, - "learning_rate": 2.235650029932923e-08, - "loss": 0.0275, - "step": 8760 - }, - { - "epoch": 3.8911836553408836, - "grad_norm": 0.47469416867571373, - "learning_rate": 2.2173761096884737e-08, - "loss": 0.0313, - "step": 8761 - }, - { - "epoch": 3.8916278036864314, - "grad_norm": 0.38809247080958487, - "learning_rate": 2.1991770146282953e-08, - "loss": 0.0254, - "step": 8762 - }, - { - "epoch": 3.8920719520319786, - "grad_norm": 0.36883556562596115, - "learning_rate": 2.181052747487922e-08, - "loss": 0.0223, - "step": 8763 - }, - { - "epoch": 3.892516100377526, - "grad_norm": 0.4140272360800753, - "learning_rate": 2.1630033109918403e-08, - "loss": 0.0305, - "step": 8764 - }, - { - "epoch": 3.8929602487230737, - "grad_norm": 0.4360012174993987, - "learning_rate": 2.1450287078531028e-08, - "loss": 0.0247, - "step": 8765 - }, - { - "epoch": 3.893404397068621, - "grad_norm": 0.46749106481514396, - "learning_rate": 2.127128940773604e-08, - "loss": 0.0336, - "step": 8766 - }, - { - "epoch": 3.8938485454141682, - "grad_norm": 0.48628664662774723, - "learning_rate": 2.1093040124440246e-08, - "loss": 0.0364, - "step": 8767 - }, - { - "epoch": 3.8942926937597155, - "grad_norm": 0.4655918973858269, - "learning_rate": 2.091553925543721e-08, - "loss": 0.029, - "step": 8768 - }, - { - "epoch": 3.8947368421052633, - "grad_norm": 0.4385695873892939, - "learning_rate": 2.073878682740893e-08, - "loss": 0.0332, - "step": 8769 - }, - { - "epoch": 3.8951809904508106, - "grad_norm": 0.42920683680763916, - "learning_rate": 2.056278286692359e-08, - "loss": 0.0234, - "step": 8770 - }, - { - "epoch": 3.895625138796358, - "grad_norm": 0.44077294633964265, - "learning_rate": 2.0387527400437812e-08, - "loss": 0.0171, - "step": 8771 - }, - { - "epoch": 3.8960692871419056, - "grad_norm": 0.4448703366882169, - "learning_rate": 2.0213020454295517e-08, - "loss": 0.0241, - "step": 8772 - }, - { - "epoch": 3.896513435487453, - "grad_norm": 0.3962409747625572, - "learning_rate": 2.003926205472795e-08, - "loss": 0.0303, - "step": 8773 - }, - { - "epoch": 3.896957583833, - "grad_norm": 0.45182968264253826, - "learning_rate": 1.986625222785421e-08, - "loss": 0.0517, - "step": 8774 - }, - { - "epoch": 3.8974017321785475, - "grad_norm": 0.3768583215614216, - "learning_rate": 1.9693990999680167e-08, - "loss": 0.0243, - "step": 8775 - }, - { - "epoch": 3.897845880524095, - "grad_norm": 0.37480588059892506, - "learning_rate": 1.952247839610011e-08, - "loss": 0.0232, - "step": 8776 - }, - { - "epoch": 3.8982900288696425, - "grad_norm": 0.43502480214398537, - "learning_rate": 1.9351714442895077e-08, - "loss": 0.0243, - "step": 8777 - }, - { - "epoch": 3.8987341772151898, - "grad_norm": 0.33331146266153816, - "learning_rate": 1.918169916573398e-08, - "loss": 0.0223, - "step": 8778 - }, - { - "epoch": 3.8991783255607375, - "grad_norm": 0.3283297894096133, - "learning_rate": 1.9012432590172493e-08, - "loss": 0.0182, - "step": 8779 - }, - { - "epoch": 3.899622473906285, - "grad_norm": 0.4613996841914982, - "learning_rate": 1.8843914741654146e-08, - "loss": 0.0264, - "step": 8780 - }, - { - "epoch": 3.900066622251832, - "grad_norm": 0.3728645160784641, - "learning_rate": 1.8676145645511456e-08, - "loss": 0.0219, - "step": 8781 - }, - { - "epoch": 3.9005107705973794, - "grad_norm": 0.367364355087082, - "learning_rate": 1.850912532696092e-08, - "loss": 0.0169, - "step": 8782 - }, - { - "epoch": 3.9009549189429267, - "grad_norm": 0.43782054130287257, - "learning_rate": 1.8342853811110227e-08, - "loss": 0.0244, - "step": 8783 - }, - { - "epoch": 3.9013990672884744, - "grad_norm": 0.46621513501531153, - "learning_rate": 1.817733112295217e-08, - "loss": 0.0237, - "step": 8784 - }, - { - "epoch": 3.9018432156340217, - "grad_norm": 0.3948393185866851, - "learning_rate": 1.8012557287367394e-08, - "loss": 0.0197, - "step": 8785 - }, - { - "epoch": 3.9022873639795694, - "grad_norm": 0.5251739552887689, - "learning_rate": 1.7848532329124978e-08, - "loss": 0.0376, - "step": 8786 - }, - { - "epoch": 3.9027315123251167, - "grad_norm": 0.3262674897809267, - "learning_rate": 1.7685256272879646e-08, - "loss": 0.0189, - "step": 8787 - }, - { - "epoch": 3.903175660670664, - "grad_norm": 0.32400288553912415, - "learning_rate": 1.7522729143174545e-08, - "loss": 0.0185, - "step": 8788 - }, - { - "epoch": 3.9036198090162113, - "grad_norm": 0.36601512988828766, - "learning_rate": 1.7360950964441236e-08, - "loss": 0.0196, - "step": 8789 - }, - { - "epoch": 3.9040639573617586, - "grad_norm": 0.5081663386558348, - "learning_rate": 1.7199921760997494e-08, - "loss": 0.0312, - "step": 8790 - }, - { - "epoch": 3.9045081057073063, - "grad_norm": 0.717337078541349, - "learning_rate": 1.7039641557048402e-08, - "loss": 0.035, - "step": 8791 - }, - { - "epoch": 3.9049522540528536, - "grad_norm": 0.44298957493772145, - "learning_rate": 1.6880110376686353e-08, - "loss": 0.0368, - "step": 8792 - }, - { - "epoch": 3.905396402398401, - "grad_norm": 0.34392561304252184, - "learning_rate": 1.672132824389272e-08, - "loss": 0.0178, - "step": 8793 - }, - { - "epoch": 3.9058405507439486, - "grad_norm": 0.42295010360189583, - "learning_rate": 1.6563295182534524e-08, - "loss": 0.0251, - "step": 8794 - }, - { - "epoch": 3.906284699089496, - "grad_norm": 0.3790549559366509, - "learning_rate": 1.6406011216366647e-08, - "loss": 0.0235, - "step": 8795 - }, - { - "epoch": 3.9067288474350432, - "grad_norm": 0.41052130644928436, - "learning_rate": 1.6249476369031845e-08, - "loss": 0.0186, - "step": 8796 - }, - { - "epoch": 3.9071729957805905, - "grad_norm": 0.4367038611052701, - "learning_rate": 1.6093690664059635e-08, - "loss": 0.0339, - "step": 8797 - }, - { - "epoch": 3.9076171441261383, - "grad_norm": 0.37822957010835845, - "learning_rate": 1.5938654124867394e-08, - "loss": 0.0215, - "step": 8798 - }, - { - "epoch": 3.9080612924716855, - "grad_norm": 0.3875394125371318, - "learning_rate": 1.5784366774760362e-08, - "loss": 0.0217, - "step": 8799 - }, - { - "epoch": 3.908505440817233, - "grad_norm": 0.4043786874203682, - "learning_rate": 1.563082863692944e-08, - "loss": 0.0236, - "step": 8800 - }, - { - "epoch": 3.9089495891627806, - "grad_norm": 0.4592008857137564, - "learning_rate": 1.5478039734455053e-08, - "loss": 0.0229, - "step": 8801 - }, - { - "epoch": 3.909393737508328, - "grad_norm": 0.29589640326829375, - "learning_rate": 1.5326000090303272e-08, - "loss": 0.0162, - "step": 8802 - }, - { - "epoch": 3.909837885853875, - "grad_norm": 0.42576417588302456, - "learning_rate": 1.5174709727328595e-08, - "loss": 0.0359, - "step": 8803 - }, - { - "epoch": 3.9102820341994224, - "grad_norm": 0.4093180434069703, - "learning_rate": 1.5024168668272275e-08, - "loss": 0.0264, - "step": 8804 - }, - { - "epoch": 3.91072618254497, - "grad_norm": 0.46276973652514886, - "learning_rate": 1.4874376935763434e-08, - "loss": 0.0289, - "step": 8805 - }, - { - "epoch": 3.9111703308905175, - "grad_norm": 0.4688883489908628, - "learning_rate": 1.4725334552318504e-08, - "loss": 0.0181, - "step": 8806 - }, - { - "epoch": 3.9116144792360648, - "grad_norm": 0.3039807302776599, - "learning_rate": 1.4577041540340676e-08, - "loss": 0.015, - "step": 8807 - }, - { - "epoch": 3.9120586275816125, - "grad_norm": 0.3741536582807147, - "learning_rate": 1.442949792212045e-08, - "loss": 0.0235, - "step": 8808 - }, - { - "epoch": 3.91250277592716, - "grad_norm": 0.5513005564638584, - "learning_rate": 1.428270371983731e-08, - "loss": 0.0238, - "step": 8809 - }, - { - "epoch": 3.912946924272707, - "grad_norm": 0.37263633834750676, - "learning_rate": 1.4136658955556381e-08, - "loss": 0.0249, - "step": 8810 - }, - { - "epoch": 3.9133910726182544, - "grad_norm": 0.36432340872278923, - "learning_rate": 1.3991363651230106e-08, - "loss": 0.0207, - "step": 8811 - }, - { - "epoch": 3.9138352209638017, - "grad_norm": 0.49111074770917923, - "learning_rate": 1.38468178286999e-08, - "loss": 0.0296, - "step": 8812 - }, - { - "epoch": 3.9142793693093494, - "grad_norm": 0.36763184203730237, - "learning_rate": 1.3703021509692827e-08, - "loss": 0.0262, - "step": 8813 - }, - { - "epoch": 3.9147235176548967, - "grad_norm": 0.4095976827275787, - "learning_rate": 1.3559974715823266e-08, - "loss": 0.0297, - "step": 8814 - }, - { - "epoch": 3.9151676660004444, - "grad_norm": 0.4248033682883231, - "learning_rate": 1.3417677468595125e-08, - "loss": 0.025, - "step": 8815 - }, - { - "epoch": 3.9156118143459917, - "grad_norm": 0.4638888518779016, - "learning_rate": 1.3276129789397407e-08, - "loss": 0.0225, - "step": 8816 - }, - { - "epoch": 3.916055962691539, - "grad_norm": 0.5594291449958435, - "learning_rate": 1.3135331699506426e-08, - "loss": 0.0285, - "step": 8817 - }, - { - "epoch": 3.9165001110370863, - "grad_norm": 0.5046033957125147, - "learning_rate": 1.2995283220087473e-08, - "loss": 0.0329, - "step": 8818 - }, - { - "epoch": 3.9169442593826336, - "grad_norm": 0.4673687167446593, - "learning_rate": 1.2855984372191488e-08, - "loss": 0.0256, - "step": 8819 - }, - { - "epoch": 3.9173884077281813, - "grad_norm": 0.37484792767681735, - "learning_rate": 1.2717435176758386e-08, - "loss": 0.0271, - "step": 8820 - }, - { - "epoch": 3.9178325560737286, - "grad_norm": 0.5235629211435013, - "learning_rate": 1.2579635654613176e-08, - "loss": 0.0353, - "step": 8821 - }, - { - "epoch": 3.918276704419276, - "grad_norm": 0.37938171555742584, - "learning_rate": 1.24425858264704e-08, - "loss": 0.0236, - "step": 8822 - }, - { - "epoch": 3.9187208527648236, - "grad_norm": 0.31241390505019695, - "learning_rate": 1.2306285712931354e-08, - "loss": 0.0175, - "step": 8823 - }, - { - "epoch": 3.919165001110371, - "grad_norm": 0.3890372627975678, - "learning_rate": 1.2170735334482986e-08, - "loss": 0.0323, - "step": 8824 - }, - { - "epoch": 3.919609149455918, - "grad_norm": 0.5060461363110739, - "learning_rate": 1.2035934711501773e-08, - "loss": 0.0213, - "step": 8825 - }, - { - "epoch": 3.9200532978014655, - "grad_norm": 0.4210215040945434, - "learning_rate": 1.1901883864250396e-08, - "loss": 0.0201, - "step": 8826 - }, - { - "epoch": 3.9204974461470132, - "grad_norm": 0.3987058322406445, - "learning_rate": 1.1768582812878848e-08, - "loss": 0.0265, - "step": 8827 - }, - { - "epoch": 3.9209415944925605, - "grad_norm": 0.3908166480296464, - "learning_rate": 1.1636031577424434e-08, - "loss": 0.0204, - "step": 8828 - }, - { - "epoch": 3.921385742838108, - "grad_norm": 0.4484182047406765, - "learning_rate": 1.150423017781177e-08, - "loss": 0.0262, - "step": 8829 - }, - { - "epoch": 3.9218298911836555, - "grad_norm": 0.39560743278914223, - "learning_rate": 1.1373178633853344e-08, - "loss": 0.0301, - "step": 8830 - }, - { - "epoch": 3.922274039529203, - "grad_norm": 0.44427517578613046, - "learning_rate": 1.124287696524784e-08, - "loss": 0.033, - "step": 8831 - }, - { - "epoch": 3.92271818787475, - "grad_norm": 0.48713519459485133, - "learning_rate": 1.111332519158237e-08, - "loss": 0.0293, - "step": 8832 - }, - { - "epoch": 3.9231623362202974, - "grad_norm": 0.38920760124214965, - "learning_rate": 1.0984523332330244e-08, - "loss": 0.0313, - "step": 8833 - }, - { - "epoch": 3.923606484565845, - "grad_norm": 0.39034349333664137, - "learning_rate": 1.0856471406852642e-08, - "loss": 0.0227, - "step": 8834 - }, - { - "epoch": 3.9240506329113924, - "grad_norm": 0.5491500358691043, - "learning_rate": 1.0729169434398613e-08, - "loss": 0.046, - "step": 8835 - }, - { - "epoch": 3.9244947812569397, - "grad_norm": 0.38151095276050573, - "learning_rate": 1.0602617434102846e-08, - "loss": 0.0248, - "step": 8836 - }, - { - "epoch": 3.9249389296024875, - "grad_norm": 0.4149228267505079, - "learning_rate": 1.0476815424989018e-08, - "loss": 0.0212, - "step": 8837 - }, - { - "epoch": 3.9253830779480348, - "grad_norm": 0.4455456792947203, - "learning_rate": 1.0351763425966999e-08, - "loss": 0.0276, - "step": 8838 - }, - { - "epoch": 3.925827226293582, - "grad_norm": 0.38613196310803816, - "learning_rate": 1.022746145583453e-08, - "loss": 0.0224, - "step": 8839 - }, - { - "epoch": 3.9262713746391293, - "grad_norm": 0.4192361066154168, - "learning_rate": 1.0103909533275557e-08, - "loss": 0.0299, - "step": 8840 - }, - { - "epoch": 3.9267155229846766, - "grad_norm": 0.6267500325799473, - "learning_rate": 9.981107676862444e-09, - "loss": 0.0255, - "step": 8841 - }, - { - "epoch": 3.9271596713302244, - "grad_norm": 0.43799023094491873, - "learning_rate": 9.859055905054871e-09, - "loss": 0.0318, - "step": 8842 - }, - { - "epoch": 3.9276038196757717, - "grad_norm": 0.40925366502801463, - "learning_rate": 9.737754236198716e-09, - "loss": 0.0237, - "step": 8843 - }, - { - "epoch": 3.9280479680213194, - "grad_norm": 0.34421339563935327, - "learning_rate": 9.617202688527727e-09, - "loss": 0.02, - "step": 8844 - }, - { - "epoch": 3.9284921163668667, - "grad_norm": 0.36452089076221383, - "learning_rate": 9.497401280162966e-09, - "loss": 0.0272, - "step": 8845 - }, - { - "epoch": 3.928936264712414, - "grad_norm": 0.37821446185307267, - "learning_rate": 9.378350029112248e-09, - "loss": 0.0187, - "step": 8846 - }, - { - "epoch": 3.9293804130579613, - "grad_norm": 0.4769435842108248, - "learning_rate": 9.260048953271817e-09, - "loss": 0.0304, - "step": 8847 - }, - { - "epoch": 3.9298245614035086, - "grad_norm": 0.42416826190385143, - "learning_rate": 9.142498070424111e-09, - "loss": 0.0252, - "step": 8848 - }, - { - "epoch": 3.9302687097490563, - "grad_norm": 0.4780719986413718, - "learning_rate": 9.02569739823833e-09, - "loss": 0.0243, - "step": 8849 - }, - { - "epoch": 3.9307128580946036, - "grad_norm": 0.5064893232422892, - "learning_rate": 8.9096469542721e-09, - "loss": 0.029, - "step": 8850 - }, - { - "epoch": 3.931157006440151, - "grad_norm": 0.472928807421496, - "learning_rate": 8.794346755969795e-09, - "loss": 0.0234, - "step": 8851 - }, - { - "epoch": 3.9316011547856986, - "grad_norm": 0.40013223823456623, - "learning_rate": 8.679796820663111e-09, - "loss": 0.0344, - "step": 8852 - }, - { - "epoch": 3.932045303131246, - "grad_norm": 0.40854096697741643, - "learning_rate": 8.565997165570494e-09, - "loss": 0.0317, - "step": 8853 - }, - { - "epoch": 3.932489451476793, - "grad_norm": 0.6774113697293435, - "learning_rate": 8.452947807798261e-09, - "loss": 0.0265, - "step": 8854 - }, - { - "epoch": 3.9329335998223405, - "grad_norm": 0.47756331700457766, - "learning_rate": 8.340648764339487e-09, - "loss": 0.0245, - "step": 8855 - }, - { - "epoch": 3.933377748167888, - "grad_norm": 0.408015599055906, - "learning_rate": 8.229100052074557e-09, - "loss": 0.0266, - "step": 8856 - }, - { - "epoch": 3.9338218965134355, - "grad_norm": 0.3723159509813085, - "learning_rate": 8.118301687771169e-09, - "loss": 0.0244, - "step": 8857 - }, - { - "epoch": 3.934266044858983, - "grad_norm": 0.44846859119809684, - "learning_rate": 8.008253688084888e-09, - "loss": 0.0199, - "step": 8858 - }, - { - "epoch": 3.9347101932045305, - "grad_norm": 0.3532724419502754, - "learning_rate": 7.898956069556375e-09, - "loss": 0.0167, - "step": 8859 - }, - { - "epoch": 3.935154341550078, - "grad_norm": 0.38377139505288993, - "learning_rate": 7.790408848616371e-09, - "loss": 0.025, - "step": 8860 - }, - { - "epoch": 3.935598489895625, - "grad_norm": 0.47706384193903933, - "learning_rate": 7.682612041580161e-09, - "loss": 0.0298, - "step": 8861 - }, - { - "epoch": 3.9360426382411724, - "grad_norm": 0.47799572814835295, - "learning_rate": 7.575565664652562e-09, - "loss": 0.0232, - "step": 8862 - }, - { - "epoch": 3.9364867865867197, - "grad_norm": 0.42569464791407546, - "learning_rate": 7.469269733923478e-09, - "loss": 0.0284, - "step": 8863 - }, - { - "epoch": 3.9369309349322674, - "grad_norm": 0.49128556583785543, - "learning_rate": 7.363724265371796e-09, - "loss": 0.0322, - "step": 8864 - }, - { - "epoch": 3.9373750832778147, - "grad_norm": 0.4426824222148885, - "learning_rate": 7.258929274862048e-09, - "loss": 0.0244, - "step": 8865 - }, - { - "epoch": 3.9378192316233624, - "grad_norm": 0.4220498782198706, - "learning_rate": 7.154884778147187e-09, - "loss": 0.0381, - "step": 8866 - }, - { - "epoch": 3.9382633799689097, - "grad_norm": 0.40573724780385634, - "learning_rate": 7.051590790866925e-09, - "loss": 0.0208, - "step": 8867 - }, - { - "epoch": 3.938707528314457, - "grad_norm": 0.36192582224558784, - "learning_rate": 6.949047328547731e-09, - "loss": 0.0186, - "step": 8868 - }, - { - "epoch": 3.9391516766600043, - "grad_norm": 0.44081181805446706, - "learning_rate": 6.847254406603943e-09, - "loss": 0.0305, - "step": 8869 - }, - { - "epoch": 3.9395958250055516, - "grad_norm": 0.47409619445972684, - "learning_rate": 6.746212040336653e-09, - "loss": 0.0276, - "step": 8870 - }, - { - "epoch": 3.9400399733510993, - "grad_norm": 0.3907822464278477, - "learning_rate": 6.645920244934267e-09, - "loss": 0.0265, - "step": 8871 - }, - { - "epoch": 3.9404841216966466, - "grad_norm": 0.3877502095250313, - "learning_rate": 6.546379035472505e-09, - "loss": 0.0196, - "step": 8872 - }, - { - "epoch": 3.9409282700421944, - "grad_norm": 0.36359890877779866, - "learning_rate": 6.447588426913287e-09, - "loss": 0.0207, - "step": 8873 - }, - { - "epoch": 3.9413724183877417, - "grad_norm": 0.4046244907134214, - "learning_rate": 6.349548434108066e-09, - "loss": 0.0245, - "step": 8874 - }, - { - "epoch": 3.941816566733289, - "grad_norm": 0.30688221794365883, - "learning_rate": 6.252259071792277e-09, - "loss": 0.0252, - "step": 8875 - }, - { - "epoch": 3.9422607150788362, - "grad_norm": 0.3706992283845886, - "learning_rate": 6.155720354590888e-09, - "loss": 0.0179, - "step": 8876 - }, - { - "epoch": 3.9427048634243835, - "grad_norm": 0.3606911516685271, - "learning_rate": 6.059932297015625e-09, - "loss": 0.0155, - "step": 8877 - }, - { - "epoch": 3.9431490117699313, - "grad_norm": 0.3448866184147424, - "learning_rate": 5.964894913464969e-09, - "loss": 0.0238, - "step": 8878 - }, - { - "epoch": 3.9435931601154786, - "grad_norm": 0.42743435978316485, - "learning_rate": 5.8706082182241605e-09, - "loss": 0.019, - "step": 8879 - }, - { - "epoch": 3.944037308461026, - "grad_norm": 0.38374298902655224, - "learning_rate": 5.777072225466307e-09, - "loss": 0.0232, - "step": 8880 - }, - { - "epoch": 3.9444814568065736, - "grad_norm": 0.3737396272194393, - "learning_rate": 5.684286949251272e-09, - "loss": 0.0218, - "step": 8881 - }, - { - "epoch": 3.944925605152121, - "grad_norm": 0.3717422178901769, - "learning_rate": 5.592252403526788e-09, - "loss": 0.018, - "step": 8882 - }, - { - "epoch": 3.945369753497668, - "grad_norm": 0.4530028394270273, - "learning_rate": 5.500968602126788e-09, - "loss": 0.0311, - "step": 8883 - }, - { - "epoch": 3.9458139018432155, - "grad_norm": 0.34242197949439085, - "learning_rate": 5.410435558773075e-09, - "loss": 0.0229, - "step": 8884 - }, - { - "epoch": 3.946258050188763, - "grad_norm": 0.39126976030692906, - "learning_rate": 5.3206532870742065e-09, - "loss": 0.0253, - "step": 8885 - }, - { - "epoch": 3.9467021985343105, - "grad_norm": 0.43259511059270805, - "learning_rate": 5.231621800525499e-09, - "loss": 0.0222, - "step": 8886 - }, - { - "epoch": 3.9471463468798578, - "grad_norm": 0.5231951089873627, - "learning_rate": 5.143341112510691e-09, - "loss": 0.0432, - "step": 8887 - }, - { - "epoch": 3.9475904952254055, - "grad_norm": 0.4699803462882175, - "learning_rate": 5.055811236299724e-09, - "loss": 0.0242, - "step": 8888 - }, - { - "epoch": 3.948034643570953, - "grad_norm": 0.4043702599111712, - "learning_rate": 4.969032185049294e-09, - "loss": 0.0367, - "step": 8889 - }, - { - "epoch": 3.9484787919165, - "grad_norm": 0.41944855374085116, - "learning_rate": 4.883003971803968e-09, - "loss": 0.0236, - "step": 8890 - }, - { - "epoch": 3.9489229402620474, - "grad_norm": 0.40457657146672565, - "learning_rate": 4.797726609495623e-09, - "loss": 0.0188, - "step": 8891 - }, - { - "epoch": 3.9493670886075947, - "grad_norm": 0.4052288455507649, - "learning_rate": 4.7132001109423396e-09, - "loss": 0.0266, - "step": 8892 - }, - { - "epoch": 3.9498112369531424, - "grad_norm": 0.5765436543403538, - "learning_rate": 4.629424488850065e-09, - "loss": 0.0362, - "step": 8893 - }, - { - "epoch": 3.9502553852986897, - "grad_norm": 0.4676308344783614, - "learning_rate": 4.546399755812059e-09, - "loss": 0.0249, - "step": 8894 - }, - { - "epoch": 3.9506995336442374, - "grad_norm": 0.28808412670077804, - "learning_rate": 4.4641259243077825e-09, - "loss": 0.0137, - "step": 8895 - }, - { - "epoch": 3.9511436819897847, - "grad_norm": 0.47058698890390266, - "learning_rate": 4.382603006705121e-09, - "loss": 0.0319, - "step": 8896 - }, - { - "epoch": 3.951587830335332, - "grad_norm": 0.41572017055736116, - "learning_rate": 4.301831015257607e-09, - "loss": 0.0221, - "step": 8897 - }, - { - "epoch": 3.9520319786808793, - "grad_norm": 0.39457081647577197, - "learning_rate": 4.221809962107193e-09, - "loss": 0.0248, - "step": 8898 - }, - { - "epoch": 3.9524761270264266, - "grad_norm": 0.5337090298271753, - "learning_rate": 4.142539859282035e-09, - "loss": 0.036, - "step": 8899 - }, - { - "epoch": 3.9529202753719743, - "grad_norm": 0.4292452210463301, - "learning_rate": 4.064020718698158e-09, - "loss": 0.0276, - "step": 8900 - }, - { - "epoch": 3.9533644237175216, - "grad_norm": 0.35441583703246865, - "learning_rate": 3.986252552157788e-09, - "loss": 0.0228, - "step": 8901 - }, - { - "epoch": 3.953808572063069, - "grad_norm": 0.4031250072105655, - "learning_rate": 3.909235371351017e-09, - "loss": 0.028, - "step": 8902 - }, - { - "epoch": 3.9542527204086166, - "grad_norm": 0.32842995453695917, - "learning_rate": 3.832969187855251e-09, - "loss": 0.0244, - "step": 8903 - }, - { - "epoch": 3.954696868754164, - "grad_norm": 0.4192351275506548, - "learning_rate": 3.757454013134099e-09, - "loss": 0.0215, - "step": 8904 - }, - { - "epoch": 3.955141017099711, - "grad_norm": 0.4517856702968858, - "learning_rate": 3.682689858539035e-09, - "loss": 0.0308, - "step": 8905 - }, - { - "epoch": 3.9555851654452585, - "grad_norm": 0.4000805272591098, - "learning_rate": 3.608676735308292e-09, - "loss": 0.0257, - "step": 8906 - }, - { - "epoch": 3.9560293137908062, - "grad_norm": 0.39861614075630786, - "learning_rate": 3.5354146545668597e-09, - "loss": 0.0268, - "step": 8907 - }, - { - "epoch": 3.9564734621363535, - "grad_norm": 0.3761331007710691, - "learning_rate": 3.462903627328151e-09, - "loss": 0.0212, - "step": 8908 - }, - { - "epoch": 3.956917610481901, - "grad_norm": 0.552448221226994, - "learning_rate": 3.3911436644912256e-09, - "loss": 0.0272, - "step": 8909 - }, - { - "epoch": 3.9573617588274486, - "grad_norm": 0.40174202534071046, - "learning_rate": 3.3201347768430093e-09, - "loss": 0.0289, - "step": 8910 - }, - { - "epoch": 3.957805907172996, - "grad_norm": 0.3916127999245288, - "learning_rate": 3.249876975057187e-09, - "loss": 0.0251, - "step": 8911 - }, - { - "epoch": 3.958250055518543, - "grad_norm": 0.40161063795102814, - "learning_rate": 3.1803702696947547e-09, - "loss": 0.0201, - "step": 8912 - }, - { - "epoch": 3.9586942038640904, - "grad_norm": 0.4175398836504673, - "learning_rate": 3.111614671204022e-09, - "loss": 0.0236, - "step": 8913 - }, - { - "epoch": 3.959138352209638, - "grad_norm": 0.5695134220357339, - "learning_rate": 3.043610189919499e-09, - "loss": 0.0289, - "step": 8914 - }, - { - "epoch": 3.9595825005551855, - "grad_norm": 0.47624104829660213, - "learning_rate": 2.97635683606412e-09, - "loss": 0.0319, - "step": 8915 - }, - { - "epoch": 3.9600266489007327, - "grad_norm": 0.4611136348425102, - "learning_rate": 2.909854619747021e-09, - "loss": 0.0207, - "step": 8916 - }, - { - "epoch": 3.9604707972462805, - "grad_norm": 0.49713796334737675, - "learning_rate": 2.8441035509640947e-09, - "loss": 0.0273, - "step": 8917 - }, - { - "epoch": 3.9609149455918278, - "grad_norm": 0.5029210915242077, - "learning_rate": 2.7791036395996563e-09, - "loss": 0.0413, - "step": 8918 - }, - { - "epoch": 3.961359093937375, - "grad_norm": 0.4809399615159702, - "learning_rate": 2.7148548954236687e-09, - "loss": 0.0326, - "step": 8919 - }, - { - "epoch": 3.9618032422829224, - "grad_norm": 0.4107926474040777, - "learning_rate": 2.6513573280939618e-09, - "loss": 0.0243, - "step": 8920 - }, - { - "epoch": 3.9622473906284696, - "grad_norm": 0.39038109738475957, - "learning_rate": 2.5886109471551233e-09, - "loss": 0.0251, - "step": 8921 - }, - { - "epoch": 3.9626915389740174, - "grad_norm": 0.328940131759137, - "learning_rate": 2.526615762039608e-09, - "loss": 0.0163, - "step": 8922 - }, - { - "epoch": 3.9631356873195647, - "grad_norm": 0.4003085651392341, - "learning_rate": 2.465371782066073e-09, - "loss": 0.0236, - "step": 8923 - }, - { - "epoch": 3.9635798356651124, - "grad_norm": 0.4454242880030863, - "learning_rate": 2.4048790164404866e-09, - "loss": 0.0232, - "step": 8924 - }, - { - "epoch": 3.9640239840106597, - "grad_norm": 0.34629533945323293, - "learning_rate": 2.3451374742555764e-09, - "loss": 0.0177, - "step": 8925 - }, - { - "epoch": 3.964468132356207, - "grad_norm": 0.4133618406412272, - "learning_rate": 2.2861471644919363e-09, - "loss": 0.0191, - "step": 8926 - }, - { - "epoch": 3.9649122807017543, - "grad_norm": 0.42649199370055857, - "learning_rate": 2.2279080960163625e-09, - "loss": 0.027, - "step": 8927 - }, - { - "epoch": 3.9653564290473016, - "grad_norm": 0.3956675634414063, - "learning_rate": 2.170420277584073e-09, - "loss": 0.0263, - "step": 8928 - }, - { - "epoch": 3.9658005773928493, - "grad_norm": 0.4346162397069076, - "learning_rate": 2.1136837178353797e-09, - "loss": 0.0258, - "step": 8929 - }, - { - "epoch": 3.9662447257383966, - "grad_norm": 0.4508730863193407, - "learning_rate": 2.05769842529957e-09, - "loss": 0.0355, - "step": 8930 - }, - { - "epoch": 3.966688874083944, - "grad_norm": 0.44677554596928787, - "learning_rate": 2.002464408392135e-09, - "loss": 0.0245, - "step": 8931 - }, - { - "epoch": 3.9671330224294916, - "grad_norm": 0.3494473501914632, - "learning_rate": 1.9479816754147672e-09, - "loss": 0.0231, - "step": 8932 - }, - { - "epoch": 3.967577170775039, - "grad_norm": 0.3733325402809653, - "learning_rate": 1.894250234558137e-09, - "loss": 0.0177, - "step": 8933 - }, - { - "epoch": 3.968021319120586, - "grad_norm": 0.42732301014072877, - "learning_rate": 1.8412700938985618e-09, - "loss": 0.0245, - "step": 8934 - }, - { - "epoch": 3.9684654674661335, - "grad_norm": 0.5850533107887496, - "learning_rate": 1.7890412614002262e-09, - "loss": 0.0173, - "step": 8935 - }, - { - "epoch": 3.9689096158116812, - "grad_norm": 0.42327533255156274, - "learning_rate": 1.7375637449135174e-09, - "loss": 0.0217, - "step": 8936 - }, - { - "epoch": 3.9693537641572285, - "grad_norm": 0.45340718199873464, - "learning_rate": 1.68683755217669e-09, - "loss": 0.0329, - "step": 8937 - }, - { - "epoch": 3.969797912502776, - "grad_norm": 0.43589126986170634, - "learning_rate": 1.6368626908147556e-09, - "loss": 0.0294, - "step": 8938 - }, - { - "epoch": 3.9702420608483235, - "grad_norm": 0.37544409700894205, - "learning_rate": 1.5876391683400383e-09, - "loss": 0.0356, - "step": 8939 - }, - { - "epoch": 3.970686209193871, - "grad_norm": 0.3711146055897954, - "learning_rate": 1.5391669921505093e-09, - "loss": 0.0304, - "step": 8940 - }, - { - "epoch": 3.971130357539418, - "grad_norm": 0.40437917019760156, - "learning_rate": 1.4914461695336723e-09, - "loss": 0.0186, - "step": 8941 - }, - { - "epoch": 3.9715745058849654, - "grad_norm": 0.36049022928808616, - "learning_rate": 1.4444767076626787e-09, - "loss": 0.0221, - "step": 8942 - }, - { - "epoch": 3.972018654230513, - "grad_norm": 0.49140822525474664, - "learning_rate": 1.3982586135968813e-09, - "loss": 0.0267, - "step": 8943 - }, - { - "epoch": 3.9724628025760604, - "grad_norm": 0.5302909084020938, - "learning_rate": 1.3527918942840556e-09, - "loss": 0.041, - "step": 8944 - }, - { - "epoch": 3.9729069509216077, - "grad_norm": 0.5476906455774254, - "learning_rate": 1.3080765565592902e-09, - "loss": 0.0273, - "step": 8945 - }, - { - "epoch": 3.9733510992671555, - "grad_norm": 0.3333489521628196, - "learning_rate": 1.2641126071433197e-09, - "loss": 0.0268, - "step": 8946 - }, - { - "epoch": 3.9737952476127028, - "grad_norm": 0.39064699714492984, - "learning_rate": 1.2209000526447469e-09, - "loss": 0.0204, - "step": 8947 - }, - { - "epoch": 3.97423939595825, - "grad_norm": 0.5569247363465006, - "learning_rate": 1.1784388995594864e-09, - "loss": 0.0321, - "step": 8948 - }, - { - "epoch": 3.9746835443037973, - "grad_norm": 0.4766119680419059, - "learning_rate": 1.1367291542702107e-09, - "loss": 0.0284, - "step": 8949 - }, - { - "epoch": 3.9751276926493446, - "grad_norm": 0.5034524006899211, - "learning_rate": 1.0957708230457942e-09, - "loss": 0.0271, - "step": 8950 - }, - { - "epoch": 3.9755718409948924, - "grad_norm": 0.3381689403401387, - "learning_rate": 1.0555639120440887e-09, - "loss": 0.0201, - "step": 8951 - }, - { - "epoch": 3.9760159893404396, - "grad_norm": 0.46151159178049117, - "learning_rate": 1.0161084273080378e-09, - "loss": 0.0296, - "step": 8952 - }, - { - "epoch": 3.9764601376859874, - "grad_norm": 0.4521959026876067, - "learning_rate": 9.774043747690087e-10, - "loss": 0.0311, - "step": 8953 - }, - { - "epoch": 3.9769042860315347, - "grad_norm": 0.48398453476246417, - "learning_rate": 9.394517602445697e-10, - "loss": 0.0309, - "step": 8954 - }, - { - "epoch": 3.977348434377082, - "grad_norm": 0.3856389581537802, - "learning_rate": 9.022505894396017e-10, - "loss": 0.0216, - "step": 8955 - }, - { - "epoch": 3.9777925827226293, - "grad_norm": 0.3450747091674961, - "learning_rate": 8.658008679462981e-10, - "loss": 0.0157, - "step": 8956 - }, - { - "epoch": 3.9782367310681765, - "grad_norm": 0.3798481275168741, - "learning_rate": 8.301026012436098e-10, - "loss": 0.0197, - "step": 8957 - }, - { - "epoch": 3.9786808794137243, - "grad_norm": 0.4175802111530846, - "learning_rate": 7.951557946972444e-10, - "loss": 0.0254, - "step": 8958 - }, - { - "epoch": 3.9791250277592716, - "grad_norm": 0.3948056525475575, - "learning_rate": 7.609604535613324e-10, - "loss": 0.0198, - "step": 8959 - }, - { - "epoch": 3.979569176104819, - "grad_norm": 0.5080884160142413, - "learning_rate": 7.275165829745412e-10, - "loss": 0.0204, - "step": 8960 - }, - { - "epoch": 3.9800133244503666, - "grad_norm": 0.3995310763869156, - "learning_rate": 6.948241879650708e-10, - "loss": 0.0296, - "step": 8961 - }, - { - "epoch": 3.980457472795914, - "grad_norm": 0.37430290093081664, - "learning_rate": 6.628832734467683e-10, - "loss": 0.0238, - "step": 8962 - }, - { - "epoch": 3.980901621141461, - "grad_norm": 0.43982737443124764, - "learning_rate": 6.316938442213483e-10, - "loss": 0.0209, - "step": 8963 - }, - { - "epoch": 3.9813457694870085, - "grad_norm": 0.3775182755482849, - "learning_rate": 6.012559049761723e-10, - "loss": 0.025, - "step": 8964 - }, - { - "epoch": 3.981789917832556, - "grad_norm": 0.39039801224690585, - "learning_rate": 5.715694602875799e-10, - "loss": 0.0207, - "step": 8965 - }, - { - "epoch": 3.9822340661781035, - "grad_norm": 0.4199762038802494, - "learning_rate": 5.426345146175571e-10, - "loss": 0.0191, - "step": 8966 - }, - { - "epoch": 3.982678214523651, - "grad_norm": 0.38333717972878917, - "learning_rate": 5.144510723154028e-10, - "loss": 0.0284, - "step": 8967 - }, - { - "epoch": 3.9831223628691985, - "grad_norm": 0.4104443185350045, - "learning_rate": 4.87019137617728e-10, - "loss": 0.0189, - "step": 8968 - }, - { - "epoch": 3.983566511214746, - "grad_norm": 0.5433078848500579, - "learning_rate": 4.60338714647901e-10, - "loss": 0.0302, - "step": 8969 - }, - { - "epoch": 3.984010659560293, - "grad_norm": 0.3909125068421461, - "learning_rate": 4.3440980741660254e-10, - "loss": 0.0238, - "step": 8970 - }, - { - "epoch": 3.9844548079058404, - "grad_norm": 0.41307839626778975, - "learning_rate": 4.092324198212705e-10, - "loss": 0.0235, - "step": 8971 - }, - { - "epoch": 3.984898956251388, - "grad_norm": 0.321331298647483, - "learning_rate": 3.848065556461e-10, - "loss": 0.0236, - "step": 8972 - }, - { - "epoch": 3.9853431045969354, - "grad_norm": 0.3312110185029968, - "learning_rate": 3.6113221856370896e-10, - "loss": 0.0219, - "step": 8973 - }, - { - "epoch": 3.9857872529424827, - "grad_norm": 0.4192103414685381, - "learning_rate": 3.382094121318069e-10, - "loss": 0.0266, - "step": 8974 - }, - { - "epoch": 3.9862314012880304, - "grad_norm": 0.35796707903705355, - "learning_rate": 3.160381397965262e-10, - "loss": 0.0177, - "step": 8975 - }, - { - "epoch": 3.9866755496335777, - "grad_norm": 0.3908253098336717, - "learning_rate": 2.946184048902012e-10, - "loss": 0.0235, - "step": 8976 - }, - { - "epoch": 3.987119697979125, - "grad_norm": 0.42457467356965617, - "learning_rate": 2.7395021063303385e-10, - "loss": 0.0397, - "step": 8977 - }, - { - "epoch": 3.9875638463246723, - "grad_norm": 0.4884888876304642, - "learning_rate": 2.540335601319832e-10, - "loss": 0.0228, - "step": 8978 - }, - { - "epoch": 3.9880079946702196, - "grad_norm": 0.45397167330183835, - "learning_rate": 2.348684563802106e-10, - "loss": 0.0215, - "step": 8979 - }, - { - "epoch": 3.9884521430157673, - "grad_norm": 0.48150914332233963, - "learning_rate": 2.1645490225929989e-10, - "loss": 0.0207, - "step": 8980 - }, - { - "epoch": 3.9888962913613146, - "grad_norm": 0.518549165106476, - "learning_rate": 1.9879290053592682e-10, - "loss": 0.0276, - "step": 8981 - }, - { - "epoch": 3.9893404397068624, - "grad_norm": 0.3673159296164483, - "learning_rate": 1.8188245386629998e-10, - "loss": 0.0188, - "step": 8982 - }, - { - "epoch": 3.9897845880524097, - "grad_norm": 0.34298096212254114, - "learning_rate": 1.657235647917199e-10, - "loss": 0.0169, - "step": 8983 - }, - { - "epoch": 3.990228736397957, - "grad_norm": 0.4555639884295353, - "learning_rate": 1.5031623574135456e-10, - "loss": 0.0288, - "step": 8984 - }, - { - "epoch": 3.9906728847435042, - "grad_norm": 0.3767875220229666, - "learning_rate": 1.3566046903057405e-10, - "loss": 0.0245, - "step": 8985 - }, - { - "epoch": 3.9911170330890515, - "grad_norm": 0.3435105743106647, - "learning_rate": 1.217562668631711e-10, - "loss": 0.0192, - "step": 8986 - }, - { - "epoch": 3.9915611814345993, - "grad_norm": 0.5264624768016674, - "learning_rate": 1.0860363132914053e-10, - "loss": 0.0215, - "step": 8987 - }, - { - "epoch": 3.9920053297801465, - "grad_norm": 0.4654928703557639, - "learning_rate": 9.620256440467934e-11, - "loss": 0.0253, - "step": 8988 - }, - { - "epoch": 3.992449478125694, - "grad_norm": 0.42855294671291005, - "learning_rate": 8.455306795496221e-11, - "loss": 0.0322, - "step": 8989 - }, - { - "epoch": 3.9928936264712416, - "grad_norm": 0.33395128737336627, - "learning_rate": 7.365514373081084e-11, - "loss": 0.0159, - "step": 8990 - }, - { - "epoch": 3.993337774816789, - "grad_norm": 0.7884624510448948, - "learning_rate": 6.35087933698042e-11, - "loss": 0.0429, - "step": 8991 - }, - { - "epoch": 3.993781923162336, - "grad_norm": 0.4471065505434232, - "learning_rate": 5.411401839738872e-11, - "loss": 0.032, - "step": 8992 - }, - { - "epoch": 3.9942260715078834, - "grad_norm": 0.3926302149338509, - "learning_rate": 4.547082022632321e-11, - "loss": 0.0269, - "step": 8993 - }, - { - "epoch": 3.994670219853431, - "grad_norm": 0.35432194327597677, - "learning_rate": 3.7579200155013483e-11, - "loss": 0.024, - "step": 8994 - }, - { - "epoch": 3.9951143681989785, - "grad_norm": 0.39044461483214893, - "learning_rate": 3.043915937028796e-11, - "loss": 0.025, - "step": 8995 - }, - { - "epoch": 3.9955585165445258, - "grad_norm": 0.3981648535488869, - "learning_rate": 2.4050698944622087e-11, - "loss": 0.0221, - "step": 8996 - }, - { - "epoch": 3.9960026648900735, - "grad_norm": 0.42669487275827683, - "learning_rate": 1.8413819839468993e-11, - "loss": 0.0231, - "step": 8997 - }, - { - "epoch": 3.996446813235621, - "grad_norm": 0.33538547874165714, - "learning_rate": 1.3528522901373741e-11, - "loss": 0.0253, - "step": 8998 - }, - { - "epoch": 3.996890961581168, - "grad_norm": 0.4935415446009399, - "learning_rate": 9.394808864748861e-12, - "loss": 0.0315, - "step": 8999 - }, - { - "epoch": 3.9973351099267154, - "grad_norm": 0.37018534448771545, - "learning_rate": 6.012678351319245e-12, - "loss": 0.0243, - "step": 9000 - }, - { - "epoch": 3.9977792582722627, - "grad_norm": 0.38372140221372397, - "learning_rate": 3.3821318690119286e-12, - "loss": 0.021, - "step": 9001 - }, - { - "epoch": 3.9982234066178104, - "grad_norm": 0.3907968845917238, - "learning_rate": 1.503169813621419e-12, - "loss": 0.0222, - "step": 9002 - }, - { - "epoch": 3.9986675549633577, - "grad_norm": 0.4131506866019968, - "learning_rate": 3.757924676994762e-13, - "loss": 0.0311, - "step": 9003 - }, - { - "epoch": 3.9991117033089054, - "grad_norm": 0.47497534756496, - "learning_rate": 0.0, - "loss": 0.0269, - "step": 9004 - }, - { - "epoch": 3.9991117033089054, - "eval_loss": 0.03634560480713844, - "eval_runtime": 402.6359, - "eval_samples_per_second": 37.667, - "eval_steps_per_second": 1.177, - "step": 9004 + "epoch": 4.0, + "eval_loss": 0.04085389897227287, + "eval_runtime": 77.9654, + "eval_samples_per_second": 194.522, + "eval_steps_per_second": 3.04, + "step": 4504 }, { - "epoch": 3.9991117033089054, - "step": 9004, - "total_flos": 1010387024977920.0, - "train_loss": 0.05092642861352228, - "train_runtime": 98184.5209, - "train_samples_per_second": 11.738, - "train_steps_per_second": 0.092 + "epoch": 4.0, + "step": 4504, + "total_flos": 1010769106173952.0, + "train_loss": 0.055861650320550765, + "train_runtime": 18499.8374, + "train_samples_per_second": 62.299, + "train_steps_per_second": 0.243 } ], "logging_steps": 1, - "max_steps": 9004, + "max_steps": 4504, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, @@ -63095,7 +31595,7 @@ "attributes": {} } }, - "total_flos": 1010387024977920.0, + "total_flos": 1010769106173952.0, "train_batch_size": 16, "trial_name": null, "trial_params": null