{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999499081649691, "eval_steps": 500, "global_step": 8982, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003339455668725998, "grad_norm": 18.08110072524588, "learning_rate": 1.1123470522803115e-08, "loss": 1.1965, "step": 1 }, { "epoch": 0.0006678911337451996, "grad_norm": 19.552894943582405, "learning_rate": 2.224694104560623e-08, "loss": 1.2467, "step": 2 }, { "epoch": 0.0010018367006177993, "grad_norm": 18.45573642932544, "learning_rate": 3.337041156840935e-08, "loss": 1.1939, "step": 3 }, { "epoch": 0.0013357822674903992, "grad_norm": 19.51921878235958, "learning_rate": 4.449388209121246e-08, "loss": 1.2352, "step": 4 }, { "epoch": 0.0016697278343629988, "grad_norm": 19.438335705136836, "learning_rate": 5.561735261401558e-08, "loss": 1.2511, "step": 5 }, { "epoch": 0.0020036734012355987, "grad_norm": 19.874015492468935, "learning_rate": 6.67408231368187e-08, "loss": 1.2588, "step": 6 }, { "epoch": 0.0023376189681081983, "grad_norm": 18.81666516443105, "learning_rate": 7.78642936596218e-08, "loss": 1.1735, "step": 7 }, { "epoch": 0.0026715645349807983, "grad_norm": 18.796402872724705, "learning_rate": 8.898776418242492e-08, "loss": 1.1907, "step": 8 }, { "epoch": 0.003005510101853398, "grad_norm": 18.406989033693506, "learning_rate": 1.0011123470522804e-07, "loss": 1.1884, "step": 9 }, { "epoch": 0.0033394556687259976, "grad_norm": 17.666473890858295, "learning_rate": 1.1123470522803116e-07, "loss": 1.156, "step": 10 }, { "epoch": 0.0036734012355985972, "grad_norm": 18.312626054013474, "learning_rate": 1.2235817575083427e-07, "loss": 1.1896, "step": 11 }, { "epoch": 0.004007346802471197, "grad_norm": 19.21746711412166, "learning_rate": 1.334816462736374e-07, "loss": 1.2486, "step": 12 }, { "epoch": 0.004341292369343797, "grad_norm": 18.645287861138588, "learning_rate": 1.446051167964405e-07, "loss": 1.2237, "step": 13 }, { "epoch": 0.0046752379362163966, "grad_norm": 18.389360599838056, "learning_rate": 1.557285873192436e-07, "loss": 1.174, "step": 14 }, { "epoch": 0.005009183503088996, "grad_norm": 18.094045001801177, "learning_rate": 1.6685205784204674e-07, "loss": 1.2052, "step": 15 }, { "epoch": 0.005343129069961597, "grad_norm": 18.4374027292432, "learning_rate": 1.7797552836484985e-07, "loss": 1.2309, "step": 16 }, { "epoch": 0.005677074636834196, "grad_norm": 17.289354366652493, "learning_rate": 1.8909899888765295e-07, "loss": 1.1751, "step": 17 }, { "epoch": 0.006011020203706796, "grad_norm": 18.24833811594456, "learning_rate": 2.0022246941045608e-07, "loss": 1.2278, "step": 18 }, { "epoch": 0.006344965770579396, "grad_norm": 17.053322563470108, "learning_rate": 2.113459399332592e-07, "loss": 1.1721, "step": 19 }, { "epoch": 0.006678911337451995, "grad_norm": 17.129792113985143, "learning_rate": 2.2246941045606232e-07, "loss": 1.1354, "step": 20 }, { "epoch": 0.007012856904324595, "grad_norm": 13.47458828617283, "learning_rate": 2.3359288097886543e-07, "loss": 1.0349, "step": 21 }, { "epoch": 0.0073468024711971945, "grad_norm": 14.161487265468253, "learning_rate": 2.4471635150166853e-07, "loss": 1.12, "step": 22 }, { "epoch": 0.007680748038069795, "grad_norm": 13.161488444055747, "learning_rate": 2.5583982202447166e-07, "loss": 1.0514, "step": 23 }, { "epoch": 0.008014693604942395, "grad_norm": 14.570394994187327, "learning_rate": 2.669632925472748e-07, "loss": 1.123, "step": 24 }, { "epoch": 0.008348639171814994, "grad_norm": 14.666246390111493, "learning_rate": 2.780867630700779e-07, "loss": 1.1715, "step": 25 }, { "epoch": 0.008682584738687594, "grad_norm": 14.577788314814974, "learning_rate": 2.89210233592881e-07, "loss": 1.1681, "step": 26 }, { "epoch": 0.009016530305560193, "grad_norm": 13.214866163706802, "learning_rate": 3.003337041156841e-07, "loss": 1.0505, "step": 27 }, { "epoch": 0.009350475872432793, "grad_norm": 10.769628248339709, "learning_rate": 3.114571746384872e-07, "loss": 0.9765, "step": 28 }, { "epoch": 0.009684421439305393, "grad_norm": 10.098307468296536, "learning_rate": 3.2258064516129035e-07, "loss": 0.9672, "step": 29 }, { "epoch": 0.010018367006177992, "grad_norm": 10.418771316687504, "learning_rate": 3.337041156840935e-07, "loss": 0.9561, "step": 30 }, { "epoch": 0.010352312573050592, "grad_norm": 8.978717627730614, "learning_rate": 3.4482758620689656e-07, "loss": 0.8463, "step": 31 }, { "epoch": 0.010686258139923193, "grad_norm": 10.286653113951482, "learning_rate": 3.559510567296997e-07, "loss": 0.9665, "step": 32 }, { "epoch": 0.011020203706795793, "grad_norm": 10.462353476788321, "learning_rate": 3.670745272525028e-07, "loss": 0.9434, "step": 33 }, { "epoch": 0.011354149273668393, "grad_norm": 9.714490170332663, "learning_rate": 3.781979977753059e-07, "loss": 0.9324, "step": 34 }, { "epoch": 0.011688094840540992, "grad_norm": 10.083825623996413, "learning_rate": 3.8932146829810904e-07, "loss": 0.9361, "step": 35 }, { "epoch": 0.012022040407413592, "grad_norm": 9.694323120504304, "learning_rate": 4.0044493882091217e-07, "loss": 0.9045, "step": 36 }, { "epoch": 0.012355985974286192, "grad_norm": 9.10401434831957, "learning_rate": 4.115684093437153e-07, "loss": 0.8305, "step": 37 }, { "epoch": 0.012689931541158791, "grad_norm": 8.912869023969314, "learning_rate": 4.226918798665184e-07, "loss": 0.7016, "step": 38 }, { "epoch": 0.01302387710803139, "grad_norm": 9.949234492145816, "learning_rate": 4.338153503893215e-07, "loss": 0.6717, "step": 39 }, { "epoch": 0.01335782267490399, "grad_norm": 9.107965901272914, "learning_rate": 4.4493882091212464e-07, "loss": 0.6031, "step": 40 }, { "epoch": 0.01369176824177659, "grad_norm": 8.999748095069597, "learning_rate": 4.560622914349278e-07, "loss": 0.6396, "step": 41 }, { "epoch": 0.01402571380864919, "grad_norm": 8.795599856241033, "learning_rate": 4.6718576195773085e-07, "loss": 0.5788, "step": 42 }, { "epoch": 0.01435965937552179, "grad_norm": 9.433146095223425, "learning_rate": 4.783092324805339e-07, "loss": 0.6102, "step": 43 }, { "epoch": 0.014693604942394389, "grad_norm": 7.960008599611662, "learning_rate": 4.894327030033371e-07, "loss": 0.5373, "step": 44 }, { "epoch": 0.01502755050926699, "grad_norm": 7.366530046034347, "learning_rate": 5.005561735261402e-07, "loss": 0.5052, "step": 45 }, { "epoch": 0.01536149607613959, "grad_norm": 6.363036907340728, "learning_rate": 5.116796440489433e-07, "loss": 0.4649, "step": 46 }, { "epoch": 0.01569544164301219, "grad_norm": 5.556475848189961, "learning_rate": 5.228031145717465e-07, "loss": 0.4856, "step": 47 }, { "epoch": 0.01602938720988479, "grad_norm": 3.83682323248533, "learning_rate": 5.339265850945496e-07, "loss": 0.4527, "step": 48 }, { "epoch": 0.01636333277675739, "grad_norm": 3.4969259770523546, "learning_rate": 5.450500556173527e-07, "loss": 0.4612, "step": 49 }, { "epoch": 0.01669727834362999, "grad_norm": 2.9909282013080722, "learning_rate": 5.561735261401558e-07, "loss": 0.4505, "step": 50 }, { "epoch": 0.017031223910502588, "grad_norm": 2.7014984810055025, "learning_rate": 5.672969966629589e-07, "loss": 0.4161, "step": 51 }, { "epoch": 0.017365169477375188, "grad_norm": 2.6387827313567604, "learning_rate": 5.78420467185762e-07, "loss": 0.4301, "step": 52 }, { "epoch": 0.017699115044247787, "grad_norm": 2.4645004633554577, "learning_rate": 5.89543937708565e-07, "loss": 0.4103, "step": 53 }, { "epoch": 0.018033060611120387, "grad_norm": 2.148956503468746, "learning_rate": 6.006674082313682e-07, "loss": 0.4049, "step": 54 }, { "epoch": 0.018367006177992987, "grad_norm": 2.0812867730472076, "learning_rate": 6.117908787541713e-07, "loss": 0.4195, "step": 55 }, { "epoch": 0.018700951744865586, "grad_norm": 1.97350230594877, "learning_rate": 6.229143492769744e-07, "loss": 0.3871, "step": 56 }, { "epoch": 0.019034897311738186, "grad_norm": 2.0261853192304873, "learning_rate": 6.340378197997777e-07, "loss": 0.3907, "step": 57 }, { "epoch": 0.019368842878610786, "grad_norm": 1.7884536259702397, "learning_rate": 6.451612903225807e-07, "loss": 0.3725, "step": 58 }, { "epoch": 0.019702788445483385, "grad_norm": 1.7578524964487043, "learning_rate": 6.562847608453838e-07, "loss": 0.364, "step": 59 }, { "epoch": 0.020036734012355985, "grad_norm": 1.575642860109224, "learning_rate": 6.67408231368187e-07, "loss": 0.3917, "step": 60 }, { "epoch": 0.020370679579228584, "grad_norm": 1.4422891613432185, "learning_rate": 6.785317018909901e-07, "loss": 0.3494, "step": 61 }, { "epoch": 0.020704625146101184, "grad_norm": 1.522233247296773, "learning_rate": 6.896551724137931e-07, "loss": 0.3539, "step": 62 }, { "epoch": 0.021038570712973784, "grad_norm": 1.6394568552150803, "learning_rate": 7.007786429365964e-07, "loss": 0.3686, "step": 63 }, { "epoch": 0.021372516279846387, "grad_norm": 1.4992220967488337, "learning_rate": 7.119021134593994e-07, "loss": 0.3675, "step": 64 }, { "epoch": 0.021706461846718986, "grad_norm": 1.7873771972580739, "learning_rate": 7.230255839822026e-07, "loss": 0.3792, "step": 65 }, { "epoch": 0.022040407413591586, "grad_norm": 1.5301039424791039, "learning_rate": 7.341490545050057e-07, "loss": 0.3561, "step": 66 }, { "epoch": 0.022374352980464186, "grad_norm": 1.462805387783217, "learning_rate": 7.452725250278087e-07, "loss": 0.363, "step": 67 }, { "epoch": 0.022708298547336785, "grad_norm": 1.4879055388402056, "learning_rate": 7.563959955506118e-07, "loss": 0.3724, "step": 68 }, { "epoch": 0.023042244114209385, "grad_norm": 1.4681875012051924, "learning_rate": 7.675194660734149e-07, "loss": 0.3651, "step": 69 }, { "epoch": 0.023376189681081985, "grad_norm": 1.547854625804747, "learning_rate": 7.786429365962181e-07, "loss": 0.3816, "step": 70 }, { "epoch": 0.023710135247954584, "grad_norm": 1.7263953674720272, "learning_rate": 7.897664071190211e-07, "loss": 0.3699, "step": 71 }, { "epoch": 0.024044080814827184, "grad_norm": 1.2751997064298404, "learning_rate": 8.008898776418243e-07, "loss": 0.3418, "step": 72 }, { "epoch": 0.024378026381699783, "grad_norm": 1.4359100529226376, "learning_rate": 8.120133481646274e-07, "loss": 0.3693, "step": 73 }, { "epoch": 0.024711971948572383, "grad_norm": 1.2749760525328362, "learning_rate": 8.231368186874306e-07, "loss": 0.3323, "step": 74 }, { "epoch": 0.025045917515444983, "grad_norm": 1.2855735849326455, "learning_rate": 8.342602892102336e-07, "loss": 0.3407, "step": 75 }, { "epoch": 0.025379863082317582, "grad_norm": 1.2508196970444088, "learning_rate": 8.453837597330368e-07, "loss": 0.3518, "step": 76 }, { "epoch": 0.025713808649190182, "grad_norm": 1.406994531333698, "learning_rate": 8.565072302558399e-07, "loss": 0.3381, "step": 77 }, { "epoch": 0.02604775421606278, "grad_norm": 1.4630422438671775, "learning_rate": 8.67630700778643e-07, "loss": 0.3213, "step": 78 }, { "epoch": 0.02638169978293538, "grad_norm": 1.4111611274728295, "learning_rate": 8.78754171301446e-07, "loss": 0.348, "step": 79 }, { "epoch": 0.02671564534980798, "grad_norm": 1.2796063016816839, "learning_rate": 8.898776418242493e-07, "loss": 0.3248, "step": 80 }, { "epoch": 0.02704959091668058, "grad_norm": 1.5090068302982718, "learning_rate": 9.010011123470523e-07, "loss": 0.3559, "step": 81 }, { "epoch": 0.02738353648355318, "grad_norm": 1.2343790950745581, "learning_rate": 9.121245828698556e-07, "loss": 0.3082, "step": 82 }, { "epoch": 0.02771748205042578, "grad_norm": 1.1891735311805842, "learning_rate": 9.232480533926586e-07, "loss": 0.3116, "step": 83 }, { "epoch": 0.02805142761729838, "grad_norm": 1.3062993215992929, "learning_rate": 9.343715239154617e-07, "loss": 0.3292, "step": 84 }, { "epoch": 0.02838537318417098, "grad_norm": 1.2819392779749308, "learning_rate": 9.454949944382647e-07, "loss": 0.3395, "step": 85 }, { "epoch": 0.02871931875104358, "grad_norm": 1.4416842302356845, "learning_rate": 9.566184649610679e-07, "loss": 0.336, "step": 86 }, { "epoch": 0.02905326431791618, "grad_norm": 1.3987710774933, "learning_rate": 9.67741935483871e-07, "loss": 0.3063, "step": 87 }, { "epoch": 0.029387209884788778, "grad_norm": 1.4418014348260515, "learning_rate": 9.788654060066741e-07, "loss": 0.3607, "step": 88 }, { "epoch": 0.029721155451661378, "grad_norm": 1.585329350183969, "learning_rate": 9.899888765294773e-07, "loss": 0.3271, "step": 89 }, { "epoch": 0.03005510101853398, "grad_norm": 1.6528669573974764, "learning_rate": 1.0011123470522804e-06, "loss": 0.3312, "step": 90 }, { "epoch": 0.03038904658540658, "grad_norm": 1.5673416929359354, "learning_rate": 1.0122358175750835e-06, "loss": 0.3362, "step": 91 }, { "epoch": 0.03072299215227918, "grad_norm": 1.3538512444375355, "learning_rate": 1.0233592880978867e-06, "loss": 0.3103, "step": 92 }, { "epoch": 0.03105693771915178, "grad_norm": 1.266414511944769, "learning_rate": 1.0344827586206898e-06, "loss": 0.3476, "step": 93 }, { "epoch": 0.03139088328602438, "grad_norm": 1.2381813473063292, "learning_rate": 1.045606229143493e-06, "loss": 0.322, "step": 94 }, { "epoch": 0.03172482885289698, "grad_norm": 1.3517795579781238, "learning_rate": 1.056729699666296e-06, "loss": 0.3428, "step": 95 }, { "epoch": 0.03205877441976958, "grad_norm": 1.2503553987008789, "learning_rate": 1.0678531701890992e-06, "loss": 0.3324, "step": 96 }, { "epoch": 0.03239271998664218, "grad_norm": 1.2235838797074612, "learning_rate": 1.0789766407119021e-06, "loss": 0.3077, "step": 97 }, { "epoch": 0.03272666555351478, "grad_norm": 1.2342368229237566, "learning_rate": 1.0901001112347055e-06, "loss": 0.3123, "step": 98 }, { "epoch": 0.03306061112038738, "grad_norm": 1.3993217559841238, "learning_rate": 1.1012235817575084e-06, "loss": 0.3157, "step": 99 }, { "epoch": 0.03339455668725998, "grad_norm": 1.187047636224087, "learning_rate": 1.1123470522803115e-06, "loss": 0.3115, "step": 100 }, { "epoch": 0.03372850225413258, "grad_norm": 1.1888949396982949, "learning_rate": 1.1234705228031146e-06, "loss": 0.314, "step": 101 }, { "epoch": 0.034062447821005176, "grad_norm": 1.3282629053014763, "learning_rate": 1.1345939933259178e-06, "loss": 0.3134, "step": 102 }, { "epoch": 0.034396393387877776, "grad_norm": 1.409494192960795, "learning_rate": 1.145717463848721e-06, "loss": 0.343, "step": 103 }, { "epoch": 0.034730338954750375, "grad_norm": 1.3955053377524733, "learning_rate": 1.156840934371524e-06, "loss": 0.3294, "step": 104 }, { "epoch": 0.035064284521622975, "grad_norm": 1.1921009474473023, "learning_rate": 1.1679644048943272e-06, "loss": 0.2927, "step": 105 }, { "epoch": 0.035398230088495575, "grad_norm": 1.342026976228603, "learning_rate": 1.17908787541713e-06, "loss": 0.3005, "step": 106 }, { "epoch": 0.035732175655368174, "grad_norm": 1.4229030988806093, "learning_rate": 1.1902113459399334e-06, "loss": 0.333, "step": 107 }, { "epoch": 0.036066121222240774, "grad_norm": 1.1373972993813148, "learning_rate": 1.2013348164627363e-06, "loss": 0.3059, "step": 108 }, { "epoch": 0.036400066789113374, "grad_norm": 1.3470188636614417, "learning_rate": 1.2124582869855397e-06, "loss": 0.3168, "step": 109 }, { "epoch": 0.03673401235598597, "grad_norm": 1.2640906475959344, "learning_rate": 1.2235817575083426e-06, "loss": 0.3064, "step": 110 }, { "epoch": 0.03706795792285857, "grad_norm": 1.244712541251493, "learning_rate": 1.2347052280311457e-06, "loss": 0.3063, "step": 111 }, { "epoch": 0.03740190348973117, "grad_norm": 1.0580996310724993, "learning_rate": 1.2458286985539489e-06, "loss": 0.2849, "step": 112 }, { "epoch": 0.03773584905660377, "grad_norm": 1.3180651427074217, "learning_rate": 1.256952169076752e-06, "loss": 0.3112, "step": 113 }, { "epoch": 0.03806979462347637, "grad_norm": 1.285650915666917, "learning_rate": 1.2680756395995554e-06, "loss": 0.3156, "step": 114 }, { "epoch": 0.03840374019034897, "grad_norm": 1.2921513400307592, "learning_rate": 1.2791991101223583e-06, "loss": 0.3102, "step": 115 }, { "epoch": 0.03873768575722157, "grad_norm": 1.1878797730987274, "learning_rate": 1.2903225806451614e-06, "loss": 0.2986, "step": 116 }, { "epoch": 0.03907163132409417, "grad_norm": 1.2560584793651444, "learning_rate": 1.3014460511679643e-06, "loss": 0.2989, "step": 117 }, { "epoch": 0.03940557689096677, "grad_norm": 1.2643345435783993, "learning_rate": 1.3125695216907677e-06, "loss": 0.3233, "step": 118 }, { "epoch": 0.03973952245783937, "grad_norm": 1.3041047613547743, "learning_rate": 1.3236929922135708e-06, "loss": 0.336, "step": 119 }, { "epoch": 0.04007346802471197, "grad_norm": 1.2031369067707778, "learning_rate": 1.334816462736374e-06, "loss": 0.304, "step": 120 }, { "epoch": 0.04040741359158457, "grad_norm": 1.1399223289092129, "learning_rate": 1.3459399332591769e-06, "loss": 0.2892, "step": 121 }, { "epoch": 0.04074135915845717, "grad_norm": 1.303855873648094, "learning_rate": 1.3570634037819802e-06, "loss": 0.3095, "step": 122 }, { "epoch": 0.04107530472532977, "grad_norm": 1.0649829493695344, "learning_rate": 1.3681868743047833e-06, "loss": 0.285, "step": 123 }, { "epoch": 0.04140925029220237, "grad_norm": 1.415926257488088, "learning_rate": 1.3793103448275862e-06, "loss": 0.3394, "step": 124 }, { "epoch": 0.04174319585907497, "grad_norm": 1.183128643867534, "learning_rate": 1.3904338153503894e-06, "loss": 0.3079, "step": 125 }, { "epoch": 0.04207714142594757, "grad_norm": 1.3177462295230522, "learning_rate": 1.4015572858731927e-06, "loss": 0.3369, "step": 126 }, { "epoch": 0.04241108699282017, "grad_norm": 1.3473486897101608, "learning_rate": 1.4126807563959956e-06, "loss": 0.3045, "step": 127 }, { "epoch": 0.042745032559692774, "grad_norm": 1.2390438395244772, "learning_rate": 1.4238042269187988e-06, "loss": 0.2975, "step": 128 }, { "epoch": 0.04307897812656537, "grad_norm": 1.2740946670644597, "learning_rate": 1.434927697441602e-06, "loss": 0.3059, "step": 129 }, { "epoch": 0.04341292369343797, "grad_norm": 1.3012091566446777, "learning_rate": 1.4460511679644053e-06, "loss": 0.3079, "step": 130 }, { "epoch": 0.04374686926031057, "grad_norm": 1.1947097037496526, "learning_rate": 1.4571746384872082e-06, "loss": 0.3052, "step": 131 }, { "epoch": 0.04408081482718317, "grad_norm": 1.1432786230589478, "learning_rate": 1.4682981090100113e-06, "loss": 0.3139, "step": 132 }, { "epoch": 0.04441476039405577, "grad_norm": 1.3142624785291002, "learning_rate": 1.4794215795328142e-06, "loss": 0.3004, "step": 133 }, { "epoch": 0.04474870596092837, "grad_norm": 1.1091244974414516, "learning_rate": 1.4905450500556174e-06, "loss": 0.2872, "step": 134 }, { "epoch": 0.04508265152780097, "grad_norm": 1.3022437706824233, "learning_rate": 1.5016685205784207e-06, "loss": 0.3114, "step": 135 }, { "epoch": 0.04541659709467357, "grad_norm": 1.323275845400312, "learning_rate": 1.5127919911012236e-06, "loss": 0.3265, "step": 136 }, { "epoch": 0.04575054266154617, "grad_norm": 1.24079589522644, "learning_rate": 1.5239154616240268e-06, "loss": 0.2973, "step": 137 }, { "epoch": 0.04608448822841877, "grad_norm": 1.0994743200549912, "learning_rate": 1.5350389321468299e-06, "loss": 0.2917, "step": 138 }, { "epoch": 0.04641843379529137, "grad_norm": 1.1757509816140939, "learning_rate": 1.5461624026696332e-06, "loss": 0.2896, "step": 139 }, { "epoch": 0.04675237936216397, "grad_norm": 1.097534938340695, "learning_rate": 1.5572858731924361e-06, "loss": 0.281, "step": 140 }, { "epoch": 0.04708632492903657, "grad_norm": 1.1159015324002537, "learning_rate": 1.5684093437152393e-06, "loss": 0.2902, "step": 141 }, { "epoch": 0.04742027049590917, "grad_norm": 1.2320769642378695, "learning_rate": 1.5795328142380422e-06, "loss": 0.3015, "step": 142 }, { "epoch": 0.04775421606278177, "grad_norm": 1.2268199663025805, "learning_rate": 1.5906562847608455e-06, "loss": 0.2986, "step": 143 }, { "epoch": 0.04808816162965437, "grad_norm": 1.106082896796778, "learning_rate": 1.6017797552836487e-06, "loss": 0.2824, "step": 144 }, { "epoch": 0.04842210719652697, "grad_norm": 1.0688508167162885, "learning_rate": 1.6129032258064516e-06, "loss": 0.2887, "step": 145 }, { "epoch": 0.04875605276339957, "grad_norm": 1.2355684355753798, "learning_rate": 1.6240266963292547e-06, "loss": 0.3029, "step": 146 }, { "epoch": 0.049089998330272167, "grad_norm": 1.2608745789622653, "learning_rate": 1.635150166852058e-06, "loss": 0.3053, "step": 147 }, { "epoch": 0.049423943897144766, "grad_norm": 1.2087145826619097, "learning_rate": 1.6462736373748612e-06, "loss": 0.3074, "step": 148 }, { "epoch": 0.049757889464017366, "grad_norm": 1.4191211752662398, "learning_rate": 1.6573971078976641e-06, "loss": 0.3018, "step": 149 }, { "epoch": 0.050091835030889965, "grad_norm": 1.6005040312929983, "learning_rate": 1.6685205784204673e-06, "loss": 0.2992, "step": 150 }, { "epoch": 0.050425780597762565, "grad_norm": 1.3177431441471539, "learning_rate": 1.6796440489432706e-06, "loss": 0.3013, "step": 151 }, { "epoch": 0.050759726164635165, "grad_norm": 1.0667233844048953, "learning_rate": 1.6907675194660735e-06, "loss": 0.2853, "step": 152 }, { "epoch": 0.051093671731507764, "grad_norm": 1.2197105719883872, "learning_rate": 1.7018909899888767e-06, "loss": 0.2957, "step": 153 }, { "epoch": 0.051427617298380364, "grad_norm": 1.2749427439891368, "learning_rate": 1.7130144605116798e-06, "loss": 0.2826, "step": 154 }, { "epoch": 0.051761562865252964, "grad_norm": 1.3919022833764747, "learning_rate": 1.724137931034483e-06, "loss": 0.2828, "step": 155 }, { "epoch": 0.05209550843212556, "grad_norm": 1.1706667684484162, "learning_rate": 1.735261401557286e-06, "loss": 0.2834, "step": 156 }, { "epoch": 0.05242945399899816, "grad_norm": 1.1385584136115947, "learning_rate": 1.7463848720800892e-06, "loss": 0.278, "step": 157 }, { "epoch": 0.05276339956587076, "grad_norm": 1.2784468592463725, "learning_rate": 1.757508342602892e-06, "loss": 0.2878, "step": 158 }, { "epoch": 0.05309734513274336, "grad_norm": 1.0828791709613537, "learning_rate": 1.7686318131256954e-06, "loss": 0.2843, "step": 159 }, { "epoch": 0.05343129069961596, "grad_norm": 1.0858986938430426, "learning_rate": 1.7797552836484986e-06, "loss": 0.2803, "step": 160 }, { "epoch": 0.05376523626648856, "grad_norm": 1.4160711682479377, "learning_rate": 1.7908787541713015e-06, "loss": 0.2681, "step": 161 }, { "epoch": 0.05409918183336116, "grad_norm": 1.1622241308379426, "learning_rate": 1.8020022246941046e-06, "loss": 0.2942, "step": 162 }, { "epoch": 0.05443312740023376, "grad_norm": 1.3244697401189178, "learning_rate": 1.813125695216908e-06, "loss": 0.3069, "step": 163 }, { "epoch": 0.05476707296710636, "grad_norm": 1.1651688282873625, "learning_rate": 1.824249165739711e-06, "loss": 0.2942, "step": 164 }, { "epoch": 0.05510101853397896, "grad_norm": 1.3401516990160998, "learning_rate": 1.835372636262514e-06, "loss": 0.2979, "step": 165 }, { "epoch": 0.05543496410085156, "grad_norm": 1.1232106728216595, "learning_rate": 1.8464961067853172e-06, "loss": 0.2919, "step": 166 }, { "epoch": 0.05576890966772416, "grad_norm": 0.9645342716426896, "learning_rate": 1.85761957730812e-06, "loss": 0.2833, "step": 167 }, { "epoch": 0.05610285523459676, "grad_norm": 1.2735387795044881, "learning_rate": 1.8687430478309234e-06, "loss": 0.311, "step": 168 }, { "epoch": 0.05643680080146936, "grad_norm": 1.1593324040382949, "learning_rate": 1.8798665183537266e-06, "loss": 0.3005, "step": 169 }, { "epoch": 0.05677074636834196, "grad_norm": 1.0571733025122132, "learning_rate": 1.8909899888765295e-06, "loss": 0.2871, "step": 170 }, { "epoch": 0.05710469193521456, "grad_norm": 1.1772637818135905, "learning_rate": 1.9021134593993326e-06, "loss": 0.2963, "step": 171 }, { "epoch": 0.05743863750208716, "grad_norm": 1.2648890164784086, "learning_rate": 1.9132369299221357e-06, "loss": 0.284, "step": 172 }, { "epoch": 0.05777258306895976, "grad_norm": 1.257741155911166, "learning_rate": 1.924360400444939e-06, "loss": 0.3123, "step": 173 }, { "epoch": 0.05810652863583236, "grad_norm": 1.2076339575951947, "learning_rate": 1.935483870967742e-06, "loss": 0.2968, "step": 174 }, { "epoch": 0.058440474202704956, "grad_norm": 1.3831077669462297, "learning_rate": 1.946607341490545e-06, "loss": 0.302, "step": 175 }, { "epoch": 0.058774419769577556, "grad_norm": 1.2392310046049393, "learning_rate": 1.9577308120133483e-06, "loss": 0.3019, "step": 176 }, { "epoch": 0.059108365336450155, "grad_norm": 1.0630699642754082, "learning_rate": 1.9688542825361514e-06, "loss": 0.2794, "step": 177 }, { "epoch": 0.059442310903322755, "grad_norm": 1.2231288730901275, "learning_rate": 1.9799777530589545e-06, "loss": 0.3036, "step": 178 }, { "epoch": 0.059776256470195355, "grad_norm": 1.054287375357212, "learning_rate": 1.9911012235817577e-06, "loss": 0.2776, "step": 179 }, { "epoch": 0.06011020203706796, "grad_norm": 1.3813659231450461, "learning_rate": 2.002224694104561e-06, "loss": 0.312, "step": 180 }, { "epoch": 0.06044414760394056, "grad_norm": 1.223098684904635, "learning_rate": 2.013348164627364e-06, "loss": 0.2986, "step": 181 }, { "epoch": 0.06077809317081316, "grad_norm": 1.306919633091064, "learning_rate": 2.024471635150167e-06, "loss": 0.3263, "step": 182 }, { "epoch": 0.06111203873768576, "grad_norm": 1.2007296717327645, "learning_rate": 2.03559510567297e-06, "loss": 0.2856, "step": 183 }, { "epoch": 0.06144598430455836, "grad_norm": 1.2020773671581122, "learning_rate": 2.0467185761957733e-06, "loss": 0.2777, "step": 184 }, { "epoch": 0.06177992987143096, "grad_norm": 1.196861175909905, "learning_rate": 2.0578420467185764e-06, "loss": 0.2833, "step": 185 }, { "epoch": 0.06211387543830356, "grad_norm": 1.2558516865319087, "learning_rate": 2.0689655172413796e-06, "loss": 0.2763, "step": 186 }, { "epoch": 0.06244782100517616, "grad_norm": 1.0444075488420261, "learning_rate": 2.0800889877641823e-06, "loss": 0.2759, "step": 187 }, { "epoch": 0.06278176657204876, "grad_norm": 1.4420044463146366, "learning_rate": 2.091212458286986e-06, "loss": 0.3064, "step": 188 }, { "epoch": 0.06311571213892135, "grad_norm": 1.2416958553865685, "learning_rate": 2.102335928809789e-06, "loss": 0.3063, "step": 189 }, { "epoch": 0.06344965770579396, "grad_norm": 1.0748461696024694, "learning_rate": 2.113459399332592e-06, "loss": 0.2752, "step": 190 }, { "epoch": 0.06378360327266655, "grad_norm": 1.1742498266635828, "learning_rate": 2.124582869855395e-06, "loss": 0.2744, "step": 191 }, { "epoch": 0.06411754883953916, "grad_norm": 1.0861534931135144, "learning_rate": 2.1357063403781984e-06, "loss": 0.2776, "step": 192 }, { "epoch": 0.06445149440641175, "grad_norm": 1.3347076544814778, "learning_rate": 2.1468298109010015e-06, "loss": 0.2735, "step": 193 }, { "epoch": 0.06478543997328436, "grad_norm": 1.2383353005287574, "learning_rate": 2.1579532814238042e-06, "loss": 0.274, "step": 194 }, { "epoch": 0.06511938554015695, "grad_norm": 1.054486003296379, "learning_rate": 2.1690767519466073e-06, "loss": 0.284, "step": 195 }, { "epoch": 0.06545333110702956, "grad_norm": 1.2422895734951493, "learning_rate": 2.180200222469411e-06, "loss": 0.271, "step": 196 }, { "epoch": 0.06578727667390215, "grad_norm": 1.2952773756126603, "learning_rate": 2.1913236929922136e-06, "loss": 0.2972, "step": 197 }, { "epoch": 0.06612122224077475, "grad_norm": 1.2211536848947333, "learning_rate": 2.2024471635150167e-06, "loss": 0.2841, "step": 198 }, { "epoch": 0.06645516780764735, "grad_norm": 1.197513650785667, "learning_rate": 2.21357063403782e-06, "loss": 0.3226, "step": 199 }, { "epoch": 0.06678911337451995, "grad_norm": 1.2718959471586204, "learning_rate": 2.224694104560623e-06, "loss": 0.3052, "step": 200 }, { "epoch": 0.06712305894139255, "grad_norm": 1.11207328849927, "learning_rate": 2.235817575083426e-06, "loss": 0.2752, "step": 201 }, { "epoch": 0.06745700450826515, "grad_norm": 1.0206175010699121, "learning_rate": 2.2469410456062293e-06, "loss": 0.2919, "step": 202 }, { "epoch": 0.06779095007513775, "grad_norm": 1.0839833033777182, "learning_rate": 2.2580645161290324e-06, "loss": 0.2942, "step": 203 }, { "epoch": 0.06812489564201035, "grad_norm": 1.078908508453811, "learning_rate": 2.2691879866518355e-06, "loss": 0.2758, "step": 204 }, { "epoch": 0.06845884120888296, "grad_norm": 1.4625898609451118, "learning_rate": 2.2803114571746387e-06, "loss": 0.2909, "step": 205 }, { "epoch": 0.06879278677575555, "grad_norm": 1.1657831625637678, "learning_rate": 2.291434927697442e-06, "loss": 0.2901, "step": 206 }, { "epoch": 0.06912673234262816, "grad_norm": 1.1163150602739882, "learning_rate": 2.302558398220245e-06, "loss": 0.2836, "step": 207 }, { "epoch": 0.06946067790950075, "grad_norm": 1.0394061522813909, "learning_rate": 2.313681868743048e-06, "loss": 0.2928, "step": 208 }, { "epoch": 0.06979462347637336, "grad_norm": 1.0257281218495777, "learning_rate": 2.324805339265851e-06, "loss": 0.282, "step": 209 }, { "epoch": 0.07012856904324595, "grad_norm": 1.060343943193521, "learning_rate": 2.3359288097886543e-06, "loss": 0.2494, "step": 210 }, { "epoch": 0.07046251461011856, "grad_norm": 1.1754632761919408, "learning_rate": 2.3470522803114575e-06, "loss": 0.2875, "step": 211 }, { "epoch": 0.07079646017699115, "grad_norm": 1.3889723569823251, "learning_rate": 2.35817575083426e-06, "loss": 0.2776, "step": 212 }, { "epoch": 0.07113040574386376, "grad_norm": 1.1026304990108282, "learning_rate": 2.3692992213570637e-06, "loss": 0.2813, "step": 213 }, { "epoch": 0.07146435131073635, "grad_norm": 1.3540421203167579, "learning_rate": 2.380422691879867e-06, "loss": 0.3084, "step": 214 }, { "epoch": 0.07179829687760896, "grad_norm": 0.9644640688215597, "learning_rate": 2.39154616240267e-06, "loss": 0.2526, "step": 215 }, { "epoch": 0.07213224244448155, "grad_norm": 1.077328685424387, "learning_rate": 2.4026696329254727e-06, "loss": 0.2925, "step": 216 }, { "epoch": 0.07246618801135415, "grad_norm": 1.1941534402934753, "learning_rate": 2.4137931034482762e-06, "loss": 0.2912, "step": 217 }, { "epoch": 0.07280013357822675, "grad_norm": 1.2216996006544307, "learning_rate": 2.4249165739710794e-06, "loss": 0.2926, "step": 218 }, { "epoch": 0.07313407914509935, "grad_norm": 1.0288345472230127, "learning_rate": 2.436040044493882e-06, "loss": 0.2699, "step": 219 }, { "epoch": 0.07346802471197195, "grad_norm": 1.108118580799365, "learning_rate": 2.4471635150166852e-06, "loss": 0.2705, "step": 220 }, { "epoch": 0.07380197027884455, "grad_norm": 1.0837312318999974, "learning_rate": 2.4582869855394888e-06, "loss": 0.2926, "step": 221 }, { "epoch": 0.07413591584571715, "grad_norm": 1.078034579186348, "learning_rate": 2.4694104560622915e-06, "loss": 0.2625, "step": 222 }, { "epoch": 0.07446986141258975, "grad_norm": 1.1387659740587366, "learning_rate": 2.4805339265850946e-06, "loss": 0.2878, "step": 223 }, { "epoch": 0.07480380697946235, "grad_norm": 1.1289059064466833, "learning_rate": 2.4916573971078977e-06, "loss": 0.2725, "step": 224 }, { "epoch": 0.07513775254633495, "grad_norm": 1.1154801276679143, "learning_rate": 2.502780867630701e-06, "loss": 0.2796, "step": 225 }, { "epoch": 0.07547169811320754, "grad_norm": 1.1637231705683622, "learning_rate": 2.513904338153504e-06, "loss": 0.2988, "step": 226 }, { "epoch": 0.07580564368008015, "grad_norm": 1.0890226181580156, "learning_rate": 2.5250278086763076e-06, "loss": 0.2884, "step": 227 }, { "epoch": 0.07613958924695274, "grad_norm": 1.133532164208556, "learning_rate": 2.5361512791991107e-06, "loss": 0.2896, "step": 228 }, { "epoch": 0.07647353481382535, "grad_norm": 1.0864431293764834, "learning_rate": 2.5472747497219134e-06, "loss": 0.2778, "step": 229 }, { "epoch": 0.07680748038069794, "grad_norm": 1.146220316681448, "learning_rate": 2.5583982202447165e-06, "loss": 0.3008, "step": 230 }, { "epoch": 0.07714142594757055, "grad_norm": 0.9719070473034946, "learning_rate": 2.5695216907675197e-06, "loss": 0.2865, "step": 231 }, { "epoch": 0.07747537151444314, "grad_norm": 0.9482297604156307, "learning_rate": 2.580645161290323e-06, "loss": 0.2541, "step": 232 }, { "epoch": 0.07780931708131575, "grad_norm": 1.154538692385191, "learning_rate": 2.591768631813126e-06, "loss": 0.2743, "step": 233 }, { "epoch": 0.07814326264818834, "grad_norm": 1.228755098669564, "learning_rate": 2.6028921023359286e-06, "loss": 0.2903, "step": 234 }, { "epoch": 0.07847720821506095, "grad_norm": 1.225373106150885, "learning_rate": 2.6140155728587318e-06, "loss": 0.2766, "step": 235 }, { "epoch": 0.07881115378193354, "grad_norm": 1.1650718474694142, "learning_rate": 2.6251390433815353e-06, "loss": 0.2647, "step": 236 }, { "epoch": 0.07914509934880615, "grad_norm": 0.9954249618414314, "learning_rate": 2.6362625139043385e-06, "loss": 0.263, "step": 237 }, { "epoch": 0.07947904491567874, "grad_norm": 1.137248317039158, "learning_rate": 2.6473859844271416e-06, "loss": 0.2792, "step": 238 }, { "epoch": 0.07981299048255135, "grad_norm": 1.0766424441389253, "learning_rate": 2.6585094549499447e-06, "loss": 0.295, "step": 239 }, { "epoch": 0.08014693604942394, "grad_norm": 1.133299162391096, "learning_rate": 2.669632925472748e-06, "loss": 0.2874, "step": 240 }, { "epoch": 0.08048088161629655, "grad_norm": 1.6190237814839594, "learning_rate": 2.6807563959955506e-06, "loss": 0.2775, "step": 241 }, { "epoch": 0.08081482718316914, "grad_norm": 1.0781192090641312, "learning_rate": 2.6918798665183537e-06, "loss": 0.2782, "step": 242 }, { "epoch": 0.08114877275004174, "grad_norm": 1.1272432543088633, "learning_rate": 2.703003337041157e-06, "loss": 0.2843, "step": 243 }, { "epoch": 0.08148271831691434, "grad_norm": 1.2925306771786247, "learning_rate": 2.7141268075639604e-06, "loss": 0.2803, "step": 244 }, { "epoch": 0.08181666388378694, "grad_norm": 1.016699529674434, "learning_rate": 2.7252502780867635e-06, "loss": 0.2605, "step": 245 }, { "epoch": 0.08215060945065954, "grad_norm": 1.0899875102025414, "learning_rate": 2.7363737486095667e-06, "loss": 0.2841, "step": 246 }, { "epoch": 0.08248455501753214, "grad_norm": 1.289631959826523, "learning_rate": 2.7474972191323694e-06, "loss": 0.2904, "step": 247 }, { "epoch": 0.08281850058440474, "grad_norm": 1.2578086396698145, "learning_rate": 2.7586206896551725e-06, "loss": 0.2691, "step": 248 }, { "epoch": 0.08315244615127734, "grad_norm": 0.9712514626265429, "learning_rate": 2.7697441601779756e-06, "loss": 0.2771, "step": 249 }, { "epoch": 0.08348639171814994, "grad_norm": 1.2110417920398469, "learning_rate": 2.7808676307007788e-06, "loss": 0.2746, "step": 250 }, { "epoch": 0.08382033728502254, "grad_norm": 1.1917994392155153, "learning_rate": 2.791991101223582e-06, "loss": 0.281, "step": 251 }, { "epoch": 0.08415428285189513, "grad_norm": 1.0773885616519046, "learning_rate": 2.8031145717463854e-06, "loss": 0.2766, "step": 252 }, { "epoch": 0.08448822841876774, "grad_norm": 1.1477058723357543, "learning_rate": 2.8142380422691886e-06, "loss": 0.2868, "step": 253 }, { "epoch": 0.08482217398564033, "grad_norm": 1.1308647572547197, "learning_rate": 2.8253615127919913e-06, "loss": 0.2809, "step": 254 }, { "epoch": 0.08515611955251294, "grad_norm": 1.1886012198694917, "learning_rate": 2.8364849833147944e-06, "loss": 0.2838, "step": 255 }, { "epoch": 0.08549006511938555, "grad_norm": 1.0917302174368306, "learning_rate": 2.8476084538375975e-06, "loss": 0.2966, "step": 256 }, { "epoch": 0.08582401068625814, "grad_norm": 1.0628706382181317, "learning_rate": 2.8587319243604007e-06, "loss": 0.2746, "step": 257 }, { "epoch": 0.08615795625313075, "grad_norm": 0.9011209090285437, "learning_rate": 2.869855394883204e-06, "loss": 0.2663, "step": 258 }, { "epoch": 0.08649190182000334, "grad_norm": 0.9699244069128968, "learning_rate": 2.8809788654060065e-06, "loss": 0.2732, "step": 259 }, { "epoch": 0.08682584738687595, "grad_norm": 1.0368860734439413, "learning_rate": 2.8921023359288105e-06, "loss": 0.2703, "step": 260 }, { "epoch": 0.08715979295374854, "grad_norm": 1.1688091750555711, "learning_rate": 2.903225806451613e-06, "loss": 0.2888, "step": 261 }, { "epoch": 0.08749373852062114, "grad_norm": 1.004959730063203, "learning_rate": 2.9143492769744163e-06, "loss": 0.2576, "step": 262 }, { "epoch": 0.08782768408749374, "grad_norm": 0.9991808356904461, "learning_rate": 2.9254727474972195e-06, "loss": 0.2625, "step": 263 }, { "epoch": 0.08816162965436634, "grad_norm": 0.9825510412554146, "learning_rate": 2.9365962180200226e-06, "loss": 0.2632, "step": 264 }, { "epoch": 0.08849557522123894, "grad_norm": 0.9821153217304937, "learning_rate": 2.9477196885428257e-06, "loss": 0.2713, "step": 265 }, { "epoch": 0.08882952078811154, "grad_norm": 1.1614726347763442, "learning_rate": 2.9588431590656284e-06, "loss": 0.282, "step": 266 }, { "epoch": 0.08916346635498414, "grad_norm": 1.7691717435465628, "learning_rate": 2.9699666295884316e-06, "loss": 0.2799, "step": 267 }, { "epoch": 0.08949741192185674, "grad_norm": 0.9974536728011792, "learning_rate": 2.9810901001112347e-06, "loss": 0.2754, "step": 268 }, { "epoch": 0.08983135748872934, "grad_norm": 1.1409552236974378, "learning_rate": 2.9922135706340383e-06, "loss": 0.2974, "step": 269 }, { "epoch": 0.09016530305560194, "grad_norm": 0.938700704239528, "learning_rate": 3.0033370411568414e-06, "loss": 0.2595, "step": 270 }, { "epoch": 0.09049924862247453, "grad_norm": 1.1094304035830367, "learning_rate": 3.0144605116796445e-06, "loss": 0.2783, "step": 271 }, { "epoch": 0.09083319418934714, "grad_norm": 1.0187603452237721, "learning_rate": 3.0255839822024472e-06, "loss": 0.272, "step": 272 }, { "epoch": 0.09116713975621973, "grad_norm": 0.9507482461915819, "learning_rate": 3.0367074527252504e-06, "loss": 0.2786, "step": 273 }, { "epoch": 0.09150108532309234, "grad_norm": 0.9623017053402451, "learning_rate": 3.0478309232480535e-06, "loss": 0.2563, "step": 274 }, { "epoch": 0.09183503088996493, "grad_norm": 0.9705519249362503, "learning_rate": 3.0589543937708566e-06, "loss": 0.2634, "step": 275 }, { "epoch": 0.09216897645683754, "grad_norm": 1.1162146922113336, "learning_rate": 3.0700778642936598e-06, "loss": 0.2814, "step": 276 }, { "epoch": 0.09250292202371013, "grad_norm": 1.0488911817978013, "learning_rate": 3.0812013348164633e-06, "loss": 0.2736, "step": 277 }, { "epoch": 0.09283686759058274, "grad_norm": 1.143098412646465, "learning_rate": 3.0923248053392665e-06, "loss": 0.2961, "step": 278 }, { "epoch": 0.09317081315745533, "grad_norm": 1.0543653338829788, "learning_rate": 3.103448275862069e-06, "loss": 0.2921, "step": 279 }, { "epoch": 0.09350475872432794, "grad_norm": 0.9983554927496489, "learning_rate": 3.1145717463848723e-06, "loss": 0.2694, "step": 280 }, { "epoch": 0.09383870429120053, "grad_norm": 1.0555480006962337, "learning_rate": 3.1256952169076754e-06, "loss": 0.2583, "step": 281 }, { "epoch": 0.09417264985807314, "grad_norm": 0.986411867922012, "learning_rate": 3.1368186874304786e-06, "loss": 0.2814, "step": 282 }, { "epoch": 0.09450659542494573, "grad_norm": 1.0115618448923618, "learning_rate": 3.1479421579532817e-06, "loss": 0.2602, "step": 283 }, { "epoch": 0.09484054099181834, "grad_norm": 1.074066553143631, "learning_rate": 3.1590656284760844e-06, "loss": 0.2577, "step": 284 }, { "epoch": 0.09517448655869093, "grad_norm": 0.978641957387922, "learning_rate": 3.170189098998888e-06, "loss": 0.278, "step": 285 }, { "epoch": 0.09550843212556354, "grad_norm": 0.9937876447676249, "learning_rate": 3.181312569521691e-06, "loss": 0.2532, "step": 286 }, { "epoch": 0.09584237769243613, "grad_norm": 1.0298119478855567, "learning_rate": 3.1924360400444942e-06, "loss": 0.2669, "step": 287 }, { "epoch": 0.09617632325930874, "grad_norm": 1.0380635876797846, "learning_rate": 3.2035595105672973e-06, "loss": 0.2849, "step": 288 }, { "epoch": 0.09651026882618133, "grad_norm": 0.8705028041214296, "learning_rate": 3.2146829810901005e-06, "loss": 0.2631, "step": 289 }, { "epoch": 0.09684421439305393, "grad_norm": 0.9554719138367959, "learning_rate": 3.225806451612903e-06, "loss": 0.2699, "step": 290 }, { "epoch": 0.09717815995992653, "grad_norm": 1.1070871658729817, "learning_rate": 3.2369299221357063e-06, "loss": 0.2699, "step": 291 }, { "epoch": 0.09751210552679913, "grad_norm": 1.1569397616168657, "learning_rate": 3.2480533926585095e-06, "loss": 0.2901, "step": 292 }, { "epoch": 0.09784605109367173, "grad_norm": 0.8906942173665268, "learning_rate": 3.259176863181313e-06, "loss": 0.2491, "step": 293 }, { "epoch": 0.09817999666054433, "grad_norm": 0.9616142547096681, "learning_rate": 3.270300333704116e-06, "loss": 0.2479, "step": 294 }, { "epoch": 0.09851394222741693, "grad_norm": 0.9424174400332765, "learning_rate": 3.2814238042269193e-06, "loss": 0.2555, "step": 295 }, { "epoch": 0.09884788779428953, "grad_norm": 0.969602671488551, "learning_rate": 3.2925472747497224e-06, "loss": 0.2522, "step": 296 }, { "epoch": 0.09918183336116213, "grad_norm": 1.0778054535009527, "learning_rate": 3.303670745272525e-06, "loss": 0.2916, "step": 297 }, { "epoch": 0.09951577892803473, "grad_norm": 0.950681407817474, "learning_rate": 3.3147942157953282e-06, "loss": 0.2624, "step": 298 }, { "epoch": 0.09984972449490732, "grad_norm": 1.0136126498415492, "learning_rate": 3.3259176863181314e-06, "loss": 0.2933, "step": 299 }, { "epoch": 0.10018367006177993, "grad_norm": 1.1157879759767526, "learning_rate": 3.3370411568409345e-06, "loss": 0.2752, "step": 300 }, { "epoch": 0.10051761562865252, "grad_norm": 0.9659786911496452, "learning_rate": 3.3481646273637376e-06, "loss": 0.2637, "step": 301 }, { "epoch": 0.10085156119552513, "grad_norm": 1.0334037162956897, "learning_rate": 3.359288097886541e-06, "loss": 0.2826, "step": 302 }, { "epoch": 0.10118550676239772, "grad_norm": 0.8974929775545794, "learning_rate": 3.3704115684093443e-06, "loss": 0.2592, "step": 303 }, { "epoch": 0.10151945232927033, "grad_norm": 0.9908654321167929, "learning_rate": 3.381535038932147e-06, "loss": 0.2514, "step": 304 }, { "epoch": 0.10185339789614292, "grad_norm": 1.013861682980457, "learning_rate": 3.39265850945495e-06, "loss": 0.2659, "step": 305 }, { "epoch": 0.10218734346301553, "grad_norm": 1.0310009709599692, "learning_rate": 3.4037819799777533e-06, "loss": 0.2936, "step": 306 }, { "epoch": 0.10252128902988812, "grad_norm": 0.8989162049291942, "learning_rate": 3.4149054505005564e-06, "loss": 0.2524, "step": 307 }, { "epoch": 0.10285523459676073, "grad_norm": 1.0325604088157159, "learning_rate": 3.4260289210233596e-06, "loss": 0.2725, "step": 308 }, { "epoch": 0.10318918016363333, "grad_norm": 0.9980252685246547, "learning_rate": 3.4371523915461623e-06, "loss": 0.2663, "step": 309 }, { "epoch": 0.10352312573050593, "grad_norm": 0.9993480947882514, "learning_rate": 3.448275862068966e-06, "loss": 0.2619, "step": 310 }, { "epoch": 0.10385707129737853, "grad_norm": 1.2466860459144706, "learning_rate": 3.459399332591769e-06, "loss": 0.2864, "step": 311 }, { "epoch": 0.10419101686425113, "grad_norm": 1.2924240507601645, "learning_rate": 3.470522803114572e-06, "loss": 0.2759, "step": 312 }, { "epoch": 0.10452496243112373, "grad_norm": 1.117231355808104, "learning_rate": 3.4816462736373752e-06, "loss": 0.2493, "step": 313 }, { "epoch": 0.10485890799799633, "grad_norm": 1.0013319983489994, "learning_rate": 3.4927697441601784e-06, "loss": 0.2694, "step": 314 }, { "epoch": 0.10519285356486893, "grad_norm": 0.8939567136330437, "learning_rate": 3.503893214682981e-06, "loss": 0.2555, "step": 315 }, { "epoch": 0.10552679913174152, "grad_norm": 1.239242556867951, "learning_rate": 3.515016685205784e-06, "loss": 0.2728, "step": 316 }, { "epoch": 0.10586074469861413, "grad_norm": 1.0565860100298228, "learning_rate": 3.5261401557285873e-06, "loss": 0.2663, "step": 317 }, { "epoch": 0.10619469026548672, "grad_norm": 0.9597333088021871, "learning_rate": 3.537263626251391e-06, "loss": 0.2729, "step": 318 }, { "epoch": 0.10652863583235933, "grad_norm": 0.8695321802277968, "learning_rate": 3.548387096774194e-06, "loss": 0.2586, "step": 319 }, { "epoch": 0.10686258139923192, "grad_norm": 1.035738979892239, "learning_rate": 3.559510567296997e-06, "loss": 0.2671, "step": 320 }, { "epoch": 0.10719652696610453, "grad_norm": 0.9575178575212548, "learning_rate": 3.5706340378198003e-06, "loss": 0.2828, "step": 321 }, { "epoch": 0.10753047253297712, "grad_norm": 1.1300397562818, "learning_rate": 3.581757508342603e-06, "loss": 0.2949, "step": 322 }, { "epoch": 0.10786441809984973, "grad_norm": 1.1537677216616982, "learning_rate": 3.592880978865406e-06, "loss": 0.2656, "step": 323 }, { "epoch": 0.10819836366672232, "grad_norm": 0.9627144838033714, "learning_rate": 3.6040044493882093e-06, "loss": 0.2785, "step": 324 }, { "epoch": 0.10853230923359493, "grad_norm": 0.9449257156505781, "learning_rate": 3.6151279199110124e-06, "loss": 0.2676, "step": 325 }, { "epoch": 0.10886625480046752, "grad_norm": 1.0206213863568905, "learning_rate": 3.626251390433816e-06, "loss": 0.2643, "step": 326 }, { "epoch": 0.10920020036734013, "grad_norm": 0.9815487853303332, "learning_rate": 3.637374860956619e-06, "loss": 0.2789, "step": 327 }, { "epoch": 0.10953414593421272, "grad_norm": 1.010306702915483, "learning_rate": 3.648498331479422e-06, "loss": 0.2683, "step": 328 }, { "epoch": 0.10986809150108533, "grad_norm": 1.0690064824290435, "learning_rate": 3.659621802002225e-06, "loss": 0.2819, "step": 329 }, { "epoch": 0.11020203706795792, "grad_norm": 1.0546653253133171, "learning_rate": 3.670745272525028e-06, "loss": 0.2869, "step": 330 }, { "epoch": 0.11053598263483053, "grad_norm": 1.0596495202097613, "learning_rate": 3.681868743047831e-06, "loss": 0.2692, "step": 331 }, { "epoch": 0.11086992820170312, "grad_norm": 0.9257332414938142, "learning_rate": 3.6929922135706343e-06, "loss": 0.2468, "step": 332 }, { "epoch": 0.11120387376857573, "grad_norm": 0.9835419475940986, "learning_rate": 3.7041156840934374e-06, "loss": 0.2793, "step": 333 }, { "epoch": 0.11153781933544832, "grad_norm": 0.9771967275391085, "learning_rate": 3.71523915461624e-06, "loss": 0.2845, "step": 334 }, { "epoch": 0.11187176490232092, "grad_norm": 0.9602977118557694, "learning_rate": 3.7263626251390437e-06, "loss": 0.2742, "step": 335 }, { "epoch": 0.11220571046919352, "grad_norm": 1.0029205944461674, "learning_rate": 3.737486095661847e-06, "loss": 0.2687, "step": 336 }, { "epoch": 0.11253965603606612, "grad_norm": 1.0201328545542836, "learning_rate": 3.74860956618465e-06, "loss": 0.2708, "step": 337 }, { "epoch": 0.11287360160293872, "grad_norm": 0.9103855657952339, "learning_rate": 3.759733036707453e-06, "loss": 0.2792, "step": 338 }, { "epoch": 0.11320754716981132, "grad_norm": 0.9601017467552823, "learning_rate": 3.7708565072302562e-06, "loss": 0.2678, "step": 339 }, { "epoch": 0.11354149273668392, "grad_norm": 1.0089053411102482, "learning_rate": 3.781979977753059e-06, "loss": 0.2743, "step": 340 }, { "epoch": 0.11387543830355652, "grad_norm": 0.9504969314518689, "learning_rate": 3.793103448275862e-06, "loss": 0.2725, "step": 341 }, { "epoch": 0.11420938387042912, "grad_norm": 0.9049400864683504, "learning_rate": 3.804226918798665e-06, "loss": 0.2545, "step": 342 }, { "epoch": 0.11454332943730172, "grad_norm": 0.9801080200173922, "learning_rate": 3.815350389321469e-06, "loss": 0.2602, "step": 343 }, { "epoch": 0.11487727500417431, "grad_norm": 0.9494051183285684, "learning_rate": 3.8264738598442715e-06, "loss": 0.2718, "step": 344 }, { "epoch": 0.11521122057104692, "grad_norm": 0.9624224416135067, "learning_rate": 3.837597330367075e-06, "loss": 0.273, "step": 345 }, { "epoch": 0.11554516613791951, "grad_norm": 1.2213226228441383, "learning_rate": 3.848720800889878e-06, "loss": 0.2825, "step": 346 }, { "epoch": 0.11587911170479212, "grad_norm": 0.9776624862349191, "learning_rate": 3.859844271412681e-06, "loss": 0.277, "step": 347 }, { "epoch": 0.11621305727166471, "grad_norm": 0.9947846421904093, "learning_rate": 3.870967741935484e-06, "loss": 0.2629, "step": 348 }, { "epoch": 0.11654700283853732, "grad_norm": 0.9240560147186092, "learning_rate": 3.8820912124582876e-06, "loss": 0.2546, "step": 349 }, { "epoch": 0.11688094840540991, "grad_norm": 0.963768770591079, "learning_rate": 3.89321468298109e-06, "loss": 0.2765, "step": 350 }, { "epoch": 0.11721489397228252, "grad_norm": 0.9589949517884051, "learning_rate": 3.904338153503894e-06, "loss": 0.2775, "step": 351 }, { "epoch": 0.11754883953915511, "grad_norm": 0.9782433798778546, "learning_rate": 3.9154616240266965e-06, "loss": 0.2799, "step": 352 }, { "epoch": 0.11788278510602772, "grad_norm": 0.9858329287017443, "learning_rate": 3.9265850945495e-06, "loss": 0.2711, "step": 353 }, { "epoch": 0.11821673067290031, "grad_norm": 0.9039100597255357, "learning_rate": 3.937708565072303e-06, "loss": 0.2581, "step": 354 }, { "epoch": 0.11855067623977292, "grad_norm": 1.1034122762806373, "learning_rate": 3.948832035595106e-06, "loss": 0.2783, "step": 355 }, { "epoch": 0.11888462180664551, "grad_norm": 0.960339590267913, "learning_rate": 3.959955506117909e-06, "loss": 0.2892, "step": 356 }, { "epoch": 0.11921856737351812, "grad_norm": 1.0002994472569884, "learning_rate": 3.971078976640712e-06, "loss": 0.2866, "step": 357 }, { "epoch": 0.11955251294039071, "grad_norm": 1.0057324471749156, "learning_rate": 3.982202447163515e-06, "loss": 0.2577, "step": 358 }, { "epoch": 0.11988645850726332, "grad_norm": 0.9933056307505863, "learning_rate": 3.993325917686319e-06, "loss": 0.2671, "step": 359 }, { "epoch": 0.12022040407413592, "grad_norm": 1.0118000022039888, "learning_rate": 4.004449388209122e-06, "loss": 0.2782, "step": 360 }, { "epoch": 0.12055434964100852, "grad_norm": 0.9104306956415678, "learning_rate": 4.015572858731925e-06, "loss": 0.2719, "step": 361 }, { "epoch": 0.12088829520788112, "grad_norm": 1.0409646407223883, "learning_rate": 4.026696329254728e-06, "loss": 0.2824, "step": 362 }, { "epoch": 0.12122224077475371, "grad_norm": 0.9353332418061575, "learning_rate": 4.0378197997775306e-06, "loss": 0.2779, "step": 363 }, { "epoch": 0.12155618634162632, "grad_norm": 0.9117928694164866, "learning_rate": 4.048943270300334e-06, "loss": 0.2645, "step": 364 }, { "epoch": 0.12189013190849891, "grad_norm": 0.9516320491269368, "learning_rate": 4.060066740823137e-06, "loss": 0.2763, "step": 365 }, { "epoch": 0.12222407747537152, "grad_norm": 0.8980496133272654, "learning_rate": 4.07119021134594e-06, "loss": 0.2479, "step": 366 }, { "epoch": 0.12255802304224411, "grad_norm": 0.8722081882192233, "learning_rate": 4.082313681868743e-06, "loss": 0.254, "step": 367 }, { "epoch": 0.12289196860911672, "grad_norm": 0.9489762799911274, "learning_rate": 4.093437152391547e-06, "loss": 0.2794, "step": 368 }, { "epoch": 0.12322591417598931, "grad_norm": 0.8260848475511359, "learning_rate": 4.104560622914349e-06, "loss": 0.2581, "step": 369 }, { "epoch": 0.12355985974286192, "grad_norm": 1.012870852293905, "learning_rate": 4.115684093437153e-06, "loss": 0.2795, "step": 370 }, { "epoch": 0.12389380530973451, "grad_norm": 0.9078068672567016, "learning_rate": 4.126807563959956e-06, "loss": 0.2724, "step": 371 }, { "epoch": 0.12422775087660712, "grad_norm": 0.9367356712295183, "learning_rate": 4.137931034482759e-06, "loss": 0.2772, "step": 372 }, { "epoch": 0.12456169644347971, "grad_norm": 0.9769142088201448, "learning_rate": 4.149054505005562e-06, "loss": 0.273, "step": 373 }, { "epoch": 0.12489564201035232, "grad_norm": 0.8714374593023121, "learning_rate": 4.160177975528365e-06, "loss": 0.2523, "step": 374 }, { "epoch": 0.1252295875772249, "grad_norm": 0.9713759181008551, "learning_rate": 4.171301446051168e-06, "loss": 0.2627, "step": 375 }, { "epoch": 0.12556353314409752, "grad_norm": 1.0024543615178043, "learning_rate": 4.182424916573972e-06, "loss": 0.2736, "step": 376 }, { "epoch": 0.12589747871097012, "grad_norm": 0.8841635902459595, "learning_rate": 4.193548387096774e-06, "loss": 0.266, "step": 377 }, { "epoch": 0.1262314242778427, "grad_norm": 0.9422878272730257, "learning_rate": 4.204671857619578e-06, "loss": 0.2763, "step": 378 }, { "epoch": 0.1265653698447153, "grad_norm": 0.9737880805971011, "learning_rate": 4.215795328142381e-06, "loss": 0.2604, "step": 379 }, { "epoch": 0.12689931541158792, "grad_norm": 0.9311503938608767, "learning_rate": 4.226918798665184e-06, "loss": 0.2687, "step": 380 }, { "epoch": 0.12723326097846052, "grad_norm": 0.9099240809857687, "learning_rate": 4.238042269187987e-06, "loss": 0.2615, "step": 381 }, { "epoch": 0.1275672065453331, "grad_norm": 0.8644535162182144, "learning_rate": 4.24916573971079e-06, "loss": 0.2629, "step": 382 }, { "epoch": 0.1279011521122057, "grad_norm": 1.08354660489156, "learning_rate": 4.260289210233593e-06, "loss": 0.2757, "step": 383 }, { "epoch": 0.1282350976790783, "grad_norm": 0.9059885118793621, "learning_rate": 4.271412680756397e-06, "loss": 0.2741, "step": 384 }, { "epoch": 0.12856904324595092, "grad_norm": 0.8214511989438817, "learning_rate": 4.2825361512791995e-06, "loss": 0.2588, "step": 385 }, { "epoch": 0.1289029888128235, "grad_norm": 0.8920712731302586, "learning_rate": 4.293659621802003e-06, "loss": 0.2628, "step": 386 }, { "epoch": 0.1292369343796961, "grad_norm": 1.02140475986345, "learning_rate": 4.304783092324806e-06, "loss": 0.2769, "step": 387 }, { "epoch": 0.1295708799465687, "grad_norm": 1.1041718598040544, "learning_rate": 4.3159065628476084e-06, "loss": 0.2829, "step": 388 }, { "epoch": 0.12990482551344132, "grad_norm": 0.9103286130796249, "learning_rate": 4.327030033370412e-06, "loss": 0.2616, "step": 389 }, { "epoch": 0.1302387710803139, "grad_norm": 0.8952232727330045, "learning_rate": 4.338153503893215e-06, "loss": 0.2484, "step": 390 }, { "epoch": 0.1305727166471865, "grad_norm": 0.7877032707324497, "learning_rate": 4.349276974416018e-06, "loss": 0.2615, "step": 391 }, { "epoch": 0.1309066622140591, "grad_norm": 0.881238803534704, "learning_rate": 4.360400444938822e-06, "loss": 0.2606, "step": 392 }, { "epoch": 0.13124060778093172, "grad_norm": 0.8156813680994354, "learning_rate": 4.3715239154616245e-06, "loss": 0.2483, "step": 393 }, { "epoch": 0.1315745533478043, "grad_norm": 0.8460744085644406, "learning_rate": 4.382647385984427e-06, "loss": 0.266, "step": 394 }, { "epoch": 0.1319084989146769, "grad_norm": 0.8909110880065707, "learning_rate": 4.393770856507231e-06, "loss": 0.2507, "step": 395 }, { "epoch": 0.1322424444815495, "grad_norm": 1.0610350746398265, "learning_rate": 4.4048943270300335e-06, "loss": 0.2962, "step": 396 }, { "epoch": 0.13257639004842212, "grad_norm": 1.2159344229127298, "learning_rate": 4.416017797552837e-06, "loss": 0.2628, "step": 397 }, { "epoch": 0.1329103356152947, "grad_norm": 0.9343938441218402, "learning_rate": 4.42714126807564e-06, "loss": 0.2809, "step": 398 }, { "epoch": 0.1332442811821673, "grad_norm": 0.9460286809085141, "learning_rate": 4.4382647385984425e-06, "loss": 0.2672, "step": 399 }, { "epoch": 0.1335782267490399, "grad_norm": 0.9028504129341358, "learning_rate": 4.449388209121246e-06, "loss": 0.253, "step": 400 }, { "epoch": 0.13391217231591251, "grad_norm": 0.777636732453647, "learning_rate": 4.4605116796440496e-06, "loss": 0.252, "step": 401 }, { "epoch": 0.1342461178827851, "grad_norm": 0.8787403586719423, "learning_rate": 4.471635150166852e-06, "loss": 0.2681, "step": 402 }, { "epoch": 0.1345800634496577, "grad_norm": 0.8623473953489393, "learning_rate": 4.482758620689656e-06, "loss": 0.2615, "step": 403 }, { "epoch": 0.1349140090165303, "grad_norm": 0.8590329595872587, "learning_rate": 4.4938820912124585e-06, "loss": 0.2693, "step": 404 }, { "epoch": 0.1352479545834029, "grad_norm": 1.0942605325821617, "learning_rate": 4.505005561735262e-06, "loss": 0.276, "step": 405 }, { "epoch": 0.1355819001502755, "grad_norm": 0.8384871494712324, "learning_rate": 4.516129032258065e-06, "loss": 0.2539, "step": 406 }, { "epoch": 0.1359158457171481, "grad_norm": 0.9713974468286866, "learning_rate": 4.5272525027808675e-06, "loss": 0.248, "step": 407 }, { "epoch": 0.1362497912840207, "grad_norm": 0.8912405350992786, "learning_rate": 4.538375973303671e-06, "loss": 0.2606, "step": 408 }, { "epoch": 0.1365837368508933, "grad_norm": 0.8504112799682982, "learning_rate": 4.549499443826475e-06, "loss": 0.2478, "step": 409 }, { "epoch": 0.13691768241776592, "grad_norm": 0.8845213824948364, "learning_rate": 4.560622914349277e-06, "loss": 0.2549, "step": 410 }, { "epoch": 0.1372516279846385, "grad_norm": 0.9170303075568879, "learning_rate": 4.571746384872081e-06, "loss": 0.2824, "step": 411 }, { "epoch": 0.1375855735515111, "grad_norm": 0.9418395539373073, "learning_rate": 4.582869855394884e-06, "loss": 0.2512, "step": 412 }, { "epoch": 0.1379195191183837, "grad_norm": 0.961468515239987, "learning_rate": 4.593993325917686e-06, "loss": 0.2798, "step": 413 }, { "epoch": 0.13825346468525632, "grad_norm": 0.884383561019613, "learning_rate": 4.60511679644049e-06, "loss": 0.2693, "step": 414 }, { "epoch": 0.1385874102521289, "grad_norm": 0.8938825429085708, "learning_rate": 4.6162402669632926e-06, "loss": 0.2686, "step": 415 }, { "epoch": 0.1389213558190015, "grad_norm": 0.8354946158818547, "learning_rate": 4.627363737486096e-06, "loss": 0.245, "step": 416 }, { "epoch": 0.1392553013858741, "grad_norm": 0.9373626614204349, "learning_rate": 4.6384872080089e-06, "loss": 0.2802, "step": 417 }, { "epoch": 0.13958924695274672, "grad_norm": 0.7859209899855684, "learning_rate": 4.649610678531702e-06, "loss": 0.2445, "step": 418 }, { "epoch": 0.1399231925196193, "grad_norm": 0.9807671068134438, "learning_rate": 4.660734149054505e-06, "loss": 0.2644, "step": 419 }, { "epoch": 0.1402571380864919, "grad_norm": 0.851020616239925, "learning_rate": 4.671857619577309e-06, "loss": 0.2551, "step": 420 }, { "epoch": 0.1405910836533645, "grad_norm": 0.9306635039098415, "learning_rate": 4.682981090100111e-06, "loss": 0.2559, "step": 421 }, { "epoch": 0.1409250292202371, "grad_norm": 0.9028478012185194, "learning_rate": 4.694104560622915e-06, "loss": 0.2797, "step": 422 }, { "epoch": 0.1412589747871097, "grad_norm": 0.8282408886330277, "learning_rate": 4.705228031145718e-06, "loss": 0.2634, "step": 423 }, { "epoch": 0.1415929203539823, "grad_norm": 0.9583571910070512, "learning_rate": 4.71635150166852e-06, "loss": 0.2737, "step": 424 }, { "epoch": 0.1419268659208549, "grad_norm": 0.8598845314007896, "learning_rate": 4.727474972191325e-06, "loss": 0.2734, "step": 425 }, { "epoch": 0.1422608114877275, "grad_norm": 0.9823961957996282, "learning_rate": 4.7385984427141274e-06, "loss": 0.2756, "step": 426 }, { "epoch": 0.1425947570546001, "grad_norm": 0.8402666268767217, "learning_rate": 4.74972191323693e-06, "loss": 0.2644, "step": 427 }, { "epoch": 0.1429287026214727, "grad_norm": 0.9504374640973223, "learning_rate": 4.760845383759734e-06, "loss": 0.2802, "step": 428 }, { "epoch": 0.1432626481883453, "grad_norm": 0.860942841549608, "learning_rate": 4.771968854282536e-06, "loss": 0.2642, "step": 429 }, { "epoch": 0.1435965937552179, "grad_norm": 0.7976260693507284, "learning_rate": 4.78309232480534e-06, "loss": 0.2396, "step": 430 }, { "epoch": 0.1439305393220905, "grad_norm": 0.9444299816616959, "learning_rate": 4.794215795328143e-06, "loss": 0.2647, "step": 431 }, { "epoch": 0.1442644848889631, "grad_norm": 0.9810683931758797, "learning_rate": 4.805339265850945e-06, "loss": 0.2755, "step": 432 }, { "epoch": 0.1445984304558357, "grad_norm": 0.8774017600524443, "learning_rate": 4.816462736373749e-06, "loss": 0.2516, "step": 433 }, { "epoch": 0.1449323760227083, "grad_norm": 0.9482573960208934, "learning_rate": 4.8275862068965525e-06, "loss": 0.2862, "step": 434 }, { "epoch": 0.1452663215895809, "grad_norm": 0.8175895495379112, "learning_rate": 4.838709677419355e-06, "loss": 0.2573, "step": 435 }, { "epoch": 0.1456002671564535, "grad_norm": 0.9000739545892836, "learning_rate": 4.849833147942159e-06, "loss": 0.2596, "step": 436 }, { "epoch": 0.1459342127233261, "grad_norm": 0.9000554099286614, "learning_rate": 4.8609566184649615e-06, "loss": 0.2525, "step": 437 }, { "epoch": 0.1462681582901987, "grad_norm": 1.035965080184084, "learning_rate": 4.872080088987764e-06, "loss": 0.2746, "step": 438 }, { "epoch": 0.1466021038570713, "grad_norm": 0.9495692424587069, "learning_rate": 4.883203559510568e-06, "loss": 0.2765, "step": 439 }, { "epoch": 0.1469360494239439, "grad_norm": 0.8468127334478587, "learning_rate": 4.8943270300333704e-06, "loss": 0.2499, "step": 440 }, { "epoch": 0.1472699949908165, "grad_norm": 0.9430177643905899, "learning_rate": 4.905450500556174e-06, "loss": 0.2596, "step": 441 }, { "epoch": 0.1476039405576891, "grad_norm": 0.8564111776747324, "learning_rate": 4.9165739710789776e-06, "loss": 0.2742, "step": 442 }, { "epoch": 0.14793788612456168, "grad_norm": 0.9034833859232495, "learning_rate": 4.92769744160178e-06, "loss": 0.2624, "step": 443 }, { "epoch": 0.1482718316914343, "grad_norm": 1.0085778060835962, "learning_rate": 4.938820912124583e-06, "loss": 0.2788, "step": 444 }, { "epoch": 0.1486057772583069, "grad_norm": 0.8486548720543253, "learning_rate": 4.9499443826473865e-06, "loss": 0.2499, "step": 445 }, { "epoch": 0.1489397228251795, "grad_norm": 0.8647171380949372, "learning_rate": 4.961067853170189e-06, "loss": 0.2467, "step": 446 }, { "epoch": 0.14927366839205208, "grad_norm": 0.9458194360965803, "learning_rate": 4.972191323692993e-06, "loss": 0.26, "step": 447 }, { "epoch": 0.1496076139589247, "grad_norm": 0.98845534464712, "learning_rate": 4.9833147942157955e-06, "loss": 0.277, "step": 448 }, { "epoch": 0.1499415595257973, "grad_norm": 1.0038333784339768, "learning_rate": 4.994438264738598e-06, "loss": 0.2607, "step": 449 }, { "epoch": 0.1502755050926699, "grad_norm": 0.9882288469242262, "learning_rate": 5.005561735261402e-06, "loss": 0.2736, "step": 450 }, { "epoch": 0.15060945065954248, "grad_norm": 0.9332132918013903, "learning_rate": 5.016685205784205e-06, "loss": 0.2746, "step": 451 }, { "epoch": 0.1509433962264151, "grad_norm": 0.8514088935668446, "learning_rate": 5.027808676307008e-06, "loss": 0.2667, "step": 452 }, { "epoch": 0.1512773417932877, "grad_norm": 0.8462811908982392, "learning_rate": 5.038932146829812e-06, "loss": 0.2675, "step": 453 }, { "epoch": 0.1516112873601603, "grad_norm": 0.8691114025176568, "learning_rate": 5.050055617352615e-06, "loss": 0.289, "step": 454 }, { "epoch": 0.15194523292703288, "grad_norm": 0.969925154555584, "learning_rate": 5.061179087875418e-06, "loss": 0.2658, "step": 455 }, { "epoch": 0.1522791784939055, "grad_norm": 0.7612750857319712, "learning_rate": 5.072302558398221e-06, "loss": 0.246, "step": 456 }, { "epoch": 0.1526131240607781, "grad_norm": 0.9012049865349984, "learning_rate": 5.083426028921023e-06, "loss": 0.2791, "step": 457 }, { "epoch": 0.1529470696276507, "grad_norm": 0.8335869019920015, "learning_rate": 5.094549499443827e-06, "loss": 0.2542, "step": 458 }, { "epoch": 0.15328101519452328, "grad_norm": 0.8982538630222419, "learning_rate": 5.1056729699666295e-06, "loss": 0.2693, "step": 459 }, { "epoch": 0.15361496076139589, "grad_norm": 0.8430594873139436, "learning_rate": 5.116796440489433e-06, "loss": 0.2533, "step": 460 }, { "epoch": 0.1539489063282685, "grad_norm": 0.8653242092400866, "learning_rate": 5.127919911012236e-06, "loss": 0.264, "step": 461 }, { "epoch": 0.1542828518951411, "grad_norm": 0.8994436945219121, "learning_rate": 5.139043381535039e-06, "loss": 0.2615, "step": 462 }, { "epoch": 0.1546167974620137, "grad_norm": 0.8101799725720696, "learning_rate": 5.150166852057843e-06, "loss": 0.2532, "step": 463 }, { "epoch": 0.15495074302888628, "grad_norm": 0.8855034068759076, "learning_rate": 5.161290322580646e-06, "loss": 0.2732, "step": 464 }, { "epoch": 0.1552846885957589, "grad_norm": 0.8043484789939199, "learning_rate": 5.172413793103449e-06, "loss": 0.2379, "step": 465 }, { "epoch": 0.1556186341626315, "grad_norm": 0.8962988729164553, "learning_rate": 5.183537263626252e-06, "loss": 0.2799, "step": 466 }, { "epoch": 0.1559525797295041, "grad_norm": 0.8232384777691226, "learning_rate": 5.1946607341490554e-06, "loss": 0.2586, "step": 467 }, { "epoch": 0.15628652529637668, "grad_norm": 0.8472967462900588, "learning_rate": 5.205784204671857e-06, "loss": 0.2591, "step": 468 }, { "epoch": 0.1566204708632493, "grad_norm": 0.7977382518203232, "learning_rate": 5.216907675194661e-06, "loss": 0.2517, "step": 469 }, { "epoch": 0.1569544164301219, "grad_norm": 0.9780305545037385, "learning_rate": 5.2280311457174636e-06, "loss": 0.2772, "step": 470 }, { "epoch": 0.1572883619969945, "grad_norm": 0.7622852005224084, "learning_rate": 5.239154616240267e-06, "loss": 0.2538, "step": 471 }, { "epoch": 0.15762230756386708, "grad_norm": 0.8593964452116354, "learning_rate": 5.250278086763071e-06, "loss": 0.2568, "step": 472 }, { "epoch": 0.1579562531307397, "grad_norm": 0.8100335857942087, "learning_rate": 5.261401557285873e-06, "loss": 0.2501, "step": 473 }, { "epoch": 0.1582901986976123, "grad_norm": 0.8014373233797282, "learning_rate": 5.272525027808677e-06, "loss": 0.2535, "step": 474 }, { "epoch": 0.1586241442644849, "grad_norm": 0.7975499651795742, "learning_rate": 5.28364849833148e-06, "loss": 0.2504, "step": 475 }, { "epoch": 0.15895808983135748, "grad_norm": 0.8394801448362739, "learning_rate": 5.294771968854283e-06, "loss": 0.2707, "step": 476 }, { "epoch": 0.1592920353982301, "grad_norm": 0.9828129623333244, "learning_rate": 5.305895439377086e-06, "loss": 0.2775, "step": 477 }, { "epoch": 0.1596259809651027, "grad_norm": 0.732105226843958, "learning_rate": 5.3170189098998895e-06, "loss": 0.2289, "step": 478 }, { "epoch": 0.1599599265319753, "grad_norm": 0.8965879378663164, "learning_rate": 5.328142380422693e-06, "loss": 0.2401, "step": 479 }, { "epoch": 0.16029387209884788, "grad_norm": 0.9057075540665339, "learning_rate": 5.339265850945496e-06, "loss": 0.2681, "step": 480 }, { "epoch": 0.16062781766572048, "grad_norm": 0.7990495639152133, "learning_rate": 5.350389321468299e-06, "loss": 0.2485, "step": 481 }, { "epoch": 0.1609617632325931, "grad_norm": 1.015072564398485, "learning_rate": 5.361512791991101e-06, "loss": 0.2662, "step": 482 }, { "epoch": 0.1612957087994657, "grad_norm": 0.8030254740021361, "learning_rate": 5.372636262513905e-06, "loss": 0.2585, "step": 483 }, { "epoch": 0.16162965436633828, "grad_norm": 1.0859820471858048, "learning_rate": 5.383759733036707e-06, "loss": 0.2715, "step": 484 }, { "epoch": 0.16196359993321088, "grad_norm": 0.8205452365480986, "learning_rate": 5.394883203559511e-06, "loss": 0.2596, "step": 485 }, { "epoch": 0.1622975455000835, "grad_norm": 0.875783049907706, "learning_rate": 5.406006674082314e-06, "loss": 0.2605, "step": 486 }, { "epoch": 0.1626314910669561, "grad_norm": 0.8802931215752893, "learning_rate": 5.417130144605117e-06, "loss": 0.2599, "step": 487 }, { "epoch": 0.16296543663382868, "grad_norm": 0.8179528891483596, "learning_rate": 5.428253615127921e-06, "loss": 0.2527, "step": 488 }, { "epoch": 0.16329938220070128, "grad_norm": 0.734992877721891, "learning_rate": 5.4393770856507235e-06, "loss": 0.241, "step": 489 }, { "epoch": 0.1636333277675739, "grad_norm": 0.8158038849222073, "learning_rate": 5.450500556173527e-06, "loss": 0.2506, "step": 490 }, { "epoch": 0.1639672733344465, "grad_norm": 0.835866048908736, "learning_rate": 5.46162402669633e-06, "loss": 0.2568, "step": 491 }, { "epoch": 0.16430121890131907, "grad_norm": 0.895021641794215, "learning_rate": 5.472747497219133e-06, "loss": 0.2632, "step": 492 }, { "epoch": 0.16463516446819168, "grad_norm": 0.8090105148370232, "learning_rate": 5.483870967741935e-06, "loss": 0.2473, "step": 493 }, { "epoch": 0.1649691100350643, "grad_norm": 0.810000176014472, "learning_rate": 5.494994438264739e-06, "loss": 0.242, "step": 494 }, { "epoch": 0.1653030556019369, "grad_norm": 0.8836235099488476, "learning_rate": 5.506117908787543e-06, "loss": 0.2455, "step": 495 }, { "epoch": 0.16563700116880947, "grad_norm": 0.8254870164296962, "learning_rate": 5.517241379310345e-06, "loss": 0.2539, "step": 496 }, { "epoch": 0.16597094673568208, "grad_norm": 0.9525772225779567, "learning_rate": 5.5283648498331485e-06, "loss": 0.261, "step": 497 }, { "epoch": 0.16630489230255469, "grad_norm": 0.8377853082667718, "learning_rate": 5.539488320355951e-06, "loss": 0.253, "step": 498 }, { "epoch": 0.1666388378694273, "grad_norm": 0.9616580429013377, "learning_rate": 5.550611790878755e-06, "loss": 0.2587, "step": 499 }, { "epoch": 0.16697278343629987, "grad_norm": 0.7796060710868616, "learning_rate": 5.5617352614015575e-06, "loss": 0.2588, "step": 500 }, { "epoch": 0.16730672900317248, "grad_norm": 0.9319120249495298, "learning_rate": 5.572858731924361e-06, "loss": 0.2451, "step": 501 }, { "epoch": 0.16764067457004508, "grad_norm": 1.0596544782036998, "learning_rate": 5.583982202447164e-06, "loss": 0.2749, "step": 502 }, { "epoch": 0.1679746201369177, "grad_norm": 0.8863515672067872, "learning_rate": 5.595105672969967e-06, "loss": 0.2511, "step": 503 }, { "epoch": 0.16830856570379027, "grad_norm": 0.8225742978610687, "learning_rate": 5.606229143492771e-06, "loss": 0.2377, "step": 504 }, { "epoch": 0.16864251127066288, "grad_norm": 0.7637996051027277, "learning_rate": 5.617352614015574e-06, "loss": 0.2335, "step": 505 }, { "epoch": 0.16897645683753548, "grad_norm": 0.8249967207095058, "learning_rate": 5.628476084538377e-06, "loss": 0.2748, "step": 506 }, { "epoch": 0.1693104024044081, "grad_norm": 0.8161862924539601, "learning_rate": 5.639599555061179e-06, "loss": 0.2649, "step": 507 }, { "epoch": 0.16964434797128067, "grad_norm": 0.8749861083035826, "learning_rate": 5.6507230255839826e-06, "loss": 0.2641, "step": 508 }, { "epoch": 0.16997829353815327, "grad_norm": 0.8934466811434186, "learning_rate": 5.661846496106785e-06, "loss": 0.2679, "step": 509 }, { "epoch": 0.17031223910502588, "grad_norm": 0.8787600615518483, "learning_rate": 5.672969966629589e-06, "loss": 0.2735, "step": 510 }, { "epoch": 0.1706461846718985, "grad_norm": 0.7797278565537913, "learning_rate": 5.6840934371523915e-06, "loss": 0.2652, "step": 511 }, { "epoch": 0.1709801302387711, "grad_norm": 0.7628477421169882, "learning_rate": 5.695216907675195e-06, "loss": 0.2424, "step": 512 }, { "epoch": 0.17131407580564367, "grad_norm": 0.840464455052028, "learning_rate": 5.706340378197999e-06, "loss": 0.2659, "step": 513 }, { "epoch": 0.17164802137251628, "grad_norm": 0.985780992202247, "learning_rate": 5.717463848720801e-06, "loss": 0.2507, "step": 514 }, { "epoch": 0.1719819669393889, "grad_norm": 1.4268260954113605, "learning_rate": 5.728587319243605e-06, "loss": 0.2718, "step": 515 }, { "epoch": 0.1723159125062615, "grad_norm": 0.7437022173759871, "learning_rate": 5.739710789766408e-06, "loss": 0.2543, "step": 516 }, { "epoch": 0.17264985807313407, "grad_norm": 0.7373193238546386, "learning_rate": 5.750834260289211e-06, "loss": 0.2451, "step": 517 }, { "epoch": 0.17298380364000668, "grad_norm": 0.9049436389723264, "learning_rate": 5.761957730812013e-06, "loss": 0.2634, "step": 518 }, { "epoch": 0.17331774920687928, "grad_norm": 0.8105545282139595, "learning_rate": 5.773081201334817e-06, "loss": 0.2588, "step": 519 }, { "epoch": 0.1736516947737519, "grad_norm": 0.7844885492997241, "learning_rate": 5.784204671857621e-06, "loss": 0.2507, "step": 520 }, { "epoch": 0.17398564034062447, "grad_norm": 1.5001443038134146, "learning_rate": 5.795328142380423e-06, "loss": 0.258, "step": 521 }, { "epoch": 0.17431958590749708, "grad_norm": 0.9479471274537018, "learning_rate": 5.806451612903226e-06, "loss": 0.2547, "step": 522 }, { "epoch": 0.17465353147436968, "grad_norm": 0.8165944158582878, "learning_rate": 5.817575083426029e-06, "loss": 0.2618, "step": 523 }, { "epoch": 0.1749874770412423, "grad_norm": 0.793372998639345, "learning_rate": 5.828698553948833e-06, "loss": 0.2538, "step": 524 }, { "epoch": 0.17532142260811487, "grad_norm": 0.7708291500668256, "learning_rate": 5.839822024471635e-06, "loss": 0.2561, "step": 525 }, { "epoch": 0.17565536817498748, "grad_norm": 0.7448808234488508, "learning_rate": 5.850945494994439e-06, "loss": 0.2502, "step": 526 }, { "epoch": 0.17598931374186008, "grad_norm": 0.8021357502415758, "learning_rate": 5.862068965517242e-06, "loss": 0.2783, "step": 527 }, { "epoch": 0.1763232593087327, "grad_norm": 0.7495063590638561, "learning_rate": 5.873192436040045e-06, "loss": 0.2476, "step": 528 }, { "epoch": 0.17665720487560527, "grad_norm": 0.7961292168845412, "learning_rate": 5.884315906562849e-06, "loss": 0.2726, "step": 529 }, { "epoch": 0.17699115044247787, "grad_norm": 0.8074466377944476, "learning_rate": 5.8954393770856515e-06, "loss": 0.2582, "step": 530 }, { "epoch": 0.17732509600935048, "grad_norm": 0.8345256528144968, "learning_rate": 5.906562847608455e-06, "loss": 0.245, "step": 531 }, { "epoch": 0.1776590415762231, "grad_norm": 0.7869923386555632, "learning_rate": 5.917686318131257e-06, "loss": 0.2569, "step": 532 }, { "epoch": 0.17799298714309567, "grad_norm": 0.8147246527010965, "learning_rate": 5.9288097886540604e-06, "loss": 0.2362, "step": 533 }, { "epoch": 0.17832693270996827, "grad_norm": 0.8341283135652426, "learning_rate": 5.939933259176863e-06, "loss": 0.2764, "step": 534 }, { "epoch": 0.17866087827684088, "grad_norm": 0.7045070493955722, "learning_rate": 5.951056729699667e-06, "loss": 0.2354, "step": 535 }, { "epoch": 0.17899482384371349, "grad_norm": 0.7421340011554978, "learning_rate": 5.962180200222469e-06, "loss": 0.2386, "step": 536 }, { "epoch": 0.17932876941058606, "grad_norm": 0.8226524022867789, "learning_rate": 5.973303670745273e-06, "loss": 0.2594, "step": 537 }, { "epoch": 0.17966271497745867, "grad_norm": 0.9397477018343224, "learning_rate": 5.9844271412680765e-06, "loss": 0.2639, "step": 538 }, { "epoch": 0.17999666054433128, "grad_norm": 0.9390525099666275, "learning_rate": 5.995550611790879e-06, "loss": 0.2477, "step": 539 }, { "epoch": 0.18033060611120388, "grad_norm": 0.8300240065923898, "learning_rate": 6.006674082313683e-06, "loss": 0.2751, "step": 540 }, { "epoch": 0.18066455167807646, "grad_norm": 0.813819268112457, "learning_rate": 6.0177975528364855e-06, "loss": 0.2617, "step": 541 }, { "epoch": 0.18099849724494907, "grad_norm": 0.9523295641887335, "learning_rate": 6.028921023359289e-06, "loss": 0.2642, "step": 542 }, { "epoch": 0.18133244281182168, "grad_norm": 0.8610172814649126, "learning_rate": 6.040044493882091e-06, "loss": 0.2738, "step": 543 }, { "epoch": 0.18166638837869428, "grad_norm": 0.8247418133291398, "learning_rate": 6.0511679644048945e-06, "loss": 0.2794, "step": 544 }, { "epoch": 0.18200033394556686, "grad_norm": 0.8563775168824261, "learning_rate": 6.062291434927698e-06, "loss": 0.255, "step": 545 }, { "epoch": 0.18233427951243947, "grad_norm": 0.8968217199385246, "learning_rate": 6.073414905450501e-06, "loss": 0.2861, "step": 546 }, { "epoch": 0.18266822507931207, "grad_norm": 0.8235810669629106, "learning_rate": 6.084538375973304e-06, "loss": 0.2663, "step": 547 }, { "epoch": 0.18300217064618468, "grad_norm": 0.8348617646793106, "learning_rate": 6.095661846496107e-06, "loss": 0.2486, "step": 548 }, { "epoch": 0.18333611621305726, "grad_norm": 0.8185061661036209, "learning_rate": 6.1067853170189106e-06, "loss": 0.2885, "step": 549 }, { "epoch": 0.18367006177992987, "grad_norm": 0.8056111861107552, "learning_rate": 6.117908787541713e-06, "loss": 0.2651, "step": 550 }, { "epoch": 0.18400400734680247, "grad_norm": 0.8746807107987543, "learning_rate": 6.129032258064517e-06, "loss": 0.2568, "step": 551 }, { "epoch": 0.18433795291367508, "grad_norm": 0.8681694547363, "learning_rate": 6.1401557285873195e-06, "loss": 0.2638, "step": 552 }, { "epoch": 0.18467189848054766, "grad_norm": 0.8221512767810651, "learning_rate": 6.151279199110123e-06, "loss": 0.2415, "step": 553 }, { "epoch": 0.18500584404742026, "grad_norm": 0.698387186152687, "learning_rate": 6.162402669632927e-06, "loss": 0.245, "step": 554 }, { "epoch": 0.18533978961429287, "grad_norm": 0.8433273577991696, "learning_rate": 6.173526140155729e-06, "loss": 0.2649, "step": 555 }, { "epoch": 0.18567373518116548, "grad_norm": 0.8021748456512204, "learning_rate": 6.184649610678533e-06, "loss": 0.2469, "step": 556 }, { "epoch": 0.18600768074803806, "grad_norm": 0.7364557546886381, "learning_rate": 6.195773081201335e-06, "loss": 0.255, "step": 557 }, { "epoch": 0.18634162631491066, "grad_norm": 0.7762130750403377, "learning_rate": 6.206896551724138e-06, "loss": 0.2524, "step": 558 }, { "epoch": 0.18667557188178327, "grad_norm": 0.8366629532210769, "learning_rate": 6.218020022246941e-06, "loss": 0.2746, "step": 559 }, { "epoch": 0.18700951744865588, "grad_norm": 0.752997914423049, "learning_rate": 6.229143492769745e-06, "loss": 0.2523, "step": 560 }, { "epoch": 0.18734346301552846, "grad_norm": 0.7260230957886581, "learning_rate": 6.240266963292548e-06, "loss": 0.2541, "step": 561 }, { "epoch": 0.18767740858240106, "grad_norm": 0.8788917141496118, "learning_rate": 6.251390433815351e-06, "loss": 0.2715, "step": 562 }, { "epoch": 0.18801135414927367, "grad_norm": 0.800907436768764, "learning_rate": 6.262513904338154e-06, "loss": 0.2645, "step": 563 }, { "epoch": 0.18834529971614627, "grad_norm": 0.6844788488749838, "learning_rate": 6.273637374860957e-06, "loss": 0.2427, "step": 564 }, { "epoch": 0.18867924528301888, "grad_norm": 0.8650404495590827, "learning_rate": 6.284760845383761e-06, "loss": 0.2517, "step": 565 }, { "epoch": 0.18901319084989146, "grad_norm": 0.9007698022142087, "learning_rate": 6.295884315906563e-06, "loss": 0.2321, "step": 566 }, { "epoch": 0.18934713641676407, "grad_norm": 0.7993098878472382, "learning_rate": 6.307007786429367e-06, "loss": 0.2431, "step": 567 }, { "epoch": 0.18968108198363667, "grad_norm": 0.9721899662902379, "learning_rate": 6.318131256952169e-06, "loss": 0.2605, "step": 568 }, { "epoch": 0.19001502755050928, "grad_norm": 0.8389462490316344, "learning_rate": 6.329254727474972e-06, "loss": 0.2601, "step": 569 }, { "epoch": 0.19034897311738186, "grad_norm": 0.8414565668740682, "learning_rate": 6.340378197997776e-06, "loss": 0.2621, "step": 570 }, { "epoch": 0.19068291868425447, "grad_norm": 0.8398750862803184, "learning_rate": 6.351501668520579e-06, "loss": 0.2695, "step": 571 }, { "epoch": 0.19101686425112707, "grad_norm": 0.8866607655134973, "learning_rate": 6.362625139043382e-06, "loss": 0.2533, "step": 572 }, { "epoch": 0.19135080981799968, "grad_norm": 0.9367172444562246, "learning_rate": 6.373748609566185e-06, "loss": 0.2586, "step": 573 }, { "epoch": 0.19168475538487226, "grad_norm": 0.8225843599787163, "learning_rate": 6.3848720800889884e-06, "loss": 0.2565, "step": 574 }, { "epoch": 0.19201870095174486, "grad_norm": 0.8444529094519592, "learning_rate": 6.395995550611791e-06, "loss": 0.2752, "step": 575 }, { "epoch": 0.19235264651861747, "grad_norm": 0.8144999053793032, "learning_rate": 6.407119021134595e-06, "loss": 0.25, "step": 576 }, { "epoch": 0.19268659208549008, "grad_norm": 0.9646835095492448, "learning_rate": 6.418242491657397e-06, "loss": 0.2797, "step": 577 }, { "epoch": 0.19302053765236266, "grad_norm": 0.8030364087953927, "learning_rate": 6.429365962180201e-06, "loss": 0.2521, "step": 578 }, { "epoch": 0.19335448321923526, "grad_norm": 0.7321772482455484, "learning_rate": 6.4404894327030045e-06, "loss": 0.2407, "step": 579 }, { "epoch": 0.19368842878610787, "grad_norm": 0.8945349521153622, "learning_rate": 6.451612903225806e-06, "loss": 0.2606, "step": 580 }, { "epoch": 0.19402237435298048, "grad_norm": 0.9583738923059661, "learning_rate": 6.462736373748611e-06, "loss": 0.2688, "step": 581 }, { "epoch": 0.19435631991985305, "grad_norm": 0.7713487168779097, "learning_rate": 6.473859844271413e-06, "loss": 0.2557, "step": 582 }, { "epoch": 0.19469026548672566, "grad_norm": 0.7917343133180527, "learning_rate": 6.484983314794216e-06, "loss": 0.2802, "step": 583 }, { "epoch": 0.19502421105359827, "grad_norm": 0.8943120918526262, "learning_rate": 6.496106785317019e-06, "loss": 0.2501, "step": 584 }, { "epoch": 0.19535815662047087, "grad_norm": 1.0548584285291802, "learning_rate": 6.5072302558398225e-06, "loss": 0.27, "step": 585 }, { "epoch": 0.19569210218734345, "grad_norm": 0.8891494426082305, "learning_rate": 6.518353726362626e-06, "loss": 0.2762, "step": 586 }, { "epoch": 0.19602604775421606, "grad_norm": 0.8248070845769653, "learning_rate": 6.529477196885429e-06, "loss": 0.2662, "step": 587 }, { "epoch": 0.19635999332108867, "grad_norm": 0.7767078346910501, "learning_rate": 6.540600667408232e-06, "loss": 0.2714, "step": 588 }, { "epoch": 0.19669393888796127, "grad_norm": 0.9437342483702248, "learning_rate": 6.551724137931035e-06, "loss": 0.2565, "step": 589 }, { "epoch": 0.19702788445483385, "grad_norm": 0.7538036441233125, "learning_rate": 6.5628476084538385e-06, "loss": 0.2489, "step": 590 }, { "epoch": 0.19736183002170646, "grad_norm": 1.3943431533296153, "learning_rate": 6.573971078976641e-06, "loss": 0.253, "step": 591 }, { "epoch": 0.19769577558857906, "grad_norm": 0.8252629179756918, "learning_rate": 6.585094549499445e-06, "loss": 0.2514, "step": 592 }, { "epoch": 0.19802972115545167, "grad_norm": 0.7701423212327584, "learning_rate": 6.596218020022247e-06, "loss": 0.2637, "step": 593 }, { "epoch": 0.19836366672232425, "grad_norm": 0.9533433627482636, "learning_rate": 6.60734149054505e-06, "loss": 0.2611, "step": 594 }, { "epoch": 0.19869761228919686, "grad_norm": 1.0300962240262892, "learning_rate": 6.618464961067854e-06, "loss": 0.2606, "step": 595 }, { "epoch": 0.19903155785606946, "grad_norm": 0.7274771616034863, "learning_rate": 6.6295884315906565e-06, "loss": 0.247, "step": 596 }, { "epoch": 0.19936550342294207, "grad_norm": 0.8521863744652877, "learning_rate": 6.64071190211346e-06, "loss": 0.2633, "step": 597 }, { "epoch": 0.19969944898981465, "grad_norm": 0.857057035238085, "learning_rate": 6.651835372636263e-06, "loss": 0.2589, "step": 598 }, { "epoch": 0.20003339455668726, "grad_norm": 0.7632237378212952, "learning_rate": 6.662958843159066e-06, "loss": 0.252, "step": 599 }, { "epoch": 0.20036734012355986, "grad_norm": 0.7570323690891942, "learning_rate": 6.674082313681869e-06, "loss": 0.2493, "step": 600 }, { "epoch": 0.20070128569043247, "grad_norm": 0.7808753856394541, "learning_rate": 6.6852057842046726e-06, "loss": 0.2525, "step": 601 }, { "epoch": 0.20103523125730505, "grad_norm": 0.7516839563088439, "learning_rate": 6.696329254727475e-06, "loss": 0.2531, "step": 602 }, { "epoch": 0.20136917682417765, "grad_norm": 0.9130303291108971, "learning_rate": 6.707452725250279e-06, "loss": 0.2602, "step": 603 }, { "epoch": 0.20170312239105026, "grad_norm": 0.7675587493061325, "learning_rate": 6.718576195773082e-06, "loss": 0.2558, "step": 604 }, { "epoch": 0.20203706795792287, "grad_norm": 0.8499898280803776, "learning_rate": 6.729699666295884e-06, "loss": 0.2521, "step": 605 }, { "epoch": 0.20237101352479545, "grad_norm": 0.6792136287125394, "learning_rate": 6.740823136818689e-06, "loss": 0.241, "step": 606 }, { "epoch": 0.20270495909166805, "grad_norm": 0.8162142022991196, "learning_rate": 6.7519466073414905e-06, "loss": 0.2527, "step": 607 }, { "epoch": 0.20303890465854066, "grad_norm": 0.7296861143295201, "learning_rate": 6.763070077864294e-06, "loss": 0.2435, "step": 608 }, { "epoch": 0.20337285022541327, "grad_norm": 0.7502154456382952, "learning_rate": 6.774193548387097e-06, "loss": 0.2614, "step": 609 }, { "epoch": 0.20370679579228584, "grad_norm": 0.7661648899048032, "learning_rate": 6.7853170189099e-06, "loss": 0.2559, "step": 610 }, { "epoch": 0.20404074135915845, "grad_norm": 0.7204037874440861, "learning_rate": 6.796440489432704e-06, "loss": 0.2414, "step": 611 }, { "epoch": 0.20437468692603106, "grad_norm": 0.8284127688423369, "learning_rate": 6.807563959955507e-06, "loss": 0.2486, "step": 612 }, { "epoch": 0.20470863249290366, "grad_norm": 0.7646957746452056, "learning_rate": 6.81868743047831e-06, "loss": 0.2482, "step": 613 }, { "epoch": 0.20504257805977624, "grad_norm": 0.6666997972855363, "learning_rate": 6.829810901001113e-06, "loss": 0.2412, "step": 614 }, { "epoch": 0.20537652362664885, "grad_norm": 0.8060448469342122, "learning_rate": 6.840934371523916e-06, "loss": 0.2592, "step": 615 }, { "epoch": 0.20571046919352146, "grad_norm": 0.7430816705543792, "learning_rate": 6.852057842046719e-06, "loss": 0.2633, "step": 616 }, { "epoch": 0.20604441476039406, "grad_norm": 0.705554520404781, "learning_rate": 6.863181312569523e-06, "loss": 0.2547, "step": 617 }, { "epoch": 0.20637836032726667, "grad_norm": 0.7321042548247252, "learning_rate": 6.8743047830923245e-06, "loss": 0.2465, "step": 618 }, { "epoch": 0.20671230589413925, "grad_norm": 0.7046105183839342, "learning_rate": 6.885428253615128e-06, "loss": 0.2422, "step": 619 }, { "epoch": 0.20704625146101185, "grad_norm": 0.6631002359153175, "learning_rate": 6.896551724137932e-06, "loss": 0.2338, "step": 620 }, { "epoch": 0.20738019702788446, "grad_norm": 0.820463308715899, "learning_rate": 6.907675194660734e-06, "loss": 0.2357, "step": 621 }, { "epoch": 0.20771414259475707, "grad_norm": 0.8757054022253623, "learning_rate": 6.918798665183538e-06, "loss": 0.2576, "step": 622 }, { "epoch": 0.20804808816162965, "grad_norm": 0.7617466716742917, "learning_rate": 6.929922135706341e-06, "loss": 0.2625, "step": 623 }, { "epoch": 0.20838203372850225, "grad_norm": 0.7507182463449198, "learning_rate": 6.941045606229144e-06, "loss": 0.2484, "step": 624 }, { "epoch": 0.20871597929537486, "grad_norm": 0.653627768081853, "learning_rate": 6.952169076751947e-06, "loss": 0.228, "step": 625 }, { "epoch": 0.20904992486224747, "grad_norm": 0.7532509534249405, "learning_rate": 6.9632925472747504e-06, "loss": 0.2594, "step": 626 }, { "epoch": 0.20938387042912004, "grad_norm": 0.6656450442528803, "learning_rate": 6.974416017797554e-06, "loss": 0.2434, "step": 627 }, { "epoch": 0.20971781599599265, "grad_norm": 0.9213023326955349, "learning_rate": 6.985539488320357e-06, "loss": 0.2577, "step": 628 }, { "epoch": 0.21005176156286526, "grad_norm": 0.8151666294986606, "learning_rate": 6.99666295884316e-06, "loss": 0.2582, "step": 629 }, { "epoch": 0.21038570712973786, "grad_norm": 0.7631009288121973, "learning_rate": 7.007786429365962e-06, "loss": 0.2578, "step": 630 }, { "epoch": 0.21071965269661044, "grad_norm": 0.8363790627747725, "learning_rate": 7.0189098998887665e-06, "loss": 0.2781, "step": 631 }, { "epoch": 0.21105359826348305, "grad_norm": 0.839505394509319, "learning_rate": 7.030033370411568e-06, "loss": 0.2314, "step": 632 }, { "epoch": 0.21138754383035566, "grad_norm": 1.0596615080003053, "learning_rate": 7.041156840934372e-06, "loss": 0.284, "step": 633 }, { "epoch": 0.21172148939722826, "grad_norm": 0.7122109086577653, "learning_rate": 7.052280311457175e-06, "loss": 0.2304, "step": 634 }, { "epoch": 0.21205543496410084, "grad_norm": 0.8054756743369217, "learning_rate": 7.063403781979978e-06, "loss": 0.2714, "step": 635 }, { "epoch": 0.21238938053097345, "grad_norm": 0.7150051499547507, "learning_rate": 7.074527252502782e-06, "loss": 0.2456, "step": 636 }, { "epoch": 0.21272332609784605, "grad_norm": 0.7974489943926535, "learning_rate": 7.0856507230255845e-06, "loss": 0.2571, "step": 637 }, { "epoch": 0.21305727166471866, "grad_norm": 0.9680300225233516, "learning_rate": 7.096774193548388e-06, "loss": 0.2745, "step": 638 }, { "epoch": 0.21339121723159124, "grad_norm": 0.7427730803660618, "learning_rate": 7.107897664071191e-06, "loss": 0.2518, "step": 639 }, { "epoch": 0.21372516279846385, "grad_norm": 0.741805688846562, "learning_rate": 7.119021134593994e-06, "loss": 0.2444, "step": 640 }, { "epoch": 0.21405910836533645, "grad_norm": 0.7821624254946435, "learning_rate": 7.130144605116797e-06, "loss": 0.2727, "step": 641 }, { "epoch": 0.21439305393220906, "grad_norm": 0.6585665747997141, "learning_rate": 7.1412680756396006e-06, "loss": 0.2408, "step": 642 }, { "epoch": 0.21472699949908164, "grad_norm": 0.6560687036372677, "learning_rate": 7.152391546162402e-06, "loss": 0.2396, "step": 643 }, { "epoch": 0.21506094506595425, "grad_norm": 0.746510194063174, "learning_rate": 7.163515016685206e-06, "loss": 0.244, "step": 644 }, { "epoch": 0.21539489063282685, "grad_norm": 0.7591594490569733, "learning_rate": 7.1746384872080095e-06, "loss": 0.2501, "step": 645 }, { "epoch": 0.21572883619969946, "grad_norm": 0.9390381754063648, "learning_rate": 7.185761957730812e-06, "loss": 0.2597, "step": 646 }, { "epoch": 0.21606278176657204, "grad_norm": 0.7677869272615818, "learning_rate": 7.196885428253616e-06, "loss": 0.2309, "step": 647 }, { "epoch": 0.21639672733344464, "grad_norm": 0.7831044566641154, "learning_rate": 7.2080088987764185e-06, "loss": 0.2674, "step": 648 }, { "epoch": 0.21673067290031725, "grad_norm": 0.7513498348722037, "learning_rate": 7.219132369299222e-06, "loss": 0.2608, "step": 649 }, { "epoch": 0.21706461846718986, "grad_norm": 0.8318654606544825, "learning_rate": 7.230255839822025e-06, "loss": 0.277, "step": 650 }, { "epoch": 0.21739856403406244, "grad_norm": 0.6802811622216082, "learning_rate": 7.241379310344828e-06, "loss": 0.2319, "step": 651 }, { "epoch": 0.21773250960093504, "grad_norm": 1.106089563161147, "learning_rate": 7.252502780867632e-06, "loss": 0.2582, "step": 652 }, { "epoch": 0.21806645516780765, "grad_norm": 0.7584213630649976, "learning_rate": 7.263626251390435e-06, "loss": 0.2495, "step": 653 }, { "epoch": 0.21840040073468026, "grad_norm": 0.7854757276260729, "learning_rate": 7.274749721913238e-06, "loss": 0.2557, "step": 654 }, { "epoch": 0.21873434630155283, "grad_norm": 0.7612163368733534, "learning_rate": 7.28587319243604e-06, "loss": 0.2451, "step": 655 }, { "epoch": 0.21906829186842544, "grad_norm": 0.7748245742742014, "learning_rate": 7.296996662958844e-06, "loss": 0.2442, "step": 656 }, { "epoch": 0.21940223743529805, "grad_norm": 0.6807686068804447, "learning_rate": 7.308120133481646e-06, "loss": 0.2408, "step": 657 }, { "epoch": 0.21973618300217065, "grad_norm": 0.9239556234849106, "learning_rate": 7.31924360400445e-06, "loss": 0.2702, "step": 658 }, { "epoch": 0.22007012856904323, "grad_norm": 0.7321576339149546, "learning_rate": 7.3303670745272525e-06, "loss": 0.2644, "step": 659 }, { "epoch": 0.22040407413591584, "grad_norm": 0.7996011030861904, "learning_rate": 7.341490545050056e-06, "loss": 0.2489, "step": 660 }, { "epoch": 0.22073801970278845, "grad_norm": 0.8764342155392295, "learning_rate": 7.35261401557286e-06, "loss": 0.2751, "step": 661 }, { "epoch": 0.22107196526966105, "grad_norm": 0.8597656221387565, "learning_rate": 7.363737486095662e-06, "loss": 0.2786, "step": 662 }, { "epoch": 0.22140591083653363, "grad_norm": 0.6958608408138943, "learning_rate": 7.374860956618466e-06, "loss": 0.2538, "step": 663 }, { "epoch": 0.22173985640340624, "grad_norm": 0.7016554454611178, "learning_rate": 7.385984427141269e-06, "loss": 0.2467, "step": 664 }, { "epoch": 0.22207380197027884, "grad_norm": 0.7640514477559327, "learning_rate": 7.397107897664072e-06, "loss": 0.2634, "step": 665 }, { "epoch": 0.22240774753715145, "grad_norm": 0.7363554934776556, "learning_rate": 7.408231368186875e-06, "loss": 0.2298, "step": 666 }, { "epoch": 0.22274169310402406, "grad_norm": 0.8144798385573445, "learning_rate": 7.4193548387096784e-06, "loss": 0.2443, "step": 667 }, { "epoch": 0.22307563867089664, "grad_norm": 0.8918153080014487, "learning_rate": 7.43047830923248e-06, "loss": 0.2578, "step": 668 }, { "epoch": 0.22340958423776924, "grad_norm": 0.7303457303558588, "learning_rate": 7.441601779755284e-06, "loss": 0.2674, "step": 669 }, { "epoch": 0.22374352980464185, "grad_norm": 0.6891671510625942, "learning_rate": 7.452725250278087e-06, "loss": 0.2528, "step": 670 }, { "epoch": 0.22407747537151446, "grad_norm": 0.7806126310729112, "learning_rate": 7.46384872080089e-06, "loss": 0.2571, "step": 671 }, { "epoch": 0.22441142093838704, "grad_norm": 0.6843873330049303, "learning_rate": 7.474972191323694e-06, "loss": 0.2255, "step": 672 }, { "epoch": 0.22474536650525964, "grad_norm": 0.8453798784973325, "learning_rate": 7.486095661846496e-06, "loss": 0.2625, "step": 673 }, { "epoch": 0.22507931207213225, "grad_norm": 0.8434557324517794, "learning_rate": 7.4972191323693e-06, "loss": 0.2776, "step": 674 }, { "epoch": 0.22541325763900485, "grad_norm": 0.7048585497108847, "learning_rate": 7.508342602892103e-06, "loss": 0.2585, "step": 675 }, { "epoch": 0.22574720320587743, "grad_norm": 0.7659379836914064, "learning_rate": 7.519466073414906e-06, "loss": 0.2548, "step": 676 }, { "epoch": 0.22608114877275004, "grad_norm": 0.6934227319885261, "learning_rate": 7.53058954393771e-06, "loss": 0.2415, "step": 677 }, { "epoch": 0.22641509433962265, "grad_norm": 0.7811227987102337, "learning_rate": 7.5417130144605125e-06, "loss": 0.2517, "step": 678 }, { "epoch": 0.22674903990649525, "grad_norm": 0.804992221647555, "learning_rate": 7.552836484983316e-06, "loss": 0.2741, "step": 679 }, { "epoch": 0.22708298547336783, "grad_norm": 0.8687196799605786, "learning_rate": 7.563959955506118e-06, "loss": 0.2727, "step": 680 }, { "epoch": 0.22741693104024044, "grad_norm": 0.7320254452663593, "learning_rate": 7.575083426028922e-06, "loss": 0.2593, "step": 681 }, { "epoch": 0.22775087660711305, "grad_norm": 0.8031326712488748, "learning_rate": 7.586206896551724e-06, "loss": 0.2539, "step": 682 }, { "epoch": 0.22808482217398565, "grad_norm": 0.7676874292506355, "learning_rate": 7.597330367074528e-06, "loss": 0.2327, "step": 683 }, { "epoch": 0.22841876774085823, "grad_norm": 0.7010316070246935, "learning_rate": 7.60845383759733e-06, "loss": 0.2351, "step": 684 }, { "epoch": 0.22875271330773084, "grad_norm": 0.8074417436579385, "learning_rate": 7.619577308120134e-06, "loss": 0.2521, "step": 685 }, { "epoch": 0.22908665887460344, "grad_norm": 0.8071936582217873, "learning_rate": 7.630700778642938e-06, "loss": 0.244, "step": 686 }, { "epoch": 0.22942060444147605, "grad_norm": 0.6359795454476252, "learning_rate": 7.64182424916574e-06, "loss": 0.2368, "step": 687 }, { "epoch": 0.22975455000834863, "grad_norm": 0.7777196776901433, "learning_rate": 7.652947719688543e-06, "loss": 0.2414, "step": 688 }, { "epoch": 0.23008849557522124, "grad_norm": 0.8592388607477667, "learning_rate": 7.664071190211346e-06, "loss": 0.2455, "step": 689 }, { "epoch": 0.23042244114209384, "grad_norm": 0.7537444209652607, "learning_rate": 7.67519466073415e-06, "loss": 0.2568, "step": 690 }, { "epoch": 0.23075638670896645, "grad_norm": 0.7514964866360881, "learning_rate": 7.686318131256953e-06, "loss": 0.2543, "step": 691 }, { "epoch": 0.23109033227583903, "grad_norm": 0.7856637220526238, "learning_rate": 7.697441601779755e-06, "loss": 0.2396, "step": 692 }, { "epoch": 0.23142427784271163, "grad_norm": 0.6766482208992999, "learning_rate": 7.70856507230256e-06, "loss": 0.2306, "step": 693 }, { "epoch": 0.23175822340958424, "grad_norm": 0.7190582180915687, "learning_rate": 7.719688542825363e-06, "loss": 0.2332, "step": 694 }, { "epoch": 0.23209216897645685, "grad_norm": 0.7365246732401293, "learning_rate": 7.730812013348165e-06, "loss": 0.2462, "step": 695 }, { "epoch": 0.23242611454332943, "grad_norm": 0.7531487377575422, "learning_rate": 7.741935483870968e-06, "loss": 0.2516, "step": 696 }, { "epoch": 0.23276006011020203, "grad_norm": 0.7856970157332456, "learning_rate": 7.753058954393772e-06, "loss": 0.2595, "step": 697 }, { "epoch": 0.23309400567707464, "grad_norm": 0.7465491947839247, "learning_rate": 7.764182424916575e-06, "loss": 0.243, "step": 698 }, { "epoch": 0.23342795124394725, "grad_norm": 0.78553071211408, "learning_rate": 7.775305895439378e-06, "loss": 0.2727, "step": 699 }, { "epoch": 0.23376189681081982, "grad_norm": 0.6936051447843483, "learning_rate": 7.78642936596218e-06, "loss": 0.2406, "step": 700 }, { "epoch": 0.23409584237769243, "grad_norm": 0.7284183041445422, "learning_rate": 7.797552836484983e-06, "loss": 0.2498, "step": 701 }, { "epoch": 0.23442978794456504, "grad_norm": 0.7500119738086012, "learning_rate": 7.808676307007788e-06, "loss": 0.2353, "step": 702 }, { "epoch": 0.23476373351143764, "grad_norm": 0.7405650072009493, "learning_rate": 7.81979977753059e-06, "loss": 0.2565, "step": 703 }, { "epoch": 0.23509767907831022, "grad_norm": 0.7259042405639793, "learning_rate": 7.830923248053393e-06, "loss": 0.2448, "step": 704 }, { "epoch": 0.23543162464518283, "grad_norm": 0.6356529795786438, "learning_rate": 7.842046718576196e-06, "loss": 0.2275, "step": 705 }, { "epoch": 0.23576557021205544, "grad_norm": 0.7052503065077377, "learning_rate": 7.853170189099e-06, "loss": 0.2475, "step": 706 }, { "epoch": 0.23609951577892804, "grad_norm": 0.7686445726114107, "learning_rate": 7.864293659621803e-06, "loss": 0.2644, "step": 707 }, { "epoch": 0.23643346134580062, "grad_norm": 0.7484144633557975, "learning_rate": 7.875417130144606e-06, "loss": 0.2575, "step": 708 }, { "epoch": 0.23676740691267323, "grad_norm": 0.7357228659660728, "learning_rate": 7.886540600667408e-06, "loss": 0.2372, "step": 709 }, { "epoch": 0.23710135247954583, "grad_norm": 0.6982740541543035, "learning_rate": 7.897664071190213e-06, "loss": 0.2508, "step": 710 }, { "epoch": 0.23743529804641844, "grad_norm": 0.7398245740088609, "learning_rate": 7.908787541713015e-06, "loss": 0.2381, "step": 711 }, { "epoch": 0.23776924361329102, "grad_norm": 0.7895941667598881, "learning_rate": 7.919911012235818e-06, "loss": 0.2283, "step": 712 }, { "epoch": 0.23810318918016363, "grad_norm": 0.779972681262703, "learning_rate": 7.93103448275862e-06, "loss": 0.2577, "step": 713 }, { "epoch": 0.23843713474703623, "grad_norm": 0.7064211035558918, "learning_rate": 7.942157953281424e-06, "loss": 0.2391, "step": 714 }, { "epoch": 0.23877108031390884, "grad_norm": 0.8977366266428429, "learning_rate": 7.953281423804228e-06, "loss": 0.2735, "step": 715 }, { "epoch": 0.23910502588078142, "grad_norm": 0.8146695015510214, "learning_rate": 7.96440489432703e-06, "loss": 0.2553, "step": 716 }, { "epoch": 0.23943897144765403, "grad_norm": 0.8075630691246333, "learning_rate": 7.975528364849833e-06, "loss": 0.24, "step": 717 }, { "epoch": 0.23977291701452663, "grad_norm": 0.7417746252500675, "learning_rate": 7.986651835372638e-06, "loss": 0.2455, "step": 718 }, { "epoch": 0.24010686258139924, "grad_norm": 0.7960193301121595, "learning_rate": 7.99777530589544e-06, "loss": 0.2537, "step": 719 }, { "epoch": 0.24044080814827185, "grad_norm": 0.6581455395066982, "learning_rate": 8.008898776418243e-06, "loss": 0.2508, "step": 720 }, { "epoch": 0.24077475371514442, "grad_norm": 0.762028236187238, "learning_rate": 8.020022246941046e-06, "loss": 0.2726, "step": 721 }, { "epoch": 0.24110869928201703, "grad_norm": 0.7200877982327974, "learning_rate": 8.03114571746385e-06, "loss": 0.2551, "step": 722 }, { "epoch": 0.24144264484888964, "grad_norm": 0.6356328284318833, "learning_rate": 8.042269187986651e-06, "loss": 0.2249, "step": 723 }, { "epoch": 0.24177659041576224, "grad_norm": 0.7300735104858539, "learning_rate": 8.053392658509456e-06, "loss": 0.2535, "step": 724 }, { "epoch": 0.24211053598263482, "grad_norm": 0.7236816257988127, "learning_rate": 8.064516129032258e-06, "loss": 0.2521, "step": 725 }, { "epoch": 0.24244448154950743, "grad_norm": 0.6587602905727439, "learning_rate": 8.075639599555061e-06, "loss": 0.2425, "step": 726 }, { "epoch": 0.24277842711638004, "grad_norm": 0.7780907894191817, "learning_rate": 8.086763070077866e-06, "loss": 0.264, "step": 727 }, { "epoch": 0.24311237268325264, "grad_norm": 0.686670566205572, "learning_rate": 8.097886540600668e-06, "loss": 0.2509, "step": 728 }, { "epoch": 0.24344631825012522, "grad_norm": 0.7534125635472662, "learning_rate": 8.109010011123471e-06, "loss": 0.2402, "step": 729 }, { "epoch": 0.24378026381699783, "grad_norm": 0.607443586736483, "learning_rate": 8.120133481646274e-06, "loss": 0.2397, "step": 730 }, { "epoch": 0.24411420938387043, "grad_norm": 1.3378541145711298, "learning_rate": 8.131256952169078e-06, "loss": 0.255, "step": 731 }, { "epoch": 0.24444815495074304, "grad_norm": 0.7693531003770262, "learning_rate": 8.14238042269188e-06, "loss": 0.2469, "step": 732 }, { "epoch": 0.24478210051761562, "grad_norm": 0.7630786614916105, "learning_rate": 8.153503893214683e-06, "loss": 0.2529, "step": 733 }, { "epoch": 0.24511604608448823, "grad_norm": 0.6830755477799588, "learning_rate": 8.164627363737486e-06, "loss": 0.2593, "step": 734 }, { "epoch": 0.24544999165136083, "grad_norm": 0.769470982645176, "learning_rate": 8.17575083426029e-06, "loss": 0.2635, "step": 735 }, { "epoch": 0.24578393721823344, "grad_norm": 0.6411315581669028, "learning_rate": 8.186874304783093e-06, "loss": 0.2333, "step": 736 }, { "epoch": 0.24611788278510602, "grad_norm": 0.7021751041939086, "learning_rate": 8.197997775305896e-06, "loss": 0.2614, "step": 737 }, { "epoch": 0.24645182835197862, "grad_norm": 0.6973361840787954, "learning_rate": 8.209121245828699e-06, "loss": 0.2517, "step": 738 }, { "epoch": 0.24678577391885123, "grad_norm": 0.7235562634782384, "learning_rate": 8.220244716351501e-06, "loss": 0.247, "step": 739 }, { "epoch": 0.24711971948572384, "grad_norm": 0.7907039415300885, "learning_rate": 8.231368186874306e-06, "loss": 0.2747, "step": 740 }, { "epoch": 0.24745366505259642, "grad_norm": 0.9920594690593687, "learning_rate": 8.242491657397109e-06, "loss": 0.2504, "step": 741 }, { "epoch": 0.24778761061946902, "grad_norm": 0.6620117662895729, "learning_rate": 8.253615127919911e-06, "loss": 0.2539, "step": 742 }, { "epoch": 0.24812155618634163, "grad_norm": 0.8115893003059279, "learning_rate": 8.264738598442716e-06, "loss": 0.2445, "step": 743 }, { "epoch": 0.24845550175321424, "grad_norm": 0.7888030282533821, "learning_rate": 8.275862068965518e-06, "loss": 0.2447, "step": 744 }, { "epoch": 0.24878944732008682, "grad_norm": 0.7396486295280933, "learning_rate": 8.286985539488321e-06, "loss": 0.2502, "step": 745 }, { "epoch": 0.24912339288695942, "grad_norm": 0.7245100088509931, "learning_rate": 8.298109010011124e-06, "loss": 0.2484, "step": 746 }, { "epoch": 0.24945733845383203, "grad_norm": 0.7768451542815072, "learning_rate": 8.309232480533928e-06, "loss": 0.2537, "step": 747 }, { "epoch": 0.24979128402070463, "grad_norm": 0.7260972447956803, "learning_rate": 8.32035595105673e-06, "loss": 0.2664, "step": 748 }, { "epoch": 0.2501252295875772, "grad_norm": 0.7727276441288169, "learning_rate": 8.331479421579534e-06, "loss": 0.2457, "step": 749 }, { "epoch": 0.2504591751544498, "grad_norm": 0.7116614209879674, "learning_rate": 8.342602892102336e-06, "loss": 0.228, "step": 750 }, { "epoch": 0.2507931207213224, "grad_norm": 0.9127998577319423, "learning_rate": 8.353726362625139e-06, "loss": 0.2362, "step": 751 }, { "epoch": 0.25112706628819503, "grad_norm": 0.8377483636026994, "learning_rate": 8.364849833147943e-06, "loss": 0.2464, "step": 752 }, { "epoch": 0.25146101185506764, "grad_norm": 0.6816783448014366, "learning_rate": 8.375973303670746e-06, "loss": 0.2369, "step": 753 }, { "epoch": 0.25179495742194025, "grad_norm": 0.8758433268313643, "learning_rate": 8.387096774193549e-06, "loss": 0.2795, "step": 754 }, { "epoch": 0.2521289029888128, "grad_norm": 0.9056051609541013, "learning_rate": 8.398220244716352e-06, "loss": 0.263, "step": 755 }, { "epoch": 0.2524628485556854, "grad_norm": 0.8917755495799926, "learning_rate": 8.409343715239156e-06, "loss": 0.2599, "step": 756 }, { "epoch": 0.252796794122558, "grad_norm": 0.6905229275325627, "learning_rate": 8.420467185761959e-06, "loss": 0.2328, "step": 757 }, { "epoch": 0.2531307396894306, "grad_norm": 0.68920478639127, "learning_rate": 8.431590656284761e-06, "loss": 0.2477, "step": 758 }, { "epoch": 0.2534646852563032, "grad_norm": 0.745543256443487, "learning_rate": 8.442714126807566e-06, "loss": 0.2405, "step": 759 }, { "epoch": 0.25379863082317583, "grad_norm": 0.7884756984382175, "learning_rate": 8.453837597330368e-06, "loss": 0.2362, "step": 760 }, { "epoch": 0.25413257639004844, "grad_norm": 0.7468278824055963, "learning_rate": 8.464961067853171e-06, "loss": 0.262, "step": 761 }, { "epoch": 0.25446652195692104, "grad_norm": 0.8653261962193426, "learning_rate": 8.476084538375974e-06, "loss": 0.2586, "step": 762 }, { "epoch": 0.2548004675237936, "grad_norm": 0.6837818557349334, "learning_rate": 8.487208008898777e-06, "loss": 0.2366, "step": 763 }, { "epoch": 0.2551344130906662, "grad_norm": 0.7536532409242654, "learning_rate": 8.49833147942158e-06, "loss": 0.2591, "step": 764 }, { "epoch": 0.2554683586575388, "grad_norm": 0.9083573270282176, "learning_rate": 8.509454949944384e-06, "loss": 0.2521, "step": 765 }, { "epoch": 0.2558023042244114, "grad_norm": 0.767552861904764, "learning_rate": 8.520578420467186e-06, "loss": 0.2625, "step": 766 }, { "epoch": 0.256136249791284, "grad_norm": 0.7629866167443453, "learning_rate": 8.531701890989989e-06, "loss": 0.251, "step": 767 }, { "epoch": 0.2564701953581566, "grad_norm": 0.7377761813541851, "learning_rate": 8.542825361512793e-06, "loss": 0.2404, "step": 768 }, { "epoch": 0.25680414092502923, "grad_norm": 0.7352772762034555, "learning_rate": 8.553948832035596e-06, "loss": 0.2509, "step": 769 }, { "epoch": 0.25713808649190184, "grad_norm": 0.7212760539284759, "learning_rate": 8.565072302558399e-06, "loss": 0.2418, "step": 770 }, { "epoch": 0.25747203205877445, "grad_norm": 0.6973041703150606, "learning_rate": 8.576195773081202e-06, "loss": 0.2378, "step": 771 }, { "epoch": 0.257805977625647, "grad_norm": 0.8275720116284327, "learning_rate": 8.587319243604006e-06, "loss": 0.2453, "step": 772 }, { "epoch": 0.2581399231925196, "grad_norm": 0.7089874078732612, "learning_rate": 8.598442714126807e-06, "loss": 0.2363, "step": 773 }, { "epoch": 0.2584738687593922, "grad_norm": 0.6667510504135715, "learning_rate": 8.609566184649611e-06, "loss": 0.2404, "step": 774 }, { "epoch": 0.2588078143262648, "grad_norm": 0.8122294668554957, "learning_rate": 8.620689655172414e-06, "loss": 0.2622, "step": 775 }, { "epoch": 0.2591417598931374, "grad_norm": 0.823450700107754, "learning_rate": 8.631813125695217e-06, "loss": 0.2417, "step": 776 }, { "epoch": 0.25947570546001003, "grad_norm": 0.7726092316119784, "learning_rate": 8.642936596218021e-06, "loss": 0.2474, "step": 777 }, { "epoch": 0.25980965102688264, "grad_norm": 0.6746381988563429, "learning_rate": 8.654060066740824e-06, "loss": 0.2445, "step": 778 }, { "epoch": 0.26014359659375524, "grad_norm": 0.7622637807060025, "learning_rate": 8.665183537263627e-06, "loss": 0.2593, "step": 779 }, { "epoch": 0.2604775421606278, "grad_norm": 0.7660043938587235, "learning_rate": 8.67630700778643e-06, "loss": 0.2612, "step": 780 }, { "epoch": 0.2608114877275004, "grad_norm": 0.733656969442999, "learning_rate": 8.687430478309234e-06, "loss": 0.2649, "step": 781 }, { "epoch": 0.261145433294373, "grad_norm": 0.8112312167102349, "learning_rate": 8.698553948832036e-06, "loss": 0.2637, "step": 782 }, { "epoch": 0.2614793788612456, "grad_norm": 0.7552777944668807, "learning_rate": 8.70967741935484e-06, "loss": 0.2511, "step": 783 }, { "epoch": 0.2618133244281182, "grad_norm": 0.6705632857664593, "learning_rate": 8.720800889877644e-06, "loss": 0.2517, "step": 784 }, { "epoch": 0.26214726999499083, "grad_norm": 0.7991435702186575, "learning_rate": 8.731924360400446e-06, "loss": 0.2798, "step": 785 }, { "epoch": 0.26248121556186343, "grad_norm": 0.7428406935602541, "learning_rate": 8.743047830923249e-06, "loss": 0.2669, "step": 786 }, { "epoch": 0.26281516112873604, "grad_norm": 0.7460196154653285, "learning_rate": 8.754171301446052e-06, "loss": 0.2314, "step": 787 }, { "epoch": 0.2631491066956086, "grad_norm": 0.6813560165573437, "learning_rate": 8.765294771968854e-06, "loss": 0.2387, "step": 788 }, { "epoch": 0.2634830522624812, "grad_norm": 1.1679094658128664, "learning_rate": 8.776418242491657e-06, "loss": 0.2642, "step": 789 }, { "epoch": 0.2638169978293538, "grad_norm": 0.6998489054574449, "learning_rate": 8.787541713014462e-06, "loss": 0.2493, "step": 790 }, { "epoch": 0.2641509433962264, "grad_norm": 0.7233905242204927, "learning_rate": 8.798665183537264e-06, "loss": 0.2765, "step": 791 }, { "epoch": 0.264484888963099, "grad_norm": 0.8130295529317806, "learning_rate": 8.809788654060067e-06, "loss": 0.2572, "step": 792 }, { "epoch": 0.2648188345299716, "grad_norm": 0.734643637678886, "learning_rate": 8.820912124582871e-06, "loss": 0.2764, "step": 793 }, { "epoch": 0.26515278009684423, "grad_norm": 0.8129757879046563, "learning_rate": 8.832035595105674e-06, "loss": 0.2597, "step": 794 }, { "epoch": 0.26548672566371684, "grad_norm": 0.7165953475826065, "learning_rate": 8.843159065628477e-06, "loss": 0.2338, "step": 795 }, { "epoch": 0.2658206712305894, "grad_norm": 0.7259835911679116, "learning_rate": 8.85428253615128e-06, "loss": 0.2579, "step": 796 }, { "epoch": 0.266154616797462, "grad_norm": 0.6619475009098943, "learning_rate": 8.865406006674084e-06, "loss": 0.2119, "step": 797 }, { "epoch": 0.2664885623643346, "grad_norm": 0.7625539062454664, "learning_rate": 8.876529477196885e-06, "loss": 0.2281, "step": 798 }, { "epoch": 0.2668225079312072, "grad_norm": 0.7790875438082882, "learning_rate": 8.88765294771969e-06, "loss": 0.264, "step": 799 }, { "epoch": 0.2671564534980798, "grad_norm": 0.7937898709271423, "learning_rate": 8.898776418242492e-06, "loss": 0.2448, "step": 800 }, { "epoch": 0.2674903990649524, "grad_norm": 0.7534922554712896, "learning_rate": 8.909899888765295e-06, "loss": 0.2601, "step": 801 }, { "epoch": 0.26782434463182503, "grad_norm": 0.709328797539281, "learning_rate": 8.921023359288099e-06, "loss": 0.2378, "step": 802 }, { "epoch": 0.26815829019869764, "grad_norm": 0.6700322828506419, "learning_rate": 8.932146829810902e-06, "loss": 0.2377, "step": 803 }, { "epoch": 0.2684922357655702, "grad_norm": 0.7397483140494081, "learning_rate": 8.943270300333705e-06, "loss": 0.2504, "step": 804 }, { "epoch": 0.2688261813324428, "grad_norm": 0.6236769417814907, "learning_rate": 8.954393770856507e-06, "loss": 0.2407, "step": 805 }, { "epoch": 0.2691601268993154, "grad_norm": 0.6903960276905347, "learning_rate": 8.965517241379312e-06, "loss": 0.2449, "step": 806 }, { "epoch": 0.269494072466188, "grad_norm": 0.6618661572934079, "learning_rate": 8.976640711902114e-06, "loss": 0.2322, "step": 807 }, { "epoch": 0.2698280180330606, "grad_norm": 0.6450814858368458, "learning_rate": 8.987764182424917e-06, "loss": 0.2527, "step": 808 }, { "epoch": 0.2701619635999332, "grad_norm": 0.6732302794328731, "learning_rate": 8.998887652947721e-06, "loss": 0.2492, "step": 809 }, { "epoch": 0.2704959091668058, "grad_norm": 0.7193330108510266, "learning_rate": 9.010011123470524e-06, "loss": 0.2521, "step": 810 }, { "epoch": 0.27082985473367843, "grad_norm": 0.6638055458342996, "learning_rate": 9.021134593993327e-06, "loss": 0.2558, "step": 811 }, { "epoch": 0.271163800300551, "grad_norm": 0.6932796625861776, "learning_rate": 9.03225806451613e-06, "loss": 0.2685, "step": 812 }, { "epoch": 0.2714977458674236, "grad_norm": 0.669882215821802, "learning_rate": 9.043381535038932e-06, "loss": 0.2511, "step": 813 }, { "epoch": 0.2718316914342962, "grad_norm": 0.738016797517266, "learning_rate": 9.054505005561735e-06, "loss": 0.2508, "step": 814 }, { "epoch": 0.2721656370011688, "grad_norm": 0.6579473564630967, "learning_rate": 9.06562847608454e-06, "loss": 0.2428, "step": 815 }, { "epoch": 0.2724995825680414, "grad_norm": 0.6601416633554115, "learning_rate": 9.076751946607342e-06, "loss": 0.2334, "step": 816 }, { "epoch": 0.272833528134914, "grad_norm": 0.8681212873817967, "learning_rate": 9.087875417130145e-06, "loss": 0.2649, "step": 817 }, { "epoch": 0.2731674737017866, "grad_norm": 0.6871613484359503, "learning_rate": 9.09899888765295e-06, "loss": 0.2488, "step": 818 }, { "epoch": 0.27350141926865923, "grad_norm": 0.8565684394856233, "learning_rate": 9.110122358175752e-06, "loss": 0.2566, "step": 819 }, { "epoch": 0.27383536483553184, "grad_norm": 0.8255322571537959, "learning_rate": 9.121245828698555e-06, "loss": 0.2493, "step": 820 }, { "epoch": 0.2741693104024044, "grad_norm": 0.8229306366015684, "learning_rate": 9.132369299221357e-06, "loss": 0.252, "step": 821 }, { "epoch": 0.274503255969277, "grad_norm": 0.8463175261291924, "learning_rate": 9.143492769744162e-06, "loss": 0.2567, "step": 822 }, { "epoch": 0.2748372015361496, "grad_norm": 0.6728403338031879, "learning_rate": 9.154616240266963e-06, "loss": 0.2441, "step": 823 }, { "epoch": 0.2751711471030222, "grad_norm": 0.8767027013992857, "learning_rate": 9.165739710789767e-06, "loss": 0.2473, "step": 824 }, { "epoch": 0.2755050926698948, "grad_norm": 0.8201243130499303, "learning_rate": 9.176863181312572e-06, "loss": 0.2563, "step": 825 }, { "epoch": 0.2758390382367674, "grad_norm": 0.6872232291309764, "learning_rate": 9.187986651835373e-06, "loss": 0.2485, "step": 826 }, { "epoch": 0.27617298380364, "grad_norm": 0.6930371900328928, "learning_rate": 9.199110122358177e-06, "loss": 0.236, "step": 827 }, { "epoch": 0.27650692937051263, "grad_norm": 0.7768765519692938, "learning_rate": 9.21023359288098e-06, "loss": 0.2268, "step": 828 }, { "epoch": 0.2768408749373852, "grad_norm": 0.641650918415613, "learning_rate": 9.221357063403782e-06, "loss": 0.2458, "step": 829 }, { "epoch": 0.2771748205042578, "grad_norm": 0.7847607928855562, "learning_rate": 9.232480533926585e-06, "loss": 0.2561, "step": 830 }, { "epoch": 0.2775087660711304, "grad_norm": 0.7522962047924147, "learning_rate": 9.24360400444939e-06, "loss": 0.2462, "step": 831 }, { "epoch": 0.277842711638003, "grad_norm": 0.7856412167874145, "learning_rate": 9.254727474972192e-06, "loss": 0.2453, "step": 832 }, { "epoch": 0.2781766572048756, "grad_norm": 0.6379368244033592, "learning_rate": 9.265850945494995e-06, "loss": 0.2319, "step": 833 }, { "epoch": 0.2785106027717482, "grad_norm": 0.6923372665425509, "learning_rate": 9.2769744160178e-06, "loss": 0.2521, "step": 834 }, { "epoch": 0.2788445483386208, "grad_norm": 0.6285185522634985, "learning_rate": 9.288097886540602e-06, "loss": 0.2414, "step": 835 }, { "epoch": 0.27917849390549343, "grad_norm": 0.7659134854937873, "learning_rate": 9.299221357063405e-06, "loss": 0.2617, "step": 836 }, { "epoch": 0.279512439472366, "grad_norm": 0.651210591819511, "learning_rate": 9.310344827586207e-06, "loss": 0.2412, "step": 837 }, { "epoch": 0.2798463850392386, "grad_norm": 0.604891742779754, "learning_rate": 9.32146829810901e-06, "loss": 0.2361, "step": 838 }, { "epoch": 0.2801803306061112, "grad_norm": 0.6371826775462133, "learning_rate": 9.332591768631813e-06, "loss": 0.2512, "step": 839 }, { "epoch": 0.2805142761729838, "grad_norm": 0.6340690502327005, "learning_rate": 9.343715239154617e-06, "loss": 0.2416, "step": 840 }, { "epoch": 0.2808482217398564, "grad_norm": 0.7342239300077404, "learning_rate": 9.35483870967742e-06, "loss": 0.2448, "step": 841 }, { "epoch": 0.281182167306729, "grad_norm": 0.6549609966808125, "learning_rate": 9.365962180200223e-06, "loss": 0.2458, "step": 842 }, { "epoch": 0.2815161128736016, "grad_norm": 0.7299241942323713, "learning_rate": 9.377085650723027e-06, "loss": 0.2318, "step": 843 }, { "epoch": 0.2818500584404742, "grad_norm": 0.6845739065601146, "learning_rate": 9.38820912124583e-06, "loss": 0.2428, "step": 844 }, { "epoch": 0.2821840040073468, "grad_norm": 0.677968498084514, "learning_rate": 9.399332591768633e-06, "loss": 0.2594, "step": 845 }, { "epoch": 0.2825179495742194, "grad_norm": 0.682427111871113, "learning_rate": 9.410456062291435e-06, "loss": 0.2504, "step": 846 }, { "epoch": 0.282851895141092, "grad_norm": 0.6035000880972591, "learning_rate": 9.42157953281424e-06, "loss": 0.2399, "step": 847 }, { "epoch": 0.2831858407079646, "grad_norm": 0.7419050323765086, "learning_rate": 9.43270300333704e-06, "loss": 0.2724, "step": 848 }, { "epoch": 0.2835197862748372, "grad_norm": 0.6133376838559707, "learning_rate": 9.443826473859845e-06, "loss": 0.2289, "step": 849 }, { "epoch": 0.2838537318417098, "grad_norm": 0.687952283763828, "learning_rate": 9.45494994438265e-06, "loss": 0.2427, "step": 850 }, { "epoch": 0.2841876774085824, "grad_norm": 0.7028605452057839, "learning_rate": 9.46607341490545e-06, "loss": 0.2476, "step": 851 }, { "epoch": 0.284521622975455, "grad_norm": 0.6623344875228663, "learning_rate": 9.477196885428255e-06, "loss": 0.2452, "step": 852 }, { "epoch": 0.2848555685423276, "grad_norm": 0.6332298537339767, "learning_rate": 9.488320355951058e-06, "loss": 0.2399, "step": 853 }, { "epoch": 0.2851895141092002, "grad_norm": 0.6645743882820965, "learning_rate": 9.49944382647386e-06, "loss": 0.2447, "step": 854 }, { "epoch": 0.2855234596760728, "grad_norm": 0.6662259006288558, "learning_rate": 9.510567296996663e-06, "loss": 0.2296, "step": 855 }, { "epoch": 0.2858574052429454, "grad_norm": 0.6919974903102527, "learning_rate": 9.521690767519467e-06, "loss": 0.2537, "step": 856 }, { "epoch": 0.286191350809818, "grad_norm": 0.6454163080434254, "learning_rate": 9.53281423804227e-06, "loss": 0.2514, "step": 857 }, { "epoch": 0.2865252963766906, "grad_norm": 0.6480489173178399, "learning_rate": 9.543937708565073e-06, "loss": 0.2422, "step": 858 }, { "epoch": 0.2868592419435632, "grad_norm": 0.7353022665236786, "learning_rate": 9.555061179087877e-06, "loss": 0.2351, "step": 859 }, { "epoch": 0.2871931875104358, "grad_norm": 0.661065913724645, "learning_rate": 9.56618464961068e-06, "loss": 0.2479, "step": 860 }, { "epoch": 0.28752713307730837, "grad_norm": 0.6763453703327771, "learning_rate": 9.577308120133483e-06, "loss": 0.2226, "step": 861 }, { "epoch": 0.287861078644181, "grad_norm": 0.6727786879376527, "learning_rate": 9.588431590656285e-06, "loss": 0.2548, "step": 862 }, { "epoch": 0.2881950242110536, "grad_norm": 0.9197408709347626, "learning_rate": 9.599555061179088e-06, "loss": 0.2489, "step": 863 }, { "epoch": 0.2885289697779262, "grad_norm": 0.6646129153644628, "learning_rate": 9.61067853170189e-06, "loss": 0.2541, "step": 864 }, { "epoch": 0.2888629153447988, "grad_norm": 0.7687613388007959, "learning_rate": 9.621802002224695e-06, "loss": 0.2549, "step": 865 }, { "epoch": 0.2891968609116714, "grad_norm": 0.663099474914373, "learning_rate": 9.632925472747498e-06, "loss": 0.255, "step": 866 }, { "epoch": 0.289530806478544, "grad_norm": 0.6373838203654694, "learning_rate": 9.6440489432703e-06, "loss": 0.2274, "step": 867 }, { "epoch": 0.2898647520454166, "grad_norm": 0.8958296860718516, "learning_rate": 9.655172413793105e-06, "loss": 0.2744, "step": 868 }, { "epoch": 0.29019869761228917, "grad_norm": 0.6689521121922187, "learning_rate": 9.666295884315908e-06, "loss": 0.2486, "step": 869 }, { "epoch": 0.2905326431791618, "grad_norm": 0.7051581520556306, "learning_rate": 9.67741935483871e-06, "loss": 0.2325, "step": 870 }, { "epoch": 0.2908665887460344, "grad_norm": 0.6723499452278224, "learning_rate": 9.688542825361513e-06, "loss": 0.2494, "step": 871 }, { "epoch": 0.291200534312907, "grad_norm": 0.7127882744746751, "learning_rate": 9.699666295884318e-06, "loss": 0.2492, "step": 872 }, { "epoch": 0.2915344798797796, "grad_norm": 0.6707104286364339, "learning_rate": 9.710789766407119e-06, "loss": 0.251, "step": 873 }, { "epoch": 0.2918684254466522, "grad_norm": 0.781136986166393, "learning_rate": 9.721913236929923e-06, "loss": 0.2484, "step": 874 }, { "epoch": 0.2922023710135248, "grad_norm": 0.6755229208947209, "learning_rate": 9.733036707452727e-06, "loss": 0.2545, "step": 875 }, { "epoch": 0.2925363165803974, "grad_norm": 0.663707156055788, "learning_rate": 9.744160177975528e-06, "loss": 0.235, "step": 876 }, { "epoch": 0.29287026214727, "grad_norm": 0.6679337491435877, "learning_rate": 9.755283648498333e-06, "loss": 0.2409, "step": 877 }, { "epoch": 0.2932042077141426, "grad_norm": 0.8073669301897611, "learning_rate": 9.766407119021135e-06, "loss": 0.2554, "step": 878 }, { "epoch": 0.2935381532810152, "grad_norm": 0.7198972500299666, "learning_rate": 9.777530589543938e-06, "loss": 0.2601, "step": 879 }, { "epoch": 0.2938720988478878, "grad_norm": 0.7486411039843818, "learning_rate": 9.788654060066741e-06, "loss": 0.2717, "step": 880 }, { "epoch": 0.2942060444147604, "grad_norm": 0.7554055472380966, "learning_rate": 9.799777530589545e-06, "loss": 0.2536, "step": 881 }, { "epoch": 0.294539989981633, "grad_norm": 0.6769414341242086, "learning_rate": 9.810901001112348e-06, "loss": 0.2459, "step": 882 }, { "epoch": 0.2948739355485056, "grad_norm": 0.6873429312658788, "learning_rate": 9.82202447163515e-06, "loss": 0.2353, "step": 883 }, { "epoch": 0.2952078811153782, "grad_norm": 0.7752597910074831, "learning_rate": 9.833147942157955e-06, "loss": 0.2535, "step": 884 }, { "epoch": 0.2955418266822508, "grad_norm": 0.6943618227859522, "learning_rate": 9.844271412680758e-06, "loss": 0.2689, "step": 885 }, { "epoch": 0.29587577224912337, "grad_norm": 0.6801838220701963, "learning_rate": 9.85539488320356e-06, "loss": 0.2509, "step": 886 }, { "epoch": 0.296209717815996, "grad_norm": 0.7339744312012771, "learning_rate": 9.866518353726363e-06, "loss": 0.2506, "step": 887 }, { "epoch": 0.2965436633828686, "grad_norm": 0.7467491221803867, "learning_rate": 9.877641824249166e-06, "loss": 0.2566, "step": 888 }, { "epoch": 0.2968776089497412, "grad_norm": 0.6412331225573681, "learning_rate": 9.888765294771969e-06, "loss": 0.2474, "step": 889 }, { "epoch": 0.2972115545166138, "grad_norm": 0.7697158729899859, "learning_rate": 9.899888765294773e-06, "loss": 0.247, "step": 890 }, { "epoch": 0.2975455000834864, "grad_norm": 0.7524145415460668, "learning_rate": 9.911012235817576e-06, "loss": 0.2567, "step": 891 }, { "epoch": 0.297879445650359, "grad_norm": 0.6734182049066624, "learning_rate": 9.922135706340378e-06, "loss": 0.2549, "step": 892 }, { "epoch": 0.2982133912172316, "grad_norm": 0.7218829257352565, "learning_rate": 9.933259176863183e-06, "loss": 0.2406, "step": 893 }, { "epoch": 0.29854733678410417, "grad_norm": 0.6704757464416423, "learning_rate": 9.944382647385986e-06, "loss": 0.2441, "step": 894 }, { "epoch": 0.2988812823509768, "grad_norm": 0.7404615308151165, "learning_rate": 9.955506117908788e-06, "loss": 0.2708, "step": 895 }, { "epoch": 0.2992152279178494, "grad_norm": 0.7370776669079746, "learning_rate": 9.966629588431591e-06, "loss": 0.2554, "step": 896 }, { "epoch": 0.299549173484722, "grad_norm": 0.7453068037060375, "learning_rate": 9.977753058954395e-06, "loss": 0.2522, "step": 897 }, { "epoch": 0.2998831190515946, "grad_norm": 0.7537765886662524, "learning_rate": 9.988876529477196e-06, "loss": 0.2532, "step": 898 }, { "epoch": 0.3002170646184672, "grad_norm": 0.7151572032080934, "learning_rate": 1e-05, "loss": 0.2534, "step": 899 }, { "epoch": 0.3005510101853398, "grad_norm": 0.714477700987338, "learning_rate": 9.999999622345564e-06, "loss": 0.2404, "step": 900 }, { "epoch": 0.3008849557522124, "grad_norm": 0.77324691413999, "learning_rate": 9.999998489382312e-06, "loss": 0.2426, "step": 901 }, { "epoch": 0.30121890131908496, "grad_norm": 0.6128610447991886, "learning_rate": 9.999996601110414e-06, "loss": 0.2368, "step": 902 }, { "epoch": 0.30155284688595757, "grad_norm": 0.8103821519751393, "learning_rate": 9.999993957530157e-06, "loss": 0.2486, "step": 903 }, { "epoch": 0.3018867924528302, "grad_norm": 0.7509864144087558, "learning_rate": 9.999990558641939e-06, "loss": 0.2538, "step": 904 }, { "epoch": 0.3022207380197028, "grad_norm": 0.6455618999464463, "learning_rate": 9.999986404446276e-06, "loss": 0.2622, "step": 905 }, { "epoch": 0.3025546835865754, "grad_norm": 0.7642363495501507, "learning_rate": 9.999981494943791e-06, "loss": 0.2579, "step": 906 }, { "epoch": 0.302888629153448, "grad_norm": 0.7516753216991904, "learning_rate": 9.99997583013523e-06, "loss": 0.2634, "step": 907 }, { "epoch": 0.3032225747203206, "grad_norm": 0.5702441441339939, "learning_rate": 9.999969410021447e-06, "loss": 0.2212, "step": 908 }, { "epoch": 0.3035565202871932, "grad_norm": 0.7489930131914292, "learning_rate": 9.999962234603412e-06, "loss": 0.245, "step": 909 }, { "epoch": 0.30389046585406576, "grad_norm": 0.6082830635429192, "learning_rate": 9.99995430388221e-06, "loss": 0.2325, "step": 910 }, { "epoch": 0.30422441142093837, "grad_norm": 0.620352857608966, "learning_rate": 9.999945617859034e-06, "loss": 0.2396, "step": 911 }, { "epoch": 0.304558356987811, "grad_norm": 0.7221173476692679, "learning_rate": 9.999936176535203e-06, "loss": 0.2305, "step": 912 }, { "epoch": 0.3048923025546836, "grad_norm": 0.8190610193603814, "learning_rate": 9.99992597991214e-06, "loss": 0.2528, "step": 913 }, { "epoch": 0.3052262481215562, "grad_norm": 0.6278371889811301, "learning_rate": 9.999915027991384e-06, "loss": 0.227, "step": 914 }, { "epoch": 0.3055601936884288, "grad_norm": 0.6759595522501595, "learning_rate": 9.999903320774593e-06, "loss": 0.2519, "step": 915 }, { "epoch": 0.3058941392553014, "grad_norm": 0.6733098529721205, "learning_rate": 9.999890858263532e-06, "loss": 0.2299, "step": 916 }, { "epoch": 0.306228084822174, "grad_norm": 0.6706970692688264, "learning_rate": 9.999877640460085e-06, "loss": 0.248, "step": 917 }, { "epoch": 0.30656203038904656, "grad_norm": 0.7033323944048314, "learning_rate": 9.999863667366249e-06, "loss": 0.2528, "step": 918 }, { "epoch": 0.30689597595591916, "grad_norm": 0.7882000097369829, "learning_rate": 9.999848938984135e-06, "loss": 0.2462, "step": 919 }, { "epoch": 0.30722992152279177, "grad_norm": 0.663692017284394, "learning_rate": 9.999833455315966e-06, "loss": 0.2401, "step": 920 }, { "epoch": 0.3075638670896644, "grad_norm": 0.7406267967602063, "learning_rate": 9.999817216364085e-06, "loss": 0.2426, "step": 921 }, { "epoch": 0.307897812656537, "grad_norm": 0.719538796129936, "learning_rate": 9.99980022213094e-06, "loss": 0.259, "step": 922 }, { "epoch": 0.3082317582234096, "grad_norm": 0.7198059211802099, "learning_rate": 9.999782472619102e-06, "loss": 0.2356, "step": 923 }, { "epoch": 0.3085657037902822, "grad_norm": 0.7748449854954533, "learning_rate": 9.99976396783125e-06, "loss": 0.2512, "step": 924 }, { "epoch": 0.3088996493571548, "grad_norm": 0.6743289212320538, "learning_rate": 9.999744707770182e-06, "loss": 0.2295, "step": 925 }, { "epoch": 0.3092335949240274, "grad_norm": 0.8585415752543676, "learning_rate": 9.999724692438805e-06, "loss": 0.2855, "step": 926 }, { "epoch": 0.30956754049089996, "grad_norm": 0.6469196243149617, "learning_rate": 9.999703921840143e-06, "loss": 0.2527, "step": 927 }, { "epoch": 0.30990148605777257, "grad_norm": 0.6798611050791047, "learning_rate": 9.999682395977334e-06, "loss": 0.2469, "step": 928 }, { "epoch": 0.3102354316246452, "grad_norm": 0.7112677389326599, "learning_rate": 9.999660114853631e-06, "loss": 0.243, "step": 929 }, { "epoch": 0.3105693771915178, "grad_norm": 0.5521283993542538, "learning_rate": 9.999637078472398e-06, "loss": 0.2228, "step": 930 }, { "epoch": 0.3109033227583904, "grad_norm": 0.6084209475663657, "learning_rate": 9.999613286837115e-06, "loss": 0.2429, "step": 931 }, { "epoch": 0.311237268325263, "grad_norm": 0.6229978352267757, "learning_rate": 9.999588739951376e-06, "loss": 0.2676, "step": 932 }, { "epoch": 0.3115712138921356, "grad_norm": 0.6220393849687876, "learning_rate": 9.99956343781889e-06, "loss": 0.2396, "step": 933 }, { "epoch": 0.3119051594590082, "grad_norm": 0.6661414030839978, "learning_rate": 9.999537380443479e-06, "loss": 0.2464, "step": 934 }, { "epoch": 0.31223910502588076, "grad_norm": 0.657272661420604, "learning_rate": 9.999510567829079e-06, "loss": 0.2318, "step": 935 }, { "epoch": 0.31257305059275337, "grad_norm": 0.6663711093096606, "learning_rate": 9.999482999979739e-06, "loss": 0.2738, "step": 936 }, { "epoch": 0.31290699615962597, "grad_norm": 1.3839502383145643, "learning_rate": 9.999454676899628e-06, "loss": 0.2663, "step": 937 }, { "epoch": 0.3132409417264986, "grad_norm": 0.7046860437125939, "learning_rate": 9.999425598593018e-06, "loss": 0.2601, "step": 938 }, { "epoch": 0.3135748872933712, "grad_norm": 0.7271393767781734, "learning_rate": 9.999395765064308e-06, "loss": 0.2669, "step": 939 }, { "epoch": 0.3139088328602438, "grad_norm": 0.6277865590931718, "learning_rate": 9.999365176318e-06, "loss": 0.24, "step": 940 }, { "epoch": 0.3142427784271164, "grad_norm": 0.7623469016255324, "learning_rate": 9.999333832358716e-06, "loss": 0.2474, "step": 941 }, { "epoch": 0.314576723993989, "grad_norm": 0.6244292989040466, "learning_rate": 9.999301733191193e-06, "loss": 0.2476, "step": 942 }, { "epoch": 0.31491066956086156, "grad_norm": 0.6679513544214297, "learning_rate": 9.999268878820278e-06, "loss": 0.2566, "step": 943 }, { "epoch": 0.31524461512773416, "grad_norm": 0.9118400082357937, "learning_rate": 9.999235269250933e-06, "loss": 0.2472, "step": 944 }, { "epoch": 0.31557856069460677, "grad_norm": 0.5506843638558178, "learning_rate": 9.999200904488238e-06, "loss": 0.2239, "step": 945 }, { "epoch": 0.3159125062614794, "grad_norm": 0.6977183679119197, "learning_rate": 9.999165784537381e-06, "loss": 0.2522, "step": 946 }, { "epoch": 0.316246451828352, "grad_norm": 0.6229267335231975, "learning_rate": 9.999129909403671e-06, "loss": 0.2385, "step": 947 }, { "epoch": 0.3165803973952246, "grad_norm": 0.6346107999834576, "learning_rate": 9.999093279092524e-06, "loss": 0.2464, "step": 948 }, { "epoch": 0.3169143429620972, "grad_norm": 0.7147362148938331, "learning_rate": 9.999055893609475e-06, "loss": 0.2519, "step": 949 }, { "epoch": 0.3172482885289698, "grad_norm": 0.681378538809813, "learning_rate": 9.999017752960172e-06, "loss": 0.2497, "step": 950 }, { "epoch": 0.31758223409584235, "grad_norm": 0.7257313818749964, "learning_rate": 9.998978857150375e-06, "loss": 0.2586, "step": 951 }, { "epoch": 0.31791617966271496, "grad_norm": 0.6552438164802247, "learning_rate": 9.99893920618596e-06, "loss": 0.2486, "step": 952 }, { "epoch": 0.31825012522958757, "grad_norm": 0.7499319030931288, "learning_rate": 9.998898800072919e-06, "loss": 0.2519, "step": 953 }, { "epoch": 0.3185840707964602, "grad_norm": 0.623600497254021, "learning_rate": 9.998857638817354e-06, "loss": 0.2435, "step": 954 }, { "epoch": 0.3189180163633328, "grad_norm": 0.7248155860321855, "learning_rate": 9.99881572242548e-06, "loss": 0.2376, "step": 955 }, { "epoch": 0.3192519619302054, "grad_norm": 0.5991586377037108, "learning_rate": 9.998773050903637e-06, "loss": 0.2376, "step": 956 }, { "epoch": 0.319585907497078, "grad_norm": 0.7346806017257687, "learning_rate": 9.998729624258262e-06, "loss": 0.2633, "step": 957 }, { "epoch": 0.3199198530639506, "grad_norm": 0.563428634742872, "learning_rate": 9.998685442495921e-06, "loss": 0.2382, "step": 958 }, { "epoch": 0.32025379863082315, "grad_norm": 0.6736707339352784, "learning_rate": 9.998640505623284e-06, "loss": 0.2523, "step": 959 }, { "epoch": 0.32058774419769576, "grad_norm": 0.6334538052099089, "learning_rate": 9.998594813647145e-06, "loss": 0.2382, "step": 960 }, { "epoch": 0.32092168976456836, "grad_norm": 0.6355713277943253, "learning_rate": 9.998548366574401e-06, "loss": 0.245, "step": 961 }, { "epoch": 0.32125563533144097, "grad_norm": 0.5993057051640902, "learning_rate": 9.99850116441207e-06, "loss": 0.2418, "step": 962 }, { "epoch": 0.3215895808983136, "grad_norm": 0.5920496868805775, "learning_rate": 9.998453207167282e-06, "loss": 0.2565, "step": 963 }, { "epoch": 0.3219235264651862, "grad_norm": 0.61198413975063, "learning_rate": 9.998404494847285e-06, "loss": 0.2585, "step": 964 }, { "epoch": 0.3222574720320588, "grad_norm": 0.6369440770274445, "learning_rate": 9.998355027459432e-06, "loss": 0.2706, "step": 965 }, { "epoch": 0.3225914175989314, "grad_norm": 0.5872473237923148, "learning_rate": 9.998304805011199e-06, "loss": 0.2486, "step": 966 }, { "epoch": 0.32292536316580395, "grad_norm": 0.620263995911841, "learning_rate": 9.998253827510173e-06, "loss": 0.2547, "step": 967 }, { "epoch": 0.32325930873267655, "grad_norm": 0.7511215575831635, "learning_rate": 9.998202094964053e-06, "loss": 0.2556, "step": 968 }, { "epoch": 0.32359325429954916, "grad_norm": 0.6305888434992816, "learning_rate": 9.998149607380654e-06, "loss": 0.2356, "step": 969 }, { "epoch": 0.32392719986642177, "grad_norm": 0.6474250636445457, "learning_rate": 9.998096364767906e-06, "loss": 0.2409, "step": 970 }, { "epoch": 0.3242611454332944, "grad_norm": 0.6404737722389929, "learning_rate": 9.998042367133854e-06, "loss": 0.2418, "step": 971 }, { "epoch": 0.324595091000167, "grad_norm": 0.6255070104433448, "learning_rate": 9.997987614486648e-06, "loss": 0.2421, "step": 972 }, { "epoch": 0.3249290365670396, "grad_norm": 0.5688015945545797, "learning_rate": 9.997932106834567e-06, "loss": 0.2349, "step": 973 }, { "epoch": 0.3252629821339122, "grad_norm": 0.6829656370856075, "learning_rate": 9.997875844185991e-06, "loss": 0.2501, "step": 974 }, { "epoch": 0.3255969277007848, "grad_norm": 0.6365251336709236, "learning_rate": 9.99781882654942e-06, "loss": 0.2408, "step": 975 }, { "epoch": 0.32593087326765735, "grad_norm": 0.6327216943145679, "learning_rate": 9.997761053933469e-06, "loss": 0.2532, "step": 976 }, { "epoch": 0.32626481883452996, "grad_norm": 0.5741706691051712, "learning_rate": 9.997702526346864e-06, "loss": 0.2517, "step": 977 }, { "epoch": 0.32659876440140256, "grad_norm": 0.634532616958001, "learning_rate": 9.997643243798446e-06, "loss": 0.2561, "step": 978 }, { "epoch": 0.32693270996827517, "grad_norm": 0.6627572201424764, "learning_rate": 9.99758320629717e-06, "loss": 0.2212, "step": 979 }, { "epoch": 0.3272666555351478, "grad_norm": 0.6562948613513276, "learning_rate": 9.997522413852108e-06, "loss": 0.2503, "step": 980 }, { "epoch": 0.3276006011020204, "grad_norm": 0.7777578566787204, "learning_rate": 9.997460866472439e-06, "loss": 0.2431, "step": 981 }, { "epoch": 0.327934546668893, "grad_norm": 0.7143909349751347, "learning_rate": 9.997398564167465e-06, "loss": 0.261, "step": 982 }, { "epoch": 0.3282684922357656, "grad_norm": 0.6511654646356778, "learning_rate": 9.997335506946596e-06, "loss": 0.2605, "step": 983 }, { "epoch": 0.32860243780263815, "grad_norm": 0.8594408758077905, "learning_rate": 9.997271694819354e-06, "loss": 0.2563, "step": 984 }, { "epoch": 0.32893638336951075, "grad_norm": 0.630895825476141, "learning_rate": 9.997207127795383e-06, "loss": 0.2357, "step": 985 }, { "epoch": 0.32927032893638336, "grad_norm": 0.6193761330050703, "learning_rate": 9.997141805884436e-06, "loss": 0.2395, "step": 986 }, { "epoch": 0.32960427450325597, "grad_norm": 0.611311370706388, "learning_rate": 9.997075729096379e-06, "loss": 0.2506, "step": 987 }, { "epoch": 0.3299382200701286, "grad_norm": 0.7033542886635047, "learning_rate": 9.997008897441194e-06, "loss": 0.2526, "step": 988 }, { "epoch": 0.3302721656370012, "grad_norm": 0.6230286822204218, "learning_rate": 9.996941310928978e-06, "loss": 0.2395, "step": 989 }, { "epoch": 0.3306061112038738, "grad_norm": 0.6362704854163252, "learning_rate": 9.99687296956994e-06, "loss": 0.2378, "step": 990 }, { "epoch": 0.3309400567707464, "grad_norm": 0.5800914414032446, "learning_rate": 9.996803873374402e-06, "loss": 0.2254, "step": 991 }, { "epoch": 0.33127400233761894, "grad_norm": 0.7204698593726746, "learning_rate": 9.996734022352805e-06, "loss": 0.2527, "step": 992 }, { "epoch": 0.33160794790449155, "grad_norm": 0.634627877169416, "learning_rate": 9.9966634165157e-06, "loss": 0.2449, "step": 993 }, { "epoch": 0.33194189347136416, "grad_norm": 0.6111123727859595, "learning_rate": 9.99659205587375e-06, "loss": 0.2474, "step": 994 }, { "epoch": 0.33227583903823676, "grad_norm": 0.6755862324184183, "learning_rate": 9.996519940437737e-06, "loss": 0.2505, "step": 995 }, { "epoch": 0.33260978460510937, "grad_norm": 0.6460412308879759, "learning_rate": 9.996447070218557e-06, "loss": 0.2473, "step": 996 }, { "epoch": 0.332943730171982, "grad_norm": 0.6346454236316136, "learning_rate": 9.996373445227215e-06, "loss": 0.239, "step": 997 }, { "epoch": 0.3332776757388546, "grad_norm": 0.6415377886338729, "learning_rate": 9.996299065474832e-06, "loss": 0.2445, "step": 998 }, { "epoch": 0.3336116213057272, "grad_norm": 0.6555194675483528, "learning_rate": 9.996223930972649e-06, "loss": 0.2401, "step": 999 }, { "epoch": 0.33394556687259974, "grad_norm": 0.5713457684437109, "learning_rate": 9.99614804173201e-06, "loss": 0.2244, "step": 1000 }, { "epoch": 0.33427951243947235, "grad_norm": 0.5893092973518516, "learning_rate": 9.996071397764381e-06, "loss": 0.2602, "step": 1001 }, { "epoch": 0.33461345800634495, "grad_norm": 0.5906004788656943, "learning_rate": 9.995993999081343e-06, "loss": 0.2298, "step": 1002 }, { "epoch": 0.33494740357321756, "grad_norm": 0.6462480077770343, "learning_rate": 9.995915845694584e-06, "loss": 0.2573, "step": 1003 }, { "epoch": 0.33528134914009017, "grad_norm": 0.9424314763458541, "learning_rate": 9.995836937615913e-06, "loss": 0.2275, "step": 1004 }, { "epoch": 0.3356152947069628, "grad_norm": 0.5975148522005419, "learning_rate": 9.995757274857246e-06, "loss": 0.252, "step": 1005 }, { "epoch": 0.3359492402738354, "grad_norm": 0.6746114929233912, "learning_rate": 9.995676857430621e-06, "loss": 0.2296, "step": 1006 }, { "epoch": 0.336283185840708, "grad_norm": 0.6043959986784809, "learning_rate": 9.995595685348186e-06, "loss": 0.2418, "step": 1007 }, { "epoch": 0.33661713140758054, "grad_norm": 0.6561996117107064, "learning_rate": 9.995513758622198e-06, "loss": 0.2411, "step": 1008 }, { "epoch": 0.33695107697445315, "grad_norm": 0.5768734030397459, "learning_rate": 9.995431077265038e-06, "loss": 0.2397, "step": 1009 }, { "epoch": 0.33728502254132575, "grad_norm": 0.5644019192557386, "learning_rate": 9.995347641289194e-06, "loss": 0.2308, "step": 1010 }, { "epoch": 0.33761896810819836, "grad_norm": 0.7155920321425914, "learning_rate": 9.995263450707273e-06, "loss": 0.2483, "step": 1011 }, { "epoch": 0.33795291367507097, "grad_norm": 0.5820377404778101, "learning_rate": 9.995178505531989e-06, "loss": 0.2361, "step": 1012 }, { "epoch": 0.33828685924194357, "grad_norm": 0.6074649479328393, "learning_rate": 9.995092805776175e-06, "loss": 0.2501, "step": 1013 }, { "epoch": 0.3386208048088162, "grad_norm": 0.6808463616772454, "learning_rate": 9.995006351452775e-06, "loss": 0.2514, "step": 1014 }, { "epoch": 0.3389547503756888, "grad_norm": 0.5357571508833467, "learning_rate": 9.994919142574854e-06, "loss": 0.2361, "step": 1015 }, { "epoch": 0.33928869594256134, "grad_norm": 0.6020487192909576, "learning_rate": 9.994831179155584e-06, "loss": 0.2419, "step": 1016 }, { "epoch": 0.33962264150943394, "grad_norm": 0.7283194863543118, "learning_rate": 9.994742461208251e-06, "loss": 0.2607, "step": 1017 }, { "epoch": 0.33995658707630655, "grad_norm": 0.5782631325584849, "learning_rate": 9.994652988746258e-06, "loss": 0.238, "step": 1018 }, { "epoch": 0.34029053264317916, "grad_norm": 0.7864013734784921, "learning_rate": 9.994562761783122e-06, "loss": 0.2497, "step": 1019 }, { "epoch": 0.34062447821005176, "grad_norm": 0.6293240854325619, "learning_rate": 9.99447178033247e-06, "loss": 0.2423, "step": 1020 }, { "epoch": 0.34095842377692437, "grad_norm": 0.6532327987330085, "learning_rate": 9.99438004440805e-06, "loss": 0.2511, "step": 1021 }, { "epoch": 0.341292369343797, "grad_norm": 0.5993467036575328, "learning_rate": 9.994287554023717e-06, "loss": 0.2241, "step": 1022 }, { "epoch": 0.3416263149106696, "grad_norm": 0.6048208268550589, "learning_rate": 9.994194309193442e-06, "loss": 0.2263, "step": 1023 }, { "epoch": 0.3419602604775422, "grad_norm": 0.620928708879154, "learning_rate": 9.99410030993131e-06, "loss": 0.2333, "step": 1024 }, { "epoch": 0.34229420604441474, "grad_norm": 0.5811126029360281, "learning_rate": 9.994005556251525e-06, "loss": 0.2369, "step": 1025 }, { "epoch": 0.34262815161128735, "grad_norm": 0.6238825923972041, "learning_rate": 9.993910048168399e-06, "loss": 0.2273, "step": 1026 }, { "epoch": 0.34296209717815995, "grad_norm": 0.6765552725580103, "learning_rate": 9.993813785696355e-06, "loss": 0.2254, "step": 1027 }, { "epoch": 0.34329604274503256, "grad_norm": 0.6187685905117865, "learning_rate": 9.993716768849942e-06, "loss": 0.2408, "step": 1028 }, { "epoch": 0.34362998831190517, "grad_norm": 0.6097073883952683, "learning_rate": 9.99361899764381e-06, "loss": 0.25, "step": 1029 }, { "epoch": 0.3439639338787778, "grad_norm": 0.6545748380846143, "learning_rate": 9.993520472092732e-06, "loss": 0.2434, "step": 1030 }, { "epoch": 0.3442978794456504, "grad_norm": 0.5477963426981712, "learning_rate": 9.99342119221159e-06, "loss": 0.2346, "step": 1031 }, { "epoch": 0.344631825012523, "grad_norm": 0.5510391995222212, "learning_rate": 9.993321158015379e-06, "loss": 0.2305, "step": 1032 }, { "epoch": 0.34496577057939554, "grad_norm": 0.6324909697234063, "learning_rate": 9.993220369519215e-06, "loss": 0.2467, "step": 1033 }, { "epoch": 0.34529971614626814, "grad_norm": 0.6142047956027233, "learning_rate": 9.99311882673832e-06, "loss": 0.2333, "step": 1034 }, { "epoch": 0.34563366171314075, "grad_norm": 0.6555126781240093, "learning_rate": 9.993016529688033e-06, "loss": 0.2383, "step": 1035 }, { "epoch": 0.34596760728001336, "grad_norm": 0.6360668156570694, "learning_rate": 9.99291347838381e-06, "loss": 0.2418, "step": 1036 }, { "epoch": 0.34630155284688596, "grad_norm": 0.6241515101912609, "learning_rate": 9.992809672841218e-06, "loss": 0.2362, "step": 1037 }, { "epoch": 0.34663549841375857, "grad_norm": 0.6300889467982095, "learning_rate": 9.992705113075933e-06, "loss": 0.237, "step": 1038 }, { "epoch": 0.3469694439806312, "grad_norm": 0.564235622274707, "learning_rate": 9.992599799103754e-06, "loss": 0.2354, "step": 1039 }, { "epoch": 0.3473033895475038, "grad_norm": 0.6842460803144252, "learning_rate": 9.99249373094059e-06, "loss": 0.2416, "step": 1040 }, { "epoch": 0.34763733511437633, "grad_norm": 0.7151896495749245, "learning_rate": 9.992386908602466e-06, "loss": 0.2572, "step": 1041 }, { "epoch": 0.34797128068124894, "grad_norm": 0.5719922134860118, "learning_rate": 9.992279332105512e-06, "loss": 0.2325, "step": 1042 }, { "epoch": 0.34830522624812155, "grad_norm": 0.7231166488003365, "learning_rate": 9.992171001465985e-06, "loss": 0.2549, "step": 1043 }, { "epoch": 0.34863917181499415, "grad_norm": 0.592629753373637, "learning_rate": 9.992061916700247e-06, "loss": 0.235, "step": 1044 }, { "epoch": 0.34897311738186676, "grad_norm": 0.6764193734512775, "learning_rate": 9.991952077824776e-06, "loss": 0.2413, "step": 1045 }, { "epoch": 0.34930706294873937, "grad_norm": 0.7115915690879145, "learning_rate": 9.991841484856166e-06, "loss": 0.2521, "step": 1046 }, { "epoch": 0.349641008515612, "grad_norm": 0.5797754264927091, "learning_rate": 9.991730137811122e-06, "loss": 0.2476, "step": 1047 }, { "epoch": 0.3499749540824846, "grad_norm": 0.6078645313605627, "learning_rate": 9.991618036706464e-06, "loss": 0.2429, "step": 1048 }, { "epoch": 0.35030889964935713, "grad_norm": 0.810806694264766, "learning_rate": 9.99150518155913e-06, "loss": 0.2475, "step": 1049 }, { "epoch": 0.35064284521622974, "grad_norm": 0.7023437648349548, "learning_rate": 9.991391572386162e-06, "loss": 0.2565, "step": 1050 }, { "epoch": 0.35097679078310234, "grad_norm": 0.7282652300633451, "learning_rate": 9.991277209204728e-06, "loss": 0.244, "step": 1051 }, { "epoch": 0.35131073634997495, "grad_norm": 0.6241402490643777, "learning_rate": 9.991162092032101e-06, "loss": 0.2614, "step": 1052 }, { "epoch": 0.35164468191684756, "grad_norm": 0.6258752658980473, "learning_rate": 9.99104622088567e-06, "loss": 0.2461, "step": 1053 }, { "epoch": 0.35197862748372016, "grad_norm": 0.6658730176732505, "learning_rate": 9.990929595782938e-06, "loss": 0.2402, "step": 1054 }, { "epoch": 0.35231257305059277, "grad_norm": 0.5923285109590797, "learning_rate": 9.990812216741529e-06, "loss": 0.2275, "step": 1055 }, { "epoch": 0.3526465186174654, "grad_norm": 0.6257275221441978, "learning_rate": 9.990694083779166e-06, "loss": 0.2396, "step": 1056 }, { "epoch": 0.3529804641843379, "grad_norm": 0.6557840979079784, "learning_rate": 9.990575196913699e-06, "loss": 0.2337, "step": 1057 }, { "epoch": 0.35331440975121053, "grad_norm": 0.7708982420567367, "learning_rate": 9.990455556163086e-06, "loss": 0.251, "step": 1058 }, { "epoch": 0.35364835531808314, "grad_norm": 0.7583771899024517, "learning_rate": 9.990335161545401e-06, "loss": 0.2584, "step": 1059 }, { "epoch": 0.35398230088495575, "grad_norm": 0.6318386182926147, "learning_rate": 9.99021401307883e-06, "loss": 0.2519, "step": 1060 }, { "epoch": 0.35431624645182835, "grad_norm": 0.769122863891848, "learning_rate": 9.990092110781675e-06, "loss": 0.2404, "step": 1061 }, { "epoch": 0.35465019201870096, "grad_norm": 0.6258454284206358, "learning_rate": 9.98996945467235e-06, "loss": 0.2406, "step": 1062 }, { "epoch": 0.35498413758557357, "grad_norm": 0.5571971449891453, "learning_rate": 9.989846044769384e-06, "loss": 0.2342, "step": 1063 }, { "epoch": 0.3553180831524462, "grad_norm": 0.6702068647047834, "learning_rate": 9.98972188109142e-06, "loss": 0.2532, "step": 1064 }, { "epoch": 0.3556520287193187, "grad_norm": 0.709957113892022, "learning_rate": 9.989596963657213e-06, "loss": 0.2501, "step": 1065 }, { "epoch": 0.35598597428619133, "grad_norm": 0.7180875116372593, "learning_rate": 9.989471292485636e-06, "loss": 0.2767, "step": 1066 }, { "epoch": 0.35631991985306394, "grad_norm": 0.6068909083103505, "learning_rate": 9.989344867595668e-06, "loss": 0.2425, "step": 1067 }, { "epoch": 0.35665386541993654, "grad_norm": 0.7137465058277483, "learning_rate": 9.989217689006412e-06, "loss": 0.2531, "step": 1068 }, { "epoch": 0.35698781098680915, "grad_norm": 0.6022049439905831, "learning_rate": 9.989089756737077e-06, "loss": 0.2427, "step": 1069 }, { "epoch": 0.35732175655368176, "grad_norm": 0.6083150772695914, "learning_rate": 9.988961070806991e-06, "loss": 0.248, "step": 1070 }, { "epoch": 0.35765570212055436, "grad_norm": 0.6471745928158773, "learning_rate": 9.988831631235591e-06, "loss": 0.238, "step": 1071 }, { "epoch": 0.35798964768742697, "grad_norm": 0.5615061566854802, "learning_rate": 9.98870143804243e-06, "loss": 0.2149, "step": 1072 }, { "epoch": 0.3583235932542995, "grad_norm": 0.6199663578980865, "learning_rate": 9.988570491247179e-06, "loss": 0.2493, "step": 1073 }, { "epoch": 0.35865753882117213, "grad_norm": 0.6728065943159997, "learning_rate": 9.988438790869616e-06, "loss": 0.2547, "step": 1074 }, { "epoch": 0.35899148438804473, "grad_norm": 0.7194963403788357, "learning_rate": 9.988306336929637e-06, "loss": 0.2482, "step": 1075 }, { "epoch": 0.35932542995491734, "grad_norm": 0.6633897906991787, "learning_rate": 9.988173129447251e-06, "loss": 0.257, "step": 1076 }, { "epoch": 0.35965937552178995, "grad_norm": 0.6374583029418619, "learning_rate": 9.98803916844258e-06, "loss": 0.2498, "step": 1077 }, { "epoch": 0.35999332108866255, "grad_norm": 0.5644829103003917, "learning_rate": 9.98790445393586e-06, "loss": 0.2219, "step": 1078 }, { "epoch": 0.36032726665553516, "grad_norm": 0.7465815493447656, "learning_rate": 9.98776898594744e-06, "loss": 0.2516, "step": 1079 }, { "epoch": 0.36066121222240777, "grad_norm": 0.6137141055718743, "learning_rate": 9.987632764497787e-06, "loss": 0.2307, "step": 1080 }, { "epoch": 0.3609951577892804, "grad_norm": 0.6606373030819324, "learning_rate": 9.987495789607478e-06, "loss": 0.2372, "step": 1081 }, { "epoch": 0.3613291033561529, "grad_norm": 0.6144078594991942, "learning_rate": 9.987358061297203e-06, "loss": 0.2552, "step": 1082 }, { "epoch": 0.36166304892302553, "grad_norm": 0.6034573546156843, "learning_rate": 9.987219579587768e-06, "loss": 0.2271, "step": 1083 }, { "epoch": 0.36199699448989814, "grad_norm": 0.6365200936320912, "learning_rate": 9.987080344500094e-06, "loss": 0.2305, "step": 1084 }, { "epoch": 0.36233094005677075, "grad_norm": 0.6381373323927708, "learning_rate": 9.986940356055212e-06, "loss": 0.2371, "step": 1085 }, { "epoch": 0.36266488562364335, "grad_norm": 0.612493000628613, "learning_rate": 9.986799614274271e-06, "loss": 0.238, "step": 1086 }, { "epoch": 0.36299883119051596, "grad_norm": 0.6157534327216453, "learning_rate": 9.986658119178532e-06, "loss": 0.2597, "step": 1087 }, { "epoch": 0.36333277675738856, "grad_norm": 0.6729028218692379, "learning_rate": 9.986515870789366e-06, "loss": 0.2415, "step": 1088 }, { "epoch": 0.36366672232426117, "grad_norm": 0.6283943072436228, "learning_rate": 9.986372869128264e-06, "loss": 0.2563, "step": 1089 }, { "epoch": 0.3640006678911337, "grad_norm": 0.6576049042218921, "learning_rate": 9.986229114216828e-06, "loss": 0.2427, "step": 1090 }, { "epoch": 0.36433461345800633, "grad_norm": 0.5841024132825844, "learning_rate": 9.986084606076772e-06, "loss": 0.242, "step": 1091 }, { "epoch": 0.36466855902487894, "grad_norm": 0.5877810096206579, "learning_rate": 9.985939344729926e-06, "loss": 0.2214, "step": 1092 }, { "epoch": 0.36500250459175154, "grad_norm": 0.590923371403508, "learning_rate": 9.985793330198237e-06, "loss": 0.2376, "step": 1093 }, { "epoch": 0.36533645015862415, "grad_norm": 0.5462432642092312, "learning_rate": 9.98564656250376e-06, "loss": 0.2239, "step": 1094 }, { "epoch": 0.36567039572549676, "grad_norm": 0.649410508856008, "learning_rate": 9.985499041668664e-06, "loss": 0.2589, "step": 1095 }, { "epoch": 0.36600434129236936, "grad_norm": 0.5597787325842369, "learning_rate": 9.985350767715236e-06, "loss": 0.2265, "step": 1096 }, { "epoch": 0.36633828685924197, "grad_norm": 0.5506515525737525, "learning_rate": 9.985201740665873e-06, "loss": 0.2254, "step": 1097 }, { "epoch": 0.3666722324261145, "grad_norm": 0.599236470388504, "learning_rate": 9.98505196054309e-06, "loss": 0.2164, "step": 1098 }, { "epoch": 0.3670061779929871, "grad_norm": 0.6214018418575783, "learning_rate": 9.98490142736951e-06, "loss": 0.258, "step": 1099 }, { "epoch": 0.36734012355985973, "grad_norm": 0.6032134090942005, "learning_rate": 9.984750141167874e-06, "loss": 0.2412, "step": 1100 }, { "epoch": 0.36767406912673234, "grad_norm": 0.6180438639636011, "learning_rate": 9.984598101961036e-06, "loss": 0.2426, "step": 1101 }, { "epoch": 0.36800801469360495, "grad_norm": 0.9315847932687742, "learning_rate": 9.984445309771963e-06, "loss": 0.2613, "step": 1102 }, { "epoch": 0.36834196026047755, "grad_norm": 0.5753009194815258, "learning_rate": 9.984291764623735e-06, "loss": 0.2298, "step": 1103 }, { "epoch": 0.36867590582735016, "grad_norm": 0.7200885040091543, "learning_rate": 9.98413746653955e-06, "loss": 0.2487, "step": 1104 }, { "epoch": 0.36900985139422277, "grad_norm": 0.6844842815753607, "learning_rate": 9.983982415542713e-06, "loss": 0.2229, "step": 1105 }, { "epoch": 0.3693437969610953, "grad_norm": 0.610318931691899, "learning_rate": 9.983826611656649e-06, "loss": 0.2417, "step": 1106 }, { "epoch": 0.3696777425279679, "grad_norm": 0.6735146550314316, "learning_rate": 9.983670054904891e-06, "loss": 0.2455, "step": 1107 }, { "epoch": 0.37001168809484053, "grad_norm": 0.6404413794547794, "learning_rate": 9.98351274531109e-06, "loss": 0.2393, "step": 1108 }, { "epoch": 0.37034563366171314, "grad_norm": 0.590966335203484, "learning_rate": 9.983354682899012e-06, "loss": 0.2316, "step": 1109 }, { "epoch": 0.37067957922858574, "grad_norm": 0.6989245308286045, "learning_rate": 9.98319586769253e-06, "loss": 0.2408, "step": 1110 }, { "epoch": 0.37101352479545835, "grad_norm": 0.641463921195796, "learning_rate": 9.983036299715637e-06, "loss": 0.2358, "step": 1111 }, { "epoch": 0.37134747036233096, "grad_norm": 0.7275199738126983, "learning_rate": 9.98287597899244e-06, "loss": 0.2494, "step": 1112 }, { "epoch": 0.37168141592920356, "grad_norm": 0.6809370125852048, "learning_rate": 9.982714905547152e-06, "loss": 0.2322, "step": 1113 }, { "epoch": 0.3720153614960761, "grad_norm": 0.6312324387169239, "learning_rate": 9.982553079404109e-06, "loss": 0.2458, "step": 1114 }, { "epoch": 0.3723493070629487, "grad_norm": 0.5856517142062221, "learning_rate": 9.982390500587755e-06, "loss": 0.2344, "step": 1115 }, { "epoch": 0.3726832526298213, "grad_norm": 0.6576879620189332, "learning_rate": 9.982227169122652e-06, "loss": 0.2406, "step": 1116 }, { "epoch": 0.37301719819669393, "grad_norm": 0.6109211179006137, "learning_rate": 9.98206308503347e-06, "loss": 0.2407, "step": 1117 }, { "epoch": 0.37335114376356654, "grad_norm": 0.5360857677658837, "learning_rate": 9.981898248344996e-06, "loss": 0.2291, "step": 1118 }, { "epoch": 0.37368508933043915, "grad_norm": 0.6350541721035942, "learning_rate": 9.981732659082136e-06, "loss": 0.2351, "step": 1119 }, { "epoch": 0.37401903489731175, "grad_norm": 0.5564505418095035, "learning_rate": 9.981566317269895e-06, "loss": 0.2199, "step": 1120 }, { "epoch": 0.37435298046418436, "grad_norm": 0.5869314057324901, "learning_rate": 9.981399222933408e-06, "loss": 0.2459, "step": 1121 }, { "epoch": 0.3746869260310569, "grad_norm": 0.7657771256449448, "learning_rate": 9.981231376097914e-06, "loss": 0.2407, "step": 1122 }, { "epoch": 0.3750208715979295, "grad_norm": 0.5795690536586283, "learning_rate": 9.981062776788769e-06, "loss": 0.2347, "step": 1123 }, { "epoch": 0.3753548171648021, "grad_norm": 0.5891710443110899, "learning_rate": 9.98089342503144e-06, "loss": 0.2382, "step": 1124 }, { "epoch": 0.37568876273167473, "grad_norm": 0.621199984594368, "learning_rate": 9.980723320851512e-06, "loss": 0.2347, "step": 1125 }, { "epoch": 0.37602270829854734, "grad_norm": 0.6517502567095506, "learning_rate": 9.98055246427468e-06, "loss": 0.2462, "step": 1126 }, { "epoch": 0.37635665386541994, "grad_norm": 0.6881884763822493, "learning_rate": 9.980380855326754e-06, "loss": 0.2504, "step": 1127 }, { "epoch": 0.37669059943229255, "grad_norm": 0.5745203456313879, "learning_rate": 9.980208494033659e-06, "loss": 0.2404, "step": 1128 }, { "epoch": 0.37702454499916516, "grad_norm": 0.6050555371100264, "learning_rate": 9.98003538042143e-06, "loss": 0.2531, "step": 1129 }, { "epoch": 0.37735849056603776, "grad_norm": 0.9254994222499924, "learning_rate": 9.979861514516217e-06, "loss": 0.2427, "step": 1130 }, { "epoch": 0.3776924361329103, "grad_norm": 0.6004163946203853, "learning_rate": 9.979686896344289e-06, "loss": 0.2199, "step": 1131 }, { "epoch": 0.3780263816997829, "grad_norm": 0.6102051176431205, "learning_rate": 9.97951152593202e-06, "loss": 0.2514, "step": 1132 }, { "epoch": 0.3783603272666555, "grad_norm": 0.6680363132138403, "learning_rate": 9.979335403305904e-06, "loss": 0.2491, "step": 1133 }, { "epoch": 0.37869427283352813, "grad_norm": 0.6372805133933099, "learning_rate": 9.979158528492546e-06, "loss": 0.2553, "step": 1134 }, { "epoch": 0.37902821840040074, "grad_norm": 0.7266606127441678, "learning_rate": 9.978980901518663e-06, "loss": 0.2435, "step": 1135 }, { "epoch": 0.37936216396727335, "grad_norm": 0.5668609743092723, "learning_rate": 9.978802522411091e-06, "loss": 0.2361, "step": 1136 }, { "epoch": 0.37969610953414595, "grad_norm": 0.6140023762446175, "learning_rate": 9.978623391196774e-06, "loss": 0.2581, "step": 1137 }, { "epoch": 0.38003005510101856, "grad_norm": 0.6739052795637623, "learning_rate": 9.978443507902772e-06, "loss": 0.2446, "step": 1138 }, { "epoch": 0.3803640006678911, "grad_norm": 0.5882555204439429, "learning_rate": 9.978262872556257e-06, "loss": 0.2407, "step": 1139 }, { "epoch": 0.3806979462347637, "grad_norm": 0.7887496587693288, "learning_rate": 9.97808148518452e-06, "loss": 0.2564, "step": 1140 }, { "epoch": 0.3810318918016363, "grad_norm": 0.7154651655569723, "learning_rate": 9.977899345814959e-06, "loss": 0.2677, "step": 1141 }, { "epoch": 0.38136583736850893, "grad_norm": 0.5884603812414391, "learning_rate": 9.977716454475089e-06, "loss": 0.243, "step": 1142 }, { "epoch": 0.38169978293538154, "grad_norm": 0.5670137381925265, "learning_rate": 9.977532811192539e-06, "loss": 0.2321, "step": 1143 }, { "epoch": 0.38203372850225414, "grad_norm": 0.6540687344399836, "learning_rate": 9.977348415995048e-06, "loss": 0.2382, "step": 1144 }, { "epoch": 0.38236767406912675, "grad_norm": 0.6062895960141702, "learning_rate": 9.977163268910472e-06, "loss": 0.2467, "step": 1145 }, { "epoch": 0.38270161963599936, "grad_norm": 0.6354013496136728, "learning_rate": 9.976977369966781e-06, "loss": 0.2568, "step": 1146 }, { "epoch": 0.3830355652028719, "grad_norm": 0.6291640800274567, "learning_rate": 9.976790719192055e-06, "loss": 0.239, "step": 1147 }, { "epoch": 0.3833695107697445, "grad_norm": 0.6401219097828764, "learning_rate": 9.976603316614492e-06, "loss": 0.2528, "step": 1148 }, { "epoch": 0.3837034563366171, "grad_norm": 0.6010786738863266, "learning_rate": 9.976415162262401e-06, "loss": 0.2328, "step": 1149 }, { "epoch": 0.38403740190348973, "grad_norm": 0.606105846014851, "learning_rate": 9.976226256164204e-06, "loss": 0.249, "step": 1150 }, { "epoch": 0.38437134747036233, "grad_norm": 0.6021670703179877, "learning_rate": 9.976036598348437e-06, "loss": 0.2464, "step": 1151 }, { "epoch": 0.38470529303723494, "grad_norm": 0.5488912740197421, "learning_rate": 9.975846188843754e-06, "loss": 0.2343, "step": 1152 }, { "epoch": 0.38503923860410755, "grad_norm": 0.6489026795753373, "learning_rate": 9.975655027678913e-06, "loss": 0.2383, "step": 1153 }, { "epoch": 0.38537318417098015, "grad_norm": 0.5352509454624307, "learning_rate": 9.975463114882792e-06, "loss": 0.2385, "step": 1154 }, { "epoch": 0.3857071297378527, "grad_norm": 0.5557378389098141, "learning_rate": 9.975270450484385e-06, "loss": 0.2331, "step": 1155 }, { "epoch": 0.3860410753047253, "grad_norm": 0.6592063946537283, "learning_rate": 9.975077034512795e-06, "loss": 0.228, "step": 1156 }, { "epoch": 0.3863750208715979, "grad_norm": 0.6561305895606129, "learning_rate": 9.97488286699724e-06, "loss": 0.2507, "step": 1157 }, { "epoch": 0.3867089664384705, "grad_norm": 0.5344971723442756, "learning_rate": 9.974687947967047e-06, "loss": 0.2347, "step": 1158 }, { "epoch": 0.38704291200534313, "grad_norm": 0.5740634729457469, "learning_rate": 9.974492277451668e-06, "loss": 0.2402, "step": 1159 }, { "epoch": 0.38737685757221574, "grad_norm": 0.6784228325087824, "learning_rate": 9.974295855480658e-06, "loss": 0.2468, "step": 1160 }, { "epoch": 0.38771080313908834, "grad_norm": 0.5351257365785597, "learning_rate": 9.974098682083687e-06, "loss": 0.236, "step": 1161 }, { "epoch": 0.38804474870596095, "grad_norm": 0.5153552023195942, "learning_rate": 9.973900757290541e-06, "loss": 0.2096, "step": 1162 }, { "epoch": 0.3883786942728335, "grad_norm": 0.5687511104588763, "learning_rate": 9.97370208113112e-06, "loss": 0.2368, "step": 1163 }, { "epoch": 0.3887126398397061, "grad_norm": 0.6394062683395763, "learning_rate": 9.973502653635438e-06, "loss": 0.2521, "step": 1164 }, { "epoch": 0.3890465854065787, "grad_norm": 0.6513149391308338, "learning_rate": 9.97330247483362e-06, "loss": 0.2421, "step": 1165 }, { "epoch": 0.3893805309734513, "grad_norm": 0.5407808834505893, "learning_rate": 9.973101544755901e-06, "loss": 0.2295, "step": 1166 }, { "epoch": 0.38971447654032393, "grad_norm": 0.6398170225716239, "learning_rate": 9.97289986343264e-06, "loss": 0.2346, "step": 1167 }, { "epoch": 0.39004842210719654, "grad_norm": 0.6043234167832073, "learning_rate": 9.972697430894299e-06, "loss": 0.2338, "step": 1168 }, { "epoch": 0.39038236767406914, "grad_norm": 0.6333600734362572, "learning_rate": 9.97249424717146e-06, "loss": 0.2472, "step": 1169 }, { "epoch": 0.39071631324094175, "grad_norm": 0.6017998882821272, "learning_rate": 9.972290312294816e-06, "loss": 0.2488, "step": 1170 }, { "epoch": 0.3910502588078143, "grad_norm": 0.6311509148718895, "learning_rate": 9.972085626295173e-06, "loss": 0.2426, "step": 1171 }, { "epoch": 0.3913842043746869, "grad_norm": 0.6141756863914073, "learning_rate": 9.971880189203452e-06, "loss": 0.2537, "step": 1172 }, { "epoch": 0.3917181499415595, "grad_norm": 0.5793351876901642, "learning_rate": 9.971674001050687e-06, "loss": 0.2432, "step": 1173 }, { "epoch": 0.3920520955084321, "grad_norm": 0.7048398913255026, "learning_rate": 9.971467061868022e-06, "loss": 0.2774, "step": 1174 }, { "epoch": 0.3923860410753047, "grad_norm": 0.5894804335467615, "learning_rate": 9.971259371686724e-06, "loss": 0.2264, "step": 1175 }, { "epoch": 0.39271998664217733, "grad_norm": 0.6305881343504768, "learning_rate": 9.971050930538161e-06, "loss": 0.2515, "step": 1176 }, { "epoch": 0.39305393220904994, "grad_norm": 0.6169670939348186, "learning_rate": 9.970841738453823e-06, "loss": 0.2474, "step": 1177 }, { "epoch": 0.39338787777592255, "grad_norm": 0.6203195420477706, "learning_rate": 9.970631795465311e-06, "loss": 0.2446, "step": 1178 }, { "epoch": 0.39372182334279515, "grad_norm": 0.5285532423016363, "learning_rate": 9.970421101604339e-06, "loss": 0.2168, "step": 1179 }, { "epoch": 0.3940557689096677, "grad_norm": 0.6175748683107137, "learning_rate": 9.970209656902734e-06, "loss": 0.2421, "step": 1180 }, { "epoch": 0.3943897144765403, "grad_norm": 0.5780266651964711, "learning_rate": 9.969997461392439e-06, "loss": 0.2334, "step": 1181 }, { "epoch": 0.3947236600434129, "grad_norm": 0.7028863790206683, "learning_rate": 9.969784515105508e-06, "loss": 0.2619, "step": 1182 }, { "epoch": 0.3950576056102855, "grad_norm": 0.5744613731332193, "learning_rate": 9.969570818074109e-06, "loss": 0.2273, "step": 1183 }, { "epoch": 0.39539155117715813, "grad_norm": 0.7222715318962293, "learning_rate": 9.96935637033052e-06, "loss": 0.2282, "step": 1184 }, { "epoch": 0.39572549674403074, "grad_norm": 0.6124101572650357, "learning_rate": 9.969141171907142e-06, "loss": 0.2348, "step": 1185 }, { "epoch": 0.39605944231090334, "grad_norm": 0.5960876102003673, "learning_rate": 9.968925222836478e-06, "loss": 0.235, "step": 1186 }, { "epoch": 0.39639338787777595, "grad_norm": 0.574113404593996, "learning_rate": 9.968708523151154e-06, "loss": 0.2494, "step": 1187 }, { "epoch": 0.3967273334446485, "grad_norm": 0.660656988924486, "learning_rate": 9.968491072883902e-06, "loss": 0.2437, "step": 1188 }, { "epoch": 0.3970612790115211, "grad_norm": 0.6305428349461573, "learning_rate": 9.968272872067571e-06, "loss": 0.2477, "step": 1189 }, { "epoch": 0.3973952245783937, "grad_norm": 0.6256118019755806, "learning_rate": 9.968053920735124e-06, "loss": 0.2616, "step": 1190 }, { "epoch": 0.3977291701452663, "grad_norm": 0.5724515150428571, "learning_rate": 9.967834218919634e-06, "loss": 0.2399, "step": 1191 }, { "epoch": 0.3980631157121389, "grad_norm": 0.7058668997385279, "learning_rate": 9.967613766654293e-06, "loss": 0.2488, "step": 1192 }, { "epoch": 0.39839706127901153, "grad_norm": 0.5837497840997278, "learning_rate": 9.967392563972399e-06, "loss": 0.2404, "step": 1193 }, { "epoch": 0.39873100684588414, "grad_norm": 0.5877505859460367, "learning_rate": 9.96717061090737e-06, "loss": 0.2427, "step": 1194 }, { "epoch": 0.39906495241275675, "grad_norm": 0.6767793531128838, "learning_rate": 9.966947907492734e-06, "loss": 0.2423, "step": 1195 }, { "epoch": 0.3993988979796293, "grad_norm": 0.7876764633998403, "learning_rate": 9.966724453762131e-06, "loss": 0.2593, "step": 1196 }, { "epoch": 0.3997328435465019, "grad_norm": 0.664535756576125, "learning_rate": 9.96650024974932e-06, "loss": 0.2668, "step": 1197 }, { "epoch": 0.4000667891133745, "grad_norm": 0.6748545214684532, "learning_rate": 9.966275295488165e-06, "loss": 0.2307, "step": 1198 }, { "epoch": 0.4004007346802471, "grad_norm": 0.5847120767540386, "learning_rate": 9.966049591012651e-06, "loss": 0.25, "step": 1199 }, { "epoch": 0.4007346802471197, "grad_norm": 0.5593088473361937, "learning_rate": 9.965823136356877e-06, "loss": 0.2512, "step": 1200 }, { "epoch": 0.40106862581399233, "grad_norm": 0.5979036171420881, "learning_rate": 9.965595931555043e-06, "loss": 0.2344, "step": 1201 }, { "epoch": 0.40140257138086494, "grad_norm": 0.5610390171317386, "learning_rate": 9.965367976641478e-06, "loss": 0.2297, "step": 1202 }, { "epoch": 0.40173651694773754, "grad_norm": 0.6833684414411917, "learning_rate": 9.965139271650614e-06, "loss": 0.2459, "step": 1203 }, { "epoch": 0.4020704625146101, "grad_norm": 0.728364535501137, "learning_rate": 9.964909816617002e-06, "loss": 0.2544, "step": 1204 }, { "epoch": 0.4024044080814827, "grad_norm": 0.5693930955364505, "learning_rate": 9.964679611575298e-06, "loss": 0.2378, "step": 1205 }, { "epoch": 0.4027383536483553, "grad_norm": 0.6133102401066323, "learning_rate": 9.964448656560286e-06, "loss": 0.2279, "step": 1206 }, { "epoch": 0.4030722992152279, "grad_norm": 0.563392679821793, "learning_rate": 9.964216951606848e-06, "loss": 0.2397, "step": 1207 }, { "epoch": 0.4034062447821005, "grad_norm": 0.5672909007940137, "learning_rate": 9.963984496749988e-06, "loss": 0.2267, "step": 1208 }, { "epoch": 0.4037401903489731, "grad_norm": 0.6488344228564302, "learning_rate": 9.96375129202482e-06, "loss": 0.2494, "step": 1209 }, { "epoch": 0.40407413591584573, "grad_norm": 0.5139669587828634, "learning_rate": 9.963517337466575e-06, "loss": 0.207, "step": 1210 }, { "epoch": 0.40440808148271834, "grad_norm": 0.5287676891678358, "learning_rate": 9.963282633110591e-06, "loss": 0.2303, "step": 1211 }, { "epoch": 0.4047420270495909, "grad_norm": 0.5058680387315015, "learning_rate": 9.963047178992324e-06, "loss": 0.22, "step": 1212 }, { "epoch": 0.4050759726164635, "grad_norm": 0.5445840438834135, "learning_rate": 9.962810975147344e-06, "loss": 0.2397, "step": 1213 }, { "epoch": 0.4054099181833361, "grad_norm": 0.6382046509955335, "learning_rate": 9.96257402161133e-06, "loss": 0.2715, "step": 1214 }, { "epoch": 0.4057438637502087, "grad_norm": 0.5413823853664697, "learning_rate": 9.962336318420078e-06, "loss": 0.2261, "step": 1215 }, { "epoch": 0.4060778093170813, "grad_norm": 0.6202996949474051, "learning_rate": 9.962097865609495e-06, "loss": 0.2283, "step": 1216 }, { "epoch": 0.4064117548839539, "grad_norm": 0.60696612940717, "learning_rate": 9.961858663215604e-06, "loss": 0.2401, "step": 1217 }, { "epoch": 0.40674570045082653, "grad_norm": 0.649264221755663, "learning_rate": 9.961618711274537e-06, "loss": 0.2472, "step": 1218 }, { "epoch": 0.40707964601769914, "grad_norm": 0.5501090472861203, "learning_rate": 9.961378009822542e-06, "loss": 0.2407, "step": 1219 }, { "epoch": 0.4074135915845717, "grad_norm": 0.5345854297532595, "learning_rate": 9.961136558895981e-06, "loss": 0.228, "step": 1220 }, { "epoch": 0.4077475371514443, "grad_norm": 0.5125851264368648, "learning_rate": 9.960894358531328e-06, "loss": 0.2167, "step": 1221 }, { "epoch": 0.4080814827183169, "grad_norm": 0.5920584268375877, "learning_rate": 9.960651408765168e-06, "loss": 0.2367, "step": 1222 }, { "epoch": 0.4084154282851895, "grad_norm": 0.716453784192867, "learning_rate": 9.960407709634203e-06, "loss": 0.2477, "step": 1223 }, { "epoch": 0.4087493738520621, "grad_norm": 0.7562990534172225, "learning_rate": 9.960163261175247e-06, "loss": 0.2332, "step": 1224 }, { "epoch": 0.4090833194189347, "grad_norm": 0.5231312350431248, "learning_rate": 9.959918063425228e-06, "loss": 0.2259, "step": 1225 }, { "epoch": 0.4094172649858073, "grad_norm": 0.5921963859667128, "learning_rate": 9.959672116421181e-06, "loss": 0.2263, "step": 1226 }, { "epoch": 0.40975121055267993, "grad_norm": 0.6028966599262293, "learning_rate": 9.959425420200267e-06, "loss": 0.2489, "step": 1227 }, { "epoch": 0.4100851561195525, "grad_norm": 0.686672288403263, "learning_rate": 9.959177974799742e-06, "loss": 0.2508, "step": 1228 }, { "epoch": 0.4104191016864251, "grad_norm": 0.5673389018187519, "learning_rate": 9.958929780256996e-06, "loss": 0.2207, "step": 1229 }, { "epoch": 0.4107530472532977, "grad_norm": 0.5753969316911537, "learning_rate": 9.958680836609516e-06, "loss": 0.239, "step": 1230 }, { "epoch": 0.4110869928201703, "grad_norm": 0.676883776899676, "learning_rate": 9.95843114389491e-06, "loss": 0.2225, "step": 1231 }, { "epoch": 0.4114209383870429, "grad_norm": 0.6238678967525173, "learning_rate": 9.958180702150895e-06, "loss": 0.2278, "step": 1232 }, { "epoch": 0.4117548839539155, "grad_norm": 0.6316307542043934, "learning_rate": 9.957929511415304e-06, "loss": 0.2485, "step": 1233 }, { "epoch": 0.4120888295207881, "grad_norm": 0.5602900296016604, "learning_rate": 9.957677571726084e-06, "loss": 0.2288, "step": 1234 }, { "epoch": 0.41242277508766073, "grad_norm": 0.6275850836013913, "learning_rate": 9.95742488312129e-06, "loss": 0.2393, "step": 1235 }, { "epoch": 0.41275672065453334, "grad_norm": 0.6298629000187451, "learning_rate": 9.957171445639096e-06, "loss": 0.2456, "step": 1236 }, { "epoch": 0.4130906662214059, "grad_norm": 0.5989836124208895, "learning_rate": 9.956917259317788e-06, "loss": 0.2349, "step": 1237 }, { "epoch": 0.4134246117882785, "grad_norm": 0.6067541086981411, "learning_rate": 9.95666232419576e-06, "loss": 0.2377, "step": 1238 }, { "epoch": 0.4137585573551511, "grad_norm": 0.5754404112239477, "learning_rate": 9.956406640311527e-06, "loss": 0.2555, "step": 1239 }, { "epoch": 0.4140925029220237, "grad_norm": 0.5621508892107175, "learning_rate": 9.956150207703712e-06, "loss": 0.2386, "step": 1240 }, { "epoch": 0.4144264484888963, "grad_norm": 0.5959704813881286, "learning_rate": 9.955893026411048e-06, "loss": 0.2469, "step": 1241 }, { "epoch": 0.4147603940557689, "grad_norm": 0.6373037754433803, "learning_rate": 9.955635096472391e-06, "loss": 0.2326, "step": 1242 }, { "epoch": 0.41509433962264153, "grad_norm": 0.8712757922100017, "learning_rate": 9.9553764179267e-06, "loss": 0.2408, "step": 1243 }, { "epoch": 0.41542828518951413, "grad_norm": 0.5794440921487206, "learning_rate": 9.955116990813056e-06, "loss": 0.2515, "step": 1244 }, { "epoch": 0.4157622307563867, "grad_norm": 0.5417617912815341, "learning_rate": 9.954856815170644e-06, "loss": 0.2257, "step": 1245 }, { "epoch": 0.4160961763232593, "grad_norm": 0.608261531031807, "learning_rate": 9.95459589103877e-06, "loss": 0.2422, "step": 1246 }, { "epoch": 0.4164301218901319, "grad_norm": 0.5882276462833476, "learning_rate": 9.954334218456846e-06, "loss": 0.237, "step": 1247 }, { "epoch": 0.4167640674570045, "grad_norm": 0.9285636581320588, "learning_rate": 9.954071797464405e-06, "loss": 0.249, "step": 1248 }, { "epoch": 0.4170980130238771, "grad_norm": 0.6268984023382586, "learning_rate": 9.953808628101086e-06, "loss": 0.2415, "step": 1249 }, { "epoch": 0.4174319585907497, "grad_norm": 0.6069222808256296, "learning_rate": 9.953544710406646e-06, "loss": 0.2545, "step": 1250 }, { "epoch": 0.4177659041576223, "grad_norm": 0.5778817193778154, "learning_rate": 9.95328004442095e-06, "loss": 0.241, "step": 1251 }, { "epoch": 0.41809984972449493, "grad_norm": 0.61922600341147, "learning_rate": 9.953014630183979e-06, "loss": 0.2244, "step": 1252 }, { "epoch": 0.4184337952913675, "grad_norm": 0.6687820682983995, "learning_rate": 9.95274846773583e-06, "loss": 0.2337, "step": 1253 }, { "epoch": 0.4187677408582401, "grad_norm": 0.5042432866684735, "learning_rate": 9.952481557116708e-06, "loss": 0.2169, "step": 1254 }, { "epoch": 0.4191016864251127, "grad_norm": 0.6258367926926472, "learning_rate": 9.952213898366932e-06, "loss": 0.249, "step": 1255 }, { "epoch": 0.4194356319919853, "grad_norm": 0.5585811866612296, "learning_rate": 9.951945491526938e-06, "loss": 0.226, "step": 1256 }, { "epoch": 0.4197695775588579, "grad_norm": 0.6218677388521249, "learning_rate": 9.951676336637267e-06, "loss": 0.2318, "step": 1257 }, { "epoch": 0.4201035231257305, "grad_norm": 0.5375786545701925, "learning_rate": 9.951406433738587e-06, "loss": 0.2295, "step": 1258 }, { "epoch": 0.4204374686926031, "grad_norm": 0.5316795089983514, "learning_rate": 9.95113578287166e-06, "loss": 0.2278, "step": 1259 }, { "epoch": 0.42077141425947573, "grad_norm": 0.6387497516438277, "learning_rate": 9.950864384077376e-06, "loss": 0.2513, "step": 1260 }, { "epoch": 0.4211053598263483, "grad_norm": 0.7334294291090919, "learning_rate": 9.950592237396732e-06, "loss": 0.2376, "step": 1261 }, { "epoch": 0.4214393053932209, "grad_norm": 0.47638791811568987, "learning_rate": 9.95031934287084e-06, "loss": 0.2203, "step": 1262 }, { "epoch": 0.4217732509600935, "grad_norm": 0.4970617194044711, "learning_rate": 9.950045700540923e-06, "loss": 0.2322, "step": 1263 }, { "epoch": 0.4221071965269661, "grad_norm": 0.5481044534689685, "learning_rate": 9.949771310448317e-06, "loss": 0.2348, "step": 1264 }, { "epoch": 0.4224411420938387, "grad_norm": 0.5249211204705201, "learning_rate": 9.949496172634474e-06, "loss": 0.2237, "step": 1265 }, { "epoch": 0.4227750876607113, "grad_norm": 0.6158684467073959, "learning_rate": 9.949220287140955e-06, "loss": 0.2528, "step": 1266 }, { "epoch": 0.4231090332275839, "grad_norm": 0.5751869229573902, "learning_rate": 9.948943654009438e-06, "loss": 0.2437, "step": 1267 }, { "epoch": 0.4234429787944565, "grad_norm": 0.5278534477612402, "learning_rate": 9.948666273281708e-06, "loss": 0.2281, "step": 1268 }, { "epoch": 0.4237769243613291, "grad_norm": 0.6021004635115851, "learning_rate": 9.94838814499967e-06, "loss": 0.2273, "step": 1269 }, { "epoch": 0.4241108699282017, "grad_norm": 0.5556933684548845, "learning_rate": 9.948109269205338e-06, "loss": 0.2458, "step": 1270 }, { "epoch": 0.4244448154950743, "grad_norm": 0.581592239372877, "learning_rate": 9.947829645940836e-06, "loss": 0.2316, "step": 1271 }, { "epoch": 0.4247787610619469, "grad_norm": 0.5489943931236657, "learning_rate": 9.94754927524841e-06, "loss": 0.2313, "step": 1272 }, { "epoch": 0.4251127066288195, "grad_norm": 0.6603804956451033, "learning_rate": 9.947268157170409e-06, "loss": 0.2503, "step": 1273 }, { "epoch": 0.4254466521956921, "grad_norm": 0.5408603570247881, "learning_rate": 9.9469862917493e-06, "loss": 0.2385, "step": 1274 }, { "epoch": 0.4257805977625647, "grad_norm": 0.506151945318377, "learning_rate": 9.946703679027664e-06, "loss": 0.2129, "step": 1275 }, { "epoch": 0.4261145433294373, "grad_norm": 0.6388174252564417, "learning_rate": 9.946420319048192e-06, "loss": 0.2522, "step": 1276 }, { "epoch": 0.4264484888963099, "grad_norm": 0.6479722260293415, "learning_rate": 9.946136211853689e-06, "loss": 0.2502, "step": 1277 }, { "epoch": 0.4267824344631825, "grad_norm": 0.5423191804013884, "learning_rate": 9.94585135748707e-06, "loss": 0.2243, "step": 1278 }, { "epoch": 0.4271163800300551, "grad_norm": 0.6023773989376456, "learning_rate": 9.94556575599137e-06, "loss": 0.2388, "step": 1279 }, { "epoch": 0.4274503255969277, "grad_norm": 0.6403445586636296, "learning_rate": 9.94527940740973e-06, "loss": 0.253, "step": 1280 }, { "epoch": 0.4277842711638003, "grad_norm": 0.5765601105471051, "learning_rate": 9.944992311785406e-06, "loss": 0.2351, "step": 1281 }, { "epoch": 0.4281182167306729, "grad_norm": 0.5943838786454438, "learning_rate": 9.94470446916177e-06, "loss": 0.2477, "step": 1282 }, { "epoch": 0.4284521622975455, "grad_norm": 0.5967603952647004, "learning_rate": 9.9444158795823e-06, "loss": 0.2286, "step": 1283 }, { "epoch": 0.4287861078644181, "grad_norm": 0.5889203354692991, "learning_rate": 9.944126543090593e-06, "loss": 0.2318, "step": 1284 }, { "epoch": 0.4291200534312907, "grad_norm": 0.5778644128403275, "learning_rate": 9.943836459730356e-06, "loss": 0.2385, "step": 1285 }, { "epoch": 0.4294539989981633, "grad_norm": 0.829101133307546, "learning_rate": 9.943545629545412e-06, "loss": 0.2493, "step": 1286 }, { "epoch": 0.4297879445650359, "grad_norm": 0.5682692079123255, "learning_rate": 9.94325405257969e-06, "loss": 0.2594, "step": 1287 }, { "epoch": 0.4301218901319085, "grad_norm": 0.5616769757489756, "learning_rate": 9.94296172887724e-06, "loss": 0.2407, "step": 1288 }, { "epoch": 0.4304558356987811, "grad_norm": 0.5082241382193762, "learning_rate": 9.942668658482219e-06, "loss": 0.2247, "step": 1289 }, { "epoch": 0.4307897812656537, "grad_norm": 0.5720871139751971, "learning_rate": 9.942374841438898e-06, "loss": 0.2412, "step": 1290 }, { "epoch": 0.4311237268325263, "grad_norm": 0.5571165598994844, "learning_rate": 9.942080277791663e-06, "loss": 0.2318, "step": 1291 }, { "epoch": 0.4314576723993989, "grad_norm": 0.6092865735409773, "learning_rate": 9.941784967585012e-06, "loss": 0.2472, "step": 1292 }, { "epoch": 0.4317916179662715, "grad_norm": 0.47701447633800187, "learning_rate": 9.941488910863553e-06, "loss": 0.2055, "step": 1293 }, { "epoch": 0.4321255635331441, "grad_norm": 0.5174504669044425, "learning_rate": 9.941192107672011e-06, "loss": 0.2353, "step": 1294 }, { "epoch": 0.4324595091000167, "grad_norm": 0.5998296572054426, "learning_rate": 9.940894558055218e-06, "loss": 0.2497, "step": 1295 }, { "epoch": 0.4327934546668893, "grad_norm": 0.5960708917312382, "learning_rate": 9.940596262058128e-06, "loss": 0.2369, "step": 1296 }, { "epoch": 0.4331274002337619, "grad_norm": 0.5601394501830662, "learning_rate": 9.940297219725797e-06, "loss": 0.2363, "step": 1297 }, { "epoch": 0.4334613458006345, "grad_norm": 0.5181219900429102, "learning_rate": 9.939997431103402e-06, "loss": 0.2327, "step": 1298 }, { "epoch": 0.4337952913675071, "grad_norm": 0.5893205151841744, "learning_rate": 9.939696896236229e-06, "loss": 0.2463, "step": 1299 }, { "epoch": 0.4341292369343797, "grad_norm": 0.5527429152911172, "learning_rate": 9.939395615169673e-06, "loss": 0.2401, "step": 1300 }, { "epoch": 0.4344631825012523, "grad_norm": 0.5109471022299947, "learning_rate": 9.939093587949254e-06, "loss": 0.2243, "step": 1301 }, { "epoch": 0.43479712806812487, "grad_norm": 0.5656116960222147, "learning_rate": 9.938790814620591e-06, "loss": 0.2325, "step": 1302 }, { "epoch": 0.4351310736349975, "grad_norm": 0.7838636481919538, "learning_rate": 9.938487295229423e-06, "loss": 0.2528, "step": 1303 }, { "epoch": 0.4354650192018701, "grad_norm": 0.5427979756356961, "learning_rate": 9.9381830298216e-06, "loss": 0.2288, "step": 1304 }, { "epoch": 0.4357989647687427, "grad_norm": 0.5688915141922664, "learning_rate": 9.937878018443085e-06, "loss": 0.2196, "step": 1305 }, { "epoch": 0.4361329103356153, "grad_norm": 0.49786316106132467, "learning_rate": 9.937572261139956e-06, "loss": 0.2376, "step": 1306 }, { "epoch": 0.4364668559024879, "grad_norm": 0.6767510753934641, "learning_rate": 9.937265757958397e-06, "loss": 0.2521, "step": 1307 }, { "epoch": 0.4368008014693605, "grad_norm": 0.542449196038807, "learning_rate": 9.93695850894471e-06, "loss": 0.2305, "step": 1308 }, { "epoch": 0.4371347470362331, "grad_norm": 0.5329012111090545, "learning_rate": 9.93665051414531e-06, "loss": 0.2241, "step": 1309 }, { "epoch": 0.43746869260310567, "grad_norm": 0.5644124677867142, "learning_rate": 9.936341773606723e-06, "loss": 0.2418, "step": 1310 }, { "epoch": 0.4378026381699783, "grad_norm": 0.8167083316266492, "learning_rate": 9.936032287375587e-06, "loss": 0.2533, "step": 1311 }, { "epoch": 0.4381365837368509, "grad_norm": 0.5474885438610664, "learning_rate": 9.935722055498655e-06, "loss": 0.2368, "step": 1312 }, { "epoch": 0.4384705293037235, "grad_norm": 0.6208297848511186, "learning_rate": 9.935411078022791e-06, "loss": 0.2388, "step": 1313 }, { "epoch": 0.4388044748705961, "grad_norm": 0.6282302035769642, "learning_rate": 9.93509935499497e-06, "loss": 0.2493, "step": 1314 }, { "epoch": 0.4391384204374687, "grad_norm": 0.5820840951158709, "learning_rate": 9.934786886462282e-06, "loss": 0.2263, "step": 1315 }, { "epoch": 0.4394723660043413, "grad_norm": 0.582239493200357, "learning_rate": 9.934473672471931e-06, "loss": 0.2248, "step": 1316 }, { "epoch": 0.4398063115712139, "grad_norm": 0.5473573028008037, "learning_rate": 9.934159713071229e-06, "loss": 0.2353, "step": 1317 }, { "epoch": 0.44014025713808647, "grad_norm": 0.59739267317348, "learning_rate": 9.933845008307605e-06, "loss": 0.2398, "step": 1318 }, { "epoch": 0.4404742027049591, "grad_norm": 0.6216076077529502, "learning_rate": 9.933529558228599e-06, "loss": 0.2263, "step": 1319 }, { "epoch": 0.4408081482718317, "grad_norm": 0.5900409644511551, "learning_rate": 9.933213362881861e-06, "loss": 0.235, "step": 1320 }, { "epoch": 0.4411420938387043, "grad_norm": 0.5318503294685462, "learning_rate": 9.932896422315159e-06, "loss": 0.2183, "step": 1321 }, { "epoch": 0.4414760394055769, "grad_norm": 0.5992427234414971, "learning_rate": 9.93257873657637e-06, "loss": 0.2416, "step": 1322 }, { "epoch": 0.4418099849724495, "grad_norm": 0.6067601416763263, "learning_rate": 9.932260305713481e-06, "loss": 0.2391, "step": 1323 }, { "epoch": 0.4421439305393221, "grad_norm": 0.5311726881325887, "learning_rate": 9.9319411297746e-06, "loss": 0.2463, "step": 1324 }, { "epoch": 0.4424778761061947, "grad_norm": 0.6287448890448014, "learning_rate": 9.931621208807939e-06, "loss": 0.2494, "step": 1325 }, { "epoch": 0.44281182167306726, "grad_norm": 0.6170893282731494, "learning_rate": 9.931300542861826e-06, "loss": 0.2418, "step": 1326 }, { "epoch": 0.44314576723993987, "grad_norm": 0.5783810354471404, "learning_rate": 9.930979131984702e-06, "loss": 0.2418, "step": 1327 }, { "epoch": 0.4434797128068125, "grad_norm": 0.5475904806111974, "learning_rate": 9.93065697622512e-06, "loss": 0.2315, "step": 1328 }, { "epoch": 0.4438136583736851, "grad_norm": 0.5659886586228791, "learning_rate": 9.930334075631745e-06, "loss": 0.223, "step": 1329 }, { "epoch": 0.4441476039405577, "grad_norm": 0.5751459012303536, "learning_rate": 9.930010430253356e-06, "loss": 0.2215, "step": 1330 }, { "epoch": 0.4444815495074303, "grad_norm": 0.6063177897359333, "learning_rate": 9.92968604013884e-06, "loss": 0.2563, "step": 1331 }, { "epoch": 0.4448154950743029, "grad_norm": 0.7243143461095843, "learning_rate": 9.929360905337204e-06, "loss": 0.2327, "step": 1332 }, { "epoch": 0.4451494406411755, "grad_norm": 0.570687190416024, "learning_rate": 9.929035025897561e-06, "loss": 0.2226, "step": 1333 }, { "epoch": 0.4454833862080481, "grad_norm": 0.5940808216197645, "learning_rate": 9.928708401869143e-06, "loss": 0.2221, "step": 1334 }, { "epoch": 0.44581733177492067, "grad_norm": 0.6811536558542676, "learning_rate": 9.928381033301284e-06, "loss": 0.2463, "step": 1335 }, { "epoch": 0.4461512773417933, "grad_norm": 0.6824905822313252, "learning_rate": 9.928052920243443e-06, "loss": 0.2545, "step": 1336 }, { "epoch": 0.4464852229086659, "grad_norm": 0.6637186339127504, "learning_rate": 9.927724062745179e-06, "loss": 0.2662, "step": 1337 }, { "epoch": 0.4468191684755385, "grad_norm": 0.5809812855523707, "learning_rate": 9.927394460856174e-06, "loss": 0.2335, "step": 1338 }, { "epoch": 0.4471531140424111, "grad_norm": 0.5700895995361696, "learning_rate": 9.92706411462622e-06, "loss": 0.2137, "step": 1339 }, { "epoch": 0.4474870596092837, "grad_norm": 0.5735937174321404, "learning_rate": 9.926733024105216e-06, "loss": 0.2401, "step": 1340 }, { "epoch": 0.4478210051761563, "grad_norm": 0.5128813606990313, "learning_rate": 9.926401189343177e-06, "loss": 0.2335, "step": 1341 }, { "epoch": 0.4481549507430289, "grad_norm": 0.5468742678506229, "learning_rate": 9.926068610390231e-06, "loss": 0.2232, "step": 1342 }, { "epoch": 0.44848889630990146, "grad_norm": 0.5713584153616417, "learning_rate": 9.925735287296621e-06, "loss": 0.2331, "step": 1343 }, { "epoch": 0.44882284187677407, "grad_norm": 0.6096003519856071, "learning_rate": 9.925401220112698e-06, "loss": 0.2303, "step": 1344 }, { "epoch": 0.4491567874436467, "grad_norm": 0.58393370045869, "learning_rate": 9.925066408888924e-06, "loss": 0.2401, "step": 1345 }, { "epoch": 0.4494907330105193, "grad_norm": 0.5503546449887823, "learning_rate": 9.92473085367588e-06, "loss": 0.2422, "step": 1346 }, { "epoch": 0.4498246785773919, "grad_norm": 0.5633846354206201, "learning_rate": 9.924394554524252e-06, "loss": 0.245, "step": 1347 }, { "epoch": 0.4501586241442645, "grad_norm": 0.5534055213955522, "learning_rate": 9.924057511484844e-06, "loss": 0.2315, "step": 1348 }, { "epoch": 0.4504925697111371, "grad_norm": 0.6078240341950537, "learning_rate": 9.92371972460857e-06, "loss": 0.2341, "step": 1349 }, { "epoch": 0.4508265152780097, "grad_norm": 0.7758411012157077, "learning_rate": 9.923381193946457e-06, "loss": 0.2513, "step": 1350 }, { "epoch": 0.45116046084488226, "grad_norm": 0.590993603668986, "learning_rate": 9.923041919549644e-06, "loss": 0.2259, "step": 1351 }, { "epoch": 0.45149440641175487, "grad_norm": 0.5771541634320295, "learning_rate": 9.92270190146938e-06, "loss": 0.237, "step": 1352 }, { "epoch": 0.4518283519786275, "grad_norm": 0.5921642270486791, "learning_rate": 9.922361139757033e-06, "loss": 0.2391, "step": 1353 }, { "epoch": 0.4521622975455001, "grad_norm": 0.5803176539415612, "learning_rate": 9.922019634464077e-06, "loss": 0.237, "step": 1354 }, { "epoch": 0.4524962431123727, "grad_norm": 0.5689236619706025, "learning_rate": 9.9216773856421e-06, "loss": 0.2329, "step": 1355 }, { "epoch": 0.4528301886792453, "grad_norm": 0.4940682334608807, "learning_rate": 9.921334393342803e-06, "loss": 0.2341, "step": 1356 }, { "epoch": 0.4531641342461179, "grad_norm": 0.5752961824097249, "learning_rate": 9.920990657617998e-06, "loss": 0.254, "step": 1357 }, { "epoch": 0.4534980798129905, "grad_norm": 0.5364989105256935, "learning_rate": 9.920646178519612e-06, "loss": 0.2203, "step": 1358 }, { "epoch": 0.45383202537986306, "grad_norm": 0.5052033735914636, "learning_rate": 9.920300956099682e-06, "loss": 0.2328, "step": 1359 }, { "epoch": 0.45416597094673566, "grad_norm": 0.47346441786960736, "learning_rate": 9.919954990410359e-06, "loss": 0.2226, "step": 1360 }, { "epoch": 0.45449991651360827, "grad_norm": 0.4934375959384355, "learning_rate": 9.919608281503903e-06, "loss": 0.2348, "step": 1361 }, { "epoch": 0.4548338620804809, "grad_norm": 0.511312257771701, "learning_rate": 9.91926082943269e-06, "loss": 0.2153, "step": 1362 }, { "epoch": 0.4551678076473535, "grad_norm": 0.5543164186583251, "learning_rate": 9.918912634249206e-06, "loss": 0.2454, "step": 1363 }, { "epoch": 0.4555017532142261, "grad_norm": 0.5212508239813047, "learning_rate": 9.91856369600605e-06, "loss": 0.2232, "step": 1364 }, { "epoch": 0.4558356987810987, "grad_norm": 0.527429149205606, "learning_rate": 9.918214014755935e-06, "loss": 0.2342, "step": 1365 }, { "epoch": 0.4561696443479713, "grad_norm": 0.5562734152529801, "learning_rate": 9.917863590551682e-06, "loss": 0.2185, "step": 1366 }, { "epoch": 0.45650358991484385, "grad_norm": 0.5093451096437928, "learning_rate": 9.917512423446226e-06, "loss": 0.2252, "step": 1367 }, { "epoch": 0.45683753548171646, "grad_norm": 0.5376789383435711, "learning_rate": 9.917160513492619e-06, "loss": 0.2326, "step": 1368 }, { "epoch": 0.45717148104858907, "grad_norm": 0.5160005226617597, "learning_rate": 9.916807860744017e-06, "loss": 0.2458, "step": 1369 }, { "epoch": 0.4575054266154617, "grad_norm": 0.5243392738518032, "learning_rate": 9.916454465253695e-06, "loss": 0.2208, "step": 1370 }, { "epoch": 0.4578393721823343, "grad_norm": 0.5454097822750845, "learning_rate": 9.916100327075038e-06, "loss": 0.2388, "step": 1371 }, { "epoch": 0.4581733177492069, "grad_norm": 0.5311013984757362, "learning_rate": 9.91574544626154e-06, "loss": 0.2374, "step": 1372 }, { "epoch": 0.4585072633160795, "grad_norm": 0.5081063384150541, "learning_rate": 9.915389822866811e-06, "loss": 0.2221, "step": 1373 }, { "epoch": 0.4588412088829521, "grad_norm": 0.529213615144114, "learning_rate": 9.915033456944572e-06, "loss": 0.2264, "step": 1374 }, { "epoch": 0.45917515444982465, "grad_norm": 0.5216114427240847, "learning_rate": 9.914676348548658e-06, "loss": 0.2343, "step": 1375 }, { "epoch": 0.45950910001669726, "grad_norm": 0.5098198767095327, "learning_rate": 9.914318497733013e-06, "loss": 0.2199, "step": 1376 }, { "epoch": 0.45984304558356986, "grad_norm": 0.5772594585549337, "learning_rate": 9.913959904551695e-06, "loss": 0.2444, "step": 1377 }, { "epoch": 0.46017699115044247, "grad_norm": 0.5692684244936813, "learning_rate": 9.913600569058871e-06, "loss": 0.2325, "step": 1378 }, { "epoch": 0.4605109367173151, "grad_norm": 0.5528985886777825, "learning_rate": 9.913240491308828e-06, "loss": 0.2268, "step": 1379 }, { "epoch": 0.4608448822841877, "grad_norm": 0.5417272692609387, "learning_rate": 9.912879671355956e-06, "loss": 0.2171, "step": 1380 }, { "epoch": 0.4611788278510603, "grad_norm": 0.5302491893848811, "learning_rate": 9.912518109254763e-06, "loss": 0.2215, "step": 1381 }, { "epoch": 0.4615127734179329, "grad_norm": 0.5350805468842509, "learning_rate": 9.912155805059866e-06, "loss": 0.2351, "step": 1382 }, { "epoch": 0.4618467189848055, "grad_norm": 0.5606286743790297, "learning_rate": 9.911792758825996e-06, "loss": 0.2267, "step": 1383 }, { "epoch": 0.46218066455167806, "grad_norm": 0.5599706835878967, "learning_rate": 9.911428970607995e-06, "loss": 0.2361, "step": 1384 }, { "epoch": 0.46251461011855066, "grad_norm": 0.615424046565781, "learning_rate": 9.911064440460818e-06, "loss": 0.2298, "step": 1385 }, { "epoch": 0.46284855568542327, "grad_norm": 0.5649976414245927, "learning_rate": 9.91069916843953e-06, "loss": 0.232, "step": 1386 }, { "epoch": 0.4631825012522959, "grad_norm": 0.5508577983404023, "learning_rate": 9.910333154599314e-06, "loss": 0.2436, "step": 1387 }, { "epoch": 0.4635164468191685, "grad_norm": 0.5205854960323886, "learning_rate": 9.909966398995456e-06, "loss": 0.2362, "step": 1388 }, { "epoch": 0.4638503923860411, "grad_norm": 0.5757688708297488, "learning_rate": 9.909598901683361e-06, "loss": 0.2491, "step": 1389 }, { "epoch": 0.4641843379529137, "grad_norm": 0.5869979821978962, "learning_rate": 9.909230662718543e-06, "loss": 0.2324, "step": 1390 }, { "epoch": 0.4645182835197863, "grad_norm": 0.5625640594323424, "learning_rate": 9.908861682156628e-06, "loss": 0.236, "step": 1391 }, { "epoch": 0.46485222908665885, "grad_norm": 0.7152995547677362, "learning_rate": 9.908491960053357e-06, "loss": 0.2418, "step": 1392 }, { "epoch": 0.46518617465353146, "grad_norm": 0.4895348021939775, "learning_rate": 9.90812149646458e-06, "loss": 0.2246, "step": 1393 }, { "epoch": 0.46552012022040407, "grad_norm": 0.5909081784606736, "learning_rate": 9.907750291446258e-06, "loss": 0.2381, "step": 1394 }, { "epoch": 0.46585406578727667, "grad_norm": 0.5724895641003475, "learning_rate": 9.907378345054471e-06, "loss": 0.2385, "step": 1395 }, { "epoch": 0.4661880113541493, "grad_norm": 0.4977242293200873, "learning_rate": 9.9070056573454e-06, "loss": 0.2221, "step": 1396 }, { "epoch": 0.4665219569210219, "grad_norm": 0.7104851590746769, "learning_rate": 9.906632228375346e-06, "loss": 0.2439, "step": 1397 }, { "epoch": 0.4668559024878945, "grad_norm": 0.5366132433315185, "learning_rate": 9.906258058200722e-06, "loss": 0.2334, "step": 1398 }, { "epoch": 0.4671898480547671, "grad_norm": 0.603863208324995, "learning_rate": 9.905883146878049e-06, "loss": 0.2469, "step": 1399 }, { "epoch": 0.46752379362163965, "grad_norm": 0.6712903315298903, "learning_rate": 9.90550749446396e-06, "loss": 0.2525, "step": 1400 }, { "epoch": 0.46785773918851226, "grad_norm": 0.5599933585459058, "learning_rate": 9.905131101015204e-06, "loss": 0.2298, "step": 1401 }, { "epoch": 0.46819168475538486, "grad_norm": 0.6742593776670335, "learning_rate": 9.904753966588638e-06, "loss": 0.2249, "step": 1402 }, { "epoch": 0.46852563032225747, "grad_norm": 0.5966659500589226, "learning_rate": 9.904376091241236e-06, "loss": 0.2406, "step": 1403 }, { "epoch": 0.4688595758891301, "grad_norm": 0.5086157470594107, "learning_rate": 9.903997475030077e-06, "loss": 0.2239, "step": 1404 }, { "epoch": 0.4691935214560027, "grad_norm": 0.5688250377303993, "learning_rate": 9.903618118012358e-06, "loss": 0.2499, "step": 1405 }, { "epoch": 0.4695274670228753, "grad_norm": 0.5001870005825596, "learning_rate": 9.903238020245383e-06, "loss": 0.228, "step": 1406 }, { "epoch": 0.4698614125897479, "grad_norm": 0.6111840293711042, "learning_rate": 9.902857181786571e-06, "loss": 0.2388, "step": 1407 }, { "epoch": 0.47019535815662045, "grad_norm": 0.620871773232944, "learning_rate": 9.902475602693451e-06, "loss": 0.2176, "step": 1408 }, { "epoch": 0.47052930372349305, "grad_norm": 0.4920698441065501, "learning_rate": 9.90209328302367e-06, "loss": 0.2375, "step": 1409 }, { "epoch": 0.47086324929036566, "grad_norm": 0.5528573354567796, "learning_rate": 9.901710222834976e-06, "loss": 0.2424, "step": 1410 }, { "epoch": 0.47119719485723827, "grad_norm": 0.5208706668107904, "learning_rate": 9.901326422185238e-06, "loss": 0.2298, "step": 1411 }, { "epoch": 0.4715311404241109, "grad_norm": 0.5579864808318521, "learning_rate": 9.900941881132431e-06, "loss": 0.2303, "step": 1412 }, { "epoch": 0.4718650859909835, "grad_norm": 0.7348092396729472, "learning_rate": 9.900556599734647e-06, "loss": 0.2589, "step": 1413 }, { "epoch": 0.4721990315578561, "grad_norm": 0.5613570516628159, "learning_rate": 9.900170578050088e-06, "loss": 0.2541, "step": 1414 }, { "epoch": 0.4725329771247287, "grad_norm": 0.5573029926646937, "learning_rate": 9.899783816137065e-06, "loss": 0.2491, "step": 1415 }, { "epoch": 0.47286692269160124, "grad_norm": 0.5737576301390739, "learning_rate": 9.899396314054002e-06, "loss": 0.2165, "step": 1416 }, { "epoch": 0.47320086825847385, "grad_norm": 0.49834479477003774, "learning_rate": 9.89900807185944e-06, "loss": 0.2188, "step": 1417 }, { "epoch": 0.47353481382534646, "grad_norm": 0.5684392075108803, "learning_rate": 9.89861908961202e-06, "loss": 0.2361, "step": 1418 }, { "epoch": 0.47386875939221906, "grad_norm": 0.5847270713464249, "learning_rate": 9.89822936737051e-06, "loss": 0.2402, "step": 1419 }, { "epoch": 0.47420270495909167, "grad_norm": 0.5831201748397754, "learning_rate": 9.897838905193781e-06, "loss": 0.2264, "step": 1420 }, { "epoch": 0.4745366505259643, "grad_norm": 0.6718561947154861, "learning_rate": 9.897447703140813e-06, "loss": 0.2483, "step": 1421 }, { "epoch": 0.4748705960928369, "grad_norm": 0.5212579182437543, "learning_rate": 9.897055761270705e-06, "loss": 0.2264, "step": 1422 }, { "epoch": 0.4752045416597095, "grad_norm": 0.5544409675912124, "learning_rate": 9.896663079642663e-06, "loss": 0.2447, "step": 1423 }, { "epoch": 0.47553848722658204, "grad_norm": 0.5946504988614014, "learning_rate": 9.896269658316006e-06, "loss": 0.2322, "step": 1424 }, { "epoch": 0.47587243279345465, "grad_norm": 0.5325243574651551, "learning_rate": 9.895875497350165e-06, "loss": 0.2138, "step": 1425 }, { "epoch": 0.47620637836032725, "grad_norm": 0.7211936693494748, "learning_rate": 9.895480596804684e-06, "loss": 0.2187, "step": 1426 }, { "epoch": 0.47654032392719986, "grad_norm": 0.5542697823075144, "learning_rate": 9.895084956739215e-06, "loss": 0.2251, "step": 1427 }, { "epoch": 0.47687426949407247, "grad_norm": 0.5701293308719834, "learning_rate": 9.894688577213527e-06, "loss": 0.2416, "step": 1428 }, { "epoch": 0.4772082150609451, "grad_norm": 0.5347630793631123, "learning_rate": 9.894291458287496e-06, "loss": 0.2292, "step": 1429 }, { "epoch": 0.4775421606278177, "grad_norm": 0.5973098372961372, "learning_rate": 9.893893600021112e-06, "loss": 0.2455, "step": 1430 }, { "epoch": 0.4778761061946903, "grad_norm": 0.5641760379191003, "learning_rate": 9.893495002474475e-06, "loss": 0.231, "step": 1431 }, { "epoch": 0.47821005176156284, "grad_norm": 0.5541774904431316, "learning_rate": 9.893095665707801e-06, "loss": 0.2501, "step": 1432 }, { "epoch": 0.47854399732843544, "grad_norm": 0.5136825487190733, "learning_rate": 9.89269558978141e-06, "loss": 0.2241, "step": 1433 }, { "epoch": 0.47887794289530805, "grad_norm": 0.6526585039569316, "learning_rate": 9.892294774755741e-06, "loss": 0.25, "step": 1434 }, { "epoch": 0.47921188846218066, "grad_norm": 0.5924925924061507, "learning_rate": 9.891893220691343e-06, "loss": 0.245, "step": 1435 }, { "epoch": 0.47954583402905326, "grad_norm": 0.605221439278321, "learning_rate": 9.891490927648872e-06, "loss": 0.2365, "step": 1436 }, { "epoch": 0.47987977959592587, "grad_norm": 0.5890164596674092, "learning_rate": 9.891087895689102e-06, "loss": 0.2298, "step": 1437 }, { "epoch": 0.4802137251627985, "grad_norm": 0.5321381950410381, "learning_rate": 9.890684124872914e-06, "loss": 0.2202, "step": 1438 }, { "epoch": 0.4805476707296711, "grad_norm": 0.7746352115111984, "learning_rate": 9.890279615261302e-06, "loss": 0.2241, "step": 1439 }, { "epoch": 0.4808816162965437, "grad_norm": 0.5854514402548051, "learning_rate": 9.889874366915374e-06, "loss": 0.2265, "step": 1440 }, { "epoch": 0.48121556186341624, "grad_norm": 0.5842152055041235, "learning_rate": 9.889468379896347e-06, "loss": 0.2398, "step": 1441 }, { "epoch": 0.48154950743028885, "grad_norm": 0.5692166615610584, "learning_rate": 9.88906165426555e-06, "loss": 0.248, "step": 1442 }, { "epoch": 0.48188345299716145, "grad_norm": 0.459765580131553, "learning_rate": 9.888654190084422e-06, "loss": 0.2071, "step": 1443 }, { "epoch": 0.48221739856403406, "grad_norm": 0.6471093852235853, "learning_rate": 9.888245987414517e-06, "loss": 0.2368, "step": 1444 }, { "epoch": 0.48255134413090667, "grad_norm": 0.5765295704173624, "learning_rate": 9.8878370463175e-06, "loss": 0.238, "step": 1445 }, { "epoch": 0.4828852896977793, "grad_norm": 0.5371085420371692, "learning_rate": 9.887427366855142e-06, "loss": 0.2234, "step": 1446 }, { "epoch": 0.4832192352646519, "grad_norm": 0.49845898630714686, "learning_rate": 9.887016949089334e-06, "loss": 0.2377, "step": 1447 }, { "epoch": 0.4835531808315245, "grad_norm": 0.47885124555831643, "learning_rate": 9.886605793082073e-06, "loss": 0.2262, "step": 1448 }, { "epoch": 0.48388712639839704, "grad_norm": 0.5622417184126103, "learning_rate": 9.886193898895468e-06, "loss": 0.2388, "step": 1449 }, { "epoch": 0.48422107196526964, "grad_norm": 0.6126625007922746, "learning_rate": 9.885781266591742e-06, "loss": 0.2374, "step": 1450 }, { "epoch": 0.48455501753214225, "grad_norm": 0.5754346810486999, "learning_rate": 9.885367896233229e-06, "loss": 0.2495, "step": 1451 }, { "epoch": 0.48488896309901486, "grad_norm": 0.5679227654667943, "learning_rate": 9.88495378788237e-06, "loss": 0.232, "step": 1452 }, { "epoch": 0.48522290866588746, "grad_norm": 0.523471620727347, "learning_rate": 9.884538941601725e-06, "loss": 0.2433, "step": 1453 }, { "epoch": 0.48555685423276007, "grad_norm": 0.527665998819472, "learning_rate": 9.884123357453959e-06, "loss": 0.2264, "step": 1454 }, { "epoch": 0.4858907997996327, "grad_norm": 0.507908698813618, "learning_rate": 9.883707035501849e-06, "loss": 0.2211, "step": 1455 }, { "epoch": 0.4862247453665053, "grad_norm": 0.5790040972393914, "learning_rate": 9.883289975808288e-06, "loss": 0.2475, "step": 1456 }, { "epoch": 0.48655869093337784, "grad_norm": 0.6259861327751014, "learning_rate": 9.882872178436277e-06, "loss": 0.2122, "step": 1457 }, { "epoch": 0.48689263650025044, "grad_norm": 0.5921509956180295, "learning_rate": 9.882453643448933e-06, "loss": 0.2431, "step": 1458 }, { "epoch": 0.48722658206712305, "grad_norm": 0.6237750475844797, "learning_rate": 9.882034370909474e-06, "loss": 0.2505, "step": 1459 }, { "epoch": 0.48756052763399566, "grad_norm": 0.5668101845599637, "learning_rate": 9.88161436088124e-06, "loss": 0.2474, "step": 1460 }, { "epoch": 0.48789447320086826, "grad_norm": 0.5512787179899852, "learning_rate": 9.881193613427676e-06, "loss": 0.2422, "step": 1461 }, { "epoch": 0.48822841876774087, "grad_norm": 0.5569486602988732, "learning_rate": 9.880772128612345e-06, "loss": 0.2286, "step": 1462 }, { "epoch": 0.4885623643346135, "grad_norm": 0.666951567922003, "learning_rate": 9.880349906498914e-06, "loss": 0.2386, "step": 1463 }, { "epoch": 0.4888963099014861, "grad_norm": 0.6330185692498038, "learning_rate": 9.879926947151164e-06, "loss": 0.2461, "step": 1464 }, { "epoch": 0.48923025546835863, "grad_norm": 0.5316986356744938, "learning_rate": 9.879503250632991e-06, "loss": 0.2424, "step": 1465 }, { "epoch": 0.48956420103523124, "grad_norm": 0.8086554466946749, "learning_rate": 9.879078817008395e-06, "loss": 0.2381, "step": 1466 }, { "epoch": 0.48989814660210385, "grad_norm": 0.6065660752029407, "learning_rate": 9.878653646341498e-06, "loss": 0.2331, "step": 1467 }, { "epoch": 0.49023209216897645, "grad_norm": 0.557985159542511, "learning_rate": 9.878227738696522e-06, "loss": 0.2237, "step": 1468 }, { "epoch": 0.49056603773584906, "grad_norm": 0.48904785077843377, "learning_rate": 9.877801094137807e-06, "loss": 0.2117, "step": 1469 }, { "epoch": 0.49089998330272167, "grad_norm": 0.5750681520541658, "learning_rate": 9.877373712729803e-06, "loss": 0.2321, "step": 1470 }, { "epoch": 0.49123392886959427, "grad_norm": 0.6187608854168688, "learning_rate": 9.876945594537069e-06, "loss": 0.2323, "step": 1471 }, { "epoch": 0.4915678744364669, "grad_norm": 0.5914515580006867, "learning_rate": 9.876516739624279e-06, "loss": 0.2343, "step": 1472 }, { "epoch": 0.49190182000333943, "grad_norm": 0.6132148683275411, "learning_rate": 9.876087148056217e-06, "loss": 0.2206, "step": 1473 }, { "epoch": 0.49223576557021204, "grad_norm": 0.5779312466388966, "learning_rate": 9.875656819897776e-06, "loss": 0.2217, "step": 1474 }, { "epoch": 0.49256971113708464, "grad_norm": 0.5934721585739572, "learning_rate": 9.875225755213966e-06, "loss": 0.2242, "step": 1475 }, { "epoch": 0.49290365670395725, "grad_norm": 0.6090421834628303, "learning_rate": 9.874793954069899e-06, "loss": 0.2488, "step": 1476 }, { "epoch": 0.49323760227082986, "grad_norm": 0.5063010044760622, "learning_rate": 9.874361416530808e-06, "loss": 0.2394, "step": 1477 }, { "epoch": 0.49357154783770246, "grad_norm": 0.5439710083823438, "learning_rate": 9.873928142662031e-06, "loss": 0.2375, "step": 1478 }, { "epoch": 0.49390549340457507, "grad_norm": 0.48631313293144357, "learning_rate": 9.873494132529018e-06, "loss": 0.2314, "step": 1479 }, { "epoch": 0.4942394389714477, "grad_norm": 0.6393500940969762, "learning_rate": 9.873059386197335e-06, "loss": 0.2618, "step": 1480 }, { "epoch": 0.4945733845383202, "grad_norm": 0.5827650304612245, "learning_rate": 9.872623903732652e-06, "loss": 0.2174, "step": 1481 }, { "epoch": 0.49490733010519283, "grad_norm": 0.48147858509311164, "learning_rate": 9.872187685200756e-06, "loss": 0.2177, "step": 1482 }, { "epoch": 0.49524127567206544, "grad_norm": 0.5515351930407174, "learning_rate": 9.87175073066754e-06, "loss": 0.24, "step": 1483 }, { "epoch": 0.49557522123893805, "grad_norm": 0.565292330492309, "learning_rate": 9.871313040199015e-06, "loss": 0.2315, "step": 1484 }, { "epoch": 0.49590916680581065, "grad_norm": 0.5179304120767915, "learning_rate": 9.870874613861297e-06, "loss": 0.2157, "step": 1485 }, { "epoch": 0.49624311237268326, "grad_norm": 0.5509004443246928, "learning_rate": 9.870435451720614e-06, "loss": 0.2276, "step": 1486 }, { "epoch": 0.49657705793955587, "grad_norm": 0.4892998005954115, "learning_rate": 9.869995553843313e-06, "loss": 0.2128, "step": 1487 }, { "epoch": 0.4969110035064285, "grad_norm": 0.5037397253995877, "learning_rate": 9.869554920295836e-06, "loss": 0.2251, "step": 1488 }, { "epoch": 0.4972449490733011, "grad_norm": 0.5227998261948305, "learning_rate": 9.869113551144754e-06, "loss": 0.2175, "step": 1489 }, { "epoch": 0.49757889464017363, "grad_norm": 0.5995114670501726, "learning_rate": 9.86867144645674e-06, "loss": 0.2606, "step": 1490 }, { "epoch": 0.49791284020704624, "grad_norm": 0.4795393090914556, "learning_rate": 9.868228606298574e-06, "loss": 0.2269, "step": 1491 }, { "epoch": 0.49824678577391884, "grad_norm": 0.5497097986938829, "learning_rate": 9.867785030737157e-06, "loss": 0.2422, "step": 1492 }, { "epoch": 0.49858073134079145, "grad_norm": 0.5503594416571509, "learning_rate": 9.867340719839494e-06, "loss": 0.241, "step": 1493 }, { "epoch": 0.49891467690766406, "grad_norm": 0.7026689122560772, "learning_rate": 9.866895673672704e-06, "loss": 0.2523, "step": 1494 }, { "epoch": 0.49924862247453666, "grad_norm": 0.5290404576726735, "learning_rate": 9.866449892304017e-06, "loss": 0.2233, "step": 1495 }, { "epoch": 0.49958256804140927, "grad_norm": 0.4789461584020222, "learning_rate": 9.866003375800773e-06, "loss": 0.2275, "step": 1496 }, { "epoch": 0.4999165136082819, "grad_norm": 0.5496686648273017, "learning_rate": 9.865556124230425e-06, "loss": 0.2269, "step": 1497 }, { "epoch": 0.5002504591751544, "grad_norm": 0.5178066142061346, "learning_rate": 9.865108137660533e-06, "loss": 0.2352, "step": 1498 }, { "epoch": 0.500584404742027, "grad_norm": 0.5198841589298615, "learning_rate": 9.864659416158773e-06, "loss": 0.2312, "step": 1499 }, { "epoch": 0.5009183503088996, "grad_norm": 0.5388111513331779, "learning_rate": 9.864209959792927e-06, "loss": 0.2384, "step": 1500 }, { "epoch": 0.5012522958757722, "grad_norm": 0.6553768022329063, "learning_rate": 9.863759768630893e-06, "loss": 0.2506, "step": 1501 }, { "epoch": 0.5015862414426449, "grad_norm": 0.5610454814274192, "learning_rate": 9.863308842740678e-06, "loss": 0.2247, "step": 1502 }, { "epoch": 0.5019201870095175, "grad_norm": 0.5502060297653726, "learning_rate": 9.862857182190398e-06, "loss": 0.2342, "step": 1503 }, { "epoch": 0.5022541325763901, "grad_norm": 0.4949388903158392, "learning_rate": 9.862404787048283e-06, "loss": 0.2201, "step": 1504 }, { "epoch": 0.5025880781432627, "grad_norm": 0.5240359024269955, "learning_rate": 9.861951657382671e-06, "loss": 0.2426, "step": 1505 }, { "epoch": 0.5029220237101353, "grad_norm": 0.5132026830298406, "learning_rate": 9.861497793262014e-06, "loss": 0.2245, "step": 1506 }, { "epoch": 0.5032559692770079, "grad_norm": 0.5198774578302745, "learning_rate": 9.861043194754874e-06, "loss": 0.2403, "step": 1507 }, { "epoch": 0.5035899148438805, "grad_norm": 0.5039524096753569, "learning_rate": 9.860587861929922e-06, "loss": 0.2159, "step": 1508 }, { "epoch": 0.5039238604107531, "grad_norm": 0.5448793630792352, "learning_rate": 9.86013179485594e-06, "loss": 0.2316, "step": 1509 }, { "epoch": 0.5042578059776256, "grad_norm": 0.48213974652147173, "learning_rate": 9.859674993601826e-06, "loss": 0.2242, "step": 1510 }, { "epoch": 0.5045917515444982, "grad_norm": 0.590315746105308, "learning_rate": 9.859217458236583e-06, "loss": 0.2384, "step": 1511 }, { "epoch": 0.5049256971113708, "grad_norm": 0.5104158501610313, "learning_rate": 9.858759188829328e-06, "loss": 0.2426, "step": 1512 }, { "epoch": 0.5052596426782434, "grad_norm": 0.582674382044229, "learning_rate": 9.858300185449287e-06, "loss": 0.2401, "step": 1513 }, { "epoch": 0.505593588245116, "grad_norm": 0.5203261108265982, "learning_rate": 9.857840448165798e-06, "loss": 0.2391, "step": 1514 }, { "epoch": 0.5059275338119886, "grad_norm": 0.48683141373358735, "learning_rate": 9.857379977048311e-06, "loss": 0.2185, "step": 1515 }, { "epoch": 0.5062614793788612, "grad_norm": 0.5561097986159383, "learning_rate": 9.856918772166385e-06, "loss": 0.2403, "step": 1516 }, { "epoch": 0.5065954249457338, "grad_norm": 0.6235558405935004, "learning_rate": 9.856456833589688e-06, "loss": 0.2183, "step": 1517 }, { "epoch": 0.5069293705126064, "grad_norm": 0.6446845091009377, "learning_rate": 9.855994161388005e-06, "loss": 0.2333, "step": 1518 }, { "epoch": 0.507263316079479, "grad_norm": 0.5687674110037545, "learning_rate": 9.855530755631226e-06, "loss": 0.2428, "step": 1519 }, { "epoch": 0.5075972616463517, "grad_norm": 0.5587656168664682, "learning_rate": 9.855066616389356e-06, "loss": 0.2369, "step": 1520 }, { "epoch": 0.5079312072132243, "grad_norm": 0.7359238587828697, "learning_rate": 9.854601743732504e-06, "loss": 0.2317, "step": 1521 }, { "epoch": 0.5082651527800969, "grad_norm": 0.5066229049650518, "learning_rate": 9.854136137730899e-06, "loss": 0.2337, "step": 1522 }, { "epoch": 0.5085990983469695, "grad_norm": 0.49537075038118694, "learning_rate": 9.853669798454875e-06, "loss": 0.2243, "step": 1523 }, { "epoch": 0.5089330439138421, "grad_norm": 0.6022772076474969, "learning_rate": 9.853202725974878e-06, "loss": 0.2415, "step": 1524 }, { "epoch": 0.5092669894807147, "grad_norm": 0.608749379967523, "learning_rate": 9.852734920361465e-06, "loss": 0.2345, "step": 1525 }, { "epoch": 0.5096009350475872, "grad_norm": 0.6715781075920919, "learning_rate": 9.8522663816853e-06, "loss": 0.2641, "step": 1526 }, { "epoch": 0.5099348806144598, "grad_norm": 0.5101715269421332, "learning_rate": 9.851797110017167e-06, "loss": 0.2196, "step": 1527 }, { "epoch": 0.5102688261813324, "grad_norm": 0.511537007454956, "learning_rate": 9.851327105427952e-06, "loss": 0.2133, "step": 1528 }, { "epoch": 0.510602771748205, "grad_norm": 0.5178737587676562, "learning_rate": 9.850856367988657e-06, "loss": 0.2185, "step": 1529 }, { "epoch": 0.5109367173150776, "grad_norm": 0.5942221037087895, "learning_rate": 9.850384897770388e-06, "loss": 0.245, "step": 1530 }, { "epoch": 0.5112706628819502, "grad_norm": 0.5444327388738013, "learning_rate": 9.84991269484437e-06, "loss": 0.2412, "step": 1531 }, { "epoch": 0.5116046084488228, "grad_norm": 0.5352070717510221, "learning_rate": 9.849439759281934e-06, "loss": 0.2304, "step": 1532 }, { "epoch": 0.5119385540156954, "grad_norm": 0.5602568244179051, "learning_rate": 9.848966091154522e-06, "loss": 0.238, "step": 1533 }, { "epoch": 0.512272499582568, "grad_norm": 0.5211858362853864, "learning_rate": 9.848491690533686e-06, "loss": 0.2376, "step": 1534 }, { "epoch": 0.5126064451494406, "grad_norm": 0.5783858287499946, "learning_rate": 9.848016557491092e-06, "loss": 0.2582, "step": 1535 }, { "epoch": 0.5129403907163133, "grad_norm": 0.5915548412171439, "learning_rate": 9.847540692098513e-06, "loss": 0.2462, "step": 1536 }, { "epoch": 0.5132743362831859, "grad_norm": 0.5380120667594149, "learning_rate": 9.847064094427835e-06, "loss": 0.244, "step": 1537 }, { "epoch": 0.5136082818500585, "grad_norm": 0.5931387539258556, "learning_rate": 9.846586764551054e-06, "loss": 0.241, "step": 1538 }, { "epoch": 0.5139422274169311, "grad_norm": 0.6732547388174122, "learning_rate": 9.846108702540274e-06, "loss": 0.2453, "step": 1539 }, { "epoch": 0.5142761729838037, "grad_norm": 0.5708516751959783, "learning_rate": 9.845629908467714e-06, "loss": 0.2253, "step": 1540 }, { "epoch": 0.5146101185506763, "grad_norm": 0.5964930644824759, "learning_rate": 9.8451503824057e-06, "loss": 0.205, "step": 1541 }, { "epoch": 0.5149440641175489, "grad_norm": 0.5433778021010894, "learning_rate": 9.844670124426672e-06, "loss": 0.2151, "step": 1542 }, { "epoch": 0.5152780096844214, "grad_norm": 0.5567634491740848, "learning_rate": 9.844189134603178e-06, "loss": 0.2154, "step": 1543 }, { "epoch": 0.515611955251294, "grad_norm": 0.4903197026217307, "learning_rate": 9.843707413007874e-06, "loss": 0.2137, "step": 1544 }, { "epoch": 0.5159459008181666, "grad_norm": 0.5960697433572092, "learning_rate": 9.843224959713535e-06, "loss": 0.2527, "step": 1545 }, { "epoch": 0.5162798463850392, "grad_norm": 0.4944143034523633, "learning_rate": 9.842741774793038e-06, "loss": 0.2307, "step": 1546 }, { "epoch": 0.5166137919519118, "grad_norm": 0.49358314195918296, "learning_rate": 9.842257858319375e-06, "loss": 0.2179, "step": 1547 }, { "epoch": 0.5169477375187844, "grad_norm": 0.5796108269031679, "learning_rate": 9.841773210365646e-06, "loss": 0.2519, "step": 1548 }, { "epoch": 0.517281683085657, "grad_norm": 0.5138397804293885, "learning_rate": 9.841287831005064e-06, "loss": 0.2305, "step": 1549 }, { "epoch": 0.5176156286525296, "grad_norm": 0.5250071600094292, "learning_rate": 9.84080172031095e-06, "loss": 0.2311, "step": 1550 }, { "epoch": 0.5179495742194022, "grad_norm": 0.5574764925023964, "learning_rate": 9.840314878356739e-06, "loss": 0.2323, "step": 1551 }, { "epoch": 0.5182835197862748, "grad_norm": 0.6140408300146574, "learning_rate": 9.839827305215972e-06, "loss": 0.2533, "step": 1552 }, { "epoch": 0.5186174653531475, "grad_norm": 0.7516633182539431, "learning_rate": 9.839339000962305e-06, "loss": 0.226, "step": 1553 }, { "epoch": 0.5189514109200201, "grad_norm": 0.5458694547214934, "learning_rate": 9.838849965669499e-06, "loss": 0.2325, "step": 1554 }, { "epoch": 0.5192853564868927, "grad_norm": 0.5064986538497875, "learning_rate": 9.83836019941143e-06, "loss": 0.2114, "step": 1555 }, { "epoch": 0.5196193020537653, "grad_norm": 0.61319519408008, "learning_rate": 9.837869702262082e-06, "loss": 0.2473, "step": 1556 }, { "epoch": 0.5199532476206379, "grad_norm": 0.5565828724070242, "learning_rate": 9.837378474295553e-06, "loss": 0.2332, "step": 1557 }, { "epoch": 0.5202871931875105, "grad_norm": 0.5064088876577069, "learning_rate": 9.836886515586045e-06, "loss": 0.217, "step": 1558 }, { "epoch": 0.520621138754383, "grad_norm": 0.536537012097158, "learning_rate": 9.83639382620788e-06, "loss": 0.2276, "step": 1559 }, { "epoch": 0.5209550843212556, "grad_norm": 0.518315034919932, "learning_rate": 9.835900406235479e-06, "loss": 0.2247, "step": 1560 }, { "epoch": 0.5212890298881282, "grad_norm": 0.5401437965180717, "learning_rate": 9.835406255743381e-06, "loss": 0.229, "step": 1561 }, { "epoch": 0.5216229754550008, "grad_norm": 0.5105619541123207, "learning_rate": 9.834911374806231e-06, "loss": 0.2335, "step": 1562 }, { "epoch": 0.5219569210218734, "grad_norm": 0.519164367586733, "learning_rate": 9.83441576349879e-06, "loss": 0.2251, "step": 1563 }, { "epoch": 0.522290866588746, "grad_norm": 0.569853562629724, "learning_rate": 9.833919421895926e-06, "loss": 0.2537, "step": 1564 }, { "epoch": 0.5226248121556186, "grad_norm": 0.516625707086611, "learning_rate": 9.833422350072615e-06, "loss": 0.2161, "step": 1565 }, { "epoch": 0.5229587577224912, "grad_norm": 0.6535880193463475, "learning_rate": 9.832924548103945e-06, "loss": 0.209, "step": 1566 }, { "epoch": 0.5232927032893638, "grad_norm": 0.4916515442829188, "learning_rate": 9.832426016065117e-06, "loss": 0.2185, "step": 1567 }, { "epoch": 0.5236266488562364, "grad_norm": 0.5814664612841597, "learning_rate": 9.83192675403144e-06, "loss": 0.2266, "step": 1568 }, { "epoch": 0.523960594423109, "grad_norm": 0.5884057167346974, "learning_rate": 9.831426762078331e-06, "loss": 0.2244, "step": 1569 }, { "epoch": 0.5242945399899817, "grad_norm": 0.6183375261868602, "learning_rate": 9.830926040281321e-06, "loss": 0.2445, "step": 1570 }, { "epoch": 0.5246284855568543, "grad_norm": 0.5200985380140108, "learning_rate": 9.830424588716053e-06, "loss": 0.2248, "step": 1571 }, { "epoch": 0.5249624311237269, "grad_norm": 0.5420915104887184, "learning_rate": 9.829922407458273e-06, "loss": 0.2327, "step": 1572 }, { "epoch": 0.5252963766905995, "grad_norm": 0.5270598323458139, "learning_rate": 9.829419496583843e-06, "loss": 0.2256, "step": 1573 }, { "epoch": 0.5256303222574721, "grad_norm": 0.5239074446729693, "learning_rate": 9.828915856168734e-06, "loss": 0.2284, "step": 1574 }, { "epoch": 0.5259642678243446, "grad_norm": 0.5489528638296536, "learning_rate": 9.828411486289026e-06, "loss": 0.2346, "step": 1575 }, { "epoch": 0.5262982133912172, "grad_norm": 0.5306622558740911, "learning_rate": 9.82790638702091e-06, "loss": 0.2334, "step": 1576 }, { "epoch": 0.5266321589580898, "grad_norm": 0.5020797546266, "learning_rate": 9.827400558440687e-06, "loss": 0.2193, "step": 1577 }, { "epoch": 0.5269661045249624, "grad_norm": 0.4958858002471795, "learning_rate": 9.826894000624769e-06, "loss": 0.2214, "step": 1578 }, { "epoch": 0.527300050091835, "grad_norm": 0.5495730199464439, "learning_rate": 9.826386713649678e-06, "loss": 0.2391, "step": 1579 }, { "epoch": 0.5276339956587076, "grad_norm": 0.5337213938676789, "learning_rate": 9.825878697592046e-06, "loss": 0.241, "step": 1580 }, { "epoch": 0.5279679412255802, "grad_norm": 0.5243139931047547, "learning_rate": 9.825369952528611e-06, "loss": 0.2347, "step": 1581 }, { "epoch": 0.5283018867924528, "grad_norm": 0.46951090232168313, "learning_rate": 9.824860478536231e-06, "loss": 0.2174, "step": 1582 }, { "epoch": 0.5286358323593254, "grad_norm": 0.5591143330075115, "learning_rate": 9.824350275691864e-06, "loss": 0.2315, "step": 1583 }, { "epoch": 0.528969777926198, "grad_norm": 0.5741634648736227, "learning_rate": 9.823839344072582e-06, "loss": 0.2392, "step": 1584 }, { "epoch": 0.5293037234930706, "grad_norm": 0.5477207037243178, "learning_rate": 9.823327683755566e-06, "loss": 0.2479, "step": 1585 }, { "epoch": 0.5296376690599433, "grad_norm": 0.5814226357210571, "learning_rate": 9.822815294818113e-06, "loss": 0.2503, "step": 1586 }, { "epoch": 0.5299716146268159, "grad_norm": 0.5959177044049923, "learning_rate": 9.822302177337624e-06, "loss": 0.2414, "step": 1587 }, { "epoch": 0.5303055601936885, "grad_norm": 0.5034065372193987, "learning_rate": 9.821788331391609e-06, "loss": 0.226, "step": 1588 }, { "epoch": 0.5306395057605611, "grad_norm": 0.5444917509379016, "learning_rate": 9.821273757057692e-06, "loss": 0.2208, "step": 1589 }, { "epoch": 0.5309734513274337, "grad_norm": 0.6340433755540438, "learning_rate": 9.820758454413606e-06, "loss": 0.2341, "step": 1590 }, { "epoch": 0.5313073968943063, "grad_norm": 0.5068735932793473, "learning_rate": 9.820242423537192e-06, "loss": 0.2279, "step": 1591 }, { "epoch": 0.5316413424611788, "grad_norm": 0.5514835902750657, "learning_rate": 9.819725664506404e-06, "loss": 0.2397, "step": 1592 }, { "epoch": 0.5319752880280514, "grad_norm": 0.540033557370152, "learning_rate": 9.819208177399303e-06, "loss": 0.2239, "step": 1593 }, { "epoch": 0.532309233594924, "grad_norm": 0.592977095077727, "learning_rate": 9.818689962294063e-06, "loss": 0.2339, "step": 1594 }, { "epoch": 0.5326431791617966, "grad_norm": 0.5871207671435377, "learning_rate": 9.818171019268965e-06, "loss": 0.2293, "step": 1595 }, { "epoch": 0.5329771247286692, "grad_norm": 0.5947688900274528, "learning_rate": 9.817651348402403e-06, "loss": 0.2514, "step": 1596 }, { "epoch": 0.5333110702955418, "grad_norm": 0.5812926758957463, "learning_rate": 9.81713094977288e-06, "loss": 0.2248, "step": 1597 }, { "epoch": 0.5336450158624144, "grad_norm": 0.513970380059725, "learning_rate": 9.816609823459007e-06, "loss": 0.2375, "step": 1598 }, { "epoch": 0.533978961429287, "grad_norm": 0.5034512417703262, "learning_rate": 9.816087969539506e-06, "loss": 0.2287, "step": 1599 }, { "epoch": 0.5343129069961596, "grad_norm": 0.5550546878199349, "learning_rate": 9.815565388093209e-06, "loss": 0.2396, "step": 1600 }, { "epoch": 0.5346468525630322, "grad_norm": 0.5324993847600048, "learning_rate": 9.81504207919906e-06, "loss": 0.2223, "step": 1601 }, { "epoch": 0.5349807981299048, "grad_norm": 0.526275829556474, "learning_rate": 9.814518042936107e-06, "loss": 0.2431, "step": 1602 }, { "epoch": 0.5353147436967775, "grad_norm": 0.5494997861868455, "learning_rate": 9.813993279383518e-06, "loss": 0.2323, "step": 1603 }, { "epoch": 0.5356486892636501, "grad_norm": 0.5171193572022826, "learning_rate": 9.813467788620559e-06, "loss": 0.2416, "step": 1604 }, { "epoch": 0.5359826348305227, "grad_norm": 0.5027893595121139, "learning_rate": 9.812941570726615e-06, "loss": 0.218, "step": 1605 }, { "epoch": 0.5363165803973953, "grad_norm": 0.5506740906758446, "learning_rate": 9.812414625781175e-06, "loss": 0.2296, "step": 1606 }, { "epoch": 0.5366505259642679, "grad_norm": 0.5183511110941164, "learning_rate": 9.811886953863841e-06, "loss": 0.2082, "step": 1607 }, { "epoch": 0.5369844715311404, "grad_norm": 0.6302353066399081, "learning_rate": 9.811358555054326e-06, "loss": 0.2228, "step": 1608 }, { "epoch": 0.537318417098013, "grad_norm": 0.5341795817076737, "learning_rate": 9.810829429432449e-06, "loss": 0.2293, "step": 1609 }, { "epoch": 0.5376523626648856, "grad_norm": 0.5763059453482908, "learning_rate": 9.81029957707814e-06, "loss": 0.2558, "step": 1610 }, { "epoch": 0.5379863082317582, "grad_norm": 0.6041426482444534, "learning_rate": 9.809768998071442e-06, "loss": 0.2293, "step": 1611 }, { "epoch": 0.5383202537986308, "grad_norm": 0.5132151326478286, "learning_rate": 9.809237692492503e-06, "loss": 0.2185, "step": 1612 }, { "epoch": 0.5386541993655034, "grad_norm": 0.5391826703922983, "learning_rate": 9.808705660421582e-06, "loss": 0.2323, "step": 1613 }, { "epoch": 0.538988144932376, "grad_norm": 0.6461704927245718, "learning_rate": 9.808172901939053e-06, "loss": 0.2125, "step": 1614 }, { "epoch": 0.5393220904992486, "grad_norm": 0.5270375186381666, "learning_rate": 9.807639417125392e-06, "loss": 0.2176, "step": 1615 }, { "epoch": 0.5396560360661212, "grad_norm": 0.5445635894652113, "learning_rate": 9.807105206061186e-06, "loss": 0.2274, "step": 1616 }, { "epoch": 0.5399899816329938, "grad_norm": 0.5317828887600755, "learning_rate": 9.80657026882714e-06, "loss": 0.2369, "step": 1617 }, { "epoch": 0.5403239271998664, "grad_norm": 0.5686035453100612, "learning_rate": 9.80603460550406e-06, "loss": 0.2321, "step": 1618 }, { "epoch": 0.540657872766739, "grad_norm": 0.527823856857519, "learning_rate": 9.805498216172861e-06, "loss": 0.2326, "step": 1619 }, { "epoch": 0.5409918183336117, "grad_norm": 0.6946489211645083, "learning_rate": 9.804961100914575e-06, "loss": 0.2275, "step": 1620 }, { "epoch": 0.5413257639004843, "grad_norm": 0.5021269916482256, "learning_rate": 9.804423259810338e-06, "loss": 0.217, "step": 1621 }, { "epoch": 0.5416597094673569, "grad_norm": 0.5280955200768908, "learning_rate": 9.803884692941397e-06, "loss": 0.232, "step": 1622 }, { "epoch": 0.5419936550342295, "grad_norm": 0.524502353180037, "learning_rate": 9.803345400389111e-06, "loss": 0.2295, "step": 1623 }, { "epoch": 0.542327600601102, "grad_norm": 0.5588116006844829, "learning_rate": 9.802805382234941e-06, "loss": 0.2533, "step": 1624 }, { "epoch": 0.5426615461679746, "grad_norm": 0.500852907399858, "learning_rate": 9.80226463856047e-06, "loss": 0.2134, "step": 1625 }, { "epoch": 0.5429954917348472, "grad_norm": 0.6862541833794148, "learning_rate": 9.801723169447378e-06, "loss": 0.2506, "step": 1626 }, { "epoch": 0.5433294373017198, "grad_norm": 0.5040386542518999, "learning_rate": 9.801180974977466e-06, "loss": 0.227, "step": 1627 }, { "epoch": 0.5436633828685924, "grad_norm": 0.5474990228218318, "learning_rate": 9.800638055232635e-06, "loss": 0.2166, "step": 1628 }, { "epoch": 0.543997328435465, "grad_norm": 0.6388226847856388, "learning_rate": 9.800094410294897e-06, "loss": 0.2487, "step": 1629 }, { "epoch": 0.5443312740023376, "grad_norm": 0.4765248139449764, "learning_rate": 9.799550040246381e-06, "loss": 0.2129, "step": 1630 }, { "epoch": 0.5446652195692102, "grad_norm": 0.5019451317873559, "learning_rate": 9.799004945169319e-06, "loss": 0.2038, "step": 1631 }, { "epoch": 0.5449991651360828, "grad_norm": 0.48693504267132603, "learning_rate": 9.798459125146054e-06, "loss": 0.2379, "step": 1632 }, { "epoch": 0.5453331107029554, "grad_norm": 0.5011793654105599, "learning_rate": 9.797912580259037e-06, "loss": 0.2278, "step": 1633 }, { "epoch": 0.545667056269828, "grad_norm": 0.5627274706249237, "learning_rate": 9.797365310590832e-06, "loss": 0.2399, "step": 1634 }, { "epoch": 0.5460010018367006, "grad_norm": 0.5317296411359453, "learning_rate": 9.796817316224107e-06, "loss": 0.2313, "step": 1635 }, { "epoch": 0.5463349474035732, "grad_norm": 0.5256731560457311, "learning_rate": 9.79626859724165e-06, "loss": 0.2274, "step": 1636 }, { "epoch": 0.5466688929704459, "grad_norm": 0.5295939011926792, "learning_rate": 9.795719153726345e-06, "loss": 0.2306, "step": 1637 }, { "epoch": 0.5470028385373185, "grad_norm": 0.49722225435126244, "learning_rate": 9.795168985761192e-06, "loss": 0.2424, "step": 1638 }, { "epoch": 0.5473367841041911, "grad_norm": 0.4782717883596217, "learning_rate": 9.794618093429305e-06, "loss": 0.2337, "step": 1639 }, { "epoch": 0.5476707296710637, "grad_norm": 0.48131945296813267, "learning_rate": 9.794066476813901e-06, "loss": 0.2223, "step": 1640 }, { "epoch": 0.5480046752379362, "grad_norm": 0.5320451343577421, "learning_rate": 9.793514135998306e-06, "loss": 0.2194, "step": 1641 }, { "epoch": 0.5483386208048088, "grad_norm": 0.5662644769319405, "learning_rate": 9.792961071065958e-06, "loss": 0.2404, "step": 1642 }, { "epoch": 0.5486725663716814, "grad_norm": 0.4708050063434642, "learning_rate": 9.792407282100407e-06, "loss": 0.2125, "step": 1643 }, { "epoch": 0.549006511938554, "grad_norm": 0.5331092111658083, "learning_rate": 9.791852769185306e-06, "loss": 0.2317, "step": 1644 }, { "epoch": 0.5493404575054266, "grad_norm": 0.46285348701283613, "learning_rate": 9.791297532404422e-06, "loss": 0.2188, "step": 1645 }, { "epoch": 0.5496744030722992, "grad_norm": 0.6966380998201791, "learning_rate": 9.790741571841629e-06, "loss": 0.2434, "step": 1646 }, { "epoch": 0.5500083486391718, "grad_norm": 0.6705149653085449, "learning_rate": 9.790184887580914e-06, "loss": 0.2545, "step": 1647 }, { "epoch": 0.5503422942060444, "grad_norm": 0.5488413845730032, "learning_rate": 9.78962747970637e-06, "loss": 0.2192, "step": 1648 }, { "epoch": 0.550676239772917, "grad_norm": 0.508399250930862, "learning_rate": 9.789069348302197e-06, "loss": 0.2147, "step": 1649 }, { "epoch": 0.5510101853397896, "grad_norm": 0.5308431015969063, "learning_rate": 9.78851049345271e-06, "loss": 0.2451, "step": 1650 }, { "epoch": 0.5513441309066622, "grad_norm": 0.5254441033154762, "learning_rate": 9.78795091524233e-06, "loss": 0.2307, "step": 1651 }, { "epoch": 0.5516780764735348, "grad_norm": 0.49287285493077243, "learning_rate": 9.78739061375559e-06, "loss": 0.2279, "step": 1652 }, { "epoch": 0.5520120220404074, "grad_norm": 0.6250540163648396, "learning_rate": 9.786829589077125e-06, "loss": 0.2568, "step": 1653 }, { "epoch": 0.55234596760728, "grad_norm": 0.5757068773628706, "learning_rate": 9.78626784129169e-06, "loss": 0.2388, "step": 1654 }, { "epoch": 0.5526799131741527, "grad_norm": 0.5131405555742891, "learning_rate": 9.78570537048414e-06, "loss": 0.2195, "step": 1655 }, { "epoch": 0.5530138587410253, "grad_norm": 0.5273437954722322, "learning_rate": 9.785142176739444e-06, "loss": 0.234, "step": 1656 }, { "epoch": 0.5533478043078978, "grad_norm": 0.5715667449885279, "learning_rate": 9.784578260142679e-06, "loss": 0.2364, "step": 1657 }, { "epoch": 0.5536817498747704, "grad_norm": 0.4662686706134208, "learning_rate": 9.784013620779031e-06, "loss": 0.2174, "step": 1658 }, { "epoch": 0.554015695441643, "grad_norm": 0.5573487695151922, "learning_rate": 9.783448258733795e-06, "loss": 0.2393, "step": 1659 }, { "epoch": 0.5543496410085156, "grad_norm": 0.6004739211376822, "learning_rate": 9.782882174092377e-06, "loss": 0.2366, "step": 1660 }, { "epoch": 0.5546835865753882, "grad_norm": 0.5130868339214583, "learning_rate": 9.78231536694029e-06, "loss": 0.2317, "step": 1661 }, { "epoch": 0.5550175321422608, "grad_norm": 0.5081615667438504, "learning_rate": 9.781747837363158e-06, "loss": 0.2211, "step": 1662 }, { "epoch": 0.5553514777091334, "grad_norm": 0.5243058551452536, "learning_rate": 9.781179585446711e-06, "loss": 0.2321, "step": 1663 }, { "epoch": 0.555685423276006, "grad_norm": 0.5552330996626126, "learning_rate": 9.780610611276791e-06, "loss": 0.2263, "step": 1664 }, { "epoch": 0.5560193688428786, "grad_norm": 0.5084354233773488, "learning_rate": 9.780040914939349e-06, "loss": 0.2209, "step": 1665 }, { "epoch": 0.5563533144097512, "grad_norm": 0.8052644359842761, "learning_rate": 9.779470496520442e-06, "loss": 0.2786, "step": 1666 }, { "epoch": 0.5566872599766238, "grad_norm": 0.5320336558276018, "learning_rate": 9.77889935610624e-06, "loss": 0.2182, "step": 1667 }, { "epoch": 0.5570212055434964, "grad_norm": 0.5365736296465224, "learning_rate": 9.778327493783022e-06, "loss": 0.2251, "step": 1668 }, { "epoch": 0.557355151110369, "grad_norm": 0.4630060106256875, "learning_rate": 9.777754909637173e-06, "loss": 0.2072, "step": 1669 }, { "epoch": 0.5576890966772416, "grad_norm": 0.48881473250954816, "learning_rate": 9.777181603755188e-06, "loss": 0.2251, "step": 1670 }, { "epoch": 0.5580230422441143, "grad_norm": 0.5416086012245569, "learning_rate": 9.776607576223673e-06, "loss": 0.2241, "step": 1671 }, { "epoch": 0.5583569878109869, "grad_norm": 0.5213227615067991, "learning_rate": 9.776032827129338e-06, "loss": 0.221, "step": 1672 }, { "epoch": 0.5586909333778594, "grad_norm": 0.5756279722517523, "learning_rate": 9.775457356559013e-06, "loss": 0.2274, "step": 1673 }, { "epoch": 0.559024878944732, "grad_norm": 0.7857019129301895, "learning_rate": 9.774881164599621e-06, "loss": 0.2443, "step": 1674 }, { "epoch": 0.5593588245116046, "grad_norm": 0.5730959574701345, "learning_rate": 9.77430425133821e-06, "loss": 0.2275, "step": 1675 }, { "epoch": 0.5596927700784772, "grad_norm": 0.5424650987984629, "learning_rate": 9.773726616861926e-06, "loss": 0.2386, "step": 1676 }, { "epoch": 0.5600267156453498, "grad_norm": 0.527455714283128, "learning_rate": 9.773148261258025e-06, "loss": 0.2316, "step": 1677 }, { "epoch": 0.5603606612122224, "grad_norm": 0.6038936532104409, "learning_rate": 9.772569184613879e-06, "loss": 0.2251, "step": 1678 }, { "epoch": 0.560694606779095, "grad_norm": 0.500342452052048, "learning_rate": 9.771989387016962e-06, "loss": 0.2301, "step": 1679 }, { "epoch": 0.5610285523459676, "grad_norm": 0.5596628524290133, "learning_rate": 9.77140886855486e-06, "loss": 0.2347, "step": 1680 }, { "epoch": 0.5613624979128402, "grad_norm": 0.513227216859956, "learning_rate": 9.770827629315266e-06, "loss": 0.2316, "step": 1681 }, { "epoch": 0.5616964434797128, "grad_norm": 0.5571244273244604, "learning_rate": 9.770245669385984e-06, "loss": 0.2246, "step": 1682 }, { "epoch": 0.5620303890465854, "grad_norm": 0.5315880903884372, "learning_rate": 9.76966298885493e-06, "loss": 0.2215, "step": 1683 }, { "epoch": 0.562364334613458, "grad_norm": 0.49598745769160424, "learning_rate": 9.769079587810115e-06, "loss": 0.2309, "step": 1684 }, { "epoch": 0.5626982801803306, "grad_norm": 0.5013001295478747, "learning_rate": 9.768495466339675e-06, "loss": 0.2294, "step": 1685 }, { "epoch": 0.5630322257472032, "grad_norm": 0.5009756499143495, "learning_rate": 9.767910624531852e-06, "loss": 0.2178, "step": 1686 }, { "epoch": 0.5633661713140758, "grad_norm": 0.5041983827071985, "learning_rate": 9.767325062474984e-06, "loss": 0.2264, "step": 1687 }, { "epoch": 0.5637001168809485, "grad_norm": 0.5388073393930474, "learning_rate": 9.766738780257535e-06, "loss": 0.2443, "step": 1688 }, { "epoch": 0.564034062447821, "grad_norm": 0.4849109322874211, "learning_rate": 9.766151777968063e-06, "loss": 0.2238, "step": 1689 }, { "epoch": 0.5643680080146936, "grad_norm": 0.5165836599020902, "learning_rate": 9.765564055695249e-06, "loss": 0.2492, "step": 1690 }, { "epoch": 0.5647019535815662, "grad_norm": 0.5901306226307081, "learning_rate": 9.76497561352787e-06, "loss": 0.2369, "step": 1691 }, { "epoch": 0.5650358991484388, "grad_norm": 0.529734231185276, "learning_rate": 9.764386451554819e-06, "loss": 0.2289, "step": 1692 }, { "epoch": 0.5653698447153114, "grad_norm": 0.5307573390534124, "learning_rate": 9.763796569865095e-06, "loss": 0.2312, "step": 1693 }, { "epoch": 0.565703790282184, "grad_norm": 0.5486503617208062, "learning_rate": 9.763205968547808e-06, "loss": 0.2162, "step": 1694 }, { "epoch": 0.5660377358490566, "grad_norm": 0.5649817924362338, "learning_rate": 9.762614647692175e-06, "loss": 0.2191, "step": 1695 }, { "epoch": 0.5663716814159292, "grad_norm": 0.533218563853688, "learning_rate": 9.762022607387522e-06, "loss": 0.2277, "step": 1696 }, { "epoch": 0.5667056269828018, "grad_norm": 0.5396005837210106, "learning_rate": 9.761429847723281e-06, "loss": 0.251, "step": 1697 }, { "epoch": 0.5670395725496744, "grad_norm": 0.6021708734947762, "learning_rate": 9.760836368788999e-06, "loss": 0.2298, "step": 1698 }, { "epoch": 0.567373518116547, "grad_norm": 0.5571831043526276, "learning_rate": 9.760242170674325e-06, "loss": 0.221, "step": 1699 }, { "epoch": 0.5677074636834196, "grad_norm": 0.6496040372874637, "learning_rate": 9.759647253469023e-06, "loss": 0.2518, "step": 1700 }, { "epoch": 0.5680414092502922, "grad_norm": 0.5260316723253985, "learning_rate": 9.75905161726296e-06, "loss": 0.2489, "step": 1701 }, { "epoch": 0.5683753548171648, "grad_norm": 0.522847565423199, "learning_rate": 9.758455262146114e-06, "loss": 0.2379, "step": 1702 }, { "epoch": 0.5687093003840374, "grad_norm": 0.5215908948559349, "learning_rate": 9.757858188208571e-06, "loss": 0.2267, "step": 1703 }, { "epoch": 0.56904324595091, "grad_norm": 0.4937749913698861, "learning_rate": 9.757260395540527e-06, "loss": 0.2167, "step": 1704 }, { "epoch": 0.5693771915177827, "grad_norm": 0.5231134607360919, "learning_rate": 9.756661884232286e-06, "loss": 0.2288, "step": 1705 }, { "epoch": 0.5697111370846552, "grad_norm": 0.48578184135466346, "learning_rate": 9.756062654374259e-06, "loss": 0.2254, "step": 1706 }, { "epoch": 0.5700450826515278, "grad_norm": 0.4979412299848651, "learning_rate": 9.755462706056966e-06, "loss": 0.2266, "step": 1707 }, { "epoch": 0.5703790282184004, "grad_norm": 0.5441091075782964, "learning_rate": 9.75486203937104e-06, "loss": 0.2193, "step": 1708 }, { "epoch": 0.570712973785273, "grad_norm": 0.5216793063707312, "learning_rate": 9.754260654407214e-06, "loss": 0.2231, "step": 1709 }, { "epoch": 0.5710469193521456, "grad_norm": 0.5004552915915513, "learning_rate": 9.753658551256338e-06, "loss": 0.2399, "step": 1710 }, { "epoch": 0.5713808649190182, "grad_norm": 0.4875294674870155, "learning_rate": 9.753055730009364e-06, "loss": 0.2258, "step": 1711 }, { "epoch": 0.5717148104858908, "grad_norm": 0.6035412634336383, "learning_rate": 9.752452190757358e-06, "loss": 0.2284, "step": 1712 }, { "epoch": 0.5720487560527634, "grad_norm": 0.4665053445860015, "learning_rate": 9.751847933591489e-06, "loss": 0.2112, "step": 1713 }, { "epoch": 0.572382701619636, "grad_norm": 0.4892405263993013, "learning_rate": 9.75124295860304e-06, "loss": 0.228, "step": 1714 }, { "epoch": 0.5727166471865086, "grad_norm": 0.47689612627770583, "learning_rate": 9.750637265883395e-06, "loss": 0.2217, "step": 1715 }, { "epoch": 0.5730505927533812, "grad_norm": 0.5851060256324795, "learning_rate": 9.750030855524058e-06, "loss": 0.2308, "step": 1716 }, { "epoch": 0.5733845383202538, "grad_norm": 0.4739718971007316, "learning_rate": 9.749423727616628e-06, "loss": 0.2276, "step": 1717 }, { "epoch": 0.5737184838871264, "grad_norm": 0.5130343462285478, "learning_rate": 9.748815882252823e-06, "loss": 0.2379, "step": 1718 }, { "epoch": 0.574052429453999, "grad_norm": 0.5085916890670312, "learning_rate": 9.748207319524462e-06, "loss": 0.2238, "step": 1719 }, { "epoch": 0.5743863750208716, "grad_norm": 0.5028286185116214, "learning_rate": 9.747598039523476e-06, "loss": 0.2195, "step": 1720 }, { "epoch": 0.5747203205877442, "grad_norm": 0.5672833426547657, "learning_rate": 9.746988042341907e-06, "loss": 0.2429, "step": 1721 }, { "epoch": 0.5750542661546167, "grad_norm": 0.5485257563867748, "learning_rate": 9.746377328071899e-06, "loss": 0.2126, "step": 1722 }, { "epoch": 0.5753882117214894, "grad_norm": 0.5472794541807945, "learning_rate": 9.74576589680571e-06, "loss": 0.2257, "step": 1723 }, { "epoch": 0.575722157288362, "grad_norm": 0.6005489214038497, "learning_rate": 9.745153748635702e-06, "loss": 0.2297, "step": 1724 }, { "epoch": 0.5760561028552346, "grad_norm": 0.5331745130852713, "learning_rate": 9.744540883654348e-06, "loss": 0.2299, "step": 1725 }, { "epoch": 0.5763900484221072, "grad_norm": 0.5467918769216435, "learning_rate": 9.743927301954229e-06, "loss": 0.2434, "step": 1726 }, { "epoch": 0.5767239939889798, "grad_norm": 0.47631200906324167, "learning_rate": 9.743313003628033e-06, "loss": 0.2294, "step": 1727 }, { "epoch": 0.5770579395558524, "grad_norm": 1.028042272396889, "learning_rate": 9.742697988768557e-06, "loss": 0.2286, "step": 1728 }, { "epoch": 0.577391885122725, "grad_norm": 0.5723656104484236, "learning_rate": 9.742082257468705e-06, "loss": 0.2328, "step": 1729 }, { "epoch": 0.5777258306895976, "grad_norm": 0.5658357107734303, "learning_rate": 9.741465809821493e-06, "loss": 0.2206, "step": 1730 }, { "epoch": 0.5780597762564702, "grad_norm": 0.5303347638147783, "learning_rate": 9.74084864592004e-06, "loss": 0.2382, "step": 1731 }, { "epoch": 0.5783937218233428, "grad_norm": 0.5591531368062704, "learning_rate": 9.74023076585758e-06, "loss": 0.2216, "step": 1732 }, { "epoch": 0.5787276673902154, "grad_norm": 0.5152285449211208, "learning_rate": 9.739612169727446e-06, "loss": 0.2016, "step": 1733 }, { "epoch": 0.579061612957088, "grad_norm": 0.5240518968639969, "learning_rate": 9.73899285762309e-06, "loss": 0.236, "step": 1734 }, { "epoch": 0.5793955585239606, "grad_norm": 0.5941452749632281, "learning_rate": 9.738372829638058e-06, "loss": 0.2208, "step": 1735 }, { "epoch": 0.5797295040908332, "grad_norm": 0.5383582980347633, "learning_rate": 9.73775208586602e-06, "loss": 0.2231, "step": 1736 }, { "epoch": 0.5800634496577058, "grad_norm": 0.5097873416009885, "learning_rate": 9.737130626400745e-06, "loss": 0.2214, "step": 1737 }, { "epoch": 0.5803973952245783, "grad_norm": 0.6409001150404144, "learning_rate": 9.736508451336111e-06, "loss": 0.2565, "step": 1738 }, { "epoch": 0.580731340791451, "grad_norm": 0.5171820969817694, "learning_rate": 9.735885560766104e-06, "loss": 0.2247, "step": 1739 }, { "epoch": 0.5810652863583236, "grad_norm": 0.565262498850217, "learning_rate": 9.73526195478482e-06, "loss": 0.2525, "step": 1740 }, { "epoch": 0.5813992319251962, "grad_norm": 0.5191663542448296, "learning_rate": 9.73463763348646e-06, "loss": 0.2269, "step": 1741 }, { "epoch": 0.5817331774920688, "grad_norm": 0.5008213494455472, "learning_rate": 9.734012596965341e-06, "loss": 0.2292, "step": 1742 }, { "epoch": 0.5820671230589414, "grad_norm": 0.4985166781819941, "learning_rate": 9.733386845315875e-06, "loss": 0.2251, "step": 1743 }, { "epoch": 0.582401068625814, "grad_norm": 0.7068726658287037, "learning_rate": 9.732760378632592e-06, "loss": 0.2575, "step": 1744 }, { "epoch": 0.5827350141926866, "grad_norm": 0.532950090400461, "learning_rate": 9.73213319701013e-06, "loss": 0.2203, "step": 1745 }, { "epoch": 0.5830689597595592, "grad_norm": 0.4870910187814131, "learning_rate": 9.731505300543228e-06, "loss": 0.2235, "step": 1746 }, { "epoch": 0.5834029053264318, "grad_norm": 0.5183483007891654, "learning_rate": 9.730876689326739e-06, "loss": 0.2391, "step": 1747 }, { "epoch": 0.5837368508933044, "grad_norm": 0.5822886754220148, "learning_rate": 9.730247363455621e-06, "loss": 0.246, "step": 1748 }, { "epoch": 0.584070796460177, "grad_norm": 0.5303604700191408, "learning_rate": 9.729617323024943e-06, "loss": 0.2161, "step": 1749 }, { "epoch": 0.5844047420270496, "grad_norm": 0.5402294666419608, "learning_rate": 9.728986568129876e-06, "loss": 0.2165, "step": 1750 }, { "epoch": 0.5847386875939222, "grad_norm": 0.5229291505268835, "learning_rate": 9.72835509886571e-06, "loss": 0.2487, "step": 1751 }, { "epoch": 0.5850726331607948, "grad_norm": 1.005248107058838, "learning_rate": 9.727722915327828e-06, "loss": 0.2344, "step": 1752 }, { "epoch": 0.5854065787276674, "grad_norm": 0.4984992426091078, "learning_rate": 9.727090017611736e-06, "loss": 0.196, "step": 1753 }, { "epoch": 0.58574052429454, "grad_norm": 0.5333276983734322, "learning_rate": 9.726456405813033e-06, "loss": 0.2264, "step": 1754 }, { "epoch": 0.5860744698614125, "grad_norm": 0.45641453158909184, "learning_rate": 9.725822080027442e-06, "loss": 0.2239, "step": 1755 }, { "epoch": 0.5864084154282851, "grad_norm": 0.48841035707321817, "learning_rate": 9.725187040350778e-06, "loss": 0.222, "step": 1756 }, { "epoch": 0.5867423609951578, "grad_norm": 0.4741064605363138, "learning_rate": 9.724551286878976e-06, "loss": 0.2108, "step": 1757 }, { "epoch": 0.5870763065620304, "grad_norm": 0.502475937380326, "learning_rate": 9.723914819708073e-06, "loss": 0.2253, "step": 1758 }, { "epoch": 0.587410252128903, "grad_norm": 0.5194244729847486, "learning_rate": 9.723277638934212e-06, "loss": 0.2341, "step": 1759 }, { "epoch": 0.5877441976957756, "grad_norm": 0.4916149636167516, "learning_rate": 9.72263974465365e-06, "loss": 0.2306, "step": 1760 }, { "epoch": 0.5880781432626482, "grad_norm": 0.5023458268143842, "learning_rate": 9.722001136962746e-06, "loss": 0.2227, "step": 1761 }, { "epoch": 0.5884120888295208, "grad_norm": 0.5608383767110363, "learning_rate": 9.721361815957973e-06, "loss": 0.2491, "step": 1762 }, { "epoch": 0.5887460343963934, "grad_norm": 0.5144301238589608, "learning_rate": 9.720721781735905e-06, "loss": 0.2222, "step": 1763 }, { "epoch": 0.589079979963266, "grad_norm": 0.5018433935726713, "learning_rate": 9.720081034393226e-06, "loss": 0.2258, "step": 1764 }, { "epoch": 0.5894139255301386, "grad_norm": 0.49790563062526416, "learning_rate": 9.71943957402673e-06, "loss": 0.2285, "step": 1765 }, { "epoch": 0.5897478710970112, "grad_norm": 0.4748861509241753, "learning_rate": 9.718797400733314e-06, "loss": 0.218, "step": 1766 }, { "epoch": 0.5900818166638838, "grad_norm": 0.5390774391581642, "learning_rate": 9.718154514609992e-06, "loss": 0.2286, "step": 1767 }, { "epoch": 0.5904157622307564, "grad_norm": 0.5536961578875585, "learning_rate": 9.717510915753876e-06, "loss": 0.2402, "step": 1768 }, { "epoch": 0.590749707797629, "grad_norm": 0.6695127415098429, "learning_rate": 9.716866604262189e-06, "loss": 0.2235, "step": 1769 }, { "epoch": 0.5910836533645016, "grad_norm": 0.5069772545716459, "learning_rate": 9.716221580232261e-06, "loss": 0.2201, "step": 1770 }, { "epoch": 0.5914175989313741, "grad_norm": 0.8037833620387766, "learning_rate": 9.715575843761534e-06, "loss": 0.231, "step": 1771 }, { "epoch": 0.5917515444982467, "grad_norm": 0.5068062069274792, "learning_rate": 9.714929394947548e-06, "loss": 0.2211, "step": 1772 }, { "epoch": 0.5920854900651193, "grad_norm": 0.5549550762490882, "learning_rate": 9.714282233887962e-06, "loss": 0.2393, "step": 1773 }, { "epoch": 0.592419435631992, "grad_norm": 0.5395976419004379, "learning_rate": 9.713634360680537e-06, "loss": 0.2252, "step": 1774 }, { "epoch": 0.5927533811988646, "grad_norm": 0.4892732346046282, "learning_rate": 9.712985775423141e-06, "loss": 0.2193, "step": 1775 }, { "epoch": 0.5930873267657372, "grad_norm": 0.4711153870554155, "learning_rate": 9.712336478213747e-06, "loss": 0.2315, "step": 1776 }, { "epoch": 0.5934212723326098, "grad_norm": 0.4762551984595192, "learning_rate": 9.711686469150444e-06, "loss": 0.2355, "step": 1777 }, { "epoch": 0.5937552178994824, "grad_norm": 0.539286037756309, "learning_rate": 9.711035748331421e-06, "loss": 0.2305, "step": 1778 }, { "epoch": 0.594089163466355, "grad_norm": 0.44306089667586035, "learning_rate": 9.710384315854977e-06, "loss": 0.2119, "step": 1779 }, { "epoch": 0.5944231090332276, "grad_norm": 0.54924454544785, "learning_rate": 9.70973217181952e-06, "loss": 0.2452, "step": 1780 }, { "epoch": 0.5947570546001002, "grad_norm": 0.6928470758099178, "learning_rate": 9.709079316323564e-06, "loss": 0.2344, "step": 1781 }, { "epoch": 0.5950910001669728, "grad_norm": 0.482232968127417, "learning_rate": 9.70842574946573e-06, "loss": 0.2207, "step": 1782 }, { "epoch": 0.5954249457338454, "grad_norm": 0.49136177006387877, "learning_rate": 9.707771471344744e-06, "loss": 0.2261, "step": 1783 }, { "epoch": 0.595758891300718, "grad_norm": 0.46768352694125126, "learning_rate": 9.707116482059447e-06, "loss": 0.2336, "step": 1784 }, { "epoch": 0.5960928368675906, "grad_norm": 0.5029578869306203, "learning_rate": 9.70646078170878e-06, "loss": 0.2299, "step": 1785 }, { "epoch": 0.5964267824344632, "grad_norm": 0.6031061149189456, "learning_rate": 9.705804370391794e-06, "loss": 0.2338, "step": 1786 }, { "epoch": 0.5967607280013357, "grad_norm": 0.5619990384851195, "learning_rate": 9.705147248207652e-06, "loss": 0.2458, "step": 1787 }, { "epoch": 0.5970946735682083, "grad_norm": 0.481566798104976, "learning_rate": 9.704489415255614e-06, "loss": 0.2272, "step": 1788 }, { "epoch": 0.5974286191350809, "grad_norm": 0.5094230596305139, "learning_rate": 9.703830871635057e-06, "loss": 0.2366, "step": 1789 }, { "epoch": 0.5977625647019535, "grad_norm": 0.5032186806617618, "learning_rate": 9.703171617445461e-06, "loss": 0.2378, "step": 1790 }, { "epoch": 0.5980965102688262, "grad_norm": 0.5174151813237029, "learning_rate": 9.702511652786414e-06, "loss": 0.2194, "step": 1791 }, { "epoch": 0.5984304558356988, "grad_norm": 0.5280668787057868, "learning_rate": 9.701850977757611e-06, "loss": 0.2236, "step": 1792 }, { "epoch": 0.5987644014025714, "grad_norm": 0.5381616037918245, "learning_rate": 9.701189592458858e-06, "loss": 0.2243, "step": 1793 }, { "epoch": 0.599098346969444, "grad_norm": 0.5404391079985676, "learning_rate": 9.70052749699006e-06, "loss": 0.2281, "step": 1794 }, { "epoch": 0.5994322925363166, "grad_norm": 0.5628113540045915, "learning_rate": 9.699864691451236e-06, "loss": 0.23, "step": 1795 }, { "epoch": 0.5997662381031892, "grad_norm": 0.5329453587759123, "learning_rate": 9.699201175942514e-06, "loss": 0.215, "step": 1796 }, { "epoch": 0.6001001836700618, "grad_norm": 0.4718836619269112, "learning_rate": 9.698536950564121e-06, "loss": 0.2123, "step": 1797 }, { "epoch": 0.6004341292369344, "grad_norm": 0.5539291679398612, "learning_rate": 9.6978720154164e-06, "loss": 0.2108, "step": 1798 }, { "epoch": 0.600768074803807, "grad_norm": 0.5144961691465181, "learning_rate": 9.697206370599793e-06, "loss": 0.2322, "step": 1799 }, { "epoch": 0.6011020203706796, "grad_norm": 0.5731553026791534, "learning_rate": 9.696540016214857e-06, "loss": 0.2286, "step": 1800 }, { "epoch": 0.6014359659375522, "grad_norm": 0.5188370628082019, "learning_rate": 9.695872952362253e-06, "loss": 0.2284, "step": 1801 }, { "epoch": 0.6017699115044248, "grad_norm": 0.5671772412519566, "learning_rate": 9.695205179142746e-06, "loss": 0.2338, "step": 1802 }, { "epoch": 0.6021038570712974, "grad_norm": 0.5086216193765247, "learning_rate": 9.694536696657213e-06, "loss": 0.2197, "step": 1803 }, { "epoch": 0.6024378026381699, "grad_norm": 0.5238150472595622, "learning_rate": 9.693867505006634e-06, "loss": 0.2209, "step": 1804 }, { "epoch": 0.6027717482050425, "grad_norm": 0.49537223803612296, "learning_rate": 9.693197604292101e-06, "loss": 0.2314, "step": 1805 }, { "epoch": 0.6031056937719151, "grad_norm": 0.537065895743497, "learning_rate": 9.69252699461481e-06, "loss": 0.2411, "step": 1806 }, { "epoch": 0.6034396393387877, "grad_norm": 0.5685097570680073, "learning_rate": 9.691855676076064e-06, "loss": 0.24, "step": 1807 }, { "epoch": 0.6037735849056604, "grad_norm": 0.48659737235918643, "learning_rate": 9.691183648777271e-06, "loss": 0.217, "step": 1808 }, { "epoch": 0.604107530472533, "grad_norm": 0.48629641393278755, "learning_rate": 9.690510912819952e-06, "loss": 0.2199, "step": 1809 }, { "epoch": 0.6044414760394056, "grad_norm": 0.547728598895119, "learning_rate": 9.689837468305732e-06, "loss": 0.2299, "step": 1810 }, { "epoch": 0.6047754216062782, "grad_norm": 0.5394439412865157, "learning_rate": 9.689163315336339e-06, "loss": 0.243, "step": 1811 }, { "epoch": 0.6051093671731508, "grad_norm": 0.5103261884061013, "learning_rate": 9.688488454013616e-06, "loss": 0.2388, "step": 1812 }, { "epoch": 0.6054433127400234, "grad_norm": 0.5099890466296649, "learning_rate": 9.687812884439506e-06, "loss": 0.2344, "step": 1813 }, { "epoch": 0.605777258306896, "grad_norm": 0.46788777418989, "learning_rate": 9.687136606716064e-06, "loss": 0.2187, "step": 1814 }, { "epoch": 0.6061112038737686, "grad_norm": 0.5249164624914014, "learning_rate": 9.686459620945445e-06, "loss": 0.2227, "step": 1815 }, { "epoch": 0.6064451494406412, "grad_norm": 0.5331457391545483, "learning_rate": 9.685781927229923e-06, "loss": 0.2443, "step": 1816 }, { "epoch": 0.6067790950075138, "grad_norm": 0.5886242608442575, "learning_rate": 9.685103525671864e-06, "loss": 0.2531, "step": 1817 }, { "epoch": 0.6071130405743864, "grad_norm": 0.49860326082795825, "learning_rate": 9.684424416373754e-06, "loss": 0.2254, "step": 1818 }, { "epoch": 0.607446986141259, "grad_norm": 0.5325971197360724, "learning_rate": 9.683744599438178e-06, "loss": 0.2294, "step": 1819 }, { "epoch": 0.6077809317081315, "grad_norm": 0.5084348336824585, "learning_rate": 9.683064074967832e-06, "loss": 0.2375, "step": 1820 }, { "epoch": 0.6081148772750041, "grad_norm": 0.46827119088454616, "learning_rate": 9.682382843065516e-06, "loss": 0.2146, "step": 1821 }, { "epoch": 0.6084488228418767, "grad_norm": 0.5179404560268646, "learning_rate": 9.681700903834137e-06, "loss": 0.2324, "step": 1822 }, { "epoch": 0.6087827684087493, "grad_norm": 0.5482754379272196, "learning_rate": 9.681018257376713e-06, "loss": 0.2266, "step": 1823 }, { "epoch": 0.609116713975622, "grad_norm": 0.5402115399595885, "learning_rate": 9.680334903796363e-06, "loss": 0.2437, "step": 1824 }, { "epoch": 0.6094506595424946, "grad_norm": 0.485938659979222, "learning_rate": 9.679650843196318e-06, "loss": 0.2379, "step": 1825 }, { "epoch": 0.6097846051093672, "grad_norm": 0.47740857211098514, "learning_rate": 9.678966075679909e-06, "loss": 0.2294, "step": 1826 }, { "epoch": 0.6101185506762398, "grad_norm": 0.5105546022851197, "learning_rate": 9.678280601350584e-06, "loss": 0.2342, "step": 1827 }, { "epoch": 0.6104524962431124, "grad_norm": 0.5084617379607869, "learning_rate": 9.67759442031189e-06, "loss": 0.2333, "step": 1828 }, { "epoch": 0.610786441809985, "grad_norm": 0.4877401739921855, "learning_rate": 9.676907532667478e-06, "loss": 0.2286, "step": 1829 }, { "epoch": 0.6111203873768576, "grad_norm": 0.48925434897179987, "learning_rate": 9.676219938521116e-06, "loss": 0.2182, "step": 1830 }, { "epoch": 0.6114543329437302, "grad_norm": 0.4795913747536622, "learning_rate": 9.675531637976673e-06, "loss": 0.2183, "step": 1831 }, { "epoch": 0.6117882785106028, "grad_norm": 0.49260466676010023, "learning_rate": 9.674842631138121e-06, "loss": 0.2165, "step": 1832 }, { "epoch": 0.6121222240774754, "grad_norm": 0.48704610155668626, "learning_rate": 9.674152918109547e-06, "loss": 0.2306, "step": 1833 }, { "epoch": 0.612456169644348, "grad_norm": 0.5738129836410425, "learning_rate": 9.673462498995138e-06, "loss": 0.225, "step": 1834 }, { "epoch": 0.6127901152112206, "grad_norm": 0.5935737806472424, "learning_rate": 9.672771373899192e-06, "loss": 0.2263, "step": 1835 }, { "epoch": 0.6131240607780931, "grad_norm": 0.4805492485102887, "learning_rate": 9.672079542926108e-06, "loss": 0.2326, "step": 1836 }, { "epoch": 0.6134580063449657, "grad_norm": 0.5717231219637876, "learning_rate": 9.671387006180398e-06, "loss": 0.2354, "step": 1837 }, { "epoch": 0.6137919519118383, "grad_norm": 0.5224215947069372, "learning_rate": 9.670693763766674e-06, "loss": 0.2151, "step": 1838 }, { "epoch": 0.6141258974787109, "grad_norm": 0.526357591657413, "learning_rate": 9.669999815789664e-06, "loss": 0.233, "step": 1839 }, { "epoch": 0.6144598430455835, "grad_norm": 0.453213851588365, "learning_rate": 9.669305162354194e-06, "loss": 0.2039, "step": 1840 }, { "epoch": 0.6147937886124561, "grad_norm": 0.5265771089532885, "learning_rate": 9.6686098035652e-06, "loss": 0.226, "step": 1841 }, { "epoch": 0.6151277341793288, "grad_norm": 0.4822531246116318, "learning_rate": 9.667913739527724e-06, "loss": 0.2266, "step": 1842 }, { "epoch": 0.6154616797462014, "grad_norm": 0.5092579744932829, "learning_rate": 9.667216970346916e-06, "loss": 0.2227, "step": 1843 }, { "epoch": 0.615795625313074, "grad_norm": 0.5705960019212173, "learning_rate": 9.666519496128027e-06, "loss": 0.2179, "step": 1844 }, { "epoch": 0.6161295708799466, "grad_norm": 0.5870450982878944, "learning_rate": 9.665821316976423e-06, "loss": 0.2309, "step": 1845 }, { "epoch": 0.6164635164468192, "grad_norm": 0.48323049309301813, "learning_rate": 9.665122432997571e-06, "loss": 0.2228, "step": 1846 }, { "epoch": 0.6167974620136918, "grad_norm": 0.43522897761120205, "learning_rate": 9.664422844297045e-06, "loss": 0.2168, "step": 1847 }, { "epoch": 0.6171314075805644, "grad_norm": 0.4797401271036264, "learning_rate": 9.663722550980528e-06, "loss": 0.2166, "step": 1848 }, { "epoch": 0.617465353147437, "grad_norm": 0.5404918091345311, "learning_rate": 9.663021553153805e-06, "loss": 0.2155, "step": 1849 }, { "epoch": 0.6177992987143096, "grad_norm": 0.5019775079244504, "learning_rate": 9.66231985092277e-06, "loss": 0.205, "step": 1850 }, { "epoch": 0.6181332442811822, "grad_norm": 0.5126171358341591, "learning_rate": 9.661617444393427e-06, "loss": 0.2342, "step": 1851 }, { "epoch": 0.6184671898480548, "grad_norm": 0.5298697261126594, "learning_rate": 9.660914333671878e-06, "loss": 0.2189, "step": 1852 }, { "epoch": 0.6188011354149273, "grad_norm": 0.6977397151332118, "learning_rate": 9.66021051886434e-06, "loss": 0.242, "step": 1853 }, { "epoch": 0.6191350809817999, "grad_norm": 0.4420208100094779, "learning_rate": 9.65950600007713e-06, "loss": 0.1995, "step": 1854 }, { "epoch": 0.6194690265486725, "grad_norm": 0.49759113853149267, "learning_rate": 9.658800777416676e-06, "loss": 0.2142, "step": 1855 }, { "epoch": 0.6198029721155451, "grad_norm": 0.4644683178989199, "learning_rate": 9.658094850989508e-06, "loss": 0.206, "step": 1856 }, { "epoch": 0.6201369176824177, "grad_norm": 0.7389862240060607, "learning_rate": 9.657388220902265e-06, "loss": 0.2231, "step": 1857 }, { "epoch": 0.6204708632492903, "grad_norm": 0.4606947820212059, "learning_rate": 9.656680887261693e-06, "loss": 0.2115, "step": 1858 }, { "epoch": 0.620804808816163, "grad_norm": 0.5020543120959755, "learning_rate": 9.655972850174642e-06, "loss": 0.2274, "step": 1859 }, { "epoch": 0.6211387543830356, "grad_norm": 0.44475121899664427, "learning_rate": 9.65526410974807e-06, "loss": 0.2113, "step": 1860 }, { "epoch": 0.6214726999499082, "grad_norm": 0.47941219176464955, "learning_rate": 9.65455466608904e-06, "loss": 0.2232, "step": 1861 }, { "epoch": 0.6218066455167808, "grad_norm": 0.4662479753839037, "learning_rate": 9.653844519304722e-06, "loss": 0.2238, "step": 1862 }, { "epoch": 0.6221405910836534, "grad_norm": 0.5892373131541172, "learning_rate": 9.653133669502393e-06, "loss": 0.2307, "step": 1863 }, { "epoch": 0.622474536650526, "grad_norm": 0.46292878818156435, "learning_rate": 9.652422116789432e-06, "loss": 0.221, "step": 1864 }, { "epoch": 0.6228084822173986, "grad_norm": 0.5683790013976829, "learning_rate": 9.651709861273334e-06, "loss": 0.2267, "step": 1865 }, { "epoch": 0.6231424277842712, "grad_norm": 0.4718926218027168, "learning_rate": 9.650996903061685e-06, "loss": 0.2138, "step": 1866 }, { "epoch": 0.6234763733511438, "grad_norm": 0.496502433614263, "learning_rate": 9.650283242262192e-06, "loss": 0.2246, "step": 1867 }, { "epoch": 0.6238103189180164, "grad_norm": 0.4876162037690022, "learning_rate": 9.64956887898266e-06, "loss": 0.2303, "step": 1868 }, { "epoch": 0.6241442644848889, "grad_norm": 0.5145615842272623, "learning_rate": 9.648853813331e-06, "loss": 0.2278, "step": 1869 }, { "epoch": 0.6244782100517615, "grad_norm": 0.527130326304707, "learning_rate": 9.648138045415236e-06, "loss": 0.2301, "step": 1870 }, { "epoch": 0.6248121556186341, "grad_norm": 0.49041057430030827, "learning_rate": 9.647421575343488e-06, "loss": 0.2271, "step": 1871 }, { "epoch": 0.6251461011855067, "grad_norm": 0.4861327887722697, "learning_rate": 9.646704403223991e-06, "loss": 0.2153, "step": 1872 }, { "epoch": 0.6254800467523793, "grad_norm": 0.4859486191134469, "learning_rate": 9.64598652916508e-06, "loss": 0.2159, "step": 1873 }, { "epoch": 0.6258139923192519, "grad_norm": 0.46779863060566784, "learning_rate": 9.6452679532752e-06, "loss": 0.2145, "step": 1874 }, { "epoch": 0.6261479378861246, "grad_norm": 0.5215440387248278, "learning_rate": 9.644548675662897e-06, "loss": 0.2309, "step": 1875 }, { "epoch": 0.6264818834529972, "grad_norm": 0.5113295193583, "learning_rate": 9.64382869643683e-06, "loss": 0.2393, "step": 1876 }, { "epoch": 0.6268158290198698, "grad_norm": 0.6629216513399953, "learning_rate": 9.64310801570576e-06, "loss": 0.243, "step": 1877 }, { "epoch": 0.6271497745867424, "grad_norm": 0.5805733643515224, "learning_rate": 9.642386633578553e-06, "loss": 0.236, "step": 1878 }, { "epoch": 0.627483720153615, "grad_norm": 0.5211638075348191, "learning_rate": 9.641664550164182e-06, "loss": 0.2262, "step": 1879 }, { "epoch": 0.6278176657204876, "grad_norm": 0.44593296545663935, "learning_rate": 9.640941765571727e-06, "loss": 0.231, "step": 1880 }, { "epoch": 0.6281516112873602, "grad_norm": 0.47615138615510033, "learning_rate": 9.640218279910374e-06, "loss": 0.2146, "step": 1881 }, { "epoch": 0.6284855568542328, "grad_norm": 0.5022339248493972, "learning_rate": 9.639494093289412e-06, "loss": 0.2291, "step": 1882 }, { "epoch": 0.6288195024211054, "grad_norm": 0.46430936600624695, "learning_rate": 9.638769205818239e-06, "loss": 0.2071, "step": 1883 }, { "epoch": 0.629153447987978, "grad_norm": 0.5226803610865401, "learning_rate": 9.638043617606358e-06, "loss": 0.2414, "step": 1884 }, { "epoch": 0.6294873935548505, "grad_norm": 0.48540480199653707, "learning_rate": 9.637317328763378e-06, "loss": 0.2233, "step": 1885 }, { "epoch": 0.6298213391217231, "grad_norm": 0.44530932798385103, "learning_rate": 9.636590339399012e-06, "loss": 0.2044, "step": 1886 }, { "epoch": 0.6301552846885957, "grad_norm": 0.46347505096860814, "learning_rate": 9.63586264962308e-06, "loss": 0.226, "step": 1887 }, { "epoch": 0.6304892302554683, "grad_norm": 0.5058077241719129, "learning_rate": 9.635134259545511e-06, "loss": 0.2334, "step": 1888 }, { "epoch": 0.6308231758223409, "grad_norm": 0.5197637248708888, "learning_rate": 9.634405169276335e-06, "loss": 0.2325, "step": 1889 }, { "epoch": 0.6311571213892135, "grad_norm": 0.5179888030499751, "learning_rate": 9.63367537892569e-06, "loss": 0.2226, "step": 1890 }, { "epoch": 0.6314910669560861, "grad_norm": 0.6237768819180914, "learning_rate": 9.63294488860382e-06, "loss": 0.2429, "step": 1891 }, { "epoch": 0.6318250125229588, "grad_norm": 0.4783353974434491, "learning_rate": 9.63221369842107e-06, "loss": 0.2161, "step": 1892 }, { "epoch": 0.6321589580898314, "grad_norm": 0.4625966356502894, "learning_rate": 9.631481808487902e-06, "loss": 0.219, "step": 1893 }, { "epoch": 0.632492903656704, "grad_norm": 0.5729936471207139, "learning_rate": 9.63074921891487e-06, "loss": 0.2275, "step": 1894 }, { "epoch": 0.6328268492235766, "grad_norm": 0.4584520008958071, "learning_rate": 9.630015929812646e-06, "loss": 0.2271, "step": 1895 }, { "epoch": 0.6331607947904492, "grad_norm": 0.5542075963679276, "learning_rate": 9.629281941291998e-06, "loss": 0.2412, "step": 1896 }, { "epoch": 0.6334947403573218, "grad_norm": 0.47050880416056196, "learning_rate": 9.628547253463804e-06, "loss": 0.2254, "step": 1897 }, { "epoch": 0.6338286859241944, "grad_norm": 0.5287224363148264, "learning_rate": 9.627811866439048e-06, "loss": 0.226, "step": 1898 }, { "epoch": 0.634162631491067, "grad_norm": 0.5349630430590014, "learning_rate": 9.627075780328818e-06, "loss": 0.2457, "step": 1899 }, { "epoch": 0.6344965770579396, "grad_norm": 0.48848827404295203, "learning_rate": 9.626338995244313e-06, "loss": 0.2239, "step": 1900 }, { "epoch": 0.6348305226248122, "grad_norm": 0.5392885299832741, "learning_rate": 9.625601511296826e-06, "loss": 0.2292, "step": 1901 }, { "epoch": 0.6351644681916847, "grad_norm": 0.51626967141371, "learning_rate": 9.624863328597767e-06, "loss": 0.2411, "step": 1902 }, { "epoch": 0.6354984137585573, "grad_norm": 0.48472493185557913, "learning_rate": 9.624124447258647e-06, "loss": 0.2177, "step": 1903 }, { "epoch": 0.6358323593254299, "grad_norm": 0.6068590044334997, "learning_rate": 9.62338486739108e-06, "loss": 0.2214, "step": 1904 }, { "epoch": 0.6361663048923025, "grad_norm": 0.5569455920656661, "learning_rate": 9.62264458910679e-06, "loss": 0.2227, "step": 1905 }, { "epoch": 0.6365002504591751, "grad_norm": 0.4943658711020671, "learning_rate": 9.621903612517608e-06, "loss": 0.22, "step": 1906 }, { "epoch": 0.6368341960260477, "grad_norm": 0.6738439864635068, "learning_rate": 9.621161937735463e-06, "loss": 0.2215, "step": 1907 }, { "epoch": 0.6371681415929203, "grad_norm": 0.5125207288216446, "learning_rate": 9.620419564872394e-06, "loss": 0.2311, "step": 1908 }, { "epoch": 0.637502087159793, "grad_norm": 0.5477693989903016, "learning_rate": 9.619676494040547e-06, "loss": 0.2447, "step": 1909 }, { "epoch": 0.6378360327266656, "grad_norm": 0.618827790959411, "learning_rate": 9.61893272535217e-06, "loss": 0.2269, "step": 1910 }, { "epoch": 0.6381699782935382, "grad_norm": 0.5831020385440705, "learning_rate": 9.618188258919618e-06, "loss": 0.2504, "step": 1911 }, { "epoch": 0.6385039238604108, "grad_norm": 0.4575021594118544, "learning_rate": 9.617443094855354e-06, "loss": 0.2212, "step": 1912 }, { "epoch": 0.6388378694272834, "grad_norm": 0.474232930993678, "learning_rate": 9.61669723327194e-06, "loss": 0.2352, "step": 1913 }, { "epoch": 0.639171814994156, "grad_norm": 0.5374800460315998, "learning_rate": 9.615950674282049e-06, "loss": 0.2396, "step": 1914 }, { "epoch": 0.6395057605610286, "grad_norm": 0.5287587226953798, "learning_rate": 9.61520341799846e-06, "loss": 0.2302, "step": 1915 }, { "epoch": 0.6398397061279012, "grad_norm": 0.5237854216155173, "learning_rate": 9.614455464534049e-06, "loss": 0.2453, "step": 1916 }, { "epoch": 0.6401736516947738, "grad_norm": 0.44508260080269146, "learning_rate": 9.613706814001809e-06, "loss": 0.2066, "step": 1917 }, { "epoch": 0.6405075972616463, "grad_norm": 0.5211596090749453, "learning_rate": 9.612957466514829e-06, "loss": 0.2279, "step": 1918 }, { "epoch": 0.6408415428285189, "grad_norm": 0.4862656406723413, "learning_rate": 9.61220742218631e-06, "loss": 0.2304, "step": 1919 }, { "epoch": 0.6411754883953915, "grad_norm": 0.4472518622732128, "learning_rate": 9.61145668112955e-06, "loss": 0.2296, "step": 1920 }, { "epoch": 0.6415094339622641, "grad_norm": 0.5348057312329286, "learning_rate": 9.610705243457962e-06, "loss": 0.2421, "step": 1921 }, { "epoch": 0.6418433795291367, "grad_norm": 0.539916851096655, "learning_rate": 9.609953109285057e-06, "loss": 0.2225, "step": 1922 }, { "epoch": 0.6421773250960093, "grad_norm": 0.486698830537002, "learning_rate": 9.609200278724456e-06, "loss": 0.2303, "step": 1923 }, { "epoch": 0.6425112706628819, "grad_norm": 0.4853767234392697, "learning_rate": 9.60844675188988e-06, "loss": 0.2358, "step": 1924 }, { "epoch": 0.6428452162297545, "grad_norm": 0.5783948282032476, "learning_rate": 9.60769252889516e-06, "loss": 0.2549, "step": 1925 }, { "epoch": 0.6431791617966272, "grad_norm": 0.4677106105333194, "learning_rate": 9.606937609854227e-06, "loss": 0.2311, "step": 1926 }, { "epoch": 0.6435131073634998, "grad_norm": 0.7242966644398318, "learning_rate": 9.606181994881124e-06, "loss": 0.2273, "step": 1927 }, { "epoch": 0.6438470529303724, "grad_norm": 0.5186721278224474, "learning_rate": 9.605425684089998e-06, "loss": 0.246, "step": 1928 }, { "epoch": 0.644180998497245, "grad_norm": 0.6545504763262779, "learning_rate": 9.604668677595093e-06, "loss": 0.2339, "step": 1929 }, { "epoch": 0.6445149440641176, "grad_norm": 0.4496430333660451, "learning_rate": 9.603910975510764e-06, "loss": 0.2205, "step": 1930 }, { "epoch": 0.6448488896309902, "grad_norm": 0.5824478060538069, "learning_rate": 9.603152577951476e-06, "loss": 0.2292, "step": 1931 }, { "epoch": 0.6451828351978628, "grad_norm": 0.4683634543655863, "learning_rate": 9.60239348503179e-06, "loss": 0.2103, "step": 1932 }, { "epoch": 0.6455167807647354, "grad_norm": 0.4654095627434126, "learning_rate": 9.601633696866376e-06, "loss": 0.2247, "step": 1933 }, { "epoch": 0.6458507263316079, "grad_norm": 0.503168780861353, "learning_rate": 9.60087321357001e-06, "loss": 0.2255, "step": 1934 }, { "epoch": 0.6461846718984805, "grad_norm": 0.5396694281989363, "learning_rate": 9.600112035257571e-06, "loss": 0.2222, "step": 1935 }, { "epoch": 0.6465186174653531, "grad_norm": 0.4407844703562947, "learning_rate": 9.599350162044045e-06, "loss": 0.2215, "step": 1936 }, { "epoch": 0.6468525630322257, "grad_norm": 0.4504311612267907, "learning_rate": 9.598587594044522e-06, "loss": 0.2241, "step": 1937 }, { "epoch": 0.6471865085990983, "grad_norm": 0.5078973609706072, "learning_rate": 9.597824331374196e-06, "loss": 0.2147, "step": 1938 }, { "epoch": 0.6475204541659709, "grad_norm": 0.5105786340988594, "learning_rate": 9.597060374148365e-06, "loss": 0.222, "step": 1939 }, { "epoch": 0.6478543997328435, "grad_norm": 0.4459757628273001, "learning_rate": 9.596295722482439e-06, "loss": 0.2175, "step": 1940 }, { "epoch": 0.6481883452997161, "grad_norm": 0.5146618161191947, "learning_rate": 9.595530376491924e-06, "loss": 0.2367, "step": 1941 }, { "epoch": 0.6485222908665887, "grad_norm": 0.4924035886938882, "learning_rate": 9.594764336292432e-06, "loss": 0.227, "step": 1942 }, { "epoch": 0.6488562364334614, "grad_norm": 0.4836297523018866, "learning_rate": 9.593997601999689e-06, "loss": 0.2184, "step": 1943 }, { "epoch": 0.649190182000334, "grad_norm": 0.4954472502507133, "learning_rate": 9.593230173729514e-06, "loss": 0.2289, "step": 1944 }, { "epoch": 0.6495241275672066, "grad_norm": 0.46892208039162586, "learning_rate": 9.592462051597838e-06, "loss": 0.2333, "step": 1945 }, { "epoch": 0.6498580731340792, "grad_norm": 0.4707733396458665, "learning_rate": 9.591693235720695e-06, "loss": 0.2179, "step": 1946 }, { "epoch": 0.6501920187009518, "grad_norm": 0.5021563108304271, "learning_rate": 9.590923726214224e-06, "loss": 0.2312, "step": 1947 }, { "epoch": 0.6505259642678244, "grad_norm": 0.5216409946552976, "learning_rate": 9.590153523194665e-06, "loss": 0.236, "step": 1948 }, { "epoch": 0.650859909834697, "grad_norm": 0.445367026551766, "learning_rate": 9.589382626778371e-06, "loss": 0.2039, "step": 1949 }, { "epoch": 0.6511938554015696, "grad_norm": 0.4719313967503157, "learning_rate": 9.588611037081793e-06, "loss": 0.2291, "step": 1950 }, { "epoch": 0.6515278009684421, "grad_norm": 0.5041618490835817, "learning_rate": 9.587838754221488e-06, "loss": 0.2186, "step": 1951 }, { "epoch": 0.6518617465353147, "grad_norm": 0.5656846698351666, "learning_rate": 9.587065778314119e-06, "loss": 0.2292, "step": 1952 }, { "epoch": 0.6521956921021873, "grad_norm": 0.48863066421780066, "learning_rate": 9.586292109476454e-06, "loss": 0.2107, "step": 1953 }, { "epoch": 0.6525296376690599, "grad_norm": 0.7874907012437945, "learning_rate": 9.585517747825363e-06, "loss": 0.2378, "step": 1954 }, { "epoch": 0.6528635832359325, "grad_norm": 0.5095487818003742, "learning_rate": 9.584742693477825e-06, "loss": 0.2279, "step": 1955 }, { "epoch": 0.6531975288028051, "grad_norm": 0.5643641986886433, "learning_rate": 9.58396694655092e-06, "loss": 0.2581, "step": 1956 }, { "epoch": 0.6535314743696777, "grad_norm": 0.4898371660965593, "learning_rate": 9.583190507161832e-06, "loss": 0.2203, "step": 1957 }, { "epoch": 0.6538654199365503, "grad_norm": 0.5921772486152574, "learning_rate": 9.582413375427852e-06, "loss": 0.2268, "step": 1958 }, { "epoch": 0.654199365503423, "grad_norm": 0.45116730167892133, "learning_rate": 9.581635551466376e-06, "loss": 0.2195, "step": 1959 }, { "epoch": 0.6545333110702956, "grad_norm": 0.47843623987223366, "learning_rate": 9.580857035394904e-06, "loss": 0.225, "step": 1960 }, { "epoch": 0.6548672566371682, "grad_norm": 0.5197754147716475, "learning_rate": 9.580077827331038e-06, "loss": 0.2368, "step": 1961 }, { "epoch": 0.6552012022040408, "grad_norm": 0.4170784963842771, "learning_rate": 9.579297927392488e-06, "loss": 0.2065, "step": 1962 }, { "epoch": 0.6555351477709134, "grad_norm": 0.5337746138339877, "learning_rate": 9.578517335697065e-06, "loss": 0.2313, "step": 1963 }, { "epoch": 0.655869093337786, "grad_norm": 0.49279896240854576, "learning_rate": 9.577736052362689e-06, "loss": 0.2268, "step": 1964 }, { "epoch": 0.6562030389046586, "grad_norm": 0.4302932121647986, "learning_rate": 9.576954077507381e-06, "loss": 0.2049, "step": 1965 }, { "epoch": 0.6565369844715312, "grad_norm": 0.5127555964144664, "learning_rate": 9.576171411249269e-06, "loss": 0.23, "step": 1966 }, { "epoch": 0.6568709300384037, "grad_norm": 0.5080459538592854, "learning_rate": 9.575388053706582e-06, "loss": 0.2317, "step": 1967 }, { "epoch": 0.6572048756052763, "grad_norm": 0.4745192813209085, "learning_rate": 9.574604004997654e-06, "loss": 0.2154, "step": 1968 }, { "epoch": 0.6575388211721489, "grad_norm": 0.5389965358570025, "learning_rate": 9.57381926524093e-06, "loss": 0.2433, "step": 1969 }, { "epoch": 0.6578727667390215, "grad_norm": 0.4956040255633844, "learning_rate": 9.57303383455495e-06, "loss": 0.2234, "step": 1970 }, { "epoch": 0.6582067123058941, "grad_norm": 0.48821226341668883, "learning_rate": 9.572247713058362e-06, "loss": 0.2225, "step": 1971 }, { "epoch": 0.6585406578727667, "grad_norm": 0.48960509199150504, "learning_rate": 9.571460900869923e-06, "loss": 0.2243, "step": 1972 }, { "epoch": 0.6588746034396393, "grad_norm": 0.5388856840457162, "learning_rate": 9.570673398108485e-06, "loss": 0.2191, "step": 1973 }, { "epoch": 0.6592085490065119, "grad_norm": 0.5247531776272966, "learning_rate": 9.569885204893015e-06, "loss": 0.2438, "step": 1974 }, { "epoch": 0.6595424945733845, "grad_norm": 0.4636309459992003, "learning_rate": 9.569096321342574e-06, "loss": 0.2205, "step": 1975 }, { "epoch": 0.6598764401402571, "grad_norm": 0.5647518684704627, "learning_rate": 9.568306747576335e-06, "loss": 0.2424, "step": 1976 }, { "epoch": 0.6602103857071298, "grad_norm": 0.4632908967911912, "learning_rate": 9.567516483713572e-06, "loss": 0.2183, "step": 1977 }, { "epoch": 0.6605443312740024, "grad_norm": 0.4806235309010455, "learning_rate": 9.566725529873664e-06, "loss": 0.2129, "step": 1978 }, { "epoch": 0.660878276840875, "grad_norm": 0.47812898299179163, "learning_rate": 9.565933886176093e-06, "loss": 0.2226, "step": 1979 }, { "epoch": 0.6612122224077476, "grad_norm": 0.4790050168212765, "learning_rate": 9.565141552740445e-06, "loss": 0.2127, "step": 1980 }, { "epoch": 0.6615461679746202, "grad_norm": 0.47301797232701465, "learning_rate": 9.564348529686413e-06, "loss": 0.2276, "step": 1981 }, { "epoch": 0.6618801135414928, "grad_norm": 0.47800269002423895, "learning_rate": 9.563554817133794e-06, "loss": 0.2276, "step": 1982 }, { "epoch": 0.6622140591083653, "grad_norm": 0.47697326231641335, "learning_rate": 9.562760415202483e-06, "loss": 0.2075, "step": 1983 }, { "epoch": 0.6625480046752379, "grad_norm": 0.45114047498178445, "learning_rate": 9.56196532401249e-06, "loss": 0.1993, "step": 1984 }, { "epoch": 0.6628819502421105, "grad_norm": 0.6767131550163791, "learning_rate": 9.561169543683917e-06, "loss": 0.2323, "step": 1985 }, { "epoch": 0.6632158958089831, "grad_norm": 0.46779536182015385, "learning_rate": 9.560373074336977e-06, "loss": 0.2219, "step": 1986 }, { "epoch": 0.6635498413758557, "grad_norm": 0.5689668029517431, "learning_rate": 9.55957591609199e-06, "loss": 0.2294, "step": 1987 }, { "epoch": 0.6638837869427283, "grad_norm": 0.5473530266696686, "learning_rate": 9.558778069069373e-06, "loss": 0.2187, "step": 1988 }, { "epoch": 0.6642177325096009, "grad_norm": 0.49049488313910705, "learning_rate": 9.55797953338965e-06, "loss": 0.2159, "step": 1989 }, { "epoch": 0.6645516780764735, "grad_norm": 0.6069581055887437, "learning_rate": 9.55718030917345e-06, "loss": 0.2333, "step": 1990 }, { "epoch": 0.6648856236433461, "grad_norm": 0.5169479730953396, "learning_rate": 9.556380396541507e-06, "loss": 0.2312, "step": 1991 }, { "epoch": 0.6652195692102187, "grad_norm": 0.5700537116713167, "learning_rate": 9.555579795614654e-06, "loss": 0.247, "step": 1992 }, { "epoch": 0.6655535147770913, "grad_norm": 0.6320463038320528, "learning_rate": 9.554778506513834e-06, "loss": 0.2578, "step": 1993 }, { "epoch": 0.665887460343964, "grad_norm": 0.6242767829904925, "learning_rate": 9.553976529360087e-06, "loss": 0.2453, "step": 1994 }, { "epoch": 0.6662214059108366, "grad_norm": 0.4657124319169294, "learning_rate": 9.553173864274567e-06, "loss": 0.2049, "step": 1995 }, { "epoch": 0.6665553514777092, "grad_norm": 0.6911587103761502, "learning_rate": 9.552370511378522e-06, "loss": 0.2255, "step": 1996 }, { "epoch": 0.6668892970445818, "grad_norm": 0.5680114058138519, "learning_rate": 9.551566470793308e-06, "loss": 0.2209, "step": 1997 }, { "epoch": 0.6672232426114544, "grad_norm": 0.4878976742541729, "learning_rate": 9.550761742640387e-06, "loss": 0.2254, "step": 1998 }, { "epoch": 0.667557188178327, "grad_norm": 0.5997891836824637, "learning_rate": 9.549956327041318e-06, "loss": 0.2323, "step": 1999 }, { "epoch": 0.6678911337451995, "grad_norm": 0.4561949621578282, "learning_rate": 9.549150224117776e-06, "loss": 0.2078, "step": 2000 }, { "epoch": 0.6682250793120721, "grad_norm": 0.4842209833350863, "learning_rate": 9.548343433991524e-06, "loss": 0.2096, "step": 2001 }, { "epoch": 0.6685590248789447, "grad_norm": 0.5321439301519651, "learning_rate": 9.547535956784445e-06, "loss": 0.2498, "step": 2002 }, { "epoch": 0.6688929704458173, "grad_norm": 0.5345226511901285, "learning_rate": 9.546727792618512e-06, "loss": 0.2149, "step": 2003 }, { "epoch": 0.6692269160126899, "grad_norm": 0.5948754984549891, "learning_rate": 9.545918941615811e-06, "loss": 0.2268, "step": 2004 }, { "epoch": 0.6695608615795625, "grad_norm": 0.5249741131297062, "learning_rate": 9.545109403898527e-06, "loss": 0.2332, "step": 2005 }, { "epoch": 0.6698948071464351, "grad_norm": 0.448439782885968, "learning_rate": 9.544299179588952e-06, "loss": 0.2197, "step": 2006 }, { "epoch": 0.6702287527133077, "grad_norm": 0.5320016792358742, "learning_rate": 9.543488268809478e-06, "loss": 0.2217, "step": 2007 }, { "epoch": 0.6705626982801803, "grad_norm": 0.5033575128331692, "learning_rate": 9.542676671682601e-06, "loss": 0.2294, "step": 2008 }, { "epoch": 0.6708966438470529, "grad_norm": 0.5338603551058775, "learning_rate": 9.541864388330926e-06, "loss": 0.2359, "step": 2009 }, { "epoch": 0.6712305894139255, "grad_norm": 0.4960343876895645, "learning_rate": 9.541051418877156e-06, "loss": 0.23, "step": 2010 }, { "epoch": 0.6715645349807982, "grad_norm": 0.5019233736697615, "learning_rate": 9.5402377634441e-06, "loss": 0.2184, "step": 2011 }, { "epoch": 0.6718984805476708, "grad_norm": 0.5116242184595458, "learning_rate": 9.539423422154672e-06, "loss": 0.2057, "step": 2012 }, { "epoch": 0.6722324261145434, "grad_norm": 0.4581151499736462, "learning_rate": 9.538608395131884e-06, "loss": 0.219, "step": 2013 }, { "epoch": 0.672566371681416, "grad_norm": 0.5263573724429383, "learning_rate": 9.537792682498859e-06, "loss": 0.2345, "step": 2014 }, { "epoch": 0.6729003172482886, "grad_norm": 0.5486956203361241, "learning_rate": 9.536976284378818e-06, "loss": 0.2365, "step": 2015 }, { "epoch": 0.6732342628151611, "grad_norm": 0.48552662228072885, "learning_rate": 9.536159200895088e-06, "loss": 0.2472, "step": 2016 }, { "epoch": 0.6735682083820337, "grad_norm": 0.4807730164835203, "learning_rate": 9.535341432171098e-06, "loss": 0.2158, "step": 2017 }, { "epoch": 0.6739021539489063, "grad_norm": 0.5082241482128, "learning_rate": 9.534522978330384e-06, "loss": 0.2273, "step": 2018 }, { "epoch": 0.6742360995157789, "grad_norm": 0.5247052847237837, "learning_rate": 9.533703839496581e-06, "loss": 0.2122, "step": 2019 }, { "epoch": 0.6745700450826515, "grad_norm": 0.4996722623100839, "learning_rate": 9.532884015793432e-06, "loss": 0.2144, "step": 2020 }, { "epoch": 0.6749039906495241, "grad_norm": 0.4864919237009365, "learning_rate": 9.532063507344777e-06, "loss": 0.2246, "step": 2021 }, { "epoch": 0.6752379362163967, "grad_norm": 0.5055062622420079, "learning_rate": 9.53124231427457e-06, "loss": 0.2329, "step": 2022 }, { "epoch": 0.6755718817832693, "grad_norm": 0.5127546113587057, "learning_rate": 9.530420436706853e-06, "loss": 0.225, "step": 2023 }, { "epoch": 0.6759058273501419, "grad_norm": 0.5199847442243274, "learning_rate": 9.529597874765788e-06, "loss": 0.2224, "step": 2024 }, { "epoch": 0.6762397729170145, "grad_norm": 0.5520828526591032, "learning_rate": 9.528774628575628e-06, "loss": 0.2329, "step": 2025 }, { "epoch": 0.6765737184838871, "grad_norm": 0.4690377580828556, "learning_rate": 9.527950698260737e-06, "loss": 0.2162, "step": 2026 }, { "epoch": 0.6769076640507597, "grad_norm": 0.7257663653006456, "learning_rate": 9.527126083945578e-06, "loss": 0.2423, "step": 2027 }, { "epoch": 0.6772416096176324, "grad_norm": 0.592781750687045, "learning_rate": 9.526300785754719e-06, "loss": 0.2328, "step": 2028 }, { "epoch": 0.677575555184505, "grad_norm": 0.6355902972521256, "learning_rate": 9.525474803812831e-06, "loss": 0.2281, "step": 2029 }, { "epoch": 0.6779095007513776, "grad_norm": 0.5201719503575588, "learning_rate": 9.524648138244688e-06, "loss": 0.2376, "step": 2030 }, { "epoch": 0.6782434463182502, "grad_norm": 0.5216385214418539, "learning_rate": 9.523820789175167e-06, "loss": 0.2329, "step": 2031 }, { "epoch": 0.6785773918851227, "grad_norm": 0.4860538015687635, "learning_rate": 9.52299275672925e-06, "loss": 0.2239, "step": 2032 }, { "epoch": 0.6789113374519953, "grad_norm": 0.5065750638589931, "learning_rate": 9.52216404103202e-06, "loss": 0.2204, "step": 2033 }, { "epoch": 0.6792452830188679, "grad_norm": 0.5465674778090972, "learning_rate": 9.521334642208666e-06, "loss": 0.2149, "step": 2034 }, { "epoch": 0.6795792285857405, "grad_norm": 0.530148424285913, "learning_rate": 9.520504560384476e-06, "loss": 0.2291, "step": 2035 }, { "epoch": 0.6799131741526131, "grad_norm": 0.439419070166103, "learning_rate": 9.519673795684845e-06, "loss": 0.2246, "step": 2036 }, { "epoch": 0.6802471197194857, "grad_norm": 0.5110159600833178, "learning_rate": 9.518842348235271e-06, "loss": 0.2156, "step": 2037 }, { "epoch": 0.6805810652863583, "grad_norm": 0.5080312664365709, "learning_rate": 9.51801021816135e-06, "loss": 0.2282, "step": 2038 }, { "epoch": 0.6809150108532309, "grad_norm": 0.4405972025652121, "learning_rate": 9.51717740558879e-06, "loss": 0.2287, "step": 2039 }, { "epoch": 0.6812489564201035, "grad_norm": 0.4846966375847971, "learning_rate": 9.516343910643395e-06, "loss": 0.2304, "step": 2040 }, { "epoch": 0.6815829019869761, "grad_norm": 0.4458254730841546, "learning_rate": 9.515509733451074e-06, "loss": 0.2266, "step": 2041 }, { "epoch": 0.6819168475538487, "grad_norm": 0.5081979817054557, "learning_rate": 9.514674874137838e-06, "loss": 0.2212, "step": 2042 }, { "epoch": 0.6822507931207213, "grad_norm": 0.44506787175264934, "learning_rate": 9.513839332829806e-06, "loss": 0.217, "step": 2043 }, { "epoch": 0.682584738687594, "grad_norm": 0.4711214808235664, "learning_rate": 9.513003109653192e-06, "loss": 0.214, "step": 2044 }, { "epoch": 0.6829186842544666, "grad_norm": 0.4690699744568473, "learning_rate": 9.512166204734322e-06, "loss": 0.2265, "step": 2045 }, { "epoch": 0.6832526298213392, "grad_norm": 0.47218778632793423, "learning_rate": 9.511328618199614e-06, "loss": 0.222, "step": 2046 }, { "epoch": 0.6835865753882118, "grad_norm": 0.5360979263013558, "learning_rate": 9.510490350175602e-06, "loss": 0.2571, "step": 2047 }, { "epoch": 0.6839205209550844, "grad_norm": 0.4708991844847955, "learning_rate": 9.50965140078891e-06, "loss": 0.2347, "step": 2048 }, { "epoch": 0.6842544665219569, "grad_norm": 0.4819450196925043, "learning_rate": 9.508811770166277e-06, "loss": 0.2322, "step": 2049 }, { "epoch": 0.6845884120888295, "grad_norm": 0.45770077260472547, "learning_rate": 9.507971458434538e-06, "loss": 0.2217, "step": 2050 }, { "epoch": 0.6849223576557021, "grad_norm": 0.4682650978661971, "learning_rate": 9.507130465720628e-06, "loss": 0.2258, "step": 2051 }, { "epoch": 0.6852563032225747, "grad_norm": 0.4225589686793817, "learning_rate": 9.506288792151592e-06, "loss": 0.2082, "step": 2052 }, { "epoch": 0.6855902487894473, "grad_norm": 0.43243099858850814, "learning_rate": 9.505446437854574e-06, "loss": 0.2192, "step": 2053 }, { "epoch": 0.6859241943563199, "grad_norm": 0.48545888804912246, "learning_rate": 9.504603402956823e-06, "loss": 0.232, "step": 2054 }, { "epoch": 0.6862581399231925, "grad_norm": 0.4784145024105088, "learning_rate": 9.503759687585686e-06, "loss": 0.2257, "step": 2055 }, { "epoch": 0.6865920854900651, "grad_norm": 0.47809150718111393, "learning_rate": 9.50291529186862e-06, "loss": 0.2264, "step": 2056 }, { "epoch": 0.6869260310569377, "grad_norm": 0.45443749767645863, "learning_rate": 9.502070215933177e-06, "loss": 0.2247, "step": 2057 }, { "epoch": 0.6872599766238103, "grad_norm": 0.5001191908128457, "learning_rate": 9.501224459907019e-06, "loss": 0.229, "step": 2058 }, { "epoch": 0.6875939221906829, "grad_norm": 0.4682122413633237, "learning_rate": 9.500378023917906e-06, "loss": 0.2206, "step": 2059 }, { "epoch": 0.6879278677575555, "grad_norm": 0.48110266017390535, "learning_rate": 9.499530908093702e-06, "loss": 0.2237, "step": 2060 }, { "epoch": 0.6882618133244282, "grad_norm": 0.4758868687878399, "learning_rate": 9.498683112562374e-06, "loss": 0.2292, "step": 2061 }, { "epoch": 0.6885957588913008, "grad_norm": 0.4398159757318568, "learning_rate": 9.497834637451992e-06, "loss": 0.2194, "step": 2062 }, { "epoch": 0.6889297044581734, "grad_norm": 0.4806638031099439, "learning_rate": 9.496985482890728e-06, "loss": 0.2204, "step": 2063 }, { "epoch": 0.689263650025046, "grad_norm": 0.7573298106478715, "learning_rate": 9.496135649006857e-06, "loss": 0.2451, "step": 2064 }, { "epoch": 0.6895975955919185, "grad_norm": 0.4398493907855471, "learning_rate": 9.495285135928755e-06, "loss": 0.2357, "step": 2065 }, { "epoch": 0.6899315411587911, "grad_norm": 0.4722521081195024, "learning_rate": 9.494433943784901e-06, "loss": 0.2284, "step": 2066 }, { "epoch": 0.6902654867256637, "grad_norm": 0.44358844114458357, "learning_rate": 9.493582072703883e-06, "loss": 0.2094, "step": 2067 }, { "epoch": 0.6905994322925363, "grad_norm": 0.5388979510188426, "learning_rate": 9.49272952281438e-06, "loss": 0.2498, "step": 2068 }, { "epoch": 0.6909333778594089, "grad_norm": 0.4401449144477547, "learning_rate": 9.491876294245184e-06, "loss": 0.2288, "step": 2069 }, { "epoch": 0.6912673234262815, "grad_norm": 0.5258022574749537, "learning_rate": 9.491022387125183e-06, "loss": 0.2227, "step": 2070 }, { "epoch": 0.6916012689931541, "grad_norm": 0.484024323700997, "learning_rate": 9.490167801583373e-06, "loss": 0.2139, "step": 2071 }, { "epoch": 0.6919352145600267, "grad_norm": 0.506106087923025, "learning_rate": 9.489312537748843e-06, "loss": 0.2431, "step": 2072 }, { "epoch": 0.6922691601268993, "grad_norm": 0.4585978711248216, "learning_rate": 9.488456595750795e-06, "loss": 0.2339, "step": 2073 }, { "epoch": 0.6926031056937719, "grad_norm": 0.6031424608851983, "learning_rate": 9.487599975718529e-06, "loss": 0.2433, "step": 2074 }, { "epoch": 0.6929370512606445, "grad_norm": 0.4556413453192436, "learning_rate": 9.486742677781446e-06, "loss": 0.2189, "step": 2075 }, { "epoch": 0.6932709968275171, "grad_norm": 0.687156099033192, "learning_rate": 9.485884702069053e-06, "loss": 0.2343, "step": 2076 }, { "epoch": 0.6936049423943897, "grad_norm": 0.4659994646594298, "learning_rate": 9.485026048710957e-06, "loss": 0.2295, "step": 2077 }, { "epoch": 0.6939388879612624, "grad_norm": 0.5014688431544414, "learning_rate": 9.484166717836865e-06, "loss": 0.234, "step": 2078 }, { "epoch": 0.694272833528135, "grad_norm": 0.5334695156505862, "learning_rate": 9.48330670957659e-06, "loss": 0.2192, "step": 2079 }, { "epoch": 0.6946067790950076, "grad_norm": 0.5223706657750462, "learning_rate": 9.48244602406005e-06, "loss": 0.2254, "step": 2080 }, { "epoch": 0.6949407246618801, "grad_norm": 0.5463595012524518, "learning_rate": 9.481584661417258e-06, "loss": 0.2254, "step": 2081 }, { "epoch": 0.6952746702287527, "grad_norm": 0.48561380690299644, "learning_rate": 9.480722621778334e-06, "loss": 0.2277, "step": 2082 }, { "epoch": 0.6956086157956253, "grad_norm": 0.4492219269705461, "learning_rate": 9.479859905273498e-06, "loss": 0.2236, "step": 2083 }, { "epoch": 0.6959425613624979, "grad_norm": 0.5504216916036948, "learning_rate": 9.478996512033074e-06, "loss": 0.2467, "step": 2084 }, { "epoch": 0.6962765069293705, "grad_norm": 0.5111070325627567, "learning_rate": 9.478132442187491e-06, "loss": 0.2112, "step": 2085 }, { "epoch": 0.6966104524962431, "grad_norm": 0.47186041559580094, "learning_rate": 9.477267695867275e-06, "loss": 0.2229, "step": 2086 }, { "epoch": 0.6969443980631157, "grad_norm": 0.4488571924469654, "learning_rate": 9.476402273203052e-06, "loss": 0.2225, "step": 2087 }, { "epoch": 0.6972783436299883, "grad_norm": 0.5787574166594714, "learning_rate": 9.47553617432556e-06, "loss": 0.2335, "step": 2088 }, { "epoch": 0.6976122891968609, "grad_norm": 0.49866311539288444, "learning_rate": 9.47466939936563e-06, "loss": 0.2237, "step": 2089 }, { "epoch": 0.6979462347637335, "grad_norm": 0.7548546423555135, "learning_rate": 9.473801948454199e-06, "loss": 0.23, "step": 2090 }, { "epoch": 0.6982801803306061, "grad_norm": 0.49007999916257805, "learning_rate": 9.472933821722307e-06, "loss": 0.2166, "step": 2091 }, { "epoch": 0.6986141258974787, "grad_norm": 0.47535667454395897, "learning_rate": 9.472065019301095e-06, "loss": 0.2184, "step": 2092 }, { "epoch": 0.6989480714643513, "grad_norm": 0.5211167728844137, "learning_rate": 9.471195541321805e-06, "loss": 0.2176, "step": 2093 }, { "epoch": 0.699282017031224, "grad_norm": 0.5003555535014772, "learning_rate": 9.470325387915782e-06, "loss": 0.2353, "step": 2094 }, { "epoch": 0.6996159625980966, "grad_norm": 0.48728850394662204, "learning_rate": 9.469454559214473e-06, "loss": 0.2278, "step": 2095 }, { "epoch": 0.6999499081649692, "grad_norm": 0.5088758728966357, "learning_rate": 9.468583055349425e-06, "loss": 0.22, "step": 2096 }, { "epoch": 0.7002838537318417, "grad_norm": 0.4368236017414324, "learning_rate": 9.467710876452292e-06, "loss": 0.2233, "step": 2097 }, { "epoch": 0.7006177992987143, "grad_norm": 0.5077021290184639, "learning_rate": 9.466838022654826e-06, "loss": 0.2352, "step": 2098 }, { "epoch": 0.7009517448655869, "grad_norm": 0.5513449938345952, "learning_rate": 9.465964494088879e-06, "loss": 0.2327, "step": 2099 }, { "epoch": 0.7012856904324595, "grad_norm": 0.4771101109092869, "learning_rate": 9.465090290886411e-06, "loss": 0.2305, "step": 2100 }, { "epoch": 0.7016196359993321, "grad_norm": 0.5415328425413375, "learning_rate": 9.464215413179483e-06, "loss": 0.2424, "step": 2101 }, { "epoch": 0.7019535815662047, "grad_norm": 0.49226725525209947, "learning_rate": 9.46333986110025e-06, "loss": 0.2095, "step": 2102 }, { "epoch": 0.7022875271330773, "grad_norm": 0.47931978869160313, "learning_rate": 9.462463634780977e-06, "loss": 0.2221, "step": 2103 }, { "epoch": 0.7026214726999499, "grad_norm": 0.4655415271549972, "learning_rate": 9.461586734354027e-06, "loss": 0.2201, "step": 2104 }, { "epoch": 0.7029554182668225, "grad_norm": 0.5668783713703968, "learning_rate": 9.460709159951867e-06, "loss": 0.2385, "step": 2105 }, { "epoch": 0.7032893638336951, "grad_norm": 0.4442504668426384, "learning_rate": 9.459830911707066e-06, "loss": 0.2174, "step": 2106 }, { "epoch": 0.7036233094005677, "grad_norm": 0.5148802971653738, "learning_rate": 9.458951989752295e-06, "loss": 0.2298, "step": 2107 }, { "epoch": 0.7039572549674403, "grad_norm": 0.4961289354471199, "learning_rate": 9.458072394220321e-06, "loss": 0.2365, "step": 2108 }, { "epoch": 0.7042912005343129, "grad_norm": 0.39859284746010964, "learning_rate": 9.457192125244021e-06, "loss": 0.2026, "step": 2109 }, { "epoch": 0.7046251461011855, "grad_norm": 0.43207299712712405, "learning_rate": 9.456311182956368e-06, "loss": 0.2191, "step": 2110 }, { "epoch": 0.7049590916680581, "grad_norm": 0.4752148941522198, "learning_rate": 9.45542956749044e-06, "loss": 0.2325, "step": 2111 }, { "epoch": 0.7052930372349308, "grad_norm": 0.5010644723261528, "learning_rate": 9.454547278979415e-06, "loss": 0.2191, "step": 2112 }, { "epoch": 0.7056269828018034, "grad_norm": 0.40752873703821485, "learning_rate": 9.453664317556572e-06, "loss": 0.2106, "step": 2113 }, { "epoch": 0.7059609283686759, "grad_norm": 0.5576876661906841, "learning_rate": 9.452780683355295e-06, "loss": 0.222, "step": 2114 }, { "epoch": 0.7062948739355485, "grad_norm": 0.432810708164213, "learning_rate": 9.451896376509065e-06, "loss": 0.2113, "step": 2115 }, { "epoch": 0.7066288195024211, "grad_norm": 0.4482096027677475, "learning_rate": 9.451011397151469e-06, "loss": 0.2209, "step": 2116 }, { "epoch": 0.7069627650692937, "grad_norm": 0.4366216644067192, "learning_rate": 9.450125745416191e-06, "loss": 0.2107, "step": 2117 }, { "epoch": 0.7072967106361663, "grad_norm": 0.4948001961193805, "learning_rate": 9.44923942143702e-06, "loss": 0.2346, "step": 2118 }, { "epoch": 0.7076306562030389, "grad_norm": 0.8894874375807659, "learning_rate": 9.448352425347848e-06, "loss": 0.2458, "step": 2119 }, { "epoch": 0.7079646017699115, "grad_norm": 0.5299843319037437, "learning_rate": 9.447464757282665e-06, "loss": 0.2367, "step": 2120 }, { "epoch": 0.7082985473367841, "grad_norm": 0.4727096807635542, "learning_rate": 9.44657641737556e-06, "loss": 0.2327, "step": 2121 }, { "epoch": 0.7086324929036567, "grad_norm": 0.4600930804291522, "learning_rate": 9.445687405760735e-06, "loss": 0.2226, "step": 2122 }, { "epoch": 0.7089664384705293, "grad_norm": 0.4332929425486284, "learning_rate": 9.444797722572479e-06, "loss": 0.2107, "step": 2123 }, { "epoch": 0.7093003840374019, "grad_norm": 0.731912468149352, "learning_rate": 9.44390736794519e-06, "loss": 0.238, "step": 2124 }, { "epoch": 0.7096343296042745, "grad_norm": 0.5081533596669653, "learning_rate": 9.443016342013369e-06, "loss": 0.2351, "step": 2125 }, { "epoch": 0.7099682751711471, "grad_norm": 0.5148768327746467, "learning_rate": 9.442124644911614e-06, "loss": 0.2228, "step": 2126 }, { "epoch": 0.7103022207380197, "grad_norm": 0.4889377936506926, "learning_rate": 9.441232276774629e-06, "loss": 0.2185, "step": 2127 }, { "epoch": 0.7106361663048923, "grad_norm": 0.48641943614807776, "learning_rate": 9.440339237737213e-06, "loss": 0.2201, "step": 2128 }, { "epoch": 0.710970111871765, "grad_norm": 0.5242633602293272, "learning_rate": 9.439445527934272e-06, "loss": 0.2176, "step": 2129 }, { "epoch": 0.7113040574386374, "grad_norm": 0.5762365105646677, "learning_rate": 9.438551147500812e-06, "loss": 0.2338, "step": 2130 }, { "epoch": 0.7116380030055101, "grad_norm": 0.48698836807254486, "learning_rate": 9.437656096571938e-06, "loss": 0.2287, "step": 2131 }, { "epoch": 0.7119719485723827, "grad_norm": 0.5525740892373583, "learning_rate": 9.436760375282858e-06, "loss": 0.2395, "step": 2132 }, { "epoch": 0.7123058941392553, "grad_norm": 0.6121087031250623, "learning_rate": 9.435863983768884e-06, "loss": 0.2327, "step": 2133 }, { "epoch": 0.7126398397061279, "grad_norm": 0.5393868118238357, "learning_rate": 9.434966922165424e-06, "loss": 0.242, "step": 2134 }, { "epoch": 0.7129737852730005, "grad_norm": 0.5338938325124174, "learning_rate": 9.43406919060799e-06, "loss": 0.2094, "step": 2135 }, { "epoch": 0.7133077308398731, "grad_norm": 0.4834589668720248, "learning_rate": 9.433170789232196e-06, "loss": 0.207, "step": 2136 }, { "epoch": 0.7136416764067457, "grad_norm": 0.636163814834721, "learning_rate": 9.432271718173756e-06, "loss": 0.2373, "step": 2137 }, { "epoch": 0.7139756219736183, "grad_norm": 0.5423717816603074, "learning_rate": 9.431371977568483e-06, "loss": 0.2398, "step": 2138 }, { "epoch": 0.7143095675404909, "grad_norm": 0.6024062328191949, "learning_rate": 9.430471567552295e-06, "loss": 0.2215, "step": 2139 }, { "epoch": 0.7146435131073635, "grad_norm": 0.5675673827510521, "learning_rate": 9.42957048826121e-06, "loss": 0.2296, "step": 2140 }, { "epoch": 0.7149774586742361, "grad_norm": 0.47911769123994113, "learning_rate": 9.428668739831349e-06, "loss": 0.2133, "step": 2141 }, { "epoch": 0.7153114042411087, "grad_norm": 0.5032255216977924, "learning_rate": 9.427766322398926e-06, "loss": 0.228, "step": 2142 }, { "epoch": 0.7156453498079813, "grad_norm": 0.4892738328424283, "learning_rate": 9.426863236100266e-06, "loss": 0.2315, "step": 2143 }, { "epoch": 0.7159792953748539, "grad_norm": 0.5488139396113335, "learning_rate": 9.425959481071787e-06, "loss": 0.2283, "step": 2144 }, { "epoch": 0.7163132409417265, "grad_norm": 0.46181208491623815, "learning_rate": 9.425055057450017e-06, "loss": 0.2199, "step": 2145 }, { "epoch": 0.716647186508599, "grad_norm": 0.47958048701662137, "learning_rate": 9.424149965371576e-06, "loss": 0.2377, "step": 2146 }, { "epoch": 0.7169811320754716, "grad_norm": 0.533708758771216, "learning_rate": 9.423244204973191e-06, "loss": 0.2263, "step": 2147 }, { "epoch": 0.7173150776423443, "grad_norm": 0.47032392487456925, "learning_rate": 9.422337776391686e-06, "loss": 0.2139, "step": 2148 }, { "epoch": 0.7176490232092169, "grad_norm": 0.4681306879448958, "learning_rate": 9.421430679763989e-06, "loss": 0.2217, "step": 2149 }, { "epoch": 0.7179829687760895, "grad_norm": 0.4611059292110921, "learning_rate": 9.420522915227129e-06, "loss": 0.2262, "step": 2150 }, { "epoch": 0.7183169143429621, "grad_norm": 0.4649061259338331, "learning_rate": 9.419614482918229e-06, "loss": 0.2194, "step": 2151 }, { "epoch": 0.7186508599098347, "grad_norm": 0.49767814980354536, "learning_rate": 9.418705382974524e-06, "loss": 0.2165, "step": 2152 }, { "epoch": 0.7189848054767073, "grad_norm": 0.4876893264124481, "learning_rate": 9.417795615533343e-06, "loss": 0.2306, "step": 2153 }, { "epoch": 0.7193187510435799, "grad_norm": 0.5573835824314209, "learning_rate": 9.416885180732115e-06, "loss": 0.241, "step": 2154 }, { "epoch": 0.7196526966104525, "grad_norm": 0.42980907750226544, "learning_rate": 9.415974078708375e-06, "loss": 0.2142, "step": 2155 }, { "epoch": 0.7199866421773251, "grad_norm": 0.4581280120482188, "learning_rate": 9.415062309599751e-06, "loss": 0.225, "step": 2156 }, { "epoch": 0.7203205877441977, "grad_norm": 0.4793267456929462, "learning_rate": 9.414149873543983e-06, "loss": 0.2216, "step": 2157 }, { "epoch": 0.7206545333110703, "grad_norm": 0.5360592011567692, "learning_rate": 9.4132367706789e-06, "loss": 0.207, "step": 2158 }, { "epoch": 0.7209884788779429, "grad_norm": 0.422519463252475, "learning_rate": 9.412323001142438e-06, "loss": 0.2048, "step": 2159 }, { "epoch": 0.7213224244448155, "grad_norm": 0.47418431358564955, "learning_rate": 9.411408565072635e-06, "loss": 0.2353, "step": 2160 }, { "epoch": 0.7216563700116881, "grad_norm": 0.43728181066282573, "learning_rate": 9.410493462607623e-06, "loss": 0.2095, "step": 2161 }, { "epoch": 0.7219903155785607, "grad_norm": 0.5017096018356688, "learning_rate": 9.409577693885642e-06, "loss": 0.2326, "step": 2162 }, { "epoch": 0.7223242611454332, "grad_norm": 0.45202500297613973, "learning_rate": 9.408661259045032e-06, "loss": 0.215, "step": 2163 }, { "epoch": 0.7226582067123059, "grad_norm": 0.49163901550448014, "learning_rate": 9.407744158224227e-06, "loss": 0.2149, "step": 2164 }, { "epoch": 0.7229921522791785, "grad_norm": 0.49708508765871556, "learning_rate": 9.406826391561767e-06, "loss": 0.228, "step": 2165 }, { "epoch": 0.7233260978460511, "grad_norm": 0.5530402316677213, "learning_rate": 9.405907959196293e-06, "loss": 0.2534, "step": 2166 }, { "epoch": 0.7236600434129237, "grad_norm": 0.4590331097109294, "learning_rate": 9.404988861266543e-06, "loss": 0.2127, "step": 2167 }, { "epoch": 0.7239939889797963, "grad_norm": 0.5061377928038171, "learning_rate": 9.404069097911358e-06, "loss": 0.2181, "step": 2168 }, { "epoch": 0.7243279345466689, "grad_norm": 0.4485152013068261, "learning_rate": 9.40314866926968e-06, "loss": 0.2113, "step": 2169 }, { "epoch": 0.7246618801135415, "grad_norm": 0.4916280062892611, "learning_rate": 9.402227575480549e-06, "loss": 0.2363, "step": 2170 }, { "epoch": 0.7249958256804141, "grad_norm": 0.4369101484429515, "learning_rate": 9.401305816683111e-06, "loss": 0.2022, "step": 2171 }, { "epoch": 0.7253297712472867, "grad_norm": 0.4506814138302595, "learning_rate": 9.400383393016604e-06, "loss": 0.2273, "step": 2172 }, { "epoch": 0.7256637168141593, "grad_norm": 0.4383138721900733, "learning_rate": 9.39946030462037e-06, "loss": 0.2058, "step": 2173 }, { "epoch": 0.7259976623810319, "grad_norm": 0.4733775066633754, "learning_rate": 9.39853655163386e-06, "loss": 0.2033, "step": 2174 }, { "epoch": 0.7263316079479045, "grad_norm": 0.44987005012925274, "learning_rate": 9.39761213419661e-06, "loss": 0.2093, "step": 2175 }, { "epoch": 0.7266655535147771, "grad_norm": 0.44081375935995787, "learning_rate": 9.396687052448267e-06, "loss": 0.2151, "step": 2176 }, { "epoch": 0.7269994990816497, "grad_norm": 0.4668970976585449, "learning_rate": 9.395761306528576e-06, "loss": 0.2111, "step": 2177 }, { "epoch": 0.7273334446485223, "grad_norm": 0.4762785397500771, "learning_rate": 9.39483489657738e-06, "loss": 0.212, "step": 2178 }, { "epoch": 0.7276673902153948, "grad_norm": 0.447677116890413, "learning_rate": 9.393907822734627e-06, "loss": 0.22, "step": 2179 }, { "epoch": 0.7280013357822674, "grad_norm": 0.535442818777305, "learning_rate": 9.39298008514036e-06, "loss": 0.2372, "step": 2180 }, { "epoch": 0.72833528134914, "grad_norm": 0.5188386933388953, "learning_rate": 9.392051683934726e-06, "loss": 0.2268, "step": 2181 }, { "epoch": 0.7286692269160127, "grad_norm": 0.4727820750476286, "learning_rate": 9.39112261925797e-06, "loss": 0.2179, "step": 2182 }, { "epoch": 0.7290031724828853, "grad_norm": 0.4854734035330415, "learning_rate": 9.390192891250439e-06, "loss": 0.2406, "step": 2183 }, { "epoch": 0.7293371180497579, "grad_norm": 1.4538057381250347, "learning_rate": 9.389262500052578e-06, "loss": 0.226, "step": 2184 }, { "epoch": 0.7296710636166305, "grad_norm": 0.511345482598057, "learning_rate": 9.388331445804935e-06, "loss": 0.2238, "step": 2185 }, { "epoch": 0.7300050091835031, "grad_norm": 0.49329394070154936, "learning_rate": 9.387399728648156e-06, "loss": 0.2164, "step": 2186 }, { "epoch": 0.7303389547503757, "grad_norm": 0.47261545953275097, "learning_rate": 9.386467348722989e-06, "loss": 0.2369, "step": 2187 }, { "epoch": 0.7306729003172483, "grad_norm": 0.4970024372138262, "learning_rate": 9.385534306170279e-06, "loss": 0.2337, "step": 2188 }, { "epoch": 0.7310068458841209, "grad_norm": 0.4407037790976595, "learning_rate": 9.384600601130973e-06, "loss": 0.2181, "step": 2189 }, { "epoch": 0.7313407914509935, "grad_norm": 0.4612359667383596, "learning_rate": 9.383666233746121e-06, "loss": 0.213, "step": 2190 }, { "epoch": 0.7316747370178661, "grad_norm": 0.5683151991367129, "learning_rate": 9.382731204156869e-06, "loss": 0.2403, "step": 2191 }, { "epoch": 0.7320086825847387, "grad_norm": 0.6075346410739748, "learning_rate": 9.381795512504461e-06, "loss": 0.2251, "step": 2192 }, { "epoch": 0.7323426281516113, "grad_norm": 0.7259845103570428, "learning_rate": 9.380859158930249e-06, "loss": 0.2379, "step": 2193 }, { "epoch": 0.7326765737184839, "grad_norm": 0.47741648545629145, "learning_rate": 9.379922143575678e-06, "loss": 0.2191, "step": 2194 }, { "epoch": 0.7330105192853564, "grad_norm": 0.45871484382317196, "learning_rate": 9.378984466582294e-06, "loss": 0.207, "step": 2195 }, { "epoch": 0.733344464852229, "grad_norm": 0.4698346774760905, "learning_rate": 9.378046128091748e-06, "loss": 0.2156, "step": 2196 }, { "epoch": 0.7336784104191016, "grad_norm": 0.517374409193277, "learning_rate": 9.377107128245782e-06, "loss": 0.2168, "step": 2197 }, { "epoch": 0.7340123559859743, "grad_norm": 0.4960261055253891, "learning_rate": 9.376167467186246e-06, "loss": 0.2302, "step": 2198 }, { "epoch": 0.7343463015528469, "grad_norm": 0.5756540347478746, "learning_rate": 9.375227145055085e-06, "loss": 0.2132, "step": 2199 }, { "epoch": 0.7346802471197195, "grad_norm": 0.5091682769372302, "learning_rate": 9.374286161994351e-06, "loss": 0.2282, "step": 2200 }, { "epoch": 0.7350141926865921, "grad_norm": 0.5067877827964193, "learning_rate": 9.373344518146184e-06, "loss": 0.2359, "step": 2201 }, { "epoch": 0.7353481382534647, "grad_norm": 0.5423740124921783, "learning_rate": 9.372402213652833e-06, "loss": 0.2315, "step": 2202 }, { "epoch": 0.7356820838203373, "grad_norm": 0.453426255813076, "learning_rate": 9.371459248656645e-06, "loss": 0.2123, "step": 2203 }, { "epoch": 0.7360160293872099, "grad_norm": 0.5465347054793437, "learning_rate": 9.370515623300066e-06, "loss": 0.2203, "step": 2204 }, { "epoch": 0.7363499749540825, "grad_norm": 0.5653821901957052, "learning_rate": 9.369571337725638e-06, "loss": 0.2308, "step": 2205 }, { "epoch": 0.7366839205209551, "grad_norm": 0.5246706912753366, "learning_rate": 9.368626392076013e-06, "loss": 0.2288, "step": 2206 }, { "epoch": 0.7370178660878277, "grad_norm": 0.42226917655168017, "learning_rate": 9.367680786493929e-06, "loss": 0.2245, "step": 2207 }, { "epoch": 0.7373518116547003, "grad_norm": 0.5148652520612048, "learning_rate": 9.366734521122236e-06, "loss": 0.2121, "step": 2208 }, { "epoch": 0.7376857572215729, "grad_norm": 0.49636706629870225, "learning_rate": 9.365787596103877e-06, "loss": 0.2422, "step": 2209 }, { "epoch": 0.7380197027884455, "grad_norm": 0.5272484506883985, "learning_rate": 9.364840011581896e-06, "loss": 0.2361, "step": 2210 }, { "epoch": 0.7383536483553181, "grad_norm": 0.48013110860491137, "learning_rate": 9.363891767699437e-06, "loss": 0.2289, "step": 2211 }, { "epoch": 0.7386875939221906, "grad_norm": 0.4464703818992004, "learning_rate": 9.362942864599746e-06, "loss": 0.2189, "step": 2212 }, { "epoch": 0.7390215394890632, "grad_norm": 0.49475924194998067, "learning_rate": 9.36199330242616e-06, "loss": 0.2301, "step": 2213 }, { "epoch": 0.7393554850559358, "grad_norm": 0.46063739425578615, "learning_rate": 9.361043081322125e-06, "loss": 0.2268, "step": 2214 }, { "epoch": 0.7396894306228085, "grad_norm": 0.4741690736245642, "learning_rate": 9.360092201431186e-06, "loss": 0.2333, "step": 2215 }, { "epoch": 0.7400233761896811, "grad_norm": 0.5013668916419637, "learning_rate": 9.359140662896978e-06, "loss": 0.2202, "step": 2216 }, { "epoch": 0.7403573217565537, "grad_norm": 0.4308948496107637, "learning_rate": 9.358188465863247e-06, "loss": 0.2086, "step": 2217 }, { "epoch": 0.7406912673234263, "grad_norm": 0.4175975145931895, "learning_rate": 9.357235610473833e-06, "loss": 0.2091, "step": 2218 }, { "epoch": 0.7410252128902989, "grad_norm": 0.4655604424669319, "learning_rate": 9.356282096872673e-06, "loss": 0.23, "step": 2219 }, { "epoch": 0.7413591584571715, "grad_norm": 0.4740711739925384, "learning_rate": 9.355327925203811e-06, "loss": 0.2176, "step": 2220 }, { "epoch": 0.7416931040240441, "grad_norm": 0.4748639281311392, "learning_rate": 9.354373095611383e-06, "loss": 0.2359, "step": 2221 }, { "epoch": 0.7420270495909167, "grad_norm": 0.4368036977894943, "learning_rate": 9.353417608239627e-06, "loss": 0.2171, "step": 2222 }, { "epoch": 0.7423609951577893, "grad_norm": 0.4666842076480527, "learning_rate": 9.352461463232882e-06, "loss": 0.2324, "step": 2223 }, { "epoch": 0.7426949407246619, "grad_norm": 0.48937622343211834, "learning_rate": 9.351504660735583e-06, "loss": 0.2247, "step": 2224 }, { "epoch": 0.7430288862915345, "grad_norm": 0.4711175442547071, "learning_rate": 9.350547200892271e-06, "loss": 0.2225, "step": 2225 }, { "epoch": 0.7433628318584071, "grad_norm": 0.47189505898784784, "learning_rate": 9.349589083847577e-06, "loss": 0.2336, "step": 2226 }, { "epoch": 0.7436967774252797, "grad_norm": 0.49998037587574656, "learning_rate": 9.348630309746236e-06, "loss": 0.242, "step": 2227 }, { "epoch": 0.7440307229921522, "grad_norm": 0.4389783954854127, "learning_rate": 9.347670878733084e-06, "loss": 0.2179, "step": 2228 }, { "epoch": 0.7443646685590248, "grad_norm": 0.46169101406158664, "learning_rate": 9.346710790953053e-06, "loss": 0.2259, "step": 2229 }, { "epoch": 0.7446986141258974, "grad_norm": 0.43050129747366644, "learning_rate": 9.345750046551177e-06, "loss": 0.211, "step": 2230 }, { "epoch": 0.74503255969277, "grad_norm": 0.4296786556030227, "learning_rate": 9.344788645672585e-06, "loss": 0.2237, "step": 2231 }, { "epoch": 0.7453665052596427, "grad_norm": 0.5686148097924658, "learning_rate": 9.343826588462513e-06, "loss": 0.2344, "step": 2232 }, { "epoch": 0.7457004508265153, "grad_norm": 0.4233257630785193, "learning_rate": 9.342863875066284e-06, "loss": 0.2123, "step": 2233 }, { "epoch": 0.7460343963933879, "grad_norm": 0.437653238005159, "learning_rate": 9.341900505629333e-06, "loss": 0.2154, "step": 2234 }, { "epoch": 0.7463683419602605, "grad_norm": 0.45698025308598117, "learning_rate": 9.340936480297187e-06, "loss": 0.2295, "step": 2235 }, { "epoch": 0.7467022875271331, "grad_norm": 0.4688987813684971, "learning_rate": 9.339971799215472e-06, "loss": 0.226, "step": 2236 }, { "epoch": 0.7470362330940057, "grad_norm": 0.4760421598172396, "learning_rate": 9.339006462529916e-06, "loss": 0.2334, "step": 2237 }, { "epoch": 0.7473701786608783, "grad_norm": 0.4192485301306118, "learning_rate": 9.338040470386344e-06, "loss": 0.2122, "step": 2238 }, { "epoch": 0.7477041242277509, "grad_norm": 0.45176630982507726, "learning_rate": 9.337073822930681e-06, "loss": 0.2387, "step": 2239 }, { "epoch": 0.7480380697946235, "grad_norm": 0.4486924188323239, "learning_rate": 9.336106520308948e-06, "loss": 0.1943, "step": 2240 }, { "epoch": 0.7483720153614961, "grad_norm": 0.4646813003017934, "learning_rate": 9.335138562667267e-06, "loss": 0.2308, "step": 2241 }, { "epoch": 0.7487059609283687, "grad_norm": 0.46477838126704796, "learning_rate": 9.334169950151866e-06, "loss": 0.2234, "step": 2242 }, { "epoch": 0.7490399064952413, "grad_norm": 0.5008667979340363, "learning_rate": 9.333200682909059e-06, "loss": 0.2236, "step": 2243 }, { "epoch": 0.7493738520621138, "grad_norm": 0.48975993721972527, "learning_rate": 9.332230761085265e-06, "loss": 0.2269, "step": 2244 }, { "epoch": 0.7497077976289864, "grad_norm": 0.43081722939141126, "learning_rate": 9.331260184827006e-06, "loss": 0.2189, "step": 2245 }, { "epoch": 0.750041743195859, "grad_norm": 0.4627663439303731, "learning_rate": 9.330288954280898e-06, "loss": 0.2105, "step": 2246 }, { "epoch": 0.7503756887627316, "grad_norm": 0.5521700146824564, "learning_rate": 9.329317069593654e-06, "loss": 0.2273, "step": 2247 }, { "epoch": 0.7507096343296042, "grad_norm": 0.518812087453654, "learning_rate": 9.328344530912093e-06, "loss": 0.2518, "step": 2248 }, { "epoch": 0.7510435798964769, "grad_norm": 0.5221575866002434, "learning_rate": 9.327371338383124e-06, "loss": 0.2458, "step": 2249 }, { "epoch": 0.7513775254633495, "grad_norm": 0.47799468782489984, "learning_rate": 9.326397492153762e-06, "loss": 0.2268, "step": 2250 }, { "epoch": 0.7517114710302221, "grad_norm": 0.8073887593343466, "learning_rate": 9.325422992371117e-06, "loss": 0.2399, "step": 2251 }, { "epoch": 0.7520454165970947, "grad_norm": 0.5105733251703505, "learning_rate": 9.324447839182397e-06, "loss": 0.2248, "step": 2252 }, { "epoch": 0.7523793621639673, "grad_norm": 0.5480491125068885, "learning_rate": 9.323472032734915e-06, "loss": 0.2464, "step": 2253 }, { "epoch": 0.7527133077308399, "grad_norm": 0.48116501479820156, "learning_rate": 9.322495573176073e-06, "loss": 0.226, "step": 2254 }, { "epoch": 0.7530472532977125, "grad_norm": 0.4210936718940355, "learning_rate": 9.321518460653381e-06, "loss": 0.2125, "step": 2255 }, { "epoch": 0.7533811988645851, "grad_norm": 0.48326083685217036, "learning_rate": 9.32054069531444e-06, "loss": 0.2262, "step": 2256 }, { "epoch": 0.7537151444314577, "grad_norm": 0.9265740345041341, "learning_rate": 9.319562277306955e-06, "loss": 0.2241, "step": 2257 }, { "epoch": 0.7540490899983303, "grad_norm": 0.45048145099511155, "learning_rate": 9.318583206778726e-06, "loss": 0.216, "step": 2258 }, { "epoch": 0.7543830355652029, "grad_norm": 0.578061264472536, "learning_rate": 9.317603483877654e-06, "loss": 0.2202, "step": 2259 }, { "epoch": 0.7547169811320755, "grad_norm": 0.4307754856617888, "learning_rate": 9.316623108751739e-06, "loss": 0.2132, "step": 2260 }, { "epoch": 0.755050926698948, "grad_norm": 0.486997614653093, "learning_rate": 9.315642081549074e-06, "loss": 0.2318, "step": 2261 }, { "epoch": 0.7553848722658206, "grad_norm": 0.5251137171531048, "learning_rate": 9.31466040241786e-06, "loss": 0.2263, "step": 2262 }, { "epoch": 0.7557188178326932, "grad_norm": 0.4664038869726607, "learning_rate": 9.313678071506388e-06, "loss": 0.2252, "step": 2263 }, { "epoch": 0.7560527633995658, "grad_norm": 0.4349295324939173, "learning_rate": 9.31269508896305e-06, "loss": 0.2191, "step": 2264 }, { "epoch": 0.7563867089664384, "grad_norm": 0.5160541914139507, "learning_rate": 9.31171145493634e-06, "loss": 0.2304, "step": 2265 }, { "epoch": 0.756720654533311, "grad_norm": 0.48044683366908697, "learning_rate": 9.310727169574847e-06, "loss": 0.2313, "step": 2266 }, { "epoch": 0.7570546001001837, "grad_norm": 0.44340511774161134, "learning_rate": 9.309742233027258e-06, "loss": 0.2178, "step": 2267 }, { "epoch": 0.7573885456670563, "grad_norm": 0.4401405973485924, "learning_rate": 9.308756645442356e-06, "loss": 0.225, "step": 2268 }, { "epoch": 0.7577224912339289, "grad_norm": 0.47966482086980605, "learning_rate": 9.307770406969032e-06, "loss": 0.23, "step": 2269 }, { "epoch": 0.7580564368008015, "grad_norm": 0.43564783263314855, "learning_rate": 9.306783517756264e-06, "loss": 0.2131, "step": 2270 }, { "epoch": 0.7583903823676741, "grad_norm": 0.45364809207582024, "learning_rate": 9.305795977953134e-06, "loss": 0.2276, "step": 2271 }, { "epoch": 0.7587243279345467, "grad_norm": 0.5198386100281404, "learning_rate": 9.304807787708825e-06, "loss": 0.2259, "step": 2272 }, { "epoch": 0.7590582735014193, "grad_norm": 0.46342370795905347, "learning_rate": 9.303818947172611e-06, "loss": 0.2179, "step": 2273 }, { "epoch": 0.7593922190682919, "grad_norm": 0.4731835740602336, "learning_rate": 9.302829456493868e-06, "loss": 0.2333, "step": 2274 }, { "epoch": 0.7597261646351645, "grad_norm": 0.4216435732249339, "learning_rate": 9.301839315822072e-06, "loss": 0.2196, "step": 2275 }, { "epoch": 0.7600601102020371, "grad_norm": 0.4499998322284951, "learning_rate": 9.300848525306797e-06, "loss": 0.2208, "step": 2276 }, { "epoch": 0.7603940557689096, "grad_norm": 0.4504516858457847, "learning_rate": 9.299857085097708e-06, "loss": 0.2258, "step": 2277 }, { "epoch": 0.7607280013357822, "grad_norm": 0.5090935883272614, "learning_rate": 9.298864995344579e-06, "loss": 0.2151, "step": 2278 }, { "epoch": 0.7610619469026548, "grad_norm": 0.485130607846563, "learning_rate": 9.297872256197276e-06, "loss": 0.231, "step": 2279 }, { "epoch": 0.7613958924695274, "grad_norm": 0.3951173987462743, "learning_rate": 9.296878867805762e-06, "loss": 0.2119, "step": 2280 }, { "epoch": 0.7617298380364, "grad_norm": 0.42718739210129064, "learning_rate": 9.2958848303201e-06, "loss": 0.2259, "step": 2281 }, { "epoch": 0.7620637836032726, "grad_norm": 0.40877281318732694, "learning_rate": 9.294890143890451e-06, "loss": 0.217, "step": 2282 }, { "epoch": 0.7623977291701453, "grad_norm": 0.44623672624981436, "learning_rate": 9.293894808667077e-06, "loss": 0.2216, "step": 2283 }, { "epoch": 0.7627316747370179, "grad_norm": 0.5236867845873939, "learning_rate": 9.292898824800333e-06, "loss": 0.2561, "step": 2284 }, { "epoch": 0.7630656203038905, "grad_norm": 0.46275656758815503, "learning_rate": 9.291902192440673e-06, "loss": 0.2191, "step": 2285 }, { "epoch": 0.7633995658707631, "grad_norm": 0.44986183401594326, "learning_rate": 9.290904911738653e-06, "loss": 0.2196, "step": 2286 }, { "epoch": 0.7637335114376357, "grad_norm": 0.47312386838896714, "learning_rate": 9.289906982844923e-06, "loss": 0.2311, "step": 2287 }, { "epoch": 0.7640674570045083, "grad_norm": 0.4955825415082542, "learning_rate": 9.288908405910228e-06, "loss": 0.2187, "step": 2288 }, { "epoch": 0.7644014025713809, "grad_norm": 0.4529969289979694, "learning_rate": 9.287909181085421e-06, "loss": 0.2211, "step": 2289 }, { "epoch": 0.7647353481382535, "grad_norm": 0.4576506572434161, "learning_rate": 9.286909308521443e-06, "loss": 0.2095, "step": 2290 }, { "epoch": 0.7650692937051261, "grad_norm": 0.4508998678478078, "learning_rate": 9.285908788369336e-06, "loss": 0.2287, "step": 2291 }, { "epoch": 0.7654032392719987, "grad_norm": 0.45715461051048567, "learning_rate": 9.284907620780244e-06, "loss": 0.2042, "step": 2292 }, { "epoch": 0.7657371848388712, "grad_norm": 0.4180093603700725, "learning_rate": 9.2839058059054e-06, "loss": 0.2125, "step": 2293 }, { "epoch": 0.7660711304057438, "grad_norm": 0.43093767255958393, "learning_rate": 9.282903343896144e-06, "loss": 0.2238, "step": 2294 }, { "epoch": 0.7664050759726164, "grad_norm": 0.5483814770622409, "learning_rate": 9.281900234903908e-06, "loss": 0.2316, "step": 2295 }, { "epoch": 0.766739021539489, "grad_norm": 0.43771157015292517, "learning_rate": 9.280896479080224e-06, "loss": 0.2138, "step": 2296 }, { "epoch": 0.7670729671063616, "grad_norm": 0.4574659911330641, "learning_rate": 9.27989207657672e-06, "loss": 0.226, "step": 2297 }, { "epoch": 0.7674069126732342, "grad_norm": 0.42221388094567286, "learning_rate": 9.278887027545125e-06, "loss": 0.2209, "step": 2298 }, { "epoch": 0.7677408582401068, "grad_norm": 0.5150950388506087, "learning_rate": 9.277881332137261e-06, "loss": 0.2349, "step": 2299 }, { "epoch": 0.7680748038069795, "grad_norm": 0.48245297149523503, "learning_rate": 9.276874990505053e-06, "loss": 0.23, "step": 2300 }, { "epoch": 0.7684087493738521, "grad_norm": 0.548683322704859, "learning_rate": 9.27586800280052e-06, "loss": 0.2135, "step": 2301 }, { "epoch": 0.7687426949407247, "grad_norm": 0.4994150747196339, "learning_rate": 9.274860369175775e-06, "loss": 0.2303, "step": 2302 }, { "epoch": 0.7690766405075973, "grad_norm": 0.5378040458428275, "learning_rate": 9.27385208978304e-06, "loss": 0.2461, "step": 2303 }, { "epoch": 0.7694105860744699, "grad_norm": 0.45033554985182755, "learning_rate": 9.272843164774622e-06, "loss": 0.2131, "step": 2304 }, { "epoch": 0.7697445316413425, "grad_norm": 0.46757095199070065, "learning_rate": 9.27183359430293e-06, "loss": 0.2232, "step": 2305 }, { "epoch": 0.7700784772082151, "grad_norm": 0.39788086669142664, "learning_rate": 9.270823378520478e-06, "loss": 0.1993, "step": 2306 }, { "epoch": 0.7704124227750877, "grad_norm": 0.5432380637027175, "learning_rate": 9.269812517579867e-06, "loss": 0.2409, "step": 2307 }, { "epoch": 0.7707463683419603, "grad_norm": 0.5044905567763108, "learning_rate": 9.268801011633799e-06, "loss": 0.2263, "step": 2308 }, { "epoch": 0.7710803139088329, "grad_norm": 0.4430694585924386, "learning_rate": 9.267788860835076e-06, "loss": 0.214, "step": 2309 }, { "epoch": 0.7714142594757054, "grad_norm": 0.5203303582541469, "learning_rate": 9.266776065336593e-06, "loss": 0.246, "step": 2310 }, { "epoch": 0.771748205042578, "grad_norm": 0.42657503652675616, "learning_rate": 9.265762625291346e-06, "loss": 0.2232, "step": 2311 }, { "epoch": 0.7720821506094506, "grad_norm": 0.4594336261365237, "learning_rate": 9.264748540852427e-06, "loss": 0.2359, "step": 2312 }, { "epoch": 0.7724160961763232, "grad_norm": 0.46882401712288807, "learning_rate": 9.263733812173023e-06, "loss": 0.2083, "step": 2313 }, { "epoch": 0.7727500417431958, "grad_norm": 0.5028204129392393, "learning_rate": 9.262718439406425e-06, "loss": 0.2256, "step": 2314 }, { "epoch": 0.7730839873100684, "grad_norm": 0.49213998524386604, "learning_rate": 9.261702422706014e-06, "loss": 0.2436, "step": 2315 }, { "epoch": 0.773417932876941, "grad_norm": 0.43231620796153136, "learning_rate": 9.260685762225273e-06, "loss": 0.233, "step": 2316 }, { "epoch": 0.7737518784438137, "grad_norm": 0.4434494887987508, "learning_rate": 9.25966845811778e-06, "loss": 0.2301, "step": 2317 }, { "epoch": 0.7740858240106863, "grad_norm": 0.4370287988017438, "learning_rate": 9.258650510537208e-06, "loss": 0.1969, "step": 2318 }, { "epoch": 0.7744197695775589, "grad_norm": 0.47263084992349913, "learning_rate": 9.257631919637333e-06, "loss": 0.2157, "step": 2319 }, { "epoch": 0.7747537151444315, "grad_norm": 0.48339688653267926, "learning_rate": 9.256612685572027e-06, "loss": 0.2201, "step": 2320 }, { "epoch": 0.7750876607113041, "grad_norm": 0.5110737778673129, "learning_rate": 9.255592808495254e-06, "loss": 0.2405, "step": 2321 }, { "epoch": 0.7754216062781767, "grad_norm": 0.45441898821062743, "learning_rate": 9.254572288561077e-06, "loss": 0.2124, "step": 2322 }, { "epoch": 0.7757555518450493, "grad_norm": 0.4588311491582138, "learning_rate": 9.253551125923662e-06, "loss": 0.2201, "step": 2323 }, { "epoch": 0.7760894974119219, "grad_norm": 0.5177735740891841, "learning_rate": 9.252529320737265e-06, "loss": 0.2429, "step": 2324 }, { "epoch": 0.7764234429787945, "grad_norm": 0.5022245852427304, "learning_rate": 9.251506873156242e-06, "loss": 0.2188, "step": 2325 }, { "epoch": 0.776757388545667, "grad_norm": 0.4537744070437574, "learning_rate": 9.250483783335046e-06, "loss": 0.2106, "step": 2326 }, { "epoch": 0.7770913341125396, "grad_norm": 0.6192354840406287, "learning_rate": 9.249460051428226e-06, "loss": 0.222, "step": 2327 }, { "epoch": 0.7774252796794122, "grad_norm": 0.44129467986532117, "learning_rate": 9.24843567759043e-06, "loss": 0.206, "step": 2328 }, { "epoch": 0.7777592252462848, "grad_norm": 0.4770974165507659, "learning_rate": 9.247410661976402e-06, "loss": 0.2309, "step": 2329 }, { "epoch": 0.7780931708131574, "grad_norm": 0.48716858018195214, "learning_rate": 9.246385004740981e-06, "loss": 0.2411, "step": 2330 }, { "epoch": 0.77842711638003, "grad_norm": 0.5243113960671338, "learning_rate": 9.245358706039105e-06, "loss": 0.234, "step": 2331 }, { "epoch": 0.7787610619469026, "grad_norm": 0.41801045846912316, "learning_rate": 9.244331766025812e-06, "loss": 0.2239, "step": 2332 }, { "epoch": 0.7790950075137753, "grad_norm": 0.46394404487526614, "learning_rate": 9.243304184856226e-06, "loss": 0.2148, "step": 2333 }, { "epoch": 0.7794289530806479, "grad_norm": 0.4277415195601737, "learning_rate": 9.242275962685584e-06, "loss": 0.212, "step": 2334 }, { "epoch": 0.7797628986475205, "grad_norm": 0.5185901645423077, "learning_rate": 9.241247099669202e-06, "loss": 0.245, "step": 2335 }, { "epoch": 0.7800968442143931, "grad_norm": 0.44824621316470326, "learning_rate": 9.24021759596251e-06, "loss": 0.2255, "step": 2336 }, { "epoch": 0.7804307897812657, "grad_norm": 0.4812361814852991, "learning_rate": 9.239187451721021e-06, "loss": 0.2352, "step": 2337 }, { "epoch": 0.7807647353481383, "grad_norm": 0.4830179394244599, "learning_rate": 9.238156667100354e-06, "loss": 0.2291, "step": 2338 }, { "epoch": 0.7810986809150109, "grad_norm": 0.44869641118909687, "learning_rate": 9.237125242256219e-06, "loss": 0.2121, "step": 2339 }, { "epoch": 0.7814326264818835, "grad_norm": 0.4183573673022089, "learning_rate": 9.236093177344427e-06, "loss": 0.2165, "step": 2340 }, { "epoch": 0.7817665720487561, "grad_norm": 0.44158451252351555, "learning_rate": 9.23506047252088e-06, "loss": 0.2193, "step": 2341 }, { "epoch": 0.7821005176156286, "grad_norm": 0.4988366633490991, "learning_rate": 9.234027127941585e-06, "loss": 0.23, "step": 2342 }, { "epoch": 0.7824344631825012, "grad_norm": 0.47542697794018896, "learning_rate": 9.232993143762637e-06, "loss": 0.2205, "step": 2343 }, { "epoch": 0.7827684087493738, "grad_norm": 0.4307912169479183, "learning_rate": 9.231958520140232e-06, "loss": 0.2193, "step": 2344 }, { "epoch": 0.7831023543162464, "grad_norm": 0.4686420725641428, "learning_rate": 9.230923257230663e-06, "loss": 0.2207, "step": 2345 }, { "epoch": 0.783436299883119, "grad_norm": 0.7093627249782076, "learning_rate": 9.22988735519032e-06, "loss": 0.211, "step": 2346 }, { "epoch": 0.7837702454499916, "grad_norm": 0.4449712797327876, "learning_rate": 9.228850814175684e-06, "loss": 0.2203, "step": 2347 }, { "epoch": 0.7841041910168642, "grad_norm": 0.4224110582979993, "learning_rate": 9.22781363434334e-06, "loss": 0.2164, "step": 2348 }, { "epoch": 0.7844381365837368, "grad_norm": 0.4453644666775102, "learning_rate": 9.226775815849969e-06, "loss": 0.2184, "step": 2349 }, { "epoch": 0.7847720821506095, "grad_norm": 0.47882639139667954, "learning_rate": 9.225737358852339e-06, "loss": 0.2259, "step": 2350 }, { "epoch": 0.7851060277174821, "grad_norm": 0.4614230944404661, "learning_rate": 9.224698263507326e-06, "loss": 0.2437, "step": 2351 }, { "epoch": 0.7854399732843547, "grad_norm": 0.46240361961793186, "learning_rate": 9.223658529971896e-06, "loss": 0.2159, "step": 2352 }, { "epoch": 0.7857739188512273, "grad_norm": 0.4326362693521754, "learning_rate": 9.222618158403111e-06, "loss": 0.2111, "step": 2353 }, { "epoch": 0.7861078644180999, "grad_norm": 0.4294245191748408, "learning_rate": 9.221577148958137e-06, "loss": 0.2179, "step": 2354 }, { "epoch": 0.7864418099849725, "grad_norm": 0.47430465166835356, "learning_rate": 9.220535501794224e-06, "loss": 0.2148, "step": 2355 }, { "epoch": 0.7867757555518451, "grad_norm": 0.40903100342304954, "learning_rate": 9.21949321706873e-06, "loss": 0.2111, "step": 2356 }, { "epoch": 0.7871097011187177, "grad_norm": 0.4871994961911349, "learning_rate": 9.218450294939103e-06, "loss": 0.2329, "step": 2357 }, { "epoch": 0.7874436466855903, "grad_norm": 0.4802859353134735, "learning_rate": 9.217406735562887e-06, "loss": 0.2148, "step": 2358 }, { "epoch": 0.7877775922524628, "grad_norm": 0.4829538927703034, "learning_rate": 9.216362539097726e-06, "loss": 0.2109, "step": 2359 }, { "epoch": 0.7881115378193354, "grad_norm": 0.46957086759196354, "learning_rate": 9.215317705701356e-06, "loss": 0.2158, "step": 2360 }, { "epoch": 0.788445483386208, "grad_norm": 0.4415507689008121, "learning_rate": 9.214272235531615e-06, "loss": 0.22, "step": 2361 }, { "epoch": 0.7887794289530806, "grad_norm": 0.4784953125828448, "learning_rate": 9.213226128746431e-06, "loss": 0.2112, "step": 2362 }, { "epoch": 0.7891133745199532, "grad_norm": 0.4748308372416173, "learning_rate": 9.21217938550383e-06, "loss": 0.2267, "step": 2363 }, { "epoch": 0.7894473200868258, "grad_norm": 0.42786072729815783, "learning_rate": 9.211132005961936e-06, "loss": 0.2182, "step": 2364 }, { "epoch": 0.7897812656536984, "grad_norm": 0.46379390215134986, "learning_rate": 9.210083990278968e-06, "loss": 0.2159, "step": 2365 }, { "epoch": 0.790115211220571, "grad_norm": 0.434672422878081, "learning_rate": 9.209035338613242e-06, "loss": 0.2176, "step": 2366 }, { "epoch": 0.7904491567874437, "grad_norm": 0.4712397323015266, "learning_rate": 9.207986051123167e-06, "loss": 0.2169, "step": 2367 }, { "epoch": 0.7907831023543163, "grad_norm": 0.4643471410872069, "learning_rate": 9.206936127967254e-06, "loss": 0.2231, "step": 2368 }, { "epoch": 0.7911170479211889, "grad_norm": 0.4832625761947329, "learning_rate": 9.205885569304103e-06, "loss": 0.2346, "step": 2369 }, { "epoch": 0.7914509934880615, "grad_norm": 0.5120653341402449, "learning_rate": 9.204834375292413e-06, "loss": 0.2248, "step": 2370 }, { "epoch": 0.7917849390549341, "grad_norm": 0.4595895504768996, "learning_rate": 9.20378254609098e-06, "loss": 0.212, "step": 2371 }, { "epoch": 0.7921188846218067, "grad_norm": 0.4464850753967196, "learning_rate": 9.202730081858697e-06, "loss": 0.2216, "step": 2372 }, { "epoch": 0.7924528301886793, "grad_norm": 0.4465507871460174, "learning_rate": 9.201676982754549e-06, "loss": 0.2175, "step": 2373 }, { "epoch": 0.7927867757555519, "grad_norm": 0.7186089123657715, "learning_rate": 9.200623248937619e-06, "loss": 0.2217, "step": 2374 }, { "epoch": 0.7931207213224244, "grad_norm": 0.48065369107795447, "learning_rate": 9.199568880567085e-06, "loss": 0.2346, "step": 2375 }, { "epoch": 0.793454666889297, "grad_norm": 0.46022889936302425, "learning_rate": 9.198513877802226e-06, "loss": 0.2185, "step": 2376 }, { "epoch": 0.7937886124561696, "grad_norm": 0.48578689235218536, "learning_rate": 9.19745824080241e-06, "loss": 0.2325, "step": 2377 }, { "epoch": 0.7941225580230422, "grad_norm": 0.4201772420752296, "learning_rate": 9.196401969727101e-06, "loss": 0.2219, "step": 2378 }, { "epoch": 0.7944565035899148, "grad_norm": 0.5148645151535987, "learning_rate": 9.195345064735865e-06, "loss": 0.2187, "step": 2379 }, { "epoch": 0.7947904491567874, "grad_norm": 0.4793318370535307, "learning_rate": 9.194287525988358e-06, "loss": 0.2206, "step": 2380 }, { "epoch": 0.79512439472366, "grad_norm": 0.44812143862309217, "learning_rate": 9.193229353644336e-06, "loss": 0.2066, "step": 2381 }, { "epoch": 0.7954583402905326, "grad_norm": 0.48309758118279134, "learning_rate": 9.192170547863644e-06, "loss": 0.2272, "step": 2382 }, { "epoch": 0.7957922858574052, "grad_norm": 0.4955575076751468, "learning_rate": 9.191111108806228e-06, "loss": 0.2152, "step": 2383 }, { "epoch": 0.7961262314242779, "grad_norm": 0.5483835825302927, "learning_rate": 9.190051036632133e-06, "loss": 0.2155, "step": 2384 }, { "epoch": 0.7964601769911505, "grad_norm": 0.43256677860584014, "learning_rate": 9.188990331501493e-06, "loss": 0.2172, "step": 2385 }, { "epoch": 0.7967941225580231, "grad_norm": 0.47777695666620773, "learning_rate": 9.187928993574537e-06, "loss": 0.218, "step": 2386 }, { "epoch": 0.7971280681248957, "grad_norm": 0.4857124920043452, "learning_rate": 9.186867023011598e-06, "loss": 0.2269, "step": 2387 }, { "epoch": 0.7974620136917683, "grad_norm": 0.46097558597069427, "learning_rate": 9.185804419973096e-06, "loss": 0.2262, "step": 2388 }, { "epoch": 0.7977959592586409, "grad_norm": 0.45451151055369127, "learning_rate": 9.18474118461955e-06, "loss": 0.2283, "step": 2389 }, { "epoch": 0.7981299048255135, "grad_norm": 0.45002263688863814, "learning_rate": 9.183677317111572e-06, "loss": 0.2264, "step": 2390 }, { "epoch": 0.798463850392386, "grad_norm": 0.4351929657301828, "learning_rate": 9.182612817609877e-06, "loss": 0.2162, "step": 2391 }, { "epoch": 0.7987977959592586, "grad_norm": 0.4829125840864148, "learning_rate": 9.181547686275266e-06, "loss": 0.2209, "step": 2392 }, { "epoch": 0.7991317415261312, "grad_norm": 0.4617414472574589, "learning_rate": 9.180481923268641e-06, "loss": 0.2317, "step": 2393 }, { "epoch": 0.7994656870930038, "grad_norm": 0.4715677223409943, "learning_rate": 9.179415528750998e-06, "loss": 0.2324, "step": 2394 }, { "epoch": 0.7997996326598764, "grad_norm": 0.445753677713308, "learning_rate": 9.178348502883428e-06, "loss": 0.2086, "step": 2395 }, { "epoch": 0.800133578226749, "grad_norm": 0.4739163161049927, "learning_rate": 9.17728084582712e-06, "loss": 0.2299, "step": 2396 }, { "epoch": 0.8004675237936216, "grad_norm": 0.4444338702698356, "learning_rate": 9.176212557743352e-06, "loss": 0.2179, "step": 2397 }, { "epoch": 0.8008014693604942, "grad_norm": 0.5521009272187888, "learning_rate": 9.175143638793504e-06, "loss": 0.2332, "step": 2398 }, { "epoch": 0.8011354149273668, "grad_norm": 0.4529565320102755, "learning_rate": 9.174074089139048e-06, "loss": 0.2188, "step": 2399 }, { "epoch": 0.8014693604942394, "grad_norm": 0.4655241748444513, "learning_rate": 9.173003908941555e-06, "loss": 0.231, "step": 2400 }, { "epoch": 0.801803306061112, "grad_norm": 0.46546430337015227, "learning_rate": 9.171933098362685e-06, "loss": 0.2245, "step": 2401 }, { "epoch": 0.8021372516279847, "grad_norm": 0.4291232236423889, "learning_rate": 9.170861657564197e-06, "loss": 0.1983, "step": 2402 }, { "epoch": 0.8024711971948573, "grad_norm": 0.47959703405449, "learning_rate": 9.169789586707947e-06, "loss": 0.2225, "step": 2403 }, { "epoch": 0.8028051427617299, "grad_norm": 0.4705338430027695, "learning_rate": 9.16871688595588e-06, "loss": 0.2174, "step": 2404 }, { "epoch": 0.8031390883286025, "grad_norm": 0.4542925186202159, "learning_rate": 9.167643555470044e-06, "loss": 0.221, "step": 2405 }, { "epoch": 0.8034730338954751, "grad_norm": 0.5053346494758011, "learning_rate": 9.166569595412576e-06, "loss": 0.2214, "step": 2406 }, { "epoch": 0.8038069794623477, "grad_norm": 0.518802140242786, "learning_rate": 9.16549500594571e-06, "loss": 0.203, "step": 2407 }, { "epoch": 0.8041409250292202, "grad_norm": 0.4830630204730151, "learning_rate": 9.164419787231778e-06, "loss": 0.2334, "step": 2408 }, { "epoch": 0.8044748705960928, "grad_norm": 0.5058947808268289, "learning_rate": 9.163343939433202e-06, "loss": 0.2303, "step": 2409 }, { "epoch": 0.8048088161629654, "grad_norm": 0.5481350305947928, "learning_rate": 9.162267462712502e-06, "loss": 0.227, "step": 2410 }, { "epoch": 0.805142761729838, "grad_norm": 0.40346017951021734, "learning_rate": 9.161190357232292e-06, "loss": 0.2078, "step": 2411 }, { "epoch": 0.8054767072967106, "grad_norm": 0.4012932942269892, "learning_rate": 9.160112623155282e-06, "loss": 0.2099, "step": 2412 }, { "epoch": 0.8058106528635832, "grad_norm": 0.41692385024259015, "learning_rate": 9.159034260644277e-06, "loss": 0.2084, "step": 2413 }, { "epoch": 0.8061445984304558, "grad_norm": 0.5463744356480256, "learning_rate": 9.157955269862176e-06, "loss": 0.2286, "step": 2414 }, { "epoch": 0.8064785439973284, "grad_norm": 0.49138767972709996, "learning_rate": 9.156875650971974e-06, "loss": 0.2223, "step": 2415 }, { "epoch": 0.806812489564201, "grad_norm": 0.46139748034889444, "learning_rate": 9.155795404136757e-06, "loss": 0.2143, "step": 2416 }, { "epoch": 0.8071464351310736, "grad_norm": 0.41387074512700345, "learning_rate": 9.154714529519715e-06, "loss": 0.2094, "step": 2417 }, { "epoch": 0.8074803806979463, "grad_norm": 0.4711974425999777, "learning_rate": 9.15363302728412e-06, "loss": 0.2252, "step": 2418 }, { "epoch": 0.8078143262648189, "grad_norm": 0.43803798636984687, "learning_rate": 9.15255089759335e-06, "loss": 0.2097, "step": 2419 }, { "epoch": 0.8081482718316915, "grad_norm": 0.4315283690951102, "learning_rate": 9.151468140610872e-06, "loss": 0.2029, "step": 2420 }, { "epoch": 0.8084822173985641, "grad_norm": 0.43314565060866766, "learning_rate": 9.150384756500249e-06, "loss": 0.2129, "step": 2421 }, { "epoch": 0.8088161629654367, "grad_norm": 0.4992267726729122, "learning_rate": 9.14930074542514e-06, "loss": 0.2276, "step": 2422 }, { "epoch": 0.8091501085323093, "grad_norm": 0.8734400531790079, "learning_rate": 9.148216107549297e-06, "loss": 0.2276, "step": 2423 }, { "epoch": 0.8094840540991818, "grad_norm": 0.4618645547769956, "learning_rate": 9.147130843036567e-06, "loss": 0.2216, "step": 2424 }, { "epoch": 0.8098179996660544, "grad_norm": 0.503229948469187, "learning_rate": 9.146044952050891e-06, "loss": 0.2266, "step": 2425 }, { "epoch": 0.810151945232927, "grad_norm": 0.48234269217820075, "learning_rate": 9.144958434756308e-06, "loss": 0.246, "step": 2426 }, { "epoch": 0.8104858907997996, "grad_norm": 0.45799158973946075, "learning_rate": 9.14387129131695e-06, "loss": 0.2241, "step": 2427 }, { "epoch": 0.8108198363666722, "grad_norm": 0.47473856709077045, "learning_rate": 9.142783521897038e-06, "loss": 0.2315, "step": 2428 }, { "epoch": 0.8111537819335448, "grad_norm": 0.46232253841106646, "learning_rate": 9.141695126660896e-06, "loss": 0.2146, "step": 2429 }, { "epoch": 0.8114877275004174, "grad_norm": 0.45675763317544865, "learning_rate": 9.14060610577294e-06, "loss": 0.2196, "step": 2430 }, { "epoch": 0.81182167306729, "grad_norm": 0.45024530888195646, "learning_rate": 9.139516459397675e-06, "loss": 0.2087, "step": 2431 }, { "epoch": 0.8121556186341626, "grad_norm": 0.5389654482824952, "learning_rate": 9.13842618769971e-06, "loss": 0.231, "step": 2432 }, { "epoch": 0.8124895642010352, "grad_norm": 0.48436355868941494, "learning_rate": 9.13733529084374e-06, "loss": 0.2434, "step": 2433 }, { "epoch": 0.8128235097679078, "grad_norm": 0.46078224853375155, "learning_rate": 9.13624376899456e-06, "loss": 0.2338, "step": 2434 }, { "epoch": 0.8131574553347805, "grad_norm": 0.41677648160432945, "learning_rate": 9.135151622317054e-06, "loss": 0.224, "step": 2435 }, { "epoch": 0.8134914009016531, "grad_norm": 0.4755376042102644, "learning_rate": 9.134058850976205e-06, "loss": 0.2247, "step": 2436 }, { "epoch": 0.8138253464685257, "grad_norm": 0.436828747543142, "learning_rate": 9.132965455137092e-06, "loss": 0.2056, "step": 2437 }, { "epoch": 0.8141592920353983, "grad_norm": 0.5091572548534862, "learning_rate": 9.13187143496488e-06, "loss": 0.2249, "step": 2438 }, { "epoch": 0.8144932376022709, "grad_norm": 0.4430402243350094, "learning_rate": 9.13077679062484e-06, "loss": 0.2222, "step": 2439 }, { "epoch": 0.8148271831691434, "grad_norm": 0.47005559131312463, "learning_rate": 9.129681522282326e-06, "loss": 0.2224, "step": 2440 }, { "epoch": 0.815161128736016, "grad_norm": 0.46240395621992303, "learning_rate": 9.128585630102793e-06, "loss": 0.218, "step": 2441 }, { "epoch": 0.8154950743028886, "grad_norm": 0.4229967980030704, "learning_rate": 9.127489114251787e-06, "loss": 0.2088, "step": 2442 }, { "epoch": 0.8158290198697612, "grad_norm": 0.4359587185147543, "learning_rate": 9.12639197489495e-06, "loss": 0.2226, "step": 2443 }, { "epoch": 0.8161629654366338, "grad_norm": 0.47401382528724656, "learning_rate": 9.125294212198022e-06, "loss": 0.2334, "step": 2444 }, { "epoch": 0.8164969110035064, "grad_norm": 0.4838938684515212, "learning_rate": 9.124195826326827e-06, "loss": 0.2328, "step": 2445 }, { "epoch": 0.816830856570379, "grad_norm": 0.5430695760268881, "learning_rate": 9.12309681744729e-06, "loss": 0.2175, "step": 2446 }, { "epoch": 0.8171648021372516, "grad_norm": 0.5297232531969129, "learning_rate": 9.121997185725433e-06, "loss": 0.2239, "step": 2447 }, { "epoch": 0.8174987477041242, "grad_norm": 0.4606818496718444, "learning_rate": 9.120896931327366e-06, "loss": 0.2322, "step": 2448 }, { "epoch": 0.8178326932709968, "grad_norm": 0.43326888876665187, "learning_rate": 9.119796054419295e-06, "loss": 0.2352, "step": 2449 }, { "epoch": 0.8181666388378694, "grad_norm": 0.45191071938809313, "learning_rate": 9.118694555167521e-06, "loss": 0.2334, "step": 2450 }, { "epoch": 0.818500584404742, "grad_norm": 0.4137197400642333, "learning_rate": 9.117592433738439e-06, "loss": 0.2209, "step": 2451 }, { "epoch": 0.8188345299716147, "grad_norm": 0.4726434520191131, "learning_rate": 9.116489690298536e-06, "loss": 0.2307, "step": 2452 }, { "epoch": 0.8191684755384873, "grad_norm": 0.45335955621754376, "learning_rate": 9.115386325014396e-06, "loss": 0.2149, "step": 2453 }, { "epoch": 0.8195024211053599, "grad_norm": 0.5966832339899147, "learning_rate": 9.114282338052695e-06, "loss": 0.2494, "step": 2454 }, { "epoch": 0.8198363666722325, "grad_norm": 0.507628757751821, "learning_rate": 9.113177729580203e-06, "loss": 0.224, "step": 2455 }, { "epoch": 0.820170312239105, "grad_norm": 0.41348330095186164, "learning_rate": 9.112072499763783e-06, "loss": 0.2139, "step": 2456 }, { "epoch": 0.8205042578059776, "grad_norm": 0.4445659097139337, "learning_rate": 9.110966648770392e-06, "loss": 0.2098, "step": 2457 }, { "epoch": 0.8208382033728502, "grad_norm": 0.49639772274003185, "learning_rate": 9.109860176767085e-06, "loss": 0.2242, "step": 2458 }, { "epoch": 0.8211721489397228, "grad_norm": 0.4692548380202328, "learning_rate": 9.108753083921007e-06, "loss": 0.2223, "step": 2459 }, { "epoch": 0.8215060945065954, "grad_norm": 0.4857245701306937, "learning_rate": 9.107645370399395e-06, "loss": 0.2282, "step": 2460 }, { "epoch": 0.821840040073468, "grad_norm": 0.4350617485594562, "learning_rate": 9.106537036369587e-06, "loss": 0.2057, "step": 2461 }, { "epoch": 0.8221739856403406, "grad_norm": 0.45938249982968987, "learning_rate": 9.105428081999004e-06, "loss": 0.2297, "step": 2462 }, { "epoch": 0.8225079312072132, "grad_norm": 0.4086313343960967, "learning_rate": 9.10431850745517e-06, "loss": 0.2036, "step": 2463 }, { "epoch": 0.8228418767740858, "grad_norm": 0.44187113165377345, "learning_rate": 9.103208312905698e-06, "loss": 0.2149, "step": 2464 }, { "epoch": 0.8231758223409584, "grad_norm": 0.4456715017463197, "learning_rate": 9.102097498518299e-06, "loss": 0.2157, "step": 2465 }, { "epoch": 0.823509767907831, "grad_norm": 0.3795807225141392, "learning_rate": 9.100986064460769e-06, "loss": 0.1994, "step": 2466 }, { "epoch": 0.8238437134747036, "grad_norm": 0.5015498235452791, "learning_rate": 9.099874010901009e-06, "loss": 0.2067, "step": 2467 }, { "epoch": 0.8241776590415762, "grad_norm": 0.46873528314568835, "learning_rate": 9.098761338007003e-06, "loss": 0.2342, "step": 2468 }, { "epoch": 0.8245116046084489, "grad_norm": 0.4802429384639126, "learning_rate": 9.097648045946837e-06, "loss": 0.2077, "step": 2469 }, { "epoch": 0.8248455501753215, "grad_norm": 0.47674620695731734, "learning_rate": 9.096534134888685e-06, "loss": 0.2311, "step": 2470 }, { "epoch": 0.8251794957421941, "grad_norm": 0.4741718011929554, "learning_rate": 9.095419605000817e-06, "loss": 0.2223, "step": 2471 }, { "epoch": 0.8255134413090667, "grad_norm": 0.4614907153038838, "learning_rate": 9.094304456451596e-06, "loss": 0.2301, "step": 2472 }, { "epoch": 0.8258473868759392, "grad_norm": 0.4517767126346064, "learning_rate": 9.093188689409477e-06, "loss": 0.2222, "step": 2473 }, { "epoch": 0.8261813324428118, "grad_norm": 0.652118941163761, "learning_rate": 9.09207230404301e-06, "loss": 0.2331, "step": 2474 }, { "epoch": 0.8265152780096844, "grad_norm": 0.45586670064349893, "learning_rate": 9.090955300520842e-06, "loss": 0.2111, "step": 2475 }, { "epoch": 0.826849223576557, "grad_norm": 0.4899208936605748, "learning_rate": 9.089837679011704e-06, "loss": 0.2282, "step": 2476 }, { "epoch": 0.8271831691434296, "grad_norm": 0.4964827628532168, "learning_rate": 9.08871943968443e-06, "loss": 0.2269, "step": 2477 }, { "epoch": 0.8275171147103022, "grad_norm": 0.4552900965289199, "learning_rate": 9.08760058270794e-06, "loss": 0.2356, "step": 2478 }, { "epoch": 0.8278510602771748, "grad_norm": 0.4918378116260361, "learning_rate": 9.086481108251253e-06, "loss": 0.237, "step": 2479 }, { "epoch": 0.8281850058440474, "grad_norm": 0.4607573617794218, "learning_rate": 9.085361016483477e-06, "loss": 0.2197, "step": 2480 }, { "epoch": 0.82851895141092, "grad_norm": 0.5491934418016965, "learning_rate": 9.084240307573816e-06, "loss": 0.2145, "step": 2481 }, { "epoch": 0.8288528969777926, "grad_norm": 0.48107467425045347, "learning_rate": 9.083118981691567e-06, "loss": 0.2198, "step": 2482 }, { "epoch": 0.8291868425446652, "grad_norm": 0.48450480401984153, "learning_rate": 9.081997039006117e-06, "loss": 0.2226, "step": 2483 }, { "epoch": 0.8295207881115378, "grad_norm": 0.4454234793986314, "learning_rate": 9.080874479686952e-06, "loss": 0.2091, "step": 2484 }, { "epoch": 0.8298547336784105, "grad_norm": 0.5397372637338255, "learning_rate": 9.079751303903646e-06, "loss": 0.2302, "step": 2485 }, { "epoch": 0.8301886792452831, "grad_norm": 0.49058908175007737, "learning_rate": 9.078627511825866e-06, "loss": 0.2339, "step": 2486 }, { "epoch": 0.8305226248121557, "grad_norm": 0.49048769393241404, "learning_rate": 9.077503103623379e-06, "loss": 0.2263, "step": 2487 }, { "epoch": 0.8308565703790283, "grad_norm": 0.4704762203253689, "learning_rate": 9.076378079466036e-06, "loss": 0.2132, "step": 2488 }, { "epoch": 0.8311905159459008, "grad_norm": 0.4149169942291882, "learning_rate": 9.075252439523785e-06, "loss": 0.2185, "step": 2489 }, { "epoch": 0.8315244615127734, "grad_norm": 0.4750547835779951, "learning_rate": 9.074126183966669e-06, "loss": 0.2136, "step": 2490 }, { "epoch": 0.831858407079646, "grad_norm": 0.4757858563906054, "learning_rate": 9.072999312964823e-06, "loss": 0.2256, "step": 2491 }, { "epoch": 0.8321923526465186, "grad_norm": 0.4588296740580707, "learning_rate": 9.071871826688472e-06, "loss": 0.2182, "step": 2492 }, { "epoch": 0.8325262982133912, "grad_norm": 0.4501152802206452, "learning_rate": 9.070743725307937e-06, "loss": 0.2126, "step": 2493 }, { "epoch": 0.8328602437802638, "grad_norm": 0.5107092439533395, "learning_rate": 9.06961500899363e-06, "loss": 0.2385, "step": 2494 }, { "epoch": 0.8331941893471364, "grad_norm": 0.46172068053365517, "learning_rate": 9.068485677916059e-06, "loss": 0.2233, "step": 2495 }, { "epoch": 0.833528134914009, "grad_norm": 0.4559896293320187, "learning_rate": 9.06735573224582e-06, "loss": 0.2156, "step": 2496 }, { "epoch": 0.8338620804808816, "grad_norm": 0.48841992918673477, "learning_rate": 9.066225172153607e-06, "loss": 0.2509, "step": 2497 }, { "epoch": 0.8341960260477542, "grad_norm": 0.48807810736665075, "learning_rate": 9.065093997810204e-06, "loss": 0.2297, "step": 2498 }, { "epoch": 0.8345299716146268, "grad_norm": 0.45944899131100364, "learning_rate": 9.063962209386485e-06, "loss": 0.234, "step": 2499 }, { "epoch": 0.8348639171814994, "grad_norm": 0.4787228349593665, "learning_rate": 9.062829807053426e-06, "loss": 0.2188, "step": 2500 }, { "epoch": 0.835197862748372, "grad_norm": 0.5036077547144578, "learning_rate": 9.061696790982086e-06, "loss": 0.2366, "step": 2501 }, { "epoch": 0.8355318083152447, "grad_norm": 0.544975155082703, "learning_rate": 9.060563161343618e-06, "loss": 0.214, "step": 2502 }, { "epoch": 0.8358657538821173, "grad_norm": 0.4434329818632235, "learning_rate": 9.059428918309276e-06, "loss": 0.2141, "step": 2503 }, { "epoch": 0.8361996994489899, "grad_norm": 0.4498540488388689, "learning_rate": 9.058294062050396e-06, "loss": 0.2178, "step": 2504 }, { "epoch": 0.8365336450158624, "grad_norm": 0.42897565416363814, "learning_rate": 9.057158592738414e-06, "loss": 0.2147, "step": 2505 }, { "epoch": 0.836867590582735, "grad_norm": 0.42112860630504073, "learning_rate": 9.056022510544855e-06, "loss": 0.2132, "step": 2506 }, { "epoch": 0.8372015361496076, "grad_norm": 0.4408094729636434, "learning_rate": 9.054885815641336e-06, "loss": 0.2215, "step": 2507 }, { "epoch": 0.8375354817164802, "grad_norm": 0.45258060889972634, "learning_rate": 9.05374850819957e-06, "loss": 0.2132, "step": 2508 }, { "epoch": 0.8378694272833528, "grad_norm": 0.47132515883640624, "learning_rate": 9.052610588391363e-06, "loss": 0.2233, "step": 2509 }, { "epoch": 0.8382033728502254, "grad_norm": 0.4767783733828431, "learning_rate": 9.051472056388606e-06, "loss": 0.2418, "step": 2510 }, { "epoch": 0.838537318417098, "grad_norm": 0.46137862596916196, "learning_rate": 9.050332912363292e-06, "loss": 0.237, "step": 2511 }, { "epoch": 0.8388712639839706, "grad_norm": 0.49409067455142913, "learning_rate": 9.049193156487501e-06, "loss": 0.2046, "step": 2512 }, { "epoch": 0.8392052095508432, "grad_norm": 0.49205788033890396, "learning_rate": 9.048052788933405e-06, "loss": 0.2206, "step": 2513 }, { "epoch": 0.8395391551177158, "grad_norm": 0.4667335941199234, "learning_rate": 9.046911809873271e-06, "loss": 0.2158, "step": 2514 }, { "epoch": 0.8398731006845884, "grad_norm": 0.4121890688466787, "learning_rate": 9.045770219479457e-06, "loss": 0.2021, "step": 2515 }, { "epoch": 0.840207046251461, "grad_norm": 0.42556930558028944, "learning_rate": 9.044628017924415e-06, "loss": 0.2226, "step": 2516 }, { "epoch": 0.8405409918183336, "grad_norm": 0.5288410131785884, "learning_rate": 9.043485205380685e-06, "loss": 0.214, "step": 2517 }, { "epoch": 0.8408749373852062, "grad_norm": 0.435346841429691, "learning_rate": 9.042341782020906e-06, "loss": 0.2163, "step": 2518 }, { "epoch": 0.8412088829520789, "grad_norm": 0.4680554061197634, "learning_rate": 9.041197748017802e-06, "loss": 0.2162, "step": 2519 }, { "epoch": 0.8415428285189515, "grad_norm": 0.6331729472174026, "learning_rate": 9.040053103544196e-06, "loss": 0.2171, "step": 2520 }, { "epoch": 0.8418767740858241, "grad_norm": 0.46302434709386225, "learning_rate": 9.038907848772999e-06, "loss": 0.2313, "step": 2521 }, { "epoch": 0.8422107196526966, "grad_norm": 0.5418674061177113, "learning_rate": 9.037761983877214e-06, "loss": 0.2314, "step": 2522 }, { "epoch": 0.8425446652195692, "grad_norm": 0.422599498634045, "learning_rate": 9.036615509029939e-06, "loss": 0.2148, "step": 2523 }, { "epoch": 0.8428786107864418, "grad_norm": 0.47415409143410847, "learning_rate": 9.035468424404362e-06, "loss": 0.2271, "step": 2524 }, { "epoch": 0.8432125563533144, "grad_norm": 0.43769467339845197, "learning_rate": 9.034320730173762e-06, "loss": 0.2078, "step": 2525 }, { "epoch": 0.843546501920187, "grad_norm": 0.4579042598091712, "learning_rate": 9.033172426511515e-06, "loss": 0.2275, "step": 2526 }, { "epoch": 0.8438804474870596, "grad_norm": 0.472853158936843, "learning_rate": 9.032023513591081e-06, "loss": 0.2163, "step": 2527 }, { "epoch": 0.8442143930539322, "grad_norm": 0.47883958606918925, "learning_rate": 9.030873991586021e-06, "loss": 0.2362, "step": 2528 }, { "epoch": 0.8445483386208048, "grad_norm": 0.4244077632216657, "learning_rate": 9.029723860669983e-06, "loss": 0.2135, "step": 2529 }, { "epoch": 0.8448822841876774, "grad_norm": 0.5703843399860384, "learning_rate": 9.028573121016707e-06, "loss": 0.2595, "step": 2530 }, { "epoch": 0.84521622975455, "grad_norm": 0.5540172877400666, "learning_rate": 9.027421772800027e-06, "loss": 0.2268, "step": 2531 }, { "epoch": 0.8455501753214226, "grad_norm": 0.46860925608211035, "learning_rate": 9.026269816193867e-06, "loss": 0.2231, "step": 2532 }, { "epoch": 0.8458841208882952, "grad_norm": 0.4536040554309427, "learning_rate": 9.025117251372242e-06, "loss": 0.2264, "step": 2533 }, { "epoch": 0.8462180664551678, "grad_norm": 0.48291163229730805, "learning_rate": 9.023964078509263e-06, "loss": 0.2268, "step": 2534 }, { "epoch": 0.8465520120220404, "grad_norm": 0.47777184837544556, "learning_rate": 9.022810297779129e-06, "loss": 0.2208, "step": 2535 }, { "epoch": 0.846885957588913, "grad_norm": 0.46448916819982217, "learning_rate": 9.021655909356133e-06, "loss": 0.2424, "step": 2536 }, { "epoch": 0.8472199031557857, "grad_norm": 0.47316820975138824, "learning_rate": 9.020500913414658e-06, "loss": 0.2248, "step": 2537 }, { "epoch": 0.8475538487226582, "grad_norm": 0.4767805328678842, "learning_rate": 9.019345310129179e-06, "loss": 0.2338, "step": 2538 }, { "epoch": 0.8478877942895308, "grad_norm": 0.4397760724625773, "learning_rate": 9.018189099674266e-06, "loss": 0.2208, "step": 2539 }, { "epoch": 0.8482217398564034, "grad_norm": 0.4862443013166264, "learning_rate": 9.017032282224577e-06, "loss": 0.2299, "step": 2540 }, { "epoch": 0.848555685423276, "grad_norm": 0.44379757315440904, "learning_rate": 9.015874857954863e-06, "loss": 0.2192, "step": 2541 }, { "epoch": 0.8488896309901486, "grad_norm": 0.42679243339558226, "learning_rate": 9.014716827039965e-06, "loss": 0.2222, "step": 2542 }, { "epoch": 0.8492235765570212, "grad_norm": 0.41430931012260835, "learning_rate": 9.013558189654819e-06, "loss": 0.2161, "step": 2543 }, { "epoch": 0.8495575221238938, "grad_norm": 0.48620083773360206, "learning_rate": 9.01239894597445e-06, "loss": 0.2191, "step": 2544 }, { "epoch": 0.8498914676907664, "grad_norm": 0.4914872230641687, "learning_rate": 9.011239096173977e-06, "loss": 0.2205, "step": 2545 }, { "epoch": 0.850225413257639, "grad_norm": 0.4726040998570721, "learning_rate": 9.010078640428606e-06, "loss": 0.2288, "step": 2546 }, { "epoch": 0.8505593588245116, "grad_norm": 0.46491513024652575, "learning_rate": 9.00891757891364e-06, "loss": 0.2277, "step": 2547 }, { "epoch": 0.8508933043913842, "grad_norm": 0.45611845460077005, "learning_rate": 9.007755911804471e-06, "loss": 0.2251, "step": 2548 }, { "epoch": 0.8512272499582568, "grad_norm": 0.5926560572888228, "learning_rate": 9.006593639276582e-06, "loss": 0.2243, "step": 2549 }, { "epoch": 0.8515611955251294, "grad_norm": 0.496523833587818, "learning_rate": 9.005430761505548e-06, "loss": 0.2403, "step": 2550 }, { "epoch": 0.851895141092002, "grad_norm": 0.4714784682154149, "learning_rate": 9.004267278667032e-06, "loss": 0.2149, "step": 2551 }, { "epoch": 0.8522290866588746, "grad_norm": 0.4313656521047808, "learning_rate": 9.003103190936797e-06, "loss": 0.2187, "step": 2552 }, { "epoch": 0.8525630322257473, "grad_norm": 0.46928566508359176, "learning_rate": 9.00193849849069e-06, "loss": 0.2208, "step": 2553 }, { "epoch": 0.8528969777926197, "grad_norm": 0.4283171636804329, "learning_rate": 9.00077320150465e-06, "loss": 0.2112, "step": 2554 }, { "epoch": 0.8532309233594924, "grad_norm": 0.5000190368184554, "learning_rate": 8.999607300154712e-06, "loss": 0.2192, "step": 2555 }, { "epoch": 0.853564868926365, "grad_norm": 0.47355749853117557, "learning_rate": 8.998440794616998e-06, "loss": 0.2213, "step": 2556 }, { "epoch": 0.8538988144932376, "grad_norm": 0.43176568872005033, "learning_rate": 8.99727368506772e-06, "loss": 0.2098, "step": 2557 }, { "epoch": 0.8542327600601102, "grad_norm": 0.46982069301090845, "learning_rate": 8.996105971683187e-06, "loss": 0.2177, "step": 2558 }, { "epoch": 0.8545667056269828, "grad_norm": 0.47512821563209395, "learning_rate": 8.994937654639793e-06, "loss": 0.2278, "step": 2559 }, { "epoch": 0.8549006511938554, "grad_norm": 0.4000965489856469, "learning_rate": 8.993768734114029e-06, "loss": 0.2023, "step": 2560 }, { "epoch": 0.855234596760728, "grad_norm": 0.43268062516172295, "learning_rate": 8.992599210282471e-06, "loss": 0.2202, "step": 2561 }, { "epoch": 0.8555685423276006, "grad_norm": 0.42305087012502596, "learning_rate": 8.991429083321792e-06, "loss": 0.2189, "step": 2562 }, { "epoch": 0.8559024878944732, "grad_norm": 0.4708842682056169, "learning_rate": 8.990258353408754e-06, "loss": 0.2295, "step": 2563 }, { "epoch": 0.8562364334613458, "grad_norm": 0.4600816021897864, "learning_rate": 8.989087020720204e-06, "loss": 0.2193, "step": 2564 }, { "epoch": 0.8565703790282184, "grad_norm": 0.4198834665228543, "learning_rate": 8.987915085433092e-06, "loss": 0.2195, "step": 2565 }, { "epoch": 0.856904324595091, "grad_norm": 0.4489653916885646, "learning_rate": 8.98674254772445e-06, "loss": 0.2126, "step": 2566 }, { "epoch": 0.8572382701619636, "grad_norm": 0.40367098282319236, "learning_rate": 8.985569407771404e-06, "loss": 0.1956, "step": 2567 }, { "epoch": 0.8575722157288362, "grad_norm": 0.4732011179689396, "learning_rate": 8.984395665751169e-06, "loss": 0.2296, "step": 2568 }, { "epoch": 0.8579061612957088, "grad_norm": 0.47840121907574584, "learning_rate": 8.983221321841056e-06, "loss": 0.2299, "step": 2569 }, { "epoch": 0.8582401068625815, "grad_norm": 0.4465691457596304, "learning_rate": 8.98204637621846e-06, "loss": 0.2143, "step": 2570 }, { "epoch": 0.858574052429454, "grad_norm": 0.47090014081524445, "learning_rate": 8.980870829060872e-06, "loss": 0.2255, "step": 2571 }, { "epoch": 0.8589079979963266, "grad_norm": 0.46190459557837005, "learning_rate": 8.979694680545872e-06, "loss": 0.2164, "step": 2572 }, { "epoch": 0.8592419435631992, "grad_norm": 0.3984766673387074, "learning_rate": 8.978517930851132e-06, "loss": 0.2025, "step": 2573 }, { "epoch": 0.8595758891300718, "grad_norm": 0.4881647227380028, "learning_rate": 8.977340580154411e-06, "loss": 0.2222, "step": 2574 }, { "epoch": 0.8599098346969444, "grad_norm": 0.4577249839436491, "learning_rate": 8.976162628633565e-06, "loss": 0.2284, "step": 2575 }, { "epoch": 0.860243780263817, "grad_norm": 0.42843210433451573, "learning_rate": 8.974984076466537e-06, "loss": 0.228, "step": 2576 }, { "epoch": 0.8605777258306896, "grad_norm": 0.46579319758112697, "learning_rate": 8.97380492383136e-06, "loss": 0.2117, "step": 2577 }, { "epoch": 0.8609116713975622, "grad_norm": 0.4552220225147081, "learning_rate": 8.972625170906157e-06, "loss": 0.2316, "step": 2578 }, { "epoch": 0.8612456169644348, "grad_norm": 0.42726184700574793, "learning_rate": 8.971444817869148e-06, "loss": 0.2198, "step": 2579 }, { "epoch": 0.8615795625313074, "grad_norm": 0.45374200275965654, "learning_rate": 8.970263864898636e-06, "loss": 0.2261, "step": 2580 }, { "epoch": 0.86191350809818, "grad_norm": 0.5381715281038784, "learning_rate": 8.969082312173021e-06, "loss": 0.2344, "step": 2581 }, { "epoch": 0.8622474536650526, "grad_norm": 0.4754027335097857, "learning_rate": 8.967900159870787e-06, "loss": 0.2293, "step": 2582 }, { "epoch": 0.8625813992319252, "grad_norm": 0.4460243245841365, "learning_rate": 8.966717408170512e-06, "loss": 0.2129, "step": 2583 }, { "epoch": 0.8629153447987978, "grad_norm": 0.471562541587052, "learning_rate": 8.965534057250866e-06, "loss": 0.232, "step": 2584 }, { "epoch": 0.8632492903656704, "grad_norm": 0.4426034976537733, "learning_rate": 8.964350107290609e-06, "loss": 0.2099, "step": 2585 }, { "epoch": 0.863583235932543, "grad_norm": 0.4588021928348187, "learning_rate": 8.96316555846859e-06, "loss": 0.2205, "step": 2586 }, { "epoch": 0.8639171814994155, "grad_norm": 0.42630016568299534, "learning_rate": 8.961980410963749e-06, "loss": 0.2135, "step": 2587 }, { "epoch": 0.8642511270662881, "grad_norm": 0.45810183824218453, "learning_rate": 8.960794664955115e-06, "loss": 0.2229, "step": 2588 }, { "epoch": 0.8645850726331608, "grad_norm": 0.42559767116343933, "learning_rate": 8.95960832062181e-06, "loss": 0.2156, "step": 2589 }, { "epoch": 0.8649190182000334, "grad_norm": 0.4245683101087079, "learning_rate": 8.958421378143046e-06, "loss": 0.22, "step": 2590 }, { "epoch": 0.865252963766906, "grad_norm": 0.5033515035437922, "learning_rate": 8.957233837698122e-06, "loss": 0.2298, "step": 2591 }, { "epoch": 0.8655869093337786, "grad_norm": 0.46700010068824005, "learning_rate": 8.956045699466433e-06, "loss": 0.2344, "step": 2592 }, { "epoch": 0.8659208549006512, "grad_norm": 0.4434776982903602, "learning_rate": 8.95485696362746e-06, "loss": 0.2143, "step": 2593 }, { "epoch": 0.8662548004675238, "grad_norm": 0.432504424021459, "learning_rate": 8.953667630360778e-06, "loss": 0.2135, "step": 2594 }, { "epoch": 0.8665887460343964, "grad_norm": 0.4761778540131483, "learning_rate": 8.952477699846044e-06, "loss": 0.2311, "step": 2595 }, { "epoch": 0.866922691601269, "grad_norm": 0.41699684183795516, "learning_rate": 8.951287172263018e-06, "loss": 0.2181, "step": 2596 }, { "epoch": 0.8672566371681416, "grad_norm": 0.4546797953736065, "learning_rate": 8.950096047791539e-06, "loss": 0.2311, "step": 2597 }, { "epoch": 0.8675905827350142, "grad_norm": 0.4793027535037006, "learning_rate": 8.94890432661154e-06, "loss": 0.2416, "step": 2598 }, { "epoch": 0.8679245283018868, "grad_norm": 0.4259729344924639, "learning_rate": 8.947712008903045e-06, "loss": 0.2164, "step": 2599 }, { "epoch": 0.8682584738687594, "grad_norm": 0.45367870809652466, "learning_rate": 8.946519094846169e-06, "loss": 0.2168, "step": 2600 }, { "epoch": 0.868592419435632, "grad_norm": 0.4283930933516754, "learning_rate": 8.945325584621116e-06, "loss": 0.2048, "step": 2601 }, { "epoch": 0.8689263650025046, "grad_norm": 0.47811696623877187, "learning_rate": 8.944131478408177e-06, "loss": 0.2256, "step": 2602 }, { "epoch": 0.8692603105693771, "grad_norm": 0.474177095776076, "learning_rate": 8.942936776387739e-06, "loss": 0.2199, "step": 2603 }, { "epoch": 0.8695942561362497, "grad_norm": 0.449120897392552, "learning_rate": 8.941741478740272e-06, "loss": 0.2251, "step": 2604 }, { "epoch": 0.8699282017031224, "grad_norm": 0.3831811961730255, "learning_rate": 8.940545585646344e-06, "loss": 0.1982, "step": 2605 }, { "epoch": 0.870262147269995, "grad_norm": 0.46919762256557285, "learning_rate": 8.939349097286608e-06, "loss": 0.237, "step": 2606 }, { "epoch": 0.8705960928368676, "grad_norm": 0.4595681283636294, "learning_rate": 8.938152013841803e-06, "loss": 0.2359, "step": 2607 }, { "epoch": 0.8709300384037402, "grad_norm": 0.46244221082714243, "learning_rate": 8.93695433549277e-06, "loss": 0.2172, "step": 2608 }, { "epoch": 0.8712639839706128, "grad_norm": 0.44144435300671014, "learning_rate": 8.935756062420426e-06, "loss": 0.2259, "step": 2609 }, { "epoch": 0.8715979295374854, "grad_norm": 0.4735100176435913, "learning_rate": 8.934557194805787e-06, "loss": 0.2221, "step": 2610 }, { "epoch": 0.871931875104358, "grad_norm": 0.44284418340652504, "learning_rate": 8.933357732829957e-06, "loss": 0.2265, "step": 2611 }, { "epoch": 0.8722658206712306, "grad_norm": 0.4659415166073965, "learning_rate": 8.932157676674126e-06, "loss": 0.2276, "step": 2612 }, { "epoch": 0.8725997662381032, "grad_norm": 0.41233515606202303, "learning_rate": 8.93095702651958e-06, "loss": 0.2074, "step": 2613 }, { "epoch": 0.8729337118049758, "grad_norm": 0.4877780049978787, "learning_rate": 8.92975578254769e-06, "loss": 0.2313, "step": 2614 }, { "epoch": 0.8732676573718484, "grad_norm": 0.5150208995785533, "learning_rate": 8.928553944939915e-06, "loss": 0.2156, "step": 2615 }, { "epoch": 0.873601602938721, "grad_norm": 0.4168813001248241, "learning_rate": 8.92735151387781e-06, "loss": 0.2222, "step": 2616 }, { "epoch": 0.8739355485055936, "grad_norm": 0.4098574760494883, "learning_rate": 8.926148489543018e-06, "loss": 0.2175, "step": 2617 }, { "epoch": 0.8742694940724662, "grad_norm": 0.4412115939247315, "learning_rate": 8.924944872117264e-06, "loss": 0.2195, "step": 2618 }, { "epoch": 0.8746034396393388, "grad_norm": 0.4630857578664083, "learning_rate": 8.923740661782376e-06, "loss": 0.2195, "step": 2619 }, { "epoch": 0.8749373852062113, "grad_norm": 0.48566643237103263, "learning_rate": 8.92253585872026e-06, "loss": 0.2351, "step": 2620 }, { "epoch": 0.8752713307730839, "grad_norm": 0.43311552083576144, "learning_rate": 8.921330463112915e-06, "loss": 0.2288, "step": 2621 }, { "epoch": 0.8756052763399566, "grad_norm": 0.39507171452378326, "learning_rate": 8.92012447514243e-06, "loss": 0.207, "step": 2622 }, { "epoch": 0.8759392219068292, "grad_norm": 0.4452755906788476, "learning_rate": 8.918917894990989e-06, "loss": 0.1985, "step": 2623 }, { "epoch": 0.8762731674737018, "grad_norm": 0.3910814697165605, "learning_rate": 8.917710722840853e-06, "loss": 0.2066, "step": 2624 }, { "epoch": 0.8766071130405744, "grad_norm": 0.4239629843736424, "learning_rate": 8.916502958874385e-06, "loss": 0.2274, "step": 2625 }, { "epoch": 0.876941058607447, "grad_norm": 0.45349007585563317, "learning_rate": 8.915294603274027e-06, "loss": 0.2205, "step": 2626 }, { "epoch": 0.8772750041743196, "grad_norm": 0.42342259610566313, "learning_rate": 8.91408565622232e-06, "loss": 0.2065, "step": 2627 }, { "epoch": 0.8776089497411922, "grad_norm": 0.44535591900172594, "learning_rate": 8.912876117901887e-06, "loss": 0.2203, "step": 2628 }, { "epoch": 0.8779428953080648, "grad_norm": 0.46320851664376284, "learning_rate": 8.911665988495446e-06, "loss": 0.2432, "step": 2629 }, { "epoch": 0.8782768408749374, "grad_norm": 0.4367734120566955, "learning_rate": 8.910455268185795e-06, "loss": 0.2166, "step": 2630 }, { "epoch": 0.87861078644181, "grad_norm": 0.43971258610914693, "learning_rate": 8.909243957155835e-06, "loss": 0.2313, "step": 2631 }, { "epoch": 0.8789447320086826, "grad_norm": 0.408475957094058, "learning_rate": 8.908032055588544e-06, "loss": 0.2282, "step": 2632 }, { "epoch": 0.8792786775755552, "grad_norm": 0.4413318202987162, "learning_rate": 8.906819563666997e-06, "loss": 0.2325, "step": 2633 }, { "epoch": 0.8796126231424278, "grad_norm": 0.4204393958934717, "learning_rate": 8.905606481574351e-06, "loss": 0.2212, "step": 2634 }, { "epoch": 0.8799465687093004, "grad_norm": 0.43071251695265306, "learning_rate": 8.90439280949386e-06, "loss": 0.2153, "step": 2635 }, { "epoch": 0.8802805142761729, "grad_norm": 0.44850361365375907, "learning_rate": 8.903178547608863e-06, "loss": 0.2221, "step": 2636 }, { "epoch": 0.8806144598430455, "grad_norm": 0.42781213117927985, "learning_rate": 8.901963696102788e-06, "loss": 0.2306, "step": 2637 }, { "epoch": 0.8809484054099181, "grad_norm": 0.42113339754508183, "learning_rate": 8.900748255159152e-06, "loss": 0.2111, "step": 2638 }, { "epoch": 0.8812823509767908, "grad_norm": 0.47605309867063816, "learning_rate": 8.899532224961562e-06, "loss": 0.229, "step": 2639 }, { "epoch": 0.8816162965436634, "grad_norm": 0.5527061292754034, "learning_rate": 8.898315605693715e-06, "loss": 0.217, "step": 2640 }, { "epoch": 0.881950242110536, "grad_norm": 0.43906596005312143, "learning_rate": 8.897098397539394e-06, "loss": 0.2164, "step": 2641 }, { "epoch": 0.8822841876774086, "grad_norm": 0.5067302891421397, "learning_rate": 8.895880600682472e-06, "loss": 0.2403, "step": 2642 }, { "epoch": 0.8826181332442812, "grad_norm": 0.48526179484087406, "learning_rate": 8.894662215306913e-06, "loss": 0.216, "step": 2643 }, { "epoch": 0.8829520788111538, "grad_norm": 0.5004998831458609, "learning_rate": 8.89344324159677e-06, "loss": 0.2182, "step": 2644 }, { "epoch": 0.8832860243780264, "grad_norm": 0.4141565409924, "learning_rate": 8.89222367973618e-06, "loss": 0.2173, "step": 2645 }, { "epoch": 0.883619969944899, "grad_norm": 0.451587274844467, "learning_rate": 8.891003529909375e-06, "loss": 0.2258, "step": 2646 }, { "epoch": 0.8839539155117716, "grad_norm": 0.4357097873514739, "learning_rate": 8.889782792300672e-06, "loss": 0.2176, "step": 2647 }, { "epoch": 0.8842878610786442, "grad_norm": 0.4424588073002416, "learning_rate": 8.888561467094476e-06, "loss": 0.2173, "step": 2648 }, { "epoch": 0.8846218066455168, "grad_norm": 0.47374105210343537, "learning_rate": 8.887339554475284e-06, "loss": 0.2167, "step": 2649 }, { "epoch": 0.8849557522123894, "grad_norm": 0.4314904279729543, "learning_rate": 8.886117054627682e-06, "loss": 0.2114, "step": 2650 }, { "epoch": 0.885289697779262, "grad_norm": 0.48980056249973974, "learning_rate": 8.88489396773634e-06, "loss": 0.2344, "step": 2651 }, { "epoch": 0.8856236433461345, "grad_norm": 0.38805445786960013, "learning_rate": 8.883670293986019e-06, "loss": 0.2056, "step": 2652 }, { "epoch": 0.8859575889130071, "grad_norm": 0.4178809237058471, "learning_rate": 8.882446033561576e-06, "loss": 0.2242, "step": 2653 }, { "epoch": 0.8862915344798797, "grad_norm": 0.4936359585377921, "learning_rate": 8.881221186647941e-06, "loss": 0.2183, "step": 2654 }, { "epoch": 0.8866254800467523, "grad_norm": 0.4859800139169095, "learning_rate": 8.879995753430148e-06, "loss": 0.2178, "step": 2655 }, { "epoch": 0.886959425613625, "grad_norm": 0.48946455778130216, "learning_rate": 8.878769734093312e-06, "loss": 0.2203, "step": 2656 }, { "epoch": 0.8872933711804976, "grad_norm": 0.41725432475224355, "learning_rate": 8.877543128822634e-06, "loss": 0.2265, "step": 2657 }, { "epoch": 0.8876273167473702, "grad_norm": 0.4663793748591142, "learning_rate": 8.876315937803413e-06, "loss": 0.2262, "step": 2658 }, { "epoch": 0.8879612623142428, "grad_norm": 0.3916146858993435, "learning_rate": 8.875088161221025e-06, "loss": 0.2072, "step": 2659 }, { "epoch": 0.8882952078811154, "grad_norm": 0.47716488660190576, "learning_rate": 8.873859799260944e-06, "loss": 0.2245, "step": 2660 }, { "epoch": 0.888629153447988, "grad_norm": 0.42641030028705323, "learning_rate": 8.872630852108725e-06, "loss": 0.2301, "step": 2661 }, { "epoch": 0.8889630990148606, "grad_norm": 0.4117929648291072, "learning_rate": 8.87140131995002e-06, "loss": 0.2303, "step": 2662 }, { "epoch": 0.8892970445817332, "grad_norm": 0.4161638007577847, "learning_rate": 8.870171202970559e-06, "loss": 0.2121, "step": 2663 }, { "epoch": 0.8896309901486058, "grad_norm": 0.4530085404187551, "learning_rate": 8.868940501356169e-06, "loss": 0.2248, "step": 2664 }, { "epoch": 0.8899649357154784, "grad_norm": 0.40243303655245055, "learning_rate": 8.86770921529276e-06, "loss": 0.2135, "step": 2665 }, { "epoch": 0.890298881282351, "grad_norm": 0.4473350481589006, "learning_rate": 8.866477344966334e-06, "loss": 0.216, "step": 2666 }, { "epoch": 0.8906328268492236, "grad_norm": 0.41627719547498515, "learning_rate": 8.865244890562978e-06, "loss": 0.2174, "step": 2667 }, { "epoch": 0.8909667724160962, "grad_norm": 0.5442020904005506, "learning_rate": 8.864011852268872e-06, "loss": 0.2261, "step": 2668 }, { "epoch": 0.8913007179829687, "grad_norm": 0.4366841475309793, "learning_rate": 8.862778230270276e-06, "loss": 0.2166, "step": 2669 }, { "epoch": 0.8916346635498413, "grad_norm": 0.5358299377296973, "learning_rate": 8.861544024753545e-06, "loss": 0.256, "step": 2670 }, { "epoch": 0.8919686091167139, "grad_norm": 0.7520222436984267, "learning_rate": 8.860309235905122e-06, "loss": 0.2221, "step": 2671 }, { "epoch": 0.8923025546835865, "grad_norm": 0.456864799154424, "learning_rate": 8.859073863911536e-06, "loss": 0.2284, "step": 2672 }, { "epoch": 0.8926365002504592, "grad_norm": 0.4443603685723179, "learning_rate": 8.857837908959404e-06, "loss": 0.2192, "step": 2673 }, { "epoch": 0.8929704458173318, "grad_norm": 0.44649147782698073, "learning_rate": 8.856601371235429e-06, "loss": 0.2153, "step": 2674 }, { "epoch": 0.8933043913842044, "grad_norm": 0.5127058305862575, "learning_rate": 8.855364250926409e-06, "loss": 0.2328, "step": 2675 }, { "epoch": 0.893638336951077, "grad_norm": 0.42241283375166006, "learning_rate": 8.854126548219222e-06, "loss": 0.2144, "step": 2676 }, { "epoch": 0.8939722825179496, "grad_norm": 0.44645580116752953, "learning_rate": 8.85288826330084e-06, "loss": 0.2217, "step": 2677 }, { "epoch": 0.8943062280848222, "grad_norm": 0.4843402606886858, "learning_rate": 8.85164939635832e-06, "loss": 0.2058, "step": 2678 }, { "epoch": 0.8946401736516948, "grad_norm": 0.4401205110768855, "learning_rate": 8.850409947578806e-06, "loss": 0.2255, "step": 2679 }, { "epoch": 0.8949741192185674, "grad_norm": 0.46319114729765054, "learning_rate": 8.849169917149532e-06, "loss": 0.2152, "step": 2680 }, { "epoch": 0.89530806478544, "grad_norm": 0.42318735608832414, "learning_rate": 8.847929305257821e-06, "loss": 0.2072, "step": 2681 }, { "epoch": 0.8956420103523126, "grad_norm": 0.4774593411729633, "learning_rate": 8.846688112091078e-06, "loss": 0.2164, "step": 2682 }, { "epoch": 0.8959759559191852, "grad_norm": 0.5172363111160776, "learning_rate": 8.845446337836805e-06, "loss": 0.2199, "step": 2683 }, { "epoch": 0.8963099014860578, "grad_norm": 0.47524030222731717, "learning_rate": 8.844203982682583e-06, "loss": 0.2118, "step": 2684 }, { "epoch": 0.8966438470529303, "grad_norm": 0.48935917439328974, "learning_rate": 8.842961046816085e-06, "loss": 0.2246, "step": 2685 }, { "epoch": 0.8969777926198029, "grad_norm": 0.42309667094002784, "learning_rate": 8.841717530425071e-06, "loss": 0.2059, "step": 2686 }, { "epoch": 0.8973117381866755, "grad_norm": 0.43840157623404685, "learning_rate": 8.84047343369739e-06, "loss": 0.2208, "step": 2687 }, { "epoch": 0.8976456837535481, "grad_norm": 0.4595498272411674, "learning_rate": 8.839228756820977e-06, "loss": 0.2178, "step": 2688 }, { "epoch": 0.8979796293204207, "grad_norm": 0.5061728834639878, "learning_rate": 8.837983499983856e-06, "loss": 0.2409, "step": 2689 }, { "epoch": 0.8983135748872934, "grad_norm": 0.4681121712319394, "learning_rate": 8.836737663374135e-06, "loss": 0.2213, "step": 2690 }, { "epoch": 0.898647520454166, "grad_norm": 0.47022403674358887, "learning_rate": 8.835491247180012e-06, "loss": 0.2266, "step": 2691 }, { "epoch": 0.8989814660210386, "grad_norm": 0.46339165662113857, "learning_rate": 8.834244251589778e-06, "loss": 0.2293, "step": 2692 }, { "epoch": 0.8993154115879112, "grad_norm": 0.5508066598437971, "learning_rate": 8.832996676791802e-06, "loss": 0.2296, "step": 2693 }, { "epoch": 0.8996493571547838, "grad_norm": 0.3974241538911554, "learning_rate": 8.831748522974545e-06, "loss": 0.2016, "step": 2694 }, { "epoch": 0.8999833027216564, "grad_norm": 0.46507520916790673, "learning_rate": 8.830499790326556e-06, "loss": 0.2204, "step": 2695 }, { "epoch": 0.900317248288529, "grad_norm": 0.4080434870525821, "learning_rate": 8.829250479036473e-06, "loss": 0.2098, "step": 2696 }, { "epoch": 0.9006511938554016, "grad_norm": 0.4703823065746979, "learning_rate": 8.828000589293016e-06, "loss": 0.2096, "step": 2697 }, { "epoch": 0.9009851394222742, "grad_norm": 0.42236956688946864, "learning_rate": 8.826750121284998e-06, "loss": 0.2083, "step": 2698 }, { "epoch": 0.9013190849891468, "grad_norm": 0.44662974396124566, "learning_rate": 8.825499075201314e-06, "loss": 0.2245, "step": 2699 }, { "epoch": 0.9016530305560194, "grad_norm": 0.43992141849148725, "learning_rate": 8.824247451230949e-06, "loss": 0.2171, "step": 2700 }, { "epoch": 0.9019869761228919, "grad_norm": 0.5054985915126862, "learning_rate": 8.82299524956298e-06, "loss": 0.2374, "step": 2701 }, { "epoch": 0.9023209216897645, "grad_norm": 0.384565909847055, "learning_rate": 8.821742470386565e-06, "loss": 0.1991, "step": 2702 }, { "epoch": 0.9026548672566371, "grad_norm": 0.4712108137917473, "learning_rate": 8.820489113890949e-06, "loss": 0.227, "step": 2703 }, { "epoch": 0.9029888128235097, "grad_norm": 0.4162460345682535, "learning_rate": 8.819235180265468e-06, "loss": 0.209, "step": 2704 }, { "epoch": 0.9033227583903823, "grad_norm": 0.3951344465539853, "learning_rate": 8.817980669699544e-06, "loss": 0.1994, "step": 2705 }, { "epoch": 0.903656703957255, "grad_norm": 0.49667700484753613, "learning_rate": 8.816725582382681e-06, "loss": 0.2251, "step": 2706 }, { "epoch": 0.9039906495241276, "grad_norm": 0.444646491895196, "learning_rate": 8.815469918504482e-06, "loss": 0.2253, "step": 2707 }, { "epoch": 0.9043245950910002, "grad_norm": 0.47116995697716324, "learning_rate": 8.814213678254624e-06, "loss": 0.2323, "step": 2708 }, { "epoch": 0.9046585406578728, "grad_norm": 0.4294065077553617, "learning_rate": 8.81295686182288e-06, "loss": 0.2155, "step": 2709 }, { "epoch": 0.9049924862247454, "grad_norm": 0.4643290288473964, "learning_rate": 8.811699469399106e-06, "loss": 0.2257, "step": 2710 }, { "epoch": 0.905326431791618, "grad_norm": 0.5106255418157994, "learning_rate": 8.810441501173245e-06, "loss": 0.2198, "step": 2711 }, { "epoch": 0.9056603773584906, "grad_norm": 0.47059697891525737, "learning_rate": 8.809182957335329e-06, "loss": 0.2193, "step": 2712 }, { "epoch": 0.9059943229253632, "grad_norm": 0.4121796907160016, "learning_rate": 8.807923838075476e-06, "loss": 0.2255, "step": 2713 }, { "epoch": 0.9063282684922358, "grad_norm": 0.4914052442580082, "learning_rate": 8.80666414358389e-06, "loss": 0.2386, "step": 2714 }, { "epoch": 0.9066622140591084, "grad_norm": 0.5300383794010388, "learning_rate": 8.805403874050864e-06, "loss": 0.2275, "step": 2715 }, { "epoch": 0.906996159625981, "grad_norm": 0.4420574700910713, "learning_rate": 8.804143029666775e-06, "loss": 0.2207, "step": 2716 }, { "epoch": 0.9073301051928536, "grad_norm": 0.4569049390299293, "learning_rate": 8.802881610622089e-06, "loss": 0.2102, "step": 2717 }, { "epoch": 0.9076640507597261, "grad_norm": 0.4138994917550044, "learning_rate": 8.801619617107359e-06, "loss": 0.2229, "step": 2718 }, { "epoch": 0.9079979963265987, "grad_norm": 0.4794838835897948, "learning_rate": 8.800357049313222e-06, "loss": 0.2354, "step": 2719 }, { "epoch": 0.9083319418934713, "grad_norm": 0.4589463845784919, "learning_rate": 8.799093907430406e-06, "loss": 0.2195, "step": 2720 }, { "epoch": 0.9086658874603439, "grad_norm": 0.40781923211396, "learning_rate": 8.797830191649721e-06, "loss": 0.2192, "step": 2721 }, { "epoch": 0.9089998330272165, "grad_norm": 0.4177098084042151, "learning_rate": 8.796565902162069e-06, "loss": 0.2096, "step": 2722 }, { "epoch": 0.9093337785940891, "grad_norm": 0.4323255277508984, "learning_rate": 8.795301039158433e-06, "loss": 0.2233, "step": 2723 }, { "epoch": 0.9096677241609618, "grad_norm": 0.4587198185489595, "learning_rate": 8.794035602829887e-06, "loss": 0.2419, "step": 2724 }, { "epoch": 0.9100016697278344, "grad_norm": 0.4542463004544109, "learning_rate": 8.792769593367591e-06, "loss": 0.2179, "step": 2725 }, { "epoch": 0.910335615294707, "grad_norm": 0.4990418712075424, "learning_rate": 8.79150301096279e-06, "loss": 0.2238, "step": 2726 }, { "epoch": 0.9106695608615796, "grad_norm": 0.41162778959199753, "learning_rate": 8.790235855806814e-06, "loss": 0.2127, "step": 2727 }, { "epoch": 0.9110035064284522, "grad_norm": 0.4377098192081709, "learning_rate": 8.788968128091084e-06, "loss": 0.2171, "step": 2728 }, { "epoch": 0.9113374519953248, "grad_norm": 0.4511181347047479, "learning_rate": 8.787699828007104e-06, "loss": 0.2264, "step": 2729 }, { "epoch": 0.9116713975621974, "grad_norm": 0.44193992386166336, "learning_rate": 8.786430955746468e-06, "loss": 0.2142, "step": 2730 }, { "epoch": 0.91200534312907, "grad_norm": 0.5164407737903689, "learning_rate": 8.78516151150085e-06, "loss": 0.2258, "step": 2731 }, { "epoch": 0.9123392886959426, "grad_norm": 0.45102390120947355, "learning_rate": 8.783891495462018e-06, "loss": 0.2182, "step": 2732 }, { "epoch": 0.9126732342628152, "grad_norm": 0.4717630713459086, "learning_rate": 8.782620907821823e-06, "loss": 0.216, "step": 2733 }, { "epoch": 0.9130071798296877, "grad_norm": 0.4192665748513033, "learning_rate": 8.781349748772198e-06, "loss": 0.2054, "step": 2734 }, { "epoch": 0.9133411253965603, "grad_norm": 0.5708074422120173, "learning_rate": 8.780078018505172e-06, "loss": 0.227, "step": 2735 }, { "epoch": 0.9136750709634329, "grad_norm": 0.5610844903221539, "learning_rate": 8.778805717212853e-06, "loss": 0.2202, "step": 2736 }, { "epoch": 0.9140090165303055, "grad_norm": 0.44429928394757645, "learning_rate": 8.777532845087434e-06, "loss": 0.2224, "step": 2737 }, { "epoch": 0.9143429620971781, "grad_norm": 0.5344338868572158, "learning_rate": 8.776259402321201e-06, "loss": 0.235, "step": 2738 }, { "epoch": 0.9146769076640507, "grad_norm": 0.5063152059042075, "learning_rate": 8.774985389106521e-06, "loss": 0.2272, "step": 2739 }, { "epoch": 0.9150108532309233, "grad_norm": 0.5051461254697261, "learning_rate": 8.77371080563585e-06, "loss": 0.2251, "step": 2740 }, { "epoch": 0.915344798797796, "grad_norm": 0.41201433720845737, "learning_rate": 8.772435652101726e-06, "loss": 0.2084, "step": 2741 }, { "epoch": 0.9156787443646686, "grad_norm": 0.42455946149967577, "learning_rate": 8.771159928696779e-06, "loss": 0.2091, "step": 2742 }, { "epoch": 0.9160126899315412, "grad_norm": 0.4605627342858186, "learning_rate": 8.76988363561372e-06, "loss": 0.2197, "step": 2743 }, { "epoch": 0.9163466354984138, "grad_norm": 0.482110449293083, "learning_rate": 8.76860677304535e-06, "loss": 0.216, "step": 2744 }, { "epoch": 0.9166805810652864, "grad_norm": 0.46195236737899087, "learning_rate": 8.767329341184552e-06, "loss": 0.2331, "step": 2745 }, { "epoch": 0.917014526632159, "grad_norm": 0.4413188092360609, "learning_rate": 8.766051340224297e-06, "loss": 0.2171, "step": 2746 }, { "epoch": 0.9173484721990316, "grad_norm": 0.48280257812503025, "learning_rate": 8.764772770357646e-06, "loss": 0.2182, "step": 2747 }, { "epoch": 0.9176824177659042, "grad_norm": 0.4210285641166794, "learning_rate": 8.763493631777738e-06, "loss": 0.2044, "step": 2748 }, { "epoch": 0.9180163633327768, "grad_norm": 0.4496036936278124, "learning_rate": 8.762213924677802e-06, "loss": 0.2224, "step": 2749 }, { "epoch": 0.9183503088996493, "grad_norm": 0.4402813014352695, "learning_rate": 8.760933649251155e-06, "loss": 0.2157, "step": 2750 }, { "epoch": 0.9186842544665219, "grad_norm": 0.43235518297213915, "learning_rate": 8.759652805691197e-06, "loss": 0.221, "step": 2751 }, { "epoch": 0.9190182000333945, "grad_norm": 0.4424479020307994, "learning_rate": 8.758371394191415e-06, "loss": 0.22, "step": 2752 }, { "epoch": 0.9193521456002671, "grad_norm": 0.4281531056346567, "learning_rate": 8.75708941494538e-06, "loss": 0.2127, "step": 2753 }, { "epoch": 0.9196860911671397, "grad_norm": 0.48598727252634466, "learning_rate": 8.75580686814675e-06, "loss": 0.2201, "step": 2754 }, { "epoch": 0.9200200367340123, "grad_norm": 0.4900282146647412, "learning_rate": 8.75452375398927e-06, "loss": 0.2202, "step": 2755 }, { "epoch": 0.9203539823008849, "grad_norm": 0.41487723575509605, "learning_rate": 8.753240072666769e-06, "loss": 0.2172, "step": 2756 }, { "epoch": 0.9206879278677575, "grad_norm": 0.46744752038473614, "learning_rate": 8.751955824373161e-06, "loss": 0.2299, "step": 2757 }, { "epoch": 0.9210218734346302, "grad_norm": 0.3913659048757199, "learning_rate": 8.750671009302448e-06, "loss": 0.2142, "step": 2758 }, { "epoch": 0.9213558190015028, "grad_norm": 0.3858840470760089, "learning_rate": 8.749385627648717e-06, "loss": 0.1974, "step": 2759 }, { "epoch": 0.9216897645683754, "grad_norm": 0.46012895388328423, "learning_rate": 8.748099679606139e-06, "loss": 0.2263, "step": 2760 }, { "epoch": 0.922023710135248, "grad_norm": 0.5056054844807831, "learning_rate": 8.746813165368973e-06, "loss": 0.2164, "step": 2761 }, { "epoch": 0.9223576557021206, "grad_norm": 0.4099051015952098, "learning_rate": 8.745526085131559e-06, "loss": 0.2087, "step": 2762 }, { "epoch": 0.9226916012689932, "grad_norm": 0.44267862491947235, "learning_rate": 8.744238439088328e-06, "loss": 0.2143, "step": 2763 }, { "epoch": 0.9230255468358658, "grad_norm": 0.5002499141140306, "learning_rate": 8.742950227433795e-06, "loss": 0.227, "step": 2764 }, { "epoch": 0.9233594924027384, "grad_norm": 0.4609375168550936, "learning_rate": 8.741661450362559e-06, "loss": 0.222, "step": 2765 }, { "epoch": 0.923693437969611, "grad_norm": 0.40652027628765436, "learning_rate": 8.740372108069304e-06, "loss": 0.2063, "step": 2766 }, { "epoch": 0.9240273835364835, "grad_norm": 0.46845775301622733, "learning_rate": 8.739082200748799e-06, "loss": 0.2056, "step": 2767 }, { "epoch": 0.9243613291033561, "grad_norm": 0.43583945959938114, "learning_rate": 8.737791728595903e-06, "loss": 0.221, "step": 2768 }, { "epoch": 0.9246952746702287, "grad_norm": 0.4211490574278276, "learning_rate": 8.736500691805554e-06, "loss": 0.2249, "step": 2769 }, { "epoch": 0.9250292202371013, "grad_norm": 0.4928325266258507, "learning_rate": 8.73520909057278e-06, "loss": 0.2268, "step": 2770 }, { "epoch": 0.9253631658039739, "grad_norm": 0.47059734345795795, "learning_rate": 8.733916925092691e-06, "loss": 0.231, "step": 2771 }, { "epoch": 0.9256971113708465, "grad_norm": 0.45417002632161574, "learning_rate": 8.732624195560487e-06, "loss": 0.2389, "step": 2772 }, { "epoch": 0.9260310569377191, "grad_norm": 0.3945646807349342, "learning_rate": 8.731330902171447e-06, "loss": 0.2168, "step": 2773 }, { "epoch": 0.9263650025045918, "grad_norm": 0.44248145973303443, "learning_rate": 8.730037045120941e-06, "loss": 0.2238, "step": 2774 }, { "epoch": 0.9266989480714644, "grad_norm": 0.4674819994664436, "learning_rate": 8.728742624604418e-06, "loss": 0.2341, "step": 2775 }, { "epoch": 0.927032893638337, "grad_norm": 0.64231489409082, "learning_rate": 8.727447640817417e-06, "loss": 0.2133, "step": 2776 }, { "epoch": 0.9273668392052096, "grad_norm": 0.44146865573432903, "learning_rate": 8.726152093955561e-06, "loss": 0.2104, "step": 2777 }, { "epoch": 0.9277007847720822, "grad_norm": 0.43933661617710457, "learning_rate": 8.724855984214558e-06, "loss": 0.2321, "step": 2778 }, { "epoch": 0.9280347303389548, "grad_norm": 0.46754943764880913, "learning_rate": 8.723559311790197e-06, "loss": 0.227, "step": 2779 }, { "epoch": 0.9283686759058274, "grad_norm": 0.41188188249517227, "learning_rate": 8.722262076878361e-06, "loss": 0.2002, "step": 2780 }, { "epoch": 0.9287026214727, "grad_norm": 0.4430354887476343, "learning_rate": 8.720964279675009e-06, "loss": 0.2115, "step": 2781 }, { "epoch": 0.9290365670395726, "grad_norm": 0.4164609143001292, "learning_rate": 8.71966592037619e-06, "loss": 0.2121, "step": 2782 }, { "epoch": 0.9293705126064451, "grad_norm": 0.42842924540161714, "learning_rate": 8.718366999178037e-06, "loss": 0.2219, "step": 2783 }, { "epoch": 0.9297044581733177, "grad_norm": 0.46668660218796776, "learning_rate": 8.717067516276764e-06, "loss": 0.2293, "step": 2784 }, { "epoch": 0.9300384037401903, "grad_norm": 0.4467610399518897, "learning_rate": 8.715767471868679e-06, "loss": 0.2166, "step": 2785 }, { "epoch": 0.9303723493070629, "grad_norm": 0.5306058118803246, "learning_rate": 8.714466866150162e-06, "loss": 0.2515, "step": 2786 }, { "epoch": 0.9307062948739355, "grad_norm": 0.4538833004379203, "learning_rate": 8.71316569931769e-06, "loss": 0.2245, "step": 2787 }, { "epoch": 0.9310402404408081, "grad_norm": 0.44120044003214, "learning_rate": 8.71186397156782e-06, "loss": 0.2191, "step": 2788 }, { "epoch": 0.9313741860076807, "grad_norm": 0.4610836615076066, "learning_rate": 8.710561683097189e-06, "loss": 0.2178, "step": 2789 }, { "epoch": 0.9317081315745533, "grad_norm": 0.4363676340228982, "learning_rate": 8.709258834102525e-06, "loss": 0.2242, "step": 2790 }, { "epoch": 0.932042077141426, "grad_norm": 0.6756307435936366, "learning_rate": 8.70795542478064e-06, "loss": 0.231, "step": 2791 }, { "epoch": 0.9323760227082986, "grad_norm": 0.5041996579273476, "learning_rate": 8.706651455328427e-06, "loss": 0.2163, "step": 2792 }, { "epoch": 0.9327099682751712, "grad_norm": 0.4698362067299536, "learning_rate": 8.70534692594287e-06, "loss": 0.2216, "step": 2793 }, { "epoch": 0.9330439138420438, "grad_norm": 0.4820217295468483, "learning_rate": 8.704041836821029e-06, "loss": 0.2287, "step": 2794 }, { "epoch": 0.9333778594089164, "grad_norm": 0.4577816146622304, "learning_rate": 8.702736188160055e-06, "loss": 0.2085, "step": 2795 }, { "epoch": 0.933711804975789, "grad_norm": 0.4804066031180158, "learning_rate": 8.70142998015718e-06, "loss": 0.2128, "step": 2796 }, { "epoch": 0.9340457505426616, "grad_norm": 0.48646393253865444, "learning_rate": 8.700123213009726e-06, "loss": 0.2315, "step": 2797 }, { "epoch": 0.9343796961095342, "grad_norm": 0.4392885717258637, "learning_rate": 8.698815886915094e-06, "loss": 0.2372, "step": 2798 }, { "epoch": 0.9347136416764067, "grad_norm": 0.4573456253338256, "learning_rate": 8.697508002070766e-06, "loss": 0.2237, "step": 2799 }, { "epoch": 0.9350475872432793, "grad_norm": 1.0167289235959724, "learning_rate": 8.696199558674321e-06, "loss": 0.2282, "step": 2800 }, { "epoch": 0.9353815328101519, "grad_norm": 0.49862618680445076, "learning_rate": 8.69489055692341e-06, "loss": 0.2191, "step": 2801 }, { "epoch": 0.9357154783770245, "grad_norm": 0.4071314628849528, "learning_rate": 8.693580997015775e-06, "loss": 0.2145, "step": 2802 }, { "epoch": 0.9360494239438971, "grad_norm": 0.48212789403115414, "learning_rate": 8.692270879149241e-06, "loss": 0.2264, "step": 2803 }, { "epoch": 0.9363833695107697, "grad_norm": 0.46241253052454173, "learning_rate": 8.690960203521713e-06, "loss": 0.2084, "step": 2804 }, { "epoch": 0.9367173150776423, "grad_norm": 0.5117131305242669, "learning_rate": 8.689648970331188e-06, "loss": 0.2498, "step": 2805 }, { "epoch": 0.9370512606445149, "grad_norm": 0.488625905932195, "learning_rate": 8.68833717977574e-06, "loss": 0.205, "step": 2806 }, { "epoch": 0.9373852062113875, "grad_norm": 0.46934650535365496, "learning_rate": 8.687024832053534e-06, "loss": 0.209, "step": 2807 }, { "epoch": 0.9377191517782602, "grad_norm": 0.4876878801995502, "learning_rate": 8.685711927362815e-06, "loss": 0.2166, "step": 2808 }, { "epoch": 0.9380530973451328, "grad_norm": 0.47415720928263255, "learning_rate": 8.68439846590191e-06, "loss": 0.2019, "step": 2809 }, { "epoch": 0.9383870429120054, "grad_norm": 0.5071405923836091, "learning_rate": 8.683084447869234e-06, "loss": 0.2193, "step": 2810 }, { "epoch": 0.938720988478878, "grad_norm": 0.5342801311116051, "learning_rate": 8.681769873463286e-06, "loss": 0.2238, "step": 2811 }, { "epoch": 0.9390549340457506, "grad_norm": 0.4967761833386368, "learning_rate": 8.680454742882647e-06, "loss": 0.2319, "step": 2812 }, { "epoch": 0.9393888796126232, "grad_norm": 0.45077129590302223, "learning_rate": 8.679139056325983e-06, "loss": 0.2295, "step": 2813 }, { "epoch": 0.9397228251794958, "grad_norm": 0.4979759329992662, "learning_rate": 8.677822813992046e-06, "loss": 0.2261, "step": 2814 }, { "epoch": 0.9400567707463683, "grad_norm": 0.4950847343176679, "learning_rate": 8.676506016079664e-06, "loss": 0.2238, "step": 2815 }, { "epoch": 0.9403907163132409, "grad_norm": 0.40708194153685967, "learning_rate": 8.675188662787762e-06, "loss": 0.2187, "step": 2816 }, { "epoch": 0.9407246618801135, "grad_norm": 0.5521110478916056, "learning_rate": 8.673870754315336e-06, "loss": 0.2053, "step": 2817 }, { "epoch": 0.9410586074469861, "grad_norm": 0.4442733755427678, "learning_rate": 8.672552290861478e-06, "loss": 0.2164, "step": 2818 }, { "epoch": 0.9413925530138587, "grad_norm": 0.4553102853972935, "learning_rate": 8.67123327262535e-06, "loss": 0.2169, "step": 2819 }, { "epoch": 0.9417264985807313, "grad_norm": 0.4306856509162607, "learning_rate": 8.669913699806209e-06, "loss": 0.2096, "step": 2820 }, { "epoch": 0.9420604441476039, "grad_norm": 0.5401958009078948, "learning_rate": 8.668593572603394e-06, "loss": 0.2282, "step": 2821 }, { "epoch": 0.9423943897144765, "grad_norm": 0.5107141764908304, "learning_rate": 8.667272891216323e-06, "loss": 0.2359, "step": 2822 }, { "epoch": 0.9427283352813491, "grad_norm": 0.45683098172915115, "learning_rate": 8.6659516558445e-06, "loss": 0.2207, "step": 2823 }, { "epoch": 0.9430622808482217, "grad_norm": 0.5311817625144637, "learning_rate": 8.664629866687514e-06, "loss": 0.2377, "step": 2824 }, { "epoch": 0.9433962264150944, "grad_norm": 0.441871608821732, "learning_rate": 8.663307523945038e-06, "loss": 0.2185, "step": 2825 }, { "epoch": 0.943730171981967, "grad_norm": 0.4506561928586293, "learning_rate": 8.661984627816827e-06, "loss": 0.2136, "step": 2826 }, { "epoch": 0.9440641175488396, "grad_norm": 0.40555871123613024, "learning_rate": 8.660661178502719e-06, "loss": 0.2094, "step": 2827 }, { "epoch": 0.9443980631157122, "grad_norm": 0.48122612843516865, "learning_rate": 8.659337176202636e-06, "loss": 0.2177, "step": 2828 }, { "epoch": 0.9447320086825848, "grad_norm": 0.493730557455204, "learning_rate": 8.658012621116585e-06, "loss": 0.2033, "step": 2829 }, { "epoch": 0.9450659542494574, "grad_norm": 0.39913730987982116, "learning_rate": 8.656687513444656e-06, "loss": 0.1957, "step": 2830 }, { "epoch": 0.94539989981633, "grad_norm": 0.43231231966574224, "learning_rate": 8.655361853387024e-06, "loss": 0.2212, "step": 2831 }, { "epoch": 0.9457338453832025, "grad_norm": 0.5034429330788713, "learning_rate": 8.654035641143944e-06, "loss": 0.2266, "step": 2832 }, { "epoch": 0.9460677909500751, "grad_norm": 0.5248570196879081, "learning_rate": 8.652708876915752e-06, "loss": 0.2183, "step": 2833 }, { "epoch": 0.9464017365169477, "grad_norm": 0.47638029426551504, "learning_rate": 8.651381560902876e-06, "loss": 0.2257, "step": 2834 }, { "epoch": 0.9467356820838203, "grad_norm": 0.4872575546717003, "learning_rate": 8.650053693305824e-06, "loss": 0.2371, "step": 2835 }, { "epoch": 0.9470696276506929, "grad_norm": 0.535075069115959, "learning_rate": 8.648725274325182e-06, "loss": 0.2206, "step": 2836 }, { "epoch": 0.9474035732175655, "grad_norm": 0.48011848827739007, "learning_rate": 8.647396304161625e-06, "loss": 0.2204, "step": 2837 }, { "epoch": 0.9477375187844381, "grad_norm": 0.43107044927809024, "learning_rate": 8.64606678301591e-06, "loss": 0.2143, "step": 2838 }, { "epoch": 0.9480714643513107, "grad_norm": 0.48263476741763794, "learning_rate": 8.644736711088874e-06, "loss": 0.2128, "step": 2839 }, { "epoch": 0.9484054099181833, "grad_norm": 0.4227729549713639, "learning_rate": 8.643406088581446e-06, "loss": 0.2181, "step": 2840 }, { "epoch": 0.948739355485056, "grad_norm": 0.4423800504922258, "learning_rate": 8.642074915694626e-06, "loss": 0.2181, "step": 2841 }, { "epoch": 0.9490733010519286, "grad_norm": 0.49729486477624335, "learning_rate": 8.640743192629507e-06, "loss": 0.2196, "step": 2842 }, { "epoch": 0.9494072466188012, "grad_norm": 0.48595434515596525, "learning_rate": 8.63941091958726e-06, "loss": 0.2181, "step": 2843 }, { "epoch": 0.9497411921856738, "grad_norm": 0.5001238407946756, "learning_rate": 8.638078096769141e-06, "loss": 0.2209, "step": 2844 }, { "epoch": 0.9500751377525464, "grad_norm": 0.4720041136115112, "learning_rate": 8.636744724376488e-06, "loss": 0.2214, "step": 2845 }, { "epoch": 0.950409083319419, "grad_norm": 0.48399406369761006, "learning_rate": 8.635410802610724e-06, "loss": 0.2292, "step": 2846 }, { "epoch": 0.9507430288862916, "grad_norm": 0.4332190126394425, "learning_rate": 8.634076331673354e-06, "loss": 0.2182, "step": 2847 }, { "epoch": 0.9510769744531641, "grad_norm": 0.47377715160398876, "learning_rate": 8.632741311765962e-06, "loss": 0.2217, "step": 2848 }, { "epoch": 0.9514109200200367, "grad_norm": 0.4803537971441186, "learning_rate": 8.631405743090223e-06, "loss": 0.2224, "step": 2849 }, { "epoch": 0.9517448655869093, "grad_norm": 0.44465165141275165, "learning_rate": 8.630069625847885e-06, "loss": 0.212, "step": 2850 }, { "epoch": 0.9520788111537819, "grad_norm": 0.4189509028928389, "learning_rate": 8.628732960240788e-06, "loss": 0.2201, "step": 2851 }, { "epoch": 0.9524127567206545, "grad_norm": 0.4072448497585389, "learning_rate": 8.627395746470852e-06, "loss": 0.1999, "step": 2852 }, { "epoch": 0.9527467022875271, "grad_norm": 0.5302490534823262, "learning_rate": 8.626057984740077e-06, "loss": 0.2235, "step": 2853 }, { "epoch": 0.9530806478543997, "grad_norm": 0.4770176390794186, "learning_rate": 8.624719675250547e-06, "loss": 0.2092, "step": 2854 }, { "epoch": 0.9534145934212723, "grad_norm": 0.4649548910468379, "learning_rate": 8.623380818204431e-06, "loss": 0.2206, "step": 2855 }, { "epoch": 0.9537485389881449, "grad_norm": 0.41794317440850726, "learning_rate": 8.622041413803979e-06, "loss": 0.2299, "step": 2856 }, { "epoch": 0.9540824845550175, "grad_norm": 0.4270595046761071, "learning_rate": 8.620701462251522e-06, "loss": 0.2274, "step": 2857 }, { "epoch": 0.9544164301218901, "grad_norm": 0.5605112243213927, "learning_rate": 8.619360963749478e-06, "loss": 0.2366, "step": 2858 }, { "epoch": 0.9547503756887628, "grad_norm": 0.4315029266321909, "learning_rate": 8.618019918500342e-06, "loss": 0.2155, "step": 2859 }, { "epoch": 0.9550843212556354, "grad_norm": 0.41819570091246855, "learning_rate": 8.616678326706698e-06, "loss": 0.2032, "step": 2860 }, { "epoch": 0.955418266822508, "grad_norm": 0.4720800653986111, "learning_rate": 8.615336188571208e-06, "loss": 0.2106, "step": 2861 }, { "epoch": 0.9557522123893806, "grad_norm": 0.4336533842521673, "learning_rate": 8.613993504296617e-06, "loss": 0.2106, "step": 2862 }, { "epoch": 0.9560861579562532, "grad_norm": 0.4842098122111017, "learning_rate": 8.612650274085755e-06, "loss": 0.2153, "step": 2863 }, { "epoch": 0.9564201035231257, "grad_norm": 0.46478104433885903, "learning_rate": 8.61130649814153e-06, "loss": 0.2172, "step": 2864 }, { "epoch": 0.9567540490899983, "grad_norm": 0.3995110999710848, "learning_rate": 8.609962176666936e-06, "loss": 0.2008, "step": 2865 }, { "epoch": 0.9570879946568709, "grad_norm": 0.4619066958368434, "learning_rate": 8.608617309865051e-06, "loss": 0.2247, "step": 2866 }, { "epoch": 0.9574219402237435, "grad_norm": 0.4772269876791035, "learning_rate": 8.60727189793903e-06, "loss": 0.2238, "step": 2867 }, { "epoch": 0.9577558857906161, "grad_norm": 0.45025507277873233, "learning_rate": 8.605925941092114e-06, "loss": 0.2321, "step": 2868 }, { "epoch": 0.9580898313574887, "grad_norm": 0.4240250807538226, "learning_rate": 8.604579439527627e-06, "loss": 0.2221, "step": 2869 }, { "epoch": 0.9584237769243613, "grad_norm": 0.4467352359560645, "learning_rate": 8.603232393448974e-06, "loss": 0.2115, "step": 2870 }, { "epoch": 0.9587577224912339, "grad_norm": 0.43281845584862416, "learning_rate": 8.601884803059641e-06, "loss": 0.2084, "step": 2871 }, { "epoch": 0.9590916680581065, "grad_norm": 0.43146109431192975, "learning_rate": 8.600536668563197e-06, "loss": 0.2185, "step": 2872 }, { "epoch": 0.9594256136249791, "grad_norm": 0.47249516324982266, "learning_rate": 8.599187990163296e-06, "loss": 0.2221, "step": 2873 }, { "epoch": 0.9597595591918517, "grad_norm": 0.4698332572735109, "learning_rate": 8.597838768063667e-06, "loss": 0.23, "step": 2874 }, { "epoch": 0.9600935047587243, "grad_norm": 0.5412304324762713, "learning_rate": 8.596489002468132e-06, "loss": 0.2245, "step": 2875 }, { "epoch": 0.960427450325597, "grad_norm": 0.44637589874465433, "learning_rate": 8.595138693580583e-06, "loss": 0.2233, "step": 2876 }, { "epoch": 0.9607613958924696, "grad_norm": 0.43336083402117387, "learning_rate": 8.593787841605004e-06, "loss": 0.2048, "step": 2877 }, { "epoch": 0.9610953414593422, "grad_norm": 0.4507857608921773, "learning_rate": 8.592436446745457e-06, "loss": 0.2187, "step": 2878 }, { "epoch": 0.9614292870262148, "grad_norm": 0.4137810248936209, "learning_rate": 8.591084509206085e-06, "loss": 0.2288, "step": 2879 }, { "epoch": 0.9617632325930874, "grad_norm": 0.4381991589239642, "learning_rate": 8.589732029191113e-06, "loss": 0.2156, "step": 2880 }, { "epoch": 0.9620971781599599, "grad_norm": 0.46254816718933667, "learning_rate": 8.588379006904852e-06, "loss": 0.2345, "step": 2881 }, { "epoch": 0.9624311237268325, "grad_norm": 0.447052657136721, "learning_rate": 8.587025442551689e-06, "loss": 0.211, "step": 2882 }, { "epoch": 0.9627650692937051, "grad_norm": 0.4220565998823512, "learning_rate": 8.585671336336096e-06, "loss": 0.2142, "step": 2883 }, { "epoch": 0.9630990148605777, "grad_norm": 0.46402238237283716, "learning_rate": 8.58431668846263e-06, "loss": 0.2124, "step": 2884 }, { "epoch": 0.9634329604274503, "grad_norm": 0.8148299624701707, "learning_rate": 8.582961499135925e-06, "loss": 0.2138, "step": 2885 }, { "epoch": 0.9637669059943229, "grad_norm": 0.4218423758746241, "learning_rate": 8.581605768560694e-06, "loss": 0.2237, "step": 2886 }, { "epoch": 0.9641008515611955, "grad_norm": 0.42468992711883435, "learning_rate": 8.580249496941742e-06, "loss": 0.2222, "step": 2887 }, { "epoch": 0.9644347971280681, "grad_norm": 0.45231316231310115, "learning_rate": 8.578892684483947e-06, "loss": 0.2304, "step": 2888 }, { "epoch": 0.9647687426949407, "grad_norm": 0.39665581763729213, "learning_rate": 8.577535331392272e-06, "loss": 0.2061, "step": 2889 }, { "epoch": 0.9651026882618133, "grad_norm": 0.42218622196372374, "learning_rate": 8.57617743787176e-06, "loss": 0.214, "step": 2890 }, { "epoch": 0.9654366338286859, "grad_norm": 0.4327387439898741, "learning_rate": 8.574819004127539e-06, "loss": 0.2165, "step": 2891 }, { "epoch": 0.9657705793955585, "grad_norm": 0.4368794278929328, "learning_rate": 8.573460030364816e-06, "loss": 0.2057, "step": 2892 }, { "epoch": 0.9661045249624312, "grad_norm": 0.46821955032272594, "learning_rate": 8.572100516788878e-06, "loss": 0.2122, "step": 2893 }, { "epoch": 0.9664384705293038, "grad_norm": 0.5024454235492738, "learning_rate": 8.570740463605096e-06, "loss": 0.2301, "step": 2894 }, { "epoch": 0.9667724160961764, "grad_norm": 0.42826897322512725, "learning_rate": 8.569379871018925e-06, "loss": 0.2359, "step": 2895 }, { "epoch": 0.967106361663049, "grad_norm": 0.447750666872138, "learning_rate": 8.568018739235895e-06, "loss": 0.1986, "step": 2896 }, { "epoch": 0.9674403072299215, "grad_norm": 0.45508421449926406, "learning_rate": 8.566657068461624e-06, "loss": 0.2089, "step": 2897 }, { "epoch": 0.9677742527967941, "grad_norm": 0.4895779825553509, "learning_rate": 8.565294858901804e-06, "loss": 0.2276, "step": 2898 }, { "epoch": 0.9681081983636667, "grad_norm": 0.4114005627874931, "learning_rate": 8.563932110762218e-06, "loss": 0.2125, "step": 2899 }, { "epoch": 0.9684421439305393, "grad_norm": 0.44863986840084646, "learning_rate": 8.562568824248722e-06, "loss": 0.2294, "step": 2900 }, { "epoch": 0.9687760894974119, "grad_norm": 0.45066913390824037, "learning_rate": 8.561204999567258e-06, "loss": 0.2125, "step": 2901 }, { "epoch": 0.9691100350642845, "grad_norm": 0.7353204759248914, "learning_rate": 8.559840636923845e-06, "loss": 0.2262, "step": 2902 }, { "epoch": 0.9694439806311571, "grad_norm": 0.4509345787133013, "learning_rate": 8.55847573652459e-06, "loss": 0.2242, "step": 2903 }, { "epoch": 0.9697779261980297, "grad_norm": 0.41027552893332203, "learning_rate": 8.557110298575674e-06, "loss": 0.1956, "step": 2904 }, { "epoch": 0.9701118717649023, "grad_norm": 0.3886765860123989, "learning_rate": 8.555744323283364e-06, "loss": 0.2062, "step": 2905 }, { "epoch": 0.9704458173317749, "grad_norm": 0.4099609507610317, "learning_rate": 8.554377810854006e-06, "loss": 0.2229, "step": 2906 }, { "epoch": 0.9707797628986475, "grad_norm": 0.4514135763925288, "learning_rate": 8.553010761494029e-06, "loss": 0.205, "step": 2907 }, { "epoch": 0.9711137084655201, "grad_norm": 0.6008850226168184, "learning_rate": 8.551643175409941e-06, "loss": 0.2156, "step": 2908 }, { "epoch": 0.9714476540323927, "grad_norm": 0.4142063360961322, "learning_rate": 8.550275052808332e-06, "loss": 0.2333, "step": 2909 }, { "epoch": 0.9717815995992654, "grad_norm": 0.3950140050648195, "learning_rate": 8.548906393895876e-06, "loss": 0.202, "step": 2910 }, { "epoch": 0.972115545166138, "grad_norm": 0.4092389771262237, "learning_rate": 8.547537198879318e-06, "loss": 0.221, "step": 2911 }, { "epoch": 0.9724494907330106, "grad_norm": 0.4256297246287956, "learning_rate": 8.546167467965496e-06, "loss": 0.2134, "step": 2912 }, { "epoch": 0.9727834362998831, "grad_norm": 0.4442048031336666, "learning_rate": 8.544797201361324e-06, "loss": 0.2213, "step": 2913 }, { "epoch": 0.9731173818667557, "grad_norm": 0.7029604309389493, "learning_rate": 8.543426399273796e-06, "loss": 0.2621, "step": 2914 }, { "epoch": 0.9734513274336283, "grad_norm": 0.45508518355245203, "learning_rate": 8.542055061909988e-06, "loss": 0.2271, "step": 2915 }, { "epoch": 0.9737852730005009, "grad_norm": 0.4386269165717581, "learning_rate": 8.540683189477057e-06, "loss": 0.223, "step": 2916 }, { "epoch": 0.9741192185673735, "grad_norm": 0.392448221999816, "learning_rate": 8.539310782182238e-06, "loss": 0.2021, "step": 2917 }, { "epoch": 0.9744531641342461, "grad_norm": 0.42094965559359726, "learning_rate": 8.537937840232853e-06, "loss": 0.2052, "step": 2918 }, { "epoch": 0.9747871097011187, "grad_norm": 0.454216033457181, "learning_rate": 8.5365643638363e-06, "loss": 0.2272, "step": 2919 }, { "epoch": 0.9751210552679913, "grad_norm": 0.4225256473955746, "learning_rate": 8.535190353200056e-06, "loss": 0.2199, "step": 2920 }, { "epoch": 0.9754550008348639, "grad_norm": 0.39002247751830826, "learning_rate": 8.533815808531685e-06, "loss": 0.2065, "step": 2921 }, { "epoch": 0.9757889464017365, "grad_norm": 0.4266599827086228, "learning_rate": 8.532440730038826e-06, "loss": 0.2187, "step": 2922 }, { "epoch": 0.9761228919686091, "grad_norm": 0.4205083010157622, "learning_rate": 8.531065117929202e-06, "loss": 0.2215, "step": 2923 }, { "epoch": 0.9764568375354817, "grad_norm": 0.39958558170677844, "learning_rate": 8.529688972410616e-06, "loss": 0.2024, "step": 2924 }, { "epoch": 0.9767907831023543, "grad_norm": 0.4258603303840793, "learning_rate": 8.52831229369095e-06, "loss": 0.2219, "step": 2925 }, { "epoch": 0.977124728669227, "grad_norm": 0.46196673066836863, "learning_rate": 8.526935081978166e-06, "loss": 0.2247, "step": 2926 }, { "epoch": 0.9774586742360996, "grad_norm": 0.39273292233721263, "learning_rate": 8.52555733748031e-06, "loss": 0.2047, "step": 2927 }, { "epoch": 0.9777926198029722, "grad_norm": 0.43992124078925776, "learning_rate": 8.524179060405507e-06, "loss": 0.2215, "step": 2928 }, { "epoch": 0.9781265653698448, "grad_norm": 0.43852567181358093, "learning_rate": 8.52280025096196e-06, "loss": 0.2194, "step": 2929 }, { "epoch": 0.9784605109367173, "grad_norm": 0.3978127024829244, "learning_rate": 8.521420909357956e-06, "loss": 0.2048, "step": 2930 }, { "epoch": 0.9787944565035899, "grad_norm": 0.5547762340317196, "learning_rate": 8.52004103580186e-06, "loss": 0.2273, "step": 2931 }, { "epoch": 0.9791284020704625, "grad_norm": 0.43347864645126716, "learning_rate": 8.51866063050212e-06, "loss": 0.2086, "step": 2932 }, { "epoch": 0.9794623476373351, "grad_norm": 0.4228219103235732, "learning_rate": 8.51727969366726e-06, "loss": 0.2133, "step": 2933 }, { "epoch": 0.9797962932042077, "grad_norm": 0.44917306834090986, "learning_rate": 8.515898225505885e-06, "loss": 0.2042, "step": 2934 }, { "epoch": 0.9801302387710803, "grad_norm": 0.46303289407329606, "learning_rate": 8.514516226226688e-06, "loss": 0.2111, "step": 2935 }, { "epoch": 0.9804641843379529, "grad_norm": 0.4636036916535876, "learning_rate": 8.513133696038432e-06, "loss": 0.232, "step": 2936 }, { "epoch": 0.9807981299048255, "grad_norm": 0.3664746812558084, "learning_rate": 8.511750635149965e-06, "loss": 0.1995, "step": 2937 }, { "epoch": 0.9811320754716981, "grad_norm": 0.3944802889752811, "learning_rate": 8.510367043770213e-06, "loss": 0.1984, "step": 2938 }, { "epoch": 0.9814660210385707, "grad_norm": 0.444656078405521, "learning_rate": 8.508982922108188e-06, "loss": 0.2209, "step": 2939 }, { "epoch": 0.9817999666054433, "grad_norm": 0.4562655623442214, "learning_rate": 8.507598270372977e-06, "loss": 0.2312, "step": 2940 }, { "epoch": 0.9821339121723159, "grad_norm": 0.41457463444511544, "learning_rate": 8.506213088773744e-06, "loss": 0.2142, "step": 2941 }, { "epoch": 0.9824678577391885, "grad_norm": 0.39229806393694383, "learning_rate": 8.504827377519743e-06, "loss": 0.2077, "step": 2942 }, { "epoch": 0.9828018033060612, "grad_norm": 0.45229518000097885, "learning_rate": 8.503441136820296e-06, "loss": 0.2343, "step": 2943 }, { "epoch": 0.9831357488729338, "grad_norm": 0.4069598220008855, "learning_rate": 8.502054366884813e-06, "loss": 0.2062, "step": 2944 }, { "epoch": 0.9834696944398064, "grad_norm": 0.5008800220194679, "learning_rate": 8.500667067922784e-06, "loss": 0.2183, "step": 2945 }, { "epoch": 0.9838036400066789, "grad_norm": 0.4430084078638324, "learning_rate": 8.499279240143776e-06, "loss": 0.2272, "step": 2946 }, { "epoch": 0.9841375855735515, "grad_norm": 0.4201159436753723, "learning_rate": 8.497890883757434e-06, "loss": 0.2145, "step": 2947 }, { "epoch": 0.9844715311404241, "grad_norm": 0.4087927730039263, "learning_rate": 8.496501998973489e-06, "loss": 0.2124, "step": 2948 }, { "epoch": 0.9848054767072967, "grad_norm": 0.4203559208003164, "learning_rate": 8.495112586001747e-06, "loss": 0.2237, "step": 2949 }, { "epoch": 0.9851394222741693, "grad_norm": 0.464817002877942, "learning_rate": 8.493722645052093e-06, "loss": 0.2126, "step": 2950 }, { "epoch": 0.9854733678410419, "grad_norm": 0.46608580721174564, "learning_rate": 8.4923321763345e-06, "loss": 0.2061, "step": 2951 }, { "epoch": 0.9858073134079145, "grad_norm": 0.4967627432556802, "learning_rate": 8.490941180059009e-06, "loss": 0.2227, "step": 2952 }, { "epoch": 0.9861412589747871, "grad_norm": 0.37913666532255536, "learning_rate": 8.489549656435748e-06, "loss": 0.2015, "step": 2953 }, { "epoch": 0.9864752045416597, "grad_norm": 0.4623920401155906, "learning_rate": 8.488157605674924e-06, "loss": 0.2315, "step": 2954 }, { "epoch": 0.9868091501085323, "grad_norm": 0.4250267254101623, "learning_rate": 8.486765027986821e-06, "loss": 0.2106, "step": 2955 }, { "epoch": 0.9871430956754049, "grad_norm": 0.4378715394618819, "learning_rate": 8.485371923581807e-06, "loss": 0.2234, "step": 2956 }, { "epoch": 0.9874770412422775, "grad_norm": 0.39969124237667886, "learning_rate": 8.483978292670324e-06, "loss": 0.2013, "step": 2957 }, { "epoch": 0.9878109868091501, "grad_norm": 0.4052628643444376, "learning_rate": 8.482584135462896e-06, "loss": 0.2049, "step": 2958 }, { "epoch": 0.9881449323760227, "grad_norm": 0.5345263629856472, "learning_rate": 8.48118945217013e-06, "loss": 0.2483, "step": 2959 }, { "epoch": 0.9884788779428954, "grad_norm": 0.4411343257938832, "learning_rate": 8.479794243002707e-06, "loss": 0.2185, "step": 2960 }, { "epoch": 0.988812823509768, "grad_norm": 0.41540826133254866, "learning_rate": 8.47839850817139e-06, "loss": 0.2125, "step": 2961 }, { "epoch": 0.9891467690766405, "grad_norm": 0.44157585681599704, "learning_rate": 8.477002247887024e-06, "loss": 0.1991, "step": 2962 }, { "epoch": 0.9894807146435131, "grad_norm": 0.4391424439864253, "learning_rate": 8.475605462360525e-06, "loss": 0.2279, "step": 2963 }, { "epoch": 0.9898146602103857, "grad_norm": 0.5046416290709523, "learning_rate": 8.474208151802898e-06, "loss": 0.2396, "step": 2964 }, { "epoch": 0.9901486057772583, "grad_norm": 0.4567927616624758, "learning_rate": 8.472810316425223e-06, "loss": 0.2277, "step": 2965 }, { "epoch": 0.9904825513441309, "grad_norm": 0.4022340376300813, "learning_rate": 8.471411956438657e-06, "loss": 0.2108, "step": 2966 }, { "epoch": 0.9908164969110035, "grad_norm": 0.4798602783262579, "learning_rate": 8.470013072054442e-06, "loss": 0.2264, "step": 2967 }, { "epoch": 0.9911504424778761, "grad_norm": 0.42605908508496787, "learning_rate": 8.468613663483894e-06, "loss": 0.2163, "step": 2968 }, { "epoch": 0.9914843880447487, "grad_norm": 0.7310766075725814, "learning_rate": 8.467213730938408e-06, "loss": 0.2194, "step": 2969 }, { "epoch": 0.9918183336116213, "grad_norm": 0.40667359914165013, "learning_rate": 8.465813274629466e-06, "loss": 0.214, "step": 2970 }, { "epoch": 0.9921522791784939, "grad_norm": 0.43679647168307384, "learning_rate": 8.46441229476862e-06, "loss": 0.2407, "step": 2971 }, { "epoch": 0.9924862247453665, "grad_norm": 0.41879553528039526, "learning_rate": 8.463010791567503e-06, "loss": 0.2183, "step": 2972 }, { "epoch": 0.9928201703122391, "grad_norm": 0.43420445883979225, "learning_rate": 8.461608765237832e-06, "loss": 0.2221, "step": 2973 }, { "epoch": 0.9931541158791117, "grad_norm": 0.42725936711171186, "learning_rate": 8.460206215991398e-06, "loss": 0.2236, "step": 2974 }, { "epoch": 0.9934880614459843, "grad_norm": 0.45881095769887553, "learning_rate": 8.458803144040071e-06, "loss": 0.2374, "step": 2975 }, { "epoch": 0.993822007012857, "grad_norm": 0.4298372232535485, "learning_rate": 8.457399549595803e-06, "loss": 0.2045, "step": 2976 }, { "epoch": 0.9941559525797296, "grad_norm": 0.4446130630825048, "learning_rate": 8.455995432870626e-06, "loss": 0.2287, "step": 2977 }, { "epoch": 0.9944898981466022, "grad_norm": 0.4227750823184624, "learning_rate": 8.454590794076642e-06, "loss": 0.2199, "step": 2978 }, { "epoch": 0.9948238437134747, "grad_norm": 0.8347772521786165, "learning_rate": 8.453185633426044e-06, "loss": 0.2208, "step": 2979 }, { "epoch": 0.9951577892803473, "grad_norm": 0.38705711704080775, "learning_rate": 8.451779951131096e-06, "loss": 0.2048, "step": 2980 }, { "epoch": 0.9954917348472199, "grad_norm": 0.53915343201022, "learning_rate": 8.450373747404143e-06, "loss": 0.2052, "step": 2981 }, { "epoch": 0.9958256804140925, "grad_norm": 0.42343951131201196, "learning_rate": 8.448967022457611e-06, "loss": 0.2123, "step": 2982 }, { "epoch": 0.9961596259809651, "grad_norm": 0.39481374145182524, "learning_rate": 8.447559776503998e-06, "loss": 0.2132, "step": 2983 }, { "epoch": 0.9964935715478377, "grad_norm": 0.42340444262938, "learning_rate": 8.446152009755886e-06, "loss": 0.2093, "step": 2984 }, { "epoch": 0.9968275171147103, "grad_norm": 0.5923161067138794, "learning_rate": 8.444743722425937e-06, "loss": 0.2359, "step": 2985 }, { "epoch": 0.9971614626815829, "grad_norm": 0.537393246167669, "learning_rate": 8.443334914726886e-06, "loss": 0.2035, "step": 2986 }, { "epoch": 0.9974954082484555, "grad_norm": 0.47016253843865485, "learning_rate": 8.441925586871556e-06, "loss": 0.2039, "step": 2987 }, { "epoch": 0.9978293538153281, "grad_norm": 0.3945282745483418, "learning_rate": 8.440515739072836e-06, "loss": 0.2017, "step": 2988 }, { "epoch": 0.9981632993822007, "grad_norm": 0.42985799339322056, "learning_rate": 8.439105371543703e-06, "loss": 0.2134, "step": 2989 }, { "epoch": 0.9984972449490733, "grad_norm": 0.43566435703151096, "learning_rate": 8.43769448449721e-06, "loss": 0.2139, "step": 2990 }, { "epoch": 0.9988311905159459, "grad_norm": 0.43132891430456666, "learning_rate": 8.436283078146488e-06, "loss": 0.2223, "step": 2991 }, { "epoch": 0.9991651360828185, "grad_norm": 0.546060681351875, "learning_rate": 8.434871152704745e-06, "loss": 0.219, "step": 2992 }, { "epoch": 0.9994990816496911, "grad_norm": 0.45277563875621096, "learning_rate": 8.433458708385272e-06, "loss": 0.2125, "step": 2993 }, { "epoch": 0.9998330272165638, "grad_norm": 0.4072238139150075, "learning_rate": 8.432045745401431e-06, "loss": 0.2231, "step": 2994 }, { "epoch": 0.9998330272165638, "eval_loss": 0.21826396882534027, "eval_runtime": 187.3402, "eval_samples_per_second": 107.681, "eval_steps_per_second": 1.687, "step": 2994 }, { "epoch": 1.0001669727834364, "grad_norm": 0.37766855711266734, "learning_rate": 8.430632263966672e-06, "loss": 0.2026, "step": 2995 }, { "epoch": 1.0005009183503089, "grad_norm": 0.42746862033576705, "learning_rate": 8.429218264294512e-06, "loss": 0.1963, "step": 2996 }, { "epoch": 1.0008348639171816, "grad_norm": 0.42313117231942904, "learning_rate": 8.427803746598557e-06, "loss": 0.1963, "step": 2997 }, { "epoch": 1.001168809484054, "grad_norm": 0.40410590824339193, "learning_rate": 8.426388711092486e-06, "loss": 0.1937, "step": 2998 }, { "epoch": 1.0015027550509268, "grad_norm": 0.3865466171900227, "learning_rate": 8.424973157990053e-06, "loss": 0.1889, "step": 2999 }, { "epoch": 1.0018367006177993, "grad_norm": 0.42882739122705976, "learning_rate": 8.4235570875051e-06, "loss": 0.2107, "step": 3000 }, { "epoch": 1.002170646184672, "grad_norm": 0.45285004117271893, "learning_rate": 8.422140499851536e-06, "loss": 0.2176, "step": 3001 }, { "epoch": 1.0025045917515445, "grad_norm": 0.411068092342574, "learning_rate": 8.420723395243356e-06, "loss": 0.1989, "step": 3002 }, { "epoch": 1.002838537318417, "grad_norm": 0.4226831623463052, "learning_rate": 8.419305773894628e-06, "loss": 0.1881, "step": 3003 }, { "epoch": 1.0031724828852897, "grad_norm": 0.4373537624300133, "learning_rate": 8.417887636019504e-06, "loss": 0.1967, "step": 3004 }, { "epoch": 1.0035064284521622, "grad_norm": 0.4359816928757857, "learning_rate": 8.416468981832207e-06, "loss": 0.1959, "step": 3005 }, { "epoch": 1.003840374019035, "grad_norm": 0.4397588855669126, "learning_rate": 8.415049811547043e-06, "loss": 0.1953, "step": 3006 }, { "epoch": 1.0041743195859074, "grad_norm": 0.4361235547061362, "learning_rate": 8.413630125378393e-06, "loss": 0.1965, "step": 3007 }, { "epoch": 1.0045082651527801, "grad_norm": 0.44202989818976113, "learning_rate": 8.412209923540719e-06, "loss": 0.2008, "step": 3008 }, { "epoch": 1.0048422107196526, "grad_norm": 0.5020608356876495, "learning_rate": 8.41078920624856e-06, "loss": 0.2118, "step": 3009 }, { "epoch": 1.0051761562865253, "grad_norm": 0.427867506708943, "learning_rate": 8.409367973716527e-06, "loss": 0.2082, "step": 3010 }, { "epoch": 1.0055101018533978, "grad_norm": 0.3950096858664292, "learning_rate": 8.40794622615932e-06, "loss": 0.1906, "step": 3011 }, { "epoch": 1.0058440474202706, "grad_norm": 0.46858017830302484, "learning_rate": 8.406523963791709e-06, "loss": 0.2002, "step": 3012 }, { "epoch": 1.006177992987143, "grad_norm": 0.46112765843275033, "learning_rate": 8.405101186828542e-06, "loss": 0.2069, "step": 3013 }, { "epoch": 1.0065119385540158, "grad_norm": 0.7898365532056968, "learning_rate": 8.403677895484746e-06, "loss": 0.2108, "step": 3014 }, { "epoch": 1.0068458841208883, "grad_norm": 0.37587310991070744, "learning_rate": 8.402254089975328e-06, "loss": 0.188, "step": 3015 }, { "epoch": 1.007179829687761, "grad_norm": 0.4706264177833519, "learning_rate": 8.400829770515369e-06, "loss": 0.1941, "step": 3016 }, { "epoch": 1.0075137752546335, "grad_norm": 0.4025345426782933, "learning_rate": 8.399404937320031e-06, "loss": 0.187, "step": 3017 }, { "epoch": 1.0078477208215062, "grad_norm": 0.4425795571729669, "learning_rate": 8.397979590604548e-06, "loss": 0.2139, "step": 3018 }, { "epoch": 1.0081816663883787, "grad_norm": 0.41370338504034, "learning_rate": 8.39655373058424e-06, "loss": 0.1948, "step": 3019 }, { "epoch": 1.0085156119552512, "grad_norm": 0.49065974447042454, "learning_rate": 8.395127357474498e-06, "loss": 0.2014, "step": 3020 }, { "epoch": 1.008849557522124, "grad_norm": 0.42889156437278847, "learning_rate": 8.39370047149079e-06, "loss": 0.1955, "step": 3021 }, { "epoch": 1.0091835030889964, "grad_norm": 0.4188098849683885, "learning_rate": 8.39227307284867e-06, "loss": 0.1857, "step": 3022 }, { "epoch": 1.0095174486558691, "grad_norm": 0.4496220697484712, "learning_rate": 8.390845161763756e-06, "loss": 0.2073, "step": 3023 }, { "epoch": 1.0098513942227416, "grad_norm": 0.4471112381315624, "learning_rate": 8.389416738451755e-06, "loss": 0.1859, "step": 3024 }, { "epoch": 1.0101853397896143, "grad_norm": 0.5110693539703756, "learning_rate": 8.387987803128447e-06, "loss": 0.2178, "step": 3025 }, { "epoch": 1.0105192853564868, "grad_norm": 0.5629538582211671, "learning_rate": 8.386558356009691e-06, "loss": 0.208, "step": 3026 }, { "epoch": 1.0108532309233595, "grad_norm": 0.43042048600723654, "learning_rate": 8.385128397311418e-06, "loss": 0.191, "step": 3027 }, { "epoch": 1.011187176490232, "grad_norm": 0.443595870073697, "learning_rate": 8.383697927249641e-06, "loss": 0.1974, "step": 3028 }, { "epoch": 1.0115211220571048, "grad_norm": 0.5034619295424242, "learning_rate": 8.382266946040453e-06, "loss": 0.2031, "step": 3029 }, { "epoch": 1.0118550676239773, "grad_norm": 0.48727180978971396, "learning_rate": 8.380835453900017e-06, "loss": 0.2093, "step": 3030 }, { "epoch": 1.01218901319085, "grad_norm": 0.4396347345023138, "learning_rate": 8.379403451044576e-06, "loss": 0.197, "step": 3031 }, { "epoch": 1.0125229587577225, "grad_norm": 0.44433522767753325, "learning_rate": 8.377970937690455e-06, "loss": 0.2064, "step": 3032 }, { "epoch": 1.0128569043245952, "grad_norm": 0.42254899723175227, "learning_rate": 8.376537914054048e-06, "loss": 0.1891, "step": 3033 }, { "epoch": 1.0131908498914677, "grad_norm": 0.4347839679518912, "learning_rate": 8.37510438035183e-06, "loss": 0.193, "step": 3034 }, { "epoch": 1.0135247954583404, "grad_norm": 0.4350862330519345, "learning_rate": 8.373670336800358e-06, "loss": 0.2063, "step": 3035 }, { "epoch": 1.013858741025213, "grad_norm": 0.5486004506039731, "learning_rate": 8.372235783616258e-06, "loss": 0.2109, "step": 3036 }, { "epoch": 1.0141926865920854, "grad_norm": 0.41129617581244454, "learning_rate": 8.370800721016232e-06, "loss": 0.1962, "step": 3037 }, { "epoch": 1.014526632158958, "grad_norm": 0.4257282696992318, "learning_rate": 8.369365149217072e-06, "loss": 0.1982, "step": 3038 }, { "epoch": 1.0148605777258306, "grad_norm": 0.46728603791520196, "learning_rate": 8.36792906843563e-06, "loss": 0.1908, "step": 3039 }, { "epoch": 1.0151945232927033, "grad_norm": 0.4749390533604057, "learning_rate": 8.366492478888849e-06, "loss": 0.2052, "step": 3040 }, { "epoch": 1.0155284688595758, "grad_norm": 0.46810287444765153, "learning_rate": 8.365055380793737e-06, "loss": 0.2041, "step": 3041 }, { "epoch": 1.0158624144264485, "grad_norm": 0.42069357580760686, "learning_rate": 8.363617774367389e-06, "loss": 0.2063, "step": 3042 }, { "epoch": 1.016196359993321, "grad_norm": 0.38632263608832246, "learning_rate": 8.36217965982697e-06, "loss": 0.1904, "step": 3043 }, { "epoch": 1.0165303055601937, "grad_norm": 0.43057927948036445, "learning_rate": 8.360741037389727e-06, "loss": 0.216, "step": 3044 }, { "epoch": 1.0168642511270662, "grad_norm": 0.4222757576848028, "learning_rate": 8.359301907272976e-06, "loss": 0.1915, "step": 3045 }, { "epoch": 1.017198196693939, "grad_norm": 0.3786692087465665, "learning_rate": 8.35786226969412e-06, "loss": 0.1788, "step": 3046 }, { "epoch": 1.0175321422608115, "grad_norm": 0.45325514122512583, "learning_rate": 8.356422124870629e-06, "loss": 0.2057, "step": 3047 }, { "epoch": 1.0178660878276842, "grad_norm": 0.40238918835240045, "learning_rate": 8.354981473020056e-06, "loss": 0.1918, "step": 3048 }, { "epoch": 1.0182000333945567, "grad_norm": 0.4393213758556735, "learning_rate": 8.353540314360027e-06, "loss": 0.1956, "step": 3049 }, { "epoch": 1.0185339789614294, "grad_norm": 0.41890810895805786, "learning_rate": 8.352098649108246e-06, "loss": 0.1966, "step": 3050 }, { "epoch": 1.0188679245283019, "grad_norm": 0.4322860515599835, "learning_rate": 8.350656477482497e-06, "loss": 0.1988, "step": 3051 }, { "epoch": 1.0192018700951744, "grad_norm": 0.4546480187544326, "learning_rate": 8.349213799700635e-06, "loss": 0.2049, "step": 3052 }, { "epoch": 1.019535815662047, "grad_norm": 0.4717472602597745, "learning_rate": 8.34777061598059e-06, "loss": 0.2074, "step": 3053 }, { "epoch": 1.0198697612289196, "grad_norm": 0.4401320901117122, "learning_rate": 8.346326926540377e-06, "loss": 0.2015, "step": 3054 }, { "epoch": 1.0202037067957923, "grad_norm": 0.43023737821846486, "learning_rate": 8.344882731598079e-06, "loss": 0.2026, "step": 3055 }, { "epoch": 1.0205376523626648, "grad_norm": 0.41879723678450614, "learning_rate": 8.343438031371858e-06, "loss": 0.1928, "step": 3056 }, { "epoch": 1.0208715979295375, "grad_norm": 0.4906147418309719, "learning_rate": 8.341992826079956e-06, "loss": 0.2218, "step": 3057 }, { "epoch": 1.02120554349641, "grad_norm": 0.475504726884439, "learning_rate": 8.340547115940688e-06, "loss": 0.1776, "step": 3058 }, { "epoch": 1.0215394890632827, "grad_norm": 0.49917711995665354, "learning_rate": 8.339100901172443e-06, "loss": 0.2136, "step": 3059 }, { "epoch": 1.0218734346301552, "grad_norm": 0.6420536276666094, "learning_rate": 8.337654181993691e-06, "loss": 0.1996, "step": 3060 }, { "epoch": 1.022207380197028, "grad_norm": 0.586797523605855, "learning_rate": 8.336206958622975e-06, "loss": 0.2052, "step": 3061 }, { "epoch": 1.0225413257639004, "grad_norm": 0.42846778032094296, "learning_rate": 8.334759231278915e-06, "loss": 0.1913, "step": 3062 }, { "epoch": 1.0228752713307732, "grad_norm": 0.43006716318612825, "learning_rate": 8.333311000180208e-06, "loss": 0.1964, "step": 3063 }, { "epoch": 1.0232092168976457, "grad_norm": 0.415041357651441, "learning_rate": 8.331862265545627e-06, "loss": 0.1903, "step": 3064 }, { "epoch": 1.0235431624645184, "grad_norm": 0.4809451997950013, "learning_rate": 8.330413027594019e-06, "loss": 0.2193, "step": 3065 }, { "epoch": 1.0238771080313909, "grad_norm": 0.4849800241783216, "learning_rate": 8.328963286544309e-06, "loss": 0.2022, "step": 3066 }, { "epoch": 1.0242110535982636, "grad_norm": 0.4358252609774767, "learning_rate": 8.327513042615496e-06, "loss": 0.2153, "step": 3067 }, { "epoch": 1.024544999165136, "grad_norm": 0.42611601243396807, "learning_rate": 8.326062296026657e-06, "loss": 0.1973, "step": 3068 }, { "epoch": 1.0248789447320086, "grad_norm": 0.5112017276974804, "learning_rate": 8.324611046996947e-06, "loss": 0.219, "step": 3069 }, { "epoch": 1.0252128902988813, "grad_norm": 0.4373035623997926, "learning_rate": 8.32315929574559e-06, "loss": 0.1959, "step": 3070 }, { "epoch": 1.0255468358657538, "grad_norm": 0.3837563091069947, "learning_rate": 8.321707042491895e-06, "loss": 0.1844, "step": 3071 }, { "epoch": 1.0258807814326265, "grad_norm": 0.45421219166684185, "learning_rate": 8.320254287455238e-06, "loss": 0.2056, "step": 3072 }, { "epoch": 1.026214726999499, "grad_norm": 0.40781482071872827, "learning_rate": 8.318801030855078e-06, "loss": 0.1864, "step": 3073 }, { "epoch": 1.0265486725663717, "grad_norm": 0.4706705010349072, "learning_rate": 8.317347272910944e-06, "loss": 0.2033, "step": 3074 }, { "epoch": 1.0268826181332442, "grad_norm": 0.45550347872407415, "learning_rate": 8.315893013842441e-06, "loss": 0.2192, "step": 3075 }, { "epoch": 1.027216563700117, "grad_norm": 0.4563220551157251, "learning_rate": 8.31443825386926e-06, "loss": 0.2164, "step": 3076 }, { "epoch": 1.0275505092669894, "grad_norm": 0.4866578429632529, "learning_rate": 8.312982993211151e-06, "loss": 0.2222, "step": 3077 }, { "epoch": 1.0278844548338621, "grad_norm": 0.4244291790269176, "learning_rate": 8.311527232087951e-06, "loss": 0.2031, "step": 3078 }, { "epoch": 1.0282184004007346, "grad_norm": 0.4243539342498891, "learning_rate": 8.310070970719573e-06, "loss": 0.2022, "step": 3079 }, { "epoch": 1.0285523459676074, "grad_norm": 0.4218336141097441, "learning_rate": 8.308614209325997e-06, "loss": 0.2044, "step": 3080 }, { "epoch": 1.0288862915344799, "grad_norm": 0.4661210673858716, "learning_rate": 8.30715694812729e-06, "loss": 0.202, "step": 3081 }, { "epoch": 1.0292202371013526, "grad_norm": 0.4768217692487075, "learning_rate": 8.305699187343586e-06, "loss": 0.2128, "step": 3082 }, { "epoch": 1.029554182668225, "grad_norm": 0.46166941719548166, "learning_rate": 8.304240927195094e-06, "loss": 0.2108, "step": 3083 }, { "epoch": 1.0298881282350978, "grad_norm": 0.43191760215598024, "learning_rate": 8.302782167902103e-06, "loss": 0.2062, "step": 3084 }, { "epoch": 1.0302220738019703, "grad_norm": 0.41257731682620635, "learning_rate": 8.30132290968498e-06, "loss": 0.1818, "step": 3085 }, { "epoch": 1.0305560193688428, "grad_norm": 0.43840781003063306, "learning_rate": 8.299863152764158e-06, "loss": 0.2066, "step": 3086 }, { "epoch": 1.0308899649357155, "grad_norm": 0.4919766494174308, "learning_rate": 8.298402897360152e-06, "loss": 0.1962, "step": 3087 }, { "epoch": 1.031223910502588, "grad_norm": 0.4370771922590659, "learning_rate": 8.29694214369355e-06, "loss": 0.1989, "step": 3088 }, { "epoch": 1.0315578560694607, "grad_norm": 0.4522687494366065, "learning_rate": 8.295480891985019e-06, "loss": 0.197, "step": 3089 }, { "epoch": 1.0318918016363332, "grad_norm": 0.459698970205727, "learning_rate": 8.294019142455295e-06, "loss": 0.215, "step": 3090 }, { "epoch": 1.032225747203206, "grad_norm": 0.45768357278304855, "learning_rate": 8.292556895325195e-06, "loss": 0.1834, "step": 3091 }, { "epoch": 1.0325596927700784, "grad_norm": 0.4779953772645454, "learning_rate": 8.291094150815607e-06, "loss": 0.2046, "step": 3092 }, { "epoch": 1.0328936383369511, "grad_norm": 0.4110970900415633, "learning_rate": 8.289630909147494e-06, "loss": 0.1926, "step": 3093 }, { "epoch": 1.0332275839038236, "grad_norm": 0.4263110732845005, "learning_rate": 8.2881671705419e-06, "loss": 0.1961, "step": 3094 }, { "epoch": 1.0335615294706963, "grad_norm": 0.42351189564424013, "learning_rate": 8.286702935219936e-06, "loss": 0.1924, "step": 3095 }, { "epoch": 1.0338954750375688, "grad_norm": 0.5111127526263297, "learning_rate": 8.285238203402796e-06, "loss": 0.2099, "step": 3096 }, { "epoch": 1.0342294206044416, "grad_norm": 0.4395888062180706, "learning_rate": 8.283772975311742e-06, "loss": 0.2091, "step": 3097 }, { "epoch": 1.034563366171314, "grad_norm": 0.40597673314935695, "learning_rate": 8.282307251168116e-06, "loss": 0.1948, "step": 3098 }, { "epoch": 1.0348973117381868, "grad_norm": 0.41337566836535816, "learning_rate": 8.28084103119333e-06, "loss": 0.1956, "step": 3099 }, { "epoch": 1.0352312573050593, "grad_norm": 0.4440489461102574, "learning_rate": 8.279374315608877e-06, "loss": 0.1849, "step": 3100 }, { "epoch": 1.0355652028719318, "grad_norm": 0.43649748059427285, "learning_rate": 8.27790710463632e-06, "loss": 0.202, "step": 3101 }, { "epoch": 1.0358991484388045, "grad_norm": 0.45527974348537775, "learning_rate": 8.276439398497298e-06, "loss": 0.2069, "step": 3102 }, { "epoch": 1.036233094005677, "grad_norm": 0.4411849130395665, "learning_rate": 8.274971197413527e-06, "loss": 0.2018, "step": 3103 }, { "epoch": 1.0365670395725497, "grad_norm": 0.432927432928707, "learning_rate": 8.273502501606794e-06, "loss": 0.1921, "step": 3104 }, { "epoch": 1.0369009851394222, "grad_norm": 0.48645160430520945, "learning_rate": 8.272033311298965e-06, "loss": 0.2062, "step": 3105 }, { "epoch": 1.037234930706295, "grad_norm": 0.4723298015246345, "learning_rate": 8.270563626711979e-06, "loss": 0.2191, "step": 3106 }, { "epoch": 1.0375688762731674, "grad_norm": 0.4286934741750141, "learning_rate": 8.269093448067845e-06, "loss": 0.1974, "step": 3107 }, { "epoch": 1.0379028218400401, "grad_norm": 0.4898091980655584, "learning_rate": 8.267622775588653e-06, "loss": 0.2071, "step": 3108 }, { "epoch": 1.0382367674069126, "grad_norm": 0.42263697887339474, "learning_rate": 8.266151609496567e-06, "loss": 0.1943, "step": 3109 }, { "epoch": 1.0385707129737853, "grad_norm": 0.4409916018139914, "learning_rate": 8.26467995001382e-06, "loss": 0.1845, "step": 3110 }, { "epoch": 1.0389046585406578, "grad_norm": 0.46867210638599427, "learning_rate": 8.26320779736273e-06, "loss": 0.2111, "step": 3111 }, { "epoch": 1.0392386041075306, "grad_norm": 1.1833877800166233, "learning_rate": 8.261735151765678e-06, "loss": 0.2006, "step": 3112 }, { "epoch": 1.039572549674403, "grad_norm": 0.4392933903047106, "learning_rate": 8.260262013445126e-06, "loss": 0.1959, "step": 3113 }, { "epoch": 1.0399064952412758, "grad_norm": 0.43251596850546686, "learning_rate": 8.258788382623607e-06, "loss": 0.1921, "step": 3114 }, { "epoch": 1.0402404408081483, "grad_norm": 0.5317197119185111, "learning_rate": 8.257314259523732e-06, "loss": 0.2015, "step": 3115 }, { "epoch": 1.040574386375021, "grad_norm": 0.4656601804952947, "learning_rate": 8.255839644368185e-06, "loss": 0.2033, "step": 3116 }, { "epoch": 1.0409083319418935, "grad_norm": 0.48325787940709314, "learning_rate": 8.254364537379725e-06, "loss": 0.1973, "step": 3117 }, { "epoch": 1.041242277508766, "grad_norm": 0.4615708346164602, "learning_rate": 8.25288893878118e-06, "loss": 0.2173, "step": 3118 }, { "epoch": 1.0415762230756387, "grad_norm": 0.4910469321775321, "learning_rate": 8.251412848795462e-06, "loss": 0.2112, "step": 3119 }, { "epoch": 1.0419101686425112, "grad_norm": 0.42906476123756765, "learning_rate": 8.249936267645546e-06, "loss": 0.2085, "step": 3120 }, { "epoch": 1.042244114209384, "grad_norm": 0.4468814505175119, "learning_rate": 8.248459195554492e-06, "loss": 0.2027, "step": 3121 }, { "epoch": 1.0425780597762564, "grad_norm": 0.4510701292444035, "learning_rate": 8.246981632745428e-06, "loss": 0.216, "step": 3122 }, { "epoch": 1.0429120053431291, "grad_norm": 0.5012622614358289, "learning_rate": 8.245503579441554e-06, "loss": 0.2041, "step": 3123 }, { "epoch": 1.0432459509100016, "grad_norm": 0.44604941183333585, "learning_rate": 8.244025035866151e-06, "loss": 0.2107, "step": 3124 }, { "epoch": 1.0435798964768743, "grad_norm": 0.43182075677881904, "learning_rate": 8.242546002242569e-06, "loss": 0.2012, "step": 3125 }, { "epoch": 1.0439138420437468, "grad_norm": 0.7065204162623006, "learning_rate": 8.241066478794233e-06, "loss": 0.2149, "step": 3126 }, { "epoch": 1.0442477876106195, "grad_norm": 0.43548371799456587, "learning_rate": 8.239586465744644e-06, "loss": 0.1964, "step": 3127 }, { "epoch": 1.044581733177492, "grad_norm": 0.45976736574310023, "learning_rate": 8.238105963317376e-06, "loss": 0.1977, "step": 3128 }, { "epoch": 1.0449156787443648, "grad_norm": 0.45610032095662145, "learning_rate": 8.236624971736071e-06, "loss": 0.2006, "step": 3129 }, { "epoch": 1.0452496243112372, "grad_norm": 0.4178927483884343, "learning_rate": 8.235143491224458e-06, "loss": 0.1941, "step": 3130 }, { "epoch": 1.04558356987811, "grad_norm": 0.4450014435903185, "learning_rate": 8.233661522006324e-06, "loss": 0.2011, "step": 3131 }, { "epoch": 1.0459175154449825, "grad_norm": 0.4678822230492103, "learning_rate": 8.232179064305545e-06, "loss": 0.2141, "step": 3132 }, { "epoch": 1.0462514610118552, "grad_norm": 0.4659113518770199, "learning_rate": 8.230696118346059e-06, "loss": 0.2029, "step": 3133 }, { "epoch": 1.0465854065787277, "grad_norm": 0.42572466204955733, "learning_rate": 8.229212684351886e-06, "loss": 0.2092, "step": 3134 }, { "epoch": 1.0469193521456002, "grad_norm": 0.42976487899817956, "learning_rate": 8.227728762547112e-06, "loss": 0.2049, "step": 3135 }, { "epoch": 1.0472532977124729, "grad_norm": 0.4285031948928849, "learning_rate": 8.226244353155906e-06, "loss": 0.1895, "step": 3136 }, { "epoch": 1.0475872432793454, "grad_norm": 0.41985602222315516, "learning_rate": 8.2247594564025e-06, "loss": 0.1976, "step": 3137 }, { "epoch": 1.047921188846218, "grad_norm": 0.44464462060650317, "learning_rate": 8.22327407251121e-06, "loss": 0.1917, "step": 3138 }, { "epoch": 1.0482551344130906, "grad_norm": 0.4273282426042788, "learning_rate": 8.221788201706416e-06, "loss": 0.202, "step": 3139 }, { "epoch": 1.0485890799799633, "grad_norm": 0.41653410454050144, "learning_rate": 8.22030184421258e-06, "loss": 0.2017, "step": 3140 }, { "epoch": 1.0489230255468358, "grad_norm": 0.43858551157838355, "learning_rate": 8.218815000254233e-06, "loss": 0.2022, "step": 3141 }, { "epoch": 1.0492569711137085, "grad_norm": 0.4908298917701791, "learning_rate": 8.21732767005598e-06, "loss": 0.2132, "step": 3142 }, { "epoch": 1.049590916680581, "grad_norm": 0.4749605912490831, "learning_rate": 8.215839853842498e-06, "loss": 0.2158, "step": 3143 }, { "epoch": 1.0499248622474537, "grad_norm": 0.4483578369643801, "learning_rate": 8.214351551838541e-06, "loss": 0.2043, "step": 3144 }, { "epoch": 1.0502588078143262, "grad_norm": 0.42665670967430674, "learning_rate": 8.212862764268936e-06, "loss": 0.1972, "step": 3145 }, { "epoch": 1.050592753381199, "grad_norm": 0.44815585260638, "learning_rate": 8.21137349135858e-06, "loss": 0.2022, "step": 3146 }, { "epoch": 1.0509266989480714, "grad_norm": 0.42978041785750576, "learning_rate": 8.209883733332444e-06, "loss": 0.193, "step": 3147 }, { "epoch": 1.0512606445149442, "grad_norm": 0.40783792703270516, "learning_rate": 8.208393490415576e-06, "loss": 0.2001, "step": 3148 }, { "epoch": 1.0515945900818167, "grad_norm": 0.5034157622557892, "learning_rate": 8.206902762833095e-06, "loss": 0.1908, "step": 3149 }, { "epoch": 1.0519285356486892, "grad_norm": 0.41596953468049164, "learning_rate": 8.205411550810189e-06, "loss": 0.2026, "step": 3150 }, { "epoch": 1.0522624812155619, "grad_norm": 0.4387109004909959, "learning_rate": 8.203919854572126e-06, "loss": 0.1955, "step": 3151 }, { "epoch": 1.0525964267824344, "grad_norm": 0.4400267808923557, "learning_rate": 8.202427674344246e-06, "loss": 0.1956, "step": 3152 }, { "epoch": 1.052930372349307, "grad_norm": 0.42088665905677247, "learning_rate": 8.200935010351958e-06, "loss": 0.2036, "step": 3153 }, { "epoch": 1.0532643179161796, "grad_norm": 0.42093977058051263, "learning_rate": 8.199441862820746e-06, "loss": 0.2094, "step": 3154 }, { "epoch": 1.0535982634830523, "grad_norm": 0.41817125248997694, "learning_rate": 8.197948231976169e-06, "loss": 0.1905, "step": 3155 }, { "epoch": 1.0539322090499248, "grad_norm": 0.42802200051982714, "learning_rate": 8.196454118043856e-06, "loss": 0.2004, "step": 3156 }, { "epoch": 1.0542661546167975, "grad_norm": 0.4750486331903779, "learning_rate": 8.194959521249512e-06, "loss": 0.2019, "step": 3157 }, { "epoch": 1.05460010018367, "grad_norm": 0.5160559948271373, "learning_rate": 8.193464441818913e-06, "loss": 0.209, "step": 3158 }, { "epoch": 1.0549340457505427, "grad_norm": 0.420087588687286, "learning_rate": 8.191968879977907e-06, "loss": 0.2003, "step": 3159 }, { "epoch": 1.0552679913174152, "grad_norm": 0.4037748789668453, "learning_rate": 8.190472835952419e-06, "loss": 0.1872, "step": 3160 }, { "epoch": 1.055601936884288, "grad_norm": 0.38711125361133414, "learning_rate": 8.188976309968443e-06, "loss": 0.1858, "step": 3161 }, { "epoch": 1.0559358824511604, "grad_norm": 0.47560444912511207, "learning_rate": 8.187479302252045e-06, "loss": 0.2034, "step": 3162 }, { "epoch": 1.0562698280180332, "grad_norm": 0.3964934919375712, "learning_rate": 8.185981813029368e-06, "loss": 0.1892, "step": 3163 }, { "epoch": 1.0566037735849056, "grad_norm": 0.45616318954926843, "learning_rate": 8.184483842526623e-06, "loss": 0.1934, "step": 3164 }, { "epoch": 1.0569377191517784, "grad_norm": 0.4221881863168421, "learning_rate": 8.1829853909701e-06, "loss": 0.1882, "step": 3165 }, { "epoch": 1.0572716647186509, "grad_norm": 0.4946404078548695, "learning_rate": 8.181486458586153e-06, "loss": 0.2197, "step": 3166 }, { "epoch": 1.0576056102855234, "grad_norm": 0.41759418433544915, "learning_rate": 8.179987045601217e-06, "loss": 0.1905, "step": 3167 }, { "epoch": 1.057939555852396, "grad_norm": 0.4680014004671277, "learning_rate": 8.178487152241795e-06, "loss": 0.2116, "step": 3168 }, { "epoch": 1.0582735014192686, "grad_norm": 0.4679411328019381, "learning_rate": 8.17698677873446e-06, "loss": 0.2281, "step": 3169 }, { "epoch": 1.0586074469861413, "grad_norm": 0.4306592891134695, "learning_rate": 8.175485925305867e-06, "loss": 0.1981, "step": 3170 }, { "epoch": 1.0589413925530138, "grad_norm": 0.4279511404583339, "learning_rate": 8.173984592182736e-06, "loss": 0.2152, "step": 3171 }, { "epoch": 1.0592753381198865, "grad_norm": 0.4256581923170128, "learning_rate": 8.172482779591858e-06, "loss": 0.1965, "step": 3172 }, { "epoch": 1.059609283686759, "grad_norm": 0.42259600236442063, "learning_rate": 8.170980487760101e-06, "loss": 0.1959, "step": 3173 }, { "epoch": 1.0599432292536317, "grad_norm": 0.3919759523982466, "learning_rate": 8.169477716914405e-06, "loss": 0.1835, "step": 3174 }, { "epoch": 1.0602771748205042, "grad_norm": 0.3965257390740146, "learning_rate": 8.16797446728178e-06, "loss": 0.192, "step": 3175 }, { "epoch": 1.060611120387377, "grad_norm": 0.44522593096004287, "learning_rate": 8.16647073908931e-06, "loss": 0.2098, "step": 3176 }, { "epoch": 1.0609450659542494, "grad_norm": 0.4581897421406975, "learning_rate": 8.164966532564152e-06, "loss": 0.2049, "step": 3177 }, { "epoch": 1.0612790115211221, "grad_norm": 0.4995506758881796, "learning_rate": 8.163461847933532e-06, "loss": 0.1989, "step": 3178 }, { "epoch": 1.0616129570879946, "grad_norm": 0.40476982409428536, "learning_rate": 8.161956685424752e-06, "loss": 0.1843, "step": 3179 }, { "epoch": 1.0619469026548674, "grad_norm": 0.42655470604742796, "learning_rate": 8.160451045265183e-06, "loss": 0.2032, "step": 3180 }, { "epoch": 1.0622808482217398, "grad_norm": 0.42018746091677206, "learning_rate": 8.158944927682269e-06, "loss": 0.1965, "step": 3181 }, { "epoch": 1.0626147937886126, "grad_norm": 0.4268157415568209, "learning_rate": 8.157438332903531e-06, "loss": 0.2006, "step": 3182 }, { "epoch": 1.062948739355485, "grad_norm": 0.4033846341010823, "learning_rate": 8.155931261156555e-06, "loss": 0.1924, "step": 3183 }, { "epoch": 1.0632826849223576, "grad_norm": 0.43889640040181027, "learning_rate": 8.154423712669003e-06, "loss": 0.1986, "step": 3184 }, { "epoch": 1.0636166304892303, "grad_norm": 0.4001342148930981, "learning_rate": 8.152915687668603e-06, "loss": 0.1862, "step": 3185 }, { "epoch": 1.0639505760561028, "grad_norm": 0.4105761999473694, "learning_rate": 8.151407186383166e-06, "loss": 0.191, "step": 3186 }, { "epoch": 1.0642845216229755, "grad_norm": 0.4319067425333874, "learning_rate": 8.149898209040568e-06, "loss": 0.1997, "step": 3187 }, { "epoch": 1.064618467189848, "grad_norm": 0.4560783659343897, "learning_rate": 8.148388755868757e-06, "loss": 0.2077, "step": 3188 }, { "epoch": 1.0649524127567207, "grad_norm": 0.43798081875672745, "learning_rate": 8.146878827095751e-06, "loss": 0.1913, "step": 3189 }, { "epoch": 1.0652863583235932, "grad_norm": 0.4372702184821335, "learning_rate": 8.145368422949647e-06, "loss": 0.2034, "step": 3190 }, { "epoch": 1.065620303890466, "grad_norm": 0.41986516394757967, "learning_rate": 8.143857543658606e-06, "loss": 0.199, "step": 3191 }, { "epoch": 1.0659542494573384, "grad_norm": 0.41907063437667164, "learning_rate": 8.142346189450866e-06, "loss": 0.2159, "step": 3192 }, { "epoch": 1.0662881950242111, "grad_norm": 0.4094081046487141, "learning_rate": 8.140834360554734e-06, "loss": 0.1948, "step": 3193 }, { "epoch": 1.0666221405910836, "grad_norm": 0.4371273342597166, "learning_rate": 8.13932205719859e-06, "loss": 0.2082, "step": 3194 }, { "epoch": 1.0669560861579563, "grad_norm": 0.4135820465008076, "learning_rate": 8.137809279610885e-06, "loss": 0.1929, "step": 3195 }, { "epoch": 1.0672900317248288, "grad_norm": 0.4399567478630838, "learning_rate": 8.13629602802014e-06, "loss": 0.1995, "step": 3196 }, { "epoch": 1.0676239772917016, "grad_norm": 0.5092407448174172, "learning_rate": 8.134782302654953e-06, "loss": 0.2134, "step": 3197 }, { "epoch": 1.067957922858574, "grad_norm": 0.5597000261870062, "learning_rate": 8.133268103743989e-06, "loss": 0.2111, "step": 3198 }, { "epoch": 1.0682918684254465, "grad_norm": 0.4341592549068151, "learning_rate": 8.131753431515984e-06, "loss": 0.2052, "step": 3199 }, { "epoch": 1.0686258139923193, "grad_norm": 0.5131722560356433, "learning_rate": 8.130238286199747e-06, "loss": 0.1998, "step": 3200 }, { "epoch": 1.0689597595591918, "grad_norm": 0.4204116895958866, "learning_rate": 8.128722668024161e-06, "loss": 0.1961, "step": 3201 }, { "epoch": 1.0692937051260645, "grad_norm": 0.46505953263469596, "learning_rate": 8.127206577218177e-06, "loss": 0.2106, "step": 3202 }, { "epoch": 1.069627650692937, "grad_norm": 0.44604847632939687, "learning_rate": 8.125690014010814e-06, "loss": 0.2052, "step": 3203 }, { "epoch": 1.0699615962598097, "grad_norm": 0.43756933565755846, "learning_rate": 8.124172978631173e-06, "loss": 0.1994, "step": 3204 }, { "epoch": 1.0702955418266822, "grad_norm": 0.461832138118902, "learning_rate": 8.12265547130842e-06, "loss": 0.2158, "step": 3205 }, { "epoch": 1.070629487393555, "grad_norm": 0.5180640624879851, "learning_rate": 8.121137492271787e-06, "loss": 0.206, "step": 3206 }, { "epoch": 1.0709634329604274, "grad_norm": 0.44492318961314764, "learning_rate": 8.119619041750586e-06, "loss": 0.2123, "step": 3207 }, { "epoch": 1.0712973785273001, "grad_norm": 0.44935278367080866, "learning_rate": 8.118100119974197e-06, "loss": 0.1938, "step": 3208 }, { "epoch": 1.0716313240941726, "grad_norm": 0.5594811829388091, "learning_rate": 8.116580727172071e-06, "loss": 0.1879, "step": 3209 }, { "epoch": 1.0719652696610453, "grad_norm": 0.4261468140314612, "learning_rate": 8.115060863573729e-06, "loss": 0.194, "step": 3210 }, { "epoch": 1.0722992152279178, "grad_norm": 0.5033805368157565, "learning_rate": 8.113540529408766e-06, "loss": 0.201, "step": 3211 }, { "epoch": 1.0726331607947905, "grad_norm": 0.41711740898266775, "learning_rate": 8.112019724906844e-06, "loss": 0.1898, "step": 3212 }, { "epoch": 1.072967106361663, "grad_norm": 0.4474697888666802, "learning_rate": 8.1104984502977e-06, "loss": 0.2019, "step": 3213 }, { "epoch": 1.0733010519285355, "grad_norm": 0.4425877508940847, "learning_rate": 8.108976705811138e-06, "loss": 0.204, "step": 3214 }, { "epoch": 1.0736349974954082, "grad_norm": 0.40031817179384527, "learning_rate": 8.107454491677041e-06, "loss": 0.191, "step": 3215 }, { "epoch": 1.0739689430622807, "grad_norm": 0.4374992487231899, "learning_rate": 8.10593180812535e-06, "loss": 0.1825, "step": 3216 }, { "epoch": 1.0743028886291535, "grad_norm": 0.4304869205108331, "learning_rate": 8.104408655386092e-06, "loss": 0.1964, "step": 3217 }, { "epoch": 1.074636834196026, "grad_norm": 0.4273397458580168, "learning_rate": 8.102885033689352e-06, "loss": 0.1987, "step": 3218 }, { "epoch": 1.0749707797628987, "grad_norm": 0.44715648855381623, "learning_rate": 8.101360943265293e-06, "loss": 0.2094, "step": 3219 }, { "epoch": 1.0753047253297712, "grad_norm": 0.433584901382971, "learning_rate": 8.099836384344146e-06, "loss": 0.192, "step": 3220 }, { "epoch": 1.075638670896644, "grad_norm": 0.45246811449875446, "learning_rate": 8.098311357156213e-06, "loss": 0.2063, "step": 3221 }, { "epoch": 1.0759726164635164, "grad_norm": 0.3831669548869065, "learning_rate": 8.096785861931868e-06, "loss": 0.1873, "step": 3222 }, { "epoch": 1.076306562030389, "grad_norm": 0.39589825338369394, "learning_rate": 8.095259898901557e-06, "loss": 0.1884, "step": 3223 }, { "epoch": 1.0766405075972616, "grad_norm": 0.42988110937230956, "learning_rate": 8.09373346829579e-06, "loss": 0.1989, "step": 3224 }, { "epoch": 1.0769744531641343, "grad_norm": 0.3861062119239817, "learning_rate": 8.092206570345158e-06, "loss": 0.1876, "step": 3225 }, { "epoch": 1.0773083987310068, "grad_norm": 0.39497963164084615, "learning_rate": 8.090679205280311e-06, "loss": 0.1875, "step": 3226 }, { "epoch": 1.0776423442978795, "grad_norm": 0.4204241046414486, "learning_rate": 8.08915137333198e-06, "loss": 0.1985, "step": 3227 }, { "epoch": 1.077976289864752, "grad_norm": 0.512738178352209, "learning_rate": 8.08762307473096e-06, "loss": 0.2269, "step": 3228 }, { "epoch": 1.0783102354316247, "grad_norm": 0.44715999802900963, "learning_rate": 8.08609430970812e-06, "loss": 0.2031, "step": 3229 }, { "epoch": 1.0786441809984972, "grad_norm": 0.3995392929976696, "learning_rate": 8.084565078494396e-06, "loss": 0.1908, "step": 3230 }, { "epoch": 1.07897812656537, "grad_norm": 0.4132071042034365, "learning_rate": 8.083035381320798e-06, "loss": 0.1984, "step": 3231 }, { "epoch": 1.0793120721322425, "grad_norm": 0.4507397205246835, "learning_rate": 8.081505218418403e-06, "loss": 0.2041, "step": 3232 }, { "epoch": 1.079646017699115, "grad_norm": 0.39389622349185976, "learning_rate": 8.079974590018363e-06, "loss": 0.1941, "step": 3233 }, { "epoch": 1.0799799632659877, "grad_norm": 0.418373210132237, "learning_rate": 8.078443496351893e-06, "loss": 0.1896, "step": 3234 }, { "epoch": 1.0803139088328602, "grad_norm": 0.44554108624356625, "learning_rate": 8.076911937650288e-06, "loss": 0.1959, "step": 3235 }, { "epoch": 1.0806478543997329, "grad_norm": 0.3895348321590768, "learning_rate": 8.075379914144902e-06, "loss": 0.1834, "step": 3236 }, { "epoch": 1.0809817999666054, "grad_norm": 0.41066865301421246, "learning_rate": 8.073847426067172e-06, "loss": 0.1926, "step": 3237 }, { "epoch": 1.081315745533478, "grad_norm": 0.4979728510699529, "learning_rate": 8.072314473648595e-06, "loss": 0.2136, "step": 3238 }, { "epoch": 1.0816496911003506, "grad_norm": 0.4147497389444579, "learning_rate": 8.07078105712074e-06, "loss": 0.2014, "step": 3239 }, { "epoch": 1.0819836366672233, "grad_norm": 0.44728845985893656, "learning_rate": 8.06924717671525e-06, "loss": 0.1971, "step": 3240 }, { "epoch": 1.0823175822340958, "grad_norm": 0.4179130657469007, "learning_rate": 8.067712832663831e-06, "loss": 0.1949, "step": 3241 }, { "epoch": 1.0826515278009685, "grad_norm": 0.4107644564937398, "learning_rate": 8.066178025198272e-06, "loss": 0.1877, "step": 3242 }, { "epoch": 1.082985473367841, "grad_norm": 0.46230738921006503, "learning_rate": 8.064642754550418e-06, "loss": 0.2038, "step": 3243 }, { "epoch": 1.0833194189347137, "grad_norm": 0.44196147805322944, "learning_rate": 8.06310702095219e-06, "loss": 0.2034, "step": 3244 }, { "epoch": 1.0836533645015862, "grad_norm": 0.4247083802737399, "learning_rate": 8.06157082463558e-06, "loss": 0.2016, "step": 3245 }, { "epoch": 1.083987310068459, "grad_norm": 0.4871767192301708, "learning_rate": 8.060034165832648e-06, "loss": 0.2036, "step": 3246 }, { "epoch": 1.0843212556353314, "grad_norm": 0.44229334769971135, "learning_rate": 8.058497044775526e-06, "loss": 0.2002, "step": 3247 }, { "epoch": 1.084655201202204, "grad_norm": 0.47425800895867326, "learning_rate": 8.05695946169641e-06, "loss": 0.2031, "step": 3248 }, { "epoch": 1.0849891467690767, "grad_norm": 0.4145195665483283, "learning_rate": 8.055421416827575e-06, "loss": 0.1965, "step": 3249 }, { "epoch": 1.0853230923359491, "grad_norm": 0.5037905106010119, "learning_rate": 8.053882910401359e-06, "loss": 0.2002, "step": 3250 }, { "epoch": 1.0856570379028219, "grad_norm": 0.47633932245924165, "learning_rate": 8.052343942650168e-06, "loss": 0.2055, "step": 3251 }, { "epoch": 1.0859909834696944, "grad_norm": 0.4534882323600842, "learning_rate": 8.050804513806488e-06, "loss": 0.2039, "step": 3252 }, { "epoch": 1.086324929036567, "grad_norm": 0.4399351104535666, "learning_rate": 8.049264624102862e-06, "loss": 0.1916, "step": 3253 }, { "epoch": 1.0866588746034396, "grad_norm": 0.42972776831463017, "learning_rate": 8.047724273771909e-06, "loss": 0.2051, "step": 3254 }, { "epoch": 1.0869928201703123, "grad_norm": 0.4387648183372129, "learning_rate": 8.046183463046322e-06, "loss": 0.2131, "step": 3255 }, { "epoch": 1.0873267657371848, "grad_norm": 0.4531319223093814, "learning_rate": 8.044642192158854e-06, "loss": 0.2045, "step": 3256 }, { "epoch": 1.0876607113040575, "grad_norm": 0.43051407362446054, "learning_rate": 8.043100461342332e-06, "loss": 0.2063, "step": 3257 }, { "epoch": 1.08799465687093, "grad_norm": 0.4543744781210135, "learning_rate": 8.041558270829655e-06, "loss": 0.1944, "step": 3258 }, { "epoch": 1.0883286024378027, "grad_norm": 0.5087181521948336, "learning_rate": 8.04001562085379e-06, "loss": 0.2223, "step": 3259 }, { "epoch": 1.0886625480046752, "grad_norm": 0.4310436393030242, "learning_rate": 8.038472511647768e-06, "loss": 0.1884, "step": 3260 }, { "epoch": 1.088996493571548, "grad_norm": 0.4297632017698525, "learning_rate": 8.036928943444698e-06, "loss": 0.2041, "step": 3261 }, { "epoch": 1.0893304391384204, "grad_norm": 0.4827595445998538, "learning_rate": 8.03538491647775e-06, "loss": 0.2051, "step": 3262 }, { "epoch": 1.089664384705293, "grad_norm": 0.46301803588539736, "learning_rate": 8.03384043098017e-06, "loss": 0.1946, "step": 3263 }, { "epoch": 1.0899983302721656, "grad_norm": 0.5171554254848715, "learning_rate": 8.032295487185273e-06, "loss": 0.2109, "step": 3264 }, { "epoch": 1.0903322758390381, "grad_norm": 0.38795308444933635, "learning_rate": 8.030750085326438e-06, "loss": 0.1782, "step": 3265 }, { "epoch": 1.0906662214059109, "grad_norm": 0.47435267419979654, "learning_rate": 8.029204225637114e-06, "loss": 0.2044, "step": 3266 }, { "epoch": 1.0910001669727833, "grad_norm": 0.4123685515803338, "learning_rate": 8.027657908350826e-06, "loss": 0.1808, "step": 3267 }, { "epoch": 1.091334112539656, "grad_norm": 0.4947662241675235, "learning_rate": 8.026111133701162e-06, "loss": 0.2165, "step": 3268 }, { "epoch": 1.0916680581065286, "grad_norm": 0.5047171583821233, "learning_rate": 8.02456390192178e-06, "loss": 0.2062, "step": 3269 }, { "epoch": 1.0920020036734013, "grad_norm": 0.41658647064399756, "learning_rate": 8.023016213246406e-06, "loss": 0.1888, "step": 3270 }, { "epoch": 1.0923359492402738, "grad_norm": 0.44506089965581364, "learning_rate": 8.021468067908839e-06, "loss": 0.2019, "step": 3271 }, { "epoch": 1.0926698948071465, "grad_norm": 0.4885924662885089, "learning_rate": 8.019919466142945e-06, "loss": 0.1967, "step": 3272 }, { "epoch": 1.093003840374019, "grad_norm": 0.4729823871926014, "learning_rate": 8.018370408182655e-06, "loss": 0.1982, "step": 3273 }, { "epoch": 1.0933377859408917, "grad_norm": 0.4531774083310163, "learning_rate": 8.016820894261975e-06, "loss": 0.2072, "step": 3274 }, { "epoch": 1.0936717315077642, "grad_norm": 0.43365265276494597, "learning_rate": 8.015270924614977e-06, "loss": 0.2068, "step": 3275 }, { "epoch": 1.094005677074637, "grad_norm": 0.48378575313322314, "learning_rate": 8.013720499475804e-06, "loss": 0.2127, "step": 3276 }, { "epoch": 1.0943396226415094, "grad_norm": 0.4462044392760605, "learning_rate": 8.012169619078662e-06, "loss": 0.205, "step": 3277 }, { "epoch": 1.0946735682083821, "grad_norm": 0.4410639604021698, "learning_rate": 8.010618283657834e-06, "loss": 0.2022, "step": 3278 }, { "epoch": 1.0950075137752546, "grad_norm": 0.41187747484604836, "learning_rate": 8.009066493447664e-06, "loss": 0.1917, "step": 3279 }, { "epoch": 1.0953414593421273, "grad_norm": 0.4505023943246841, "learning_rate": 8.00751424868257e-06, "loss": 0.2128, "step": 3280 }, { "epoch": 1.0956754049089998, "grad_norm": 0.44407650340933, "learning_rate": 8.005961549597037e-06, "loss": 0.2023, "step": 3281 }, { "epoch": 1.0960093504758723, "grad_norm": 0.49103707615481035, "learning_rate": 8.004408396425617e-06, "loss": 0.1892, "step": 3282 }, { "epoch": 1.096343296042745, "grad_norm": 0.41128125303211827, "learning_rate": 8.002854789402931e-06, "loss": 0.1902, "step": 3283 }, { "epoch": 1.0966772416096175, "grad_norm": 0.42074924001477537, "learning_rate": 8.001300728763674e-06, "loss": 0.1842, "step": 3284 }, { "epoch": 1.0970111871764903, "grad_norm": 0.44415785033200067, "learning_rate": 7.999746214742603e-06, "loss": 0.2089, "step": 3285 }, { "epoch": 1.0973451327433628, "grad_norm": 0.3976674538896059, "learning_rate": 7.998191247574545e-06, "loss": 0.1876, "step": 3286 }, { "epoch": 1.0976790783102355, "grad_norm": 0.4602221499860677, "learning_rate": 7.996635827494397e-06, "loss": 0.1924, "step": 3287 }, { "epoch": 1.098013023877108, "grad_norm": 0.4569109408695937, "learning_rate": 7.995079954737122e-06, "loss": 0.2004, "step": 3288 }, { "epoch": 1.0983469694439807, "grad_norm": 0.46440228075117673, "learning_rate": 7.993523629537753e-06, "loss": 0.201, "step": 3289 }, { "epoch": 1.0986809150108532, "grad_norm": 0.4210453426591052, "learning_rate": 7.991966852131394e-06, "loss": 0.2009, "step": 3290 }, { "epoch": 1.099014860577726, "grad_norm": 0.4197516901784847, "learning_rate": 7.990409622753212e-06, "loss": 0.1993, "step": 3291 }, { "epoch": 1.0993488061445984, "grad_norm": 0.40831891802316234, "learning_rate": 7.988851941638445e-06, "loss": 0.1941, "step": 3292 }, { "epoch": 1.0996827517114711, "grad_norm": 0.4286089460485976, "learning_rate": 7.987293809022401e-06, "loss": 0.2052, "step": 3293 }, { "epoch": 1.1000166972783436, "grad_norm": 0.4800654204970883, "learning_rate": 7.985735225140452e-06, "loss": 0.2093, "step": 3294 }, { "epoch": 1.1003506428452163, "grad_norm": 0.4753445481388503, "learning_rate": 7.984176190228042e-06, "loss": 0.2095, "step": 3295 }, { "epoch": 1.1006845884120888, "grad_norm": 0.4414696105218096, "learning_rate": 7.98261670452068e-06, "loss": 0.2049, "step": 3296 }, { "epoch": 1.1010185339789613, "grad_norm": 0.46108006143762986, "learning_rate": 7.981056768253945e-06, "loss": 0.2192, "step": 3297 }, { "epoch": 1.101352479545834, "grad_norm": 0.40832219511245, "learning_rate": 7.979496381663486e-06, "loss": 0.1899, "step": 3298 }, { "epoch": 1.1016864251127065, "grad_norm": 0.43698727164514994, "learning_rate": 7.977935544985016e-06, "loss": 0.2032, "step": 3299 }, { "epoch": 1.1020203706795793, "grad_norm": 0.4212410386989296, "learning_rate": 7.976374258454317e-06, "loss": 0.1885, "step": 3300 }, { "epoch": 1.1023543162464517, "grad_norm": 0.41194340216926234, "learning_rate": 7.97481252230724e-06, "loss": 0.1988, "step": 3301 }, { "epoch": 1.1026882618133245, "grad_norm": 0.4276995668094239, "learning_rate": 7.973250336779705e-06, "loss": 0.2041, "step": 3302 }, { "epoch": 1.103022207380197, "grad_norm": 0.4857427454118398, "learning_rate": 7.971687702107698e-06, "loss": 0.194, "step": 3303 }, { "epoch": 1.1033561529470697, "grad_norm": 0.4220206495507896, "learning_rate": 7.970124618527274e-06, "loss": 0.1857, "step": 3304 }, { "epoch": 1.1036900985139422, "grad_norm": 0.4270813488896286, "learning_rate": 7.968561086274553e-06, "loss": 0.1964, "step": 3305 }, { "epoch": 1.104024044080815, "grad_norm": 0.4252133936392048, "learning_rate": 7.966997105585727e-06, "loss": 0.2049, "step": 3306 }, { "epoch": 1.1043579896476874, "grad_norm": 0.43526141115105044, "learning_rate": 7.965432676697052e-06, "loss": 0.2081, "step": 3307 }, { "epoch": 1.10469193521456, "grad_norm": 0.39251872349305744, "learning_rate": 7.963867799844855e-06, "loss": 0.1874, "step": 3308 }, { "epoch": 1.1050258807814326, "grad_norm": 0.3944749883657532, "learning_rate": 7.962302475265527e-06, "loss": 0.1925, "step": 3309 }, { "epoch": 1.1053598263483053, "grad_norm": 0.42273055292457995, "learning_rate": 7.960736703195533e-06, "loss": 0.1909, "step": 3310 }, { "epoch": 1.1056937719151778, "grad_norm": 0.4328304273348919, "learning_rate": 7.959170483871398e-06, "loss": 0.2037, "step": 3311 }, { "epoch": 1.1060277174820503, "grad_norm": 0.4745377240310588, "learning_rate": 7.957603817529715e-06, "loss": 0.2082, "step": 3312 }, { "epoch": 1.106361663048923, "grad_norm": 0.45131928288623807, "learning_rate": 7.956036704407153e-06, "loss": 0.1966, "step": 3313 }, { "epoch": 1.1066956086157955, "grad_norm": 0.41211914551956946, "learning_rate": 7.954469144740441e-06, "loss": 0.194, "step": 3314 }, { "epoch": 1.1070295541826682, "grad_norm": 0.45784621244927204, "learning_rate": 7.952901138766376e-06, "loss": 0.2016, "step": 3315 }, { "epoch": 1.1073634997495407, "grad_norm": 0.4387965133352464, "learning_rate": 7.951332686721825e-06, "loss": 0.2084, "step": 3316 }, { "epoch": 1.1076974453164135, "grad_norm": 0.41301207819634184, "learning_rate": 7.94976378884372e-06, "loss": 0.208, "step": 3317 }, { "epoch": 1.108031390883286, "grad_norm": 0.4522258424463022, "learning_rate": 7.948194445369065e-06, "loss": 0.2003, "step": 3318 }, { "epoch": 1.1083653364501587, "grad_norm": 0.45104153739353287, "learning_rate": 7.946624656534922e-06, "loss": 0.2051, "step": 3319 }, { "epoch": 1.1086992820170312, "grad_norm": 0.47403483171073474, "learning_rate": 7.945054422578432e-06, "loss": 0.2062, "step": 3320 }, { "epoch": 1.1090332275839039, "grad_norm": 0.44902598801173543, "learning_rate": 7.943483743736793e-06, "loss": 0.2075, "step": 3321 }, { "epoch": 1.1093671731507764, "grad_norm": 0.4303479990019636, "learning_rate": 7.941912620247276e-06, "loss": 0.1969, "step": 3322 }, { "epoch": 1.109701118717649, "grad_norm": 0.4960675003356271, "learning_rate": 7.940341052347219e-06, "loss": 0.2157, "step": 3323 }, { "epoch": 1.1100350642845216, "grad_norm": 0.42395659697291693, "learning_rate": 7.938769040274022e-06, "loss": 0.2068, "step": 3324 }, { "epoch": 1.1103690098513943, "grad_norm": 0.4340552153546476, "learning_rate": 7.937196584265161e-06, "loss": 0.2016, "step": 3325 }, { "epoch": 1.1107029554182668, "grad_norm": 0.4117355175562917, "learning_rate": 7.93562368455817e-06, "loss": 0.2022, "step": 3326 }, { "epoch": 1.1110369009851395, "grad_norm": 0.5466416870551023, "learning_rate": 7.934050341390659e-06, "loss": 0.1812, "step": 3327 }, { "epoch": 1.111370846552012, "grad_norm": 0.4095963376065435, "learning_rate": 7.932476555000294e-06, "loss": 0.1928, "step": 3328 }, { "epoch": 1.1117047921188847, "grad_norm": 0.45614123456720185, "learning_rate": 7.930902325624816e-06, "loss": 0.1947, "step": 3329 }, { "epoch": 1.1120387376857572, "grad_norm": 0.43679448876694665, "learning_rate": 7.929327653502032e-06, "loss": 0.1928, "step": 3330 }, { "epoch": 1.1123726832526297, "grad_norm": 0.4824447177607817, "learning_rate": 7.927752538869816e-06, "loss": 0.208, "step": 3331 }, { "epoch": 1.1127066288195024, "grad_norm": 0.4681733368568868, "learning_rate": 7.926176981966102e-06, "loss": 0.2127, "step": 3332 }, { "epoch": 1.113040574386375, "grad_norm": 0.5039341589751777, "learning_rate": 7.924600983028903e-06, "loss": 0.2128, "step": 3333 }, { "epoch": 1.1133745199532477, "grad_norm": 0.41043359902653814, "learning_rate": 7.92302454229629e-06, "loss": 0.1901, "step": 3334 }, { "epoch": 1.1137084655201201, "grad_norm": 0.42173077749314025, "learning_rate": 7.9214476600064e-06, "loss": 0.1946, "step": 3335 }, { "epoch": 1.1140424110869929, "grad_norm": 0.5686198735094352, "learning_rate": 7.919870336397444e-06, "loss": 0.2045, "step": 3336 }, { "epoch": 1.1143763566538654, "grad_norm": 0.4263229791855886, "learning_rate": 7.918292571707693e-06, "loss": 0.1998, "step": 3337 }, { "epoch": 1.114710302220738, "grad_norm": 0.4538265435685427, "learning_rate": 7.916714366175487e-06, "loss": 0.1977, "step": 3338 }, { "epoch": 1.1150442477876106, "grad_norm": 0.4145724982137613, "learning_rate": 7.915135720039233e-06, "loss": 0.1908, "step": 3339 }, { "epoch": 1.1153781933544833, "grad_norm": 0.4018593488275227, "learning_rate": 7.913556633537403e-06, "loss": 0.2041, "step": 3340 }, { "epoch": 1.1157121389213558, "grad_norm": 0.4354152187879273, "learning_rate": 7.91197710690854e-06, "loss": 0.2153, "step": 3341 }, { "epoch": 1.1160460844882285, "grad_norm": 0.43252317066527857, "learning_rate": 7.910397140391244e-06, "loss": 0.2014, "step": 3342 }, { "epoch": 1.116380030055101, "grad_norm": 0.4474161800654269, "learning_rate": 7.908816734224195e-06, "loss": 0.2027, "step": 3343 }, { "epoch": 1.1167139756219737, "grad_norm": 0.4972888829593969, "learning_rate": 7.907235888646126e-06, "loss": 0.22, "step": 3344 }, { "epoch": 1.1170479211888462, "grad_norm": 0.4505839982249951, "learning_rate": 7.905654603895843e-06, "loss": 0.1939, "step": 3345 }, { "epoch": 1.1173818667557187, "grad_norm": 0.4374753952465327, "learning_rate": 7.90407288021222e-06, "loss": 0.2152, "step": 3346 }, { "epoch": 1.1177158123225914, "grad_norm": 0.47875310572864327, "learning_rate": 7.902490717834196e-06, "loss": 0.2035, "step": 3347 }, { "epoch": 1.118049757889464, "grad_norm": 0.4382931827428161, "learning_rate": 7.90090811700077e-06, "loss": 0.2026, "step": 3348 }, { "epoch": 1.1183837034563366, "grad_norm": 0.4385411786075883, "learning_rate": 7.899325077951018e-06, "loss": 0.196, "step": 3349 }, { "epoch": 1.1187176490232091, "grad_norm": 0.38871833531460476, "learning_rate": 7.897741600924073e-06, "loss": 0.1769, "step": 3350 }, { "epoch": 1.1190515945900819, "grad_norm": 0.4519038045952756, "learning_rate": 7.896157686159142e-06, "loss": 0.2071, "step": 3351 }, { "epoch": 1.1193855401569544, "grad_norm": 0.47224347629867824, "learning_rate": 7.89457333389549e-06, "loss": 0.2029, "step": 3352 }, { "epoch": 1.119719485723827, "grad_norm": 0.4201454685987697, "learning_rate": 7.892988544372454e-06, "loss": 0.1991, "step": 3353 }, { "epoch": 1.1200534312906996, "grad_norm": 0.4289082154934355, "learning_rate": 7.891403317829434e-06, "loss": 0.1932, "step": 3354 }, { "epoch": 1.1203873768575723, "grad_norm": 0.4758235863184793, "learning_rate": 7.889817654505897e-06, "loss": 0.2088, "step": 3355 }, { "epoch": 1.1207213224244448, "grad_norm": 0.4198002688938867, "learning_rate": 7.888231554641377e-06, "loss": 0.1923, "step": 3356 }, { "epoch": 1.1210552679913175, "grad_norm": 0.43029326862973805, "learning_rate": 7.886645018475474e-06, "loss": 0.1915, "step": 3357 }, { "epoch": 1.12138921355819, "grad_norm": 0.6278196412133785, "learning_rate": 7.885058046247852e-06, "loss": 0.2142, "step": 3358 }, { "epoch": 1.1217231591250627, "grad_norm": 0.44070049869120914, "learning_rate": 7.88347063819824e-06, "loss": 0.2005, "step": 3359 }, { "epoch": 1.1220571046919352, "grad_norm": 0.4247481458151347, "learning_rate": 7.881882794566438e-06, "loss": 0.1995, "step": 3360 }, { "epoch": 1.1223910502588077, "grad_norm": 0.4145120275554937, "learning_rate": 7.880294515592304e-06, "loss": 0.1997, "step": 3361 }, { "epoch": 1.1227249958256804, "grad_norm": 0.42952334403896586, "learning_rate": 7.878705801515772e-06, "loss": 0.1919, "step": 3362 }, { "epoch": 1.123058941392553, "grad_norm": 0.43518221391978096, "learning_rate": 7.877116652576832e-06, "loss": 0.1963, "step": 3363 }, { "epoch": 1.1233928869594256, "grad_norm": 0.4414778182611209, "learning_rate": 7.875527069015545e-06, "loss": 0.2023, "step": 3364 }, { "epoch": 1.1237268325262981, "grad_norm": 0.48255357811517446, "learning_rate": 7.873937051072037e-06, "loss": 0.1912, "step": 3365 }, { "epoch": 1.1240607780931708, "grad_norm": 0.4400050750711768, "learning_rate": 7.872346598986496e-06, "loss": 0.1995, "step": 3366 }, { "epoch": 1.1243947236600433, "grad_norm": 0.4700279659952803, "learning_rate": 7.87075571299918e-06, "loss": 0.2191, "step": 3367 }, { "epoch": 1.124728669226916, "grad_norm": 0.4577800514967404, "learning_rate": 7.869164393350412e-06, "loss": 0.1821, "step": 3368 }, { "epoch": 1.1250626147937886, "grad_norm": 0.4447548135499851, "learning_rate": 7.86757264028058e-06, "loss": 0.1968, "step": 3369 }, { "epoch": 1.1253965603606613, "grad_norm": 0.4270542850533781, "learning_rate": 7.865980454030135e-06, "loss": 0.199, "step": 3370 }, { "epoch": 1.1257305059275338, "grad_norm": 0.3999805646644059, "learning_rate": 7.864387834839598e-06, "loss": 0.1932, "step": 3371 }, { "epoch": 1.1260644514944065, "grad_norm": 0.4350361424348412, "learning_rate": 7.86279478294955e-06, "loss": 0.1992, "step": 3372 }, { "epoch": 1.126398397061279, "grad_norm": 0.4781455786710474, "learning_rate": 7.861201298600642e-06, "loss": 0.2009, "step": 3373 }, { "epoch": 1.1267323426281517, "grad_norm": 0.4167397575968442, "learning_rate": 7.85960738203359e-06, "loss": 0.1914, "step": 3374 }, { "epoch": 1.1270662881950242, "grad_norm": 0.5743493123189489, "learning_rate": 7.858013033489171e-06, "loss": 0.2252, "step": 3375 }, { "epoch": 1.1274002337618967, "grad_norm": 0.4175739966743874, "learning_rate": 7.856418253208232e-06, "loss": 0.2073, "step": 3376 }, { "epoch": 1.1277341793287694, "grad_norm": 0.4040680795350153, "learning_rate": 7.85482304143168e-06, "loss": 0.1941, "step": 3377 }, { "epoch": 1.1280681248956421, "grad_norm": 0.42600690189169366, "learning_rate": 7.853227398400495e-06, "loss": 0.1942, "step": 3378 }, { "epoch": 1.1284020704625146, "grad_norm": 0.3704468500759086, "learning_rate": 7.851631324355717e-06, "loss": 0.1838, "step": 3379 }, { "epoch": 1.1287360160293871, "grad_norm": 0.4178851626097371, "learning_rate": 7.850034819538448e-06, "loss": 0.2019, "step": 3380 }, { "epoch": 1.1290699615962598, "grad_norm": 0.41850958354938694, "learning_rate": 7.848437884189864e-06, "loss": 0.1908, "step": 3381 }, { "epoch": 1.1294039071631323, "grad_norm": 0.46566524574700385, "learning_rate": 7.846840518551197e-06, "loss": 0.2139, "step": 3382 }, { "epoch": 1.129737852730005, "grad_norm": 0.43426626236451754, "learning_rate": 7.845242722863749e-06, "loss": 0.2025, "step": 3383 }, { "epoch": 1.1300717982968775, "grad_norm": 0.43143461237116987, "learning_rate": 7.843644497368886e-06, "loss": 0.2002, "step": 3384 }, { "epoch": 1.1304057438637503, "grad_norm": 0.5135500919403986, "learning_rate": 7.842045842308038e-06, "loss": 0.2073, "step": 3385 }, { "epoch": 1.1307396894306228, "grad_norm": 0.47400554849908777, "learning_rate": 7.840446757922704e-06, "loss": 0.2046, "step": 3386 }, { "epoch": 1.1310736349974955, "grad_norm": 0.43000385084533, "learning_rate": 7.838847244454441e-06, "loss": 0.2004, "step": 3387 }, { "epoch": 1.131407580564368, "grad_norm": 0.44538414882916644, "learning_rate": 7.837247302144874e-06, "loss": 0.1996, "step": 3388 }, { "epoch": 1.1317415261312407, "grad_norm": 0.4611482843216874, "learning_rate": 7.835646931235697e-06, "loss": 0.2063, "step": 3389 }, { "epoch": 1.1320754716981132, "grad_norm": 0.4155829516189207, "learning_rate": 7.83404613196866e-06, "loss": 0.1903, "step": 3390 }, { "epoch": 1.132409417264986, "grad_norm": 0.4401772944803297, "learning_rate": 7.832444904585587e-06, "loss": 0.1983, "step": 3391 }, { "epoch": 1.1327433628318584, "grad_norm": 0.550134028287982, "learning_rate": 7.83084324932836e-06, "loss": 0.2153, "step": 3392 }, { "epoch": 1.133077308398731, "grad_norm": 0.438562739435005, "learning_rate": 7.829241166438925e-06, "loss": 0.1966, "step": 3393 }, { "epoch": 1.1334112539656036, "grad_norm": 0.4134789296728246, "learning_rate": 7.827638656159302e-06, "loss": 0.195, "step": 3394 }, { "epoch": 1.133745199532476, "grad_norm": 0.4230251736651399, "learning_rate": 7.826035718731564e-06, "loss": 0.2065, "step": 3395 }, { "epoch": 1.1340791450993488, "grad_norm": 0.4080363331592151, "learning_rate": 7.824432354397857e-06, "loss": 0.1896, "step": 3396 }, { "epoch": 1.1344130906662213, "grad_norm": 0.5090023027343824, "learning_rate": 7.822828563400384e-06, "loss": 0.1999, "step": 3397 }, { "epoch": 1.134747036233094, "grad_norm": 0.4361556768676404, "learning_rate": 7.82122434598142e-06, "loss": 0.2119, "step": 3398 }, { "epoch": 1.1350809817999665, "grad_norm": 0.42843947744351496, "learning_rate": 7.819619702383299e-06, "loss": 0.2032, "step": 3399 }, { "epoch": 1.1354149273668392, "grad_norm": 0.4308637813200796, "learning_rate": 7.818014632848422e-06, "loss": 0.2194, "step": 3400 }, { "epoch": 1.1357488729337117, "grad_norm": 0.41419066838076307, "learning_rate": 7.816409137619254e-06, "loss": 0.2013, "step": 3401 }, { "epoch": 1.1360828185005845, "grad_norm": 0.4978926371404705, "learning_rate": 7.814803216938324e-06, "loss": 0.2085, "step": 3402 }, { "epoch": 1.136416764067457, "grad_norm": 0.4466119651320874, "learning_rate": 7.813196871048226e-06, "loss": 0.1905, "step": 3403 }, { "epoch": 1.1367507096343297, "grad_norm": 0.46519231603409, "learning_rate": 7.811590100191613e-06, "loss": 0.2014, "step": 3404 }, { "epoch": 1.1370846552012022, "grad_norm": 0.45057828935363947, "learning_rate": 7.809982904611213e-06, "loss": 0.2068, "step": 3405 }, { "epoch": 1.1374186007680749, "grad_norm": 0.4500947824438429, "learning_rate": 7.808375284549807e-06, "loss": 0.2019, "step": 3406 }, { "epoch": 1.1377525463349474, "grad_norm": 0.4538311873250359, "learning_rate": 7.806767240250248e-06, "loss": 0.2124, "step": 3407 }, { "epoch": 1.13808649190182, "grad_norm": 0.49784144787499535, "learning_rate": 7.805158771955448e-06, "loss": 0.2027, "step": 3408 }, { "epoch": 1.1384204374686926, "grad_norm": 0.4619216519515817, "learning_rate": 7.803549879908385e-06, "loss": 0.2047, "step": 3409 }, { "epoch": 1.138754383035565, "grad_norm": 0.4107807403289291, "learning_rate": 7.801940564352103e-06, "loss": 0.1956, "step": 3410 }, { "epoch": 1.1390883286024378, "grad_norm": 0.42471264599679803, "learning_rate": 7.800330825529707e-06, "loss": 0.1964, "step": 3411 }, { "epoch": 1.1394222741693105, "grad_norm": 0.40572323274981525, "learning_rate": 7.798720663684367e-06, "loss": 0.1928, "step": 3412 }, { "epoch": 1.139756219736183, "grad_norm": 0.4101254365791967, "learning_rate": 7.797110079059315e-06, "loss": 0.2079, "step": 3413 }, { "epoch": 1.1400901653030555, "grad_norm": 0.4411300893319862, "learning_rate": 7.795499071897855e-06, "loss": 0.2028, "step": 3414 }, { "epoch": 1.1404241108699282, "grad_norm": 0.4332006753538404, "learning_rate": 7.79388764244334e-06, "loss": 0.2156, "step": 3415 }, { "epoch": 1.1407580564368007, "grad_norm": 0.41551192652535196, "learning_rate": 7.792275790939202e-06, "loss": 0.2108, "step": 3416 }, { "epoch": 1.1410920020036734, "grad_norm": 0.4225371412103681, "learning_rate": 7.790663517628927e-06, "loss": 0.1969, "step": 3417 }, { "epoch": 1.141425947570546, "grad_norm": 0.4391114729864135, "learning_rate": 7.789050822756068e-06, "loss": 0.2064, "step": 3418 }, { "epoch": 1.1417598931374187, "grad_norm": 0.40251758639113394, "learning_rate": 7.787437706564243e-06, "loss": 0.1882, "step": 3419 }, { "epoch": 1.1420938387042912, "grad_norm": 0.41418603101346974, "learning_rate": 7.78582416929713e-06, "loss": 0.1919, "step": 3420 }, { "epoch": 1.1424277842711639, "grad_norm": 0.41423945325619144, "learning_rate": 7.784210211198475e-06, "loss": 0.2046, "step": 3421 }, { "epoch": 1.1427617298380364, "grad_norm": 0.45911999518986674, "learning_rate": 7.782595832512086e-06, "loss": 0.2102, "step": 3422 }, { "epoch": 1.143095675404909, "grad_norm": 0.446295856392653, "learning_rate": 7.780981033481832e-06, "loss": 0.2115, "step": 3423 }, { "epoch": 1.1434296209717816, "grad_norm": 0.47779947300607567, "learning_rate": 7.779365814351648e-06, "loss": 0.212, "step": 3424 }, { "epoch": 1.143763566538654, "grad_norm": 0.5041795675046281, "learning_rate": 7.77775017536553e-06, "loss": 0.2201, "step": 3425 }, { "epoch": 1.1440975121055268, "grad_norm": 0.4626546155813629, "learning_rate": 7.776134116767544e-06, "loss": 0.2161, "step": 3426 }, { "epoch": 1.1444314576723995, "grad_norm": 0.43618214464201066, "learning_rate": 7.774517638801808e-06, "loss": 0.2068, "step": 3427 }, { "epoch": 1.144765403239272, "grad_norm": 0.4254363039124097, "learning_rate": 7.772900741712516e-06, "loss": 0.2032, "step": 3428 }, { "epoch": 1.1450993488061445, "grad_norm": 0.45525908259895725, "learning_rate": 7.771283425743916e-06, "loss": 0.2043, "step": 3429 }, { "epoch": 1.1454332943730172, "grad_norm": 0.41941227927769686, "learning_rate": 7.769665691140325e-06, "loss": 0.2057, "step": 3430 }, { "epoch": 1.1457672399398897, "grad_norm": 0.4616850221911409, "learning_rate": 7.76804753814612e-06, "loss": 0.1962, "step": 3431 }, { "epoch": 1.1461011855067624, "grad_norm": 0.44619639348887946, "learning_rate": 7.76642896700574e-06, "loss": 0.1999, "step": 3432 }, { "epoch": 1.146435131073635, "grad_norm": 0.42643033930129975, "learning_rate": 7.764809977963692e-06, "loss": 0.1966, "step": 3433 }, { "epoch": 1.1467690766405076, "grad_norm": 0.41159559705421556, "learning_rate": 7.763190571264542e-06, "loss": 0.1919, "step": 3434 }, { "epoch": 1.1471030222073801, "grad_norm": 0.4303534281592218, "learning_rate": 7.761570747152923e-06, "loss": 0.1985, "step": 3435 }, { "epoch": 1.1474369677742529, "grad_norm": 0.42350721038806954, "learning_rate": 7.759950505873523e-06, "loss": 0.193, "step": 3436 }, { "epoch": 1.1477709133411254, "grad_norm": 0.4596329657408379, "learning_rate": 7.758329847671103e-06, "loss": 0.2052, "step": 3437 }, { "epoch": 1.148104858907998, "grad_norm": 0.4489699249368635, "learning_rate": 7.75670877279048e-06, "loss": 0.2104, "step": 3438 }, { "epoch": 1.1484388044748706, "grad_norm": 0.43084296877947265, "learning_rate": 7.755087281476539e-06, "loss": 0.198, "step": 3439 }, { "epoch": 1.1487727500417433, "grad_norm": 0.4057179440867843, "learning_rate": 7.753465373974223e-06, "loss": 0.1999, "step": 3440 }, { "epoch": 1.1491066956086158, "grad_norm": 0.46218899540177816, "learning_rate": 7.751843050528543e-06, "loss": 0.199, "step": 3441 }, { "epoch": 1.1494406411754885, "grad_norm": 0.4471325368000403, "learning_rate": 7.750220311384567e-06, "loss": 0.2127, "step": 3442 }, { "epoch": 1.149774586742361, "grad_norm": 0.4508117782583261, "learning_rate": 7.748597156787429e-06, "loss": 0.2021, "step": 3443 }, { "epoch": 1.1501085323092335, "grad_norm": 0.42529589186934397, "learning_rate": 7.746973586982328e-06, "loss": 0.2038, "step": 3444 }, { "epoch": 1.1504424778761062, "grad_norm": 0.4501523886838213, "learning_rate": 7.745349602214522e-06, "loss": 0.2114, "step": 3445 }, { "epoch": 1.1507764234429787, "grad_norm": 0.44047520372496385, "learning_rate": 7.743725202729335e-06, "loss": 0.1958, "step": 3446 }, { "epoch": 1.1511103690098514, "grad_norm": 0.4644912732615579, "learning_rate": 7.742100388772148e-06, "loss": 0.2024, "step": 3447 }, { "epoch": 1.151444314576724, "grad_norm": 0.41485035644284507, "learning_rate": 7.74047516058841e-06, "loss": 0.19, "step": 3448 }, { "epoch": 1.1517782601435966, "grad_norm": 0.44397761503160493, "learning_rate": 7.73884951842363e-06, "loss": 0.2103, "step": 3449 }, { "epoch": 1.1521122057104691, "grad_norm": 0.4329722118379211, "learning_rate": 7.737223462523383e-06, "loss": 0.2093, "step": 3450 }, { "epoch": 1.1524461512773418, "grad_norm": 0.4536966006310295, "learning_rate": 7.735596993133303e-06, "loss": 0.2016, "step": 3451 }, { "epoch": 1.1527800968442143, "grad_norm": 0.450365844932539, "learning_rate": 7.733970110499086e-06, "loss": 0.2061, "step": 3452 }, { "epoch": 1.153114042411087, "grad_norm": 0.4556419046795417, "learning_rate": 7.732342814866489e-06, "loss": 0.2023, "step": 3453 }, { "epoch": 1.1534479879779596, "grad_norm": 0.49031666549692304, "learning_rate": 7.730715106481342e-06, "loss": 0.2149, "step": 3454 }, { "epoch": 1.1537819335448323, "grad_norm": 0.3880997191032563, "learning_rate": 7.729086985589523e-06, "loss": 0.1861, "step": 3455 }, { "epoch": 1.1541158791117048, "grad_norm": 0.37937141823654447, "learning_rate": 7.72745845243698e-06, "loss": 0.1863, "step": 3456 }, { "epoch": 1.1544498246785775, "grad_norm": 0.49966955106951333, "learning_rate": 7.725829507269723e-06, "loss": 0.1955, "step": 3457 }, { "epoch": 1.15478377024545, "grad_norm": 0.4960476776887278, "learning_rate": 7.724200150333826e-06, "loss": 0.2025, "step": 3458 }, { "epoch": 1.1551177158123225, "grad_norm": 0.4037417613399222, "learning_rate": 7.722570381875418e-06, "loss": 0.1939, "step": 3459 }, { "epoch": 1.1554516613791952, "grad_norm": 0.4646572771220113, "learning_rate": 7.720940202140698e-06, "loss": 0.2096, "step": 3460 }, { "epoch": 1.155785606946068, "grad_norm": 0.40519117416885664, "learning_rate": 7.71930961137592e-06, "loss": 0.1901, "step": 3461 }, { "epoch": 1.1561195525129404, "grad_norm": 0.45808904281917306, "learning_rate": 7.717678609827409e-06, "loss": 0.2198, "step": 3462 }, { "epoch": 1.156453498079813, "grad_norm": 0.49491562964628316, "learning_rate": 7.716047197741543e-06, "loss": 0.21, "step": 3463 }, { "epoch": 1.1567874436466856, "grad_norm": 0.4560900514019919, "learning_rate": 7.714415375364768e-06, "loss": 0.2124, "step": 3464 }, { "epoch": 1.1571213892135581, "grad_norm": 0.4567473518841226, "learning_rate": 7.712783142943588e-06, "loss": 0.197, "step": 3465 }, { "epoch": 1.1574553347804308, "grad_norm": 0.45021820733197226, "learning_rate": 7.711150500724574e-06, "loss": 0.2032, "step": 3466 }, { "epoch": 1.1577892803473033, "grad_norm": 0.43204683278363776, "learning_rate": 7.709517448954353e-06, "loss": 0.1972, "step": 3467 }, { "epoch": 1.158123225914176, "grad_norm": 0.43096997145471116, "learning_rate": 7.707883987879617e-06, "loss": 0.1837, "step": 3468 }, { "epoch": 1.1584571714810485, "grad_norm": 0.4107749143716421, "learning_rate": 7.70625011774712e-06, "loss": 0.193, "step": 3469 }, { "epoch": 1.1587911170479213, "grad_norm": 0.42555377932051347, "learning_rate": 7.70461583880368e-06, "loss": 0.2027, "step": 3470 }, { "epoch": 1.1591250626147938, "grad_norm": 0.537801799999701, "learning_rate": 7.70298115129617e-06, "loss": 0.2128, "step": 3471 }, { "epoch": 1.1594590081816665, "grad_norm": 0.4701917219067799, "learning_rate": 7.701346055471533e-06, "loss": 0.1973, "step": 3472 }, { "epoch": 1.159792953748539, "grad_norm": 0.4336001405965153, "learning_rate": 7.699710551576763e-06, "loss": 0.1959, "step": 3473 }, { "epoch": 1.1601268993154115, "grad_norm": 0.5109622071728623, "learning_rate": 7.69807463985893e-06, "loss": 0.2068, "step": 3474 }, { "epoch": 1.1604608448822842, "grad_norm": 0.4399649168868142, "learning_rate": 7.696438320565152e-06, "loss": 0.2018, "step": 3475 }, { "epoch": 1.160794790449157, "grad_norm": 0.434807916850808, "learning_rate": 7.694801593942615e-06, "loss": 0.1922, "step": 3476 }, { "epoch": 1.1611287360160294, "grad_norm": 0.4307111475753005, "learning_rate": 7.69316446023857e-06, "loss": 0.1963, "step": 3477 }, { "epoch": 1.161462681582902, "grad_norm": 0.39136888781514806, "learning_rate": 7.691526919700319e-06, "loss": 0.1892, "step": 3478 }, { "epoch": 1.1617966271497746, "grad_norm": 0.5072994571524962, "learning_rate": 7.689888972575237e-06, "loss": 0.2129, "step": 3479 }, { "epoch": 1.162130572716647, "grad_norm": 0.45165856698956547, "learning_rate": 7.688250619110752e-06, "loss": 0.199, "step": 3480 }, { "epoch": 1.1624645182835198, "grad_norm": 0.47159071816202563, "learning_rate": 7.686611859554361e-06, "loss": 0.2176, "step": 3481 }, { "epoch": 1.1627984638503923, "grad_norm": 0.42425403478938783, "learning_rate": 7.684972694153612e-06, "loss": 0.2011, "step": 3482 }, { "epoch": 1.163132409417265, "grad_norm": 0.46430876367122836, "learning_rate": 7.683333123156122e-06, "loss": 0.2024, "step": 3483 }, { "epoch": 1.1634663549841375, "grad_norm": 0.4252770376389329, "learning_rate": 7.681693146809572e-06, "loss": 0.1989, "step": 3484 }, { "epoch": 1.1638003005510102, "grad_norm": 0.38138227868488445, "learning_rate": 7.680052765361693e-06, "loss": 0.1877, "step": 3485 }, { "epoch": 1.1641342461178827, "grad_norm": 0.4621436050450307, "learning_rate": 7.678411979060289e-06, "loss": 0.2138, "step": 3486 }, { "epoch": 1.1644681916847555, "grad_norm": 0.4910521500257586, "learning_rate": 7.676770788153218e-06, "loss": 0.2029, "step": 3487 }, { "epoch": 1.164802137251628, "grad_norm": 0.45326505140278767, "learning_rate": 7.6751291928884e-06, "loss": 0.2071, "step": 3488 }, { "epoch": 1.1651360828185007, "grad_norm": 0.43879315421992415, "learning_rate": 7.673487193513821e-06, "loss": 0.2008, "step": 3489 }, { "epoch": 1.1654700283853732, "grad_norm": 0.4220316459234618, "learning_rate": 7.671844790277522e-06, "loss": 0.201, "step": 3490 }, { "epoch": 1.1658039739522459, "grad_norm": 0.4440932041786498, "learning_rate": 7.670201983427606e-06, "loss": 0.2145, "step": 3491 }, { "epoch": 1.1661379195191184, "grad_norm": 0.42691236313301645, "learning_rate": 7.66855877321224e-06, "loss": 0.2015, "step": 3492 }, { "epoch": 1.1664718650859909, "grad_norm": 0.501936815427682, "learning_rate": 7.666915159879651e-06, "loss": 0.2119, "step": 3493 }, { "epoch": 1.1668058106528636, "grad_norm": 0.44911542211817373, "learning_rate": 7.665271143678125e-06, "loss": 0.2143, "step": 3494 }, { "epoch": 1.167139756219736, "grad_norm": 0.4624678777626138, "learning_rate": 7.66362672485601e-06, "loss": 0.2079, "step": 3495 }, { "epoch": 1.1674737017866088, "grad_norm": 0.40846524379588184, "learning_rate": 7.661981903661715e-06, "loss": 0.1976, "step": 3496 }, { "epoch": 1.1678076473534813, "grad_norm": 0.4103460531002227, "learning_rate": 7.66033668034371e-06, "loss": 0.1968, "step": 3497 }, { "epoch": 1.168141592920354, "grad_norm": 0.4630302942431863, "learning_rate": 7.658691055150524e-06, "loss": 0.214, "step": 3498 }, { "epoch": 1.1684755384872265, "grad_norm": 0.4578551544166337, "learning_rate": 7.65704502833075e-06, "loss": 0.2043, "step": 3499 }, { "epoch": 1.1688094840540992, "grad_norm": 0.4524932747516926, "learning_rate": 7.655398600133037e-06, "loss": 0.2279, "step": 3500 }, { "epoch": 1.1691434296209717, "grad_norm": 0.881428383101394, "learning_rate": 7.653751770806101e-06, "loss": 0.1923, "step": 3501 }, { "epoch": 1.1694773751878444, "grad_norm": 0.41607736093404346, "learning_rate": 7.652104540598712e-06, "loss": 0.2049, "step": 3502 }, { "epoch": 1.169811320754717, "grad_norm": 0.40214250677631747, "learning_rate": 7.650456909759707e-06, "loss": 0.1925, "step": 3503 }, { "epoch": 1.1701452663215897, "grad_norm": 0.4504111125904756, "learning_rate": 7.648808878537976e-06, "loss": 0.192, "step": 3504 }, { "epoch": 1.1704792118884622, "grad_norm": 0.4602724838722225, "learning_rate": 7.647160447182475e-06, "loss": 0.2087, "step": 3505 }, { "epoch": 1.1708131574553349, "grad_norm": 0.4104405110762258, "learning_rate": 7.645511615942218e-06, "loss": 0.192, "step": 3506 }, { "epoch": 1.1711471030222074, "grad_norm": 0.36937151858707484, "learning_rate": 7.643862385066285e-06, "loss": 0.1778, "step": 3507 }, { "epoch": 1.1714810485890799, "grad_norm": 0.4572947626174961, "learning_rate": 7.642212754803804e-06, "loss": 0.1994, "step": 3508 }, { "epoch": 1.1718149941559526, "grad_norm": 0.4816706066033821, "learning_rate": 7.640562725403978e-06, "loss": 0.2051, "step": 3509 }, { "epoch": 1.1721489397228253, "grad_norm": 0.4382076984794106, "learning_rate": 7.638912297116061e-06, "loss": 0.2049, "step": 3510 }, { "epoch": 1.1724828852896978, "grad_norm": 0.40178546966678536, "learning_rate": 7.637261470189369e-06, "loss": 0.1978, "step": 3511 }, { "epoch": 1.1728168308565703, "grad_norm": 0.39280539287687943, "learning_rate": 7.635610244873277e-06, "loss": 0.1912, "step": 3512 }, { "epoch": 1.173150776423443, "grad_norm": 0.46472165624676787, "learning_rate": 7.633958621417226e-06, "loss": 0.215, "step": 3513 }, { "epoch": 1.1734847219903155, "grad_norm": 0.4297956230414589, "learning_rate": 7.632306600070711e-06, "loss": 0.1893, "step": 3514 }, { "epoch": 1.1738186675571882, "grad_norm": 0.43762007892015997, "learning_rate": 7.63065418108329e-06, "loss": 0.1916, "step": 3515 }, { "epoch": 1.1741526131240607, "grad_norm": 0.4897234537598655, "learning_rate": 7.62900136470458e-06, "loss": 0.1981, "step": 3516 }, { "epoch": 1.1744865586909334, "grad_norm": 0.4051411254394592, "learning_rate": 7.627348151184257e-06, "loss": 0.1918, "step": 3517 }, { "epoch": 1.174820504257806, "grad_norm": 0.4902705727555018, "learning_rate": 7.625694540772062e-06, "loss": 0.215, "step": 3518 }, { "epoch": 1.1751544498246786, "grad_norm": 0.459335122777554, "learning_rate": 7.624040533717789e-06, "loss": 0.1931, "step": 3519 }, { "epoch": 1.1754883953915511, "grad_norm": 0.43076363942331786, "learning_rate": 7.622386130271296e-06, "loss": 0.1907, "step": 3520 }, { "epoch": 1.1758223409584239, "grad_norm": 0.4783616479824569, "learning_rate": 7.620731330682501e-06, "loss": 0.212, "step": 3521 }, { "epoch": 1.1761562865252964, "grad_norm": 0.4356060638682757, "learning_rate": 7.6190761352013795e-06, "loss": 0.1951, "step": 3522 }, { "epoch": 1.1764902320921689, "grad_norm": 0.4594433502156529, "learning_rate": 7.61742054407797e-06, "loss": 0.1982, "step": 3523 }, { "epoch": 1.1768241776590416, "grad_norm": 0.45490492503768754, "learning_rate": 7.615764557562368e-06, "loss": 0.198, "step": 3524 }, { "epoch": 1.1771581232259143, "grad_norm": 0.46105614813698614, "learning_rate": 7.6141081759047305e-06, "loss": 0.2014, "step": 3525 }, { "epoch": 1.1774920687927868, "grad_norm": 0.46026783313250486, "learning_rate": 7.612451399355273e-06, "loss": 0.2189, "step": 3526 }, { "epoch": 1.1778260143596593, "grad_norm": 0.4356200212691802, "learning_rate": 7.610794228164271e-06, "loss": 0.2029, "step": 3527 }, { "epoch": 1.178159959926532, "grad_norm": 0.4297337094056724, "learning_rate": 7.60913666258206e-06, "loss": 0.2142, "step": 3528 }, { "epoch": 1.1784939054934045, "grad_norm": 0.40876834016172153, "learning_rate": 7.6074787028590325e-06, "loss": 0.1937, "step": 3529 }, { "epoch": 1.1788278510602772, "grad_norm": 0.3885624115347344, "learning_rate": 7.605820349245645e-06, "loss": 0.1974, "step": 3530 }, { "epoch": 1.1791617966271497, "grad_norm": 0.4338874777406309, "learning_rate": 7.6041616019924125e-06, "loss": 0.1979, "step": 3531 }, { "epoch": 1.1794957421940224, "grad_norm": 0.4408923442251734, "learning_rate": 7.602502461349907e-06, "loss": 0.2044, "step": 3532 }, { "epoch": 1.179829687760895, "grad_norm": 0.41283196570865327, "learning_rate": 7.600842927568761e-06, "loss": 0.208, "step": 3533 }, { "epoch": 1.1801636333277676, "grad_norm": 0.42555424705496336, "learning_rate": 7.599183000899667e-06, "loss": 0.2055, "step": 3534 }, { "epoch": 1.1804975788946401, "grad_norm": 0.43912943678038574, "learning_rate": 7.597522681593375e-06, "loss": 0.2036, "step": 3535 }, { "epoch": 1.1808315244615128, "grad_norm": 0.45635857674949853, "learning_rate": 7.595861969900698e-06, "loss": 0.2137, "step": 3536 }, { "epoch": 1.1811654700283853, "grad_norm": 0.4757314323116078, "learning_rate": 7.5942008660725065e-06, "loss": 0.2046, "step": 3537 }, { "epoch": 1.181499415595258, "grad_norm": 0.3847032911424942, "learning_rate": 7.5925393703597265e-06, "loss": 0.1854, "step": 3538 }, { "epoch": 1.1818333611621306, "grad_norm": 0.40804861098671225, "learning_rate": 7.59087748301335e-06, "loss": 0.1961, "step": 3539 }, { "epoch": 1.1821673067290033, "grad_norm": 0.4048208000344706, "learning_rate": 7.5892152042844224e-06, "loss": 0.201, "step": 3540 }, { "epoch": 1.1825012522958758, "grad_norm": 0.41873793093657335, "learning_rate": 7.58755253442405e-06, "loss": 0.1955, "step": 3541 }, { "epoch": 1.1828351978627483, "grad_norm": 0.45334205386003373, "learning_rate": 7.585889473683401e-06, "loss": 0.2101, "step": 3542 }, { "epoch": 1.183169143429621, "grad_norm": 0.40478766876394096, "learning_rate": 7.5842260223137e-06, "loss": 0.1894, "step": 3543 }, { "epoch": 1.1835030889964935, "grad_norm": 0.4299142249734497, "learning_rate": 7.5825621805662285e-06, "loss": 0.1943, "step": 3544 }, { "epoch": 1.1838370345633662, "grad_norm": 0.3919396554368549, "learning_rate": 7.580897948692332e-06, "loss": 0.176, "step": 3545 }, { "epoch": 1.1841709801302387, "grad_norm": 0.4507907583061385, "learning_rate": 7.579233326943412e-06, "loss": 0.2061, "step": 3546 }, { "epoch": 1.1845049256971114, "grad_norm": 0.43486901958401214, "learning_rate": 7.577568315570925e-06, "loss": 0.2101, "step": 3547 }, { "epoch": 1.184838871263984, "grad_norm": 0.611895109414638, "learning_rate": 7.5759029148263975e-06, "loss": 0.2105, "step": 3548 }, { "epoch": 1.1851728168308566, "grad_norm": 0.46180151479561693, "learning_rate": 7.574237124961403e-06, "loss": 0.1991, "step": 3549 }, { "epoch": 1.1855067623977291, "grad_norm": 0.40920033333286243, "learning_rate": 7.572570946227582e-06, "loss": 0.1985, "step": 3550 }, { "epoch": 1.1858407079646018, "grad_norm": 0.5521228822921554, "learning_rate": 7.570904378876627e-06, "loss": 0.2064, "step": 3551 }, { "epoch": 1.1861746535314743, "grad_norm": 0.4360368544101082, "learning_rate": 7.569237423160294e-06, "loss": 0.1977, "step": 3552 }, { "epoch": 1.186508599098347, "grad_norm": 0.5055283028151625, "learning_rate": 7.567570079330395e-06, "loss": 0.2105, "step": 3553 }, { "epoch": 1.1868425446652195, "grad_norm": 0.47535148943957384, "learning_rate": 7.565902347638806e-06, "loss": 0.2198, "step": 3554 }, { "epoch": 1.1871764902320923, "grad_norm": 0.4567763936303909, "learning_rate": 7.564234228337452e-06, "loss": 0.2059, "step": 3555 }, { "epoch": 1.1875104357989648, "grad_norm": 0.43216007765801734, "learning_rate": 7.5625657216783276e-06, "loss": 0.2042, "step": 3556 }, { "epoch": 1.1878443813658373, "grad_norm": 0.4437876561197619, "learning_rate": 7.560896827913478e-06, "loss": 0.2085, "step": 3557 }, { "epoch": 1.18817832693271, "grad_norm": 0.4136617092910776, "learning_rate": 7.559227547295007e-06, "loss": 0.1918, "step": 3558 }, { "epoch": 1.1885122724995827, "grad_norm": 0.3950033730804212, "learning_rate": 7.557557880075082e-06, "loss": 0.191, "step": 3559 }, { "epoch": 1.1888462180664552, "grad_norm": 0.44611070160298427, "learning_rate": 7.555887826505926e-06, "loss": 0.1998, "step": 3560 }, { "epoch": 1.1891801636333277, "grad_norm": 0.49252383091679314, "learning_rate": 7.554217386839817e-06, "loss": 0.2101, "step": 3561 }, { "epoch": 1.1895141092002004, "grad_norm": 0.43600290528031405, "learning_rate": 7.552546561329097e-06, "loss": 0.2078, "step": 3562 }, { "epoch": 1.189848054767073, "grad_norm": 0.4641562946328928, "learning_rate": 7.550875350226166e-06, "loss": 0.2018, "step": 3563 }, { "epoch": 1.1901820003339456, "grad_norm": 0.4107029251971812, "learning_rate": 7.549203753783475e-06, "loss": 0.1873, "step": 3564 }, { "epoch": 1.190515945900818, "grad_norm": 0.8638079712764937, "learning_rate": 7.547531772253542e-06, "loss": 0.2183, "step": 3565 }, { "epoch": 1.1908498914676908, "grad_norm": 0.46739237740310874, "learning_rate": 7.54585940588894e-06, "loss": 0.2061, "step": 3566 }, { "epoch": 1.1911838370345633, "grad_norm": 0.44207366884569477, "learning_rate": 7.544186654942296e-06, "loss": 0.1888, "step": 3567 }, { "epoch": 1.191517782601436, "grad_norm": 0.40029726288340856, "learning_rate": 7.542513519666302e-06, "loss": 0.1842, "step": 3568 }, { "epoch": 1.1918517281683085, "grad_norm": 0.45850744724415704, "learning_rate": 7.540840000313705e-06, "loss": 0.2071, "step": 3569 }, { "epoch": 1.1921856737351813, "grad_norm": 0.4217447661377353, "learning_rate": 7.539166097137306e-06, "loss": 0.1989, "step": 3570 }, { "epoch": 1.1925196193020537, "grad_norm": 0.47099508009727703, "learning_rate": 7.537491810389972e-06, "loss": 0.2064, "step": 3571 }, { "epoch": 1.1928535648689262, "grad_norm": 0.49006517879961325, "learning_rate": 7.535817140324622e-06, "loss": 0.2089, "step": 3572 }, { "epoch": 1.193187510435799, "grad_norm": 0.45108690927294987, "learning_rate": 7.534142087194234e-06, "loss": 0.2034, "step": 3573 }, { "epoch": 1.1935214560026717, "grad_norm": 0.40647564236000283, "learning_rate": 7.532466651251846e-06, "loss": 0.1976, "step": 3574 }, { "epoch": 1.1938554015695442, "grad_norm": 0.4361047207843498, "learning_rate": 7.5307908327505506e-06, "loss": 0.186, "step": 3575 }, { "epoch": 1.1941893471364167, "grad_norm": 0.41159680206823224, "learning_rate": 7.529114631943501e-06, "loss": 0.18, "step": 3576 }, { "epoch": 1.1945232927032894, "grad_norm": 0.5154574701726614, "learning_rate": 7.527438049083908e-06, "loss": 0.2203, "step": 3577 }, { "epoch": 1.1948572382701619, "grad_norm": 0.48966209480112627, "learning_rate": 7.5257610844250385e-06, "loss": 0.2164, "step": 3578 }, { "epoch": 1.1951911838370346, "grad_norm": 0.484286723912471, "learning_rate": 7.524083738220214e-06, "loss": 0.2147, "step": 3579 }, { "epoch": 1.195525129403907, "grad_norm": 0.4568042601993156, "learning_rate": 7.522406010722824e-06, "loss": 0.2115, "step": 3580 }, { "epoch": 1.1958590749707798, "grad_norm": 0.4637639130204421, "learning_rate": 7.5207279021863045e-06, "loss": 0.2011, "step": 3581 }, { "epoch": 1.1961930205376523, "grad_norm": 0.4503469517122692, "learning_rate": 7.5190494128641545e-06, "loss": 0.2017, "step": 3582 }, { "epoch": 1.196526966104525, "grad_norm": 0.4115714466848455, "learning_rate": 7.5173705430099295e-06, "loss": 0.1894, "step": 3583 }, { "epoch": 1.1968609116713975, "grad_norm": 0.43313860284242234, "learning_rate": 7.515691292877243e-06, "loss": 0.2049, "step": 3584 }, { "epoch": 1.1971948572382702, "grad_norm": 0.44242325330952814, "learning_rate": 7.514011662719766e-06, "loss": 0.2134, "step": 3585 }, { "epoch": 1.1975288028051427, "grad_norm": 0.49641971012087477, "learning_rate": 7.512331652791226e-06, "loss": 0.2194, "step": 3586 }, { "epoch": 1.1978627483720155, "grad_norm": 0.4505528169848719, "learning_rate": 7.510651263345408e-06, "loss": 0.2128, "step": 3587 }, { "epoch": 1.198196693938888, "grad_norm": 0.4207298979058369, "learning_rate": 7.508970494636154e-06, "loss": 0.203, "step": 3588 }, { "epoch": 1.1985306395057607, "grad_norm": 0.41439317908897016, "learning_rate": 7.507289346917366e-06, "loss": 0.2028, "step": 3589 }, { "epoch": 1.1988645850726332, "grad_norm": 0.3898376423037865, "learning_rate": 7.505607820442997e-06, "loss": 0.1838, "step": 3590 }, { "epoch": 1.1991985306395057, "grad_norm": 0.44797911557528114, "learning_rate": 7.503925915467066e-06, "loss": 0.2067, "step": 3591 }, { "epoch": 1.1995324762063784, "grad_norm": 0.4517517097789117, "learning_rate": 7.502243632243645e-06, "loss": 0.2047, "step": 3592 }, { "epoch": 1.1998664217732509, "grad_norm": 0.43869540600955803, "learning_rate": 7.500560971026856e-06, "loss": 0.2081, "step": 3593 }, { "epoch": 1.2002003673401236, "grad_norm": 0.46044286409999025, "learning_rate": 7.498877932070892e-06, "loss": 0.2008, "step": 3594 }, { "epoch": 1.200534312906996, "grad_norm": 0.46702326825878965, "learning_rate": 7.497194515629992e-06, "loss": 0.1912, "step": 3595 }, { "epoch": 1.2008682584738688, "grad_norm": 0.42290587241087874, "learning_rate": 7.4955107219584575e-06, "loss": 0.1955, "step": 3596 }, { "epoch": 1.2012022040407413, "grad_norm": 0.42810888590783125, "learning_rate": 7.493826551310645e-06, "loss": 0.2, "step": 3597 }, { "epoch": 1.201536149607614, "grad_norm": 0.44645503536762926, "learning_rate": 7.492142003940966e-06, "loss": 0.2086, "step": 3598 }, { "epoch": 1.2018700951744865, "grad_norm": 0.3919758646583704, "learning_rate": 7.490457080103895e-06, "loss": 0.1833, "step": 3599 }, { "epoch": 1.2022040407413592, "grad_norm": 0.440006706453243, "learning_rate": 7.4887717800539584e-06, "loss": 0.2122, "step": 3600 }, { "epoch": 1.2025379863082317, "grad_norm": 0.41535578129544654, "learning_rate": 7.48708610404574e-06, "loss": 0.1889, "step": 3601 }, { "epoch": 1.2028719318751044, "grad_norm": 0.4473257740232535, "learning_rate": 7.48540005233388e-06, "loss": 0.1918, "step": 3602 }, { "epoch": 1.203205877441977, "grad_norm": 0.4341216912769296, "learning_rate": 7.483713625173078e-06, "loss": 0.2096, "step": 3603 }, { "epoch": 1.2035398230088497, "grad_norm": 0.40777171525617034, "learning_rate": 7.482026822818088e-06, "loss": 0.1919, "step": 3604 }, { "epoch": 1.2038737685757221, "grad_norm": 0.48363102294243154, "learning_rate": 7.480339645523721e-06, "loss": 0.2269, "step": 3605 }, { "epoch": 1.2042077141425946, "grad_norm": 0.40851845309559937, "learning_rate": 7.478652093544846e-06, "loss": 0.1787, "step": 3606 }, { "epoch": 1.2045416597094674, "grad_norm": 0.3933853695150573, "learning_rate": 7.476964167136388e-06, "loss": 0.1943, "step": 3607 }, { "epoch": 1.20487560527634, "grad_norm": 0.4244304751713145, "learning_rate": 7.475275866553326e-06, "loss": 0.2014, "step": 3608 }, { "epoch": 1.2052095508432126, "grad_norm": 0.6049087445094025, "learning_rate": 7.473587192050698e-06, "loss": 0.2119, "step": 3609 }, { "epoch": 1.205543496410085, "grad_norm": 0.4302000569946228, "learning_rate": 7.471898143883601e-06, "loss": 0.1866, "step": 3610 }, { "epoch": 1.2058774419769578, "grad_norm": 0.4329034497056259, "learning_rate": 7.470208722307183e-06, "loss": 0.2069, "step": 3611 }, { "epoch": 1.2062113875438303, "grad_norm": 0.4175942295887134, "learning_rate": 7.468518927576653e-06, "loss": 0.2029, "step": 3612 }, { "epoch": 1.206545333110703, "grad_norm": 0.40484000172415735, "learning_rate": 7.466828759947271e-06, "loss": 0.1942, "step": 3613 }, { "epoch": 1.2068792786775755, "grad_norm": 0.412447371421282, "learning_rate": 7.465138219674359e-06, "loss": 0.2098, "step": 3614 }, { "epoch": 1.2072132242444482, "grad_norm": 0.4136319006653538, "learning_rate": 7.463447307013294e-06, "loss": 0.1944, "step": 3615 }, { "epoch": 1.2075471698113207, "grad_norm": 0.4652498470308249, "learning_rate": 7.461756022219507e-06, "loss": 0.2017, "step": 3616 }, { "epoch": 1.2078811153781934, "grad_norm": 0.451017605411755, "learning_rate": 7.460064365548486e-06, "loss": 0.1925, "step": 3617 }, { "epoch": 1.208215060945066, "grad_norm": 0.44487372326102165, "learning_rate": 7.458372337255777e-06, "loss": 0.2075, "step": 3618 }, { "epoch": 1.2085490065119386, "grad_norm": 0.4495806275102939, "learning_rate": 7.45667993759698e-06, "loss": 0.2081, "step": 3619 }, { "epoch": 1.2088829520788111, "grad_norm": 0.4430843200910173, "learning_rate": 7.454987166827751e-06, "loss": 0.206, "step": 3620 }, { "epoch": 1.2092168976456836, "grad_norm": 0.4233773457511084, "learning_rate": 7.4532940252038055e-06, "loss": 0.2099, "step": 3621 }, { "epoch": 1.2095508432125563, "grad_norm": 0.4321928280916543, "learning_rate": 7.45160051298091e-06, "loss": 0.1958, "step": 3622 }, { "epoch": 1.209884788779429, "grad_norm": 0.5607630429277455, "learning_rate": 7.4499066304148904e-06, "loss": 0.2037, "step": 3623 }, { "epoch": 1.2102187343463016, "grad_norm": 0.4184204477068433, "learning_rate": 7.448212377761628e-06, "loss": 0.2049, "step": 3624 }, { "epoch": 1.210552679913174, "grad_norm": 0.4286820509175345, "learning_rate": 7.4465177552770585e-06, "loss": 0.2065, "step": 3625 }, { "epoch": 1.2108866254800468, "grad_norm": 0.4204690969276894, "learning_rate": 7.444822763217174e-06, "loss": 0.1969, "step": 3626 }, { "epoch": 1.2112205710469193, "grad_norm": 0.45133162737457627, "learning_rate": 7.443127401838026e-06, "loss": 0.2121, "step": 3627 }, { "epoch": 1.211554516613792, "grad_norm": 0.6038373572960923, "learning_rate": 7.441431671395717e-06, "loss": 0.2025, "step": 3628 }, { "epoch": 1.2118884621806645, "grad_norm": 0.46725093116741995, "learning_rate": 7.439735572146407e-06, "loss": 0.2076, "step": 3629 }, { "epoch": 1.2122224077475372, "grad_norm": 0.4469655909074122, "learning_rate": 7.438039104346312e-06, "loss": 0.1991, "step": 3630 }, { "epoch": 1.2125563533144097, "grad_norm": 0.4339059574842912, "learning_rate": 7.436342268251702e-06, "loss": 0.1954, "step": 3631 }, { "epoch": 1.2128902988812824, "grad_norm": 0.47586137866658257, "learning_rate": 7.434645064118906e-06, "loss": 0.2117, "step": 3632 }, { "epoch": 1.213224244448155, "grad_norm": 0.39511341244546483, "learning_rate": 7.432947492204308e-06, "loss": 0.1867, "step": 3633 }, { "epoch": 1.2135581900150276, "grad_norm": 0.42677190119791664, "learning_rate": 7.431249552764342e-06, "loss": 0.2021, "step": 3634 }, { "epoch": 1.2138921355819001, "grad_norm": 0.4669699424010275, "learning_rate": 7.429551246055504e-06, "loss": 0.1998, "step": 3635 }, { "epoch": 1.2142260811487728, "grad_norm": 0.4174572295361658, "learning_rate": 7.427852572334344e-06, "loss": 0.2047, "step": 3636 }, { "epoch": 1.2145600267156453, "grad_norm": 0.49130052281682524, "learning_rate": 7.426153531857466e-06, "loss": 0.2126, "step": 3637 }, { "epoch": 1.214893972282518, "grad_norm": 0.39466608599092157, "learning_rate": 7.424454124881531e-06, "loss": 0.1912, "step": 3638 }, { "epoch": 1.2152279178493905, "grad_norm": 0.5280861811057292, "learning_rate": 7.422754351663252e-06, "loss": 0.2041, "step": 3639 }, { "epoch": 1.215561863416263, "grad_norm": 0.42326684754756433, "learning_rate": 7.4210542124594e-06, "loss": 0.1907, "step": 3640 }, { "epoch": 1.2158958089831358, "grad_norm": 0.44549725989101097, "learning_rate": 7.419353707526804e-06, "loss": 0.2083, "step": 3641 }, { "epoch": 1.2162297545500083, "grad_norm": 0.43094208142967383, "learning_rate": 7.417652837122345e-06, "loss": 0.2167, "step": 3642 }, { "epoch": 1.216563700116881, "grad_norm": 0.5041095084876982, "learning_rate": 7.4159516015029545e-06, "loss": 0.2003, "step": 3643 }, { "epoch": 1.2168976456837535, "grad_norm": 0.415265396940736, "learning_rate": 7.414250000925629e-06, "loss": 0.1979, "step": 3644 }, { "epoch": 1.2172315912506262, "grad_norm": 0.4362697950301838, "learning_rate": 7.412548035647416e-06, "loss": 0.2035, "step": 3645 }, { "epoch": 1.2175655368174987, "grad_norm": 0.3976738370024975, "learning_rate": 7.4108457059254135e-06, "loss": 0.1947, "step": 3646 }, { "epoch": 1.2178994823843714, "grad_norm": 0.44831231308322833, "learning_rate": 7.40914301201678e-06, "loss": 0.2095, "step": 3647 }, { "epoch": 1.218233427951244, "grad_norm": 0.38688375643877443, "learning_rate": 7.407439954178729e-06, "loss": 0.1935, "step": 3648 }, { "epoch": 1.2185673735181166, "grad_norm": 0.42809394482498125, "learning_rate": 7.405736532668525e-06, "loss": 0.1997, "step": 3649 }, { "epoch": 1.218901319084989, "grad_norm": 0.4272147459706146, "learning_rate": 7.4040327477434926e-06, "loss": 0.1933, "step": 3650 }, { "epoch": 1.2192352646518618, "grad_norm": 0.4233262886986238, "learning_rate": 7.402328599661006e-06, "loss": 0.1961, "step": 3651 }, { "epoch": 1.2195692102187343, "grad_norm": 0.4380339243339666, "learning_rate": 7.400624088678497e-06, "loss": 0.202, "step": 3652 }, { "epoch": 1.219903155785607, "grad_norm": 0.4327575082842074, "learning_rate": 7.398919215053455e-06, "loss": 0.205, "step": 3653 }, { "epoch": 1.2202371013524795, "grad_norm": 0.4371121995452199, "learning_rate": 7.397213979043418e-06, "loss": 0.2089, "step": 3654 }, { "epoch": 1.220571046919352, "grad_norm": 0.4384003879413628, "learning_rate": 7.395508380905983e-06, "loss": 0.2007, "step": 3655 }, { "epoch": 1.2209049924862247, "grad_norm": 0.42353068846381453, "learning_rate": 7.393802420898801e-06, "loss": 0.1949, "step": 3656 }, { "epoch": 1.2212389380530975, "grad_norm": 0.388881556989798, "learning_rate": 7.392096099279579e-06, "loss": 0.1945, "step": 3657 }, { "epoch": 1.22157288361997, "grad_norm": 0.4694668160111598, "learning_rate": 7.390389416306073e-06, "loss": 0.1988, "step": 3658 }, { "epoch": 1.2219068291868425, "grad_norm": 0.41573848399930674, "learning_rate": 7.3886823722361e-06, "loss": 0.195, "step": 3659 }, { "epoch": 1.2222407747537152, "grad_norm": 0.4196740807199658, "learning_rate": 7.386974967327531e-06, "loss": 0.1941, "step": 3660 }, { "epoch": 1.2225747203205877, "grad_norm": 0.3842082607552933, "learning_rate": 7.385267201838284e-06, "loss": 0.1816, "step": 3661 }, { "epoch": 1.2229086658874604, "grad_norm": 0.4210064079129366, "learning_rate": 7.383559076026343e-06, "loss": 0.2085, "step": 3662 }, { "epoch": 1.2232426114543329, "grad_norm": 0.40011050080472454, "learning_rate": 7.381850590149737e-06, "loss": 0.1921, "step": 3663 }, { "epoch": 1.2235765570212056, "grad_norm": 0.41455617156730395, "learning_rate": 7.380141744466555e-06, "loss": 0.1862, "step": 3664 }, { "epoch": 1.223910502588078, "grad_norm": 0.42642811953196785, "learning_rate": 7.378432539234936e-06, "loss": 0.1976, "step": 3665 }, { "epoch": 1.2242444481549508, "grad_norm": 0.4540552515895627, "learning_rate": 7.376722974713078e-06, "loss": 0.201, "step": 3666 }, { "epoch": 1.2245783937218233, "grad_norm": 0.4442490667004041, "learning_rate": 7.3750130511592275e-06, "loss": 0.1957, "step": 3667 }, { "epoch": 1.224912339288696, "grad_norm": 0.49385689080880146, "learning_rate": 7.373302768831694e-06, "loss": 0.2133, "step": 3668 }, { "epoch": 1.2252462848555685, "grad_norm": 0.434217175678413, "learning_rate": 7.371592127988831e-06, "loss": 0.2016, "step": 3669 }, { "epoch": 1.225580230422441, "grad_norm": 0.39530828895439507, "learning_rate": 7.369881128889052e-06, "loss": 0.1923, "step": 3670 }, { "epoch": 1.2259141759893137, "grad_norm": 0.40983470561623925, "learning_rate": 7.368169771790825e-06, "loss": 0.2084, "step": 3671 }, { "epoch": 1.2262481215561865, "grad_norm": 0.4571876738058572, "learning_rate": 7.366458056952668e-06, "loss": 0.1997, "step": 3672 }, { "epoch": 1.226582067123059, "grad_norm": 0.426026877765367, "learning_rate": 7.36474598463316e-06, "loss": 0.2005, "step": 3673 }, { "epoch": 1.2269160126899314, "grad_norm": 0.4118963504455984, "learning_rate": 7.363033555090925e-06, "loss": 0.1886, "step": 3674 }, { "epoch": 1.2272499582568042, "grad_norm": 0.40449592628596953, "learning_rate": 7.361320768584648e-06, "loss": 0.1994, "step": 3675 }, { "epoch": 1.2275839038236767, "grad_norm": 0.4197205384614, "learning_rate": 7.359607625373065e-06, "loss": 0.1891, "step": 3676 }, { "epoch": 1.2279178493905494, "grad_norm": 0.4325982836766197, "learning_rate": 7.357894125714967e-06, "loss": 0.2011, "step": 3677 }, { "epoch": 1.2282517949574219, "grad_norm": 0.44776808435438165, "learning_rate": 7.3561802698691976e-06, "loss": 0.1929, "step": 3678 }, { "epoch": 1.2285857405242946, "grad_norm": 0.4529997887257035, "learning_rate": 7.354466058094656e-06, "loss": 0.2019, "step": 3679 }, { "epoch": 1.228919686091167, "grad_norm": 0.4780531134449325, "learning_rate": 7.352751490650294e-06, "loss": 0.2334, "step": 3680 }, { "epoch": 1.2292536316580398, "grad_norm": 0.4327464219707674, "learning_rate": 7.3510365677951155e-06, "loss": 0.1923, "step": 3681 }, { "epoch": 1.2295875772249123, "grad_norm": 0.4494358023469274, "learning_rate": 7.349321289788181e-06, "loss": 0.2073, "step": 3682 }, { "epoch": 1.229921522791785, "grad_norm": 0.4186072999519254, "learning_rate": 7.3476056568886036e-06, "loss": 0.1897, "step": 3683 }, { "epoch": 1.2302554683586575, "grad_norm": 0.4505587151771832, "learning_rate": 7.34588966935555e-06, "loss": 0.2074, "step": 3684 }, { "epoch": 1.2305894139255302, "grad_norm": 0.4712172452986157, "learning_rate": 7.344173327448238e-06, "loss": 0.1955, "step": 3685 }, { "epoch": 1.2309233594924027, "grad_norm": 0.42151892488420034, "learning_rate": 7.342456631425945e-06, "loss": 0.1968, "step": 3686 }, { "epoch": 1.2312573050592754, "grad_norm": 0.4858147778552328, "learning_rate": 7.340739581547996e-06, "loss": 0.2112, "step": 3687 }, { "epoch": 1.231591250626148, "grad_norm": 0.39299160270288175, "learning_rate": 7.339022178073772e-06, "loss": 0.1783, "step": 3688 }, { "epoch": 1.2319251961930204, "grad_norm": 0.4347870834866963, "learning_rate": 7.337304421262706e-06, "loss": 0.2005, "step": 3689 }, { "epoch": 1.2322591417598932, "grad_norm": 0.3944954395837461, "learning_rate": 7.335586311374287e-06, "loss": 0.1804, "step": 3690 }, { "epoch": 1.2325930873267656, "grad_norm": 0.4511832507539604, "learning_rate": 7.3338678486680545e-06, "loss": 0.226, "step": 3691 }, { "epoch": 1.2329270328936384, "grad_norm": 0.4951489024207722, "learning_rate": 7.3321490334036035e-06, "loss": 0.233, "step": 3692 }, { "epoch": 1.2332609784605109, "grad_norm": 0.42695922666442165, "learning_rate": 7.3304298658405815e-06, "loss": 0.1988, "step": 3693 }, { "epoch": 1.2335949240273836, "grad_norm": 0.4559841391821671, "learning_rate": 7.328710346238688e-06, "loss": 0.1873, "step": 3694 }, { "epoch": 1.233928869594256, "grad_norm": 0.4176584911033391, "learning_rate": 7.326990474857676e-06, "loss": 0.1755, "step": 3695 }, { "epoch": 1.2342628151611288, "grad_norm": 0.388833205756847, "learning_rate": 7.3252702519573545e-06, "loss": 0.1818, "step": 3696 }, { "epoch": 1.2345967607280013, "grad_norm": 0.4741753202108229, "learning_rate": 7.323549677797582e-06, "loss": 0.2035, "step": 3697 }, { "epoch": 1.234930706294874, "grad_norm": 0.41197049335397234, "learning_rate": 7.3218287526382716e-06, "loss": 0.1884, "step": 3698 }, { "epoch": 1.2352646518617465, "grad_norm": 0.4519660783569149, "learning_rate": 7.320107476739389e-06, "loss": 0.1987, "step": 3699 }, { "epoch": 1.2355985974286192, "grad_norm": 0.4441870961138195, "learning_rate": 7.318385850360954e-06, "loss": 0.2002, "step": 3700 }, { "epoch": 1.2359325429954917, "grad_norm": 0.4771636776284061, "learning_rate": 7.316663873763039e-06, "loss": 0.2005, "step": 3701 }, { "epoch": 1.2362664885623644, "grad_norm": 0.421346363486521, "learning_rate": 7.314941547205767e-06, "loss": 0.2057, "step": 3702 }, { "epoch": 1.236600434129237, "grad_norm": 0.3981895128659755, "learning_rate": 7.313218870949317e-06, "loss": 0.1943, "step": 3703 }, { "epoch": 1.2369343796961094, "grad_norm": 0.43622076090027484, "learning_rate": 7.31149584525392e-06, "loss": 0.2047, "step": 3704 }, { "epoch": 1.2372683252629821, "grad_norm": 0.39492812624758417, "learning_rate": 7.309772470379856e-06, "loss": 0.2045, "step": 3705 }, { "epoch": 1.2376022708298549, "grad_norm": 0.39980899433349076, "learning_rate": 7.308048746587466e-06, "loss": 0.1904, "step": 3706 }, { "epoch": 1.2379362163967274, "grad_norm": 0.4226276960308284, "learning_rate": 7.3063246741371365e-06, "loss": 0.1934, "step": 3707 }, { "epoch": 1.2382701619635998, "grad_norm": 0.4416996943577303, "learning_rate": 7.304600253289308e-06, "loss": 0.1986, "step": 3708 }, { "epoch": 1.2386041075304726, "grad_norm": 0.4084631963077389, "learning_rate": 7.302875484304476e-06, "loss": 0.2003, "step": 3709 }, { "epoch": 1.238938053097345, "grad_norm": 0.43539905341193225, "learning_rate": 7.301150367443186e-06, "loss": 0.2037, "step": 3710 }, { "epoch": 1.2392719986642178, "grad_norm": 0.4253528674527778, "learning_rate": 7.299424902966039e-06, "loss": 0.2084, "step": 3711 }, { "epoch": 1.2396059442310903, "grad_norm": 0.4139818088928367, "learning_rate": 7.297699091133685e-06, "loss": 0.2113, "step": 3712 }, { "epoch": 1.239939889797963, "grad_norm": 0.42337946777331986, "learning_rate": 7.295972932206827e-06, "loss": 0.1946, "step": 3713 }, { "epoch": 1.2402738353648355, "grad_norm": 0.42234185270273344, "learning_rate": 7.2942464264462255e-06, "loss": 0.1877, "step": 3714 }, { "epoch": 1.2406077809317082, "grad_norm": 0.42468102269690094, "learning_rate": 7.292519574112688e-06, "loss": 0.1952, "step": 3715 }, { "epoch": 1.2409417264985807, "grad_norm": 0.45061158016324065, "learning_rate": 7.290792375467074e-06, "loss": 0.2077, "step": 3716 }, { "epoch": 1.2412756720654534, "grad_norm": 0.4374334755090788, "learning_rate": 7.2890648307702985e-06, "loss": 0.208, "step": 3717 }, { "epoch": 1.241609617632326, "grad_norm": 0.44401302086261285, "learning_rate": 7.287336940283327e-06, "loss": 0.2005, "step": 3718 }, { "epoch": 1.2419435631991984, "grad_norm": 0.4366476902185395, "learning_rate": 7.28560870426718e-06, "loss": 0.2007, "step": 3719 }, { "epoch": 1.2422775087660711, "grad_norm": 0.43419928883049946, "learning_rate": 7.2838801229829245e-06, "loss": 0.1975, "step": 3720 }, { "epoch": 1.2426114543329438, "grad_norm": 0.40640252159976503, "learning_rate": 7.2821511966916845e-06, "loss": 0.1994, "step": 3721 }, { "epoch": 1.2429453998998163, "grad_norm": 0.42257019966830034, "learning_rate": 7.280421925654635e-06, "loss": 0.2021, "step": 3722 }, { "epoch": 1.2432793454666888, "grad_norm": 0.40500129240346916, "learning_rate": 7.278692310133003e-06, "loss": 0.1919, "step": 3723 }, { "epoch": 1.2436132910335616, "grad_norm": 0.42012490022988247, "learning_rate": 7.276962350388067e-06, "loss": 0.1966, "step": 3724 }, { "epoch": 1.243947236600434, "grad_norm": 0.4029324763396112, "learning_rate": 7.275232046681157e-06, "loss": 0.1896, "step": 3725 }, { "epoch": 1.2442811821673068, "grad_norm": 0.4113849025835322, "learning_rate": 7.273501399273656e-06, "loss": 0.1969, "step": 3726 }, { "epoch": 1.2446151277341793, "grad_norm": 0.4507044786866005, "learning_rate": 7.271770408427e-06, "loss": 0.2062, "step": 3727 }, { "epoch": 1.244949073301052, "grad_norm": 0.45701381903540794, "learning_rate": 7.2700390744026735e-06, "loss": 0.2079, "step": 3728 }, { "epoch": 1.2452830188679245, "grad_norm": 0.47264780094018927, "learning_rate": 7.2683073974622165e-06, "loss": 0.21, "step": 3729 }, { "epoch": 1.2456169644347972, "grad_norm": 0.46485397157026925, "learning_rate": 7.26657537786722e-06, "loss": 0.2165, "step": 3730 }, { "epoch": 1.2459509100016697, "grad_norm": 0.40938039577104884, "learning_rate": 7.264843015879321e-06, "loss": 0.1923, "step": 3731 }, { "epoch": 1.2462848555685424, "grad_norm": 0.5185887899593448, "learning_rate": 7.263110311760221e-06, "loss": 0.2086, "step": 3732 }, { "epoch": 1.246618801135415, "grad_norm": 0.4628231707028568, "learning_rate": 7.2613772657716585e-06, "loss": 0.1927, "step": 3733 }, { "epoch": 1.2469527467022876, "grad_norm": 0.49261373578200623, "learning_rate": 7.259643878175434e-06, "loss": 0.2183, "step": 3734 }, { "epoch": 1.2472866922691601, "grad_norm": 0.4642674170595037, "learning_rate": 7.2579101492333956e-06, "loss": 0.194, "step": 3735 }, { "epoch": 1.2476206378360328, "grad_norm": 0.4697562787124826, "learning_rate": 7.256176079207442e-06, "loss": 0.2052, "step": 3736 }, { "epoch": 1.2479545834029053, "grad_norm": 0.43296428720065006, "learning_rate": 7.254441668359527e-06, "loss": 0.2019, "step": 3737 }, { "epoch": 1.2482885289697778, "grad_norm": 0.5097000021139282, "learning_rate": 7.252706916951653e-06, "loss": 0.2192, "step": 3738 }, { "epoch": 1.2486224745366505, "grad_norm": 0.4864215004236803, "learning_rate": 7.250971825245874e-06, "loss": 0.2168, "step": 3739 }, { "epoch": 1.248956420103523, "grad_norm": 0.41838422249957957, "learning_rate": 7.249236393504296e-06, "loss": 0.2018, "step": 3740 }, { "epoch": 1.2492903656703958, "grad_norm": 0.39261054860167127, "learning_rate": 7.247500621989078e-06, "loss": 0.1887, "step": 3741 }, { "epoch": 1.2496243112372682, "grad_norm": 0.420859082136698, "learning_rate": 7.245764510962426e-06, "loss": 0.2072, "step": 3742 }, { "epoch": 1.249958256804141, "grad_norm": 0.44399344347580727, "learning_rate": 7.244028060686603e-06, "loss": 0.2007, "step": 3743 }, { "epoch": 1.2502922023710135, "grad_norm": 0.403279573354582, "learning_rate": 7.242291271423919e-06, "loss": 0.1983, "step": 3744 }, { "epoch": 1.2506261479378862, "grad_norm": 0.40558518807409527, "learning_rate": 7.240554143436735e-06, "loss": 0.1942, "step": 3745 }, { "epoch": 1.2509600935047587, "grad_norm": 0.45525307076929966, "learning_rate": 7.238816676987467e-06, "loss": 0.196, "step": 3746 }, { "epoch": 1.2512940390716314, "grad_norm": 0.47302635450133046, "learning_rate": 7.237078872338579e-06, "loss": 0.2172, "step": 3747 }, { "epoch": 1.2516279846385039, "grad_norm": 0.4115995099581479, "learning_rate": 7.235340729752584e-06, "loss": 0.202, "step": 3748 }, { "epoch": 1.2519619302053766, "grad_norm": 0.40330391629967777, "learning_rate": 7.233602249492055e-06, "loss": 0.1861, "step": 3749 }, { "epoch": 1.252295875772249, "grad_norm": 0.4611901621435869, "learning_rate": 7.2318634318196045e-06, "loss": 0.1993, "step": 3750 }, { "epoch": 1.2526298213391218, "grad_norm": 0.444872798872787, "learning_rate": 7.230124276997903e-06, "loss": 0.1939, "step": 3751 }, { "epoch": 1.2529637669059943, "grad_norm": 0.39062409444486607, "learning_rate": 7.228384785289671e-06, "loss": 0.1878, "step": 3752 }, { "epoch": 1.2532977124728668, "grad_norm": 0.38750288835226704, "learning_rate": 7.2266449569576804e-06, "loss": 0.2006, "step": 3753 }, { "epoch": 1.2536316580397395, "grad_norm": 0.4739440204799354, "learning_rate": 7.224904792264748e-06, "loss": 0.1972, "step": 3754 }, { "epoch": 1.2539656036066122, "grad_norm": 0.43430096931368434, "learning_rate": 7.223164291473752e-06, "loss": 0.1988, "step": 3755 }, { "epoch": 1.2542995491734847, "grad_norm": 0.4797983430080601, "learning_rate": 7.221423454847611e-06, "loss": 0.2029, "step": 3756 }, { "epoch": 1.2546334947403572, "grad_norm": 0.43552537544104014, "learning_rate": 7.219682282649302e-06, "loss": 0.1982, "step": 3757 }, { "epoch": 1.25496744030723, "grad_norm": 0.44330876808250125, "learning_rate": 7.2179407751418485e-06, "loss": 0.1996, "step": 3758 }, { "epoch": 1.2553013858741024, "grad_norm": 0.40349763911316494, "learning_rate": 7.216198932588325e-06, "loss": 0.1898, "step": 3759 }, { "epoch": 1.2556353314409752, "grad_norm": 0.4307633819537129, "learning_rate": 7.214456755251858e-06, "loss": 0.2011, "step": 3760 }, { "epoch": 1.2559692770078477, "grad_norm": 0.43747344877924577, "learning_rate": 7.212714243395623e-06, "loss": 0.1935, "step": 3761 }, { "epoch": 1.2563032225747204, "grad_norm": 0.439147201532576, "learning_rate": 7.210971397282848e-06, "loss": 0.1968, "step": 3762 }, { "epoch": 1.2566371681415929, "grad_norm": 0.4091513057684739, "learning_rate": 7.20922821717681e-06, "loss": 0.1975, "step": 3763 }, { "epoch": 1.2569711137084656, "grad_norm": 0.4001065589997931, "learning_rate": 7.207484703340838e-06, "loss": 0.1789, "step": 3764 }, { "epoch": 1.257305059275338, "grad_norm": 0.41731976095296924, "learning_rate": 7.205740856038308e-06, "loss": 0.2007, "step": 3765 }, { "epoch": 1.2576390048422108, "grad_norm": 0.4303382469501401, "learning_rate": 7.2039966755326515e-06, "loss": 0.2048, "step": 3766 }, { "epoch": 1.2579729504090833, "grad_norm": 0.43856878267294563, "learning_rate": 7.2022521620873456e-06, "loss": 0.1904, "step": 3767 }, { "epoch": 1.2583068959759558, "grad_norm": 0.4183896525795676, "learning_rate": 7.2005073159659186e-06, "loss": 0.1931, "step": 3768 }, { "epoch": 1.2586408415428285, "grad_norm": 0.4647365271907042, "learning_rate": 7.198762137431952e-06, "loss": 0.2066, "step": 3769 }, { "epoch": 1.2589747871097012, "grad_norm": 0.40618009076146466, "learning_rate": 7.197016626749076e-06, "loss": 0.185, "step": 3770 }, { "epoch": 1.2593087326765737, "grad_norm": 0.4063402599280868, "learning_rate": 7.195270784180968e-06, "loss": 0.1964, "step": 3771 }, { "epoch": 1.2596426782434462, "grad_norm": 0.4980873393441178, "learning_rate": 7.193524609991359e-06, "loss": 0.2167, "step": 3772 }, { "epoch": 1.259976623810319, "grad_norm": 0.4203495972272948, "learning_rate": 7.191778104444031e-06, "loss": 0.1996, "step": 3773 }, { "epoch": 1.2603105693771914, "grad_norm": 0.4691853538909953, "learning_rate": 7.190031267802814e-06, "loss": 0.2068, "step": 3774 }, { "epoch": 1.2606445149440642, "grad_norm": 0.429747934431845, "learning_rate": 7.188284100331585e-06, "loss": 0.197, "step": 3775 }, { "epoch": 1.2609784605109366, "grad_norm": 0.3989541459642685, "learning_rate": 7.186536602294278e-06, "loss": 0.1866, "step": 3776 }, { "epoch": 1.2613124060778094, "grad_norm": 0.4079076916479964, "learning_rate": 7.184788773954871e-06, "loss": 0.191, "step": 3777 }, { "epoch": 1.2616463516446819, "grad_norm": 0.37280995002049727, "learning_rate": 7.1830406155773946e-06, "loss": 0.1842, "step": 3778 }, { "epoch": 1.2619802972115546, "grad_norm": 0.47418669121238, "learning_rate": 7.181292127425928e-06, "loss": 0.2017, "step": 3779 }, { "epoch": 1.262314242778427, "grad_norm": 0.4557782177670249, "learning_rate": 7.179543309764604e-06, "loss": 0.2095, "step": 3780 }, { "epoch": 1.2626481883452998, "grad_norm": 0.4038382372590161, "learning_rate": 7.177794162857598e-06, "loss": 0.1895, "step": 3781 }, { "epoch": 1.2629821339121723, "grad_norm": 0.4257072698432259, "learning_rate": 7.176044686969141e-06, "loss": 0.2084, "step": 3782 }, { "epoch": 1.2633160794790448, "grad_norm": 0.40296656493114924, "learning_rate": 7.174294882363513e-06, "loss": 0.2078, "step": 3783 }, { "epoch": 1.2636500250459175, "grad_norm": 0.3972652707207378, "learning_rate": 7.172544749305039e-06, "loss": 0.2005, "step": 3784 }, { "epoch": 1.2639839706127902, "grad_norm": 0.40141205047962275, "learning_rate": 7.170794288058103e-06, "loss": 0.1899, "step": 3785 }, { "epoch": 1.2643179161796627, "grad_norm": 0.3924619438453352, "learning_rate": 7.169043498887126e-06, "loss": 0.1928, "step": 3786 }, { "epoch": 1.2646518617465352, "grad_norm": 0.4310553864330334, "learning_rate": 7.1672923820565925e-06, "loss": 0.1945, "step": 3787 }, { "epoch": 1.264985807313408, "grad_norm": 0.42295352764743993, "learning_rate": 7.165540937831024e-06, "loss": 0.1984, "step": 3788 }, { "epoch": 1.2653197528802806, "grad_norm": 0.38478960335529383, "learning_rate": 7.163789166474998e-06, "loss": 0.1907, "step": 3789 }, { "epoch": 1.2656536984471531, "grad_norm": 0.3964245436953759, "learning_rate": 7.162037068253141e-06, "loss": 0.1824, "step": 3790 }, { "epoch": 1.2659876440140256, "grad_norm": 0.4650615379634907, "learning_rate": 7.160284643430129e-06, "loss": 0.2234, "step": 3791 }, { "epoch": 1.2663215895808984, "grad_norm": 0.4988659718160951, "learning_rate": 7.158531892270682e-06, "loss": 0.1991, "step": 3792 }, { "epoch": 1.2666555351477709, "grad_norm": 0.40544641816954785, "learning_rate": 7.156778815039579e-06, "loss": 0.1871, "step": 3793 }, { "epoch": 1.2669894807146436, "grad_norm": 0.4331677677975272, "learning_rate": 7.15502541200164e-06, "loss": 0.2045, "step": 3794 }, { "epoch": 1.267323426281516, "grad_norm": 0.4427416226804204, "learning_rate": 7.153271683421738e-06, "loss": 0.2012, "step": 3795 }, { "epoch": 1.2676573718483888, "grad_norm": 0.43060810240200514, "learning_rate": 7.151517629564795e-06, "loss": 0.2067, "step": 3796 }, { "epoch": 1.2679913174152613, "grad_norm": 0.4162223937732087, "learning_rate": 7.14976325069578e-06, "loss": 0.2001, "step": 3797 }, { "epoch": 1.268325262982134, "grad_norm": 0.4283496532813782, "learning_rate": 7.148008547079713e-06, "loss": 0.1942, "step": 3798 }, { "epoch": 1.2686592085490065, "grad_norm": 0.3919278069619941, "learning_rate": 7.1462535189816636e-06, "loss": 0.1978, "step": 3799 }, { "epoch": 1.2689931541158792, "grad_norm": 0.4030159316869139, "learning_rate": 7.14449816666675e-06, "loss": 0.2024, "step": 3800 }, { "epoch": 1.2693270996827517, "grad_norm": 0.4122621176678283, "learning_rate": 7.142742490400135e-06, "loss": 0.1939, "step": 3801 }, { "epoch": 1.2696610452496242, "grad_norm": 0.4242214438614236, "learning_rate": 7.140986490447039e-06, "loss": 0.2014, "step": 3802 }, { "epoch": 1.269994990816497, "grad_norm": 0.41850239250671534, "learning_rate": 7.139230167072724e-06, "loss": 0.2037, "step": 3803 }, { "epoch": 1.2703289363833696, "grad_norm": 0.4148927143434032, "learning_rate": 7.137473520542503e-06, "loss": 0.1982, "step": 3804 }, { "epoch": 1.2706628819502421, "grad_norm": 0.42901971140755585, "learning_rate": 7.135716551121739e-06, "loss": 0.1966, "step": 3805 }, { "epoch": 1.2709968275171146, "grad_norm": 0.45317109667412, "learning_rate": 7.133959259075844e-06, "loss": 0.2161, "step": 3806 }, { "epoch": 1.2713307730839873, "grad_norm": 0.38629809789963193, "learning_rate": 7.132201644670274e-06, "loss": 0.1946, "step": 3807 }, { "epoch": 1.2716647186508598, "grad_norm": 0.43946380323346784, "learning_rate": 7.13044370817054e-06, "loss": 0.2185, "step": 3808 }, { "epoch": 1.2719986642177326, "grad_norm": 0.4153870460305795, "learning_rate": 7.128685449842201e-06, "loss": 0.1829, "step": 3809 }, { "epoch": 1.272332609784605, "grad_norm": 0.4070835430938735, "learning_rate": 7.1269268699508574e-06, "loss": 0.1914, "step": 3810 }, { "epoch": 1.2726665553514778, "grad_norm": 0.5294790454069535, "learning_rate": 7.1251679687621685e-06, "loss": 0.197, "step": 3811 }, { "epoch": 1.2730005009183503, "grad_norm": 0.43195916234280074, "learning_rate": 7.123408746541835e-06, "loss": 0.2067, "step": 3812 }, { "epoch": 1.273334446485223, "grad_norm": 0.4223072878291587, "learning_rate": 7.1216492035556075e-06, "loss": 0.2024, "step": 3813 }, { "epoch": 1.2736683920520955, "grad_norm": 0.43487357461786935, "learning_rate": 7.119889340069286e-06, "loss": 0.1966, "step": 3814 }, { "epoch": 1.2740023376189682, "grad_norm": 0.4051402525802372, "learning_rate": 7.1181291563487175e-06, "loss": 0.1887, "step": 3815 }, { "epoch": 1.2743362831858407, "grad_norm": 0.4456616551512561, "learning_rate": 7.116368652659802e-06, "loss": 0.2077, "step": 3816 }, { "epoch": 1.2746702287527132, "grad_norm": 0.407498695448717, "learning_rate": 7.114607829268481e-06, "loss": 0.1967, "step": 3817 }, { "epoch": 1.275004174319586, "grad_norm": 0.40906980972589485, "learning_rate": 7.1128466864407486e-06, "loss": 0.1986, "step": 3818 }, { "epoch": 1.2753381198864586, "grad_norm": 0.38020921049437223, "learning_rate": 7.111085224442647e-06, "loss": 0.1864, "step": 3819 }, { "epoch": 1.2756720654533311, "grad_norm": 0.41376376861187264, "learning_rate": 7.109323443540263e-06, "loss": 0.2022, "step": 3820 }, { "epoch": 1.2760060110202036, "grad_norm": 0.4085034643758407, "learning_rate": 7.107561343999739e-06, "loss": 0.2002, "step": 3821 }, { "epoch": 1.2763399565870763, "grad_norm": 0.4147881532976798, "learning_rate": 7.105798926087257e-06, "loss": 0.1929, "step": 3822 }, { "epoch": 1.2766739021539488, "grad_norm": 0.4164403555278612, "learning_rate": 7.104036190069052e-06, "loss": 0.1979, "step": 3823 }, { "epoch": 1.2770078477208215, "grad_norm": 0.388973867320647, "learning_rate": 7.102273136211407e-06, "loss": 0.1947, "step": 3824 }, { "epoch": 1.277341793287694, "grad_norm": 0.43615613297218037, "learning_rate": 7.10050976478065e-06, "loss": 0.1904, "step": 3825 }, { "epoch": 1.2776757388545668, "grad_norm": 0.4217476905256785, "learning_rate": 7.098746076043162e-06, "loss": 0.1976, "step": 3826 }, { "epoch": 1.2780096844214393, "grad_norm": 0.4410477775249162, "learning_rate": 7.096982070265366e-06, "loss": 0.2111, "step": 3827 }, { "epoch": 1.278343629988312, "grad_norm": 0.407553596713063, "learning_rate": 7.0952177477137374e-06, "loss": 0.1877, "step": 3828 }, { "epoch": 1.2786775755551845, "grad_norm": 0.4226143283409012, "learning_rate": 7.093453108654798e-06, "loss": 0.1942, "step": 3829 }, { "epoch": 1.2790115211220572, "grad_norm": 0.44448160702216344, "learning_rate": 7.091688153355116e-06, "loss": 0.2117, "step": 3830 }, { "epoch": 1.2793454666889297, "grad_norm": 0.44023312970202644, "learning_rate": 7.08992288208131e-06, "loss": 0.2061, "step": 3831 }, { "epoch": 1.2796794122558022, "grad_norm": 0.4289817202503459, "learning_rate": 7.088157295100046e-06, "loss": 0.1982, "step": 3832 }, { "epoch": 1.280013357822675, "grad_norm": 0.41168121667242447, "learning_rate": 7.0863913926780335e-06, "loss": 0.1965, "step": 3833 }, { "epoch": 1.2803473033895476, "grad_norm": 0.389066292024194, "learning_rate": 7.084625175082036e-06, "loss": 0.1969, "step": 3834 }, { "epoch": 1.28068124895642, "grad_norm": 0.42211956082879903, "learning_rate": 7.082858642578861e-06, "loss": 0.1927, "step": 3835 }, { "epoch": 1.2810151945232926, "grad_norm": 0.41487931684211865, "learning_rate": 7.081091795435361e-06, "loss": 0.2018, "step": 3836 }, { "epoch": 1.2813491400901653, "grad_norm": 0.4355951362057153, "learning_rate": 7.079324633918443e-06, "loss": 0.187, "step": 3837 }, { "epoch": 1.281683085657038, "grad_norm": 0.40177334082527777, "learning_rate": 7.077557158295053e-06, "loss": 0.1943, "step": 3838 }, { "epoch": 1.2820170312239105, "grad_norm": 0.4688295048980153, "learning_rate": 7.075789368832194e-06, "loss": 0.2056, "step": 3839 }, { "epoch": 1.282350976790783, "grad_norm": 0.45332907753482743, "learning_rate": 7.074021265796909e-06, "loss": 0.2151, "step": 3840 }, { "epoch": 1.2826849223576557, "grad_norm": 0.4011388285723007, "learning_rate": 7.072252849456291e-06, "loss": 0.1948, "step": 3841 }, { "epoch": 1.2830188679245282, "grad_norm": 0.4397419364652935, "learning_rate": 7.07048412007748e-06, "loss": 0.1973, "step": 3842 }, { "epoch": 1.283352813491401, "grad_norm": 0.4191526878534424, "learning_rate": 7.068715077927664e-06, "loss": 0.1922, "step": 3843 }, { "epoch": 1.2836867590582735, "grad_norm": 0.4622949980235757, "learning_rate": 7.066945723274077e-06, "loss": 0.1966, "step": 3844 }, { "epoch": 1.2840207046251462, "grad_norm": 0.39544208070452724, "learning_rate": 7.065176056383999e-06, "loss": 0.1978, "step": 3845 }, { "epoch": 1.2843546501920187, "grad_norm": 0.4669748844624701, "learning_rate": 7.063406077524764e-06, "loss": 0.2106, "step": 3846 }, { "epoch": 1.2846885957588914, "grad_norm": 0.4045890789425728, "learning_rate": 7.061635786963743e-06, "loss": 0.1927, "step": 3847 }, { "epoch": 1.2850225413257639, "grad_norm": 0.4297051183096818, "learning_rate": 7.059865184968362e-06, "loss": 0.1991, "step": 3848 }, { "epoch": 1.2853564868926366, "grad_norm": 0.44503532628630693, "learning_rate": 7.058094271806091e-06, "loss": 0.2127, "step": 3849 }, { "epoch": 1.285690432459509, "grad_norm": 0.38682645112211256, "learning_rate": 7.056323047744447e-06, "loss": 0.1753, "step": 3850 }, { "epoch": 1.2860243780263816, "grad_norm": 0.44630819787955817, "learning_rate": 7.054551513050993e-06, "loss": 0.2156, "step": 3851 }, { "epoch": 1.2863583235932543, "grad_norm": 0.4378283545797013, "learning_rate": 7.052779667993342e-06, "loss": 0.2057, "step": 3852 }, { "epoch": 1.286692269160127, "grad_norm": 0.4304232637199404, "learning_rate": 7.051007512839153e-06, "loss": 0.2205, "step": 3853 }, { "epoch": 1.2870262147269995, "grad_norm": 0.5159648005944082, "learning_rate": 7.0492350478561275e-06, "loss": 0.1995, "step": 3854 }, { "epoch": 1.287360160293872, "grad_norm": 0.457847641860177, "learning_rate": 7.04746227331202e-06, "loss": 0.2067, "step": 3855 }, { "epoch": 1.2876941058607447, "grad_norm": 0.41872580138211385, "learning_rate": 7.045689189474628e-06, "loss": 0.1934, "step": 3856 }, { "epoch": 1.2880280514276172, "grad_norm": 0.4110936768730782, "learning_rate": 7.0439157966117955e-06, "loss": 0.1858, "step": 3857 }, { "epoch": 1.28836199699449, "grad_norm": 0.38185640035698354, "learning_rate": 7.042142094991418e-06, "loss": 0.1816, "step": 3858 }, { "epoch": 1.2886959425613624, "grad_norm": 0.3873522432517064, "learning_rate": 7.04036808488143e-06, "loss": 0.1964, "step": 3859 }, { "epoch": 1.2890298881282352, "grad_norm": 0.4232734771641415, "learning_rate": 7.038593766549817e-06, "loss": 0.2104, "step": 3860 }, { "epoch": 1.2893638336951077, "grad_norm": 0.4746183445203052, "learning_rate": 7.0368191402646145e-06, "loss": 0.2097, "step": 3861 }, { "epoch": 1.2896977792619804, "grad_norm": 0.42621702835077613, "learning_rate": 7.035044206293898e-06, "loss": 0.2194, "step": 3862 }, { "epoch": 1.2900317248288529, "grad_norm": 0.42742800253037144, "learning_rate": 7.0332689649057905e-06, "loss": 0.1985, "step": 3863 }, { "epoch": 1.2903656703957256, "grad_norm": 0.4320688142367941, "learning_rate": 7.031493416368466e-06, "loss": 0.1984, "step": 3864 }, { "epoch": 1.290699615962598, "grad_norm": 0.4611896460687471, "learning_rate": 7.029717560950141e-06, "loss": 0.2105, "step": 3865 }, { "epoch": 1.2910335615294706, "grad_norm": 0.42167209030771285, "learning_rate": 7.027941398919078e-06, "loss": 0.2007, "step": 3866 }, { "epoch": 1.2913675070963433, "grad_norm": 0.4486720053192747, "learning_rate": 7.0261649305435895e-06, "loss": 0.2107, "step": 3867 }, { "epoch": 1.291701452663216, "grad_norm": 0.43703159019530075, "learning_rate": 7.02438815609203e-06, "loss": 0.2002, "step": 3868 }, { "epoch": 1.2920353982300885, "grad_norm": 0.4206235426460402, "learning_rate": 7.022611075832804e-06, "loss": 0.1824, "step": 3869 }, { "epoch": 1.292369343796961, "grad_norm": 0.45899661384875784, "learning_rate": 7.02083369003436e-06, "loss": 0.2091, "step": 3870 }, { "epoch": 1.2927032893638337, "grad_norm": 0.4163150817565343, "learning_rate": 7.019055998965191e-06, "loss": 0.2037, "step": 3871 }, { "epoch": 1.2930372349307062, "grad_norm": 0.48635183034398205, "learning_rate": 7.017278002893841e-06, "loss": 0.2072, "step": 3872 }, { "epoch": 1.293371180497579, "grad_norm": 0.4210590900618153, "learning_rate": 7.015499702088896e-06, "loss": 0.1871, "step": 3873 }, { "epoch": 1.2937051260644514, "grad_norm": 0.42679469662211317, "learning_rate": 7.013721096818988e-06, "loss": 0.2057, "step": 3874 }, { "epoch": 1.2940390716313241, "grad_norm": 0.43109509020232506, "learning_rate": 7.011942187352798e-06, "loss": 0.1961, "step": 3875 }, { "epoch": 1.2943730171981966, "grad_norm": 0.4261404803605642, "learning_rate": 7.010162973959052e-06, "loss": 0.1987, "step": 3876 }, { "epoch": 1.2947069627650694, "grad_norm": 0.3984455245433112, "learning_rate": 7.008383456906518e-06, "loss": 0.1915, "step": 3877 }, { "epoch": 1.2950409083319419, "grad_norm": 0.44213008748103527, "learning_rate": 7.0066036364640165e-06, "loss": 0.2063, "step": 3878 }, { "epoch": 1.2953748538988146, "grad_norm": 0.4340151247449952, "learning_rate": 7.004823512900408e-06, "loss": 0.2061, "step": 3879 }, { "epoch": 1.295708799465687, "grad_norm": 0.4353715360420167, "learning_rate": 7.003043086484602e-06, "loss": 0.2192, "step": 3880 }, { "epoch": 1.2960427450325596, "grad_norm": 0.40389370415658, "learning_rate": 7.001262357485553e-06, "loss": 0.1915, "step": 3881 }, { "epoch": 1.2963766905994323, "grad_norm": 0.4385006533052746, "learning_rate": 6.99948132617226e-06, "loss": 0.211, "step": 3882 }, { "epoch": 1.296710636166305, "grad_norm": 0.42962689985450564, "learning_rate": 6.99769999281377e-06, "loss": 0.2004, "step": 3883 }, { "epoch": 1.2970445817331775, "grad_norm": 0.3988243836403783, "learning_rate": 6.9959183576791745e-06, "loss": 0.1935, "step": 3884 }, { "epoch": 1.29737852730005, "grad_norm": 0.46588350704208614, "learning_rate": 6.9941364210376095e-06, "loss": 0.2027, "step": 3885 }, { "epoch": 1.2977124728669227, "grad_norm": 0.4423870351069402, "learning_rate": 6.992354183158258e-06, "loss": 0.2006, "step": 3886 }, { "epoch": 1.2980464184337954, "grad_norm": 0.44696770311545875, "learning_rate": 6.9905716443103475e-06, "loss": 0.1977, "step": 3887 }, { "epoch": 1.298380364000668, "grad_norm": 0.43550098856674935, "learning_rate": 6.9887888047631525e-06, "loss": 0.1935, "step": 3888 }, { "epoch": 1.2987143095675404, "grad_norm": 0.45209412899280554, "learning_rate": 6.987005664785991e-06, "loss": 0.1968, "step": 3889 }, { "epoch": 1.2990482551344131, "grad_norm": 0.44580123030250995, "learning_rate": 6.985222224648227e-06, "loss": 0.2001, "step": 3890 }, { "epoch": 1.2993822007012856, "grad_norm": 0.45139711177023095, "learning_rate": 6.983438484619272e-06, "loss": 0.2037, "step": 3891 }, { "epoch": 1.2997161462681583, "grad_norm": 0.4219811830085104, "learning_rate": 6.981654444968578e-06, "loss": 0.1982, "step": 3892 }, { "epoch": 1.3000500918350308, "grad_norm": 0.44396824829876985, "learning_rate": 6.979870105965648e-06, "loss": 0.2021, "step": 3893 }, { "epoch": 1.3003840374019036, "grad_norm": 0.41445057653912953, "learning_rate": 6.978085467880027e-06, "loss": 0.1894, "step": 3894 }, { "epoch": 1.300717982968776, "grad_norm": 0.4947034290772902, "learning_rate": 6.9763005309813025e-06, "loss": 0.206, "step": 3895 }, { "epoch": 1.3010519285356488, "grad_norm": 0.43818926332258024, "learning_rate": 6.974515295539115e-06, "loss": 0.2049, "step": 3896 }, { "epoch": 1.3013858741025213, "grad_norm": 0.4498029142452314, "learning_rate": 6.9727297618231416e-06, "loss": 0.2033, "step": 3897 }, { "epoch": 1.301719819669394, "grad_norm": 0.39991165215175273, "learning_rate": 6.970943930103109e-06, "loss": 0.2049, "step": 3898 }, { "epoch": 1.3020537652362665, "grad_norm": 0.39753769966116426, "learning_rate": 6.96915780064879e-06, "loss": 0.1969, "step": 3899 }, { "epoch": 1.302387710803139, "grad_norm": 0.4219028223395952, "learning_rate": 6.96737137373e-06, "loss": 0.2007, "step": 3900 }, { "epoch": 1.3027216563700117, "grad_norm": 0.4405048022853998, "learning_rate": 6.965584649616597e-06, "loss": 0.2084, "step": 3901 }, { "epoch": 1.3030556019368844, "grad_norm": 0.47780153655107577, "learning_rate": 6.963797628578489e-06, "loss": 0.189, "step": 3902 }, { "epoch": 1.303389547503757, "grad_norm": 0.44621414800365433, "learning_rate": 6.962010310885627e-06, "loss": 0.201, "step": 3903 }, { "epoch": 1.3037234930706294, "grad_norm": 0.4567887700056304, "learning_rate": 6.960222696808004e-06, "loss": 0.2142, "step": 3904 }, { "epoch": 1.3040574386375021, "grad_norm": 0.4348089611652648, "learning_rate": 6.958434786615663e-06, "loss": 0.1969, "step": 3905 }, { "epoch": 1.3043913842043746, "grad_norm": 0.4141676962452518, "learning_rate": 6.956646580578687e-06, "loss": 0.2024, "step": 3906 }, { "epoch": 1.3047253297712473, "grad_norm": 0.5021452032817756, "learning_rate": 6.954858078967207e-06, "loss": 0.2167, "step": 3907 }, { "epoch": 1.3050592753381198, "grad_norm": 0.42162857639320483, "learning_rate": 6.953069282051397e-06, "loss": 0.198, "step": 3908 }, { "epoch": 1.3053932209049925, "grad_norm": 0.4012274684722401, "learning_rate": 6.951280190101475e-06, "loss": 0.1932, "step": 3909 }, { "epoch": 1.305727166471865, "grad_norm": 0.4800296772397605, "learning_rate": 6.949490803387704e-06, "loss": 0.2146, "step": 3910 }, { "epoch": 1.3060611120387378, "grad_norm": 0.430988613912435, "learning_rate": 6.9477011221803935e-06, "loss": 0.1926, "step": 3911 }, { "epoch": 1.3063950576056103, "grad_norm": 0.5275440853144523, "learning_rate": 6.945911146749894e-06, "loss": 0.1949, "step": 3912 }, { "epoch": 1.306729003172483, "grad_norm": 0.4382678861181215, "learning_rate": 6.944120877366605e-06, "loss": 0.2023, "step": 3913 }, { "epoch": 1.3070629487393555, "grad_norm": 0.44380835569655847, "learning_rate": 6.9423303143009644e-06, "loss": 0.2155, "step": 3914 }, { "epoch": 1.307396894306228, "grad_norm": 0.4166446811542404, "learning_rate": 6.940539457823459e-06, "loss": 0.2038, "step": 3915 }, { "epoch": 1.3077308398731007, "grad_norm": 0.4268607935295313, "learning_rate": 6.938748308204622e-06, "loss": 0.2065, "step": 3916 }, { "epoch": 1.3080647854399734, "grad_norm": 0.44848540251287616, "learning_rate": 6.936956865715024e-06, "loss": 0.1959, "step": 3917 }, { "epoch": 1.308398731006846, "grad_norm": 0.4524449751352037, "learning_rate": 6.9351651306252836e-06, "loss": 0.2078, "step": 3918 }, { "epoch": 1.3087326765737184, "grad_norm": 0.4111904238885648, "learning_rate": 6.933373103206064e-06, "loss": 0.1935, "step": 3919 }, { "epoch": 1.309066622140591, "grad_norm": 0.49171063968711426, "learning_rate": 6.931580783728075e-06, "loss": 0.2076, "step": 3920 }, { "epoch": 1.3094005677074636, "grad_norm": 0.42276347946771226, "learning_rate": 6.929788172462063e-06, "loss": 0.2004, "step": 3921 }, { "epoch": 1.3097345132743363, "grad_norm": 0.45493648816395343, "learning_rate": 6.927995269678826e-06, "loss": 0.2091, "step": 3922 }, { "epoch": 1.3100684588412088, "grad_norm": 0.4905831176410732, "learning_rate": 6.926202075649202e-06, "loss": 0.2073, "step": 3923 }, { "epoch": 1.3104024044080815, "grad_norm": 0.4280595066960188, "learning_rate": 6.924408590644073e-06, "loss": 0.2102, "step": 3924 }, { "epoch": 1.310736349974954, "grad_norm": 0.4519193608106046, "learning_rate": 6.922614814934367e-06, "loss": 0.2056, "step": 3925 }, { "epoch": 1.3110702955418267, "grad_norm": 0.4404172492976198, "learning_rate": 6.920820748791057e-06, "loss": 0.1964, "step": 3926 }, { "epoch": 1.3114042411086992, "grad_norm": 0.42868822460623446, "learning_rate": 6.919026392485154e-06, "loss": 0.1973, "step": 3927 }, { "epoch": 1.311738186675572, "grad_norm": 0.43305908963203626, "learning_rate": 6.91723174628772e-06, "loss": 0.1928, "step": 3928 }, { "epoch": 1.3120721322424445, "grad_norm": 0.4477015665448263, "learning_rate": 6.915436810469856e-06, "loss": 0.2061, "step": 3929 }, { "epoch": 1.312406077809317, "grad_norm": 0.4061799452628013, "learning_rate": 6.913641585302708e-06, "loss": 0.1882, "step": 3930 }, { "epoch": 1.3127400233761897, "grad_norm": 0.3957076000775846, "learning_rate": 6.9118460710574665e-06, "loss": 0.197, "step": 3931 }, { "epoch": 1.3130739689430624, "grad_norm": 0.4529677675383033, "learning_rate": 6.910050268005364e-06, "loss": 0.1999, "step": 3932 }, { "epoch": 1.3134079145099349, "grad_norm": 0.45692150570119494, "learning_rate": 6.908254176417679e-06, "loss": 0.1991, "step": 3933 }, { "epoch": 1.3137418600768074, "grad_norm": 0.3908259281335957, "learning_rate": 6.906457796565732e-06, "loss": 0.1917, "step": 3934 }, { "epoch": 1.31407580564368, "grad_norm": 0.44642695936390425, "learning_rate": 6.904661128720887e-06, "loss": 0.2102, "step": 3935 }, { "epoch": 1.3144097512105528, "grad_norm": 0.4140422793919292, "learning_rate": 6.902864173154551e-06, "loss": 0.1954, "step": 3936 }, { "epoch": 1.3147436967774253, "grad_norm": 0.4175994886561666, "learning_rate": 6.9010669301381765e-06, "loss": 0.197, "step": 3937 }, { "epoch": 1.3150776423442978, "grad_norm": 0.43189349728431153, "learning_rate": 6.899269399943258e-06, "loss": 0.2212, "step": 3938 }, { "epoch": 1.3154115879111705, "grad_norm": 0.4035715995235755, "learning_rate": 6.897471582841333e-06, "loss": 0.1968, "step": 3939 }, { "epoch": 1.315745533478043, "grad_norm": 0.4664423426609285, "learning_rate": 6.895673479103983e-06, "loss": 0.2087, "step": 3940 }, { "epoch": 1.3160794790449157, "grad_norm": 0.4664300811353399, "learning_rate": 6.893875089002835e-06, "loss": 0.2107, "step": 3941 }, { "epoch": 1.3164134246117882, "grad_norm": 0.3890368808127858, "learning_rate": 6.892076412809553e-06, "loss": 0.1894, "step": 3942 }, { "epoch": 1.316747370178661, "grad_norm": 0.4327141924488333, "learning_rate": 6.890277450795851e-06, "loss": 0.1968, "step": 3943 }, { "epoch": 1.3170813157455334, "grad_norm": 0.42540004436257756, "learning_rate": 6.888478203233484e-06, "loss": 0.1837, "step": 3944 }, { "epoch": 1.3174152613124062, "grad_norm": 0.40663174769799976, "learning_rate": 6.886678670394247e-06, "loss": 0.1884, "step": 3945 }, { "epoch": 1.3177492068792787, "grad_norm": 0.4240067688960512, "learning_rate": 6.884878852549982e-06, "loss": 0.2086, "step": 3946 }, { "epoch": 1.3180831524461514, "grad_norm": 0.46538495365988414, "learning_rate": 6.883078749972573e-06, "loss": 0.1947, "step": 3947 }, { "epoch": 1.3184170980130239, "grad_norm": 0.39694236009597017, "learning_rate": 6.881278362933947e-06, "loss": 0.1926, "step": 3948 }, { "epoch": 1.3187510435798964, "grad_norm": 0.4084027414378485, "learning_rate": 6.879477691706071e-06, "loss": 0.1912, "step": 3949 }, { "epoch": 1.319084989146769, "grad_norm": 0.4483001951410644, "learning_rate": 6.877676736560961e-06, "loss": 0.2115, "step": 3950 }, { "epoch": 1.3194189347136418, "grad_norm": 0.45057352996667105, "learning_rate": 6.87587549777067e-06, "loss": 0.2042, "step": 3951 }, { "epoch": 1.3197528802805143, "grad_norm": 0.42211307538365817, "learning_rate": 6.874073975607298e-06, "loss": 0.2053, "step": 3952 }, { "epoch": 1.3200868258473868, "grad_norm": 0.41256970624902645, "learning_rate": 6.872272170342985e-06, "loss": 0.2016, "step": 3953 }, { "epoch": 1.3204207714142595, "grad_norm": 0.5930226936995437, "learning_rate": 6.870470082249917e-06, "loss": 0.2153, "step": 3954 }, { "epoch": 1.320754716981132, "grad_norm": 0.4080686250723333, "learning_rate": 6.868667711600318e-06, "loss": 0.1957, "step": 3955 }, { "epoch": 1.3210886625480047, "grad_norm": 0.4330981620220748, "learning_rate": 6.866865058666459e-06, "loss": 0.2001, "step": 3956 }, { "epoch": 1.3214226081148772, "grad_norm": 0.41330007823441606, "learning_rate": 6.86506212372065e-06, "loss": 0.2064, "step": 3957 }, { "epoch": 1.32175655368175, "grad_norm": 0.4314166600814895, "learning_rate": 6.863258907035246e-06, "loss": 0.2024, "step": 3958 }, { "epoch": 1.3220904992486224, "grad_norm": 0.45440336445114765, "learning_rate": 6.861455408882647e-06, "loss": 0.2149, "step": 3959 }, { "epoch": 1.3224244448154951, "grad_norm": 0.4156203304442072, "learning_rate": 6.85965162953529e-06, "loss": 0.1992, "step": 3960 }, { "epoch": 1.3227583903823676, "grad_norm": 0.40586715685586255, "learning_rate": 6.857847569265657e-06, "loss": 0.1912, "step": 3961 }, { "epoch": 1.3230923359492404, "grad_norm": 0.4198856216324367, "learning_rate": 6.8560432283462745e-06, "loss": 0.2046, "step": 3962 }, { "epoch": 1.3234262815161129, "grad_norm": 0.4506385780580963, "learning_rate": 6.854238607049707e-06, "loss": 0.2107, "step": 3963 }, { "epoch": 1.3237602270829854, "grad_norm": 0.4144978365544001, "learning_rate": 6.852433705648566e-06, "loss": 0.2053, "step": 3964 }, { "epoch": 1.324094172649858, "grad_norm": 0.4206897818109025, "learning_rate": 6.8506285244155e-06, "loss": 0.195, "step": 3965 }, { "epoch": 1.3244281182167308, "grad_norm": 0.4367717494660734, "learning_rate": 6.848823063623207e-06, "loss": 0.1964, "step": 3966 }, { "epoch": 1.3247620637836033, "grad_norm": 0.42784053652612, "learning_rate": 6.84701732354442e-06, "loss": 0.2128, "step": 3967 }, { "epoch": 1.3250960093504758, "grad_norm": 0.4303998202870131, "learning_rate": 6.845211304451919e-06, "loss": 0.1982, "step": 3968 }, { "epoch": 1.3254299549173485, "grad_norm": 0.41585888294104695, "learning_rate": 6.843405006618523e-06, "loss": 0.1887, "step": 3969 }, { "epoch": 1.325763900484221, "grad_norm": 0.3962726406636628, "learning_rate": 6.841598430317096e-06, "loss": 0.1932, "step": 3970 }, { "epoch": 1.3260978460510937, "grad_norm": 0.41713419116526673, "learning_rate": 6.839791575820541e-06, "loss": 0.1938, "step": 3971 }, { "epoch": 1.3264317916179662, "grad_norm": 0.3966219464596358, "learning_rate": 6.837984443401807e-06, "loss": 0.2006, "step": 3972 }, { "epoch": 1.326765737184839, "grad_norm": 0.4215570099402985, "learning_rate": 6.836177033333882e-06, "loss": 0.2002, "step": 3973 }, { "epoch": 1.3270996827517114, "grad_norm": 0.39620589259657973, "learning_rate": 6.834369345889793e-06, "loss": 0.1938, "step": 3974 }, { "epoch": 1.3274336283185841, "grad_norm": 0.43563140461015154, "learning_rate": 6.832561381342617e-06, "loss": 0.2061, "step": 3975 }, { "epoch": 1.3277675738854566, "grad_norm": 0.4041140648181498, "learning_rate": 6.830753139965467e-06, "loss": 0.1975, "step": 3976 }, { "epoch": 1.3281015194523293, "grad_norm": 0.41885286235310515, "learning_rate": 6.828944622031497e-06, "loss": 0.2032, "step": 3977 }, { "epoch": 1.3284354650192018, "grad_norm": 0.4670424648659507, "learning_rate": 6.827135827813909e-06, "loss": 0.2011, "step": 3978 }, { "epoch": 1.3287694105860743, "grad_norm": 0.45994411686683434, "learning_rate": 6.825326757585939e-06, "loss": 0.2069, "step": 3979 }, { "epoch": 1.329103356152947, "grad_norm": 0.4039507808169467, "learning_rate": 6.823517411620871e-06, "loss": 0.1889, "step": 3980 }, { "epoch": 1.3294373017198198, "grad_norm": 0.40372411165182154, "learning_rate": 6.821707790192025e-06, "loss": 0.1921, "step": 3981 }, { "epoch": 1.3297712472866923, "grad_norm": 0.44376795314218465, "learning_rate": 6.819897893572769e-06, "loss": 0.1962, "step": 3982 }, { "epoch": 1.3301051928535648, "grad_norm": 0.39179303819655353, "learning_rate": 6.818087722036507e-06, "loss": 0.1891, "step": 3983 }, { "epoch": 1.3304391384204375, "grad_norm": 0.44009288949714437, "learning_rate": 6.8162772758566875e-06, "loss": 0.2119, "step": 3984 }, { "epoch": 1.3307730839873102, "grad_norm": 0.4444703006216181, "learning_rate": 6.8144665553067975e-06, "loss": 0.2195, "step": 3985 }, { "epoch": 1.3311070295541827, "grad_norm": 0.4277024217685211, "learning_rate": 6.812655560660373e-06, "loss": 0.1933, "step": 3986 }, { "epoch": 1.3314409751210552, "grad_norm": 0.43496112243374163, "learning_rate": 6.810844292190982e-06, "loss": 0.2074, "step": 3987 }, { "epoch": 1.331774920687928, "grad_norm": 0.45009831755053864, "learning_rate": 6.809032750172236e-06, "loss": 0.2111, "step": 3988 }, { "epoch": 1.3321088662548004, "grad_norm": 0.43335127708055415, "learning_rate": 6.807220934877794e-06, "loss": 0.2008, "step": 3989 }, { "epoch": 1.3324428118216731, "grad_norm": 0.4212267467609862, "learning_rate": 6.80540884658135e-06, "loss": 0.1898, "step": 3990 }, { "epoch": 1.3327767573885456, "grad_norm": 0.4700750950129188, "learning_rate": 6.803596485556643e-06, "loss": 0.2029, "step": 3991 }, { "epoch": 1.3331107029554183, "grad_norm": 0.415428277497626, "learning_rate": 6.8017838520774494e-06, "loss": 0.1884, "step": 3992 }, { "epoch": 1.3334446485222908, "grad_norm": 0.43456686746455836, "learning_rate": 6.79997094641759e-06, "loss": 0.2062, "step": 3993 }, { "epoch": 1.3337785940891635, "grad_norm": 0.477054485573292, "learning_rate": 6.798157768850924e-06, "loss": 0.1951, "step": 3994 }, { "epoch": 1.334112539656036, "grad_norm": 0.4216294974695778, "learning_rate": 6.796344319651356e-06, "loss": 0.2044, "step": 3995 }, { "epoch": 1.3344464852229088, "grad_norm": 0.6541115831812444, "learning_rate": 6.794530599092826e-06, "loss": 0.2053, "step": 3996 }, { "epoch": 1.3347804307897813, "grad_norm": 0.4641533174704176, "learning_rate": 6.792716607449319e-06, "loss": 0.2049, "step": 3997 }, { "epoch": 1.3351143763566538, "grad_norm": 0.469313295525281, "learning_rate": 6.790902344994861e-06, "loss": 0.2025, "step": 3998 }, { "epoch": 1.3354483219235265, "grad_norm": 0.4146298442288413, "learning_rate": 6.789087812003516e-06, "loss": 0.2056, "step": 3999 }, { "epoch": 1.3357822674903992, "grad_norm": 0.4017430523249004, "learning_rate": 6.787273008749391e-06, "loss": 0.1972, "step": 4000 }, { "epoch": 1.3361162130572717, "grad_norm": 0.4038683460405391, "learning_rate": 6.785457935506634e-06, "loss": 0.1851, "step": 4001 }, { "epoch": 1.3364501586241442, "grad_norm": 0.4195513584233563, "learning_rate": 6.783642592549433e-06, "loss": 0.1981, "step": 4002 }, { "epoch": 1.336784104191017, "grad_norm": 0.47255883489969897, "learning_rate": 6.781826980152015e-06, "loss": 0.1976, "step": 4003 }, { "epoch": 1.3371180497578894, "grad_norm": 0.39785977899861913, "learning_rate": 6.780011098588654e-06, "loss": 0.1893, "step": 4004 }, { "epoch": 1.337451995324762, "grad_norm": 0.4059364052664352, "learning_rate": 6.778194948133656e-06, "loss": 0.1934, "step": 4005 }, { "epoch": 1.3377859408916346, "grad_norm": 0.38153108505480937, "learning_rate": 6.776378529061374e-06, "loss": 0.1838, "step": 4006 }, { "epoch": 1.3381198864585073, "grad_norm": 0.4191943863581517, "learning_rate": 6.774561841646199e-06, "loss": 0.1969, "step": 4007 }, { "epoch": 1.3384538320253798, "grad_norm": 0.43300160469021504, "learning_rate": 6.772744886162563e-06, "loss": 0.2016, "step": 4008 }, { "epoch": 1.3387877775922525, "grad_norm": 0.4461504516470582, "learning_rate": 6.770927662884937e-06, "loss": 0.2038, "step": 4009 }, { "epoch": 1.339121723159125, "grad_norm": 0.4468710082082139, "learning_rate": 6.769110172087838e-06, "loss": 0.2118, "step": 4010 }, { "epoch": 1.3394556687259978, "grad_norm": 0.42922253720928116, "learning_rate": 6.767292414045816e-06, "loss": 0.1836, "step": 4011 }, { "epoch": 1.3397896142928702, "grad_norm": 0.7166284001551869, "learning_rate": 6.765474389033464e-06, "loss": 0.1962, "step": 4012 }, { "epoch": 1.3401235598597427, "grad_norm": 0.5407365657709906, "learning_rate": 6.7636560973254195e-06, "loss": 0.1867, "step": 4013 }, { "epoch": 1.3404575054266155, "grad_norm": 0.4164038811937711, "learning_rate": 6.761837539196355e-06, "loss": 0.1919, "step": 4014 }, { "epoch": 1.3407914509934882, "grad_norm": 0.42826189965553885, "learning_rate": 6.760018714920985e-06, "loss": 0.1958, "step": 4015 }, { "epoch": 1.3411253965603607, "grad_norm": 0.434608355144472, "learning_rate": 6.758199624774065e-06, "loss": 0.1896, "step": 4016 }, { "epoch": 1.3414593421272332, "grad_norm": 0.4068014354296992, "learning_rate": 6.7563802690303895e-06, "loss": 0.2008, "step": 4017 }, { "epoch": 1.3417932876941059, "grad_norm": 0.4095906278662473, "learning_rate": 6.7545606479647915e-06, "loss": 0.1943, "step": 4018 }, { "epoch": 1.3421272332609784, "grad_norm": 0.4169936089160469, "learning_rate": 6.752740761852151e-06, "loss": 0.1997, "step": 4019 }, { "epoch": 1.342461178827851, "grad_norm": 0.4142264028021034, "learning_rate": 6.7509206109673794e-06, "loss": 0.1978, "step": 4020 }, { "epoch": 1.3427951243947236, "grad_norm": 0.4059367415693497, "learning_rate": 6.749100195585433e-06, "loss": 0.1994, "step": 4021 }, { "epoch": 1.3431290699615963, "grad_norm": 0.44463105396489705, "learning_rate": 6.747279515981307e-06, "loss": 0.2101, "step": 4022 }, { "epoch": 1.3434630155284688, "grad_norm": 0.44157463328609936, "learning_rate": 6.745458572430038e-06, "loss": 0.1992, "step": 4023 }, { "epoch": 1.3437969610953415, "grad_norm": 0.38829291132548344, "learning_rate": 6.743637365206698e-06, "loss": 0.199, "step": 4024 }, { "epoch": 1.344130906662214, "grad_norm": 0.4474724309977797, "learning_rate": 6.741815894586404e-06, "loss": 0.2131, "step": 4025 }, { "epoch": 1.3444648522290867, "grad_norm": 0.5159836340150592, "learning_rate": 6.7399941608443096e-06, "loss": 0.2019, "step": 4026 }, { "epoch": 1.3447987977959592, "grad_norm": 0.4128853305002455, "learning_rate": 6.7381721642556095e-06, "loss": 0.184, "step": 4027 }, { "epoch": 1.3451327433628317, "grad_norm": 0.47630481187131113, "learning_rate": 6.736349905095538e-06, "loss": 0.1956, "step": 4028 }, { "epoch": 1.3454666889297044, "grad_norm": 0.43170867694495907, "learning_rate": 6.734527383639369e-06, "loss": 0.2004, "step": 4029 }, { "epoch": 1.3458006344965772, "grad_norm": 0.44362403957101815, "learning_rate": 6.732704600162414e-06, "loss": 0.22, "step": 4030 }, { "epoch": 1.3461345800634497, "grad_norm": 0.4137671915130916, "learning_rate": 6.730881554940029e-06, "loss": 0.202, "step": 4031 }, { "epoch": 1.3464685256303222, "grad_norm": 0.43747702708097363, "learning_rate": 6.729058248247602e-06, "loss": 0.2066, "step": 4032 }, { "epoch": 1.3468024711971949, "grad_norm": 0.4331403596331942, "learning_rate": 6.727234680360569e-06, "loss": 0.2067, "step": 4033 }, { "epoch": 1.3471364167640676, "grad_norm": 0.3868967897202734, "learning_rate": 6.725410851554401e-06, "loss": 0.188, "step": 4034 }, { "epoch": 1.34747036233094, "grad_norm": 0.5018421015205833, "learning_rate": 6.7235867621046055e-06, "loss": 0.1986, "step": 4035 }, { "epoch": 1.3478043078978126, "grad_norm": 0.3939057183783324, "learning_rate": 6.721762412286738e-06, "loss": 0.1932, "step": 4036 }, { "epoch": 1.3481382534646853, "grad_norm": 0.42393788403562216, "learning_rate": 6.719937802376383e-06, "loss": 0.1912, "step": 4037 }, { "epoch": 1.3484721990315578, "grad_norm": 0.41288963328543327, "learning_rate": 6.718112932649171e-06, "loss": 0.1964, "step": 4038 }, { "epoch": 1.3488061445984305, "grad_norm": 0.4732934923796236, "learning_rate": 6.716287803380771e-06, "loss": 0.2072, "step": 4039 }, { "epoch": 1.349140090165303, "grad_norm": 0.4372044972174929, "learning_rate": 6.714462414846891e-06, "loss": 0.2041, "step": 4040 }, { "epoch": 1.3494740357321757, "grad_norm": 0.4355557384762028, "learning_rate": 6.712636767323273e-06, "loss": 0.2052, "step": 4041 }, { "epoch": 1.3498079812990482, "grad_norm": 0.4392920698842665, "learning_rate": 6.710810861085708e-06, "loss": 0.2097, "step": 4042 }, { "epoch": 1.3501419268659207, "grad_norm": 0.406740095621287, "learning_rate": 6.708984696410018e-06, "loss": 0.1893, "step": 4043 }, { "epoch": 1.3504758724327934, "grad_norm": 0.373673142911883, "learning_rate": 6.707158273572066e-06, "loss": 0.1867, "step": 4044 }, { "epoch": 1.3508098179996662, "grad_norm": 0.40677087019915287, "learning_rate": 6.7053315928477566e-06, "loss": 0.1944, "step": 4045 }, { "epoch": 1.3511437635665386, "grad_norm": 0.4062089141523285, "learning_rate": 6.703504654513031e-06, "loss": 0.1898, "step": 4046 }, { "epoch": 1.3514777091334111, "grad_norm": 0.41024823502203445, "learning_rate": 6.701677458843868e-06, "loss": 0.1926, "step": 4047 }, { "epoch": 1.3518116547002839, "grad_norm": 0.426250767229244, "learning_rate": 6.6998500061162884e-06, "loss": 0.2071, "step": 4048 }, { "epoch": 1.3521456002671566, "grad_norm": 0.4039386500414676, "learning_rate": 6.6980222966063516e-06, "loss": 0.1885, "step": 4049 }, { "epoch": 1.352479545834029, "grad_norm": 0.42655519261390634, "learning_rate": 6.6961943305901515e-06, "loss": 0.1971, "step": 4050 }, { "epoch": 1.3528134914009016, "grad_norm": 0.5067100050466419, "learning_rate": 6.694366108343827e-06, "loss": 0.2232, "step": 4051 }, { "epoch": 1.3531474369677743, "grad_norm": 0.48278513038763404, "learning_rate": 6.692537630143551e-06, "loss": 0.1997, "step": 4052 }, { "epoch": 1.3534813825346468, "grad_norm": 0.4507838778276989, "learning_rate": 6.6907088962655375e-06, "loss": 0.1962, "step": 4053 }, { "epoch": 1.3538153281015195, "grad_norm": 0.4551173960396972, "learning_rate": 6.688879906986036e-06, "loss": 0.2083, "step": 4054 }, { "epoch": 1.354149273668392, "grad_norm": 0.4569334112005147, "learning_rate": 6.687050662581341e-06, "loss": 0.2214, "step": 4055 }, { "epoch": 1.3544832192352647, "grad_norm": 0.4806277133503981, "learning_rate": 6.685221163327778e-06, "loss": 0.1979, "step": 4056 }, { "epoch": 1.3548171648021372, "grad_norm": 0.4563806677226338, "learning_rate": 6.683391409501715e-06, "loss": 0.2132, "step": 4057 }, { "epoch": 1.35515111036901, "grad_norm": 0.5042972618637712, "learning_rate": 6.6815614013795595e-06, "loss": 0.2192, "step": 4058 }, { "epoch": 1.3554850559358824, "grad_norm": 0.44352802619886555, "learning_rate": 6.679731139237753e-06, "loss": 0.2, "step": 4059 }, { "epoch": 1.3558190015027551, "grad_norm": 0.4350886660962178, "learning_rate": 6.67790062335278e-06, "loss": 0.1952, "step": 4060 }, { "epoch": 1.3561529470696276, "grad_norm": 0.40422153509801434, "learning_rate": 6.676069854001162e-06, "loss": 0.195, "step": 4061 }, { "epoch": 1.3564868926365001, "grad_norm": 0.41453093337842917, "learning_rate": 6.674238831459456e-06, "loss": 0.2004, "step": 4062 }, { "epoch": 1.3568208382033728, "grad_norm": 0.43625037543673406, "learning_rate": 6.672407556004262e-06, "loss": 0.2025, "step": 4063 }, { "epoch": 1.3571547837702456, "grad_norm": 0.4481372504164815, "learning_rate": 6.670576027912215e-06, "loss": 0.1952, "step": 4064 }, { "epoch": 1.357488729337118, "grad_norm": 0.4335407448052665, "learning_rate": 6.668744247459988e-06, "loss": 0.1961, "step": 4065 }, { "epoch": 1.3578226749039906, "grad_norm": 0.44753482938235967, "learning_rate": 6.666912214924295e-06, "loss": 0.2144, "step": 4066 }, { "epoch": 1.3581566204708633, "grad_norm": 0.3991995293082414, "learning_rate": 6.665079930581883e-06, "loss": 0.1853, "step": 4067 }, { "epoch": 1.3584905660377358, "grad_norm": 0.42486029468962033, "learning_rate": 6.663247394709542e-06, "loss": 0.1962, "step": 4068 }, { "epoch": 1.3588245116046085, "grad_norm": 0.4266054882931973, "learning_rate": 6.661414607584099e-06, "loss": 0.1975, "step": 4069 }, { "epoch": 1.359158457171481, "grad_norm": 0.432581270411452, "learning_rate": 6.659581569482415e-06, "loss": 0.2044, "step": 4070 }, { "epoch": 1.3594924027383537, "grad_norm": 0.40150080646226216, "learning_rate": 6.657748280681395e-06, "loss": 0.2039, "step": 4071 }, { "epoch": 1.3598263483052262, "grad_norm": 0.4225694559806857, "learning_rate": 6.65591474145798e-06, "loss": 0.204, "step": 4072 }, { "epoch": 1.360160293872099, "grad_norm": 0.45371512908538403, "learning_rate": 6.6540809520891425e-06, "loss": 0.1996, "step": 4073 }, { "epoch": 1.3604942394389714, "grad_norm": 0.4263029427537738, "learning_rate": 6.652246912851903e-06, "loss": 0.1985, "step": 4074 }, { "epoch": 1.3608281850058441, "grad_norm": 0.4403693400801846, "learning_rate": 6.650412624023311e-06, "loss": 0.2079, "step": 4075 }, { "epoch": 1.3611621305727166, "grad_norm": 0.4672726128852996, "learning_rate": 6.648578085880461e-06, "loss": 0.2114, "step": 4076 }, { "epoch": 1.3614960761395891, "grad_norm": 0.42076978881075816, "learning_rate": 6.64674329870048e-06, "loss": 0.1915, "step": 4077 }, { "epoch": 1.3618300217064618, "grad_norm": 0.4074124100582979, "learning_rate": 6.644908262760531e-06, "loss": 0.196, "step": 4078 }, { "epoch": 1.3621639672733346, "grad_norm": 0.43709488826547427, "learning_rate": 6.643072978337823e-06, "loss": 0.2024, "step": 4079 }, { "epoch": 1.362497912840207, "grad_norm": 0.4410887006441817, "learning_rate": 6.641237445709595e-06, "loss": 0.2083, "step": 4080 }, { "epoch": 1.3628318584070795, "grad_norm": 0.41075833370114767, "learning_rate": 6.639401665153126e-06, "loss": 0.1874, "step": 4081 }, { "epoch": 1.3631658039739523, "grad_norm": 0.4314059293595324, "learning_rate": 6.637565636945731e-06, "loss": 0.1969, "step": 4082 }, { "epoch": 1.363499749540825, "grad_norm": 0.3962845145124328, "learning_rate": 6.635729361364765e-06, "loss": 0.182, "step": 4083 }, { "epoch": 1.3638336951076975, "grad_norm": 0.45085289607224943, "learning_rate": 6.633892838687621e-06, "loss": 0.2153, "step": 4084 }, { "epoch": 1.36416764067457, "grad_norm": 0.442502443264247, "learning_rate": 6.632056069191723e-06, "loss": 0.203, "step": 4085 }, { "epoch": 1.3645015862414427, "grad_norm": 0.4331715292207473, "learning_rate": 6.6302190531545395e-06, "loss": 0.1989, "step": 4086 }, { "epoch": 1.3648355318083152, "grad_norm": 0.4526521135216819, "learning_rate": 6.628381790853573e-06, "loss": 0.2094, "step": 4087 }, { "epoch": 1.365169477375188, "grad_norm": 0.40449183201031125, "learning_rate": 6.626544282566363e-06, "loss": 0.1949, "step": 4088 }, { "epoch": 1.3655034229420604, "grad_norm": 0.40387227018258237, "learning_rate": 6.624706528570487e-06, "loss": 0.1988, "step": 4089 }, { "epoch": 1.3658373685089331, "grad_norm": 0.5711678934538252, "learning_rate": 6.6228685291435605e-06, "loss": 0.2188, "step": 4090 }, { "epoch": 1.3661713140758056, "grad_norm": 0.4421358421448025, "learning_rate": 6.621030284563232e-06, "loss": 0.1824, "step": 4091 }, { "epoch": 1.366505259642678, "grad_norm": 0.4559202259210313, "learning_rate": 6.619191795107195e-06, "loss": 0.217, "step": 4092 }, { "epoch": 1.3668392052095508, "grad_norm": 0.4244083311195419, "learning_rate": 6.617353061053171e-06, "loss": 0.2009, "step": 4093 }, { "epoch": 1.3671731507764235, "grad_norm": 0.4173771853428039, "learning_rate": 6.615514082678922e-06, "loss": 0.1872, "step": 4094 }, { "epoch": 1.367507096343296, "grad_norm": 0.4377390377861039, "learning_rate": 6.613674860262249e-06, "loss": 0.2141, "step": 4095 }, { "epoch": 1.3678410419101685, "grad_norm": 0.406886823763542, "learning_rate": 6.61183539408099e-06, "loss": 0.1906, "step": 4096 }, { "epoch": 1.3681749874770412, "grad_norm": 0.42851766250941675, "learning_rate": 6.609995684413013e-06, "loss": 0.1965, "step": 4097 }, { "epoch": 1.368508933043914, "grad_norm": 0.622902793386304, "learning_rate": 6.608155731536233e-06, "loss": 0.1968, "step": 4098 }, { "epoch": 1.3688428786107865, "grad_norm": 0.3853625281666951, "learning_rate": 6.606315535728594e-06, "loss": 0.1816, "step": 4099 }, { "epoch": 1.369176824177659, "grad_norm": 0.4666928758678988, "learning_rate": 6.604475097268079e-06, "loss": 0.202, "step": 4100 }, { "epoch": 1.3695107697445317, "grad_norm": 0.3771661413941458, "learning_rate": 6.602634416432708e-06, "loss": 0.1874, "step": 4101 }, { "epoch": 1.3698447153114042, "grad_norm": 0.4610010980819958, "learning_rate": 6.600793493500539e-06, "loss": 0.2075, "step": 4102 }, { "epoch": 1.3701786608782769, "grad_norm": 0.41246814662170656, "learning_rate": 6.5989523287496645e-06, "loss": 0.1907, "step": 4103 }, { "epoch": 1.3705126064451494, "grad_norm": 0.44901774277549816, "learning_rate": 6.597110922458214e-06, "loss": 0.1955, "step": 4104 }, { "epoch": 1.370846552012022, "grad_norm": 0.3989788461433798, "learning_rate": 6.595269274904351e-06, "loss": 0.1971, "step": 4105 }, { "epoch": 1.3711804975788946, "grad_norm": 0.4601759741943717, "learning_rate": 6.593427386366282e-06, "loss": 0.2044, "step": 4106 }, { "epoch": 1.3715144431457673, "grad_norm": 0.4075278252757786, "learning_rate": 6.591585257122244e-06, "loss": 0.1929, "step": 4107 }, { "epoch": 1.3718483887126398, "grad_norm": 0.45268827886168284, "learning_rate": 6.589742887450512e-06, "loss": 0.2012, "step": 4108 }, { "epoch": 1.3721823342795125, "grad_norm": 0.39829538551301213, "learning_rate": 6.5879002776294e-06, "loss": 0.1934, "step": 4109 }, { "epoch": 1.372516279846385, "grad_norm": 0.4075145132486534, "learning_rate": 6.586057427937252e-06, "loss": 0.2013, "step": 4110 }, { "epoch": 1.3728502254132575, "grad_norm": 0.413224264908335, "learning_rate": 6.584214338652455e-06, "loss": 0.1885, "step": 4111 }, { "epoch": 1.3731841709801302, "grad_norm": 0.42233907535924026, "learning_rate": 6.582371010053429e-06, "loss": 0.2007, "step": 4112 }, { "epoch": 1.373518116547003, "grad_norm": 0.4689784474170459, "learning_rate": 6.58052744241863e-06, "loss": 0.2026, "step": 4113 }, { "epoch": 1.3738520621138754, "grad_norm": 0.42477561232876815, "learning_rate": 6.578683636026551e-06, "loss": 0.203, "step": 4114 }, { "epoch": 1.374186007680748, "grad_norm": 0.4309943871533634, "learning_rate": 6.576839591155719e-06, "loss": 0.1938, "step": 4115 }, { "epoch": 1.3745199532476207, "grad_norm": 0.5615884354542333, "learning_rate": 6.574995308084702e-06, "loss": 0.1806, "step": 4116 }, { "epoch": 1.3748538988144932, "grad_norm": 0.3813788802372199, "learning_rate": 6.573150787092097e-06, "loss": 0.1733, "step": 4117 }, { "epoch": 1.3751878443813659, "grad_norm": 0.42148123715265845, "learning_rate": 6.5713060284565435e-06, "loss": 0.1896, "step": 4118 }, { "epoch": 1.3755217899482384, "grad_norm": 0.431883451577582, "learning_rate": 6.569461032456713e-06, "loss": 0.2186, "step": 4119 }, { "epoch": 1.375855735515111, "grad_norm": 0.4066532875757682, "learning_rate": 6.567615799371313e-06, "loss": 0.1902, "step": 4120 }, { "epoch": 1.3761896810819836, "grad_norm": 0.4370833319403715, "learning_rate": 6.565770329479089e-06, "loss": 0.2035, "step": 4121 }, { "epoch": 1.3765236266488563, "grad_norm": 0.39098258449611045, "learning_rate": 6.5639246230588205e-06, "loss": 0.1918, "step": 4122 }, { "epoch": 1.3768575722157288, "grad_norm": 0.39463295296495227, "learning_rate": 6.562078680389323e-06, "loss": 0.1874, "step": 4123 }, { "epoch": 1.3771915177826015, "grad_norm": 0.7838749092433236, "learning_rate": 6.560232501749446e-06, "loss": 0.2093, "step": 4124 }, { "epoch": 1.377525463349474, "grad_norm": 0.45124048458861776, "learning_rate": 6.558386087418082e-06, "loss": 0.2058, "step": 4125 }, { "epoch": 1.3778594089163465, "grad_norm": 0.44508050673501187, "learning_rate": 6.556539437674147e-06, "loss": 0.2119, "step": 4126 }, { "epoch": 1.3781933544832192, "grad_norm": 0.4532245305883286, "learning_rate": 6.554692552796604e-06, "loss": 0.1988, "step": 4127 }, { "epoch": 1.378527300050092, "grad_norm": 0.4366039970271216, "learning_rate": 6.552845433064445e-06, "loss": 0.2016, "step": 4128 }, { "epoch": 1.3788612456169644, "grad_norm": 0.43991279957977425, "learning_rate": 6.550998078756698e-06, "loss": 0.2125, "step": 4129 }, { "epoch": 1.379195191183837, "grad_norm": 0.45075051528764204, "learning_rate": 6.549150490152429e-06, "loss": 0.2124, "step": 4130 }, { "epoch": 1.3795291367507097, "grad_norm": 0.4697620488513748, "learning_rate": 6.5473026675307394e-06, "loss": 0.214, "step": 4131 }, { "epoch": 1.3798630823175824, "grad_norm": 0.4650812525046603, "learning_rate": 6.545454611170762e-06, "loss": 0.2046, "step": 4132 }, { "epoch": 1.3801970278844549, "grad_norm": 0.4473205624182132, "learning_rate": 6.543606321351668e-06, "loss": 0.1995, "step": 4133 }, { "epoch": 1.3805309734513274, "grad_norm": 0.43793988430119773, "learning_rate": 6.541757798352664e-06, "loss": 0.1964, "step": 4134 }, { "epoch": 1.3808649190182, "grad_norm": 0.46122603750033925, "learning_rate": 6.539909042452991e-06, "loss": 0.2021, "step": 4135 }, { "epoch": 1.3811988645850726, "grad_norm": 0.4431099270741144, "learning_rate": 6.538060053931925e-06, "loss": 0.2151, "step": 4136 }, { "epoch": 1.3815328101519453, "grad_norm": 0.44155698593464704, "learning_rate": 6.536210833068779e-06, "loss": 0.1947, "step": 4137 }, { "epoch": 1.3818667557188178, "grad_norm": 0.4109387527355864, "learning_rate": 6.534361380142896e-06, "loss": 0.1998, "step": 4138 }, { "epoch": 1.3822007012856905, "grad_norm": 0.4603948218667672, "learning_rate": 6.532511695433662e-06, "loss": 0.1966, "step": 4139 }, { "epoch": 1.382534646852563, "grad_norm": 0.44282145799352707, "learning_rate": 6.5306617792204915e-06, "loss": 0.1896, "step": 4140 }, { "epoch": 1.3828685924194355, "grad_norm": 0.44104795387567414, "learning_rate": 6.528811631782835e-06, "loss": 0.2068, "step": 4141 }, { "epoch": 1.3832025379863082, "grad_norm": 0.48858635612357, "learning_rate": 6.526961253400181e-06, "loss": 0.2144, "step": 4142 }, { "epoch": 1.383536483553181, "grad_norm": 0.44312917050300416, "learning_rate": 6.525110644352052e-06, "loss": 0.2069, "step": 4143 }, { "epoch": 1.3838704291200534, "grad_norm": 0.3981097773309502, "learning_rate": 6.523259804918001e-06, "loss": 0.1897, "step": 4144 }, { "epoch": 1.384204374686926, "grad_norm": 0.4204274275796497, "learning_rate": 6.52140873537762e-06, "loss": 0.1838, "step": 4145 }, { "epoch": 1.3845383202537986, "grad_norm": 0.46326487504021546, "learning_rate": 6.519557436010535e-06, "loss": 0.2145, "step": 4146 }, { "epoch": 1.3848722658206714, "grad_norm": 0.4722683700335305, "learning_rate": 6.51770590709641e-06, "loss": 0.2081, "step": 4147 }, { "epoch": 1.3852062113875439, "grad_norm": 0.4311480901777335, "learning_rate": 6.515854148914935e-06, "loss": 0.2024, "step": 4148 }, { "epoch": 1.3855401569544163, "grad_norm": 0.46985472062528916, "learning_rate": 6.514002161745844e-06, "loss": 0.2183, "step": 4149 }, { "epoch": 1.385874102521289, "grad_norm": 0.4803730015949404, "learning_rate": 6.512149945868898e-06, "loss": 0.2193, "step": 4150 }, { "epoch": 1.3862080480881616, "grad_norm": 0.4597435238205787, "learning_rate": 6.510297501563899e-06, "loss": 0.2072, "step": 4151 }, { "epoch": 1.3865419936550343, "grad_norm": 0.43218955473225323, "learning_rate": 6.5084448291106785e-06, "loss": 0.2076, "step": 4152 }, { "epoch": 1.3868759392219068, "grad_norm": 0.4150408431997899, "learning_rate": 6.506591928789105e-06, "loss": 0.1936, "step": 4153 }, { "epoch": 1.3872098847887795, "grad_norm": 0.4312232452910037, "learning_rate": 6.504738800879081e-06, "loss": 0.1933, "step": 4154 }, { "epoch": 1.387543830355652, "grad_norm": 0.4064866596866989, "learning_rate": 6.502885445660544e-06, "loss": 0.184, "step": 4155 }, { "epoch": 1.3878777759225247, "grad_norm": 0.3829518324598277, "learning_rate": 6.501031863413464e-06, "loss": 0.1746, "step": 4156 }, { "epoch": 1.3882117214893972, "grad_norm": 0.45492095092003937, "learning_rate": 6.499178054417847e-06, "loss": 0.2152, "step": 4157 }, { "epoch": 1.38854566705627, "grad_norm": 0.4457187903715593, "learning_rate": 6.497324018953732e-06, "loss": 0.2061, "step": 4158 }, { "epoch": 1.3888796126231424, "grad_norm": 0.4466194927024366, "learning_rate": 6.495469757301196e-06, "loss": 0.1999, "step": 4159 }, { "epoch": 1.389213558190015, "grad_norm": 0.4355326082302892, "learning_rate": 6.493615269740343e-06, "loss": 0.2141, "step": 4160 }, { "epoch": 1.3895475037568876, "grad_norm": 0.3940978415054905, "learning_rate": 6.491760556551315e-06, "loss": 0.188, "step": 4161 }, { "epoch": 1.3898814493237603, "grad_norm": 0.4292342258128804, "learning_rate": 6.489905618014293e-06, "loss": 0.2138, "step": 4162 }, { "epoch": 1.3902153948906328, "grad_norm": 0.43189024487894684, "learning_rate": 6.488050454409483e-06, "loss": 0.1972, "step": 4163 }, { "epoch": 1.3905493404575053, "grad_norm": 0.4053167522949468, "learning_rate": 6.486195066017129e-06, "loss": 0.1958, "step": 4164 }, { "epoch": 1.390883286024378, "grad_norm": 0.45743707193697686, "learning_rate": 6.484339453117514e-06, "loss": 0.2064, "step": 4165 }, { "epoch": 1.3912172315912505, "grad_norm": 0.3727514433378026, "learning_rate": 6.482483615990945e-06, "loss": 0.1789, "step": 4166 }, { "epoch": 1.3915511771581233, "grad_norm": 0.38894137550668284, "learning_rate": 6.480627554917771e-06, "loss": 0.1897, "step": 4167 }, { "epoch": 1.3918851227249958, "grad_norm": 0.3949005314394, "learning_rate": 6.47877127017837e-06, "loss": 0.184, "step": 4168 }, { "epoch": 1.3922190682918685, "grad_norm": 0.5483421957144745, "learning_rate": 6.476914762053158e-06, "loss": 0.2248, "step": 4169 }, { "epoch": 1.392553013858741, "grad_norm": 0.4349485020907497, "learning_rate": 6.47505803082258e-06, "loss": 0.2087, "step": 4170 }, { "epoch": 1.3928869594256137, "grad_norm": 0.494753044097071, "learning_rate": 6.473201076767119e-06, "loss": 0.2029, "step": 4171 }, { "epoch": 1.3932209049924862, "grad_norm": 0.402889777781755, "learning_rate": 6.471343900167289e-06, "loss": 0.1958, "step": 4172 }, { "epoch": 1.393554850559359, "grad_norm": 0.42334287046577884, "learning_rate": 6.469486501303639e-06, "loss": 0.2183, "step": 4173 }, { "epoch": 1.3938887961262314, "grad_norm": 0.3947978066614757, "learning_rate": 6.467628880456749e-06, "loss": 0.1865, "step": 4174 }, { "epoch": 1.394222741693104, "grad_norm": 0.47629578193143574, "learning_rate": 6.465771037907236e-06, "loss": 0.2095, "step": 4175 }, { "epoch": 1.3945566872599766, "grad_norm": 0.3880989484453497, "learning_rate": 6.463912973935749e-06, "loss": 0.1855, "step": 4176 }, { "epoch": 1.3948906328268493, "grad_norm": 0.47037211387441913, "learning_rate": 6.462054688822971e-06, "loss": 0.2078, "step": 4177 }, { "epoch": 1.3952245783937218, "grad_norm": 0.382314312114383, "learning_rate": 6.460196182849616e-06, "loss": 0.1982, "step": 4178 }, { "epoch": 1.3955585239605943, "grad_norm": 0.41109789004350694, "learning_rate": 6.458337456296434e-06, "loss": 0.2024, "step": 4179 }, { "epoch": 1.395892469527467, "grad_norm": 0.4899142261094805, "learning_rate": 6.456478509444209e-06, "loss": 0.1993, "step": 4180 }, { "epoch": 1.3962264150943398, "grad_norm": 0.42665421483230975, "learning_rate": 6.454619342573756e-06, "loss": 0.2041, "step": 4181 }, { "epoch": 1.3965603606612123, "grad_norm": 0.41216556645235136, "learning_rate": 6.452759955965922e-06, "loss": 0.1884, "step": 4182 }, { "epoch": 1.3968943062280847, "grad_norm": 0.4322046656673873, "learning_rate": 6.450900349901592e-06, "loss": 0.2001, "step": 4183 }, { "epoch": 1.3972282517949575, "grad_norm": 0.4285030575773755, "learning_rate": 6.449040524661681e-06, "loss": 0.1906, "step": 4184 }, { "epoch": 1.39756219736183, "grad_norm": 0.43465596091020026, "learning_rate": 6.447180480527135e-06, "loss": 0.2021, "step": 4185 }, { "epoch": 1.3978961429287027, "grad_norm": 0.41971935181118564, "learning_rate": 6.445320217778939e-06, "loss": 0.1957, "step": 4186 }, { "epoch": 1.3982300884955752, "grad_norm": 0.4583440890267203, "learning_rate": 6.443459736698106e-06, "loss": 0.2051, "step": 4187 }, { "epoch": 1.398564034062448, "grad_norm": 0.4406778185384973, "learning_rate": 6.4415990375656826e-06, "loss": 0.2003, "step": 4188 }, { "epoch": 1.3988979796293204, "grad_norm": 0.5268157377750877, "learning_rate": 6.4397381206627505e-06, "loss": 0.2256, "step": 4189 }, { "epoch": 1.3992319251961929, "grad_norm": 0.40175298789569275, "learning_rate": 6.437876986270424e-06, "loss": 0.1926, "step": 4190 }, { "epoch": 1.3995658707630656, "grad_norm": 0.43113117207329726, "learning_rate": 6.436015634669848e-06, "loss": 0.2101, "step": 4191 }, { "epoch": 1.3998998163299383, "grad_norm": 0.40792889401372284, "learning_rate": 6.434154066142201e-06, "loss": 0.1982, "step": 4192 }, { "epoch": 1.4002337618968108, "grad_norm": 0.3980086772220684, "learning_rate": 6.432292280968695e-06, "loss": 0.2006, "step": 4193 }, { "epoch": 1.4005677074636833, "grad_norm": 0.45321556751445036, "learning_rate": 6.430430279430577e-06, "loss": 0.2076, "step": 4194 }, { "epoch": 1.400901653030556, "grad_norm": 0.3962084842360654, "learning_rate": 6.428568061809122e-06, "loss": 0.2022, "step": 4195 }, { "epoch": 1.4012355985974287, "grad_norm": 0.38721195579949064, "learning_rate": 6.426705628385641e-06, "loss": 0.1928, "step": 4196 }, { "epoch": 1.4015695441643012, "grad_norm": 0.43865314302481156, "learning_rate": 6.4248429794414745e-06, "loss": 0.1984, "step": 4197 }, { "epoch": 1.4019034897311737, "grad_norm": 0.4245277931246651, "learning_rate": 6.422980115258e-06, "loss": 0.2047, "step": 4198 }, { "epoch": 1.4022374352980465, "grad_norm": 0.40245781599580605, "learning_rate": 6.421117036116624e-06, "loss": 0.1939, "step": 4199 }, { "epoch": 1.402571380864919, "grad_norm": 0.42437101178064845, "learning_rate": 6.4192537422987864e-06, "loss": 0.1898, "step": 4200 }, { "epoch": 1.4029053264317917, "grad_norm": 0.41477729386611645, "learning_rate": 6.417390234085961e-06, "loss": 0.2016, "step": 4201 }, { "epoch": 1.4032392719986642, "grad_norm": 0.39992406322586277, "learning_rate": 6.415526511759649e-06, "loss": 0.1922, "step": 4202 }, { "epoch": 1.4035732175655369, "grad_norm": 0.45466666242275616, "learning_rate": 6.413662575601391e-06, "loss": 0.2148, "step": 4203 }, { "epoch": 1.4039071631324094, "grad_norm": 0.3778303865454364, "learning_rate": 6.4117984258927565e-06, "loss": 0.1831, "step": 4204 }, { "epoch": 1.404241108699282, "grad_norm": 0.4005745223294027, "learning_rate": 6.409934062915345e-06, "loss": 0.1847, "step": 4205 }, { "epoch": 1.4045750542661546, "grad_norm": 0.4584571575972361, "learning_rate": 6.408069486950793e-06, "loss": 0.2045, "step": 4206 }, { "epoch": 1.4049089998330273, "grad_norm": 0.41082051621126475, "learning_rate": 6.406204698280766e-06, "loss": 0.1912, "step": 4207 }, { "epoch": 1.4052429453998998, "grad_norm": 0.3901252288030012, "learning_rate": 6.40433969718696e-06, "loss": 0.1807, "step": 4208 }, { "epoch": 1.4055768909667723, "grad_norm": 0.4501067185058659, "learning_rate": 6.402474483951109e-06, "loss": 0.2018, "step": 4209 }, { "epoch": 1.405910836533645, "grad_norm": 0.38450772473450306, "learning_rate": 6.400609058854973e-06, "loss": 0.1823, "step": 4210 }, { "epoch": 1.4062447821005177, "grad_norm": 0.40515453066428003, "learning_rate": 6.398743422180346e-06, "loss": 0.2034, "step": 4211 }, { "epoch": 1.4065787276673902, "grad_norm": 0.4239306652790811, "learning_rate": 6.396877574209057e-06, "loss": 0.2087, "step": 4212 }, { "epoch": 1.4069126732342627, "grad_norm": 0.4566772627357021, "learning_rate": 6.395011515222962e-06, "loss": 0.1948, "step": 4213 }, { "epoch": 1.4072466188011354, "grad_norm": 0.4000662579941561, "learning_rate": 6.393145245503951e-06, "loss": 0.1955, "step": 4214 }, { "epoch": 1.407580564368008, "grad_norm": 0.4201143343811621, "learning_rate": 6.391278765333948e-06, "loss": 0.2011, "step": 4215 }, { "epoch": 1.4079145099348807, "grad_norm": 0.39637080003077907, "learning_rate": 6.389412074994906e-06, "loss": 0.1937, "step": 4216 }, { "epoch": 1.4082484555017531, "grad_norm": 0.4258908660767943, "learning_rate": 6.387545174768809e-06, "loss": 0.201, "step": 4217 }, { "epoch": 1.4085824010686259, "grad_norm": 0.46190972235436634, "learning_rate": 6.385678064937677e-06, "loss": 0.219, "step": 4218 }, { "epoch": 1.4089163466354984, "grad_norm": 0.40798704121606794, "learning_rate": 6.383810745783556e-06, "loss": 0.2036, "step": 4219 }, { "epoch": 1.409250292202371, "grad_norm": 0.3900248306224213, "learning_rate": 6.38194321758853e-06, "loss": 0.182, "step": 4220 }, { "epoch": 1.4095842377692436, "grad_norm": 0.41143107768977444, "learning_rate": 6.3800754806347065e-06, "loss": 0.2046, "step": 4221 }, { "epoch": 1.4099181833361163, "grad_norm": 0.4227528210432267, "learning_rate": 6.378207535204234e-06, "loss": 0.2068, "step": 4222 }, { "epoch": 1.4102521289029888, "grad_norm": 0.4496275000832063, "learning_rate": 6.376339381579285e-06, "loss": 0.188, "step": 4223 }, { "epoch": 1.4105860744698613, "grad_norm": 0.3920155752449724, "learning_rate": 6.374471020042067e-06, "loss": 0.1915, "step": 4224 }, { "epoch": 1.410920020036734, "grad_norm": 0.4893281271897962, "learning_rate": 6.372602450874816e-06, "loss": 0.2051, "step": 4225 }, { "epoch": 1.4112539656036067, "grad_norm": 0.3895814494862588, "learning_rate": 6.370733674359803e-06, "loss": 0.2008, "step": 4226 }, { "epoch": 1.4115879111704792, "grad_norm": 0.4464740149177025, "learning_rate": 6.36886469077933e-06, "loss": 0.1996, "step": 4227 }, { "epoch": 1.4119218567373517, "grad_norm": 0.41982386936019905, "learning_rate": 6.366995500415727e-06, "loss": 0.1789, "step": 4228 }, { "epoch": 1.4122558023042244, "grad_norm": 0.48692131344551154, "learning_rate": 6.365126103551358e-06, "loss": 0.1914, "step": 4229 }, { "epoch": 1.4125897478710971, "grad_norm": 0.4188619004865136, "learning_rate": 6.363256500468617e-06, "loss": 0.1926, "step": 4230 }, { "epoch": 1.4129236934379696, "grad_norm": 0.38428575637852025, "learning_rate": 6.3613866914499285e-06, "loss": 0.1913, "step": 4231 }, { "epoch": 1.4132576390048421, "grad_norm": 0.4192523891713591, "learning_rate": 6.359516676777751e-06, "loss": 0.2058, "step": 4232 }, { "epoch": 1.4135915845717149, "grad_norm": 0.4161796743739333, "learning_rate": 6.357646456734574e-06, "loss": 0.1986, "step": 4233 }, { "epoch": 1.4139255301385873, "grad_norm": 0.412845390363555, "learning_rate": 6.3557760316029115e-06, "loss": 0.192, "step": 4234 }, { "epoch": 1.41425947570546, "grad_norm": 0.3764465690310885, "learning_rate": 6.353905401665317e-06, "loss": 0.1832, "step": 4235 }, { "epoch": 1.4145934212723326, "grad_norm": 0.3984816244207758, "learning_rate": 6.35203456720437e-06, "loss": 0.1959, "step": 4236 }, { "epoch": 1.4149273668392053, "grad_norm": 0.4660411346747864, "learning_rate": 6.35016352850268e-06, "loss": 0.208, "step": 4237 }, { "epoch": 1.4152613124060778, "grad_norm": 0.4208257324589308, "learning_rate": 6.3482922858428915e-06, "loss": 0.1906, "step": 4238 }, { "epoch": 1.4155952579729503, "grad_norm": 0.4093879464060409, "learning_rate": 6.34642083950768e-06, "loss": 0.1919, "step": 4239 }, { "epoch": 1.415929203539823, "grad_norm": 0.4169795445243217, "learning_rate": 6.344549189779745e-06, "loss": 0.1934, "step": 4240 }, { "epoch": 1.4162631491066957, "grad_norm": 0.3999232718854513, "learning_rate": 6.342677336941825e-06, "loss": 0.201, "step": 4241 }, { "epoch": 1.4165970946735682, "grad_norm": 0.43188532288830084, "learning_rate": 6.340805281276683e-06, "loss": 0.1915, "step": 4242 }, { "epoch": 1.4169310402404407, "grad_norm": 0.4212897378007842, "learning_rate": 6.338933023067114e-06, "loss": 0.2031, "step": 4243 }, { "epoch": 1.4172649858073134, "grad_norm": 0.39104495072727397, "learning_rate": 6.337060562595949e-06, "loss": 0.1838, "step": 4244 }, { "epoch": 1.4175989313741861, "grad_norm": 0.4279064144901453, "learning_rate": 6.3351879001460425e-06, "loss": 0.203, "step": 4245 }, { "epoch": 1.4179328769410586, "grad_norm": 0.44299570054290127, "learning_rate": 6.333315036000281e-06, "loss": 0.1909, "step": 4246 }, { "epoch": 1.4182668225079311, "grad_norm": 0.4518229529900438, "learning_rate": 6.331441970441585e-06, "loss": 0.2016, "step": 4247 }, { "epoch": 1.4186007680748038, "grad_norm": 0.437754191582659, "learning_rate": 6.329568703752902e-06, "loss": 0.2023, "step": 4248 }, { "epoch": 1.4189347136416763, "grad_norm": 0.39214812360430923, "learning_rate": 6.32769523621721e-06, "loss": 0.1946, "step": 4249 }, { "epoch": 1.419268659208549, "grad_norm": 0.40032628961599576, "learning_rate": 6.3258215681175215e-06, "loss": 0.1893, "step": 4250 }, { "epoch": 1.4196026047754216, "grad_norm": 0.4135834268436745, "learning_rate": 6.323947699736873e-06, "loss": 0.1914, "step": 4251 }, { "epoch": 1.4199365503422943, "grad_norm": 0.4408452075370049, "learning_rate": 6.3220736313583345e-06, "loss": 0.2073, "step": 4252 }, { "epoch": 1.4202704959091668, "grad_norm": 0.46529501920804817, "learning_rate": 6.320199363265008e-06, "loss": 0.1994, "step": 4253 }, { "epoch": 1.4206044414760395, "grad_norm": 0.4180415628920196, "learning_rate": 6.318324895740023e-06, "loss": 0.1979, "step": 4254 }, { "epoch": 1.420938387042912, "grad_norm": 0.40285815975229106, "learning_rate": 6.31645022906654e-06, "loss": 0.1972, "step": 4255 }, { "epoch": 1.4212723326097847, "grad_norm": 0.4245223489154361, "learning_rate": 6.314575363527748e-06, "loss": 0.1955, "step": 4256 }, { "epoch": 1.4216062781766572, "grad_norm": 0.4107284657056354, "learning_rate": 6.312700299406871e-06, "loss": 0.1956, "step": 4257 }, { "epoch": 1.4219402237435297, "grad_norm": 0.4123475914189533, "learning_rate": 6.310825036987154e-06, "loss": 0.1987, "step": 4258 }, { "epoch": 1.4222741693104024, "grad_norm": 0.4113685742620576, "learning_rate": 6.308949576551884e-06, "loss": 0.189, "step": 4259 }, { "epoch": 1.4226081148772751, "grad_norm": 0.4113493190674343, "learning_rate": 6.3070739183843655e-06, "loss": 0.1958, "step": 4260 }, { "epoch": 1.4229420604441476, "grad_norm": 0.41318754392232565, "learning_rate": 6.305198062767942e-06, "loss": 0.1921, "step": 4261 }, { "epoch": 1.4232760060110201, "grad_norm": 0.42444570405075827, "learning_rate": 6.303322009985984e-06, "loss": 0.2002, "step": 4262 }, { "epoch": 1.4236099515778928, "grad_norm": 0.4036884323591814, "learning_rate": 6.301445760321889e-06, "loss": 0.1921, "step": 4263 }, { "epoch": 1.4239438971447653, "grad_norm": 0.3868929082230903, "learning_rate": 6.299569314059088e-06, "loss": 0.1952, "step": 4264 }, { "epoch": 1.424277842711638, "grad_norm": 0.3934660880793526, "learning_rate": 6.297692671481042e-06, "loss": 0.1961, "step": 4265 }, { "epoch": 1.4246117882785105, "grad_norm": 0.5476517438338885, "learning_rate": 6.295815832871235e-06, "loss": 0.178, "step": 4266 }, { "epoch": 1.4249457338453833, "grad_norm": 0.42403647877836415, "learning_rate": 6.2939387985131905e-06, "loss": 0.1926, "step": 4267 }, { "epoch": 1.4252796794122558, "grad_norm": 0.4603384594375843, "learning_rate": 6.292061568690455e-06, "loss": 0.1962, "step": 4268 }, { "epoch": 1.4256136249791285, "grad_norm": 0.42212567893770986, "learning_rate": 6.290184143686606e-06, "loss": 0.1825, "step": 4269 }, { "epoch": 1.425947570546001, "grad_norm": 0.42720372519788125, "learning_rate": 6.288306523785252e-06, "loss": 0.2023, "step": 4270 }, { "epoch": 1.4262815161128737, "grad_norm": 0.3850216851382417, "learning_rate": 6.286428709270026e-06, "loss": 0.19, "step": 4271 }, { "epoch": 1.4266154616797462, "grad_norm": 0.42293406684487284, "learning_rate": 6.284550700424597e-06, "loss": 0.1934, "step": 4272 }, { "epoch": 1.4269494072466187, "grad_norm": 0.4185602665867109, "learning_rate": 6.282672497532659e-06, "loss": 0.1862, "step": 4273 }, { "epoch": 1.4272833528134914, "grad_norm": 0.42353635443489346, "learning_rate": 6.280794100877938e-06, "loss": 0.1976, "step": 4274 }, { "epoch": 1.427617298380364, "grad_norm": 0.4193474465872973, "learning_rate": 6.278915510744187e-06, "loss": 0.1798, "step": 4275 }, { "epoch": 1.4279512439472366, "grad_norm": 0.47731951370814735, "learning_rate": 6.277036727415189e-06, "loss": 0.2027, "step": 4276 }, { "epoch": 1.428285189514109, "grad_norm": 0.41253445818583245, "learning_rate": 6.2751577511747575e-06, "loss": 0.1864, "step": 4277 }, { "epoch": 1.4286191350809818, "grad_norm": 0.41072543870509975, "learning_rate": 6.273278582306732e-06, "loss": 0.2005, "step": 4278 }, { "epoch": 1.4289530806478545, "grad_norm": 0.4464246228500059, "learning_rate": 6.271399221094986e-06, "loss": 0.1992, "step": 4279 }, { "epoch": 1.429287026214727, "grad_norm": 0.4094540550192012, "learning_rate": 6.269519667823416e-06, "loss": 0.1973, "step": 4280 }, { "epoch": 1.4296209717815995, "grad_norm": 0.41233321137674056, "learning_rate": 6.267639922775952e-06, "loss": 0.1979, "step": 4281 }, { "epoch": 1.4299549173484722, "grad_norm": 0.424743809144049, "learning_rate": 6.265759986236552e-06, "loss": 0.2035, "step": 4282 }, { "epoch": 1.4302888629153447, "grad_norm": 0.4005345858496865, "learning_rate": 6.263879858489204e-06, "loss": 0.1913, "step": 4283 }, { "epoch": 1.4306228084822175, "grad_norm": 0.4053273447527923, "learning_rate": 6.261999539817919e-06, "loss": 0.1943, "step": 4284 }, { "epoch": 1.43095675404909, "grad_norm": 0.4185492417076911, "learning_rate": 6.260119030506746e-06, "loss": 0.1975, "step": 4285 }, { "epoch": 1.4312906996159627, "grad_norm": 0.3913801060486488, "learning_rate": 6.258238330839754e-06, "loss": 0.1798, "step": 4286 }, { "epoch": 1.4316246451828352, "grad_norm": 0.426769833384345, "learning_rate": 6.2563574411010485e-06, "loss": 0.1964, "step": 4287 }, { "epoch": 1.4319585907497077, "grad_norm": 0.41614648545349253, "learning_rate": 6.254476361574757e-06, "loss": 0.189, "step": 4288 }, { "epoch": 1.4322925363165804, "grad_norm": 0.4241398703694006, "learning_rate": 6.252595092545042e-06, "loss": 0.1875, "step": 4289 }, { "epoch": 1.432626481883453, "grad_norm": 0.4299458304622268, "learning_rate": 6.250713634296087e-06, "loss": 0.2018, "step": 4290 }, { "epoch": 1.4329604274503256, "grad_norm": 0.4229363795139313, "learning_rate": 6.248831987112113e-06, "loss": 0.206, "step": 4291 }, { "epoch": 1.433294373017198, "grad_norm": 0.401583294371539, "learning_rate": 6.246950151277362e-06, "loss": 0.1883, "step": 4292 }, { "epoch": 1.4336283185840708, "grad_norm": 0.4519792653966738, "learning_rate": 6.245068127076109e-06, "loss": 0.2018, "step": 4293 }, { "epoch": 1.4339622641509435, "grad_norm": 0.40227774137045846, "learning_rate": 6.243185914792655e-06, "loss": 0.1819, "step": 4294 }, { "epoch": 1.434296209717816, "grad_norm": 0.559657939335022, "learning_rate": 6.2413035147113295e-06, "loss": 0.1913, "step": 4295 }, { "epoch": 1.4346301552846885, "grad_norm": 0.3972251507876515, "learning_rate": 6.239420927116493e-06, "loss": 0.1886, "step": 4296 }, { "epoch": 1.4349641008515612, "grad_norm": 0.41776370884070535, "learning_rate": 6.2375381522925325e-06, "loss": 0.1972, "step": 4297 }, { "epoch": 1.4352980464184337, "grad_norm": 0.41015662981383944, "learning_rate": 6.235655190523862e-06, "loss": 0.1978, "step": 4298 }, { "epoch": 1.4356319919853064, "grad_norm": 0.4229324755634245, "learning_rate": 6.233772042094924e-06, "loss": 0.1968, "step": 4299 }, { "epoch": 1.435965937552179, "grad_norm": 0.4710852821204468, "learning_rate": 6.231888707290194e-06, "loss": 0.2016, "step": 4300 }, { "epoch": 1.4362998831190517, "grad_norm": 0.5534094086271172, "learning_rate": 6.230005186394169e-06, "loss": 0.2034, "step": 4301 }, { "epoch": 1.4366338286859242, "grad_norm": 0.40362990371947977, "learning_rate": 6.228121479691377e-06, "loss": 0.1976, "step": 4302 }, { "epoch": 1.4369677742527969, "grad_norm": 0.46882248799019716, "learning_rate": 6.226237587466375e-06, "loss": 0.216, "step": 4303 }, { "epoch": 1.4373017198196694, "grad_norm": 0.44983410588953954, "learning_rate": 6.224353510003747e-06, "loss": 0.1955, "step": 4304 }, { "epoch": 1.437635665386542, "grad_norm": 0.4562795509116905, "learning_rate": 6.222469247588105e-06, "loss": 0.1959, "step": 4305 }, { "epoch": 1.4379696109534146, "grad_norm": 0.40086144043627886, "learning_rate": 6.220584800504091e-06, "loss": 0.1897, "step": 4306 }, { "epoch": 1.438303556520287, "grad_norm": 0.4220200763976442, "learning_rate": 6.218700169036368e-06, "loss": 0.2092, "step": 4307 }, { "epoch": 1.4386375020871598, "grad_norm": 0.40218684127289284, "learning_rate": 6.216815353469636e-06, "loss": 0.1891, "step": 4308 }, { "epoch": 1.4389714476540325, "grad_norm": 0.3699554890094108, "learning_rate": 6.214930354088618e-06, "loss": 0.1804, "step": 4309 }, { "epoch": 1.439305393220905, "grad_norm": 0.46604002782555415, "learning_rate": 6.213045171178063e-06, "loss": 0.2052, "step": 4310 }, { "epoch": 1.4396393387877775, "grad_norm": 0.4096947218921958, "learning_rate": 6.2111598050227535e-06, "loss": 0.1879, "step": 4311 }, { "epoch": 1.4399732843546502, "grad_norm": 0.43814289672198536, "learning_rate": 6.209274255907494e-06, "loss": 0.2108, "step": 4312 }, { "epoch": 1.4403072299215227, "grad_norm": 0.4349153906107477, "learning_rate": 6.207388524117119e-06, "loss": 0.1942, "step": 4313 }, { "epoch": 1.4406411754883954, "grad_norm": 0.389711559079184, "learning_rate": 6.205502609936491e-06, "loss": 0.189, "step": 4314 }, { "epoch": 1.440975121055268, "grad_norm": 0.39940637696014164, "learning_rate": 6.2036165136505e-06, "loss": 0.1912, "step": 4315 }, { "epoch": 1.4413090666221406, "grad_norm": 0.4348476443490899, "learning_rate": 6.201730235544062e-06, "loss": 0.2026, "step": 4316 }, { "epoch": 1.4416430121890131, "grad_norm": 0.4010344004022616, "learning_rate": 6.1998437759021235e-06, "loss": 0.19, "step": 4317 }, { "epoch": 1.4419769577558859, "grad_norm": 0.4077069086603506, "learning_rate": 6.197957135009653e-06, "loss": 0.1992, "step": 4318 }, { "epoch": 1.4423109033227584, "grad_norm": 0.454450027580997, "learning_rate": 6.196070313151652e-06, "loss": 0.2078, "step": 4319 }, { "epoch": 1.442644848889631, "grad_norm": 0.42090639548690023, "learning_rate": 6.194183310613147e-06, "loss": 0.2042, "step": 4320 }, { "epoch": 1.4429787944565036, "grad_norm": 0.3924425533047438, "learning_rate": 6.1922961276791925e-06, "loss": 0.1861, "step": 4321 }, { "epoch": 1.443312740023376, "grad_norm": 0.43825434862695306, "learning_rate": 6.190408764634869e-06, "loss": 0.2036, "step": 4322 }, { "epoch": 1.4436466855902488, "grad_norm": 0.4554298024237673, "learning_rate": 6.188521221765285e-06, "loss": 0.2075, "step": 4323 }, { "epoch": 1.4439806311571215, "grad_norm": 0.40310377491021876, "learning_rate": 6.186633499355576e-06, "loss": 0.1849, "step": 4324 }, { "epoch": 1.444314576723994, "grad_norm": 0.3958077216570012, "learning_rate": 6.184745597690903e-06, "loss": 0.2019, "step": 4325 }, { "epoch": 1.4446485222908665, "grad_norm": 0.41846403968531265, "learning_rate": 6.1828575170564595e-06, "loss": 0.1921, "step": 4326 }, { "epoch": 1.4449824678577392, "grad_norm": 0.4349377636186543, "learning_rate": 6.18096925773746e-06, "loss": 0.1972, "step": 4327 }, { "epoch": 1.445316413424612, "grad_norm": 0.39603752468722014, "learning_rate": 6.179080820019147e-06, "loss": 0.1766, "step": 4328 }, { "epoch": 1.4456503589914844, "grad_norm": 0.40669232085821777, "learning_rate": 6.177192204186796e-06, "loss": 0.1931, "step": 4329 }, { "epoch": 1.445984304558357, "grad_norm": 0.3946887338662643, "learning_rate": 6.1753034105257e-06, "loss": 0.1957, "step": 4330 }, { "epoch": 1.4463182501252296, "grad_norm": 0.41443060881225996, "learning_rate": 6.173414439321185e-06, "loss": 0.1964, "step": 4331 }, { "epoch": 1.4466521956921021, "grad_norm": 0.4405029054870975, "learning_rate": 6.171525290858602e-06, "loss": 0.1927, "step": 4332 }, { "epoch": 1.4469861412589748, "grad_norm": 0.4628203122774109, "learning_rate": 6.169635965423331e-06, "loss": 0.211, "step": 4333 }, { "epoch": 1.4473200868258473, "grad_norm": 0.48872692289281494, "learning_rate": 6.167746463300774e-06, "loss": 0.2088, "step": 4334 }, { "epoch": 1.44765403239272, "grad_norm": 0.4343259836847498, "learning_rate": 6.1658567847763655e-06, "loss": 0.2007, "step": 4335 }, { "epoch": 1.4479879779595926, "grad_norm": 0.4398765922609618, "learning_rate": 6.163966930135561e-06, "loss": 0.2006, "step": 4336 }, { "epoch": 1.448321923526465, "grad_norm": 0.45579456571854265, "learning_rate": 6.162076899663846e-06, "loss": 0.2107, "step": 4337 }, { "epoch": 1.4486558690933378, "grad_norm": 0.40883179410081466, "learning_rate": 6.160186693646732e-06, "loss": 0.1849, "step": 4338 }, { "epoch": 1.4489898146602105, "grad_norm": 0.49071716904008567, "learning_rate": 6.158296312369759e-06, "loss": 0.2147, "step": 4339 }, { "epoch": 1.449323760227083, "grad_norm": 0.4314291467098978, "learning_rate": 6.156405756118489e-06, "loss": 0.2086, "step": 4340 }, { "epoch": 1.4496577057939555, "grad_norm": 0.4593921097879424, "learning_rate": 6.154515025178511e-06, "loss": 0.2002, "step": 4341 }, { "epoch": 1.4499916513608282, "grad_norm": 0.45040771494821547, "learning_rate": 6.152624119835447e-06, "loss": 0.2086, "step": 4342 }, { "epoch": 1.450325596927701, "grad_norm": 0.4299923678581887, "learning_rate": 6.150733040374937e-06, "loss": 0.2014, "step": 4343 }, { "epoch": 1.4506595424945734, "grad_norm": 0.400344990906151, "learning_rate": 6.148841787082653e-06, "loss": 0.1927, "step": 4344 }, { "epoch": 1.450993488061446, "grad_norm": 0.5435972870488641, "learning_rate": 6.146950360244288e-06, "loss": 0.1967, "step": 4345 }, { "epoch": 1.4513274336283186, "grad_norm": 0.5742306297652585, "learning_rate": 6.145058760145568e-06, "loss": 0.2234, "step": 4346 }, { "epoch": 1.4516613791951911, "grad_norm": 0.39381531571315703, "learning_rate": 6.14316698707224e-06, "loss": 0.188, "step": 4347 }, { "epoch": 1.4519953247620638, "grad_norm": 0.39767928183404444, "learning_rate": 6.1412750413100754e-06, "loss": 0.1967, "step": 4348 }, { "epoch": 1.4523292703289363, "grad_norm": 0.42473106735652594, "learning_rate": 6.13938292314488e-06, "loss": 0.2017, "step": 4349 }, { "epoch": 1.452663215895809, "grad_norm": 0.4608850855222034, "learning_rate": 6.137490632862479e-06, "loss": 0.2073, "step": 4350 }, { "epoch": 1.4529971614626815, "grad_norm": 0.44194784147975535, "learning_rate": 6.135598170748721e-06, "loss": 0.1999, "step": 4351 }, { "epoch": 1.4533311070295543, "grad_norm": 0.4336212713439643, "learning_rate": 6.13370553708949e-06, "loss": 0.21, "step": 4352 }, { "epoch": 1.4536650525964268, "grad_norm": 0.4474198500961851, "learning_rate": 6.13181273217069e-06, "loss": 0.2107, "step": 4353 }, { "epoch": 1.4539989981632995, "grad_norm": 0.3988946635025851, "learning_rate": 6.129919756278248e-06, "loss": 0.1998, "step": 4354 }, { "epoch": 1.454332943730172, "grad_norm": 0.4289910244729747, "learning_rate": 6.128026609698124e-06, "loss": 0.1926, "step": 4355 }, { "epoch": 1.4546668892970445, "grad_norm": 0.38926894257927563, "learning_rate": 6.126133292716297e-06, "loss": 0.1893, "step": 4356 }, { "epoch": 1.4550008348639172, "grad_norm": 0.42221784850848676, "learning_rate": 6.124239805618778e-06, "loss": 0.1908, "step": 4357 }, { "epoch": 1.45533478043079, "grad_norm": 0.39646952858495677, "learning_rate": 6.122346148691598e-06, "loss": 0.1877, "step": 4358 }, { "epoch": 1.4556687259976624, "grad_norm": 0.40546148481183963, "learning_rate": 6.120452322220818e-06, "loss": 0.2004, "step": 4359 }, { "epoch": 1.456002671564535, "grad_norm": 0.4547099784865291, "learning_rate": 6.11855832649252e-06, "loss": 0.2115, "step": 4360 }, { "epoch": 1.4563366171314076, "grad_norm": 0.38112993327587635, "learning_rate": 6.116664161792817e-06, "loss": 0.1858, "step": 4361 }, { "epoch": 1.45667056269828, "grad_norm": 0.4019263289774499, "learning_rate": 6.114769828407845e-06, "loss": 0.1955, "step": 4362 }, { "epoch": 1.4570045082651528, "grad_norm": 0.41009801163114296, "learning_rate": 6.112875326623763e-06, "loss": 0.1882, "step": 4363 }, { "epoch": 1.4573384538320253, "grad_norm": 0.4079447248544796, "learning_rate": 6.110980656726759e-06, "loss": 0.1889, "step": 4364 }, { "epoch": 1.457672399398898, "grad_norm": 0.4516607016893233, "learning_rate": 6.109085819003048e-06, "loss": 0.2073, "step": 4365 }, { "epoch": 1.4580063449657705, "grad_norm": 0.42566248368763016, "learning_rate": 6.107190813738864e-06, "loss": 0.2089, "step": 4366 }, { "epoch": 1.4583402905326432, "grad_norm": 0.39147915382309184, "learning_rate": 6.10529564122047e-06, "loss": 0.1974, "step": 4367 }, { "epoch": 1.4586742360995157, "grad_norm": 0.3982765548370061, "learning_rate": 6.103400301734155e-06, "loss": 0.1993, "step": 4368 }, { "epoch": 1.4590081816663885, "grad_norm": 0.40681227759099087, "learning_rate": 6.101504795566232e-06, "loss": 0.2013, "step": 4369 }, { "epoch": 1.459342127233261, "grad_norm": 0.44284559949396546, "learning_rate": 6.099609123003041e-06, "loss": 0.2055, "step": 4370 }, { "epoch": 1.4596760728001335, "grad_norm": 0.3860492351094961, "learning_rate": 6.097713284330944e-06, "loss": 0.1948, "step": 4371 }, { "epoch": 1.4600100183670062, "grad_norm": 0.41425735018239185, "learning_rate": 6.095817279836329e-06, "loss": 0.1912, "step": 4372 }, { "epoch": 1.4603439639338789, "grad_norm": 0.38871439359382953, "learning_rate": 6.093921109805612e-06, "loss": 0.1928, "step": 4373 }, { "epoch": 1.4606779095007514, "grad_norm": 0.4143118345237507, "learning_rate": 6.092024774525231e-06, "loss": 0.215, "step": 4374 }, { "epoch": 1.4610118550676239, "grad_norm": 0.44030852410849036, "learning_rate": 6.090128274281649e-06, "loss": 0.2048, "step": 4375 }, { "epoch": 1.4613458006344966, "grad_norm": 0.4075810714964566, "learning_rate": 6.0882316093613555e-06, "loss": 0.191, "step": 4376 }, { "epoch": 1.4616797462013693, "grad_norm": 0.47178283797989656, "learning_rate": 6.086334780050865e-06, "loss": 0.2076, "step": 4377 }, { "epoch": 1.4620136917682418, "grad_norm": 0.4385058940753501, "learning_rate": 6.084437786636713e-06, "loss": 0.1973, "step": 4378 }, { "epoch": 1.4623476373351143, "grad_norm": 0.39454270917277967, "learning_rate": 6.082540629405467e-06, "loss": 0.1956, "step": 4379 }, { "epoch": 1.462681582901987, "grad_norm": 0.4246463460236804, "learning_rate": 6.08064330864371e-06, "loss": 0.1897, "step": 4380 }, { "epoch": 1.4630155284688595, "grad_norm": 0.4279872130659886, "learning_rate": 6.078745824638058e-06, "loss": 0.1909, "step": 4381 }, { "epoch": 1.4633494740357322, "grad_norm": 0.4037107620637563, "learning_rate": 6.076848177675148e-06, "loss": 0.185, "step": 4382 }, { "epoch": 1.4636834196026047, "grad_norm": 0.452895373409483, "learning_rate": 6.07495036804164e-06, "loss": 0.1987, "step": 4383 }, { "epoch": 1.4640173651694774, "grad_norm": 0.46645780502615036, "learning_rate": 6.073052396024222e-06, "loss": 0.2091, "step": 4384 }, { "epoch": 1.46435131073635, "grad_norm": 0.4041982376337457, "learning_rate": 6.071154261909605e-06, "loss": 0.1929, "step": 4385 }, { "epoch": 1.4646852563032224, "grad_norm": 0.4356675688935076, "learning_rate": 6.069255965984524e-06, "loss": 0.2031, "step": 4386 }, { "epoch": 1.4650192018700952, "grad_norm": 0.40081538939786715, "learning_rate": 6.067357508535741e-06, "loss": 0.1979, "step": 4387 }, { "epoch": 1.4653531474369679, "grad_norm": 0.424597670187965, "learning_rate": 6.065458889850037e-06, "loss": 0.1921, "step": 4388 }, { "epoch": 1.4656870930038404, "grad_norm": 0.4502702104798383, "learning_rate": 6.063560110214224e-06, "loss": 0.2006, "step": 4389 }, { "epoch": 1.4660210385707129, "grad_norm": 0.4195663771268208, "learning_rate": 6.061661169915132e-06, "loss": 0.1984, "step": 4390 }, { "epoch": 1.4663549841375856, "grad_norm": 0.4308479391309738, "learning_rate": 6.05976206923962e-06, "loss": 0.2084, "step": 4391 }, { "epoch": 1.4666889297044583, "grad_norm": 0.4166693946797753, "learning_rate": 6.057862808474569e-06, "loss": 0.1985, "step": 4392 }, { "epoch": 1.4670228752713308, "grad_norm": 0.4603888013323258, "learning_rate": 6.055963387906884e-06, "loss": 0.2015, "step": 4393 }, { "epoch": 1.4673568208382033, "grad_norm": 0.4081769473587065, "learning_rate": 6.054063807823497e-06, "loss": 0.2027, "step": 4394 }, { "epoch": 1.467690766405076, "grad_norm": 0.4134409770785201, "learning_rate": 6.052164068511359e-06, "loss": 0.1994, "step": 4395 }, { "epoch": 1.4680247119719485, "grad_norm": 0.4692504389911519, "learning_rate": 6.05026417025745e-06, "loss": 0.2123, "step": 4396 }, { "epoch": 1.4683586575388212, "grad_norm": 0.4493204264781914, "learning_rate": 6.0483641133487736e-06, "loss": 0.2065, "step": 4397 }, { "epoch": 1.4686926031056937, "grad_norm": 0.42766406223034875, "learning_rate": 6.046463898072351e-06, "loss": 0.2048, "step": 4398 }, { "epoch": 1.4690265486725664, "grad_norm": 0.5308801017267736, "learning_rate": 6.044563524715237e-06, "loss": 0.1971, "step": 4399 }, { "epoch": 1.469360494239439, "grad_norm": 0.4262125710912439, "learning_rate": 6.042662993564503e-06, "loss": 0.1949, "step": 4400 }, { "epoch": 1.4696944398063116, "grad_norm": 0.4025672852690035, "learning_rate": 6.040762304907246e-06, "loss": 0.1984, "step": 4401 }, { "epoch": 1.4700283853731841, "grad_norm": 0.4026158150281825, "learning_rate": 6.038861459030588e-06, "loss": 0.1891, "step": 4402 }, { "epoch": 1.4703623309400569, "grad_norm": 0.3956628314819311, "learning_rate": 6.036960456221677e-06, "loss": 0.1958, "step": 4403 }, { "epoch": 1.4706962765069294, "grad_norm": 0.4044262098140246, "learning_rate": 6.035059296767676e-06, "loss": 0.1916, "step": 4404 }, { "epoch": 1.4710302220738019, "grad_norm": 0.480407738770424, "learning_rate": 6.033157980955782e-06, "loss": 0.2004, "step": 4405 }, { "epoch": 1.4713641676406746, "grad_norm": 0.46733025871686174, "learning_rate": 6.0312565090732115e-06, "loss": 0.2135, "step": 4406 }, { "epoch": 1.4716981132075473, "grad_norm": 0.38309985347010694, "learning_rate": 6.0293548814072004e-06, "loss": 0.1973, "step": 4407 }, { "epoch": 1.4720320587744198, "grad_norm": 0.43882038617597807, "learning_rate": 6.0274530982450155e-06, "loss": 0.2097, "step": 4408 }, { "epoch": 1.4723660043412923, "grad_norm": 0.4763463732615725, "learning_rate": 6.025551159873941e-06, "loss": 0.2025, "step": 4409 }, { "epoch": 1.472699949908165, "grad_norm": 0.4773269336687056, "learning_rate": 6.023649066581288e-06, "loss": 0.2237, "step": 4410 }, { "epoch": 1.4730338954750375, "grad_norm": 0.4148665052375862, "learning_rate": 6.021746818654393e-06, "loss": 0.2008, "step": 4411 }, { "epoch": 1.4733678410419102, "grad_norm": 0.43151291731531444, "learning_rate": 6.019844416380609e-06, "loss": 0.1983, "step": 4412 }, { "epoch": 1.4737017866087827, "grad_norm": 0.4286672462162659, "learning_rate": 6.017941860047318e-06, "loss": 0.1993, "step": 4413 }, { "epoch": 1.4740357321756554, "grad_norm": 0.42593830099460744, "learning_rate": 6.016039149941924e-06, "loss": 0.2, "step": 4414 }, { "epoch": 1.474369677742528, "grad_norm": 0.44556539133547163, "learning_rate": 6.01413628635185e-06, "loss": 0.2224, "step": 4415 }, { "epoch": 1.4747036233094006, "grad_norm": 0.40967464330179093, "learning_rate": 6.012233269564551e-06, "loss": 0.1948, "step": 4416 }, { "epoch": 1.4750375688762731, "grad_norm": 0.43684591455417715, "learning_rate": 6.010330099867497e-06, "loss": 0.2075, "step": 4417 }, { "epoch": 1.4753715144431458, "grad_norm": 0.4341408303231394, "learning_rate": 6.008426777548186e-06, "loss": 0.1982, "step": 4418 }, { "epoch": 1.4757054600100183, "grad_norm": 0.4358707442878612, "learning_rate": 6.0065233028941365e-06, "loss": 0.2045, "step": 4419 }, { "epoch": 1.4760394055768908, "grad_norm": 0.37328822493004127, "learning_rate": 6.00461967619289e-06, "loss": 0.1821, "step": 4420 }, { "epoch": 1.4763733511437636, "grad_norm": 0.400014805788866, "learning_rate": 6.002715897732013e-06, "loss": 0.1847, "step": 4421 }, { "epoch": 1.4767072967106363, "grad_norm": 0.4046246703398329, "learning_rate": 6.000811967799092e-06, "loss": 0.1899, "step": 4422 }, { "epoch": 1.4770412422775088, "grad_norm": 0.40715797582521757, "learning_rate": 5.99890788668174e-06, "loss": 0.1941, "step": 4423 }, { "epoch": 1.4773751878443813, "grad_norm": 0.4206959634130982, "learning_rate": 5.997003654667589e-06, "loss": 0.1991, "step": 4424 }, { "epoch": 1.477709133411254, "grad_norm": 0.40769749828652163, "learning_rate": 5.995099272044298e-06, "loss": 0.1829, "step": 4425 }, { "epoch": 1.4780430789781267, "grad_norm": 0.44404122589778716, "learning_rate": 5.9931947390995435e-06, "loss": 0.2092, "step": 4426 }, { "epoch": 1.4783770245449992, "grad_norm": 0.42520813532123286, "learning_rate": 5.99129005612103e-06, "loss": 0.2081, "step": 4427 }, { "epoch": 1.4787109701118717, "grad_norm": 0.4122469308763514, "learning_rate": 5.989385223396482e-06, "loss": 0.2011, "step": 4428 }, { "epoch": 1.4790449156787444, "grad_norm": 0.3929382468133838, "learning_rate": 5.987480241213646e-06, "loss": 0.192, "step": 4429 }, { "epoch": 1.479378861245617, "grad_norm": 0.5672122545325259, "learning_rate": 5.985575109860292e-06, "loss": 0.2116, "step": 4430 }, { "epoch": 1.4797128068124896, "grad_norm": 0.4983677423174381, "learning_rate": 5.983669829624214e-06, "loss": 0.2018, "step": 4431 }, { "epoch": 1.4800467523793621, "grad_norm": 0.42548655408845926, "learning_rate": 5.981764400793224e-06, "loss": 0.1971, "step": 4432 }, { "epoch": 1.4803806979462348, "grad_norm": 0.39361510423237334, "learning_rate": 5.9798588236551626e-06, "loss": 0.1919, "step": 4433 }, { "epoch": 1.4807146435131073, "grad_norm": 0.42721227153711655, "learning_rate": 5.977953098497889e-06, "loss": 0.2013, "step": 4434 }, { "epoch": 1.4810485890799798, "grad_norm": 0.4093784539006786, "learning_rate": 5.976047225609284e-06, "loss": 0.1945, "step": 4435 }, { "epoch": 1.4813825346468525, "grad_norm": 0.39877583576233977, "learning_rate": 5.974141205277253e-06, "loss": 0.1932, "step": 4436 }, { "epoch": 1.4817164802137253, "grad_norm": 0.4241654493666607, "learning_rate": 5.972235037789723e-06, "loss": 0.1888, "step": 4437 }, { "epoch": 1.4820504257805978, "grad_norm": 0.4127889957282086, "learning_rate": 5.970328723434642e-06, "loss": 0.1855, "step": 4438 }, { "epoch": 1.4823843713474703, "grad_norm": 0.43974577195283593, "learning_rate": 5.968422262499983e-06, "loss": 0.2001, "step": 4439 }, { "epoch": 1.482718316914343, "grad_norm": 0.43856634741945644, "learning_rate": 5.966515655273739e-06, "loss": 0.1909, "step": 4440 }, { "epoch": 1.4830522624812157, "grad_norm": 0.43919243938383296, "learning_rate": 5.9646089020439245e-06, "loss": 0.1957, "step": 4441 }, { "epoch": 1.4833862080480882, "grad_norm": 0.39521255006239037, "learning_rate": 5.962702003098576e-06, "loss": 0.1884, "step": 4442 }, { "epoch": 1.4837201536149607, "grad_norm": 0.39010763406866694, "learning_rate": 5.960794958725756e-06, "loss": 0.1967, "step": 4443 }, { "epoch": 1.4840540991818334, "grad_norm": 0.38308666558185406, "learning_rate": 5.958887769213544e-06, "loss": 0.1898, "step": 4444 }, { "epoch": 1.484388044748706, "grad_norm": 0.3808395689544056, "learning_rate": 5.956980434850044e-06, "loss": 0.1926, "step": 4445 }, { "epoch": 1.4847219903155786, "grad_norm": 0.4019120461912746, "learning_rate": 5.955072955923381e-06, "loss": 0.1901, "step": 4446 }, { "epoch": 1.485055935882451, "grad_norm": 0.3914079755133911, "learning_rate": 5.9531653327217035e-06, "loss": 0.1858, "step": 4447 }, { "epoch": 1.4853898814493238, "grad_norm": 0.4001292440001611, "learning_rate": 5.951257565533177e-06, "loss": 0.1942, "step": 4448 }, { "epoch": 1.4857238270161963, "grad_norm": 0.44889699749599704, "learning_rate": 5.949349654645997e-06, "loss": 0.2144, "step": 4449 }, { "epoch": 1.486057772583069, "grad_norm": 0.4376071738007426, "learning_rate": 5.947441600348373e-06, "loss": 0.2112, "step": 4450 }, { "epoch": 1.4863917181499415, "grad_norm": 0.5002467028319895, "learning_rate": 5.945533402928537e-06, "loss": 0.2143, "step": 4451 }, { "epoch": 1.4867256637168142, "grad_norm": 0.43261606233366906, "learning_rate": 5.9436250626747505e-06, "loss": 0.2231, "step": 4452 }, { "epoch": 1.4870596092836867, "grad_norm": 0.42928106883312683, "learning_rate": 5.941716579875286e-06, "loss": 0.1944, "step": 4453 }, { "epoch": 1.4873935548505592, "grad_norm": 0.44047447971144676, "learning_rate": 5.939807954818443e-06, "loss": 0.2087, "step": 4454 }, { "epoch": 1.487727500417432, "grad_norm": 0.647500480455085, "learning_rate": 5.937899187792544e-06, "loss": 0.1897, "step": 4455 }, { "epoch": 1.4880614459843047, "grad_norm": 0.39203680888737436, "learning_rate": 5.935990279085928e-06, "loss": 0.1926, "step": 4456 }, { "epoch": 1.4883953915511772, "grad_norm": 0.7590048406114819, "learning_rate": 5.93408122898696e-06, "loss": 0.2066, "step": 4457 }, { "epoch": 1.4887293371180497, "grad_norm": 0.3828687255820843, "learning_rate": 5.9321720377840245e-06, "loss": 0.1898, "step": 4458 }, { "epoch": 1.4890632826849224, "grad_norm": 0.3989626077341179, "learning_rate": 5.930262705765526e-06, "loss": 0.1919, "step": 4459 }, { "epoch": 1.4893972282517949, "grad_norm": 0.4435140197348797, "learning_rate": 5.928353233219893e-06, "loss": 0.2138, "step": 4460 }, { "epoch": 1.4897311738186676, "grad_norm": 0.41263294889386565, "learning_rate": 5.926443620435572e-06, "loss": 0.199, "step": 4461 }, { "epoch": 1.49006511938554, "grad_norm": 0.4331460485795739, "learning_rate": 5.924533867701034e-06, "loss": 0.188, "step": 4462 }, { "epoch": 1.4903990649524128, "grad_norm": 0.41622877671317154, "learning_rate": 5.922623975304771e-06, "loss": 0.1985, "step": 4463 }, { "epoch": 1.4907330105192853, "grad_norm": 0.4044199019408801, "learning_rate": 5.920713943535291e-06, "loss": 0.1978, "step": 4464 }, { "epoch": 1.491066956086158, "grad_norm": 0.44762562378654125, "learning_rate": 5.9188037726811285e-06, "loss": 0.2139, "step": 4465 }, { "epoch": 1.4914009016530305, "grad_norm": 0.4544481373494818, "learning_rate": 5.9168934630308385e-06, "loss": 0.2069, "step": 4466 }, { "epoch": 1.4917348472199032, "grad_norm": 0.4158141847232926, "learning_rate": 5.914983014872995e-06, "loss": 0.2005, "step": 4467 }, { "epoch": 1.4920687927867757, "grad_norm": 0.438988331436695, "learning_rate": 5.9130724284961924e-06, "loss": 0.1996, "step": 4468 }, { "epoch": 1.4924027383536482, "grad_norm": 0.4092313246855316, "learning_rate": 5.91116170418905e-06, "loss": 0.1997, "step": 4469 }, { "epoch": 1.492736683920521, "grad_norm": 0.5391760700288386, "learning_rate": 5.909250842240203e-06, "loss": 0.2231, "step": 4470 }, { "epoch": 1.4930706294873937, "grad_norm": 0.42467270074369073, "learning_rate": 5.907339842938309e-06, "loss": 0.1921, "step": 4471 }, { "epoch": 1.4934045750542662, "grad_norm": 0.4149774782856609, "learning_rate": 5.90542870657205e-06, "loss": 0.1953, "step": 4472 }, { "epoch": 1.4937385206211387, "grad_norm": 0.4071411942291915, "learning_rate": 5.903517433430123e-06, "loss": 0.1945, "step": 4473 }, { "epoch": 1.4940724661880114, "grad_norm": 0.4161620755423087, "learning_rate": 5.901606023801248e-06, "loss": 0.196, "step": 4474 }, { "epoch": 1.494406411754884, "grad_norm": 0.44415983179036833, "learning_rate": 5.899694477974168e-06, "loss": 0.2036, "step": 4475 }, { "epoch": 1.4947403573217566, "grad_norm": 0.48213305322394434, "learning_rate": 5.897782796237645e-06, "loss": 0.2017, "step": 4476 }, { "epoch": 1.495074302888629, "grad_norm": 0.4565653405989367, "learning_rate": 5.895870978880457e-06, "loss": 0.2131, "step": 4477 }, { "epoch": 1.4954082484555018, "grad_norm": 0.38413826906308257, "learning_rate": 5.89395902619141e-06, "loss": 0.1824, "step": 4478 }, { "epoch": 1.4957421940223743, "grad_norm": 0.3865209015501584, "learning_rate": 5.892046938459327e-06, "loss": 0.1932, "step": 4479 }, { "epoch": 1.496076139589247, "grad_norm": 0.4485955314433592, "learning_rate": 5.890134715973049e-06, "loss": 0.2067, "step": 4480 }, { "epoch": 1.4964100851561195, "grad_norm": 0.4159945254787882, "learning_rate": 5.888222359021443e-06, "loss": 0.208, "step": 4481 }, { "epoch": 1.4967440307229922, "grad_norm": 0.4705989200101088, "learning_rate": 5.8863098678933896e-06, "loss": 0.2105, "step": 4482 }, { "epoch": 1.4970779762898647, "grad_norm": 0.4875225833597677, "learning_rate": 5.884397242877795e-06, "loss": 0.2036, "step": 4483 }, { "epoch": 1.4974119218567372, "grad_norm": 0.46559568159684633, "learning_rate": 5.882484484263584e-06, "loss": 0.2078, "step": 4484 }, { "epoch": 1.49774586742361, "grad_norm": 0.4104886972900785, "learning_rate": 5.8805715923397e-06, "loss": 0.2093, "step": 4485 }, { "epoch": 1.4980798129904827, "grad_norm": 0.4270246748278954, "learning_rate": 5.87865856739511e-06, "loss": 0.1874, "step": 4486 }, { "epoch": 1.4984137585573551, "grad_norm": 0.40156162163724024, "learning_rate": 5.876745409718796e-06, "loss": 0.1865, "step": 4487 }, { "epoch": 1.4987477041242276, "grad_norm": 0.391157655694104, "learning_rate": 5.874832119599766e-06, "loss": 0.1849, "step": 4488 }, { "epoch": 1.4990816496911004, "grad_norm": 0.4212459484944311, "learning_rate": 5.872918697327042e-06, "loss": 0.1948, "step": 4489 }, { "epoch": 1.499415595257973, "grad_norm": 0.43743610694660373, "learning_rate": 5.871005143189671e-06, "loss": 0.2022, "step": 4490 }, { "epoch": 1.4997495408248456, "grad_norm": 0.47954887154563275, "learning_rate": 5.869091457476718e-06, "loss": 0.2165, "step": 4491 }, { "epoch": 1.500083486391718, "grad_norm": 0.40104355324589935, "learning_rate": 5.8671776404772655e-06, "loss": 0.2011, "step": 4492 }, { "epoch": 1.5004174319585908, "grad_norm": 0.4065530502322676, "learning_rate": 5.8652636924804206e-06, "loss": 0.1917, "step": 4493 }, { "epoch": 1.5007513775254635, "grad_norm": 0.39361953435018554, "learning_rate": 5.863349613775308e-06, "loss": 0.1871, "step": 4494 }, { "epoch": 1.501085323092336, "grad_norm": 0.4220860375266858, "learning_rate": 5.861435404651068e-06, "loss": 0.197, "step": 4495 }, { "epoch": 1.5014192686592085, "grad_norm": 0.4133784223860879, "learning_rate": 5.859521065396869e-06, "loss": 0.2039, "step": 4496 }, { "epoch": 1.5017532142260812, "grad_norm": 0.46819851944987745, "learning_rate": 5.857606596301892e-06, "loss": 0.1887, "step": 4497 }, { "epoch": 1.5020871597929537, "grad_norm": 0.4212627192628078, "learning_rate": 5.85569199765534e-06, "loss": 0.1992, "step": 4498 }, { "epoch": 1.5024211053598262, "grad_norm": 0.5441159142194298, "learning_rate": 5.853777269746438e-06, "loss": 0.1945, "step": 4499 }, { "epoch": 1.502755050926699, "grad_norm": 0.38319452570638174, "learning_rate": 5.851862412864426e-06, "loss": 0.1878, "step": 4500 }, { "epoch": 1.5030889964935716, "grad_norm": 0.39792537321581023, "learning_rate": 5.8499474272985654e-06, "loss": 0.1871, "step": 4501 }, { "epoch": 1.5034229420604441, "grad_norm": 0.42532885484991806, "learning_rate": 5.848032313338139e-06, "loss": 0.188, "step": 4502 }, { "epoch": 1.5037568876273166, "grad_norm": 0.40589177813814775, "learning_rate": 5.846117071272444e-06, "loss": 0.1919, "step": 4503 }, { "epoch": 1.5040908331941893, "grad_norm": 0.4038764967885529, "learning_rate": 5.844201701390806e-06, "loss": 0.1947, "step": 4504 }, { "epoch": 1.504424778761062, "grad_norm": 0.42058966558438154, "learning_rate": 5.842286203982559e-06, "loss": 0.1875, "step": 4505 }, { "epoch": 1.5047587243279346, "grad_norm": 0.44301115684433295, "learning_rate": 5.840370579337063e-06, "loss": 0.2088, "step": 4506 }, { "epoch": 1.505092669894807, "grad_norm": 0.4506602942327276, "learning_rate": 5.838454827743697e-06, "loss": 0.214, "step": 4507 }, { "epoch": 1.5054266154616798, "grad_norm": 0.41888527162391026, "learning_rate": 5.8365389494918565e-06, "loss": 0.1989, "step": 4508 }, { "epoch": 1.5057605610285525, "grad_norm": 0.46234137248207496, "learning_rate": 5.834622944870959e-06, "loss": 0.2071, "step": 4509 }, { "epoch": 1.506094506595425, "grad_norm": 0.3991349847409579, "learning_rate": 5.832706814170437e-06, "loss": 0.1921, "step": 4510 }, { "epoch": 1.5064284521622975, "grad_norm": 0.45328443937762986, "learning_rate": 5.830790557679746e-06, "loss": 0.2094, "step": 4511 }, { "epoch": 1.5067623977291702, "grad_norm": 0.4545739233729224, "learning_rate": 5.8288741756883585e-06, "loss": 0.2233, "step": 4512 }, { "epoch": 1.5070963432960427, "grad_norm": 0.4173274151899909, "learning_rate": 5.826957668485768e-06, "loss": 0.1891, "step": 4513 }, { "epoch": 1.5074302888629152, "grad_norm": 0.4514509261491327, "learning_rate": 5.825041036361484e-06, "loss": 0.2034, "step": 4514 }, { "epoch": 1.507764234429788, "grad_norm": 0.4245100612780044, "learning_rate": 5.823124279605037e-06, "loss": 0.2083, "step": 4515 }, { "epoch": 1.5080981799966606, "grad_norm": 0.41279249791467926, "learning_rate": 5.821207398505976e-06, "loss": 0.2131, "step": 4516 }, { "epoch": 1.5084321255635331, "grad_norm": 0.4430510447851048, "learning_rate": 5.819290393353867e-06, "loss": 0.2233, "step": 4517 }, { "epoch": 1.5087660711304056, "grad_norm": 0.4270138809636306, "learning_rate": 5.817373264438297e-06, "loss": 0.198, "step": 4518 }, { "epoch": 1.5091000166972783, "grad_norm": 0.3855525070150281, "learning_rate": 5.815456012048873e-06, "loss": 0.1943, "step": 4519 }, { "epoch": 1.509433962264151, "grad_norm": 0.3951585582354066, "learning_rate": 5.8135386364752154e-06, "loss": 0.1973, "step": 4520 }, { "epoch": 1.5097679078310235, "grad_norm": 0.44097574717875243, "learning_rate": 5.8116211380069675e-06, "loss": 0.1974, "step": 4521 }, { "epoch": 1.510101853397896, "grad_norm": 0.418568197709888, "learning_rate": 5.809703516933791e-06, "loss": 0.1981, "step": 4522 }, { "epoch": 1.5104357989647688, "grad_norm": 0.39429989332524545, "learning_rate": 5.807785773545364e-06, "loss": 0.1885, "step": 4523 }, { "epoch": 1.5107697445316415, "grad_norm": 0.5133283842688638, "learning_rate": 5.805867908131384e-06, "loss": 0.2054, "step": 4524 }, { "epoch": 1.511103690098514, "grad_norm": 0.4138694184824036, "learning_rate": 5.803949920981568e-06, "loss": 0.2066, "step": 4525 }, { "epoch": 1.5114376356653865, "grad_norm": 0.451090788654152, "learning_rate": 5.802031812385651e-06, "loss": 0.207, "step": 4526 }, { "epoch": 1.5117715812322592, "grad_norm": 0.4391511075816685, "learning_rate": 5.800113582633384e-06, "loss": 0.1972, "step": 4527 }, { "epoch": 1.512105526799132, "grad_norm": 0.4062998929666656, "learning_rate": 5.7981952320145405e-06, "loss": 0.2005, "step": 4528 }, { "epoch": 1.5124394723660042, "grad_norm": 0.4147263705858617, "learning_rate": 5.796276760818908e-06, "loss": 0.2, "step": 4529 }, { "epoch": 1.512773417932877, "grad_norm": 0.4184233692356123, "learning_rate": 5.794358169336295e-06, "loss": 0.2072, "step": 4530 }, { "epoch": 1.5131073634997496, "grad_norm": 0.42606865720950565, "learning_rate": 5.792439457856528e-06, "loss": 0.2058, "step": 4531 }, { "epoch": 1.513441309066622, "grad_norm": 0.4031899587249856, "learning_rate": 5.790520626669449e-06, "loss": 0.1865, "step": 4532 }, { "epoch": 1.5137752546334946, "grad_norm": 0.39822141992740223, "learning_rate": 5.788601676064922e-06, "loss": 0.1849, "step": 4533 }, { "epoch": 1.5141092002003673, "grad_norm": 0.41169793772637675, "learning_rate": 5.786682606332827e-06, "loss": 0.198, "step": 4534 }, { "epoch": 1.51444314576724, "grad_norm": 0.41321607553783124, "learning_rate": 5.78476341776306e-06, "loss": 0.1988, "step": 4535 }, { "epoch": 1.5147770913341125, "grad_norm": 0.4333225224257672, "learning_rate": 5.782844110645539e-06, "loss": 0.1957, "step": 4536 }, { "epoch": 1.515111036900985, "grad_norm": 0.42052885937644136, "learning_rate": 5.780924685270198e-06, "loss": 0.198, "step": 4537 }, { "epoch": 1.5154449824678577, "grad_norm": 0.4037698414311071, "learning_rate": 5.779005141926988e-06, "loss": 0.2, "step": 4538 }, { "epoch": 1.5157789280347305, "grad_norm": 0.3818155530534464, "learning_rate": 5.777085480905877e-06, "loss": 0.1915, "step": 4539 }, { "epoch": 1.516112873601603, "grad_norm": 0.3963973853574785, "learning_rate": 5.7751657024968565e-06, "loss": 0.1988, "step": 4540 }, { "epoch": 1.5164468191684755, "grad_norm": 0.45356922202417344, "learning_rate": 5.773245806989929e-06, "loss": 0.2118, "step": 4541 }, { "epoch": 1.5167807647353482, "grad_norm": 0.41828720268999303, "learning_rate": 5.771325794675117e-06, "loss": 0.2065, "step": 4542 }, { "epoch": 1.517114710302221, "grad_norm": 0.38469626361124054, "learning_rate": 5.769405665842461e-06, "loss": 0.1849, "step": 4543 }, { "epoch": 1.5174486558690934, "grad_norm": 0.3785273566123282, "learning_rate": 5.767485420782021e-06, "loss": 0.1876, "step": 4544 }, { "epoch": 1.5177826014359659, "grad_norm": 0.41324123757089976, "learning_rate": 5.7655650597838704e-06, "loss": 0.2107, "step": 4545 }, { "epoch": 1.5181165470028386, "grad_norm": 0.4023559102615698, "learning_rate": 5.7636445831381034e-06, "loss": 0.1958, "step": 4546 }, { "epoch": 1.518450492569711, "grad_norm": 0.4268003997100768, "learning_rate": 5.761723991134831e-06, "loss": 0.2005, "step": 4547 }, { "epoch": 1.5187844381365836, "grad_norm": 0.41879175195622614, "learning_rate": 5.759803284064181e-06, "loss": 0.1974, "step": 4548 }, { "epoch": 1.5191183837034563, "grad_norm": 0.41661555641918735, "learning_rate": 5.757882462216299e-06, "loss": 0.2028, "step": 4549 }, { "epoch": 1.519452329270329, "grad_norm": 0.5008947195462263, "learning_rate": 5.755961525881345e-06, "loss": 0.211, "step": 4550 }, { "epoch": 1.5197862748372015, "grad_norm": 0.43921728419522166, "learning_rate": 5.7540404753495034e-06, "loss": 0.2032, "step": 4551 }, { "epoch": 1.520120220404074, "grad_norm": 0.44693077234088574, "learning_rate": 5.75211931091097e-06, "loss": 0.2265, "step": 4552 }, { "epoch": 1.5204541659709467, "grad_norm": 0.4001694902689412, "learning_rate": 5.750198032855956e-06, "loss": 0.2, "step": 4553 }, { "epoch": 1.5207881115378195, "grad_norm": 0.4045351162020478, "learning_rate": 5.748276641474698e-06, "loss": 0.1983, "step": 4554 }, { "epoch": 1.521122057104692, "grad_norm": 0.3977245043542055, "learning_rate": 5.746355137057442e-06, "loss": 0.199, "step": 4555 }, { "epoch": 1.5214560026715644, "grad_norm": 0.4176634955723099, "learning_rate": 5.7444335198944555e-06, "loss": 0.2065, "step": 4556 }, { "epoch": 1.5217899482384372, "grad_norm": 0.4086844831410891, "learning_rate": 5.7425117902760195e-06, "loss": 0.2004, "step": 4557 }, { "epoch": 1.5221238938053099, "grad_norm": 0.441434511195485, "learning_rate": 5.7405899484924346e-06, "loss": 0.2116, "step": 4558 }, { "epoch": 1.5224578393721824, "grad_norm": 0.41164529074446093, "learning_rate": 5.738667994834019e-06, "loss": 0.1939, "step": 4559 }, { "epoch": 1.5227917849390549, "grad_norm": 0.4069363998020001, "learning_rate": 5.736745929591103e-06, "loss": 0.1885, "step": 4560 }, { "epoch": 1.5231257305059276, "grad_norm": 0.40733152134950845, "learning_rate": 5.734823753054042e-06, "loss": 0.2001, "step": 4561 }, { "epoch": 1.5234596760728, "grad_norm": 0.3828618387327402, "learning_rate": 5.732901465513199e-06, "loss": 0.1982, "step": 4562 }, { "epoch": 1.5237936216396726, "grad_norm": 0.4305349510985267, "learning_rate": 5.73097906725896e-06, "loss": 0.1974, "step": 4563 }, { "epoch": 1.5241275672065453, "grad_norm": 0.40465587338482667, "learning_rate": 5.729056558581727e-06, "loss": 0.1986, "step": 4564 }, { "epoch": 1.524461512773418, "grad_norm": 0.471282991054306, "learning_rate": 5.727133939771915e-06, "loss": 0.2081, "step": 4565 }, { "epoch": 1.5247954583402905, "grad_norm": 0.4088188439945464, "learning_rate": 5.725211211119961e-06, "loss": 0.1952, "step": 4566 }, { "epoch": 1.525129403907163, "grad_norm": 0.3934161375353832, "learning_rate": 5.723288372916315e-06, "loss": 0.1901, "step": 4567 }, { "epoch": 1.5254633494740357, "grad_norm": 0.39417936401264786, "learning_rate": 5.721365425451442e-06, "loss": 0.195, "step": 4568 }, { "epoch": 1.5257972950409084, "grad_norm": 0.41813761847460523, "learning_rate": 5.719442369015828e-06, "loss": 0.2002, "step": 4569 }, { "epoch": 1.526131240607781, "grad_norm": 0.3928817442742676, "learning_rate": 5.717519203899975e-06, "loss": 0.1821, "step": 4570 }, { "epoch": 1.5264651861746534, "grad_norm": 0.42060022944156716, "learning_rate": 5.715595930394396e-06, "loss": 0.197, "step": 4571 }, { "epoch": 1.5267991317415261, "grad_norm": 0.414538136672965, "learning_rate": 5.713672548789626e-06, "loss": 0.2109, "step": 4572 }, { "epoch": 1.5271330773083989, "grad_norm": 0.40300175281434797, "learning_rate": 5.711749059376215e-06, "loss": 0.194, "step": 4573 }, { "epoch": 1.5274670228752714, "grad_norm": 0.4202683530403047, "learning_rate": 5.7098254624447255e-06, "loss": 0.1891, "step": 4574 }, { "epoch": 1.5278009684421439, "grad_norm": 0.41121947338511855, "learning_rate": 5.707901758285745e-06, "loss": 0.1911, "step": 4575 }, { "epoch": 1.5281349140090166, "grad_norm": 0.3973014662751213, "learning_rate": 5.705977947189868e-06, "loss": 0.1994, "step": 4576 }, { "epoch": 1.5284688595758893, "grad_norm": 0.43891931027961867, "learning_rate": 5.704054029447708e-06, "loss": 0.2007, "step": 4577 }, { "epoch": 1.5288028051427616, "grad_norm": 0.4502833076199682, "learning_rate": 5.702130005349899e-06, "loss": 0.214, "step": 4578 }, { "epoch": 1.5291367507096343, "grad_norm": 0.44959299233942557, "learning_rate": 5.700205875187084e-06, "loss": 0.2153, "step": 4579 }, { "epoch": 1.529470696276507, "grad_norm": 0.4305977208026748, "learning_rate": 5.698281639249927e-06, "loss": 0.2123, "step": 4580 }, { "epoch": 1.5298046418433795, "grad_norm": 0.43603488906842863, "learning_rate": 5.696357297829106e-06, "loss": 0.2089, "step": 4581 }, { "epoch": 1.530138587410252, "grad_norm": 0.4250706270087593, "learning_rate": 5.6944328512153165e-06, "loss": 0.1977, "step": 4582 }, { "epoch": 1.5304725329771247, "grad_norm": 0.4198326353777762, "learning_rate": 5.692508299699269e-06, "loss": 0.2009, "step": 4583 }, { "epoch": 1.5308064785439974, "grad_norm": 0.395956572492155, "learning_rate": 5.690583643571687e-06, "loss": 0.198, "step": 4584 }, { "epoch": 1.53114042411087, "grad_norm": 0.43947140761901465, "learning_rate": 5.688658883123315e-06, "loss": 0.2027, "step": 4585 }, { "epoch": 1.5314743696777424, "grad_norm": 0.4108183015582965, "learning_rate": 5.68673401864491e-06, "loss": 0.2118, "step": 4586 }, { "epoch": 1.5318083152446151, "grad_norm": 0.41182034693533875, "learning_rate": 5.684809050427247e-06, "loss": 0.1987, "step": 4587 }, { "epoch": 1.5321422608114879, "grad_norm": 0.4349586140015333, "learning_rate": 5.682883978761111e-06, "loss": 0.2002, "step": 4588 }, { "epoch": 1.5324762063783604, "grad_norm": 0.39453978972461545, "learning_rate": 5.680958803937311e-06, "loss": 0.1962, "step": 4589 }, { "epoch": 1.5328101519452328, "grad_norm": 0.4430655520347526, "learning_rate": 5.6790335262466645e-06, "loss": 0.2243, "step": 4590 }, { "epoch": 1.5331440975121056, "grad_norm": 0.38426560740617555, "learning_rate": 5.677108145980008e-06, "loss": 0.1949, "step": 4591 }, { "epoch": 1.5334780430789783, "grad_norm": 0.41412598049147914, "learning_rate": 5.675182663428196e-06, "loss": 0.2016, "step": 4592 }, { "epoch": 1.5338119886458508, "grad_norm": 0.3952364153216541, "learning_rate": 5.673257078882091e-06, "loss": 0.1957, "step": 4593 }, { "epoch": 1.5341459342127233, "grad_norm": 0.4347041827090324, "learning_rate": 5.671331392632577e-06, "loss": 0.2062, "step": 4594 }, { "epoch": 1.534479879779596, "grad_norm": 0.4243751095481007, "learning_rate": 5.6694056049705506e-06, "loss": 0.2011, "step": 4595 }, { "epoch": 1.5348138253464685, "grad_norm": 0.41160762842006665, "learning_rate": 5.667479716186927e-06, "loss": 0.2053, "step": 4596 }, { "epoch": 1.535147770913341, "grad_norm": 0.45726125048358807, "learning_rate": 5.665553726572631e-06, "loss": 0.206, "step": 4597 }, { "epoch": 1.5354817164802137, "grad_norm": 0.395285907262752, "learning_rate": 5.663627636418611e-06, "loss": 0.1837, "step": 4598 }, { "epoch": 1.5358156620470864, "grad_norm": 0.3940866152777301, "learning_rate": 5.661701446015821e-06, "loss": 0.1945, "step": 4599 }, { "epoch": 1.536149607613959, "grad_norm": 0.39866982139324986, "learning_rate": 5.659775155655235e-06, "loss": 0.1928, "step": 4600 }, { "epoch": 1.5364835531808314, "grad_norm": 0.4186774736236097, "learning_rate": 5.6578487656278446e-06, "loss": 0.1981, "step": 4601 }, { "epoch": 1.5368174987477041, "grad_norm": 0.41470326955184655, "learning_rate": 5.655922276224652e-06, "loss": 0.2064, "step": 4602 }, { "epoch": 1.5371514443145768, "grad_norm": 0.4092155718896311, "learning_rate": 5.653995687736676e-06, "loss": 0.2006, "step": 4603 }, { "epoch": 1.5374853898814493, "grad_norm": 0.4106065112514508, "learning_rate": 5.652069000454951e-06, "loss": 0.2022, "step": 4604 }, { "epoch": 1.5378193354483218, "grad_norm": 0.4858555749514037, "learning_rate": 5.650142214670527e-06, "loss": 0.1805, "step": 4605 }, { "epoch": 1.5381532810151946, "grad_norm": 0.4812494451824753, "learning_rate": 5.648215330674464e-06, "loss": 0.1882, "step": 4606 }, { "epoch": 1.5384872265820673, "grad_norm": 0.4261470703710842, "learning_rate": 5.646288348757845e-06, "loss": 0.1926, "step": 4607 }, { "epoch": 1.5388211721489398, "grad_norm": 0.3952136708762545, "learning_rate": 5.64436126921176e-06, "loss": 0.1975, "step": 4608 }, { "epoch": 1.5391551177158123, "grad_norm": 0.3970827209666597, "learning_rate": 5.642434092327318e-06, "loss": 0.1999, "step": 4609 }, { "epoch": 1.539489063282685, "grad_norm": 0.40998763479801215, "learning_rate": 5.640506818395643e-06, "loss": 0.1986, "step": 4610 }, { "epoch": 1.5398230088495575, "grad_norm": 0.47084062645452907, "learning_rate": 5.638579447707871e-06, "loss": 0.193, "step": 4611 }, { "epoch": 1.54015695441643, "grad_norm": 0.3860268369786831, "learning_rate": 5.636651980555153e-06, "loss": 0.1909, "step": 4612 }, { "epoch": 1.5404908999833027, "grad_norm": 0.3942593539079482, "learning_rate": 5.634724417228658e-06, "loss": 0.1966, "step": 4613 }, { "epoch": 1.5408248455501754, "grad_norm": 0.42574482432433103, "learning_rate": 5.632796758019566e-06, "loss": 0.1917, "step": 4614 }, { "epoch": 1.541158791117048, "grad_norm": 0.39053070306639825, "learning_rate": 5.630869003219072e-06, "loss": 0.1977, "step": 4615 }, { "epoch": 1.5414927366839204, "grad_norm": 0.43083185683956327, "learning_rate": 5.628941153118388e-06, "loss": 0.2072, "step": 4616 }, { "epoch": 1.5418266822507931, "grad_norm": 0.36680215926548226, "learning_rate": 5.627013208008737e-06, "loss": 0.1708, "step": 4617 }, { "epoch": 1.5421606278176658, "grad_norm": 0.4168771060059623, "learning_rate": 5.625085168181357e-06, "loss": 0.1957, "step": 4618 }, { "epoch": 1.5424945733845383, "grad_norm": 0.4159514176383808, "learning_rate": 5.623157033927503e-06, "loss": 0.1933, "step": 4619 }, { "epoch": 1.5428285189514108, "grad_norm": 0.45704579742530344, "learning_rate": 5.621228805538443e-06, "loss": 0.1997, "step": 4620 }, { "epoch": 1.5431624645182835, "grad_norm": 0.428254804443801, "learning_rate": 5.619300483305454e-06, "loss": 0.1862, "step": 4621 }, { "epoch": 1.5434964100851563, "grad_norm": 0.4203762697429112, "learning_rate": 5.617372067519837e-06, "loss": 0.2034, "step": 4622 }, { "epoch": 1.5438303556520288, "grad_norm": 0.4200993106054955, "learning_rate": 5.6154435584729e-06, "loss": 0.208, "step": 4623 }, { "epoch": 1.5441643012189012, "grad_norm": 0.39120604266589976, "learning_rate": 5.6135149564559665e-06, "loss": 0.1893, "step": 4624 }, { "epoch": 1.544498246785774, "grad_norm": 0.4347411574630664, "learning_rate": 5.611586261760375e-06, "loss": 0.181, "step": 4625 }, { "epoch": 1.5448321923526467, "grad_norm": 0.43018401156890745, "learning_rate": 5.609657474677478e-06, "loss": 0.1994, "step": 4626 }, { "epoch": 1.545166137919519, "grad_norm": 0.4820806633908888, "learning_rate": 5.607728595498641e-06, "loss": 0.2073, "step": 4627 }, { "epoch": 1.5455000834863917, "grad_norm": 0.40368430637167996, "learning_rate": 5.6057996245152435e-06, "loss": 0.1919, "step": 4628 }, { "epoch": 1.5458340290532644, "grad_norm": 0.406341895380249, "learning_rate": 5.603870562018679e-06, "loss": 0.1957, "step": 4629 }, { "epoch": 1.5461679746201369, "grad_norm": 0.4120838046068009, "learning_rate": 5.601941408300358e-06, "loss": 0.2073, "step": 4630 }, { "epoch": 1.5465019201870094, "grad_norm": 0.40592326676295587, "learning_rate": 5.600012163651698e-06, "loss": 0.1984, "step": 4631 }, { "epoch": 1.546835865753882, "grad_norm": 0.4363803396643685, "learning_rate": 5.598082828364134e-06, "loss": 0.1972, "step": 4632 }, { "epoch": 1.5471698113207548, "grad_norm": 0.43058371265331713, "learning_rate": 5.596153402729118e-06, "loss": 0.196, "step": 4633 }, { "epoch": 1.5475037568876273, "grad_norm": 0.39059793054857367, "learning_rate": 5.594223887038113e-06, "loss": 0.181, "step": 4634 }, { "epoch": 1.5478377024544998, "grad_norm": 0.4261233357009109, "learning_rate": 5.592294281582591e-06, "loss": 0.1893, "step": 4635 }, { "epoch": 1.5481716480213725, "grad_norm": 0.41289802429846045, "learning_rate": 5.590364586654043e-06, "loss": 0.2031, "step": 4636 }, { "epoch": 1.5485055935882452, "grad_norm": 0.45183145302663547, "learning_rate": 5.588434802543975e-06, "loss": 0.1886, "step": 4637 }, { "epoch": 1.5488395391551177, "grad_norm": 0.4250974981387959, "learning_rate": 5.5865049295439e-06, "loss": 0.1951, "step": 4638 }, { "epoch": 1.5491734847219902, "grad_norm": 0.43975870182322807, "learning_rate": 5.584574967945351e-06, "loss": 0.2104, "step": 4639 }, { "epoch": 1.549507430288863, "grad_norm": 0.44559355295469355, "learning_rate": 5.582644918039869e-06, "loss": 0.2092, "step": 4640 }, { "epoch": 1.5498413758557357, "grad_norm": 0.41539145621347284, "learning_rate": 5.580714780119011e-06, "loss": 0.2034, "step": 4641 }, { "epoch": 1.5501753214226082, "grad_norm": 0.40591271120703043, "learning_rate": 5.578784554474348e-06, "loss": 0.188, "step": 4642 }, { "epoch": 1.5505092669894807, "grad_norm": 0.6532970191187181, "learning_rate": 5.5768542413974645e-06, "loss": 0.2037, "step": 4643 }, { "epoch": 1.5508432125563534, "grad_norm": 0.4315096314647443, "learning_rate": 5.574923841179953e-06, "loss": 0.2007, "step": 4644 }, { "epoch": 1.5511771581232259, "grad_norm": 0.38269782258347723, "learning_rate": 5.572993354113429e-06, "loss": 0.183, "step": 4645 }, { "epoch": 1.5515111036900984, "grad_norm": 0.5076233296132883, "learning_rate": 5.5710627804895105e-06, "loss": 0.214, "step": 4646 }, { "epoch": 1.551845049256971, "grad_norm": 0.41321256949804475, "learning_rate": 5.569132120599834e-06, "loss": 0.1927, "step": 4647 }, { "epoch": 1.5521789948238438, "grad_norm": 0.39911707248154793, "learning_rate": 5.567201374736051e-06, "loss": 0.1849, "step": 4648 }, { "epoch": 1.5525129403907163, "grad_norm": 0.44520911248754574, "learning_rate": 5.565270543189821e-06, "loss": 0.2107, "step": 4649 }, { "epoch": 1.5528468859575888, "grad_norm": 0.3773856938490637, "learning_rate": 5.563339626252819e-06, "loss": 0.1834, "step": 4650 }, { "epoch": 1.5531808315244615, "grad_norm": 0.39836943833180216, "learning_rate": 5.561408624216734e-06, "loss": 0.1834, "step": 4651 }, { "epoch": 1.5535147770913342, "grad_norm": 0.39621414053364384, "learning_rate": 5.559477537373267e-06, "loss": 0.1918, "step": 4652 }, { "epoch": 1.5538487226582067, "grad_norm": 0.4435868338139983, "learning_rate": 5.557546366014129e-06, "loss": 0.1853, "step": 4653 }, { "epoch": 1.5541826682250792, "grad_norm": 0.3993984001133118, "learning_rate": 5.555615110431049e-06, "loss": 0.1993, "step": 4654 }, { "epoch": 1.554516613791952, "grad_norm": 0.41979545755044656, "learning_rate": 5.553683770915763e-06, "loss": 0.2005, "step": 4655 }, { "epoch": 1.5548505593588247, "grad_norm": 0.4128510095489835, "learning_rate": 5.551752347760023e-06, "loss": 0.1937, "step": 4656 }, { "epoch": 1.5551845049256972, "grad_norm": 0.41228406880777296, "learning_rate": 5.549820841255597e-06, "loss": 0.1934, "step": 4657 }, { "epoch": 1.5555184504925696, "grad_norm": 0.40502569693476115, "learning_rate": 5.547889251694257e-06, "loss": 0.1993, "step": 4658 }, { "epoch": 1.5558523960594424, "grad_norm": 0.45309964934422253, "learning_rate": 5.545957579367795e-06, "loss": 0.2157, "step": 4659 }, { "epoch": 1.5561863416263149, "grad_norm": 0.4075848451781985, "learning_rate": 5.544025824568011e-06, "loss": 0.1882, "step": 4660 }, { "epoch": 1.5565202871931874, "grad_norm": 0.41425823932525097, "learning_rate": 5.542093987586722e-06, "loss": 0.1992, "step": 4661 }, { "epoch": 1.55685423276006, "grad_norm": 0.40778580005587917, "learning_rate": 5.540162068715752e-06, "loss": 0.1909, "step": 4662 }, { "epoch": 1.5571881783269328, "grad_norm": 0.40536508487131023, "learning_rate": 5.538230068246942e-06, "loss": 0.2013, "step": 4663 }, { "epoch": 1.5575221238938053, "grad_norm": 0.45353755000496815, "learning_rate": 5.536297986472142e-06, "loss": 0.1986, "step": 4664 }, { "epoch": 1.5578560694606778, "grad_norm": 0.38378580944056184, "learning_rate": 5.534365823683219e-06, "loss": 0.1869, "step": 4665 }, { "epoch": 1.5581900150275505, "grad_norm": 0.5116844500041666, "learning_rate": 5.532433580172044e-06, "loss": 0.1968, "step": 4666 }, { "epoch": 1.5585239605944232, "grad_norm": 0.40984005892330067, "learning_rate": 5.5305012562305075e-06, "loss": 0.1805, "step": 4667 }, { "epoch": 1.5588579061612957, "grad_norm": 0.4695150357003394, "learning_rate": 5.528568852150511e-06, "loss": 0.2131, "step": 4668 }, { "epoch": 1.5591918517281682, "grad_norm": 0.454406568295344, "learning_rate": 5.526636368223965e-06, "loss": 0.2069, "step": 4669 }, { "epoch": 1.559525797295041, "grad_norm": 0.41198712780712915, "learning_rate": 5.524703804742793e-06, "loss": 0.2079, "step": 4670 }, { "epoch": 1.5598597428619136, "grad_norm": 0.3930997269695332, "learning_rate": 5.522771161998936e-06, "loss": 0.2006, "step": 4671 }, { "epoch": 1.5601936884287861, "grad_norm": 0.4132056747503601, "learning_rate": 5.52083844028434e-06, "loss": 0.1947, "step": 4672 }, { "epoch": 1.5605276339956586, "grad_norm": 0.44323946531045155, "learning_rate": 5.518905639890961e-06, "loss": 0.2009, "step": 4673 }, { "epoch": 1.5608615795625314, "grad_norm": 0.43128682055477907, "learning_rate": 5.516972761110778e-06, "loss": 0.2031, "step": 4674 }, { "epoch": 1.561195525129404, "grad_norm": 0.4152986643855445, "learning_rate": 5.515039804235772e-06, "loss": 0.1924, "step": 4675 }, { "epoch": 1.5615294706962763, "grad_norm": 0.43452047598785126, "learning_rate": 5.51310676955794e-06, "loss": 0.1928, "step": 4676 }, { "epoch": 1.561863416263149, "grad_norm": 0.400631942313824, "learning_rate": 5.511173657369287e-06, "loss": 0.1914, "step": 4677 }, { "epoch": 1.5621973618300218, "grad_norm": 0.45756926721328306, "learning_rate": 5.509240467961835e-06, "loss": 0.1983, "step": 4678 }, { "epoch": 1.5625313073968943, "grad_norm": 0.37815230836023156, "learning_rate": 5.507307201627614e-06, "loss": 0.1795, "step": 4679 }, { "epoch": 1.5628652529637668, "grad_norm": 0.42395660449614353, "learning_rate": 5.505373858658668e-06, "loss": 0.2012, "step": 4680 }, { "epoch": 1.5631991985306395, "grad_norm": 0.38723972997180905, "learning_rate": 5.503440439347048e-06, "loss": 0.186, "step": 4681 }, { "epoch": 1.5635331440975122, "grad_norm": 0.3784601820153379, "learning_rate": 5.501506943984823e-06, "loss": 0.183, "step": 4682 }, { "epoch": 1.5638670896643847, "grad_norm": 0.4147470025596834, "learning_rate": 5.4995733728640695e-06, "loss": 0.1917, "step": 4683 }, { "epoch": 1.5642010352312572, "grad_norm": 0.43067484554889957, "learning_rate": 5.497639726276876e-06, "loss": 0.2089, "step": 4684 }, { "epoch": 1.56453498079813, "grad_norm": 0.4656315754036739, "learning_rate": 5.49570600451534e-06, "loss": 0.2105, "step": 4685 }, { "epoch": 1.5648689263650026, "grad_norm": 0.4202462754676753, "learning_rate": 5.493772207871577e-06, "loss": 0.2008, "step": 4686 }, { "epoch": 1.5652028719318751, "grad_norm": 0.45653138426163725, "learning_rate": 5.491838336637708e-06, "loss": 0.2045, "step": 4687 }, { "epoch": 1.5655368174987476, "grad_norm": 0.41926970695465754, "learning_rate": 5.4899043911058665e-06, "loss": 0.202, "step": 4688 }, { "epoch": 1.5658707630656203, "grad_norm": 0.41411270997069943, "learning_rate": 5.487970371568199e-06, "loss": 0.1866, "step": 4689 }, { "epoch": 1.566204708632493, "grad_norm": 0.38632209581344107, "learning_rate": 5.486036278316861e-06, "loss": 0.1826, "step": 4690 }, { "epoch": 1.5665386541993656, "grad_norm": 0.3689564655199529, "learning_rate": 5.48410211164402e-06, "loss": 0.1834, "step": 4691 }, { "epoch": 1.566872599766238, "grad_norm": 0.4331808630136572, "learning_rate": 5.482167871841855e-06, "loss": 0.2066, "step": 4692 }, { "epoch": 1.5672065453331108, "grad_norm": 0.41532024396149275, "learning_rate": 5.480233559202556e-06, "loss": 0.2061, "step": 4693 }, { "epoch": 1.5675404908999833, "grad_norm": 0.41478007296781094, "learning_rate": 5.4782991740183225e-06, "loss": 0.2085, "step": 4694 }, { "epoch": 1.5678744364668558, "grad_norm": 0.39634309936244644, "learning_rate": 5.476364716581367e-06, "loss": 0.1963, "step": 4695 }, { "epoch": 1.5682083820337285, "grad_norm": 0.38011643978852666, "learning_rate": 5.474430187183912e-06, "loss": 0.1885, "step": 4696 }, { "epoch": 1.5685423276006012, "grad_norm": 0.4050712387842652, "learning_rate": 5.472495586118192e-06, "loss": 0.2043, "step": 4697 }, { "epoch": 1.5688762731674737, "grad_norm": 0.398212340908208, "learning_rate": 5.47056091367645e-06, "loss": 0.1949, "step": 4698 }, { "epoch": 1.5692102187343462, "grad_norm": 0.43479513365376116, "learning_rate": 5.468626170150942e-06, "loss": 0.2019, "step": 4699 }, { "epoch": 1.569544164301219, "grad_norm": 0.4110382738577834, "learning_rate": 5.466691355833932e-06, "loss": 0.189, "step": 4700 }, { "epoch": 1.5698781098680916, "grad_norm": 0.4152423718197278, "learning_rate": 5.464756471017696e-06, "loss": 0.2, "step": 4701 }, { "epoch": 1.5702120554349641, "grad_norm": 0.37504987375871496, "learning_rate": 5.462821515994525e-06, "loss": 0.1847, "step": 4702 }, { "epoch": 1.5705460010018366, "grad_norm": 0.4007750379597849, "learning_rate": 5.460886491056714e-06, "loss": 0.1973, "step": 4703 }, { "epoch": 1.5708799465687093, "grad_norm": 0.4162908356303437, "learning_rate": 5.458951396496572e-06, "loss": 0.2064, "step": 4704 }, { "epoch": 1.571213892135582, "grad_norm": 0.3842156235774017, "learning_rate": 5.457016232606417e-06, "loss": 0.1938, "step": 4705 }, { "epoch": 1.5715478377024545, "grad_norm": 0.3956066511286018, "learning_rate": 5.455080999678579e-06, "loss": 0.1965, "step": 4706 }, { "epoch": 1.571881783269327, "grad_norm": 0.42278335408976025, "learning_rate": 5.453145698005399e-06, "loss": 0.203, "step": 4707 }, { "epoch": 1.5722157288361998, "grad_norm": 0.3980754778029337, "learning_rate": 5.451210327879223e-06, "loss": 0.1885, "step": 4708 }, { "epoch": 1.5725496744030723, "grad_norm": 0.41626036909895414, "learning_rate": 5.449274889592416e-06, "loss": 0.1933, "step": 4709 }, { "epoch": 1.5728836199699447, "grad_norm": 0.45739490381726283, "learning_rate": 5.4473393834373466e-06, "loss": 0.2051, "step": 4710 }, { "epoch": 1.5732175655368175, "grad_norm": 0.40969808362672155, "learning_rate": 5.445403809706395e-06, "loss": 0.1942, "step": 4711 }, { "epoch": 1.5735515111036902, "grad_norm": 0.38965421516201565, "learning_rate": 5.443468168691954e-06, "loss": 0.1872, "step": 4712 }, { "epoch": 1.5738854566705627, "grad_norm": 0.3802144601432017, "learning_rate": 5.441532460686426e-06, "loss": 0.1936, "step": 4713 }, { "epoch": 1.5742194022374352, "grad_norm": 0.4282184037870892, "learning_rate": 5.4395966859822195e-06, "loss": 0.2006, "step": 4714 }, { "epoch": 1.574553347804308, "grad_norm": 0.41529391109227304, "learning_rate": 5.437660844871758e-06, "loss": 0.2018, "step": 4715 }, { "epoch": 1.5748872933711806, "grad_norm": 0.43028826862778147, "learning_rate": 5.435724937647473e-06, "loss": 0.2069, "step": 4716 }, { "epoch": 1.575221238938053, "grad_norm": 0.42632722327309375, "learning_rate": 5.433788964601804e-06, "loss": 0.2035, "step": 4717 }, { "epoch": 1.5755551845049256, "grad_norm": 0.42219790114922706, "learning_rate": 5.431852926027206e-06, "loss": 0.1933, "step": 4718 }, { "epoch": 1.5758891300717983, "grad_norm": 0.41389183867680046, "learning_rate": 5.429916822216138e-06, "loss": 0.1997, "step": 4719 }, { "epoch": 1.576223075638671, "grad_norm": 0.42005326382754093, "learning_rate": 5.42798065346107e-06, "loss": 0.1881, "step": 4720 }, { "epoch": 1.5765570212055435, "grad_norm": 0.39753444162905593, "learning_rate": 5.426044420054488e-06, "loss": 0.1929, "step": 4721 }, { "epoch": 1.576890966772416, "grad_norm": 0.41691063344852947, "learning_rate": 5.424108122288878e-06, "loss": 0.2058, "step": 4722 }, { "epoch": 1.5772249123392887, "grad_norm": 0.4523145688376783, "learning_rate": 5.4221717604567435e-06, "loss": 0.2156, "step": 4723 }, { "epoch": 1.5775588579061615, "grad_norm": 0.39679608858354004, "learning_rate": 5.420235334850593e-06, "loss": 0.1923, "step": 4724 }, { "epoch": 1.5778928034730337, "grad_norm": 0.3958825165224562, "learning_rate": 5.418298845762947e-06, "loss": 0.1885, "step": 4725 }, { "epoch": 1.5782267490399065, "grad_norm": 0.42180340518034565, "learning_rate": 5.416362293486336e-06, "loss": 0.1985, "step": 4726 }, { "epoch": 1.5785606946067792, "grad_norm": 0.43679318826263547, "learning_rate": 5.4144256783132975e-06, "loss": 0.2089, "step": 4727 }, { "epoch": 1.5788946401736517, "grad_norm": 0.38015308316695867, "learning_rate": 5.41248900053638e-06, "loss": 0.1811, "step": 4728 }, { "epoch": 1.5792285857405242, "grad_norm": 0.41432535867088516, "learning_rate": 5.4105522604481435e-06, "loss": 0.1965, "step": 4729 }, { "epoch": 1.5795625313073969, "grad_norm": 0.36628677647353347, "learning_rate": 5.408615458341152e-06, "loss": 0.1796, "step": 4730 }, { "epoch": 1.5798964768742696, "grad_norm": 0.3869654366325382, "learning_rate": 5.4066785945079855e-06, "loss": 0.1973, "step": 4731 }, { "epoch": 1.580230422441142, "grad_norm": 0.3878145330599434, "learning_rate": 5.404741669241228e-06, "loss": 0.1883, "step": 4732 }, { "epoch": 1.5805643680080146, "grad_norm": 0.4021115624829742, "learning_rate": 5.402804682833477e-06, "loss": 0.193, "step": 4733 }, { "epoch": 1.5808983135748873, "grad_norm": 0.49296268950806166, "learning_rate": 5.400867635577335e-06, "loss": 0.1977, "step": 4734 }, { "epoch": 1.58123225914176, "grad_norm": 0.4345599581839175, "learning_rate": 5.398930527765416e-06, "loss": 0.1982, "step": 4735 }, { "epoch": 1.5815662047086325, "grad_norm": 0.413964619926734, "learning_rate": 5.396993359690345e-06, "loss": 0.2034, "step": 4736 }, { "epoch": 1.581900150275505, "grad_norm": 0.43552047249198406, "learning_rate": 5.395056131644752e-06, "loss": 0.1958, "step": 4737 }, { "epoch": 1.5822340958423777, "grad_norm": 0.4109376138019141, "learning_rate": 5.393118843921277e-06, "loss": 0.1922, "step": 4738 }, { "epoch": 1.5825680414092504, "grad_norm": 0.5086676990134947, "learning_rate": 5.391181496812573e-06, "loss": 0.1883, "step": 4739 }, { "epoch": 1.582901986976123, "grad_norm": 0.39573926156137973, "learning_rate": 5.389244090611298e-06, "loss": 0.1994, "step": 4740 }, { "epoch": 1.5832359325429954, "grad_norm": 0.4607457839079693, "learning_rate": 5.38730662561012e-06, "loss": 0.205, "step": 4741 }, { "epoch": 1.5835698781098682, "grad_norm": 0.40835357222366736, "learning_rate": 5.385369102101716e-06, "loss": 0.1872, "step": 4742 }, { "epoch": 1.5839038236767407, "grad_norm": 0.48689078188687296, "learning_rate": 5.38343152037877e-06, "loss": 0.1961, "step": 4743 }, { "epoch": 1.5842377692436131, "grad_norm": 0.4066030628039563, "learning_rate": 5.38149388073398e-06, "loss": 0.1945, "step": 4744 }, { "epoch": 1.5845717148104859, "grad_norm": 0.366013738429132, "learning_rate": 5.379556183460047e-06, "loss": 0.1856, "step": 4745 }, { "epoch": 1.5849056603773586, "grad_norm": 0.43200895906800973, "learning_rate": 5.377618428849683e-06, "loss": 0.2068, "step": 4746 }, { "epoch": 1.585239605944231, "grad_norm": 0.49242292623459655, "learning_rate": 5.375680617195609e-06, "loss": 0.2096, "step": 4747 }, { "epoch": 1.5855735515111036, "grad_norm": 0.4132744338380143, "learning_rate": 5.373742748790555e-06, "loss": 0.1864, "step": 4748 }, { "epoch": 1.5859074970779763, "grad_norm": 0.39706760556922854, "learning_rate": 5.371804823927258e-06, "loss": 0.1926, "step": 4749 }, { "epoch": 1.586241442644849, "grad_norm": 0.4238854670573685, "learning_rate": 5.369866842898465e-06, "loss": 0.2077, "step": 4750 }, { "epoch": 1.5865753882117215, "grad_norm": 0.42640493614431735, "learning_rate": 5.367928805996929e-06, "loss": 0.1972, "step": 4751 }, { "epoch": 1.586909333778594, "grad_norm": 0.37013179888742664, "learning_rate": 5.365990713515414e-06, "loss": 0.1729, "step": 4752 }, { "epoch": 1.5872432793454667, "grad_norm": 0.45907590421110495, "learning_rate": 5.364052565746693e-06, "loss": 0.2073, "step": 4753 }, { "epoch": 1.5875772249123394, "grad_norm": 0.43304177739828936, "learning_rate": 5.362114362983547e-06, "loss": 0.2037, "step": 4754 }, { "epoch": 1.587911170479212, "grad_norm": 0.43747626863293665, "learning_rate": 5.360176105518761e-06, "loss": 0.1922, "step": 4755 }, { "epoch": 1.5882451160460844, "grad_norm": 0.5704783919256339, "learning_rate": 5.358237793645133e-06, "loss": 0.191, "step": 4756 }, { "epoch": 1.5885790616129571, "grad_norm": 0.41877733795764044, "learning_rate": 5.356299427655469e-06, "loss": 0.1945, "step": 4757 }, { "epoch": 1.5889130071798296, "grad_norm": 0.4566114150645618, "learning_rate": 5.354361007842581e-06, "loss": 0.1982, "step": 4758 }, { "epoch": 1.5892469527467021, "grad_norm": 0.38852790236519913, "learning_rate": 5.352422534499291e-06, "loss": 0.1853, "step": 4759 }, { "epoch": 1.5895808983135749, "grad_norm": 0.40708531373941814, "learning_rate": 5.350484007918428e-06, "loss": 0.2064, "step": 4760 }, { "epoch": 1.5899148438804476, "grad_norm": 0.4253185652044943, "learning_rate": 5.3485454283928265e-06, "loss": 0.192, "step": 4761 }, { "epoch": 1.59024878944732, "grad_norm": 0.41539393746379444, "learning_rate": 5.346606796215335e-06, "loss": 0.1835, "step": 4762 }, { "epoch": 1.5905827350141926, "grad_norm": 0.41820448778568065, "learning_rate": 5.344668111678805e-06, "loss": 0.2028, "step": 4763 }, { "epoch": 1.5909166805810653, "grad_norm": 0.41281987451744845, "learning_rate": 5.3427293750761e-06, "loss": 0.2046, "step": 4764 }, { "epoch": 1.591250626147938, "grad_norm": 0.49736587210070504, "learning_rate": 5.340790586700086e-06, "loss": 0.2222, "step": 4765 }, { "epoch": 1.5915845717148105, "grad_norm": 0.46561471285392064, "learning_rate": 5.338851746843643e-06, "loss": 0.2092, "step": 4766 }, { "epoch": 1.591918517281683, "grad_norm": 0.4281307653749809, "learning_rate": 5.336912855799652e-06, "loss": 0.2, "step": 4767 }, { "epoch": 1.5922524628485557, "grad_norm": 0.39860709481846124, "learning_rate": 5.334973913861008e-06, "loss": 0.1945, "step": 4768 }, { "epoch": 1.5925864084154284, "grad_norm": 0.4217558786649774, "learning_rate": 5.33303492132061e-06, "loss": 0.196, "step": 4769 }, { "epoch": 1.592920353982301, "grad_norm": 0.4309860097814307, "learning_rate": 5.3310958784713655e-06, "loss": 0.1979, "step": 4770 }, { "epoch": 1.5932542995491734, "grad_norm": 0.42902743725005, "learning_rate": 5.329156785606191e-06, "loss": 0.2034, "step": 4771 }, { "epoch": 1.5935882451160461, "grad_norm": 0.38985978116145, "learning_rate": 5.327217643018008e-06, "loss": 0.2006, "step": 4772 }, { "epoch": 1.5939221906829188, "grad_norm": 0.4347966967493165, "learning_rate": 5.325278450999747e-06, "loss": 0.2114, "step": 4773 }, { "epoch": 1.5942561362497911, "grad_norm": 0.39474589860208664, "learning_rate": 5.323339209844346e-06, "loss": 0.1921, "step": 4774 }, { "epoch": 1.5945900818166638, "grad_norm": 0.38256053396845374, "learning_rate": 5.32139991984475e-06, "loss": 0.1939, "step": 4775 }, { "epoch": 1.5949240273835366, "grad_norm": 0.3729647811843787, "learning_rate": 5.319460581293911e-06, "loss": 0.1874, "step": 4776 }, { "epoch": 1.595257972950409, "grad_norm": 0.45375740690251337, "learning_rate": 5.317521194484791e-06, "loss": 0.2095, "step": 4777 }, { "epoch": 1.5955919185172815, "grad_norm": 0.3939370561880113, "learning_rate": 5.315581759710356e-06, "loss": 0.1933, "step": 4778 }, { "epoch": 1.5959258640841543, "grad_norm": 0.41746052414225593, "learning_rate": 5.313642277263577e-06, "loss": 0.189, "step": 4779 }, { "epoch": 1.596259809651027, "grad_norm": 0.4516550553035685, "learning_rate": 5.311702747437443e-06, "loss": 0.1925, "step": 4780 }, { "epoch": 1.5965937552178995, "grad_norm": 0.41772684280109756, "learning_rate": 5.309763170524937e-06, "loss": 0.192, "step": 4781 }, { "epoch": 1.596927700784772, "grad_norm": 0.41226393435164765, "learning_rate": 5.307823546819056e-06, "loss": 0.1922, "step": 4782 }, { "epoch": 1.5972616463516447, "grad_norm": 0.39301899832094617, "learning_rate": 5.305883876612805e-06, "loss": 0.1864, "step": 4783 }, { "epoch": 1.5975955919185174, "grad_norm": 0.4344890610361969, "learning_rate": 5.303944160199193e-06, "loss": 0.1933, "step": 4784 }, { "epoch": 1.59792953748539, "grad_norm": 0.4204667147166973, "learning_rate": 5.302004397871237e-06, "loss": 0.2018, "step": 4785 }, { "epoch": 1.5982634830522624, "grad_norm": 0.4025801727517493, "learning_rate": 5.3000645899219594e-06, "loss": 0.1944, "step": 4786 }, { "epoch": 1.5985974286191351, "grad_norm": 0.4300003951448309, "learning_rate": 5.298124736644392e-06, "loss": 0.1997, "step": 4787 }, { "epoch": 1.5989313741860078, "grad_norm": 0.4322841928213523, "learning_rate": 5.296184838331575e-06, "loss": 0.1934, "step": 4788 }, { "epoch": 1.5992653197528803, "grad_norm": 0.4268719199609731, "learning_rate": 5.2942448952765495e-06, "loss": 0.1878, "step": 4789 }, { "epoch": 1.5995992653197528, "grad_norm": 0.4096006704992976, "learning_rate": 5.292304907772367e-06, "loss": 0.1984, "step": 4790 }, { "epoch": 1.5999332108866255, "grad_norm": 0.44146154991878817, "learning_rate": 5.290364876112088e-06, "loss": 0.2028, "step": 4791 }, { "epoch": 1.600267156453498, "grad_norm": 0.4127132107934224, "learning_rate": 5.288424800588775e-06, "loss": 0.1985, "step": 4792 }, { "epoch": 1.6006011020203705, "grad_norm": 0.4089302263887995, "learning_rate": 5.2864846814955e-06, "loss": 0.201, "step": 4793 }, { "epoch": 1.6009350475872433, "grad_norm": 0.4057658128571867, "learning_rate": 5.28454451912534e-06, "loss": 0.1919, "step": 4794 }, { "epoch": 1.601268993154116, "grad_norm": 0.41550864844996815, "learning_rate": 5.28260431377138e-06, "loss": 0.193, "step": 4795 }, { "epoch": 1.6016029387209885, "grad_norm": 0.4214643164993601, "learning_rate": 5.280664065726712e-06, "loss": 0.2037, "step": 4796 }, { "epoch": 1.601936884287861, "grad_norm": 0.41574763678512067, "learning_rate": 5.278723775284432e-06, "loss": 0.1955, "step": 4797 }, { "epoch": 1.6022708298547337, "grad_norm": 0.4156915810039953, "learning_rate": 5.276783442737642e-06, "loss": 0.1951, "step": 4798 }, { "epoch": 1.6026047754216064, "grad_norm": 0.4504952363847089, "learning_rate": 5.274843068379456e-06, "loss": 0.2121, "step": 4799 }, { "epoch": 1.602938720988479, "grad_norm": 0.4141410647043094, "learning_rate": 5.272902652502988e-06, "loss": 0.1908, "step": 4800 }, { "epoch": 1.6032726665553514, "grad_norm": 0.4172694674527439, "learning_rate": 5.27096219540136e-06, "loss": 0.2045, "step": 4801 }, { "epoch": 1.603606612122224, "grad_norm": 0.39101651996056197, "learning_rate": 5.269021697367702e-06, "loss": 0.1979, "step": 4802 }, { "epoch": 1.6039405576890968, "grad_norm": 0.4008081930642851, "learning_rate": 5.26708115869515e-06, "loss": 0.1828, "step": 4803 }, { "epoch": 1.6042745032559693, "grad_norm": 0.40199523795553577, "learning_rate": 5.265140579676844e-06, "loss": 0.1946, "step": 4804 }, { "epoch": 1.6046084488228418, "grad_norm": 0.36475147506022104, "learning_rate": 5.263199960605931e-06, "loss": 0.186, "step": 4805 }, { "epoch": 1.6049423943897145, "grad_norm": 0.44832636055257613, "learning_rate": 5.261259301775564e-06, "loss": 0.2019, "step": 4806 }, { "epoch": 1.605276339956587, "grad_norm": 0.3988676531453092, "learning_rate": 5.259318603478904e-06, "loss": 0.1845, "step": 4807 }, { "epoch": 1.6056102855234595, "grad_norm": 0.4394954645416581, "learning_rate": 5.2573778660091156e-06, "loss": 0.2012, "step": 4808 }, { "epoch": 1.6059442310903322, "grad_norm": 0.4090850456456306, "learning_rate": 5.255437089659371e-06, "loss": 0.1831, "step": 4809 }, { "epoch": 1.606278176657205, "grad_norm": 0.4802815380853867, "learning_rate": 5.253496274722846e-06, "loss": 0.1978, "step": 4810 }, { "epoch": 1.6066121222240775, "grad_norm": 0.38421296402882255, "learning_rate": 5.251555421492722e-06, "loss": 0.1815, "step": 4811 }, { "epoch": 1.60694606779095, "grad_norm": 0.40180199874469, "learning_rate": 5.249614530262191e-06, "loss": 0.1883, "step": 4812 }, { "epoch": 1.6072800133578227, "grad_norm": 0.3883383467412946, "learning_rate": 5.2476736013244475e-06, "loss": 0.1819, "step": 4813 }, { "epoch": 1.6076139589246954, "grad_norm": 0.4086793201262821, "learning_rate": 5.245732634972688e-06, "loss": 0.1958, "step": 4814 }, { "epoch": 1.6079479044915679, "grad_norm": 0.4224168613202054, "learning_rate": 5.243791631500122e-06, "loss": 0.1926, "step": 4815 }, { "epoch": 1.6082818500584404, "grad_norm": 0.42613150503363995, "learning_rate": 5.24185059119996e-06, "loss": 0.1975, "step": 4816 }, { "epoch": 1.608615795625313, "grad_norm": 0.4415148575023946, "learning_rate": 5.239909514365415e-06, "loss": 0.2004, "step": 4817 }, { "epoch": 1.6089497411921858, "grad_norm": 0.40970747948408875, "learning_rate": 5.237968401289717e-06, "loss": 0.1971, "step": 4818 }, { "epoch": 1.6092836867590583, "grad_norm": 0.4112895680407661, "learning_rate": 5.236027252266088e-06, "loss": 0.1972, "step": 4819 }, { "epoch": 1.6096176323259308, "grad_norm": 0.40428437979576776, "learning_rate": 5.234086067587765e-06, "loss": 0.1942, "step": 4820 }, { "epoch": 1.6099515778928035, "grad_norm": 0.419338867445877, "learning_rate": 5.232144847547983e-06, "loss": 0.1785, "step": 4821 }, { "epoch": 1.6102855234596762, "grad_norm": 0.4699954998213189, "learning_rate": 5.230203592439989e-06, "loss": 0.2027, "step": 4822 }, { "epoch": 1.6106194690265485, "grad_norm": 0.38579142330375554, "learning_rate": 5.228262302557034e-06, "loss": 0.1933, "step": 4823 }, { "epoch": 1.6109534145934212, "grad_norm": 0.44120355162315, "learning_rate": 5.226320978192369e-06, "loss": 0.2033, "step": 4824 }, { "epoch": 1.611287360160294, "grad_norm": 0.40448539512862475, "learning_rate": 5.224379619639253e-06, "loss": 0.1851, "step": 4825 }, { "epoch": 1.6116213057271664, "grad_norm": 0.4454285188019959, "learning_rate": 5.222438227190957e-06, "loss": 0.2021, "step": 4826 }, { "epoch": 1.611955251294039, "grad_norm": 0.414760600019116, "learning_rate": 5.220496801140746e-06, "loss": 0.1996, "step": 4827 }, { "epoch": 1.6122891968609117, "grad_norm": 0.42718840912720824, "learning_rate": 5.218555341781897e-06, "loss": 0.189, "step": 4828 }, { "epoch": 1.6126231424277844, "grad_norm": 0.4267525421289148, "learning_rate": 5.216613849407691e-06, "loss": 0.197, "step": 4829 }, { "epoch": 1.6129570879946569, "grad_norm": 0.4493424424103547, "learning_rate": 5.214672324311412e-06, "loss": 0.1982, "step": 4830 }, { "epoch": 1.6132910335615294, "grad_norm": 0.4308674096556119, "learning_rate": 5.21273076678635e-06, "loss": 0.1943, "step": 4831 }, { "epoch": 1.613624979128402, "grad_norm": 0.3925998201907294, "learning_rate": 5.210789177125802e-06, "loss": 0.1881, "step": 4832 }, { "epoch": 1.6139589246952748, "grad_norm": 0.4254383389522204, "learning_rate": 5.208847555623066e-06, "loss": 0.195, "step": 4833 }, { "epoch": 1.6142928702621473, "grad_norm": 0.3784003115153938, "learning_rate": 5.206905902571447e-06, "loss": 0.1845, "step": 4834 }, { "epoch": 1.6146268158290198, "grad_norm": 0.4259770207037785, "learning_rate": 5.204964218264258e-06, "loss": 0.1833, "step": 4835 }, { "epoch": 1.6149607613958925, "grad_norm": 0.39738746452523543, "learning_rate": 5.203022502994808e-06, "loss": 0.1976, "step": 4836 }, { "epoch": 1.6152947069627652, "grad_norm": 0.39030271717351783, "learning_rate": 5.201080757056418e-06, "loss": 0.1999, "step": 4837 }, { "epoch": 1.6156286525296377, "grad_norm": 0.4263335380461042, "learning_rate": 5.1991389807424145e-06, "loss": 0.2086, "step": 4838 }, { "epoch": 1.6159625980965102, "grad_norm": 0.398745944842516, "learning_rate": 5.1971971743461215e-06, "loss": 0.1932, "step": 4839 }, { "epoch": 1.616296543663383, "grad_norm": 0.42880316783788475, "learning_rate": 5.195255338160873e-06, "loss": 0.2033, "step": 4840 }, { "epoch": 1.6166304892302554, "grad_norm": 0.4503242539328835, "learning_rate": 5.193313472480007e-06, "loss": 0.216, "step": 4841 }, { "epoch": 1.616964434797128, "grad_norm": 0.40831941532509597, "learning_rate": 5.191371577596866e-06, "loss": 0.1872, "step": 4842 }, { "epoch": 1.6172983803640006, "grad_norm": 0.41791124388464684, "learning_rate": 5.189429653804794e-06, "loss": 0.194, "step": 4843 }, { "epoch": 1.6176323259308734, "grad_norm": 0.4292909114375624, "learning_rate": 5.187487701397142e-06, "loss": 0.1999, "step": 4844 }, { "epoch": 1.6179662714977459, "grad_norm": 0.4136962868331666, "learning_rate": 5.185545720667266e-06, "loss": 0.1884, "step": 4845 }, { "epoch": 1.6183002170646184, "grad_norm": 0.39331667168777334, "learning_rate": 5.183603711908523e-06, "loss": 0.1932, "step": 4846 }, { "epoch": 1.618634162631491, "grad_norm": 0.380147314511648, "learning_rate": 5.181661675414278e-06, "loss": 0.1895, "step": 4847 }, { "epoch": 1.6189681081983638, "grad_norm": 0.4066449462044048, "learning_rate": 5.179719611477898e-06, "loss": 0.1937, "step": 4848 }, { "epoch": 1.6193020537652363, "grad_norm": 0.4258446181103049, "learning_rate": 5.1777775203927535e-06, "loss": 0.2068, "step": 4849 }, { "epoch": 1.6196359993321088, "grad_norm": 0.4113383272205944, "learning_rate": 5.175835402452223e-06, "loss": 0.2044, "step": 4850 }, { "epoch": 1.6199699448989815, "grad_norm": 0.413380097915521, "learning_rate": 5.173893257949683e-06, "loss": 0.2014, "step": 4851 }, { "epoch": 1.6203038904658542, "grad_norm": 0.42517246513422285, "learning_rate": 5.17195108717852e-06, "loss": 0.2054, "step": 4852 }, { "epoch": 1.6206378360327267, "grad_norm": 0.41524625901706064, "learning_rate": 5.170008890432121e-06, "loss": 0.1873, "step": 4853 }, { "epoch": 1.6209717815995992, "grad_norm": 0.43808341311644106, "learning_rate": 5.168066668003876e-06, "loss": 0.2091, "step": 4854 }, { "epoch": 1.621305727166472, "grad_norm": 0.4252977935303211, "learning_rate": 5.166124420187182e-06, "loss": 0.2007, "step": 4855 }, { "epoch": 1.6216396727333444, "grad_norm": 0.4319004455143065, "learning_rate": 5.164182147275439e-06, "loss": 0.2144, "step": 4856 }, { "epoch": 1.621973618300217, "grad_norm": 0.40036208771754045, "learning_rate": 5.16223984956205e-06, "loss": 0.1875, "step": 4857 }, { "epoch": 1.6223075638670896, "grad_norm": 0.418519003967928, "learning_rate": 5.1602975273404196e-06, "loss": 0.1976, "step": 4858 }, { "epoch": 1.6226415094339623, "grad_norm": 0.40350253708517614, "learning_rate": 5.158355180903961e-06, "loss": 0.1989, "step": 4859 }, { "epoch": 1.6229754550008348, "grad_norm": 0.41204369381140116, "learning_rate": 5.156412810546089e-06, "loss": 0.1949, "step": 4860 }, { "epoch": 1.6233094005677073, "grad_norm": 0.4382744192834387, "learning_rate": 5.154470416560219e-06, "loss": 0.2089, "step": 4861 }, { "epoch": 1.62364334613458, "grad_norm": 0.40586800209662394, "learning_rate": 5.152527999239774e-06, "loss": 0.1748, "step": 4862 }, { "epoch": 1.6239772917014528, "grad_norm": 0.43817177004856733, "learning_rate": 5.150585558878177e-06, "loss": 0.2037, "step": 4863 }, { "epoch": 1.6243112372683253, "grad_norm": 0.40381422303951253, "learning_rate": 5.148643095768861e-06, "loss": 0.1881, "step": 4864 }, { "epoch": 1.6246451828351978, "grad_norm": 0.4223072439345843, "learning_rate": 5.146700610205254e-06, "loss": 0.2005, "step": 4865 }, { "epoch": 1.6249791284020705, "grad_norm": 0.39611701923208614, "learning_rate": 5.144758102480792e-06, "loss": 0.1915, "step": 4866 }, { "epoch": 1.6253130739689432, "grad_norm": 0.39897896763142227, "learning_rate": 5.142815572888915e-06, "loss": 0.1801, "step": 4867 }, { "epoch": 1.6256470195358157, "grad_norm": 0.3942908284104242, "learning_rate": 5.140873021723065e-06, "loss": 0.1873, "step": 4868 }, { "epoch": 1.6259809651026882, "grad_norm": 0.39322301183965197, "learning_rate": 5.138930449276686e-06, "loss": 0.1992, "step": 4869 }, { "epoch": 1.626314910669561, "grad_norm": 0.4303711635184881, "learning_rate": 5.136987855843226e-06, "loss": 0.1995, "step": 4870 }, { "epoch": 1.6266488562364336, "grad_norm": 0.4345354327414109, "learning_rate": 5.135045241716138e-06, "loss": 0.2136, "step": 4871 }, { "epoch": 1.626982801803306, "grad_norm": 0.539742638729196, "learning_rate": 5.133102607188875e-06, "loss": 0.2132, "step": 4872 }, { "epoch": 1.6273167473701786, "grad_norm": 0.4040192184352221, "learning_rate": 5.131159952554896e-06, "loss": 0.1896, "step": 4873 }, { "epoch": 1.6276506929370513, "grad_norm": 0.4169971044202786, "learning_rate": 5.129217278107663e-06, "loss": 0.194, "step": 4874 }, { "epoch": 1.6279846385039238, "grad_norm": 0.40503849753645316, "learning_rate": 5.127274584140636e-06, "loss": 0.1903, "step": 4875 }, { "epoch": 1.6283185840707963, "grad_norm": 0.4237099685305266, "learning_rate": 5.125331870947287e-06, "loss": 0.2008, "step": 4876 }, { "epoch": 1.628652529637669, "grad_norm": 0.4142662739561068, "learning_rate": 5.123389138821084e-06, "loss": 0.1995, "step": 4877 }, { "epoch": 1.6289864752045418, "grad_norm": 0.45354343375917766, "learning_rate": 5.121446388055497e-06, "loss": 0.21, "step": 4878 }, { "epoch": 1.6293204207714143, "grad_norm": 0.37425067796417305, "learning_rate": 5.119503618944004e-06, "loss": 0.1834, "step": 4879 }, { "epoch": 1.6296543663382868, "grad_norm": 0.3788887242903905, "learning_rate": 5.117560831780082e-06, "loss": 0.1904, "step": 4880 }, { "epoch": 1.6299883119051595, "grad_norm": 0.3898545052130911, "learning_rate": 5.115618026857211e-06, "loss": 0.1888, "step": 4881 }, { "epoch": 1.6303222574720322, "grad_norm": 0.4105364824350985, "learning_rate": 5.113675204468876e-06, "loss": 0.1911, "step": 4882 }, { "epoch": 1.6306562030389047, "grad_norm": 0.3962203176947265, "learning_rate": 5.111732364908564e-06, "loss": 0.1764, "step": 4883 }, { "epoch": 1.6309901486057772, "grad_norm": 0.3507120420190793, "learning_rate": 5.109789508469761e-06, "loss": 0.1671, "step": 4884 }, { "epoch": 1.63132409417265, "grad_norm": 0.3949491699443807, "learning_rate": 5.107846635445962e-06, "loss": 0.1958, "step": 4885 }, { "epoch": 1.6316580397395226, "grad_norm": 0.44189276954645507, "learning_rate": 5.1059037461306586e-06, "loss": 0.2031, "step": 4886 }, { "epoch": 1.631991985306395, "grad_norm": 0.42594578365840874, "learning_rate": 5.103960840817346e-06, "loss": 0.2029, "step": 4887 }, { "epoch": 1.6323259308732676, "grad_norm": 0.4073041041534602, "learning_rate": 5.1020179197995245e-06, "loss": 0.1948, "step": 4888 }, { "epoch": 1.6326598764401403, "grad_norm": 0.4197429915528991, "learning_rate": 5.1000749833706964e-06, "loss": 0.179, "step": 4889 }, { "epoch": 1.6329938220070128, "grad_norm": 0.4231299956266433, "learning_rate": 5.098132031824362e-06, "loss": 0.2036, "step": 4890 }, { "epoch": 1.6333277675738853, "grad_norm": 0.43474360913815285, "learning_rate": 5.096189065454029e-06, "loss": 0.2072, "step": 4891 }, { "epoch": 1.633661713140758, "grad_norm": 0.42739722495251686, "learning_rate": 5.094246084553206e-06, "loss": 0.1998, "step": 4892 }, { "epoch": 1.6339956587076307, "grad_norm": 0.4068442630703969, "learning_rate": 5.092303089415403e-06, "loss": 0.1965, "step": 4893 }, { "epoch": 1.6343296042745032, "grad_norm": 0.42075102506762035, "learning_rate": 5.09036008033413e-06, "loss": 0.1958, "step": 4894 }, { "epoch": 1.6346635498413757, "grad_norm": 0.4751211507782048, "learning_rate": 5.0884170576029034e-06, "loss": 0.2173, "step": 4895 }, { "epoch": 1.6349974954082485, "grad_norm": 0.4414325188339375, "learning_rate": 5.086474021515238e-06, "loss": 0.2036, "step": 4896 }, { "epoch": 1.6353314409751212, "grad_norm": 0.43981042916979307, "learning_rate": 5.084530972364656e-06, "loss": 0.1957, "step": 4897 }, { "epoch": 1.6356653865419937, "grad_norm": 0.4447308583849977, "learning_rate": 5.082587910444674e-06, "loss": 0.1966, "step": 4898 }, { "epoch": 1.6359993321088662, "grad_norm": 0.44669721483940267, "learning_rate": 5.080644836048815e-06, "loss": 0.2017, "step": 4899 }, { "epoch": 1.6363332776757389, "grad_norm": 0.37911277492603973, "learning_rate": 5.0787017494706035e-06, "loss": 0.1885, "step": 4900 }, { "epoch": 1.6366672232426116, "grad_norm": 0.42209764612725126, "learning_rate": 5.076758651003567e-06, "loss": 0.1904, "step": 4901 }, { "epoch": 1.637001168809484, "grad_norm": 0.41618819886027797, "learning_rate": 5.0748155409412325e-06, "loss": 0.1971, "step": 4902 }, { "epoch": 1.6373351143763566, "grad_norm": 0.37968963437992637, "learning_rate": 5.0728724195771295e-06, "loss": 0.1933, "step": 4903 }, { "epoch": 1.6376690599432293, "grad_norm": 0.49016772674623965, "learning_rate": 5.070929287204789e-06, "loss": 0.2005, "step": 4904 }, { "epoch": 1.6380030055101018, "grad_norm": 0.4179568582831211, "learning_rate": 5.068986144117746e-06, "loss": 0.2006, "step": 4905 }, { "epoch": 1.6383369510769743, "grad_norm": 0.4255338314834005, "learning_rate": 5.067042990609533e-06, "loss": 0.194, "step": 4906 }, { "epoch": 1.638670896643847, "grad_norm": 0.4489710292882698, "learning_rate": 5.065099826973685e-06, "loss": 0.2143, "step": 4907 }, { "epoch": 1.6390048422107197, "grad_norm": 0.43882456276403864, "learning_rate": 5.0631566535037435e-06, "loss": 0.1977, "step": 4908 }, { "epoch": 1.6393387877775922, "grad_norm": 0.38272437413780414, "learning_rate": 5.061213470493246e-06, "loss": 0.187, "step": 4909 }, { "epoch": 1.6396727333444647, "grad_norm": 0.4223324994778571, "learning_rate": 5.059270278235732e-06, "loss": 0.1967, "step": 4910 }, { "epoch": 1.6400066789113374, "grad_norm": 0.3976067064634602, "learning_rate": 5.057327077024745e-06, "loss": 0.1903, "step": 4911 }, { "epoch": 1.6403406244782102, "grad_norm": 0.4209543105402407, "learning_rate": 5.055383867153829e-06, "loss": 0.2025, "step": 4912 }, { "epoch": 1.6406745700450827, "grad_norm": 0.3947766936457843, "learning_rate": 5.053440648916526e-06, "loss": 0.1819, "step": 4913 }, { "epoch": 1.6410085156119552, "grad_norm": 0.4026201573480306, "learning_rate": 5.051497422606385e-06, "loss": 0.1935, "step": 4914 }, { "epoch": 1.6413424611788279, "grad_norm": 0.37785493785849833, "learning_rate": 5.049554188516952e-06, "loss": 0.1956, "step": 4915 }, { "epoch": 1.6416764067457006, "grad_norm": 0.4080022366885912, "learning_rate": 5.047610946941775e-06, "loss": 0.1959, "step": 4916 }, { "epoch": 1.642010352312573, "grad_norm": 0.39521347259779677, "learning_rate": 5.045667698174403e-06, "loss": 0.1841, "step": 4917 }, { "epoch": 1.6423442978794456, "grad_norm": 0.4052000993449699, "learning_rate": 5.043724442508388e-06, "loss": 0.1873, "step": 4918 }, { "epoch": 1.6426782434463183, "grad_norm": 0.4289851629916548, "learning_rate": 5.0417811802372815e-06, "loss": 0.2038, "step": 4919 }, { "epoch": 1.643012189013191, "grad_norm": 0.42873602680133893, "learning_rate": 5.039837911654637e-06, "loss": 0.1924, "step": 4920 }, { "epoch": 1.6433461345800633, "grad_norm": 0.4163011122982262, "learning_rate": 5.037894637054005e-06, "loss": 0.1887, "step": 4921 }, { "epoch": 1.643680080146936, "grad_norm": 0.41256746568734526, "learning_rate": 5.035951356728942e-06, "loss": 0.1868, "step": 4922 }, { "epoch": 1.6440140257138087, "grad_norm": 0.38452956569882735, "learning_rate": 5.034008070973004e-06, "loss": 0.1955, "step": 4923 }, { "epoch": 1.6443479712806812, "grad_norm": 0.47266399939167203, "learning_rate": 5.032064780079746e-06, "loss": 0.2108, "step": 4924 }, { "epoch": 1.6446819168475537, "grad_norm": 0.3661920769763338, "learning_rate": 5.030121484342725e-06, "loss": 0.1821, "step": 4925 }, { "epoch": 1.6450158624144264, "grad_norm": 0.38288278251589564, "learning_rate": 5.0281781840555e-06, "loss": 0.1872, "step": 4926 }, { "epoch": 1.6453498079812992, "grad_norm": 0.3951674051282928, "learning_rate": 5.026234879511629e-06, "loss": 0.1919, "step": 4927 }, { "epoch": 1.6456837535481716, "grad_norm": 0.4446820667734045, "learning_rate": 5.024291571004668e-06, "loss": 0.2049, "step": 4928 }, { "epoch": 1.6460176991150441, "grad_norm": 0.3847089437235357, "learning_rate": 5.022348258828181e-06, "loss": 0.1854, "step": 4929 }, { "epoch": 1.6463516446819169, "grad_norm": 0.4465777995021089, "learning_rate": 5.020404943275727e-06, "loss": 0.2032, "step": 4930 }, { "epoch": 1.6466855902487896, "grad_norm": 0.4209000333850444, "learning_rate": 5.018461624640864e-06, "loss": 0.1881, "step": 4931 }, { "epoch": 1.647019535815662, "grad_norm": 0.43874264508637995, "learning_rate": 5.016518303217157e-06, "loss": 0.1959, "step": 4932 }, { "epoch": 1.6473534813825346, "grad_norm": 0.40621965004843547, "learning_rate": 5.014574979298166e-06, "loss": 0.195, "step": 4933 }, { "epoch": 1.6476874269494073, "grad_norm": 0.4112489831025004, "learning_rate": 5.012631653177451e-06, "loss": 0.1962, "step": 4934 }, { "epoch": 1.64802137251628, "grad_norm": 0.38501149745631036, "learning_rate": 5.010688325148577e-06, "loss": 0.1827, "step": 4935 }, { "epoch": 1.6483553180831525, "grad_norm": 0.382230170818548, "learning_rate": 5.008744995505107e-06, "loss": 0.1816, "step": 4936 }, { "epoch": 1.648689263650025, "grad_norm": 0.4269969102329425, "learning_rate": 5.0068016645406e-06, "loss": 0.1912, "step": 4937 }, { "epoch": 1.6490232092168977, "grad_norm": 0.3911253797830778, "learning_rate": 5.0048583325486234e-06, "loss": 0.1907, "step": 4938 }, { "epoch": 1.6493571547837702, "grad_norm": 0.3973399530499921, "learning_rate": 5.002914999822737e-06, "loss": 0.1993, "step": 4939 }, { "epoch": 1.6496911003506427, "grad_norm": 0.381805883336254, "learning_rate": 5.000971666656508e-06, "loss": 0.1866, "step": 4940 }, { "epoch": 1.6500250459175154, "grad_norm": 0.44398075937862946, "learning_rate": 4.999028333343494e-06, "loss": 0.2153, "step": 4941 }, { "epoch": 1.6503589914843881, "grad_norm": 0.40495075548352927, "learning_rate": 4.9970850001772634e-06, "loss": 0.2006, "step": 4942 }, { "epoch": 1.6506929370512606, "grad_norm": 0.47323246207653963, "learning_rate": 4.995141667451378e-06, "loss": 0.2111, "step": 4943 }, { "epoch": 1.6510268826181331, "grad_norm": 0.40689036499077386, "learning_rate": 4.993198335459401e-06, "loss": 0.1961, "step": 4944 }, { "epoch": 1.6513608281850058, "grad_norm": 0.45363419907785657, "learning_rate": 4.991255004494896e-06, "loss": 0.2002, "step": 4945 }, { "epoch": 1.6516947737518786, "grad_norm": 0.39079299321552946, "learning_rate": 4.989311674851424e-06, "loss": 0.1918, "step": 4946 }, { "epoch": 1.652028719318751, "grad_norm": 0.4079537560804328, "learning_rate": 4.9873683468225495e-06, "loss": 0.1934, "step": 4947 }, { "epoch": 1.6523626648856236, "grad_norm": 0.3895830948669782, "learning_rate": 4.985425020701836e-06, "loss": 0.1966, "step": 4948 }, { "epoch": 1.6526966104524963, "grad_norm": 0.39586577460541733, "learning_rate": 4.983481696782844e-06, "loss": 0.2038, "step": 4949 }, { "epoch": 1.653030556019369, "grad_norm": 0.4316130617272694, "learning_rate": 4.9815383753591365e-06, "loss": 0.2015, "step": 4950 }, { "epoch": 1.6533645015862415, "grad_norm": 0.4491142890206108, "learning_rate": 4.9795950567242754e-06, "loss": 0.2137, "step": 4951 }, { "epoch": 1.653698447153114, "grad_norm": 0.371221820736492, "learning_rate": 4.9776517411718214e-06, "loss": 0.1865, "step": 4952 }, { "epoch": 1.6540323927199867, "grad_norm": 0.8306835153106146, "learning_rate": 4.9757084289953325e-06, "loss": 0.1926, "step": 4953 }, { "epoch": 1.6543663382868592, "grad_norm": 0.4009920133756041, "learning_rate": 4.973765120488373e-06, "loss": 0.2038, "step": 4954 }, { "epoch": 1.6547002838537317, "grad_norm": 0.4568623426752501, "learning_rate": 4.9718218159445015e-06, "loss": 0.1989, "step": 4955 }, { "epoch": 1.6550342294206044, "grad_norm": 0.48262334533879275, "learning_rate": 4.969878515657276e-06, "loss": 0.1881, "step": 4956 }, { "epoch": 1.6553681749874771, "grad_norm": 0.40522771927191203, "learning_rate": 4.967935219920257e-06, "loss": 0.1892, "step": 4957 }, { "epoch": 1.6557021205543496, "grad_norm": 0.37521846679027776, "learning_rate": 4.9659919290269986e-06, "loss": 0.1844, "step": 4958 }, { "epoch": 1.6560360661212221, "grad_norm": 0.4083183199052804, "learning_rate": 4.964048643271058e-06, "loss": 0.2098, "step": 4959 }, { "epoch": 1.6563700116880948, "grad_norm": 0.4004642626569134, "learning_rate": 4.962105362945996e-06, "loss": 0.1891, "step": 4960 }, { "epoch": 1.6567039572549676, "grad_norm": 0.40776367914606315, "learning_rate": 4.960162088345365e-06, "loss": 0.1911, "step": 4961 }, { "epoch": 1.65703790282184, "grad_norm": 0.4170345590008578, "learning_rate": 4.958218819762719e-06, "loss": 0.1925, "step": 4962 }, { "epoch": 1.6573718483887125, "grad_norm": 0.3933408020439283, "learning_rate": 4.9562755574916125e-06, "loss": 0.1841, "step": 4963 }, { "epoch": 1.6577057939555853, "grad_norm": 0.38270569831932166, "learning_rate": 4.954332301825597e-06, "loss": 0.185, "step": 4964 }, { "epoch": 1.658039739522458, "grad_norm": 0.4416397735911187, "learning_rate": 4.952389053058226e-06, "loss": 0.1998, "step": 4965 }, { "epoch": 1.6583736850893305, "grad_norm": 0.4038672656766227, "learning_rate": 4.95044581148305e-06, "loss": 0.1876, "step": 4966 }, { "epoch": 1.658707630656203, "grad_norm": 0.45268259547839906, "learning_rate": 4.948502577393617e-06, "loss": 0.214, "step": 4967 }, { "epoch": 1.6590415762230757, "grad_norm": 0.40485817055381734, "learning_rate": 4.946559351083475e-06, "loss": 0.1994, "step": 4968 }, { "epoch": 1.6593755217899484, "grad_norm": 0.39575079659822177, "learning_rate": 4.944616132846174e-06, "loss": 0.1897, "step": 4969 }, { "epoch": 1.6597094673568207, "grad_norm": 0.4236243366509332, "learning_rate": 4.942672922975255e-06, "loss": 0.1893, "step": 4970 }, { "epoch": 1.6600434129236934, "grad_norm": 0.42327398359903823, "learning_rate": 4.940729721764268e-06, "loss": 0.1849, "step": 4971 }, { "epoch": 1.6603773584905661, "grad_norm": 0.41896021825786833, "learning_rate": 4.938786529506755e-06, "loss": 0.1968, "step": 4972 }, { "epoch": 1.6607113040574386, "grad_norm": 0.44274001256771034, "learning_rate": 4.936843346496257e-06, "loss": 0.2005, "step": 4973 }, { "epoch": 1.661045249624311, "grad_norm": 0.4440886178498309, "learning_rate": 4.934900173026316e-06, "loss": 0.2138, "step": 4974 }, { "epoch": 1.6613791951911838, "grad_norm": 0.4494201652824032, "learning_rate": 4.93295700939047e-06, "loss": 0.2151, "step": 4975 }, { "epoch": 1.6617131407580565, "grad_norm": 0.4008141428086073, "learning_rate": 4.931013855882255e-06, "loss": 0.1846, "step": 4976 }, { "epoch": 1.662047086324929, "grad_norm": 0.4187124211937765, "learning_rate": 4.929070712795211e-06, "loss": 0.1974, "step": 4977 }, { "epoch": 1.6623810318918015, "grad_norm": 0.3800461821906711, "learning_rate": 4.927127580422871e-06, "loss": 0.18, "step": 4978 }, { "epoch": 1.6627149774586742, "grad_norm": 0.3790661455201971, "learning_rate": 4.925184459058769e-06, "loss": 0.1794, "step": 4979 }, { "epoch": 1.663048923025547, "grad_norm": 0.4088327927350755, "learning_rate": 4.9232413489964345e-06, "loss": 0.1928, "step": 4980 }, { "epoch": 1.6633828685924195, "grad_norm": 0.39844429506903506, "learning_rate": 4.921298250529398e-06, "loss": 0.1873, "step": 4981 }, { "epoch": 1.663716814159292, "grad_norm": 0.3728433467890873, "learning_rate": 4.919355163951186e-06, "loss": 0.1875, "step": 4982 }, { "epoch": 1.6640507597261647, "grad_norm": 0.4049596237809687, "learning_rate": 4.917412089555328e-06, "loss": 0.1834, "step": 4983 }, { "epoch": 1.6643847052930374, "grad_norm": 0.43046575539496446, "learning_rate": 4.915469027635345e-06, "loss": 0.2045, "step": 4984 }, { "epoch": 1.6647186508599099, "grad_norm": 0.34374023895710515, "learning_rate": 4.9135259784847625e-06, "loss": 0.1614, "step": 4985 }, { "epoch": 1.6650525964267824, "grad_norm": 0.4120359522713758, "learning_rate": 4.911582942397098e-06, "loss": 0.1893, "step": 4986 }, { "epoch": 1.665386541993655, "grad_norm": 0.4527687274785145, "learning_rate": 4.909639919665872e-06, "loss": 0.2074, "step": 4987 }, { "epoch": 1.6657204875605276, "grad_norm": 0.42010451152241596, "learning_rate": 4.907696910584599e-06, "loss": 0.1902, "step": 4988 }, { "epoch": 1.6660544331274, "grad_norm": 0.4080591968473613, "learning_rate": 4.905753915446795e-06, "loss": 0.1869, "step": 4989 }, { "epoch": 1.6663883786942728, "grad_norm": 0.45688748641848076, "learning_rate": 4.903810934545972e-06, "loss": 0.1957, "step": 4990 }, { "epoch": 1.6667223242611455, "grad_norm": 0.39479981393128605, "learning_rate": 4.90186796817564e-06, "loss": 0.1892, "step": 4991 }, { "epoch": 1.667056269828018, "grad_norm": 0.3691965681647722, "learning_rate": 4.899925016629307e-06, "loss": 0.1811, "step": 4992 }, { "epoch": 1.6673902153948905, "grad_norm": 0.45179774449253307, "learning_rate": 4.897982080200477e-06, "loss": 0.1988, "step": 4993 }, { "epoch": 1.6677241609617632, "grad_norm": 0.3740310306629665, "learning_rate": 4.896039159182655e-06, "loss": 0.1808, "step": 4994 }, { "epoch": 1.668058106528636, "grad_norm": 0.4089047480386049, "learning_rate": 4.894096253869343e-06, "loss": 0.1927, "step": 4995 }, { "epoch": 1.6683920520955084, "grad_norm": 0.399676505442746, "learning_rate": 4.89215336455404e-06, "loss": 0.1931, "step": 4996 }, { "epoch": 1.668725997662381, "grad_norm": 0.3895788933672829, "learning_rate": 4.89021049153024e-06, "loss": 0.1849, "step": 4997 }, { "epoch": 1.6690599432292537, "grad_norm": 0.3595030957949447, "learning_rate": 4.888267635091439e-06, "loss": 0.173, "step": 4998 }, { "epoch": 1.6693938887961264, "grad_norm": 0.41403286511225557, "learning_rate": 4.886324795531126e-06, "loss": 0.1847, "step": 4999 }, { "epoch": 1.6697278343629989, "grad_norm": 0.4112481211288421, "learning_rate": 4.88438197314279e-06, "loss": 0.1868, "step": 5000 }, { "epoch": 1.6700617799298714, "grad_norm": 0.41924546376727767, "learning_rate": 4.88243916821992e-06, "loss": 0.1997, "step": 5001 }, { "epoch": 1.670395725496744, "grad_norm": 0.3888547499719048, "learning_rate": 4.880496381055998e-06, "loss": 0.1854, "step": 5002 }, { "epoch": 1.6707296710636166, "grad_norm": 0.41588935541900535, "learning_rate": 4.878553611944505e-06, "loss": 0.1913, "step": 5003 }, { "epoch": 1.671063616630489, "grad_norm": 0.37772961201560074, "learning_rate": 4.876610861178918e-06, "loss": 0.1902, "step": 5004 }, { "epoch": 1.6713975621973618, "grad_norm": 0.39759117151567896, "learning_rate": 4.874668129052712e-06, "loss": 0.2015, "step": 5005 }, { "epoch": 1.6717315077642345, "grad_norm": 0.40235692971912135, "learning_rate": 4.872725415859363e-06, "loss": 0.1863, "step": 5006 }, { "epoch": 1.672065453331107, "grad_norm": 0.36928962349677913, "learning_rate": 4.8707827218923385e-06, "loss": 0.1808, "step": 5007 }, { "epoch": 1.6723993988979795, "grad_norm": 0.4562272715321814, "learning_rate": 4.868840047445106e-06, "loss": 0.2151, "step": 5008 }, { "epoch": 1.6727333444648522, "grad_norm": 0.43138486747940097, "learning_rate": 4.866897392811127e-06, "loss": 0.213, "step": 5009 }, { "epoch": 1.673067290031725, "grad_norm": 0.39105820437363875, "learning_rate": 4.864954758283865e-06, "loss": 0.1844, "step": 5010 }, { "epoch": 1.6734012355985974, "grad_norm": 0.39624876568230644, "learning_rate": 4.8630121441567755e-06, "loss": 0.1976, "step": 5011 }, { "epoch": 1.67373518116547, "grad_norm": 0.38463611162536465, "learning_rate": 4.861069550723316e-06, "loss": 0.1955, "step": 5012 }, { "epoch": 1.6740691267323426, "grad_norm": 0.4218274037026937, "learning_rate": 4.859126978276937e-06, "loss": 0.2063, "step": 5013 }, { "epoch": 1.6744030722992154, "grad_norm": 0.4055270643811191, "learning_rate": 4.857184427111086e-06, "loss": 0.2086, "step": 5014 }, { "epoch": 1.6747370178660879, "grad_norm": 0.3660260268828606, "learning_rate": 4.855241897519209e-06, "loss": 0.1744, "step": 5015 }, { "epoch": 1.6750709634329604, "grad_norm": 0.42399611512123225, "learning_rate": 4.8532993897947464e-06, "loss": 0.1971, "step": 5016 }, { "epoch": 1.675404908999833, "grad_norm": 0.4052742336770543, "learning_rate": 4.851356904231139e-06, "loss": 0.1851, "step": 5017 }, { "epoch": 1.6757388545667058, "grad_norm": 0.4318706621172862, "learning_rate": 4.849414441121823e-06, "loss": 0.2056, "step": 5018 }, { "epoch": 1.676072800133578, "grad_norm": 0.4183164683620372, "learning_rate": 4.847472000760228e-06, "loss": 0.2033, "step": 5019 }, { "epoch": 1.6764067457004508, "grad_norm": 0.41452196588285173, "learning_rate": 4.845529583439783e-06, "loss": 0.1937, "step": 5020 }, { "epoch": 1.6767406912673235, "grad_norm": 0.44161534649268036, "learning_rate": 4.843587189453914e-06, "loss": 0.2104, "step": 5021 }, { "epoch": 1.677074636834196, "grad_norm": 0.41291953906054923, "learning_rate": 4.84164481909604e-06, "loss": 0.1964, "step": 5022 }, { "epoch": 1.6774085824010685, "grad_norm": 0.4107451131477934, "learning_rate": 4.839702472659581e-06, "loss": 0.1848, "step": 5023 }, { "epoch": 1.6777425279679412, "grad_norm": 0.40593954437157626, "learning_rate": 4.837760150437952e-06, "loss": 0.1927, "step": 5024 }, { "epoch": 1.678076473534814, "grad_norm": 0.41888894475916594, "learning_rate": 4.8358178527245625e-06, "loss": 0.2006, "step": 5025 }, { "epoch": 1.6784104191016864, "grad_norm": 0.43080741370800135, "learning_rate": 4.83387557981282e-06, "loss": 0.196, "step": 5026 }, { "epoch": 1.678744364668559, "grad_norm": 0.40951343834625403, "learning_rate": 4.831933331996126e-06, "loss": 0.1998, "step": 5027 }, { "epoch": 1.6790783102354316, "grad_norm": 0.3875819486249406, "learning_rate": 4.8299911095678816e-06, "loss": 0.1833, "step": 5028 }, { "epoch": 1.6794122558023044, "grad_norm": 0.4300987227755639, "learning_rate": 4.82804891282148e-06, "loss": 0.2069, "step": 5029 }, { "epoch": 1.6797462013691769, "grad_norm": 0.37297544793032655, "learning_rate": 4.8261067420503175e-06, "loss": 0.188, "step": 5030 }, { "epoch": 1.6800801469360493, "grad_norm": 0.39096748045381796, "learning_rate": 4.8241645975477785e-06, "loss": 0.1835, "step": 5031 }, { "epoch": 1.680414092502922, "grad_norm": 0.3996077225322656, "learning_rate": 4.822222479607247e-06, "loss": 0.1943, "step": 5032 }, { "epoch": 1.6807480380697948, "grad_norm": 0.4304762784482779, "learning_rate": 4.820280388522104e-06, "loss": 0.1948, "step": 5033 }, { "epoch": 1.6810819836366673, "grad_norm": 0.3805492053734514, "learning_rate": 4.818338324585725e-06, "loss": 0.1838, "step": 5034 }, { "epoch": 1.6814159292035398, "grad_norm": 0.37585210563270655, "learning_rate": 4.816396288091478e-06, "loss": 0.1854, "step": 5035 }, { "epoch": 1.6817498747704125, "grad_norm": 0.42351245196254, "learning_rate": 4.814454279332737e-06, "loss": 0.2025, "step": 5036 }, { "epoch": 1.682083820337285, "grad_norm": 0.41695687567784445, "learning_rate": 4.81251229860286e-06, "loss": 0.193, "step": 5037 }, { "epoch": 1.6824177659041575, "grad_norm": 0.537261725808297, "learning_rate": 4.810570346195207e-06, "loss": 0.2114, "step": 5038 }, { "epoch": 1.6827517114710302, "grad_norm": 0.4113025086849026, "learning_rate": 4.808628422403135e-06, "loss": 0.1948, "step": 5039 }, { "epoch": 1.683085657037903, "grad_norm": 0.4076342773958272, "learning_rate": 4.806686527519994e-06, "loss": 0.1986, "step": 5040 }, { "epoch": 1.6834196026047754, "grad_norm": 0.41025435360946494, "learning_rate": 4.804744661839128e-06, "loss": 0.2066, "step": 5041 }, { "epoch": 1.683753548171648, "grad_norm": 0.3636218816609904, "learning_rate": 4.80280282565388e-06, "loss": 0.1872, "step": 5042 }, { "epoch": 1.6840874937385206, "grad_norm": 0.3950953132425706, "learning_rate": 4.800861019257587e-06, "loss": 0.1984, "step": 5043 }, { "epoch": 1.6844214393053933, "grad_norm": 0.42371769285822725, "learning_rate": 4.798919242943583e-06, "loss": 0.2037, "step": 5044 }, { "epoch": 1.6847553848722658, "grad_norm": 0.392433056196255, "learning_rate": 4.796977497005194e-06, "loss": 0.1871, "step": 5045 }, { "epoch": 1.6850893304391383, "grad_norm": 0.46155417346154815, "learning_rate": 4.795035781735743e-06, "loss": 0.1978, "step": 5046 }, { "epoch": 1.685423276006011, "grad_norm": 0.440921144287475, "learning_rate": 4.793094097428552e-06, "loss": 0.2139, "step": 5047 }, { "epoch": 1.6857572215728838, "grad_norm": 0.3794880760617637, "learning_rate": 4.7911524443769346e-06, "loss": 0.1911, "step": 5048 }, { "epoch": 1.6860911671397563, "grad_norm": 0.395057876244426, "learning_rate": 4.789210822874199e-06, "loss": 0.1938, "step": 5049 }, { "epoch": 1.6864251127066288, "grad_norm": 0.3775844552491621, "learning_rate": 4.787269233213651e-06, "loss": 0.188, "step": 5050 }, { "epoch": 1.6867590582735015, "grad_norm": 0.3603621487839833, "learning_rate": 4.785327675688591e-06, "loss": 0.1885, "step": 5051 }, { "epoch": 1.687093003840374, "grad_norm": 0.4027004483451677, "learning_rate": 4.7833861505923096e-06, "loss": 0.1893, "step": 5052 }, { "epoch": 1.6874269494072465, "grad_norm": 0.4765942958527092, "learning_rate": 4.781444658218103e-06, "loss": 0.1933, "step": 5053 }, { "epoch": 1.6877608949741192, "grad_norm": 0.41479948198174693, "learning_rate": 4.779503198859255e-06, "loss": 0.1885, "step": 5054 }, { "epoch": 1.688094840540992, "grad_norm": 0.39411292837696527, "learning_rate": 4.777561772809045e-06, "loss": 0.1867, "step": 5055 }, { "epoch": 1.6884287861078644, "grad_norm": 0.3932933436171054, "learning_rate": 4.775620380360747e-06, "loss": 0.1996, "step": 5056 }, { "epoch": 1.688762731674737, "grad_norm": 0.5515039821445362, "learning_rate": 4.773679021807634e-06, "loss": 0.2012, "step": 5057 }, { "epoch": 1.6890966772416096, "grad_norm": 0.4175879612749353, "learning_rate": 4.771737697442968e-06, "loss": 0.1885, "step": 5058 }, { "epoch": 1.6894306228084823, "grad_norm": 0.4073593770421272, "learning_rate": 4.7697964075600114e-06, "loss": 0.1963, "step": 5059 }, { "epoch": 1.6897645683753548, "grad_norm": 0.41602326668910145, "learning_rate": 4.767855152452019e-06, "loss": 0.1945, "step": 5060 }, { "epoch": 1.6900985139422273, "grad_norm": 0.444598225558259, "learning_rate": 4.765913932412237e-06, "loss": 0.2069, "step": 5061 }, { "epoch": 1.6904324595091, "grad_norm": 0.383752685178437, "learning_rate": 4.763972747733913e-06, "loss": 0.1897, "step": 5062 }, { "epoch": 1.6907664050759728, "grad_norm": 0.42119196388224206, "learning_rate": 4.762031598710285e-06, "loss": 0.1927, "step": 5063 }, { "epoch": 1.6911003506428453, "grad_norm": 0.4190290794400875, "learning_rate": 4.760090485634584e-06, "loss": 0.1973, "step": 5064 }, { "epoch": 1.6914342962097177, "grad_norm": 0.423228700372625, "learning_rate": 4.758149408800042e-06, "loss": 0.2059, "step": 5065 }, { "epoch": 1.6917682417765905, "grad_norm": 0.3900317630440837, "learning_rate": 4.756208368499879e-06, "loss": 0.1885, "step": 5066 }, { "epoch": 1.692102187343463, "grad_norm": 0.412549195493742, "learning_rate": 4.754267365027314e-06, "loss": 0.1965, "step": 5067 }, { "epoch": 1.6924361329103355, "grad_norm": 0.3953914528356778, "learning_rate": 4.752326398675555e-06, "loss": 0.2009, "step": 5068 }, { "epoch": 1.6927700784772082, "grad_norm": 0.42031070024553674, "learning_rate": 4.750385469737811e-06, "loss": 0.1878, "step": 5069 }, { "epoch": 1.693104024044081, "grad_norm": 0.4127570219253499, "learning_rate": 4.748444578507278e-06, "loss": 0.207, "step": 5070 }, { "epoch": 1.6934379696109534, "grad_norm": 0.41827513152551066, "learning_rate": 4.746503725277156e-06, "loss": 0.2002, "step": 5071 }, { "epoch": 1.6937719151778259, "grad_norm": 0.48196867703987134, "learning_rate": 4.744562910340631e-06, "loss": 0.2071, "step": 5072 }, { "epoch": 1.6941058607446986, "grad_norm": 0.38681708500322204, "learning_rate": 4.742622133990885e-06, "loss": 0.185, "step": 5073 }, { "epoch": 1.6944398063115713, "grad_norm": 0.4062271054342476, "learning_rate": 4.740681396521097e-06, "loss": 0.1876, "step": 5074 }, { "epoch": 1.6947737518784438, "grad_norm": 0.516392208571396, "learning_rate": 4.738740698224438e-06, "loss": 0.2053, "step": 5075 }, { "epoch": 1.6951076974453163, "grad_norm": 0.4361598083182339, "learning_rate": 4.73680003939407e-06, "loss": 0.2121, "step": 5076 }, { "epoch": 1.695441643012189, "grad_norm": 0.41160087711576554, "learning_rate": 4.734859420323158e-06, "loss": 0.2036, "step": 5077 }, { "epoch": 1.6957755885790617, "grad_norm": 0.49946880177831654, "learning_rate": 4.7329188413048515e-06, "loss": 0.1999, "step": 5078 }, { "epoch": 1.6961095341459342, "grad_norm": 0.3666729685844792, "learning_rate": 4.7309783026322995e-06, "loss": 0.1833, "step": 5079 }, { "epoch": 1.6964434797128067, "grad_norm": 0.40908472431553156, "learning_rate": 4.7290378045986425e-06, "loss": 0.1904, "step": 5080 }, { "epoch": 1.6967774252796795, "grad_norm": 0.4479324104960931, "learning_rate": 4.727097347497014e-06, "loss": 0.1967, "step": 5081 }, { "epoch": 1.6971113708465522, "grad_norm": 0.43959080462229055, "learning_rate": 4.7251569316205455e-06, "loss": 0.2051, "step": 5082 }, { "epoch": 1.6974453164134247, "grad_norm": 0.38971023600971766, "learning_rate": 4.723216557262359e-06, "loss": 0.1886, "step": 5083 }, { "epoch": 1.6977792619802972, "grad_norm": 0.4760777142908489, "learning_rate": 4.721276224715569e-06, "loss": 0.201, "step": 5084 }, { "epoch": 1.6981132075471699, "grad_norm": 0.38860098507630997, "learning_rate": 4.719335934273289e-06, "loss": 0.1859, "step": 5085 }, { "epoch": 1.6984471531140424, "grad_norm": 0.40103252102303055, "learning_rate": 4.717395686228621e-06, "loss": 0.1884, "step": 5086 }, { "epoch": 1.6987810986809149, "grad_norm": 0.36902419962259236, "learning_rate": 4.715455480874661e-06, "loss": 0.1814, "step": 5087 }, { "epoch": 1.6991150442477876, "grad_norm": 0.412125534705266, "learning_rate": 4.713515318504501e-06, "loss": 0.1913, "step": 5088 }, { "epoch": 1.6994489898146603, "grad_norm": 0.39157748575603113, "learning_rate": 4.711575199411226e-06, "loss": 0.1829, "step": 5089 }, { "epoch": 1.6997829353815328, "grad_norm": 0.41802116475269446, "learning_rate": 4.7096351238879135e-06, "loss": 0.1978, "step": 5090 }, { "epoch": 1.7001168809484053, "grad_norm": 0.6279979025769422, "learning_rate": 4.707695092227634e-06, "loss": 0.1937, "step": 5091 }, { "epoch": 1.700450826515278, "grad_norm": 0.4114760298428927, "learning_rate": 4.705755104723453e-06, "loss": 0.199, "step": 5092 }, { "epoch": 1.7007847720821507, "grad_norm": 0.4395547405428648, "learning_rate": 4.703815161668426e-06, "loss": 0.1931, "step": 5093 }, { "epoch": 1.7011187176490232, "grad_norm": 0.41225308183331766, "learning_rate": 4.701875263355608e-06, "loss": 0.191, "step": 5094 }, { "epoch": 1.7014526632158957, "grad_norm": 0.3958758087990041, "learning_rate": 4.699935410078042e-06, "loss": 0.1941, "step": 5095 }, { "epoch": 1.7017866087827684, "grad_norm": 0.41315215933016763, "learning_rate": 4.697995602128766e-06, "loss": 0.1947, "step": 5096 }, { "epoch": 1.7021205543496412, "grad_norm": 0.3955180863149731, "learning_rate": 4.696055839800809e-06, "loss": 0.1991, "step": 5097 }, { "epoch": 1.7024544999165137, "grad_norm": 0.43154636895066495, "learning_rate": 4.694116123387197e-06, "loss": 0.2104, "step": 5098 }, { "epoch": 1.7027884454833861, "grad_norm": 0.4007233166701319, "learning_rate": 4.692176453180944e-06, "loss": 0.1902, "step": 5099 }, { "epoch": 1.7031223910502589, "grad_norm": 0.40361257750068574, "learning_rate": 4.6902368294750644e-06, "loss": 0.2028, "step": 5100 }, { "epoch": 1.7034563366171314, "grad_norm": 0.40699361291263014, "learning_rate": 4.688297252562559e-06, "loss": 0.1989, "step": 5101 }, { "epoch": 1.7037902821840039, "grad_norm": 0.4079101725879806, "learning_rate": 4.6863577227364235e-06, "loss": 0.1909, "step": 5102 }, { "epoch": 1.7041242277508766, "grad_norm": 0.38587148305952435, "learning_rate": 4.684418240289648e-06, "loss": 0.1861, "step": 5103 }, { "epoch": 1.7044581733177493, "grad_norm": 0.4625478571203875, "learning_rate": 4.682478805515212e-06, "loss": 0.202, "step": 5104 }, { "epoch": 1.7047921188846218, "grad_norm": 0.4358990786312495, "learning_rate": 4.680539418706091e-06, "loss": 0.1927, "step": 5105 }, { "epoch": 1.7051260644514943, "grad_norm": 0.41133770169381584, "learning_rate": 4.678600080155252e-06, "loss": 0.1775, "step": 5106 }, { "epoch": 1.705460010018367, "grad_norm": 0.3962294559870226, "learning_rate": 4.676660790155656e-06, "loss": 0.1981, "step": 5107 }, { "epoch": 1.7057939555852397, "grad_norm": 0.40931109170500446, "learning_rate": 4.674721549000255e-06, "loss": 0.2042, "step": 5108 }, { "epoch": 1.7061279011521122, "grad_norm": 0.4107315070526611, "learning_rate": 4.6727823569819944e-06, "loss": 0.1878, "step": 5109 }, { "epoch": 1.7064618467189847, "grad_norm": 0.3752388358432499, "learning_rate": 4.670843214393811e-06, "loss": 0.1813, "step": 5110 }, { "epoch": 1.7067957922858574, "grad_norm": 0.43157554986961477, "learning_rate": 4.6689041215286344e-06, "loss": 0.2003, "step": 5111 }, { "epoch": 1.7071297378527301, "grad_norm": 0.42486623254964884, "learning_rate": 4.666965078679391e-06, "loss": 0.1971, "step": 5112 }, { "epoch": 1.7074636834196026, "grad_norm": 0.405339605614925, "learning_rate": 4.665026086138993e-06, "loss": 0.2053, "step": 5113 }, { "epoch": 1.7077976289864751, "grad_norm": 0.4002710266542765, "learning_rate": 4.66308714420035e-06, "loss": 0.186, "step": 5114 }, { "epoch": 1.7081315745533479, "grad_norm": 0.4126660129106094, "learning_rate": 4.6611482531563595e-06, "loss": 0.1961, "step": 5115 }, { "epoch": 1.7084655201202203, "grad_norm": 0.45263061369654795, "learning_rate": 4.659209413299916e-06, "loss": 0.2105, "step": 5116 }, { "epoch": 1.7087994656870928, "grad_norm": 0.39704063413819723, "learning_rate": 4.657270624923901e-06, "loss": 0.1866, "step": 5117 }, { "epoch": 1.7091334112539656, "grad_norm": 0.456588695870717, "learning_rate": 4.6553318883211955e-06, "loss": 0.1986, "step": 5118 }, { "epoch": 1.7094673568208383, "grad_norm": 0.4056701874158402, "learning_rate": 4.653393203784667e-06, "loss": 0.1937, "step": 5119 }, { "epoch": 1.7098013023877108, "grad_norm": 0.37801865981225746, "learning_rate": 4.651454571607176e-06, "loss": 0.1828, "step": 5120 }, { "epoch": 1.7101352479545833, "grad_norm": 0.4340941520725217, "learning_rate": 4.649515992081576e-06, "loss": 0.1945, "step": 5121 }, { "epoch": 1.710469193521456, "grad_norm": 0.41411971112825, "learning_rate": 4.64757746550071e-06, "loss": 0.2097, "step": 5122 }, { "epoch": 1.7108031390883287, "grad_norm": 0.4212025495479919, "learning_rate": 4.645638992157419e-06, "loss": 0.1967, "step": 5123 }, { "epoch": 1.7111370846552012, "grad_norm": 0.40864256762488815, "learning_rate": 4.6437005723445316e-06, "loss": 0.1974, "step": 5124 }, { "epoch": 1.7114710302220737, "grad_norm": 0.4028769432832656, "learning_rate": 4.6417622063548675e-06, "loss": 0.1991, "step": 5125 }, { "epoch": 1.7118049757889464, "grad_norm": 0.6276560821172299, "learning_rate": 4.6398238944812414e-06, "loss": 0.1923, "step": 5126 }, { "epoch": 1.7121389213558191, "grad_norm": 0.4154958288569983, "learning_rate": 4.637885637016456e-06, "loss": 0.1945, "step": 5127 }, { "epoch": 1.7124728669226916, "grad_norm": 0.3784544159799719, "learning_rate": 4.635947434253308e-06, "loss": 0.174, "step": 5128 }, { "epoch": 1.7128068124895641, "grad_norm": 0.4121407590389507, "learning_rate": 4.634009286484586e-06, "loss": 0.195, "step": 5129 }, { "epoch": 1.7131407580564368, "grad_norm": 0.3936720216642308, "learning_rate": 4.632071194003073e-06, "loss": 0.1804, "step": 5130 }, { "epoch": 1.7134747036233096, "grad_norm": 0.41086226040341123, "learning_rate": 4.630133157101537e-06, "loss": 0.1921, "step": 5131 }, { "epoch": 1.713808649190182, "grad_norm": 0.3884513041579706, "learning_rate": 4.6281951760727435e-06, "loss": 0.1786, "step": 5132 }, { "epoch": 1.7141425947570545, "grad_norm": 0.39582994223607226, "learning_rate": 4.626257251209446e-06, "loss": 0.1835, "step": 5133 }, { "epoch": 1.7144765403239273, "grad_norm": 0.5027499487784188, "learning_rate": 4.624319382804391e-06, "loss": 0.205, "step": 5134 }, { "epoch": 1.7148104858907998, "grad_norm": 0.3865108788544779, "learning_rate": 4.622381571150317e-06, "loss": 0.1791, "step": 5135 }, { "epoch": 1.7151444314576723, "grad_norm": 0.41736826198301175, "learning_rate": 4.620443816539954e-06, "loss": 0.1845, "step": 5136 }, { "epoch": 1.715478377024545, "grad_norm": 0.4076416332201311, "learning_rate": 4.618506119266021e-06, "loss": 0.2023, "step": 5137 }, { "epoch": 1.7158123225914177, "grad_norm": 0.3940577448952557, "learning_rate": 4.6165684796212306e-06, "loss": 0.1971, "step": 5138 }, { "epoch": 1.7161462681582902, "grad_norm": 0.6699527350793285, "learning_rate": 4.6146308978982865e-06, "loss": 0.2015, "step": 5139 }, { "epoch": 1.7164802137251627, "grad_norm": 0.41792420253823254, "learning_rate": 4.612693374389881e-06, "loss": 0.1996, "step": 5140 }, { "epoch": 1.7168141592920354, "grad_norm": 0.486254656967708, "learning_rate": 4.610755909388703e-06, "loss": 0.2017, "step": 5141 }, { "epoch": 1.7171481048589081, "grad_norm": 0.43032663564827006, "learning_rate": 4.608818503187428e-06, "loss": 0.2077, "step": 5142 }, { "epoch": 1.7174820504257806, "grad_norm": 0.4197636003363948, "learning_rate": 4.606881156078725e-06, "loss": 0.2063, "step": 5143 }, { "epoch": 1.717815995992653, "grad_norm": 0.41927575757137314, "learning_rate": 4.604943868355251e-06, "loss": 0.1971, "step": 5144 }, { "epoch": 1.7181499415595258, "grad_norm": 0.4364890921897206, "learning_rate": 4.603006640309658e-06, "loss": 0.1992, "step": 5145 }, { "epoch": 1.7184838871263985, "grad_norm": 0.40568220551742395, "learning_rate": 4.601069472234584e-06, "loss": 0.2016, "step": 5146 }, { "epoch": 1.718817832693271, "grad_norm": 0.41925099662175647, "learning_rate": 4.599132364422666e-06, "loss": 0.2029, "step": 5147 }, { "epoch": 1.7191517782601435, "grad_norm": 0.4064241820068898, "learning_rate": 4.597195317166525e-06, "loss": 0.1941, "step": 5148 }, { "epoch": 1.7194857238270163, "grad_norm": 0.40433828939310057, "learning_rate": 4.595258330758773e-06, "loss": 0.1928, "step": 5149 }, { "epoch": 1.7198196693938888, "grad_norm": 0.42000296483673916, "learning_rate": 4.593321405492017e-06, "loss": 0.1987, "step": 5150 }, { "epoch": 1.7201536149607612, "grad_norm": 0.4031145987386355, "learning_rate": 4.59138454165885e-06, "loss": 0.1867, "step": 5151 }, { "epoch": 1.720487560527634, "grad_norm": 0.3927642852721298, "learning_rate": 4.589447739551857e-06, "loss": 0.1886, "step": 5152 }, { "epoch": 1.7208215060945067, "grad_norm": 0.4365198371307918, "learning_rate": 4.58751099946362e-06, "loss": 0.2067, "step": 5153 }, { "epoch": 1.7211554516613792, "grad_norm": 0.47754776245942315, "learning_rate": 4.585574321686704e-06, "loss": 0.2044, "step": 5154 }, { "epoch": 1.7214893972282517, "grad_norm": 0.41403702099913486, "learning_rate": 4.583637706513665e-06, "loss": 0.2045, "step": 5155 }, { "epoch": 1.7218233427951244, "grad_norm": 0.4129816326090922, "learning_rate": 4.5817011542370535e-06, "loss": 0.1912, "step": 5156 }, { "epoch": 1.722157288361997, "grad_norm": 0.42004603602811036, "learning_rate": 4.579764665149409e-06, "loss": 0.2005, "step": 5157 }, { "epoch": 1.7224912339288696, "grad_norm": 0.4321750576374955, "learning_rate": 4.577828239543257e-06, "loss": 0.1997, "step": 5158 }, { "epoch": 1.722825179495742, "grad_norm": 0.405430374187034, "learning_rate": 4.575891877711123e-06, "loss": 0.2003, "step": 5159 }, { "epoch": 1.7231591250626148, "grad_norm": 0.3853641882041907, "learning_rate": 4.573955579945514e-06, "loss": 0.1809, "step": 5160 }, { "epoch": 1.7234930706294875, "grad_norm": 0.39519841050530224, "learning_rate": 4.572019346538931e-06, "loss": 0.1826, "step": 5161 }, { "epoch": 1.72382701619636, "grad_norm": 0.39830083588979237, "learning_rate": 4.570083177783865e-06, "loss": 0.1985, "step": 5162 }, { "epoch": 1.7241609617632325, "grad_norm": 0.3912144272420254, "learning_rate": 4.568147073972795e-06, "loss": 0.1791, "step": 5163 }, { "epoch": 1.7244949073301052, "grad_norm": 0.438731828398516, "learning_rate": 4.566211035398196e-06, "loss": 0.2143, "step": 5164 }, { "epoch": 1.7248288528969777, "grad_norm": 0.38952165500853425, "learning_rate": 4.564275062352529e-06, "loss": 0.1898, "step": 5165 }, { "epoch": 1.7251627984638502, "grad_norm": 0.46702184416618503, "learning_rate": 4.5623391551282435e-06, "loss": 0.2063, "step": 5166 }, { "epoch": 1.725496744030723, "grad_norm": 0.3862830352258704, "learning_rate": 4.560403314017782e-06, "loss": 0.1919, "step": 5167 }, { "epoch": 1.7258306895975957, "grad_norm": 0.3939486702748393, "learning_rate": 4.558467539313576e-06, "loss": 0.185, "step": 5168 }, { "epoch": 1.7261646351644682, "grad_norm": 0.4475298224185486, "learning_rate": 4.556531831308045e-06, "loss": 0.2084, "step": 5169 }, { "epoch": 1.7264985807313407, "grad_norm": 0.3740950936131439, "learning_rate": 4.554596190293606e-06, "loss": 0.1861, "step": 5170 }, { "epoch": 1.7268325262982134, "grad_norm": 0.4279837775356034, "learning_rate": 4.552660616562655e-06, "loss": 0.1956, "step": 5171 }, { "epoch": 1.727166471865086, "grad_norm": 0.38629605489548957, "learning_rate": 4.550725110407586e-06, "loss": 0.1882, "step": 5172 }, { "epoch": 1.7275004174319586, "grad_norm": 0.40903630345175285, "learning_rate": 4.548789672120779e-06, "loss": 0.1947, "step": 5173 }, { "epoch": 1.727834362998831, "grad_norm": 0.40754681344598553, "learning_rate": 4.5468543019946045e-06, "loss": 0.1895, "step": 5174 }, { "epoch": 1.7281683085657038, "grad_norm": 0.4154808397286242, "learning_rate": 4.544919000321421e-06, "loss": 0.1979, "step": 5175 }, { "epoch": 1.7285022541325765, "grad_norm": 0.3927989079761158, "learning_rate": 4.542983767393584e-06, "loss": 0.187, "step": 5176 }, { "epoch": 1.728836199699449, "grad_norm": 0.4477597515047998, "learning_rate": 4.541048603503429e-06, "loss": 0.1982, "step": 5177 }, { "epoch": 1.7291701452663215, "grad_norm": 0.4033600052314757, "learning_rate": 4.539113508943287e-06, "loss": 0.1842, "step": 5178 }, { "epoch": 1.7295040908331942, "grad_norm": 0.38530844939756476, "learning_rate": 4.537178484005476e-06, "loss": 0.1906, "step": 5179 }, { "epoch": 1.729838036400067, "grad_norm": 0.4345227695242709, "learning_rate": 4.535243528982305e-06, "loss": 0.2003, "step": 5180 }, { "epoch": 1.7301719819669394, "grad_norm": 0.38337505336535044, "learning_rate": 4.53330864416607e-06, "loss": 0.1841, "step": 5181 }, { "epoch": 1.730505927533812, "grad_norm": 0.38990039540591903, "learning_rate": 4.531373829849061e-06, "loss": 0.1947, "step": 5182 }, { "epoch": 1.7308398731006847, "grad_norm": 0.3929342954019198, "learning_rate": 4.529439086323552e-06, "loss": 0.1867, "step": 5183 }, { "epoch": 1.7311738186675572, "grad_norm": 0.42907331015138545, "learning_rate": 4.52750441388181e-06, "loss": 0.2045, "step": 5184 }, { "epoch": 1.7315077642344296, "grad_norm": 0.3957492762261916, "learning_rate": 4.52556981281609e-06, "loss": 0.1965, "step": 5185 }, { "epoch": 1.7318417098013024, "grad_norm": 0.3985478985780018, "learning_rate": 4.523635283418635e-06, "loss": 0.1836, "step": 5186 }, { "epoch": 1.732175655368175, "grad_norm": 0.4100016585452721, "learning_rate": 4.521700825981678e-06, "loss": 0.2057, "step": 5187 }, { "epoch": 1.7325096009350476, "grad_norm": 0.38648895139654443, "learning_rate": 4.519766440797446e-06, "loss": 0.1998, "step": 5188 }, { "epoch": 1.73284354650192, "grad_norm": 0.3950585537509392, "learning_rate": 4.517832128158147e-06, "loss": 0.1954, "step": 5189 }, { "epoch": 1.7331774920687928, "grad_norm": 0.351655257356069, "learning_rate": 4.515897888355982e-06, "loss": 0.1701, "step": 5190 }, { "epoch": 1.7335114376356655, "grad_norm": 0.39426784941225645, "learning_rate": 4.513963721683142e-06, "loss": 0.1933, "step": 5191 }, { "epoch": 1.733845383202538, "grad_norm": 0.4069564879031356, "learning_rate": 4.5120296284318035e-06, "loss": 0.1839, "step": 5192 }, { "epoch": 1.7341793287694105, "grad_norm": 0.3679036421194138, "learning_rate": 4.510095608894134e-06, "loss": 0.1784, "step": 5193 }, { "epoch": 1.7345132743362832, "grad_norm": 0.41535645901622537, "learning_rate": 4.508161663362294e-06, "loss": 0.2009, "step": 5194 }, { "epoch": 1.734847219903156, "grad_norm": 0.4320307397620132, "learning_rate": 4.506227792128424e-06, "loss": 0.2062, "step": 5195 }, { "epoch": 1.7351811654700284, "grad_norm": 0.42447532292063683, "learning_rate": 4.504293995484662e-06, "loss": 0.2063, "step": 5196 }, { "epoch": 1.735515111036901, "grad_norm": 0.4315851980720862, "learning_rate": 4.502360273723127e-06, "loss": 0.2087, "step": 5197 }, { "epoch": 1.7358490566037736, "grad_norm": 0.40956272992571285, "learning_rate": 4.500426627135933e-06, "loss": 0.187, "step": 5198 }, { "epoch": 1.7361830021706461, "grad_norm": 0.4529666235929845, "learning_rate": 4.4984930560151776e-06, "loss": 0.1871, "step": 5199 }, { "epoch": 1.7365169477375186, "grad_norm": 0.38754524741812935, "learning_rate": 4.496559560652952e-06, "loss": 0.1914, "step": 5200 }, { "epoch": 1.7368508933043914, "grad_norm": 0.43465984631819043, "learning_rate": 4.494626141341334e-06, "loss": 0.1997, "step": 5201 }, { "epoch": 1.737184838871264, "grad_norm": 0.383172124051344, "learning_rate": 4.4926927983723876e-06, "loss": 0.1932, "step": 5202 }, { "epoch": 1.7375187844381366, "grad_norm": 0.4201088959332936, "learning_rate": 4.490759532038166e-06, "loss": 0.1946, "step": 5203 }, { "epoch": 1.737852730005009, "grad_norm": 0.4688579896439726, "learning_rate": 4.488826342630714e-06, "loss": 0.2246, "step": 5204 }, { "epoch": 1.7381866755718818, "grad_norm": 0.4749846175120122, "learning_rate": 4.486893230442062e-06, "loss": 0.2024, "step": 5205 }, { "epoch": 1.7385206211387545, "grad_norm": 0.38787853638285946, "learning_rate": 4.4849601957642295e-06, "loss": 0.186, "step": 5206 }, { "epoch": 1.738854566705627, "grad_norm": 0.38594915412595865, "learning_rate": 4.483027238889223e-06, "loss": 0.193, "step": 5207 }, { "epoch": 1.7391885122724995, "grad_norm": 0.4113339936370685, "learning_rate": 4.48109436010904e-06, "loss": 0.1912, "step": 5208 }, { "epoch": 1.7395224578393722, "grad_norm": 0.4094567096226062, "learning_rate": 4.4791615597156635e-06, "loss": 0.1948, "step": 5209 }, { "epoch": 1.739856403406245, "grad_norm": 0.41900250037341225, "learning_rate": 4.477228838001065e-06, "loss": 0.1891, "step": 5210 }, { "epoch": 1.7401903489731174, "grad_norm": 0.38308421909389073, "learning_rate": 4.475296195257206e-06, "loss": 0.1855, "step": 5211 }, { "epoch": 1.74052429453999, "grad_norm": 0.38927592333160743, "learning_rate": 4.4733636317760365e-06, "loss": 0.1929, "step": 5212 }, { "epoch": 1.7408582401068626, "grad_norm": 0.3896513984545134, "learning_rate": 4.471431147849491e-06, "loss": 0.1808, "step": 5213 }, { "epoch": 1.7411921856737351, "grad_norm": 0.4339601478317658, "learning_rate": 4.469498743769493e-06, "loss": 0.2101, "step": 5214 }, { "epoch": 1.7415261312406076, "grad_norm": 0.4446663564844156, "learning_rate": 4.467566419827958e-06, "loss": 0.1946, "step": 5215 }, { "epoch": 1.7418600768074803, "grad_norm": 0.42455586753823066, "learning_rate": 4.465634176316782e-06, "loss": 0.1935, "step": 5216 }, { "epoch": 1.742194022374353, "grad_norm": 0.39397487594186725, "learning_rate": 4.463702013527857e-06, "loss": 0.1798, "step": 5217 }, { "epoch": 1.7425279679412256, "grad_norm": 0.44564863513382924, "learning_rate": 4.4617699317530585e-06, "loss": 0.209, "step": 5218 }, { "epoch": 1.742861913508098, "grad_norm": 0.3815517649159005, "learning_rate": 4.459837931284249e-06, "loss": 0.1835, "step": 5219 }, { "epoch": 1.7431958590749708, "grad_norm": 0.37966203750616717, "learning_rate": 4.45790601241328e-06, "loss": 0.1842, "step": 5220 }, { "epoch": 1.7435298046418435, "grad_norm": 0.3877934659273845, "learning_rate": 4.45597417543199e-06, "loss": 0.1852, "step": 5221 }, { "epoch": 1.743863750208716, "grad_norm": 0.41155888783147637, "learning_rate": 4.454042420632206e-06, "loss": 0.1863, "step": 5222 }, { "epoch": 1.7441976957755885, "grad_norm": 0.405549495330632, "learning_rate": 4.452110748305744e-06, "loss": 0.1925, "step": 5223 }, { "epoch": 1.7445316413424612, "grad_norm": 0.37776272889626006, "learning_rate": 4.450179158744405e-06, "loss": 0.1897, "step": 5224 }, { "epoch": 1.744865586909334, "grad_norm": 0.41244283018850775, "learning_rate": 4.448247652239978e-06, "loss": 0.1966, "step": 5225 }, { "epoch": 1.7451995324762064, "grad_norm": 0.37070906739068943, "learning_rate": 4.4463162290842395e-06, "loss": 0.185, "step": 5226 }, { "epoch": 1.745533478043079, "grad_norm": 0.38778365798313136, "learning_rate": 4.444384889568954e-06, "loss": 0.1885, "step": 5227 }, { "epoch": 1.7458674236099516, "grad_norm": 0.42132640207261324, "learning_rate": 4.442453633985872e-06, "loss": 0.1996, "step": 5228 }, { "epoch": 1.7462013691768243, "grad_norm": 0.4647696066279634, "learning_rate": 4.4405224626267345e-06, "loss": 0.2121, "step": 5229 }, { "epoch": 1.7465353147436968, "grad_norm": 0.40648419947551384, "learning_rate": 4.438591375783267e-06, "loss": 0.1898, "step": 5230 }, { "epoch": 1.7468692603105693, "grad_norm": 0.48398101512288694, "learning_rate": 4.4366603737471825e-06, "loss": 0.2041, "step": 5231 }, { "epoch": 1.747203205877442, "grad_norm": 0.3757616353194717, "learning_rate": 4.434729456810182e-06, "loss": 0.1725, "step": 5232 }, { "epoch": 1.7475371514443145, "grad_norm": 0.439178547747858, "learning_rate": 4.432798625263951e-06, "loss": 0.2103, "step": 5233 }, { "epoch": 1.747871097011187, "grad_norm": 0.3795864733687818, "learning_rate": 4.430867879400167e-06, "loss": 0.1806, "step": 5234 }, { "epoch": 1.7482050425780598, "grad_norm": 0.41932040849791336, "learning_rate": 4.428937219510491e-06, "loss": 0.1955, "step": 5235 }, { "epoch": 1.7485389881449325, "grad_norm": 0.4871060941331514, "learning_rate": 4.427006645886573e-06, "loss": 0.1906, "step": 5236 }, { "epoch": 1.748872933711805, "grad_norm": 0.35978985995864865, "learning_rate": 4.425076158820048e-06, "loss": 0.1763, "step": 5237 }, { "epoch": 1.7492068792786775, "grad_norm": 0.4361636932415916, "learning_rate": 4.423145758602538e-06, "loss": 0.1901, "step": 5238 }, { "epoch": 1.7495408248455502, "grad_norm": 0.3897242187992088, "learning_rate": 4.4212154455256535e-06, "loss": 0.1854, "step": 5239 }, { "epoch": 1.749874770412423, "grad_norm": 0.44303703845267467, "learning_rate": 4.41928521988099e-06, "loss": 0.2072, "step": 5240 }, { "epoch": 1.7502087159792954, "grad_norm": 0.38297608026027435, "learning_rate": 4.417355081960133e-06, "loss": 0.1852, "step": 5241 }, { "epoch": 1.7505426615461679, "grad_norm": 0.4035320423312304, "learning_rate": 4.415425032054651e-06, "loss": 0.1974, "step": 5242 }, { "epoch": 1.7508766071130406, "grad_norm": 0.4211405489724584, "learning_rate": 4.413495070456101e-06, "loss": 0.2007, "step": 5243 }, { "epoch": 1.7512105526799133, "grad_norm": 0.3643499101190018, "learning_rate": 4.411565197456027e-06, "loss": 0.185, "step": 5244 }, { "epoch": 1.7515444982467858, "grad_norm": 0.411108277805822, "learning_rate": 4.409635413345956e-06, "loss": 0.2048, "step": 5245 }, { "epoch": 1.7518784438136583, "grad_norm": 0.4194151678676051, "learning_rate": 4.40770571841741e-06, "loss": 0.1999, "step": 5246 }, { "epoch": 1.752212389380531, "grad_norm": 0.40680894494308045, "learning_rate": 4.405776112961889e-06, "loss": 0.2074, "step": 5247 }, { "epoch": 1.7525463349474035, "grad_norm": 0.41101451985867804, "learning_rate": 4.4038465972708824e-06, "loss": 0.2035, "step": 5248 }, { "epoch": 1.752880280514276, "grad_norm": 0.4094941833906081, "learning_rate": 4.4019171716358675e-06, "loss": 0.2125, "step": 5249 }, { "epoch": 1.7532142260811487, "grad_norm": 0.47029820090748287, "learning_rate": 4.399987836348305e-06, "loss": 0.2013, "step": 5250 }, { "epoch": 1.7535481716480215, "grad_norm": 0.436547364677873, "learning_rate": 4.398058591699645e-06, "loss": 0.2054, "step": 5251 }, { "epoch": 1.753882117214894, "grad_norm": 0.4223790978332405, "learning_rate": 4.396129437981322e-06, "loss": 0.2078, "step": 5252 }, { "epoch": 1.7542160627817664, "grad_norm": 0.3751825180830145, "learning_rate": 4.394200375484758e-06, "loss": 0.1807, "step": 5253 }, { "epoch": 1.7545500083486392, "grad_norm": 0.41355637546032376, "learning_rate": 4.392271404501361e-06, "loss": 0.1946, "step": 5254 }, { "epoch": 1.7548839539155119, "grad_norm": 0.4283757551832914, "learning_rate": 4.390342525322524e-06, "loss": 0.1935, "step": 5255 }, { "epoch": 1.7552178994823844, "grad_norm": 0.3614554875916809, "learning_rate": 4.3884137382396255e-06, "loss": 0.1699, "step": 5256 }, { "epoch": 1.7555518450492569, "grad_norm": 0.4100290180136812, "learning_rate": 4.3864850435440335e-06, "loss": 0.1949, "step": 5257 }, { "epoch": 1.7558857906161296, "grad_norm": 0.4447808535436557, "learning_rate": 4.3845564415271e-06, "loss": 0.1973, "step": 5258 }, { "epoch": 1.7562197361830023, "grad_norm": 0.38742859585948136, "learning_rate": 4.382627932480164e-06, "loss": 0.1864, "step": 5259 }, { "epoch": 1.7565536817498748, "grad_norm": 0.4042603667108564, "learning_rate": 4.380699516694547e-06, "loss": 0.1927, "step": 5260 }, { "epoch": 1.7568876273167473, "grad_norm": 0.5355154040253798, "learning_rate": 4.37877119446156e-06, "loss": 0.2128, "step": 5261 }, { "epoch": 1.75722157288362, "grad_norm": 0.41543428507131214, "learning_rate": 4.3768429660725e-06, "loss": 0.1852, "step": 5262 }, { "epoch": 1.7575555184504925, "grad_norm": 0.39428337573752037, "learning_rate": 4.374914831818643e-06, "loss": 0.1965, "step": 5263 }, { "epoch": 1.757889464017365, "grad_norm": 0.4158653114188019, "learning_rate": 4.372986791991265e-06, "loss": 0.1917, "step": 5264 }, { "epoch": 1.7582234095842377, "grad_norm": 0.3700405425476926, "learning_rate": 4.371058846881614e-06, "loss": 0.1747, "step": 5265 }, { "epoch": 1.7585573551511104, "grad_norm": 0.41343759916271333, "learning_rate": 4.36913099678093e-06, "loss": 0.2013, "step": 5266 }, { "epoch": 1.758891300717983, "grad_norm": 0.42108912822588007, "learning_rate": 4.367203241980437e-06, "loss": 0.1909, "step": 5267 }, { "epoch": 1.7592252462848554, "grad_norm": 0.3808692844158381, "learning_rate": 4.3652755827713456e-06, "loss": 0.1827, "step": 5268 }, { "epoch": 1.7595591918517282, "grad_norm": 0.394990213378632, "learning_rate": 4.363348019444848e-06, "loss": 0.1926, "step": 5269 }, { "epoch": 1.7598931374186009, "grad_norm": 0.38402085666635427, "learning_rate": 4.361420552292132e-06, "loss": 0.1904, "step": 5270 }, { "epoch": 1.7602270829854734, "grad_norm": 0.34637884058258056, "learning_rate": 4.35949318160436e-06, "loss": 0.1812, "step": 5271 }, { "epoch": 1.7605610285523459, "grad_norm": 0.3776613811676968, "learning_rate": 4.357565907672684e-06, "loss": 0.1903, "step": 5272 }, { "epoch": 1.7608949741192186, "grad_norm": 0.4087198495663909, "learning_rate": 4.355638730788242e-06, "loss": 0.1876, "step": 5273 }, { "epoch": 1.7612289196860913, "grad_norm": 0.38778910053833804, "learning_rate": 4.353711651242157e-06, "loss": 0.1813, "step": 5274 }, { "epoch": 1.7615628652529638, "grad_norm": 0.37325883749294486, "learning_rate": 4.3517846693255365e-06, "loss": 0.1777, "step": 5275 }, { "epoch": 1.7618968108198363, "grad_norm": 0.39984608759219054, "learning_rate": 4.349857785329475e-06, "loss": 0.1923, "step": 5276 }, { "epoch": 1.762230756386709, "grad_norm": 0.3743519628081095, "learning_rate": 4.34793099954505e-06, "loss": 0.1841, "step": 5277 }, { "epoch": 1.7625647019535817, "grad_norm": 0.3955250843428749, "learning_rate": 4.3460043122633256e-06, "loss": 0.1899, "step": 5278 }, { "epoch": 1.7628986475204542, "grad_norm": 0.37322793088236206, "learning_rate": 4.344077723775349e-06, "loss": 0.1707, "step": 5279 }, { "epoch": 1.7632325930873267, "grad_norm": 0.42561651349981755, "learning_rate": 4.342151234372155e-06, "loss": 0.2092, "step": 5280 }, { "epoch": 1.7635665386541994, "grad_norm": 0.42631843408788267, "learning_rate": 4.340224844344766e-06, "loss": 0.2021, "step": 5281 }, { "epoch": 1.763900484221072, "grad_norm": 0.40817135112216674, "learning_rate": 4.338298553984181e-06, "loss": 0.2068, "step": 5282 }, { "epoch": 1.7642344297879444, "grad_norm": 0.39472505278629344, "learning_rate": 4.336372363581391e-06, "loss": 0.1853, "step": 5283 }, { "epoch": 1.7645683753548171, "grad_norm": 0.4000097483831296, "learning_rate": 4.33444627342737e-06, "loss": 0.1919, "step": 5284 }, { "epoch": 1.7649023209216899, "grad_norm": 0.42574844971222253, "learning_rate": 4.332520283813075e-06, "loss": 0.2056, "step": 5285 }, { "epoch": 1.7652362664885624, "grad_norm": 0.4176968980323195, "learning_rate": 4.330594395029449e-06, "loss": 0.2053, "step": 5286 }, { "epoch": 1.7655702120554349, "grad_norm": 0.41190325772011954, "learning_rate": 4.328668607367424e-06, "loss": 0.1903, "step": 5287 }, { "epoch": 1.7659041576223076, "grad_norm": 0.4500552732783806, "learning_rate": 4.326742921117911e-06, "loss": 0.1977, "step": 5288 }, { "epoch": 1.7662381031891803, "grad_norm": 0.451776540064002, "learning_rate": 4.324817336571806e-06, "loss": 0.2023, "step": 5289 }, { "epoch": 1.7665720487560528, "grad_norm": 0.4188013135031576, "learning_rate": 4.3228918540199926e-06, "loss": 0.1844, "step": 5290 }, { "epoch": 1.7669059943229253, "grad_norm": 0.3839938487428742, "learning_rate": 4.320966473753337e-06, "loss": 0.1842, "step": 5291 }, { "epoch": 1.767239939889798, "grad_norm": 0.41400365926676513, "learning_rate": 4.31904119606269e-06, "loss": 0.1974, "step": 5292 }, { "epoch": 1.7675738854566707, "grad_norm": 0.420903637964543, "learning_rate": 4.31711602123889e-06, "loss": 0.1956, "step": 5293 }, { "epoch": 1.7679078310235432, "grad_norm": 0.3933732864451317, "learning_rate": 4.315190949572755e-06, "loss": 0.181, "step": 5294 }, { "epoch": 1.7682417765904157, "grad_norm": 0.4002834826973489, "learning_rate": 4.313265981355091e-06, "loss": 0.1897, "step": 5295 }, { "epoch": 1.7685757221572884, "grad_norm": 0.4534917662589281, "learning_rate": 4.311341116876687e-06, "loss": 0.1912, "step": 5296 }, { "epoch": 1.768909667724161, "grad_norm": 0.3953727715420872, "learning_rate": 4.309416356428315e-06, "loss": 0.192, "step": 5297 }, { "epoch": 1.7692436132910334, "grad_norm": 0.4235110543403725, "learning_rate": 4.307491700300733e-06, "loss": 0.2054, "step": 5298 }, { "epoch": 1.7695775588579061, "grad_norm": 0.39646132981498244, "learning_rate": 4.305567148784685e-06, "loss": 0.195, "step": 5299 }, { "epoch": 1.7699115044247788, "grad_norm": 0.373965105972047, "learning_rate": 4.3036427021708955e-06, "loss": 0.1876, "step": 5300 }, { "epoch": 1.7702454499916513, "grad_norm": 0.3879823479267765, "learning_rate": 4.301718360750074e-06, "loss": 0.181, "step": 5301 }, { "epoch": 1.7705793955585238, "grad_norm": 0.3861311454658933, "learning_rate": 4.299794124812918e-06, "loss": 0.1786, "step": 5302 }, { "epoch": 1.7709133411253966, "grad_norm": 0.41958134378619033, "learning_rate": 4.297869994650103e-06, "loss": 0.2081, "step": 5303 }, { "epoch": 1.7712472866922693, "grad_norm": 0.39575198482226387, "learning_rate": 4.295945970552293e-06, "loss": 0.1834, "step": 5304 }, { "epoch": 1.7715812322591418, "grad_norm": 0.3988296719287663, "learning_rate": 4.294022052810134e-06, "loss": 0.1858, "step": 5305 }, { "epoch": 1.7719151778260143, "grad_norm": 0.4327878844010829, "learning_rate": 4.292098241714256e-06, "loss": 0.1995, "step": 5306 }, { "epoch": 1.772249123392887, "grad_norm": 0.3883451289026303, "learning_rate": 4.290174537555275e-06, "loss": 0.1912, "step": 5307 }, { "epoch": 1.7725830689597597, "grad_norm": 0.45477611232921417, "learning_rate": 4.2882509406237885e-06, "loss": 0.1849, "step": 5308 }, { "epoch": 1.7729170145266322, "grad_norm": 0.38385530123430495, "learning_rate": 4.286327451210377e-06, "loss": 0.1814, "step": 5309 }, { "epoch": 1.7732509600935047, "grad_norm": 0.4346218793838651, "learning_rate": 4.284404069605605e-06, "loss": 0.1986, "step": 5310 }, { "epoch": 1.7735849056603774, "grad_norm": 0.36896709229326324, "learning_rate": 4.282480796100027e-06, "loss": 0.1848, "step": 5311 }, { "epoch": 1.77391885122725, "grad_norm": 0.3921476392351792, "learning_rate": 4.280557630984173e-06, "loss": 0.1888, "step": 5312 }, { "epoch": 1.7742527967941224, "grad_norm": 0.4206459707979227, "learning_rate": 4.27863457454856e-06, "loss": 0.2004, "step": 5313 }, { "epoch": 1.7745867423609951, "grad_norm": 0.36808858668646643, "learning_rate": 4.276711627083688e-06, "loss": 0.177, "step": 5314 }, { "epoch": 1.7749206879278678, "grad_norm": 0.4227954793051248, "learning_rate": 4.274788788880041e-06, "loss": 0.2021, "step": 5315 }, { "epoch": 1.7752546334947403, "grad_norm": 0.4271760624815115, "learning_rate": 4.272866060228084e-06, "loss": 0.2035, "step": 5316 }, { "epoch": 1.7755885790616128, "grad_norm": 0.38758325418151324, "learning_rate": 4.270943441418275e-06, "loss": 0.1882, "step": 5317 }, { "epoch": 1.7759225246284855, "grad_norm": 0.38762943385431875, "learning_rate": 4.2690209327410406e-06, "loss": 0.1907, "step": 5318 }, { "epoch": 1.7762564701953583, "grad_norm": 0.4139981860338518, "learning_rate": 4.267098534486803e-06, "loss": 0.2032, "step": 5319 }, { "epoch": 1.7765904157622308, "grad_norm": 0.4167718701052845, "learning_rate": 4.26517624694596e-06, "loss": 0.1997, "step": 5320 }, { "epoch": 1.7769243613291033, "grad_norm": 0.3827532636411078, "learning_rate": 4.2632540704088975e-06, "loss": 0.1938, "step": 5321 }, { "epoch": 1.777258306895976, "grad_norm": 0.40170330200281723, "learning_rate": 4.261332005165984e-06, "loss": 0.1712, "step": 5322 }, { "epoch": 1.7775922524628487, "grad_norm": 0.39802116363666706, "learning_rate": 4.259410051507567e-06, "loss": 0.1896, "step": 5323 }, { "epoch": 1.7779261980297212, "grad_norm": 0.4222010197604959, "learning_rate": 4.257488209723981e-06, "loss": 0.207, "step": 5324 }, { "epoch": 1.7782601435965937, "grad_norm": 0.40495532187068245, "learning_rate": 4.255566480105546e-06, "loss": 0.1886, "step": 5325 }, { "epoch": 1.7785940891634664, "grad_norm": 0.4075549108054149, "learning_rate": 4.2536448629425585e-06, "loss": 0.1796, "step": 5326 }, { "epoch": 1.7789280347303391, "grad_norm": 0.37713030151299365, "learning_rate": 4.2517233585253024e-06, "loss": 0.1787, "step": 5327 }, { "epoch": 1.7792619802972116, "grad_norm": 0.4343255349139648, "learning_rate": 4.2498019671440435e-06, "loss": 0.2084, "step": 5328 }, { "epoch": 1.779595925864084, "grad_norm": 0.39420471849926414, "learning_rate": 4.247880689089033e-06, "loss": 0.1913, "step": 5329 }, { "epoch": 1.7799298714309568, "grad_norm": 0.3672641930743383, "learning_rate": 4.245959524650498e-06, "loss": 0.1757, "step": 5330 }, { "epoch": 1.7802638169978293, "grad_norm": 0.3904849887413555, "learning_rate": 4.244038474118656e-06, "loss": 0.1815, "step": 5331 }, { "epoch": 1.7805977625647018, "grad_norm": 0.3761970196437007, "learning_rate": 4.242117537783704e-06, "loss": 0.1794, "step": 5332 }, { "epoch": 1.7809317081315745, "grad_norm": 0.4138094357366333, "learning_rate": 4.2401967159358195e-06, "loss": 0.1933, "step": 5333 }, { "epoch": 1.7812656536984472, "grad_norm": 0.4377054814288653, "learning_rate": 4.2382760088651696e-06, "loss": 0.2075, "step": 5334 }, { "epoch": 1.7815995992653197, "grad_norm": 0.44167950165051495, "learning_rate": 4.236355416861897e-06, "loss": 0.2025, "step": 5335 }, { "epoch": 1.7819335448321922, "grad_norm": 0.40362351633119864, "learning_rate": 4.23443494021613e-06, "loss": 0.192, "step": 5336 }, { "epoch": 1.782267490399065, "grad_norm": 0.4000054483623267, "learning_rate": 4.232514579217981e-06, "loss": 0.1863, "step": 5337 }, { "epoch": 1.7826014359659377, "grad_norm": 0.39822312077850025, "learning_rate": 4.23059433415754e-06, "loss": 0.1915, "step": 5338 }, { "epoch": 1.7829353815328102, "grad_norm": 0.41333564470960643, "learning_rate": 4.228674205324884e-06, "loss": 0.1986, "step": 5339 }, { "epoch": 1.7832693270996827, "grad_norm": 0.40541425631144945, "learning_rate": 4.226754193010072e-06, "loss": 0.1819, "step": 5340 }, { "epoch": 1.7836032726665554, "grad_norm": 0.36616267473432135, "learning_rate": 4.224834297503145e-06, "loss": 0.1805, "step": 5341 }, { "epoch": 1.783937218233428, "grad_norm": 0.4368241980696301, "learning_rate": 4.222914519094124e-06, "loss": 0.2082, "step": 5342 }, { "epoch": 1.7842711638003006, "grad_norm": 0.4105369770801003, "learning_rate": 4.220994858073014e-06, "loss": 0.194, "step": 5343 }, { "epoch": 1.784605109367173, "grad_norm": 0.3959702721339513, "learning_rate": 4.2190753147298044e-06, "loss": 0.1945, "step": 5344 }, { "epoch": 1.7849390549340458, "grad_norm": 0.47707382396904957, "learning_rate": 4.2171558893544626e-06, "loss": 0.1815, "step": 5345 }, { "epoch": 1.7852730005009183, "grad_norm": 0.42034793280157745, "learning_rate": 4.215236582236941e-06, "loss": 0.1976, "step": 5346 }, { "epoch": 1.7856069460677908, "grad_norm": 0.3784248182395078, "learning_rate": 4.213317393667175e-06, "loss": 0.191, "step": 5347 }, { "epoch": 1.7859408916346635, "grad_norm": 0.3947336613390132, "learning_rate": 4.211398323935079e-06, "loss": 0.1984, "step": 5348 }, { "epoch": 1.7862748372015362, "grad_norm": 0.4745094890865065, "learning_rate": 4.209479373330552e-06, "loss": 0.2146, "step": 5349 }, { "epoch": 1.7866087827684087, "grad_norm": 0.4115025463067994, "learning_rate": 4.207560542143474e-06, "loss": 0.2033, "step": 5350 }, { "epoch": 1.7869427283352812, "grad_norm": 0.41096840682747465, "learning_rate": 4.205641830663706e-06, "loss": 0.1969, "step": 5351 }, { "epoch": 1.787276673902154, "grad_norm": 0.37721407675737656, "learning_rate": 4.2037232391810925e-06, "loss": 0.1796, "step": 5352 }, { "epoch": 1.7876106194690267, "grad_norm": 0.3712905961394812, "learning_rate": 4.20180476798546e-06, "loss": 0.1831, "step": 5353 }, { "epoch": 1.7879445650358992, "grad_norm": 0.41330452273749596, "learning_rate": 4.1998864173666174e-06, "loss": 0.2077, "step": 5354 }, { "epoch": 1.7882785106027717, "grad_norm": 0.4693570027309341, "learning_rate": 4.197968187614351e-06, "loss": 0.2115, "step": 5355 }, { "epoch": 1.7886124561696444, "grad_norm": 0.4171457241557042, "learning_rate": 4.196050079018433e-06, "loss": 0.1869, "step": 5356 }, { "epoch": 1.788946401736517, "grad_norm": 1.0151186082749302, "learning_rate": 4.194132091868616e-06, "loss": 0.2071, "step": 5357 }, { "epoch": 1.7892803473033896, "grad_norm": 0.4187428989067555, "learning_rate": 4.1922142264546365e-06, "loss": 0.19, "step": 5358 }, { "epoch": 1.789614292870262, "grad_norm": 0.3836619002670058, "learning_rate": 4.1902964830662104e-06, "loss": 0.1924, "step": 5359 }, { "epoch": 1.7899482384371348, "grad_norm": 0.38919854186049957, "learning_rate": 4.188378861993034e-06, "loss": 0.1878, "step": 5360 }, { "epoch": 1.7902821840040073, "grad_norm": 0.39768862778262004, "learning_rate": 4.186461363524786e-06, "loss": 0.198, "step": 5361 }, { "epoch": 1.7906161295708798, "grad_norm": 0.39782728299988296, "learning_rate": 4.184543987951127e-06, "loss": 0.1801, "step": 5362 }, { "epoch": 1.7909500751377525, "grad_norm": 0.495707344907115, "learning_rate": 4.182626735561703e-06, "loss": 0.181, "step": 5363 }, { "epoch": 1.7912840207046252, "grad_norm": 0.44114708561246135, "learning_rate": 4.180709606646134e-06, "loss": 0.2039, "step": 5364 }, { "epoch": 1.7916179662714977, "grad_norm": 0.3927854303581158, "learning_rate": 4.178792601494026e-06, "loss": 0.1896, "step": 5365 }, { "epoch": 1.7919519118383702, "grad_norm": 0.421206797477163, "learning_rate": 4.176875720394965e-06, "loss": 0.2005, "step": 5366 }, { "epoch": 1.792285857405243, "grad_norm": 0.5201455802658564, "learning_rate": 4.174958963638518e-06, "loss": 0.2143, "step": 5367 }, { "epoch": 1.7926198029721157, "grad_norm": 0.4138358670591043, "learning_rate": 4.173042331514234e-06, "loss": 0.1827, "step": 5368 }, { "epoch": 1.7929537485389881, "grad_norm": 0.44836688438557204, "learning_rate": 4.171125824311642e-06, "loss": 0.204, "step": 5369 }, { "epoch": 1.7932876941058606, "grad_norm": 0.39838785544358446, "learning_rate": 4.169209442320255e-06, "loss": 0.1864, "step": 5370 }, { "epoch": 1.7936216396727334, "grad_norm": 0.39065135717154886, "learning_rate": 4.167293185829565e-06, "loss": 0.1777, "step": 5371 }, { "epoch": 1.793955585239606, "grad_norm": 0.3868324514391502, "learning_rate": 4.165377055129043e-06, "loss": 0.1898, "step": 5372 }, { "epoch": 1.7942895308064786, "grad_norm": 0.4086998441668388, "learning_rate": 4.163461050508144e-06, "loss": 0.1874, "step": 5373 }, { "epoch": 1.794623476373351, "grad_norm": 0.3777662978033055, "learning_rate": 4.161545172256303e-06, "loss": 0.1824, "step": 5374 }, { "epoch": 1.7949574219402238, "grad_norm": 0.4382436302483726, "learning_rate": 4.1596294206629375e-06, "loss": 0.2086, "step": 5375 }, { "epoch": 1.7952913675070965, "grad_norm": 0.41758627579949836, "learning_rate": 4.157713796017442e-06, "loss": 0.1874, "step": 5376 }, { "epoch": 1.795625313073969, "grad_norm": 0.40993854049694717, "learning_rate": 4.155798298609196e-06, "loss": 0.2063, "step": 5377 }, { "epoch": 1.7959592586408415, "grad_norm": 0.44807089311268883, "learning_rate": 4.1538829287275565e-06, "loss": 0.1874, "step": 5378 }, { "epoch": 1.7962932042077142, "grad_norm": 0.41081845688238966, "learning_rate": 4.151967686661864e-06, "loss": 0.2055, "step": 5379 }, { "epoch": 1.7966271497745867, "grad_norm": 0.3750452408616848, "learning_rate": 4.150052572701435e-06, "loss": 0.1923, "step": 5380 }, { "epoch": 1.7969610953414592, "grad_norm": 0.3748777838197131, "learning_rate": 4.148137587135575e-06, "loss": 0.1856, "step": 5381 }, { "epoch": 1.797295040908332, "grad_norm": 0.385221778710938, "learning_rate": 4.146222730253563e-06, "loss": 0.1847, "step": 5382 }, { "epoch": 1.7976289864752046, "grad_norm": 0.4130555094657973, "learning_rate": 4.1443080023446605e-06, "loss": 0.2001, "step": 5383 }, { "epoch": 1.7979629320420771, "grad_norm": 0.40847407861926593, "learning_rate": 4.1423934036981096e-06, "loss": 0.1968, "step": 5384 }, { "epoch": 1.7982968776089496, "grad_norm": 0.45331715003457235, "learning_rate": 4.140478934603133e-06, "loss": 0.1954, "step": 5385 }, { "epoch": 1.7986308231758223, "grad_norm": 0.3854167677351394, "learning_rate": 4.138564595348932e-06, "loss": 0.1847, "step": 5386 }, { "epoch": 1.798964768742695, "grad_norm": 0.40379130860802437, "learning_rate": 4.136650386224694e-06, "loss": 0.1974, "step": 5387 }, { "epoch": 1.7992987143095676, "grad_norm": 0.43616884197672473, "learning_rate": 4.13473630751958e-06, "loss": 0.2073, "step": 5388 }, { "epoch": 1.79963265987644, "grad_norm": 0.39500763107731124, "learning_rate": 4.132822359522735e-06, "loss": 0.1964, "step": 5389 }, { "epoch": 1.7999666054433128, "grad_norm": 0.5316276814151004, "learning_rate": 4.130908542523285e-06, "loss": 0.2012, "step": 5390 }, { "epoch": 1.8003005510101855, "grad_norm": 0.4413915607742555, "learning_rate": 4.128994856810332e-06, "loss": 0.1929, "step": 5391 }, { "epoch": 1.800634496577058, "grad_norm": 0.43359946931826776, "learning_rate": 4.127081302672958e-06, "loss": 0.1975, "step": 5392 }, { "epoch": 1.8009684421439305, "grad_norm": 0.4326737933869161, "learning_rate": 4.125167880400235e-06, "loss": 0.1979, "step": 5393 }, { "epoch": 1.8013023877108032, "grad_norm": 0.3999758339829761, "learning_rate": 4.1232545902812046e-06, "loss": 0.1782, "step": 5394 }, { "epoch": 1.8016363332776757, "grad_norm": 0.39414865979855523, "learning_rate": 4.121341432604892e-06, "loss": 0.1908, "step": 5395 }, { "epoch": 1.8019702788445482, "grad_norm": 0.40085607899323583, "learning_rate": 4.1194284076603004e-06, "loss": 0.1991, "step": 5396 }, { "epoch": 1.802304224411421, "grad_norm": 0.3986732428896081, "learning_rate": 4.117515515736418e-06, "loss": 0.1892, "step": 5397 }, { "epoch": 1.8026381699782936, "grad_norm": 0.38552180630358474, "learning_rate": 4.1156027571222054e-06, "loss": 0.1899, "step": 5398 }, { "epoch": 1.8029721155451661, "grad_norm": 0.42639374577496114, "learning_rate": 4.113690132106611e-06, "loss": 0.2013, "step": 5399 }, { "epoch": 1.8033060611120386, "grad_norm": 0.44076136474761535, "learning_rate": 4.111777640978559e-06, "loss": 0.1967, "step": 5400 }, { "epoch": 1.8036400066789113, "grad_norm": 0.3715616165565932, "learning_rate": 4.109865284026953e-06, "loss": 0.1895, "step": 5401 }, { "epoch": 1.803973952245784, "grad_norm": 0.413959384805662, "learning_rate": 4.107953061540676e-06, "loss": 0.1906, "step": 5402 }, { "epoch": 1.8043078978126565, "grad_norm": 0.3986680001400406, "learning_rate": 4.10604097380859e-06, "loss": 0.1884, "step": 5403 }, { "epoch": 1.804641843379529, "grad_norm": 0.39896994690686616, "learning_rate": 4.104129021119543e-06, "loss": 0.1864, "step": 5404 }, { "epoch": 1.8049757889464018, "grad_norm": 0.41708702213753196, "learning_rate": 4.102217203762357e-06, "loss": 0.2042, "step": 5405 }, { "epoch": 1.8053097345132745, "grad_norm": 0.4162521221231452, "learning_rate": 4.1003055220258335e-06, "loss": 0.1886, "step": 5406 }, { "epoch": 1.805643680080147, "grad_norm": 0.388971195188673, "learning_rate": 4.0983939761987535e-06, "loss": 0.1804, "step": 5407 }, { "epoch": 1.8059776256470195, "grad_norm": 0.3976488686723864, "learning_rate": 4.09648256656988e-06, "loss": 0.2036, "step": 5408 }, { "epoch": 1.8063115712138922, "grad_norm": 0.43724410925090645, "learning_rate": 4.094571293427951e-06, "loss": 0.2063, "step": 5409 }, { "epoch": 1.8066455167807647, "grad_norm": 0.42616248921069183, "learning_rate": 4.092660157061691e-06, "loss": 0.2003, "step": 5410 }, { "epoch": 1.8069794623476372, "grad_norm": 0.4045940509370612, "learning_rate": 4.090749157759799e-06, "loss": 0.1894, "step": 5411 }, { "epoch": 1.80731340791451, "grad_norm": 0.4173527749892698, "learning_rate": 4.088838295810952e-06, "loss": 0.2007, "step": 5412 }, { "epoch": 1.8076473534813826, "grad_norm": 0.42528073955892315, "learning_rate": 4.086927571503808e-06, "loss": 0.2005, "step": 5413 }, { "epoch": 1.807981299048255, "grad_norm": 0.41587887704528714, "learning_rate": 4.0850169851270075e-06, "loss": 0.2006, "step": 5414 }, { "epoch": 1.8083152446151276, "grad_norm": 0.40738714104001256, "learning_rate": 4.0831065369691615e-06, "loss": 0.2008, "step": 5415 }, { "epoch": 1.8086491901820003, "grad_norm": 0.44869021642959594, "learning_rate": 4.0811962273188714e-06, "loss": 0.2001, "step": 5416 }, { "epoch": 1.808983135748873, "grad_norm": 0.4002553541976238, "learning_rate": 4.0792860564647105e-06, "loss": 0.1911, "step": 5417 }, { "epoch": 1.8093170813157455, "grad_norm": 0.3786221254197682, "learning_rate": 4.077376024695231e-06, "loss": 0.1774, "step": 5418 }, { "epoch": 1.809651026882618, "grad_norm": 0.3973451247400723, "learning_rate": 4.075466132298967e-06, "loss": 0.1918, "step": 5419 }, { "epoch": 1.8099849724494907, "grad_norm": 0.40821244986487804, "learning_rate": 4.073556379564429e-06, "loss": 0.1921, "step": 5420 }, { "epoch": 1.8103189180163635, "grad_norm": 0.4187248192126581, "learning_rate": 4.071646766780109e-06, "loss": 0.2055, "step": 5421 }, { "epoch": 1.810652863583236, "grad_norm": 0.4009251034725688, "learning_rate": 4.069737294234475e-06, "loss": 0.1921, "step": 5422 }, { "epoch": 1.8109868091501085, "grad_norm": 0.4153763020520176, "learning_rate": 4.067827962215977e-06, "loss": 0.1971, "step": 5423 }, { "epoch": 1.8113207547169812, "grad_norm": 0.42059984630019226, "learning_rate": 4.065918771013042e-06, "loss": 0.1931, "step": 5424 }, { "epoch": 1.811654700283854, "grad_norm": 0.38975550241965073, "learning_rate": 4.064009720914074e-06, "loss": 0.1939, "step": 5425 }, { "epoch": 1.8119886458507264, "grad_norm": 0.4352813167969884, "learning_rate": 4.062100812207459e-06, "loss": 0.1984, "step": 5426 }, { "epoch": 1.8123225914175989, "grad_norm": 0.41222394672549245, "learning_rate": 4.060192045181558e-06, "loss": 0.2003, "step": 5427 }, { "epoch": 1.8126565369844716, "grad_norm": 0.4211144974261239, "learning_rate": 4.058283420124716e-06, "loss": 0.1946, "step": 5428 }, { "epoch": 1.812990482551344, "grad_norm": 0.4033307519940219, "learning_rate": 4.056374937325251e-06, "loss": 0.1888, "step": 5429 }, { "epoch": 1.8133244281182166, "grad_norm": 0.4234575464895144, "learning_rate": 4.054466597071464e-06, "loss": 0.1932, "step": 5430 }, { "epoch": 1.8136583736850893, "grad_norm": 0.41837807879440336, "learning_rate": 4.05255839965163e-06, "loss": 0.2026, "step": 5431 }, { "epoch": 1.813992319251962, "grad_norm": 0.43738094694833785, "learning_rate": 4.050650345354006e-06, "loss": 0.202, "step": 5432 }, { "epoch": 1.8143262648188345, "grad_norm": 0.43660446022144006, "learning_rate": 4.048742434466823e-06, "loss": 0.2109, "step": 5433 }, { "epoch": 1.814660210385707, "grad_norm": 0.3961921763065722, "learning_rate": 4.046834667278298e-06, "loss": 0.1926, "step": 5434 }, { "epoch": 1.8149941559525797, "grad_norm": 0.39349069851042634, "learning_rate": 4.04492704407662e-06, "loss": 0.191, "step": 5435 }, { "epoch": 1.8153281015194525, "grad_norm": 0.40519030905215747, "learning_rate": 4.043019565149958e-06, "loss": 0.1936, "step": 5436 }, { "epoch": 1.815662047086325, "grad_norm": 0.37874729826316716, "learning_rate": 4.041112230786458e-06, "loss": 0.186, "step": 5437 }, { "epoch": 1.8159959926531974, "grad_norm": 0.6081203465553392, "learning_rate": 4.039205041274247e-06, "loss": 0.2003, "step": 5438 }, { "epoch": 1.8163299382200702, "grad_norm": 0.41622622386812613, "learning_rate": 4.0372979969014245e-06, "loss": 0.1904, "step": 5439 }, { "epoch": 1.8166638837869429, "grad_norm": 0.4075713166770594, "learning_rate": 4.035391097956077e-06, "loss": 0.2012, "step": 5440 }, { "epoch": 1.8169978293538154, "grad_norm": 0.4494439120686127, "learning_rate": 4.0334843447262625e-06, "loss": 0.2047, "step": 5441 }, { "epoch": 1.8173317749206879, "grad_norm": 0.4137738402524847, "learning_rate": 4.0315777375000185e-06, "loss": 0.2019, "step": 5442 }, { "epoch": 1.8176657204875606, "grad_norm": 0.393956419351833, "learning_rate": 4.029671276565359e-06, "loss": 0.1929, "step": 5443 }, { "epoch": 1.817999666054433, "grad_norm": 0.3960846147046012, "learning_rate": 4.027764962210278e-06, "loss": 0.1828, "step": 5444 }, { "epoch": 1.8183336116213056, "grad_norm": 0.3877136289033595, "learning_rate": 4.025858794722749e-06, "loss": 0.1908, "step": 5445 }, { "epoch": 1.8186675571881783, "grad_norm": 0.38011780754177016, "learning_rate": 4.0239527743907184e-06, "loss": 0.1833, "step": 5446 }, { "epoch": 1.819001502755051, "grad_norm": 0.41199492546924943, "learning_rate": 4.022046901502114e-06, "loss": 0.1817, "step": 5447 }, { "epoch": 1.8193354483219235, "grad_norm": 0.401329738391802, "learning_rate": 4.020141176344839e-06, "loss": 0.1926, "step": 5448 }, { "epoch": 1.819669393888796, "grad_norm": 0.41127280691800383, "learning_rate": 4.018235599206778e-06, "loss": 0.2009, "step": 5449 }, { "epoch": 1.8200033394556687, "grad_norm": 0.4064474207715107, "learning_rate": 4.016330170375787e-06, "loss": 0.1935, "step": 5450 }, { "epoch": 1.8203372850225414, "grad_norm": 0.4020536829609197, "learning_rate": 4.014424890139709e-06, "loss": 0.1837, "step": 5451 }, { "epoch": 1.820671230589414, "grad_norm": 0.38407516465676206, "learning_rate": 4.012519758786355e-06, "loss": 0.1913, "step": 5452 }, { "epoch": 1.8210051761562864, "grad_norm": 0.4123403170436455, "learning_rate": 4.01061477660352e-06, "loss": 0.1976, "step": 5453 }, { "epoch": 1.8213391217231591, "grad_norm": 0.3839390900902544, "learning_rate": 4.008709943878971e-06, "loss": 0.1836, "step": 5454 }, { "epoch": 1.8216730672900319, "grad_norm": 0.38868642954562516, "learning_rate": 4.006805260900458e-06, "loss": 0.1857, "step": 5455 }, { "epoch": 1.8220070128569044, "grad_norm": 0.44277697324688925, "learning_rate": 4.004900727955703e-06, "loss": 0.203, "step": 5456 }, { "epoch": 1.8223409584237769, "grad_norm": 0.41818991156707475, "learning_rate": 4.0029963453324115e-06, "loss": 0.1891, "step": 5457 }, { "epoch": 1.8226749039906496, "grad_norm": 0.378807769453953, "learning_rate": 4.001092113318261e-06, "loss": 0.1809, "step": 5458 }, { "epoch": 1.823008849557522, "grad_norm": 0.4201589268763546, "learning_rate": 3.99918803220091e-06, "loss": 0.2034, "step": 5459 }, { "epoch": 1.8233427951243946, "grad_norm": 0.3896772152862828, "learning_rate": 3.99728410226799e-06, "loss": 0.1958, "step": 5460 }, { "epoch": 1.8236767406912673, "grad_norm": 0.3922430722052448, "learning_rate": 3.995380323807113e-06, "loss": 0.1953, "step": 5461 }, { "epoch": 1.82401068625814, "grad_norm": 0.35224053029208974, "learning_rate": 3.993476697105864e-06, "loss": 0.1756, "step": 5462 }, { "epoch": 1.8243446318250125, "grad_norm": 0.4117629603867008, "learning_rate": 3.991573222451815e-06, "loss": 0.2055, "step": 5463 }, { "epoch": 1.824678577391885, "grad_norm": 0.4119526641975354, "learning_rate": 3.989669900132504e-06, "loss": 0.1853, "step": 5464 }, { "epoch": 1.8250125229587577, "grad_norm": 0.3944045127756812, "learning_rate": 3.987766730435451e-06, "loss": 0.1987, "step": 5465 }, { "epoch": 1.8253464685256304, "grad_norm": 0.4015804217496364, "learning_rate": 3.9858637136481515e-06, "loss": 0.198, "step": 5466 }, { "epoch": 1.825680414092503, "grad_norm": 0.37176769181449776, "learning_rate": 3.98396085005808e-06, "loss": 0.1862, "step": 5467 }, { "epoch": 1.8260143596593754, "grad_norm": 0.40972865853666285, "learning_rate": 3.982058139952684e-06, "loss": 0.1886, "step": 5468 }, { "epoch": 1.8263483052262481, "grad_norm": 0.3955918184023543, "learning_rate": 3.980155583619392e-06, "loss": 0.1969, "step": 5469 }, { "epoch": 1.8266822507931209, "grad_norm": 0.3825992680611567, "learning_rate": 3.978253181345609e-06, "loss": 0.1867, "step": 5470 }, { "epoch": 1.8270161963599933, "grad_norm": 0.38415661050520683, "learning_rate": 3.9763509334187125e-06, "loss": 0.1918, "step": 5471 }, { "epoch": 1.8273501419268658, "grad_norm": 0.4392047476972209, "learning_rate": 3.974448840126061e-06, "loss": 0.1988, "step": 5472 }, { "epoch": 1.8276840874937386, "grad_norm": 0.4115656594818337, "learning_rate": 3.972546901754987e-06, "loss": 0.1947, "step": 5473 }, { "epoch": 1.8280180330606113, "grad_norm": 0.3900375859373049, "learning_rate": 3.9706451185928e-06, "loss": 0.1921, "step": 5474 }, { "epoch": 1.8283519786274836, "grad_norm": 0.407212324644722, "learning_rate": 3.968743490926791e-06, "loss": 0.1978, "step": 5475 }, { "epoch": 1.8286859241943563, "grad_norm": 0.3799461405429855, "learning_rate": 3.966842019044219e-06, "loss": 0.1794, "step": 5476 }, { "epoch": 1.829019869761229, "grad_norm": 0.5359825297417488, "learning_rate": 3.964940703232326e-06, "loss": 0.2117, "step": 5477 }, { "epoch": 1.8293538153281015, "grad_norm": 0.43912349342902457, "learning_rate": 3.963039543778327e-06, "loss": 0.1951, "step": 5478 }, { "epoch": 1.829687760894974, "grad_norm": 0.35805460260955724, "learning_rate": 3.961138540969411e-06, "loss": 0.1778, "step": 5479 }, { "epoch": 1.8300217064618467, "grad_norm": 0.3810672843512081, "learning_rate": 3.9592376950927545e-06, "loss": 0.187, "step": 5480 }, { "epoch": 1.8303556520287194, "grad_norm": 0.4011484817855961, "learning_rate": 3.957337006435499e-06, "loss": 0.1785, "step": 5481 }, { "epoch": 1.830689597595592, "grad_norm": 0.4789546571386921, "learning_rate": 3.955436475284764e-06, "loss": 0.2091, "step": 5482 }, { "epoch": 1.8310235431624644, "grad_norm": 0.4063460886404937, "learning_rate": 3.95353610192765e-06, "loss": 0.1924, "step": 5483 }, { "epoch": 1.8313574887293371, "grad_norm": 0.41526229249076346, "learning_rate": 3.95163588665123e-06, "loss": 0.1915, "step": 5484 }, { "epoch": 1.8316914342962098, "grad_norm": 0.41928063093273427, "learning_rate": 3.949735829742549e-06, "loss": 0.2008, "step": 5485 }, { "epoch": 1.8320253798630823, "grad_norm": 0.41388821065326953, "learning_rate": 3.947835931488642e-06, "loss": 0.1978, "step": 5486 }, { "epoch": 1.8323593254299548, "grad_norm": 0.42225519497611547, "learning_rate": 3.9459361921765045e-06, "loss": 0.2089, "step": 5487 }, { "epoch": 1.8326932709968276, "grad_norm": 0.3697474868956762, "learning_rate": 3.944036612093117e-06, "loss": 0.1834, "step": 5488 }, { "epoch": 1.8330272165637003, "grad_norm": 0.40844338979490125, "learning_rate": 3.942137191525434e-06, "loss": 0.1815, "step": 5489 }, { "epoch": 1.8333611621305728, "grad_norm": 0.37036000123683893, "learning_rate": 3.9402379307603825e-06, "loss": 0.1757, "step": 5490 }, { "epoch": 1.8336951076974453, "grad_norm": 0.4416887840594564, "learning_rate": 3.93833883008487e-06, "loss": 0.1928, "step": 5491 }, { "epoch": 1.834029053264318, "grad_norm": 0.4230338012823816, "learning_rate": 3.936439889785778e-06, "loss": 0.2049, "step": 5492 }, { "epoch": 1.8343629988311905, "grad_norm": 0.4552356374064481, "learning_rate": 3.934541110149964e-06, "loss": 0.1859, "step": 5493 }, { "epoch": 1.834696944398063, "grad_norm": 0.3996370964740024, "learning_rate": 3.932642491464261e-06, "loss": 0.1863, "step": 5494 }, { "epoch": 1.8350308899649357, "grad_norm": 0.41349629542546434, "learning_rate": 3.930744034015477e-06, "loss": 0.2028, "step": 5495 }, { "epoch": 1.8353648355318084, "grad_norm": 0.3874670370011834, "learning_rate": 3.9288457380903954e-06, "loss": 0.1946, "step": 5496 }, { "epoch": 1.835698781098681, "grad_norm": 0.35173551946256226, "learning_rate": 3.926947603975778e-06, "loss": 0.1787, "step": 5497 }, { "epoch": 1.8360327266655534, "grad_norm": 0.38153385633184733, "learning_rate": 3.925049631958361e-06, "loss": 0.1901, "step": 5498 }, { "epoch": 1.8363666722324261, "grad_norm": 0.37437515777658054, "learning_rate": 3.923151822324854e-06, "loss": 0.1772, "step": 5499 }, { "epoch": 1.8367006177992988, "grad_norm": 0.3666580407206624, "learning_rate": 3.9212541753619435e-06, "loss": 0.1753, "step": 5500 }, { "epoch": 1.8370345633661713, "grad_norm": 0.41030532655032376, "learning_rate": 3.9193566913562915e-06, "loss": 0.2008, "step": 5501 }, { "epoch": 1.8373685089330438, "grad_norm": 0.47705147398700304, "learning_rate": 3.917459370594537e-06, "loss": 0.2097, "step": 5502 }, { "epoch": 1.8377024544999165, "grad_norm": 0.3711053442410675, "learning_rate": 3.915562213363287e-06, "loss": 0.1921, "step": 5503 }, { "epoch": 1.8380364000667893, "grad_norm": 0.3925948723529021, "learning_rate": 3.9136652199491365e-06, "loss": 0.1895, "step": 5504 }, { "epoch": 1.8383703456336618, "grad_norm": 0.44496015087618784, "learning_rate": 3.911768390638645e-06, "loss": 0.2036, "step": 5505 }, { "epoch": 1.8387042912005342, "grad_norm": 0.4320342801300668, "learning_rate": 3.909871725718353e-06, "loss": 0.2007, "step": 5506 }, { "epoch": 1.839038236767407, "grad_norm": 0.40009969508303217, "learning_rate": 3.907975225474771e-06, "loss": 0.1907, "step": 5507 }, { "epoch": 1.8393721823342795, "grad_norm": 0.44359487303286543, "learning_rate": 3.906078890194391e-06, "loss": 0.2049, "step": 5508 }, { "epoch": 1.839706127901152, "grad_norm": 0.38608393971174715, "learning_rate": 3.904182720163672e-06, "loss": 0.1898, "step": 5509 }, { "epoch": 1.8400400734680247, "grad_norm": 0.3923128843531059, "learning_rate": 3.902286715669058e-06, "loss": 0.1837, "step": 5510 }, { "epoch": 1.8403740190348974, "grad_norm": 0.4372944654236281, "learning_rate": 3.9003908769969615e-06, "loss": 0.1986, "step": 5511 }, { "epoch": 1.8407079646017699, "grad_norm": 0.41187405317933395, "learning_rate": 3.89849520443377e-06, "loss": 0.1895, "step": 5512 }, { "epoch": 1.8410419101686424, "grad_norm": 0.4036022582486776, "learning_rate": 3.896599698265847e-06, "loss": 0.1859, "step": 5513 }, { "epoch": 1.841375855735515, "grad_norm": 0.39156933325917426, "learning_rate": 3.894704358779533e-06, "loss": 0.1933, "step": 5514 }, { "epoch": 1.8417098013023878, "grad_norm": 0.41843329760041964, "learning_rate": 3.892809186261138e-06, "loss": 0.2076, "step": 5515 }, { "epoch": 1.8420437468692603, "grad_norm": 0.456977264972126, "learning_rate": 3.890914180996954e-06, "loss": 0.2005, "step": 5516 }, { "epoch": 1.8423776924361328, "grad_norm": 0.38080749341104936, "learning_rate": 3.889019343273242e-06, "loss": 0.1896, "step": 5517 }, { "epoch": 1.8427116380030055, "grad_norm": 0.433536114286531, "learning_rate": 3.887124673376239e-06, "loss": 0.1972, "step": 5518 }, { "epoch": 1.8430455835698782, "grad_norm": 0.4274587785341165, "learning_rate": 3.885230171592157e-06, "loss": 0.2031, "step": 5519 }, { "epoch": 1.8433795291367507, "grad_norm": 0.4017626320601205, "learning_rate": 3.883335838207183e-06, "loss": 0.1903, "step": 5520 }, { "epoch": 1.8437134747036232, "grad_norm": 0.40513921939830017, "learning_rate": 3.881441673507481e-06, "loss": 0.1864, "step": 5521 }, { "epoch": 1.844047420270496, "grad_norm": 0.4158883732225394, "learning_rate": 3.879547677779184e-06, "loss": 0.186, "step": 5522 }, { "epoch": 1.8443813658373687, "grad_norm": 0.39393567740605284, "learning_rate": 3.8776538513084036e-06, "loss": 0.1913, "step": 5523 }, { "epoch": 1.844715311404241, "grad_norm": 0.3830752975691852, "learning_rate": 3.875760194381224e-06, "loss": 0.181, "step": 5524 }, { "epoch": 1.8450492569711137, "grad_norm": 0.43236140310887133, "learning_rate": 3.873866707283704e-06, "loss": 0.195, "step": 5525 }, { "epoch": 1.8453832025379864, "grad_norm": 0.3971639731949253, "learning_rate": 3.871973390301876e-06, "loss": 0.1908, "step": 5526 }, { "epoch": 1.8457171481048589, "grad_norm": 0.4329975492794574, "learning_rate": 3.8700802437217526e-06, "loss": 0.1973, "step": 5527 }, { "epoch": 1.8460510936717314, "grad_norm": 0.39079323807263294, "learning_rate": 3.8681872678293115e-06, "loss": 0.1805, "step": 5528 }, { "epoch": 1.846385039238604, "grad_norm": 0.4575892417350222, "learning_rate": 3.866294462910511e-06, "loss": 0.1846, "step": 5529 }, { "epoch": 1.8467189848054768, "grad_norm": 0.39421025346303856, "learning_rate": 3.86440182925128e-06, "loss": 0.1857, "step": 5530 }, { "epoch": 1.8470529303723493, "grad_norm": 0.3889312076173911, "learning_rate": 3.862509367137525e-06, "loss": 0.1846, "step": 5531 }, { "epoch": 1.8473868759392218, "grad_norm": 0.4334080310493205, "learning_rate": 3.86061707685512e-06, "loss": 0.2061, "step": 5532 }, { "epoch": 1.8477208215060945, "grad_norm": 0.38246167969063505, "learning_rate": 3.8587249586899245e-06, "loss": 0.1822, "step": 5533 }, { "epoch": 1.8480547670729672, "grad_norm": 0.38452054872558217, "learning_rate": 3.856833012927762e-06, "loss": 0.1843, "step": 5534 }, { "epoch": 1.8483887126398397, "grad_norm": 0.36692188968122724, "learning_rate": 3.854941239854433e-06, "loss": 0.1859, "step": 5535 }, { "epoch": 1.8487226582067122, "grad_norm": 0.4291853445087326, "learning_rate": 3.853049639755713e-06, "loss": 0.1927, "step": 5536 }, { "epoch": 1.849056603773585, "grad_norm": 0.4024469130697564, "learning_rate": 3.8511582129173495e-06, "loss": 0.1853, "step": 5537 }, { "epoch": 1.8493905493404577, "grad_norm": 0.4198881770666027, "learning_rate": 3.8492669596250636e-06, "loss": 0.1983, "step": 5538 }, { "epoch": 1.8497244949073302, "grad_norm": 0.4290931844458051, "learning_rate": 3.8473758801645535e-06, "loss": 0.2179, "step": 5539 }, { "epoch": 1.8500584404742026, "grad_norm": 0.4519053873864919, "learning_rate": 3.84548497482149e-06, "loss": 0.2008, "step": 5540 }, { "epoch": 1.8503923860410754, "grad_norm": 0.39187413508386726, "learning_rate": 3.843594243881513e-06, "loss": 0.1898, "step": 5541 }, { "epoch": 1.8507263316079479, "grad_norm": 0.4042759859011185, "learning_rate": 3.841703687630243e-06, "loss": 0.1902, "step": 5542 }, { "epoch": 1.8510602771748204, "grad_norm": 0.42476477246752786, "learning_rate": 3.8398133063532685e-06, "loss": 0.2123, "step": 5543 }, { "epoch": 1.851394222741693, "grad_norm": 0.4270712881303755, "learning_rate": 3.837923100336155e-06, "loss": 0.2102, "step": 5544 }, { "epoch": 1.8517281683085658, "grad_norm": 0.40813919718254027, "learning_rate": 3.836033069864441e-06, "loss": 0.1959, "step": 5545 }, { "epoch": 1.8520621138754383, "grad_norm": 0.5303707806361242, "learning_rate": 3.834143215223637e-06, "loss": 0.2061, "step": 5546 }, { "epoch": 1.8523960594423108, "grad_norm": 0.5552333428331633, "learning_rate": 3.832253536699227e-06, "loss": 0.2078, "step": 5547 }, { "epoch": 1.8527300050091835, "grad_norm": 0.38601722650526543, "learning_rate": 3.8303640345766714e-06, "loss": 0.1881, "step": 5548 }, { "epoch": 1.8530639505760562, "grad_norm": 0.3968436369022213, "learning_rate": 3.8284747091414e-06, "loss": 0.1951, "step": 5549 }, { "epoch": 1.8533978961429287, "grad_norm": 0.3590138296067034, "learning_rate": 3.826585560678816e-06, "loss": 0.1755, "step": 5550 }, { "epoch": 1.8537318417098012, "grad_norm": 0.4474793230992464, "learning_rate": 3.824696589474301e-06, "loss": 0.1995, "step": 5551 }, { "epoch": 1.854065787276674, "grad_norm": 0.4182641931292139, "learning_rate": 3.8228077958132055e-06, "loss": 0.2056, "step": 5552 }, { "epoch": 1.8543997328435466, "grad_norm": 0.41418885944491485, "learning_rate": 3.8209191799808535e-06, "loss": 0.175, "step": 5553 }, { "epoch": 1.8547336784104191, "grad_norm": 0.4098383640034617, "learning_rate": 3.819030742262542e-06, "loss": 0.2002, "step": 5554 }, { "epoch": 1.8550676239772916, "grad_norm": 0.43699938557858403, "learning_rate": 3.817142482943543e-06, "loss": 0.1906, "step": 5555 }, { "epoch": 1.8554015695441644, "grad_norm": 0.41360176753527556, "learning_rate": 3.815254402309097e-06, "loss": 0.1967, "step": 5556 }, { "epoch": 1.8557355151110368, "grad_norm": 0.42771060635906877, "learning_rate": 3.813366500644426e-06, "loss": 0.2154, "step": 5557 }, { "epoch": 1.8560694606779093, "grad_norm": 0.36870116160817834, "learning_rate": 3.8114787782347172e-06, "loss": 0.1812, "step": 5558 }, { "epoch": 1.856403406244782, "grad_norm": 0.40901216532316104, "learning_rate": 3.809591235365133e-06, "loss": 0.1928, "step": 5559 }, { "epoch": 1.8567373518116548, "grad_norm": 0.37696959669230656, "learning_rate": 3.807703872320809e-06, "loss": 0.1763, "step": 5560 }, { "epoch": 1.8570712973785273, "grad_norm": 0.3596874200077695, "learning_rate": 3.8058166893868543e-06, "loss": 0.1714, "step": 5561 }, { "epoch": 1.8574052429453998, "grad_norm": 0.35321557980968193, "learning_rate": 3.8039296868483493e-06, "loss": 0.1739, "step": 5562 }, { "epoch": 1.8577391885122725, "grad_norm": 0.38771500147851023, "learning_rate": 3.802042864990349e-06, "loss": 0.1897, "step": 5563 }, { "epoch": 1.8580731340791452, "grad_norm": 0.4151413292305868, "learning_rate": 3.8001562240978785e-06, "loss": 0.1842, "step": 5564 }, { "epoch": 1.8584070796460177, "grad_norm": 0.40997672571652566, "learning_rate": 3.7982697644559385e-06, "loss": 0.1959, "step": 5565 }, { "epoch": 1.8587410252128902, "grad_norm": 0.44628840605004716, "learning_rate": 3.7963834863495013e-06, "loss": 0.2154, "step": 5566 }, { "epoch": 1.859074970779763, "grad_norm": 0.3581451638522709, "learning_rate": 3.794497390063509e-06, "loss": 0.1749, "step": 5567 }, { "epoch": 1.8594089163466356, "grad_norm": 0.4141106417920125, "learning_rate": 3.792611475882881e-06, "loss": 0.1854, "step": 5568 }, { "epoch": 1.8597428619135081, "grad_norm": 0.3939536353320779, "learning_rate": 3.790725744092507e-06, "loss": 0.1864, "step": 5569 }, { "epoch": 1.8600768074803806, "grad_norm": 0.4176080845444923, "learning_rate": 3.788840194977248e-06, "loss": 0.19, "step": 5570 }, { "epoch": 1.8604107530472533, "grad_norm": 0.3773316234488866, "learning_rate": 3.7869548288219383e-06, "loss": 0.1827, "step": 5571 }, { "epoch": 1.860744698614126, "grad_norm": 0.45447820265924405, "learning_rate": 3.7850696459113845e-06, "loss": 0.2136, "step": 5572 }, { "epoch": 1.8610786441809983, "grad_norm": 0.40973336466501775, "learning_rate": 3.783184646530364e-06, "loss": 0.1974, "step": 5573 }, { "epoch": 1.861412589747871, "grad_norm": 0.38507853427875804, "learning_rate": 3.7812998309636323e-06, "loss": 0.1843, "step": 5574 }, { "epoch": 1.8617465353147438, "grad_norm": 0.42639231490055507, "learning_rate": 3.779415199495911e-06, "loss": 0.19, "step": 5575 }, { "epoch": 1.8620804808816163, "grad_norm": 0.39541454547140653, "learning_rate": 3.777530752411896e-06, "loss": 0.1969, "step": 5576 }, { "epoch": 1.8624144264484888, "grad_norm": 0.41359196848838, "learning_rate": 3.7756464899962546e-06, "loss": 0.1888, "step": 5577 }, { "epoch": 1.8627483720153615, "grad_norm": 0.36075624301082615, "learning_rate": 3.773762412533627e-06, "loss": 0.1792, "step": 5578 }, { "epoch": 1.8630823175822342, "grad_norm": 0.4098671567571041, "learning_rate": 3.771878520308624e-06, "loss": 0.1912, "step": 5579 }, { "epoch": 1.8634162631491067, "grad_norm": 0.3970704176477835, "learning_rate": 3.7699948136058327e-06, "loss": 0.2007, "step": 5580 }, { "epoch": 1.8637502087159792, "grad_norm": 0.41258075692882656, "learning_rate": 3.768111292709808e-06, "loss": 0.1952, "step": 5581 }, { "epoch": 1.864084154282852, "grad_norm": 0.4985649821234798, "learning_rate": 3.7662279579050777e-06, "loss": 0.1976, "step": 5582 }, { "epoch": 1.8644180998497246, "grad_norm": 0.39543825043818337, "learning_rate": 3.764344809476141e-06, "loss": 0.1839, "step": 5583 }, { "epoch": 1.8647520454165971, "grad_norm": 0.4169230905277188, "learning_rate": 3.7624618477074705e-06, "loss": 0.1792, "step": 5584 }, { "epoch": 1.8650859909834696, "grad_norm": 0.40159431111505056, "learning_rate": 3.760579072883508e-06, "loss": 0.1851, "step": 5585 }, { "epoch": 1.8654199365503423, "grad_norm": 0.42377227839386244, "learning_rate": 3.758696485288672e-06, "loss": 0.201, "step": 5586 }, { "epoch": 1.865753882117215, "grad_norm": 0.4257414605932439, "learning_rate": 3.7568140852073464e-06, "loss": 0.1975, "step": 5587 }, { "epoch": 1.8660878276840875, "grad_norm": 0.4169457936193867, "learning_rate": 3.754931872923892e-06, "loss": 0.1923, "step": 5588 }, { "epoch": 1.86642177325096, "grad_norm": 0.4043813591817317, "learning_rate": 3.7530498487226384e-06, "loss": 0.1929, "step": 5589 }, { "epoch": 1.8667557188178328, "grad_norm": 0.41635875636597164, "learning_rate": 3.751168012887888e-06, "loss": 0.1935, "step": 5590 }, { "epoch": 1.8670896643847052, "grad_norm": 0.38859957319609856, "learning_rate": 3.7492863657039126e-06, "loss": 0.1879, "step": 5591 }, { "epoch": 1.8674236099515777, "grad_norm": 0.4261527050568269, "learning_rate": 3.7474049074549596e-06, "loss": 0.1999, "step": 5592 }, { "epoch": 1.8677575555184505, "grad_norm": 0.42976803187002927, "learning_rate": 3.7455236384252435e-06, "loss": 0.2078, "step": 5593 }, { "epoch": 1.8680915010853232, "grad_norm": 0.3885539430483641, "learning_rate": 3.743642558898953e-06, "loss": 0.1803, "step": 5594 }, { "epoch": 1.8684254466521957, "grad_norm": 0.475836117741637, "learning_rate": 3.7417616691602477e-06, "loss": 0.1962, "step": 5595 }, { "epoch": 1.8687593922190682, "grad_norm": 0.4264306061995, "learning_rate": 3.739880969493257e-06, "loss": 0.2007, "step": 5596 }, { "epoch": 1.869093337785941, "grad_norm": 0.4188722723961078, "learning_rate": 3.738000460182081e-06, "loss": 0.199, "step": 5597 }, { "epoch": 1.8694272833528136, "grad_norm": 0.43449811676434813, "learning_rate": 3.736120141510798e-06, "loss": 0.2002, "step": 5598 }, { "epoch": 1.869761228919686, "grad_norm": 0.4255881141855972, "learning_rate": 3.734240013763448e-06, "loss": 0.2031, "step": 5599 }, { "epoch": 1.8700951744865586, "grad_norm": 0.3924986018956876, "learning_rate": 3.732360077224049e-06, "loss": 0.1834, "step": 5600 }, { "epoch": 1.8704291200534313, "grad_norm": 0.41303090285019545, "learning_rate": 3.730480332176586e-06, "loss": 0.1947, "step": 5601 }, { "epoch": 1.870763065620304, "grad_norm": 0.4173686398679209, "learning_rate": 3.7286007789050147e-06, "loss": 0.2048, "step": 5602 }, { "epoch": 1.8710970111871765, "grad_norm": 0.42256726256761623, "learning_rate": 3.726721417693268e-06, "loss": 0.196, "step": 5603 }, { "epoch": 1.871430956754049, "grad_norm": 0.37707316686734915, "learning_rate": 3.7248422488252433e-06, "loss": 0.184, "step": 5604 }, { "epoch": 1.8717649023209217, "grad_norm": 0.42889234026369716, "learning_rate": 3.722963272584812e-06, "loss": 0.1908, "step": 5605 }, { "epoch": 1.8720988478877942, "grad_norm": 0.3962460242481815, "learning_rate": 3.721084489255815e-06, "loss": 0.1839, "step": 5606 }, { "epoch": 1.8724327934546667, "grad_norm": 0.4230266260559559, "learning_rate": 3.719205899122064e-06, "loss": 0.1995, "step": 5607 }, { "epoch": 1.8727667390215395, "grad_norm": 0.42644054894229216, "learning_rate": 3.7173275024673424e-06, "loss": 0.197, "step": 5608 }, { "epoch": 1.8731006845884122, "grad_norm": 0.4754937302170339, "learning_rate": 3.7154492995754046e-06, "loss": 0.2116, "step": 5609 }, { "epoch": 1.8734346301552847, "grad_norm": 0.4370210334991797, "learning_rate": 3.7135712907299753e-06, "loss": 0.1991, "step": 5610 }, { "epoch": 1.8737685757221572, "grad_norm": 0.3959125955396685, "learning_rate": 3.7116934762147504e-06, "loss": 0.1768, "step": 5611 }, { "epoch": 1.8741025212890299, "grad_norm": 0.40337586546501275, "learning_rate": 3.709815856313395e-06, "loss": 0.1843, "step": 5612 }, { "epoch": 1.8744364668559026, "grad_norm": 0.4036266931746901, "learning_rate": 3.7079384313095464e-06, "loss": 0.1948, "step": 5613 }, { "epoch": 1.874770412422775, "grad_norm": 0.40647248544698983, "learning_rate": 3.70606120148681e-06, "loss": 0.1949, "step": 5614 }, { "epoch": 1.8751043579896476, "grad_norm": 0.4983920786650823, "learning_rate": 3.7041841671287654e-06, "loss": 0.1782, "step": 5615 }, { "epoch": 1.8754383035565203, "grad_norm": 0.42161416739832963, "learning_rate": 3.70230732851896e-06, "loss": 0.1899, "step": 5616 }, { "epoch": 1.875772249123393, "grad_norm": 0.5092564041054836, "learning_rate": 3.7004306859409134e-06, "loss": 0.2121, "step": 5617 }, { "epoch": 1.8761061946902655, "grad_norm": 0.42450305824700535, "learning_rate": 3.6985542396781127e-06, "loss": 0.1945, "step": 5618 }, { "epoch": 1.876440140257138, "grad_norm": 0.41218596453526796, "learning_rate": 3.6966779900140193e-06, "loss": 0.1883, "step": 5619 }, { "epoch": 1.8767740858240107, "grad_norm": 0.45585778407093536, "learning_rate": 3.694801937232058e-06, "loss": 0.1923, "step": 5620 }, { "epoch": 1.8771080313908834, "grad_norm": 0.43555642104196896, "learning_rate": 3.6929260816156353e-06, "loss": 0.2081, "step": 5621 }, { "epoch": 1.8774419769577557, "grad_norm": 0.38653206137255025, "learning_rate": 3.691050423448118e-06, "loss": 0.188, "step": 5622 }, { "epoch": 1.8777759225246284, "grad_norm": 0.44802164116477605, "learning_rate": 3.689174963012847e-06, "loss": 0.204, "step": 5623 }, { "epoch": 1.8781098680915012, "grad_norm": 0.3824309407516524, "learning_rate": 3.6872997005931323e-06, "loss": 0.1876, "step": 5624 }, { "epoch": 1.8784438136583737, "grad_norm": 0.4373752040761639, "learning_rate": 3.6854246364722534e-06, "loss": 0.1988, "step": 5625 }, { "epoch": 1.8787777592252461, "grad_norm": 0.3912895000237093, "learning_rate": 3.683549770933461e-06, "loss": 0.1888, "step": 5626 }, { "epoch": 1.8791117047921189, "grad_norm": 0.46238272413161635, "learning_rate": 3.6816751042599774e-06, "loss": 0.2081, "step": 5627 }, { "epoch": 1.8794456503589916, "grad_norm": 0.38589801494425297, "learning_rate": 3.6798006367349926e-06, "loss": 0.1829, "step": 5628 }, { "epoch": 1.879779595925864, "grad_norm": 0.3752621047827673, "learning_rate": 3.6779263686416668e-06, "loss": 0.1925, "step": 5629 }, { "epoch": 1.8801135414927366, "grad_norm": 0.3765889397099863, "learning_rate": 3.676052300263129e-06, "loss": 0.1846, "step": 5630 }, { "epoch": 1.8804474870596093, "grad_norm": 0.3676402690847071, "learning_rate": 3.6741784318824814e-06, "loss": 0.1808, "step": 5631 }, { "epoch": 1.880781432626482, "grad_norm": 0.3551659592860279, "learning_rate": 3.6723047637827897e-06, "loss": 0.1712, "step": 5632 }, { "epoch": 1.8811153781933545, "grad_norm": 0.410879058236752, "learning_rate": 3.670431296247099e-06, "loss": 0.2003, "step": 5633 }, { "epoch": 1.881449323760227, "grad_norm": 0.40283302371260815, "learning_rate": 3.6685580295584162e-06, "loss": 0.188, "step": 5634 }, { "epoch": 1.8817832693270997, "grad_norm": 0.3891040548112664, "learning_rate": 3.6666849639997205e-06, "loss": 0.1849, "step": 5635 }, { "epoch": 1.8821172148939724, "grad_norm": 0.41052509920344155, "learning_rate": 3.6648120998539596e-06, "loss": 0.1862, "step": 5636 }, { "epoch": 1.882451160460845, "grad_norm": 0.4079072214569817, "learning_rate": 3.662939437404053e-06, "loss": 0.1982, "step": 5637 }, { "epoch": 1.8827851060277174, "grad_norm": 0.42609676874734265, "learning_rate": 3.6610669769328853e-06, "loss": 0.1851, "step": 5638 }, { "epoch": 1.8831190515945901, "grad_norm": 0.4182804674313846, "learning_rate": 3.659194718723319e-06, "loss": 0.1946, "step": 5639 }, { "epoch": 1.8834529971614626, "grad_norm": 0.406048729629456, "learning_rate": 3.657322663058177e-06, "loss": 0.1954, "step": 5640 }, { "epoch": 1.8837869427283351, "grad_norm": 0.4220538626404686, "learning_rate": 3.655450810220257e-06, "loss": 0.1911, "step": 5641 }, { "epoch": 1.8841208882952079, "grad_norm": 0.45423348313302325, "learning_rate": 3.6535791604923225e-06, "loss": 0.1924, "step": 5642 }, { "epoch": 1.8844548338620806, "grad_norm": 0.3981725690261841, "learning_rate": 3.6517077141571076e-06, "loss": 0.1844, "step": 5643 }, { "epoch": 1.884788779428953, "grad_norm": 0.559974848713012, "learning_rate": 3.649836471497321e-06, "loss": 0.1981, "step": 5644 }, { "epoch": 1.8851227249958256, "grad_norm": 0.42473441487267527, "learning_rate": 3.6479654327956325e-06, "loss": 0.2059, "step": 5645 }, { "epoch": 1.8854566705626983, "grad_norm": 0.4574118801471747, "learning_rate": 3.646094598334685e-06, "loss": 0.1964, "step": 5646 }, { "epoch": 1.885790616129571, "grad_norm": 0.4137780911436003, "learning_rate": 3.64422396839709e-06, "loss": 0.1848, "step": 5647 }, { "epoch": 1.8861245616964435, "grad_norm": 0.40265674895445425, "learning_rate": 3.642353543265429e-06, "loss": 0.1887, "step": 5648 }, { "epoch": 1.886458507263316, "grad_norm": 0.3859651283367934, "learning_rate": 3.640483323222248e-06, "loss": 0.1923, "step": 5649 }, { "epoch": 1.8867924528301887, "grad_norm": 0.4131845833522721, "learning_rate": 3.638613308550072e-06, "loss": 0.2009, "step": 5650 }, { "epoch": 1.8871263983970614, "grad_norm": 0.3875014614187485, "learning_rate": 3.636743499531385e-06, "loss": 0.1782, "step": 5651 }, { "epoch": 1.887460343963934, "grad_norm": 0.38824358254338215, "learning_rate": 3.634873896448644e-06, "loss": 0.1885, "step": 5652 }, { "epoch": 1.8877942895308064, "grad_norm": 0.35317513678915435, "learning_rate": 3.633004499584275e-06, "loss": 0.1774, "step": 5653 }, { "epoch": 1.8881282350976791, "grad_norm": 0.3775118670972697, "learning_rate": 3.6311353092206723e-06, "loss": 0.186, "step": 5654 }, { "epoch": 1.8884621806645516, "grad_norm": 0.4040450531349787, "learning_rate": 3.6292663256401967e-06, "loss": 0.1935, "step": 5655 }, { "epoch": 1.8887961262314241, "grad_norm": 0.4223300761091552, "learning_rate": 3.6273975491251844e-06, "loss": 0.2026, "step": 5656 }, { "epoch": 1.8891300717982968, "grad_norm": 0.3819512409614182, "learning_rate": 3.625528979957935e-06, "loss": 0.2008, "step": 5657 }, { "epoch": 1.8894640173651696, "grad_norm": 0.40376004385819364, "learning_rate": 3.6236606184207164e-06, "loss": 0.184, "step": 5658 }, { "epoch": 1.889797962932042, "grad_norm": 0.4018629191997057, "learning_rate": 3.621792464795767e-06, "loss": 0.1921, "step": 5659 }, { "epoch": 1.8901319084989145, "grad_norm": 0.38134294753922704, "learning_rate": 3.6199245193652944e-06, "loss": 0.1823, "step": 5660 }, { "epoch": 1.8904658540657873, "grad_norm": 0.4086245882829037, "learning_rate": 3.6180567824114715e-06, "loss": 0.2045, "step": 5661 }, { "epoch": 1.89079979963266, "grad_norm": 0.4147921609611881, "learning_rate": 3.6161892542164444e-06, "loss": 0.2035, "step": 5662 }, { "epoch": 1.8911337451995325, "grad_norm": 0.403633243120622, "learning_rate": 3.614321935062325e-06, "loss": 0.1905, "step": 5663 }, { "epoch": 1.891467690766405, "grad_norm": 0.4062716446644573, "learning_rate": 3.6124548252311918e-06, "loss": 0.2017, "step": 5664 }, { "epoch": 1.8918016363332777, "grad_norm": 0.43732722649221445, "learning_rate": 3.610587925005097e-06, "loss": 0.1922, "step": 5665 }, { "epoch": 1.8921355819001504, "grad_norm": 0.4168207296619112, "learning_rate": 3.608721234666054e-06, "loss": 0.1872, "step": 5666 }, { "epoch": 1.892469527467023, "grad_norm": 0.4001633405949691, "learning_rate": 3.6068547544960493e-06, "loss": 0.1999, "step": 5667 }, { "epoch": 1.8928034730338954, "grad_norm": 0.359620384256815, "learning_rate": 3.6049884847770396e-06, "loss": 0.1879, "step": 5668 }, { "epoch": 1.8931374186007681, "grad_norm": 0.3814699560183258, "learning_rate": 3.6031224257909448e-06, "loss": 0.1925, "step": 5669 }, { "epoch": 1.8934713641676408, "grad_norm": 0.4249305582407571, "learning_rate": 3.6012565778196552e-06, "loss": 0.2018, "step": 5670 }, { "epoch": 1.893805309734513, "grad_norm": 0.44929761623627773, "learning_rate": 3.5993909411450297e-06, "loss": 0.2176, "step": 5671 }, { "epoch": 1.8941392553013858, "grad_norm": 0.4402284991993238, "learning_rate": 3.597525516048894e-06, "loss": 0.1985, "step": 5672 }, { "epoch": 1.8944732008682585, "grad_norm": 0.41033454175195433, "learning_rate": 3.5956603028130397e-06, "loss": 0.179, "step": 5673 }, { "epoch": 1.894807146435131, "grad_norm": 0.41702509272834387, "learning_rate": 3.5937953017192356e-06, "loss": 0.1944, "step": 5674 }, { "epoch": 1.8951410920020035, "grad_norm": 0.417384380584999, "learning_rate": 3.591930513049208e-06, "loss": 0.2002, "step": 5675 }, { "epoch": 1.8954750375688763, "grad_norm": 0.404210881524086, "learning_rate": 3.5900659370846556e-06, "loss": 0.1915, "step": 5676 }, { "epoch": 1.895808983135749, "grad_norm": 0.40054054358372826, "learning_rate": 3.5882015741072464e-06, "loss": 0.1893, "step": 5677 }, { "epoch": 1.8961429287026215, "grad_norm": 0.4716609545548075, "learning_rate": 3.586337424398609e-06, "loss": 0.2013, "step": 5678 }, { "epoch": 1.896476874269494, "grad_norm": 0.4086034807646897, "learning_rate": 3.584473488240352e-06, "loss": 0.1891, "step": 5679 }, { "epoch": 1.8968108198363667, "grad_norm": 0.3875982015231373, "learning_rate": 3.5826097659140413e-06, "loss": 0.189, "step": 5680 }, { "epoch": 1.8971447654032394, "grad_norm": 0.35728633673821014, "learning_rate": 3.5807462577012152e-06, "loss": 0.181, "step": 5681 }, { "epoch": 1.897478710970112, "grad_norm": 0.3834926713626789, "learning_rate": 3.5788829638833777e-06, "loss": 0.189, "step": 5682 }, { "epoch": 1.8978126565369844, "grad_norm": 0.39649251448728734, "learning_rate": 3.5770198847420016e-06, "loss": 0.1892, "step": 5683 }, { "epoch": 1.898146602103857, "grad_norm": 0.42377795991739803, "learning_rate": 3.5751570205585264e-06, "loss": 0.1914, "step": 5684 }, { "epoch": 1.8984805476707298, "grad_norm": 0.4002763802703904, "learning_rate": 3.573294371614361e-06, "loss": 0.1905, "step": 5685 }, { "epoch": 1.8988144932376023, "grad_norm": 0.4236073409479343, "learning_rate": 3.571431938190879e-06, "loss": 0.1981, "step": 5686 }, { "epoch": 1.8991484388044748, "grad_norm": 0.3916550676455339, "learning_rate": 3.5695697205694246e-06, "loss": 0.1984, "step": 5687 }, { "epoch": 1.8994823843713475, "grad_norm": 0.4034111232421996, "learning_rate": 3.567707719031306e-06, "loss": 0.195, "step": 5688 }, { "epoch": 1.89981632993822, "grad_norm": 0.41960190854899787, "learning_rate": 3.5658459338578016e-06, "loss": 0.2041, "step": 5689 }, { "epoch": 1.9001502755050925, "grad_norm": 0.40615605662054505, "learning_rate": 3.563984365330153e-06, "loss": 0.1915, "step": 5690 }, { "epoch": 1.9004842210719652, "grad_norm": 0.3654123514969394, "learning_rate": 3.562123013729577e-06, "loss": 0.1834, "step": 5691 }, { "epoch": 1.900818166638838, "grad_norm": 0.43052112290651406, "learning_rate": 3.56026187933725e-06, "loss": 0.2023, "step": 5692 }, { "epoch": 1.9011521122057105, "grad_norm": 0.4043205360016579, "learning_rate": 3.5584009624343187e-06, "loss": 0.2009, "step": 5693 }, { "epoch": 1.901486057772583, "grad_norm": 0.38893024556161726, "learning_rate": 3.5565402633018963e-06, "loss": 0.1861, "step": 5694 }, { "epoch": 1.9018200033394557, "grad_norm": 0.4320452390717206, "learning_rate": 3.554679782221063e-06, "loss": 0.19, "step": 5695 }, { "epoch": 1.9021539489063284, "grad_norm": 0.3916662102911691, "learning_rate": 3.552819519472865e-06, "loss": 0.1877, "step": 5696 }, { "epoch": 1.9024878944732009, "grad_norm": 0.44565474629895935, "learning_rate": 3.5509594753383202e-06, "loss": 0.2008, "step": 5697 }, { "epoch": 1.9028218400400734, "grad_norm": 0.41234932037291344, "learning_rate": 3.5490996500984085e-06, "loss": 0.1962, "step": 5698 }, { "epoch": 1.903155785606946, "grad_norm": 0.3981277646668938, "learning_rate": 3.547240044034079e-06, "loss": 0.1839, "step": 5699 }, { "epoch": 1.9034897311738188, "grad_norm": 0.3711733861198523, "learning_rate": 3.545380657426247e-06, "loss": 0.1824, "step": 5700 }, { "epoch": 1.9038236767406913, "grad_norm": 0.4233679288176297, "learning_rate": 3.5435214905557937e-06, "loss": 0.1844, "step": 5701 }, { "epoch": 1.9041576223075638, "grad_norm": 0.3896155487732712, "learning_rate": 3.5416625437035656e-06, "loss": 0.1896, "step": 5702 }, { "epoch": 1.9044915678744365, "grad_norm": 0.37448542490424214, "learning_rate": 3.539803817150385e-06, "loss": 0.184, "step": 5703 }, { "epoch": 1.904825513441309, "grad_norm": 0.4181501138090368, "learning_rate": 3.5379453111770313e-06, "loss": 0.2077, "step": 5704 }, { "epoch": 1.9051594590081815, "grad_norm": 0.43828339837035857, "learning_rate": 3.536087026064252e-06, "loss": 0.2003, "step": 5705 }, { "epoch": 1.9054934045750542, "grad_norm": 0.3939854984655365, "learning_rate": 3.534228962092766e-06, "loss": 0.1927, "step": 5706 }, { "epoch": 1.905827350141927, "grad_norm": 0.3850822547742392, "learning_rate": 3.5323711195432533e-06, "loss": 0.1921, "step": 5707 }, { "epoch": 1.9061612957087994, "grad_norm": 0.3797573178782776, "learning_rate": 3.530513498696363e-06, "loss": 0.1868, "step": 5708 }, { "epoch": 1.906495241275672, "grad_norm": 0.3974351606525824, "learning_rate": 3.5286560998327125e-06, "loss": 0.1957, "step": 5709 }, { "epoch": 1.9068291868425447, "grad_norm": 0.37982353045389616, "learning_rate": 3.5267989232328827e-06, "loss": 0.1912, "step": 5710 }, { "epoch": 1.9071631324094174, "grad_norm": 0.4056309512887979, "learning_rate": 3.5249419691774212e-06, "loss": 0.1878, "step": 5711 }, { "epoch": 1.9074970779762899, "grad_norm": 0.41189171348380066, "learning_rate": 3.523085237946844e-06, "loss": 0.1928, "step": 5712 }, { "epoch": 1.9078310235431624, "grad_norm": 0.39755751196618816, "learning_rate": 3.5212287298216306e-06, "loss": 0.1903, "step": 5713 }, { "epoch": 1.908164969110035, "grad_norm": 0.4965401018841489, "learning_rate": 3.5193724450822296e-06, "loss": 0.2001, "step": 5714 }, { "epoch": 1.9084989146769078, "grad_norm": 0.41952782189260945, "learning_rate": 3.517516384009056e-06, "loss": 0.1897, "step": 5715 }, { "epoch": 1.9088328602437803, "grad_norm": 0.36265255842951255, "learning_rate": 3.515660546882488e-06, "loss": 0.1814, "step": 5716 }, { "epoch": 1.9091668058106528, "grad_norm": 0.41875774466473636, "learning_rate": 3.5138049339828718e-06, "loss": 0.1956, "step": 5717 }, { "epoch": 1.9095007513775255, "grad_norm": 0.45427593055322385, "learning_rate": 3.5119495455905194e-06, "loss": 0.1929, "step": 5718 }, { "epoch": 1.9098346969443982, "grad_norm": 0.4283125075634053, "learning_rate": 3.5100943819857082e-06, "loss": 0.1849, "step": 5719 }, { "epoch": 1.9101686425112705, "grad_norm": 0.3912738538037045, "learning_rate": 3.508239443448685e-06, "loss": 0.1754, "step": 5720 }, { "epoch": 1.9105025880781432, "grad_norm": 0.3783746014875604, "learning_rate": 3.5063847302596587e-06, "loss": 0.1858, "step": 5721 }, { "epoch": 1.910836533645016, "grad_norm": 0.40411330753733493, "learning_rate": 3.504530242698806e-06, "loss": 0.185, "step": 5722 }, { "epoch": 1.9111704792118884, "grad_norm": 0.367902521163063, "learning_rate": 3.5026759810462687e-06, "loss": 0.1823, "step": 5723 }, { "epoch": 1.911504424778761, "grad_norm": 0.36691214379783854, "learning_rate": 3.5008219455821546e-06, "loss": 0.1837, "step": 5724 }, { "epoch": 1.9118383703456336, "grad_norm": 0.4113856631417277, "learning_rate": 3.4989681365865363e-06, "loss": 0.1943, "step": 5725 }, { "epoch": 1.9121723159125064, "grad_norm": 0.38908322636188303, "learning_rate": 3.497114554339457e-06, "loss": 0.175, "step": 5726 }, { "epoch": 1.9125062614793789, "grad_norm": 0.4302171916585288, "learning_rate": 3.4952611991209197e-06, "loss": 0.1912, "step": 5727 }, { "epoch": 1.9128402070462514, "grad_norm": 0.42508386899522627, "learning_rate": 3.4934080712108964e-06, "loss": 0.1986, "step": 5728 }, { "epoch": 1.913174152613124, "grad_norm": 0.40828670712191045, "learning_rate": 3.4915551708893236e-06, "loss": 0.1983, "step": 5729 }, { "epoch": 1.9135080981799968, "grad_norm": 0.3826098539335381, "learning_rate": 3.489702498436103e-06, "loss": 0.1892, "step": 5730 }, { "epoch": 1.9138420437468693, "grad_norm": 0.37640018946704573, "learning_rate": 3.487850054131103e-06, "loss": 0.1825, "step": 5731 }, { "epoch": 1.9141759893137418, "grad_norm": 0.423540829528593, "learning_rate": 3.4859978382541575e-06, "loss": 0.2008, "step": 5732 }, { "epoch": 1.9145099348806145, "grad_norm": 0.40110211348855723, "learning_rate": 3.4841458510850656e-06, "loss": 0.1928, "step": 5733 }, { "epoch": 1.9148438804474872, "grad_norm": 0.4202765037424674, "learning_rate": 3.482294092903592e-06, "loss": 0.1913, "step": 5734 }, { "epoch": 1.9151778260143597, "grad_norm": 0.3992739294133403, "learning_rate": 3.480442563989466e-06, "loss": 0.1855, "step": 5735 }, { "epoch": 1.9155117715812322, "grad_norm": 0.39103883707441306, "learning_rate": 3.4785912646223813e-06, "loss": 0.198, "step": 5736 }, { "epoch": 1.915845717148105, "grad_norm": 0.42159492620557487, "learning_rate": 3.4767401950820003e-06, "loss": 0.2061, "step": 5737 }, { "epoch": 1.9161796627149774, "grad_norm": 0.38979029077665317, "learning_rate": 3.4748893556479497e-06, "loss": 0.185, "step": 5738 }, { "epoch": 1.91651360828185, "grad_norm": 0.3767039745449479, "learning_rate": 3.4730387465998194e-06, "loss": 0.1911, "step": 5739 }, { "epoch": 1.9168475538487226, "grad_norm": 0.4421262922770817, "learning_rate": 3.4711883682171666e-06, "loss": 0.2017, "step": 5740 }, { "epoch": 1.9171814994155953, "grad_norm": 0.4073953632957591, "learning_rate": 3.4693382207795114e-06, "loss": 0.1913, "step": 5741 }, { "epoch": 1.9175154449824678, "grad_norm": 0.3976848565725884, "learning_rate": 3.4674883045663404e-06, "loss": 0.1888, "step": 5742 }, { "epoch": 1.9178493905493403, "grad_norm": 0.3853218853012135, "learning_rate": 3.465638619857104e-06, "loss": 0.1828, "step": 5743 }, { "epoch": 1.918183336116213, "grad_norm": 0.40753207467467006, "learning_rate": 3.463789166931223e-06, "loss": 0.1907, "step": 5744 }, { "epoch": 1.9185172816830858, "grad_norm": 0.3765078672210008, "learning_rate": 3.4619399460680757e-06, "loss": 0.1825, "step": 5745 }, { "epoch": 1.9188512272499583, "grad_norm": 0.434986647967096, "learning_rate": 3.460090957547011e-06, "loss": 0.1806, "step": 5746 }, { "epoch": 1.9191851728168308, "grad_norm": 0.40851362414259823, "learning_rate": 3.4582422016473384e-06, "loss": 0.1935, "step": 5747 }, { "epoch": 1.9195191183837035, "grad_norm": 0.41083715107382207, "learning_rate": 3.4563936786483345e-06, "loss": 0.1988, "step": 5748 }, { "epoch": 1.9198530639505762, "grad_norm": 0.4074208018702095, "learning_rate": 3.454545388829239e-06, "loss": 0.1944, "step": 5749 }, { "epoch": 1.9201870095174487, "grad_norm": 0.39520620444751453, "learning_rate": 3.4526973324692614e-06, "loss": 0.1908, "step": 5750 }, { "epoch": 1.9205209550843212, "grad_norm": 0.43906524174111994, "learning_rate": 3.4508495098475712e-06, "loss": 0.1992, "step": 5751 }, { "epoch": 1.920854900651194, "grad_norm": 0.4502858723264202, "learning_rate": 3.4490019212433035e-06, "loss": 0.2044, "step": 5752 }, { "epoch": 1.9211888462180664, "grad_norm": 0.40705908631119775, "learning_rate": 3.447154566935557e-06, "loss": 0.1943, "step": 5753 }, { "epoch": 1.921522791784939, "grad_norm": 0.39945366422919054, "learning_rate": 3.4453074472033975e-06, "loss": 0.1863, "step": 5754 }, { "epoch": 1.9218567373518116, "grad_norm": 0.39100600573805244, "learning_rate": 3.443460562325853e-06, "loss": 0.1823, "step": 5755 }, { "epoch": 1.9221906829186843, "grad_norm": 0.3867960086824861, "learning_rate": 3.4416139125819204e-06, "loss": 0.1833, "step": 5756 }, { "epoch": 1.9225246284855568, "grad_norm": 0.41121060111634883, "learning_rate": 3.4397674982505546e-06, "loss": 0.1887, "step": 5757 }, { "epoch": 1.9228585740524293, "grad_norm": 0.42275941705337255, "learning_rate": 3.43792131961068e-06, "loss": 0.2092, "step": 5758 }, { "epoch": 1.923192519619302, "grad_norm": 0.3843148443080461, "learning_rate": 3.4360753769411816e-06, "loss": 0.1749, "step": 5759 }, { "epoch": 1.9235264651861748, "grad_norm": 0.39501568437932283, "learning_rate": 3.4342296705209112e-06, "loss": 0.1731, "step": 5760 }, { "epoch": 1.9238604107530473, "grad_norm": 0.3831115342716545, "learning_rate": 3.432384200628688e-06, "loss": 0.1848, "step": 5761 }, { "epoch": 1.9241943563199198, "grad_norm": 0.3925566970493553, "learning_rate": 3.4305389675432882e-06, "loss": 0.1923, "step": 5762 }, { "epoch": 1.9245283018867925, "grad_norm": 0.4353970565860827, "learning_rate": 3.4286939715434573e-06, "loss": 0.1941, "step": 5763 }, { "epoch": 1.9248622474536652, "grad_norm": 0.3853234833941961, "learning_rate": 3.4268492129079047e-06, "loss": 0.1862, "step": 5764 }, { "epoch": 1.9251961930205377, "grad_norm": 0.43244855006490596, "learning_rate": 3.4250046919153e-06, "loss": 0.199, "step": 5765 }, { "epoch": 1.9255301385874102, "grad_norm": 0.44410365215339354, "learning_rate": 3.4231604088442806e-06, "loss": 0.196, "step": 5766 }, { "epoch": 1.925864084154283, "grad_norm": 0.4221818596900024, "learning_rate": 3.4213163639734504e-06, "loss": 0.1965, "step": 5767 }, { "epoch": 1.9261980297211556, "grad_norm": 0.40430757506097365, "learning_rate": 3.4194725575813707e-06, "loss": 0.1871, "step": 5768 }, { "epoch": 1.9265319752880279, "grad_norm": 0.39690720279215563, "learning_rate": 3.417628989946572e-06, "loss": 0.1866, "step": 5769 }, { "epoch": 1.9268659208549006, "grad_norm": 0.41882654166179956, "learning_rate": 3.415785661347546e-06, "loss": 0.1976, "step": 5770 }, { "epoch": 1.9271998664217733, "grad_norm": 0.42589760681235683, "learning_rate": 3.4139425720627494e-06, "loss": 0.1931, "step": 5771 }, { "epoch": 1.9275338119886458, "grad_norm": 0.42698531652640753, "learning_rate": 3.412099722370601e-06, "loss": 0.1954, "step": 5772 }, { "epoch": 1.9278677575555183, "grad_norm": 0.4458391967702739, "learning_rate": 3.4102571125494877e-06, "loss": 0.2061, "step": 5773 }, { "epoch": 1.928201703122391, "grad_norm": 0.42935606022669165, "learning_rate": 3.408414742877757e-06, "loss": 0.2027, "step": 5774 }, { "epoch": 1.9285356486892637, "grad_norm": 0.4383502567902757, "learning_rate": 3.406572613633719e-06, "loss": 0.1894, "step": 5775 }, { "epoch": 1.9288695942561362, "grad_norm": 0.40057496070461995, "learning_rate": 3.40473072509565e-06, "loss": 0.199, "step": 5776 }, { "epoch": 1.9292035398230087, "grad_norm": 0.39138634692508933, "learning_rate": 3.4028890775417887e-06, "loss": 0.1884, "step": 5777 }, { "epoch": 1.9295374853898815, "grad_norm": 0.4266213221820064, "learning_rate": 3.4010476712503367e-06, "loss": 0.1957, "step": 5778 }, { "epoch": 1.9298714309567542, "grad_norm": 0.41962434822821776, "learning_rate": 3.3992065064994615e-06, "loss": 0.1986, "step": 5779 }, { "epoch": 1.9302053765236267, "grad_norm": 0.40653053851480586, "learning_rate": 3.3973655835672923e-06, "loss": 0.1872, "step": 5780 }, { "epoch": 1.9305393220904992, "grad_norm": 0.3992762796239201, "learning_rate": 3.3955249027319214e-06, "loss": 0.1926, "step": 5781 }, { "epoch": 1.9308732676573719, "grad_norm": 0.39309596029124866, "learning_rate": 3.3936844642714073e-06, "loss": 0.1885, "step": 5782 }, { "epoch": 1.9312072132242446, "grad_norm": 0.3967296969011724, "learning_rate": 3.3918442684637687e-06, "loss": 0.1945, "step": 5783 }, { "epoch": 1.931541158791117, "grad_norm": 0.37389292859170953, "learning_rate": 3.3900043155869865e-06, "loss": 0.1857, "step": 5784 }, { "epoch": 1.9318751043579896, "grad_norm": 0.406835201145131, "learning_rate": 3.388164605919012e-06, "loss": 0.1912, "step": 5785 }, { "epoch": 1.9322090499248623, "grad_norm": 0.40644414055673206, "learning_rate": 3.3863251397377516e-06, "loss": 0.1882, "step": 5786 }, { "epoch": 1.9325429954917348, "grad_norm": 0.4534582493017801, "learning_rate": 3.3844859173210797e-06, "loss": 0.2098, "step": 5787 }, { "epoch": 1.9328769410586073, "grad_norm": 0.40570770824932884, "learning_rate": 3.382646938946832e-06, "loss": 0.182, "step": 5788 }, { "epoch": 1.93321088662548, "grad_norm": 0.4125254859784587, "learning_rate": 3.3808082048928083e-06, "loss": 0.1827, "step": 5789 }, { "epoch": 1.9335448321923527, "grad_norm": 0.42050886372039403, "learning_rate": 3.378969715436767e-06, "loss": 0.1788, "step": 5790 }, { "epoch": 1.9338787777592252, "grad_norm": 0.4029343552361385, "learning_rate": 3.3771314708564408e-06, "loss": 0.1932, "step": 5791 }, { "epoch": 1.9342127233260977, "grad_norm": 0.4431266567668056, "learning_rate": 3.3752934714295146e-06, "loss": 0.2059, "step": 5792 }, { "epoch": 1.9345466688929704, "grad_norm": 0.393345686460719, "learning_rate": 3.373455717433639e-06, "loss": 0.2035, "step": 5793 }, { "epoch": 1.9348806144598432, "grad_norm": 0.4071248248099155, "learning_rate": 3.3716182091464295e-06, "loss": 0.1797, "step": 5794 }, { "epoch": 1.9352145600267157, "grad_norm": 0.41122336422676053, "learning_rate": 3.3697809468454634e-06, "loss": 0.1922, "step": 5795 }, { "epoch": 1.9355485055935882, "grad_norm": 0.4212957583988024, "learning_rate": 3.3679439308082777e-06, "loss": 0.1996, "step": 5796 }, { "epoch": 1.9358824511604609, "grad_norm": 0.3994840928223961, "learning_rate": 3.366107161312381e-06, "loss": 0.1845, "step": 5797 }, { "epoch": 1.9362163967273336, "grad_norm": 0.42573497457455245, "learning_rate": 3.3642706386352355e-06, "loss": 0.1994, "step": 5798 }, { "epoch": 1.936550342294206, "grad_norm": 0.3631236184721829, "learning_rate": 3.3624343630542707e-06, "loss": 0.176, "step": 5799 }, { "epoch": 1.9368842878610786, "grad_norm": 0.38488554412626735, "learning_rate": 3.3605983348468764e-06, "loss": 0.1882, "step": 5800 }, { "epoch": 1.9372182334279513, "grad_norm": 0.47199555184017095, "learning_rate": 3.3587625542904063e-06, "loss": 0.2026, "step": 5801 }, { "epoch": 1.9375521789948238, "grad_norm": 0.4601046109909035, "learning_rate": 3.356927021662178e-06, "loss": 0.1916, "step": 5802 }, { "epoch": 1.9378861245616963, "grad_norm": 0.3994198582194862, "learning_rate": 3.3550917372394696e-06, "loss": 0.184, "step": 5803 }, { "epoch": 1.938220070128569, "grad_norm": 0.3734853608354193, "learning_rate": 3.353256701299522e-06, "loss": 0.1833, "step": 5804 }, { "epoch": 1.9385540156954417, "grad_norm": 0.38879660452435616, "learning_rate": 3.3514219141195404e-06, "loss": 0.1845, "step": 5805 }, { "epoch": 1.9388879612623142, "grad_norm": 0.38560538917320175, "learning_rate": 3.3495873759766897e-06, "loss": 0.1926, "step": 5806 }, { "epoch": 1.9392219068291867, "grad_norm": 0.38353142779479343, "learning_rate": 3.347753087148098e-06, "loss": 0.1838, "step": 5807 }, { "epoch": 1.9395558523960594, "grad_norm": 0.44448297494571126, "learning_rate": 3.3459190479108583e-06, "loss": 0.199, "step": 5808 }, { "epoch": 1.9398897979629321, "grad_norm": 0.4529776498547492, "learning_rate": 3.344085258542022e-06, "loss": 0.1979, "step": 5809 }, { "epoch": 1.9402237435298046, "grad_norm": 0.4124811126918051, "learning_rate": 3.3422517193186056e-06, "loss": 0.2068, "step": 5810 }, { "epoch": 1.9405576890966771, "grad_norm": 0.39920971830243007, "learning_rate": 3.340418430517586e-06, "loss": 0.1938, "step": 5811 }, { "epoch": 1.9408916346635499, "grad_norm": 0.4222813253850214, "learning_rate": 3.338585392415904e-06, "loss": 0.2053, "step": 5812 }, { "epoch": 1.9412255802304226, "grad_norm": 0.4161721442405356, "learning_rate": 3.3367526052904585e-06, "loss": 0.1787, "step": 5813 }, { "epoch": 1.941559525797295, "grad_norm": 0.4879776692990766, "learning_rate": 3.3349200694181182e-06, "loss": 0.2053, "step": 5814 }, { "epoch": 1.9418934713641676, "grad_norm": 0.4021223636861502, "learning_rate": 3.333087785075707e-06, "loss": 0.1964, "step": 5815 }, { "epoch": 1.9422274169310403, "grad_norm": 0.37651593810978384, "learning_rate": 3.3312557525400133e-06, "loss": 0.1715, "step": 5816 }, { "epoch": 1.942561362497913, "grad_norm": 0.4319318531805005, "learning_rate": 3.329423972087787e-06, "loss": 0.1955, "step": 5817 }, { "epoch": 1.9428953080647853, "grad_norm": 0.4141613206128405, "learning_rate": 3.3275924439957397e-06, "loss": 0.2074, "step": 5818 }, { "epoch": 1.943229253631658, "grad_norm": 0.38424218929059795, "learning_rate": 3.3257611685405444e-06, "loss": 0.1827, "step": 5819 }, { "epoch": 1.9435631991985307, "grad_norm": 0.45704517395065586, "learning_rate": 3.3239301459988395e-06, "loss": 0.204, "step": 5820 }, { "epoch": 1.9438971447654032, "grad_norm": 0.4519944942975536, "learning_rate": 3.322099376647221e-06, "loss": 0.1961, "step": 5821 }, { "epoch": 1.9442310903322757, "grad_norm": 0.3865117153162869, "learning_rate": 3.320268860762249e-06, "loss": 0.1842, "step": 5822 }, { "epoch": 1.9445650358991484, "grad_norm": 0.39394807708092394, "learning_rate": 3.318438598620444e-06, "loss": 0.1934, "step": 5823 }, { "epoch": 1.9448989814660211, "grad_norm": 0.4040471014908394, "learning_rate": 3.316608590498287e-06, "loss": 0.1833, "step": 5824 }, { "epoch": 1.9452329270328936, "grad_norm": 0.42368161376371266, "learning_rate": 3.314778836672224e-06, "loss": 0.1989, "step": 5825 }, { "epoch": 1.9455668725997661, "grad_norm": 0.3799786063158331, "learning_rate": 3.312949337418661e-06, "loss": 0.1903, "step": 5826 }, { "epoch": 1.9459008181666388, "grad_norm": 0.42001314369222936, "learning_rate": 3.311120093013964e-06, "loss": 0.2029, "step": 5827 }, { "epoch": 1.9462347637335116, "grad_norm": 0.4491441693231125, "learning_rate": 3.3092911037344642e-06, "loss": 0.1899, "step": 5828 }, { "epoch": 1.946568709300384, "grad_norm": 0.40616158884667664, "learning_rate": 3.30746236985645e-06, "loss": 0.1902, "step": 5829 }, { "epoch": 1.9469026548672566, "grad_norm": 0.41435867845292346, "learning_rate": 3.305633891656175e-06, "loss": 0.1944, "step": 5830 }, { "epoch": 1.9472366004341293, "grad_norm": 0.3835569025294799, "learning_rate": 3.3038056694098485e-06, "loss": 0.1866, "step": 5831 }, { "epoch": 1.947570546001002, "grad_norm": 0.41557182152841543, "learning_rate": 3.3019777033936497e-06, "loss": 0.1941, "step": 5832 }, { "epoch": 1.9479044915678745, "grad_norm": 0.4136102028672037, "learning_rate": 3.3001499938837124e-06, "loss": 0.199, "step": 5833 }, { "epoch": 1.948238437134747, "grad_norm": 0.42808162442284475, "learning_rate": 3.2983225411561338e-06, "loss": 0.1888, "step": 5834 }, { "epoch": 1.9485723827016197, "grad_norm": 0.4239596741209335, "learning_rate": 3.296495345486971e-06, "loss": 0.1947, "step": 5835 }, { "epoch": 1.9489063282684922, "grad_norm": 0.38609956620089547, "learning_rate": 3.294668407152245e-06, "loss": 0.1887, "step": 5836 }, { "epoch": 1.9492402738353647, "grad_norm": 0.39282968924115996, "learning_rate": 3.2928417264279338e-06, "loss": 0.1866, "step": 5837 }, { "epoch": 1.9495742194022374, "grad_norm": 0.37824694049603946, "learning_rate": 3.2910153035899826e-06, "loss": 0.1867, "step": 5838 }, { "epoch": 1.9499081649691101, "grad_norm": 0.43800186138716113, "learning_rate": 3.2891891389142933e-06, "loss": 0.2164, "step": 5839 }, { "epoch": 1.9502421105359826, "grad_norm": 0.38749810000277757, "learning_rate": 3.2873632326767278e-06, "loss": 0.1806, "step": 5840 }, { "epoch": 1.9505760561028551, "grad_norm": 0.393525628067674, "learning_rate": 3.2855375851531122e-06, "loss": 0.1861, "step": 5841 }, { "epoch": 1.9509100016697278, "grad_norm": 0.4351462404079153, "learning_rate": 3.283712196619229e-06, "loss": 0.2038, "step": 5842 }, { "epoch": 1.9512439472366006, "grad_norm": 0.42090443021448815, "learning_rate": 3.2818870673508297e-06, "loss": 0.2048, "step": 5843 }, { "epoch": 1.951577892803473, "grad_norm": 0.3790280867900267, "learning_rate": 3.2800621976236184e-06, "loss": 0.1874, "step": 5844 }, { "epoch": 1.9519118383703455, "grad_norm": 0.40688808218095657, "learning_rate": 3.2782375877132643e-06, "loss": 0.1808, "step": 5845 }, { "epoch": 1.9522457839372183, "grad_norm": 0.4256516285672986, "learning_rate": 3.276413237895395e-06, "loss": 0.1954, "step": 5846 }, { "epoch": 1.952579729504091, "grad_norm": 0.4171161147626943, "learning_rate": 3.2745891484456016e-06, "loss": 0.2009, "step": 5847 }, { "epoch": 1.9529136750709635, "grad_norm": 0.5044605426457384, "learning_rate": 3.2727653196394314e-06, "loss": 0.194, "step": 5848 }, { "epoch": 1.953247620637836, "grad_norm": 0.40522989200496995, "learning_rate": 3.270941751752398e-06, "loss": 0.191, "step": 5849 }, { "epoch": 1.9535815662047087, "grad_norm": 0.39708955867580614, "learning_rate": 3.269118445059973e-06, "loss": 0.1799, "step": 5850 }, { "epoch": 1.9539155117715812, "grad_norm": 0.40876661685989146, "learning_rate": 3.267295399837587e-06, "loss": 0.1914, "step": 5851 }, { "epoch": 1.9542494573384537, "grad_norm": 0.39832148352306906, "learning_rate": 3.2654726163606333e-06, "loss": 0.1843, "step": 5852 }, { "epoch": 1.9545834029053264, "grad_norm": 0.4339968232593733, "learning_rate": 3.2636500949044637e-06, "loss": 0.204, "step": 5853 }, { "epoch": 1.9549173484721991, "grad_norm": 0.40521624053581823, "learning_rate": 3.2618278357443913e-06, "loss": 0.1923, "step": 5854 }, { "epoch": 1.9552512940390716, "grad_norm": 0.40925796162009104, "learning_rate": 3.260005839155691e-06, "loss": 0.1942, "step": 5855 }, { "epoch": 1.955585239605944, "grad_norm": 0.4065519345018825, "learning_rate": 3.258184105413597e-06, "loss": 0.1929, "step": 5856 }, { "epoch": 1.9559191851728168, "grad_norm": 0.40373783185327916, "learning_rate": 3.256362634793303e-06, "loss": 0.1836, "step": 5857 }, { "epoch": 1.9562531307396895, "grad_norm": 0.466912546541022, "learning_rate": 3.2545414275699638e-06, "loss": 0.2051, "step": 5858 }, { "epoch": 1.956587076306562, "grad_norm": 0.4119496217398985, "learning_rate": 3.2527204840186944e-06, "loss": 0.2009, "step": 5859 }, { "epoch": 1.9569210218734345, "grad_norm": 0.427568161677971, "learning_rate": 3.2508998044145674e-06, "loss": 0.1853, "step": 5860 }, { "epoch": 1.9572549674403072, "grad_norm": 0.3964003737577245, "learning_rate": 3.249079389032621e-06, "loss": 0.1931, "step": 5861 }, { "epoch": 1.95758891300718, "grad_norm": 0.3733715502028118, "learning_rate": 3.247259238147851e-06, "loss": 0.1891, "step": 5862 }, { "epoch": 1.9579228585740525, "grad_norm": 0.387516171710147, "learning_rate": 3.245439352035209e-06, "loss": 0.1937, "step": 5863 }, { "epoch": 1.958256804140925, "grad_norm": 0.41829601573720177, "learning_rate": 3.243619730969614e-06, "loss": 0.2014, "step": 5864 }, { "epoch": 1.9585907497077977, "grad_norm": 0.37178507032150165, "learning_rate": 3.2418003752259374e-06, "loss": 0.1829, "step": 5865 }, { "epoch": 1.9589246952746704, "grad_norm": 0.3755322252943328, "learning_rate": 3.239981285079016e-06, "loss": 0.1816, "step": 5866 }, { "epoch": 1.9592586408415427, "grad_norm": 0.5064830664211579, "learning_rate": 3.238162460803646e-06, "loss": 0.1801, "step": 5867 }, { "epoch": 1.9595925864084154, "grad_norm": 0.40514585437042433, "learning_rate": 3.2363439026745813e-06, "loss": 0.1937, "step": 5868 }, { "epoch": 1.959926531975288, "grad_norm": 0.3949337542067921, "learning_rate": 3.2345256109665366e-06, "loss": 0.1769, "step": 5869 }, { "epoch": 1.9602604775421606, "grad_norm": 0.39658019897229246, "learning_rate": 3.2327075859541867e-06, "loss": 0.1755, "step": 5870 }, { "epoch": 1.960594423109033, "grad_norm": 0.4007129170784194, "learning_rate": 3.2308898279121646e-06, "loss": 0.1887, "step": 5871 }, { "epoch": 1.9609283686759058, "grad_norm": 0.407882174298797, "learning_rate": 3.2290723371150627e-06, "loss": 0.1933, "step": 5872 }, { "epoch": 1.9612623142427785, "grad_norm": 0.38390199585380574, "learning_rate": 3.2272551138374387e-06, "loss": 0.1857, "step": 5873 }, { "epoch": 1.961596259809651, "grad_norm": 0.4144353996140741, "learning_rate": 3.2254381583538025e-06, "loss": 0.2052, "step": 5874 }, { "epoch": 1.9619302053765235, "grad_norm": 0.42520106945104263, "learning_rate": 3.223621470938628e-06, "loss": 0.1989, "step": 5875 }, { "epoch": 1.9622641509433962, "grad_norm": 0.39528450044868774, "learning_rate": 3.2218050518663457e-06, "loss": 0.1952, "step": 5876 }, { "epoch": 1.962598096510269, "grad_norm": 0.38997068609060653, "learning_rate": 3.219988901411347e-06, "loss": 0.1905, "step": 5877 }, { "epoch": 1.9629320420771414, "grad_norm": 0.43396687286925384, "learning_rate": 3.218173019847985e-06, "loss": 0.195, "step": 5878 }, { "epoch": 1.963265987644014, "grad_norm": 0.41610373323796274, "learning_rate": 3.2163574074505686e-06, "loss": 0.2019, "step": 5879 }, { "epoch": 1.9635999332108867, "grad_norm": 0.41341146951828495, "learning_rate": 3.214542064493367e-06, "loss": 0.2027, "step": 5880 }, { "epoch": 1.9639338787777594, "grad_norm": 0.41138072181872515, "learning_rate": 3.2127269912506103e-06, "loss": 0.1868, "step": 5881 }, { "epoch": 1.9642678243446319, "grad_norm": 0.3866031619654102, "learning_rate": 3.210912187996486e-06, "loss": 0.1932, "step": 5882 }, { "epoch": 1.9646017699115044, "grad_norm": 0.4016530046534839, "learning_rate": 3.2090976550051393e-06, "loss": 0.2022, "step": 5883 }, { "epoch": 1.964935715478377, "grad_norm": 0.4546880535883009, "learning_rate": 3.207283392550681e-06, "loss": 0.1937, "step": 5884 }, { "epoch": 1.9652696610452496, "grad_norm": 0.3751451368240068, "learning_rate": 3.2054694009071753e-06, "loss": 0.1766, "step": 5885 }, { "epoch": 1.965603606612122, "grad_norm": 0.4249952733519631, "learning_rate": 3.2036556803486465e-06, "loss": 0.1897, "step": 5886 }, { "epoch": 1.9659375521789948, "grad_norm": 0.4289533302323695, "learning_rate": 3.2018422311490778e-06, "loss": 0.1872, "step": 5887 }, { "epoch": 1.9662714977458675, "grad_norm": 0.4156318288794827, "learning_rate": 3.200029053582413e-06, "loss": 0.2013, "step": 5888 }, { "epoch": 1.96660544331274, "grad_norm": 0.4234857984820954, "learning_rate": 3.1982161479225514e-06, "loss": 0.2035, "step": 5889 }, { "epoch": 1.9669393888796125, "grad_norm": 0.4631603579410054, "learning_rate": 3.196403514443358e-06, "loss": 0.212, "step": 5890 }, { "epoch": 1.9672733344464852, "grad_norm": 0.39403994004750187, "learning_rate": 3.19459115341865e-06, "loss": 0.1785, "step": 5891 }, { "epoch": 1.967607280013358, "grad_norm": 0.3926567448420319, "learning_rate": 3.1927790651222073e-06, "loss": 0.189, "step": 5892 }, { "epoch": 1.9679412255802304, "grad_norm": 0.3989322852990989, "learning_rate": 3.1909672498277656e-06, "loss": 0.2059, "step": 5893 }, { "epoch": 1.968275171147103, "grad_norm": 0.42083685608122595, "learning_rate": 3.1891557078090218e-06, "loss": 0.1998, "step": 5894 }, { "epoch": 1.9686091167139756, "grad_norm": 0.4383329776305396, "learning_rate": 3.187344439339628e-06, "loss": 0.1941, "step": 5895 }, { "epoch": 1.9689430622808484, "grad_norm": 0.41449458340242895, "learning_rate": 3.1855334446932025e-06, "loss": 0.1944, "step": 5896 }, { "epoch": 1.9692770078477209, "grad_norm": 0.3717497165199124, "learning_rate": 3.1837227241433145e-06, "loss": 0.1877, "step": 5897 }, { "epoch": 1.9696109534145934, "grad_norm": 0.4011174641338493, "learning_rate": 3.181912277963495e-06, "loss": 0.1987, "step": 5898 }, { "epoch": 1.969944898981466, "grad_norm": 0.38902286671665814, "learning_rate": 3.180102106427233e-06, "loss": 0.1867, "step": 5899 }, { "epoch": 1.9702788445483386, "grad_norm": 0.42521419169467267, "learning_rate": 3.178292209807976e-06, "loss": 0.2018, "step": 5900 }, { "epoch": 1.970612790115211, "grad_norm": 0.4065707042117532, "learning_rate": 3.1764825883791306e-06, "loss": 0.1885, "step": 5901 }, { "epoch": 1.9709467356820838, "grad_norm": 0.412725809040078, "learning_rate": 3.174673242414062e-06, "loss": 0.1897, "step": 5902 }, { "epoch": 1.9712806812489565, "grad_norm": 0.375413582686983, "learning_rate": 3.1728641721860925e-06, "loss": 0.185, "step": 5903 }, { "epoch": 1.971614626815829, "grad_norm": 0.41467689840017014, "learning_rate": 3.1710553779685036e-06, "loss": 0.1973, "step": 5904 }, { "epoch": 1.9719485723827015, "grad_norm": 0.41671275410394143, "learning_rate": 3.169246860034535e-06, "loss": 0.2082, "step": 5905 }, { "epoch": 1.9722825179495742, "grad_norm": 0.4057263289652559, "learning_rate": 3.1674386186573853e-06, "loss": 0.1897, "step": 5906 }, { "epoch": 1.972616463516447, "grad_norm": 0.38913647089351844, "learning_rate": 3.1656306541102073e-06, "loss": 0.1856, "step": 5907 }, { "epoch": 1.9729504090833194, "grad_norm": 0.3486855638934541, "learning_rate": 3.16382296666612e-06, "loss": 0.1679, "step": 5908 }, { "epoch": 1.973284354650192, "grad_norm": 0.4014595623992815, "learning_rate": 3.1620155565981942e-06, "loss": 0.191, "step": 5909 }, { "epoch": 1.9736183002170646, "grad_norm": 0.40381926251995, "learning_rate": 3.1602084241794595e-06, "loss": 0.1839, "step": 5910 }, { "epoch": 1.9739522457839374, "grad_norm": 0.37605441183464833, "learning_rate": 3.158401569682906e-06, "loss": 0.1875, "step": 5911 }, { "epoch": 1.9742861913508098, "grad_norm": 0.3777469896395668, "learning_rate": 3.156594993381479e-06, "loss": 0.1886, "step": 5912 }, { "epoch": 1.9746201369176823, "grad_norm": 0.3736417903821561, "learning_rate": 3.154788695548082e-06, "loss": 0.1822, "step": 5913 }, { "epoch": 1.974954082484555, "grad_norm": 0.3663210624038179, "learning_rate": 3.152982676455581e-06, "loss": 0.1726, "step": 5914 }, { "epoch": 1.9752880280514278, "grad_norm": 0.39921583039669706, "learning_rate": 3.151176936376794e-06, "loss": 0.1952, "step": 5915 }, { "epoch": 1.9756219736183, "grad_norm": 0.41004780294376225, "learning_rate": 3.1493714755845013e-06, "loss": 0.1892, "step": 5916 }, { "epoch": 1.9759559191851728, "grad_norm": 0.3953073598725052, "learning_rate": 3.1475662943514366e-06, "loss": 0.1846, "step": 5917 }, { "epoch": 1.9762898647520455, "grad_norm": 0.41934031809748096, "learning_rate": 3.145761392950293e-06, "loss": 0.1938, "step": 5918 }, { "epoch": 1.976623810318918, "grad_norm": 0.3991536627450681, "learning_rate": 3.1439567716537268e-06, "loss": 0.1842, "step": 5919 }, { "epoch": 1.9769577558857905, "grad_norm": 0.42211108035671385, "learning_rate": 3.142152430734343e-06, "loss": 0.1926, "step": 5920 }, { "epoch": 1.9772917014526632, "grad_norm": 0.3874004858043167, "learning_rate": 3.140348370464711e-06, "loss": 0.1825, "step": 5921 }, { "epoch": 1.977625647019536, "grad_norm": 0.4959700492993034, "learning_rate": 3.138544591117354e-06, "loss": 0.1881, "step": 5922 }, { "epoch": 1.9779595925864084, "grad_norm": 0.4058894672658346, "learning_rate": 3.1367410929647544e-06, "loss": 0.2038, "step": 5923 }, { "epoch": 1.978293538153281, "grad_norm": 0.4178550089316045, "learning_rate": 3.1349378762793515e-06, "loss": 0.2012, "step": 5924 }, { "epoch": 1.9786274837201536, "grad_norm": 0.4004663381408806, "learning_rate": 3.133134941333543e-06, "loss": 0.1837, "step": 5925 }, { "epoch": 1.9789614292870263, "grad_norm": 0.41889122105286186, "learning_rate": 3.1313322883996833e-06, "loss": 0.2016, "step": 5926 }, { "epoch": 1.9792953748538988, "grad_norm": 0.39423179870453967, "learning_rate": 3.129529917750085e-06, "loss": 0.1859, "step": 5927 }, { "epoch": 1.9796293204207713, "grad_norm": 0.4176399622736979, "learning_rate": 3.1277278296570157e-06, "loss": 0.1969, "step": 5928 }, { "epoch": 1.979963265987644, "grad_norm": 0.720364647524329, "learning_rate": 3.1259260243927035e-06, "loss": 0.1926, "step": 5929 }, { "epoch": 1.9802972115545168, "grad_norm": 0.41290547958373885, "learning_rate": 3.12412450222933e-06, "loss": 0.1898, "step": 5930 }, { "epoch": 1.9806311571213893, "grad_norm": 0.3671561676737529, "learning_rate": 3.12232326343904e-06, "loss": 0.1741, "step": 5931 }, { "epoch": 1.9809651026882618, "grad_norm": 0.4006350592225924, "learning_rate": 3.1205223082939302e-06, "loss": 0.1908, "step": 5932 }, { "epoch": 1.9812990482551345, "grad_norm": 0.398681215978086, "learning_rate": 3.1187216370660558e-06, "loss": 0.1936, "step": 5933 }, { "epoch": 1.981632993822007, "grad_norm": 0.43415498393757535, "learning_rate": 3.1169212500274294e-06, "loss": 0.1943, "step": 5934 }, { "epoch": 1.9819669393888795, "grad_norm": 0.4135058440013624, "learning_rate": 3.11512114745002e-06, "loss": 0.2106, "step": 5935 }, { "epoch": 1.9823008849557522, "grad_norm": 0.3846174813571026, "learning_rate": 3.113321329605754e-06, "loss": 0.1796, "step": 5936 }, { "epoch": 1.982634830522625, "grad_norm": 0.383507587269331, "learning_rate": 3.1115217967665174e-06, "loss": 0.1855, "step": 5937 }, { "epoch": 1.9829687760894974, "grad_norm": 0.37537114616087025, "learning_rate": 3.1097225492041494e-06, "loss": 0.1747, "step": 5938 }, { "epoch": 1.98330272165637, "grad_norm": 0.39957620124069226, "learning_rate": 3.107923587190448e-06, "loss": 0.1931, "step": 5939 }, { "epoch": 1.9836366672232426, "grad_norm": 0.403505274568005, "learning_rate": 3.106124910997168e-06, "loss": 0.1886, "step": 5940 }, { "epoch": 1.9839706127901153, "grad_norm": 0.38362226676493627, "learning_rate": 3.1043265208960187e-06, "loss": 0.1823, "step": 5941 }, { "epoch": 1.9843045583569878, "grad_norm": 0.39122961355599384, "learning_rate": 3.102528417158668e-06, "loss": 0.1859, "step": 5942 }, { "epoch": 1.9846385039238603, "grad_norm": 0.41049641938952836, "learning_rate": 3.1007306000567434e-06, "loss": 0.1918, "step": 5943 }, { "epoch": 1.984972449490733, "grad_norm": 0.36272757102046843, "learning_rate": 3.0989330698618248e-06, "loss": 0.175, "step": 5944 }, { "epoch": 1.9853063950576058, "grad_norm": 0.4101938673103447, "learning_rate": 3.097135826845451e-06, "loss": 0.193, "step": 5945 }, { "epoch": 1.9856403406244783, "grad_norm": 0.3690806719292599, "learning_rate": 3.0953388712791155e-06, "loss": 0.1597, "step": 5946 }, { "epoch": 1.9859742861913507, "grad_norm": 0.4225456794662782, "learning_rate": 3.09354220343427e-06, "loss": 0.1867, "step": 5947 }, { "epoch": 1.9863082317582235, "grad_norm": 0.4282830064350807, "learning_rate": 3.0917458235823215e-06, "loss": 0.2025, "step": 5948 }, { "epoch": 1.986642177325096, "grad_norm": 0.4349543625503407, "learning_rate": 3.089949731994637e-06, "loss": 0.2014, "step": 5949 }, { "epoch": 1.9869761228919685, "grad_norm": 0.37381968406717025, "learning_rate": 3.088153928942535e-06, "loss": 0.1842, "step": 5950 }, { "epoch": 1.9873100684588412, "grad_norm": 0.39590274000895553, "learning_rate": 3.0863584146972935e-06, "loss": 0.1806, "step": 5951 }, { "epoch": 1.987644014025714, "grad_norm": 0.3863725717307747, "learning_rate": 3.084563189530146e-06, "loss": 0.1792, "step": 5952 }, { "epoch": 1.9879779595925864, "grad_norm": 0.40619770607208006, "learning_rate": 3.0827682537122817e-06, "loss": 0.1942, "step": 5953 }, { "epoch": 1.9883119051594589, "grad_norm": 0.4483264558049833, "learning_rate": 3.0809736075148456e-06, "loss": 0.2014, "step": 5954 }, { "epoch": 1.9886458507263316, "grad_norm": 0.4199360913214994, "learning_rate": 3.0791792512089443e-06, "loss": 0.182, "step": 5955 }, { "epoch": 1.9889797962932043, "grad_norm": 0.4058012474543562, "learning_rate": 3.0773851850656335e-06, "loss": 0.1855, "step": 5956 }, { "epoch": 1.9893137418600768, "grad_norm": 0.3894507983132781, "learning_rate": 3.075591409355929e-06, "loss": 0.1914, "step": 5957 }, { "epoch": 1.9896476874269493, "grad_norm": 0.39437724621404885, "learning_rate": 3.073797924350801e-06, "loss": 0.195, "step": 5958 }, { "epoch": 1.989981632993822, "grad_norm": 0.39982310703857066, "learning_rate": 3.0720047303211746e-06, "loss": 0.1943, "step": 5959 }, { "epoch": 1.9903155785606947, "grad_norm": 0.43063169248758626, "learning_rate": 3.0702118275379376e-06, "loss": 0.1986, "step": 5960 }, { "epoch": 1.9906495241275672, "grad_norm": 0.39695348018292154, "learning_rate": 3.0684192162719263e-06, "loss": 0.1922, "step": 5961 }, { "epoch": 1.9909834696944397, "grad_norm": 0.398425444924352, "learning_rate": 3.066626896793936e-06, "loss": 0.1887, "step": 5962 }, { "epoch": 1.9913174152613125, "grad_norm": 0.3756162081582355, "learning_rate": 3.0648348693747177e-06, "loss": 0.1855, "step": 5963 }, { "epoch": 1.9916513608281852, "grad_norm": 0.4199243577842171, "learning_rate": 3.063043134284979e-06, "loss": 0.1933, "step": 5964 }, { "epoch": 1.9919853063950574, "grad_norm": 0.38131482579075354, "learning_rate": 3.0612516917953783e-06, "loss": 0.1843, "step": 5965 }, { "epoch": 1.9923192519619302, "grad_norm": 0.369216400903443, "learning_rate": 3.0594605421765406e-06, "loss": 0.1797, "step": 5966 }, { "epoch": 1.9926531975288029, "grad_norm": 0.4121903275320649, "learning_rate": 3.057669685699037e-06, "loss": 0.1863, "step": 5967 }, { "epoch": 1.9929871430956754, "grad_norm": 0.4355489715595582, "learning_rate": 3.0558791226333974e-06, "loss": 0.1911, "step": 5968 }, { "epoch": 1.9933210886625479, "grad_norm": 0.3789799913064823, "learning_rate": 3.0540888532501075e-06, "loss": 0.1926, "step": 5969 }, { "epoch": 1.9936550342294206, "grad_norm": 0.42365316429605293, "learning_rate": 3.052298877819608e-06, "loss": 0.1904, "step": 5970 }, { "epoch": 1.9939889797962933, "grad_norm": 0.41698808702160367, "learning_rate": 3.050509196612297e-06, "loss": 0.1963, "step": 5971 }, { "epoch": 1.9943229253631658, "grad_norm": 0.37543262198026217, "learning_rate": 3.0487198098985265e-06, "loss": 0.1944, "step": 5972 }, { "epoch": 1.9946568709300383, "grad_norm": 0.43641995145584184, "learning_rate": 3.046930717948604e-06, "loss": 0.1892, "step": 5973 }, { "epoch": 1.994990816496911, "grad_norm": 0.38182184018203114, "learning_rate": 3.0451419210327935e-06, "loss": 0.1806, "step": 5974 }, { "epoch": 1.9953247620637837, "grad_norm": 0.3818787684355753, "learning_rate": 3.0433534194213143e-06, "loss": 0.1799, "step": 5975 }, { "epoch": 1.9956587076306562, "grad_norm": 0.3744181477267095, "learning_rate": 3.0415652133843375e-06, "loss": 0.1816, "step": 5976 }, { "epoch": 1.9959926531975287, "grad_norm": 0.40734828799825423, "learning_rate": 3.0397773031919966e-06, "loss": 0.2016, "step": 5977 }, { "epoch": 1.9963265987644014, "grad_norm": 0.3871428637806214, "learning_rate": 3.0379896891143746e-06, "loss": 0.1806, "step": 5978 }, { "epoch": 1.9966605443312742, "grad_norm": 0.39395345656310504, "learning_rate": 3.036202371421513e-06, "loss": 0.1812, "step": 5979 }, { "epoch": 1.9969944898981467, "grad_norm": 0.41099460235481833, "learning_rate": 3.034415350383405e-06, "loss": 0.1939, "step": 5980 }, { "epoch": 1.9973284354650191, "grad_norm": 0.3679936586206283, "learning_rate": 3.0326286262700035e-06, "loss": 0.1879, "step": 5981 }, { "epoch": 1.9976623810318919, "grad_norm": 0.3814246949048082, "learning_rate": 3.030842199351212e-06, "loss": 0.1879, "step": 5982 }, { "epoch": 1.9979963265987644, "grad_norm": 0.42714571464627366, "learning_rate": 3.0290560698968907e-06, "loss": 0.2029, "step": 5983 }, { "epoch": 1.9983302721656369, "grad_norm": 0.4021653615871575, "learning_rate": 3.0272702381768593e-06, "loss": 0.1936, "step": 5984 }, { "epoch": 1.9986642177325096, "grad_norm": 0.39922967076189847, "learning_rate": 3.0254847044608872e-06, "loss": 0.2034, "step": 5985 }, { "epoch": 1.9989981632993823, "grad_norm": 0.4103626909173217, "learning_rate": 3.0236994690186983e-06, "loss": 0.1874, "step": 5986 }, { "epoch": 1.9993321088662548, "grad_norm": 0.40222096398131774, "learning_rate": 3.0219145321199763e-06, "loss": 0.1868, "step": 5987 }, { "epoch": 1.9996660544331273, "grad_norm": 0.37798334008408047, "learning_rate": 3.0201298940343543e-06, "loss": 0.1801, "step": 5988 }, { "epoch": 2.0, "grad_norm": 0.41315481474910304, "learning_rate": 3.018345555031422e-06, "loss": 0.1861, "step": 5989 }, { "epoch": 2.0, "eval_loss": 0.20493414998054504, "eval_runtime": 184.3833, "eval_samples_per_second": 109.408, "eval_steps_per_second": 1.714, "step": 5989 }, { "epoch": 2.0003339455668727, "grad_norm": 0.3534058618254128, "learning_rate": 3.0165615153807293e-06, "loss": 0.155, "step": 5990 }, { "epoch": 2.000667891133745, "grad_norm": 0.4330126571772758, "learning_rate": 3.014777775351774e-06, "loss": 0.1767, "step": 5991 }, { "epoch": 2.0010018367006177, "grad_norm": 0.3993798294281268, "learning_rate": 3.012994335214011e-06, "loss": 0.1656, "step": 5992 }, { "epoch": 2.0013357822674904, "grad_norm": 0.35823939570937335, "learning_rate": 3.0112111952368496e-06, "loss": 0.152, "step": 5993 }, { "epoch": 2.001669727834363, "grad_norm": 0.4274610122348606, "learning_rate": 3.009428355689654e-06, "loss": 0.1683, "step": 5994 }, { "epoch": 2.0020036734012354, "grad_norm": 0.39209988341414004, "learning_rate": 3.007645816841743e-06, "loss": 0.1553, "step": 5995 }, { "epoch": 2.002337618968108, "grad_norm": 0.41360416403789046, "learning_rate": 3.0058635789623926e-06, "loss": 0.1693, "step": 5996 }, { "epoch": 2.002671564534981, "grad_norm": 0.3964326471268367, "learning_rate": 3.0040816423208276e-06, "loss": 0.157, "step": 5997 }, { "epoch": 2.0030055101018536, "grad_norm": 0.38668198118986086, "learning_rate": 3.002300007186232e-06, "loss": 0.1524, "step": 5998 }, { "epoch": 2.003339455668726, "grad_norm": 0.405645756289619, "learning_rate": 3.0005186738277407e-06, "loss": 0.1656, "step": 5999 }, { "epoch": 2.0036734012355986, "grad_norm": 0.4397302560149037, "learning_rate": 2.9987376425144477e-06, "loss": 0.1577, "step": 6000 }, { "epoch": 2.0040073468024713, "grad_norm": 0.4394388666470143, "learning_rate": 2.9969569135153985e-06, "loss": 0.1639, "step": 6001 }, { "epoch": 2.004341292369344, "grad_norm": 0.4849570632205489, "learning_rate": 2.9951764870995925e-06, "loss": 0.1621, "step": 6002 }, { "epoch": 2.0046752379362163, "grad_norm": 0.48705098971685307, "learning_rate": 2.9933963635359847e-06, "loss": 0.1646, "step": 6003 }, { "epoch": 2.005009183503089, "grad_norm": 0.4641189129482545, "learning_rate": 2.991616543093483e-06, "loss": 0.1749, "step": 6004 }, { "epoch": 2.0053431290699617, "grad_norm": 0.46677908584428857, "learning_rate": 2.9898370260409502e-06, "loss": 0.1623, "step": 6005 }, { "epoch": 2.005677074636834, "grad_norm": 0.4711820568033053, "learning_rate": 2.9880578126472015e-06, "loss": 0.16, "step": 6006 }, { "epoch": 2.0060110202037067, "grad_norm": 0.47617074156351524, "learning_rate": 2.9862789031810126e-06, "loss": 0.1721, "step": 6007 }, { "epoch": 2.0063449657705794, "grad_norm": 0.4487052591604857, "learning_rate": 2.984500297911106e-06, "loss": 0.1584, "step": 6008 }, { "epoch": 2.006678911337452, "grad_norm": 0.447671304592757, "learning_rate": 2.9827219971061607e-06, "loss": 0.1654, "step": 6009 }, { "epoch": 2.0070128569043244, "grad_norm": 0.47125672755220205, "learning_rate": 2.98094400103481e-06, "loss": 0.1681, "step": 6010 }, { "epoch": 2.007346802471197, "grad_norm": 0.45728456238338155, "learning_rate": 2.9791663099656424e-06, "loss": 0.1618, "step": 6011 }, { "epoch": 2.00768074803807, "grad_norm": 0.47447060864994905, "learning_rate": 2.977388924167196e-06, "loss": 0.1492, "step": 6012 }, { "epoch": 2.0080146936049426, "grad_norm": 0.446422979462353, "learning_rate": 2.975611843907971e-06, "loss": 0.1576, "step": 6013 }, { "epoch": 2.008348639171815, "grad_norm": 0.48529971727386545, "learning_rate": 2.9738350694564117e-06, "loss": 0.1674, "step": 6014 }, { "epoch": 2.0086825847386875, "grad_norm": 0.43999105968259355, "learning_rate": 2.9720586010809234e-06, "loss": 0.1584, "step": 6015 }, { "epoch": 2.0090165303055603, "grad_norm": 0.42356091106399757, "learning_rate": 2.9702824390498615e-06, "loss": 0.155, "step": 6016 }, { "epoch": 2.009350475872433, "grad_norm": 0.4529826679339405, "learning_rate": 2.9685065836315362e-06, "loss": 0.1594, "step": 6017 }, { "epoch": 2.0096844214393053, "grad_norm": 0.44293372978393375, "learning_rate": 2.9667310350942103e-06, "loss": 0.1601, "step": 6018 }, { "epoch": 2.010018367006178, "grad_norm": 0.4662546772405464, "learning_rate": 2.964955793706104e-06, "loss": 0.1651, "step": 6019 }, { "epoch": 2.0103523125730507, "grad_norm": 0.4662905596333431, "learning_rate": 2.963180859735387e-06, "loss": 0.1564, "step": 6020 }, { "epoch": 2.0106862581399234, "grad_norm": 0.5287577603093535, "learning_rate": 2.961406233450184e-06, "loss": 0.1658, "step": 6021 }, { "epoch": 2.0110202037067957, "grad_norm": 0.5051506571548454, "learning_rate": 2.9596319151185713e-06, "loss": 0.178, "step": 6022 }, { "epoch": 2.0113541492736684, "grad_norm": 0.4822552977434536, "learning_rate": 2.9578579050085836e-06, "loss": 0.1756, "step": 6023 }, { "epoch": 2.011688094840541, "grad_norm": 0.5070476190943671, "learning_rate": 2.956084203388204e-06, "loss": 0.1565, "step": 6024 }, { "epoch": 2.0120220404074134, "grad_norm": 0.4697832208358172, "learning_rate": 2.9543108105253733e-06, "loss": 0.1665, "step": 6025 }, { "epoch": 2.012355985974286, "grad_norm": 0.47328140753003795, "learning_rate": 2.9525377266879813e-06, "loss": 0.1646, "step": 6026 }, { "epoch": 2.012689931541159, "grad_norm": 0.45988128423824537, "learning_rate": 2.950764952143874e-06, "loss": 0.1568, "step": 6027 }, { "epoch": 2.0130238771080315, "grad_norm": 0.4671425277097255, "learning_rate": 2.9489924871608495e-06, "loss": 0.1643, "step": 6028 }, { "epoch": 2.013357822674904, "grad_norm": 0.4991890618376096, "learning_rate": 2.9472203320066594e-06, "loss": 0.1721, "step": 6029 }, { "epoch": 2.0136917682417765, "grad_norm": 0.4946814103252608, "learning_rate": 2.9454484869490074e-06, "loss": 0.1696, "step": 6030 }, { "epoch": 2.0140257138086493, "grad_norm": 0.5118310832695881, "learning_rate": 2.943676952255554e-06, "loss": 0.1657, "step": 6031 }, { "epoch": 2.014359659375522, "grad_norm": 0.49387059069204864, "learning_rate": 2.9419057281939106e-06, "loss": 0.168, "step": 6032 }, { "epoch": 2.0146936049423942, "grad_norm": 0.4596647029669583, "learning_rate": 2.94013481503164e-06, "loss": 0.1597, "step": 6033 }, { "epoch": 2.015027550509267, "grad_norm": 0.49980994832903475, "learning_rate": 2.9383642130362596e-06, "loss": 0.1647, "step": 6034 }, { "epoch": 2.0153614960761397, "grad_norm": 0.4739084813661146, "learning_rate": 2.9365939224752394e-06, "loss": 0.1564, "step": 6035 }, { "epoch": 2.0156954416430124, "grad_norm": 0.48889393560437605, "learning_rate": 2.934823943616001e-06, "loss": 0.1683, "step": 6036 }, { "epoch": 2.0160293872098847, "grad_norm": 0.46643889067317856, "learning_rate": 2.933054276725925e-06, "loss": 0.1613, "step": 6037 }, { "epoch": 2.0163633327767574, "grad_norm": 0.45399739753399115, "learning_rate": 2.9312849220723382e-06, "loss": 0.1559, "step": 6038 }, { "epoch": 2.01669727834363, "grad_norm": 0.45507324551278094, "learning_rate": 2.929515879922522e-06, "loss": 0.159, "step": 6039 }, { "epoch": 2.0170312239105024, "grad_norm": 0.4612841631939222, "learning_rate": 2.9277471505437105e-06, "loss": 0.1602, "step": 6040 }, { "epoch": 2.017365169477375, "grad_norm": 0.4776368978691176, "learning_rate": 2.925978734203092e-06, "loss": 0.1723, "step": 6041 }, { "epoch": 2.017699115044248, "grad_norm": 0.47341321323873353, "learning_rate": 2.924210631167807e-06, "loss": 0.1518, "step": 6042 }, { "epoch": 2.0180330606111205, "grad_norm": 0.4839747955511077, "learning_rate": 2.922442841704948e-06, "loss": 0.1621, "step": 6043 }, { "epoch": 2.018367006177993, "grad_norm": 0.4883511258492533, "learning_rate": 2.920675366081559e-06, "loss": 0.1636, "step": 6044 }, { "epoch": 2.0187009517448655, "grad_norm": 0.5464115849984775, "learning_rate": 2.9189082045646404e-06, "loss": 0.1777, "step": 6045 }, { "epoch": 2.0190348973117382, "grad_norm": 0.46191595447785594, "learning_rate": 2.9171413574211426e-06, "loss": 0.1623, "step": 6046 }, { "epoch": 2.019368842878611, "grad_norm": 0.49795505123926764, "learning_rate": 2.9153748249179637e-06, "loss": 0.1667, "step": 6047 }, { "epoch": 2.0197027884454832, "grad_norm": 0.47251837346553677, "learning_rate": 2.9136086073219665e-06, "loss": 0.1539, "step": 6048 }, { "epoch": 2.020036734012356, "grad_norm": 0.4581855742704899, "learning_rate": 2.9118427048999544e-06, "loss": 0.1555, "step": 6049 }, { "epoch": 2.0203706795792287, "grad_norm": 0.5045197369778212, "learning_rate": 2.9100771179186904e-06, "loss": 0.17, "step": 6050 }, { "epoch": 2.0207046251461014, "grad_norm": 0.4421223167410785, "learning_rate": 2.9083118466448845e-06, "loss": 0.1502, "step": 6051 }, { "epoch": 2.0210385707129737, "grad_norm": 0.4474440028569155, "learning_rate": 2.9065468913452045e-06, "loss": 0.1462, "step": 6052 }, { "epoch": 2.0213725162798464, "grad_norm": 0.4925003177758385, "learning_rate": 2.904782252286264e-06, "loss": 0.1618, "step": 6053 }, { "epoch": 2.021706461846719, "grad_norm": 0.4821571393316654, "learning_rate": 2.903017929734635e-06, "loss": 0.1662, "step": 6054 }, { "epoch": 2.0220404074135914, "grad_norm": 0.49103538206612174, "learning_rate": 2.9012539239568405e-06, "loss": 0.1633, "step": 6055 }, { "epoch": 2.022374352980464, "grad_norm": 0.4891641946599009, "learning_rate": 2.899490235219351e-06, "loss": 0.1706, "step": 6056 }, { "epoch": 2.022708298547337, "grad_norm": 0.4685788784764766, "learning_rate": 2.897726863788595e-06, "loss": 0.1602, "step": 6057 }, { "epoch": 2.0230422441142095, "grad_norm": 0.4753894335902614, "learning_rate": 2.8959638099309504e-06, "loss": 0.1542, "step": 6058 }, { "epoch": 2.023376189681082, "grad_norm": 0.4919778734814164, "learning_rate": 2.8942010739127446e-06, "loss": 0.1572, "step": 6059 }, { "epoch": 2.0237101352479545, "grad_norm": 0.46606003547768277, "learning_rate": 2.8924386560002627e-06, "loss": 0.1574, "step": 6060 }, { "epoch": 2.0240440808148272, "grad_norm": 0.48964811026319693, "learning_rate": 2.8906765564597384e-06, "loss": 0.1502, "step": 6061 }, { "epoch": 2.0243780263817, "grad_norm": 0.48273759736055805, "learning_rate": 2.8889147755573556e-06, "loss": 0.1546, "step": 6062 }, { "epoch": 2.024711971948572, "grad_norm": 0.5370134887436172, "learning_rate": 2.8871533135592544e-06, "loss": 0.1724, "step": 6063 }, { "epoch": 2.025045917515445, "grad_norm": 0.45586288119980345, "learning_rate": 2.8853921707315215e-06, "loss": 0.1602, "step": 6064 }, { "epoch": 2.0253798630823177, "grad_norm": 0.49510158520645964, "learning_rate": 2.8836313473402e-06, "loss": 0.1659, "step": 6065 }, { "epoch": 2.0257138086491904, "grad_norm": 0.4668970296054312, "learning_rate": 2.881870843651282e-06, "loss": 0.1579, "step": 6066 }, { "epoch": 2.0260477542160626, "grad_norm": 0.4694378333177316, "learning_rate": 2.8801106599307164e-06, "loss": 0.1492, "step": 6067 }, { "epoch": 2.0263816997829354, "grad_norm": 0.4914932697805478, "learning_rate": 2.8783507964443942e-06, "loss": 0.1627, "step": 6068 }, { "epoch": 2.026715645349808, "grad_norm": 0.4815900522040312, "learning_rate": 2.8765912534581674e-06, "loss": 0.1626, "step": 6069 }, { "epoch": 2.027049590916681, "grad_norm": 0.48576180596656354, "learning_rate": 2.874832031237833e-06, "loss": 0.1565, "step": 6070 }, { "epoch": 2.027383536483553, "grad_norm": 0.5102513368484094, "learning_rate": 2.873073130049142e-06, "loss": 0.1637, "step": 6071 }, { "epoch": 2.027717482050426, "grad_norm": 0.4971694308223245, "learning_rate": 2.8713145501578e-06, "loss": 0.1609, "step": 6072 }, { "epoch": 2.0280514276172985, "grad_norm": 0.5074347111244822, "learning_rate": 2.869556291829461e-06, "loss": 0.1503, "step": 6073 }, { "epoch": 2.028385373184171, "grad_norm": 0.4216789477572703, "learning_rate": 2.8677983553297266e-06, "loss": 0.1452, "step": 6074 }, { "epoch": 2.0287193187510435, "grad_norm": 0.49704788792406124, "learning_rate": 2.8660407409241593e-06, "loss": 0.1625, "step": 6075 }, { "epoch": 2.029053264317916, "grad_norm": 0.49175915805051923, "learning_rate": 2.864283448878262e-06, "loss": 0.1694, "step": 6076 }, { "epoch": 2.029387209884789, "grad_norm": 0.46571201952476754, "learning_rate": 2.8625264794574975e-06, "loss": 0.1572, "step": 6077 }, { "epoch": 2.029721155451661, "grad_norm": 0.500277066125853, "learning_rate": 2.860769832927276e-06, "loss": 0.1662, "step": 6078 }, { "epoch": 2.030055101018534, "grad_norm": 0.49439200808987693, "learning_rate": 2.8590135095529624e-06, "loss": 0.1676, "step": 6079 }, { "epoch": 2.0303890465854066, "grad_norm": 0.44751887545978086, "learning_rate": 2.8572575095998646e-06, "loss": 0.1479, "step": 6080 }, { "epoch": 2.0307229921522794, "grad_norm": 0.4366187834049901, "learning_rate": 2.855501833333253e-06, "loss": 0.1499, "step": 6081 }, { "epoch": 2.0310569377191516, "grad_norm": 0.4463655726940175, "learning_rate": 2.853746481018337e-06, "loss": 0.1478, "step": 6082 }, { "epoch": 2.0313908832860244, "grad_norm": 0.525002855673986, "learning_rate": 2.8519914529202868e-06, "loss": 0.1768, "step": 6083 }, { "epoch": 2.031724828852897, "grad_norm": 0.46830169747916234, "learning_rate": 2.8502367493042217e-06, "loss": 0.1652, "step": 6084 }, { "epoch": 2.03205877441977, "grad_norm": 0.474430022851497, "learning_rate": 2.848482370435206e-06, "loss": 0.1601, "step": 6085 }, { "epoch": 2.032392719986642, "grad_norm": 0.4858834048312866, "learning_rate": 2.8467283165782643e-06, "loss": 0.1637, "step": 6086 }, { "epoch": 2.0327266655535148, "grad_norm": 0.5000627831706353, "learning_rate": 2.8449745879983614e-06, "loss": 0.1562, "step": 6087 }, { "epoch": 2.0330606111203875, "grad_norm": 0.48498567163926287, "learning_rate": 2.8432211849604218e-06, "loss": 0.1594, "step": 6088 }, { "epoch": 2.0333945566872598, "grad_norm": 0.4706989935254033, "learning_rate": 2.841468107729318e-06, "loss": 0.1627, "step": 6089 }, { "epoch": 2.0337285022541325, "grad_norm": 0.4897839906916831, "learning_rate": 2.8397153565698744e-06, "loss": 0.1567, "step": 6090 }, { "epoch": 2.034062447821005, "grad_norm": 0.46017908487227666, "learning_rate": 2.8379629317468604e-06, "loss": 0.1588, "step": 6091 }, { "epoch": 2.034396393387878, "grad_norm": 0.4692319755849913, "learning_rate": 2.8362108335250044e-06, "loss": 0.1522, "step": 6092 }, { "epoch": 2.03473033895475, "grad_norm": 0.520679375837806, "learning_rate": 2.834459062168978e-06, "loss": 0.1718, "step": 6093 }, { "epoch": 2.035064284521623, "grad_norm": 0.5246005906068195, "learning_rate": 2.8327076179434088e-06, "loss": 0.1669, "step": 6094 }, { "epoch": 2.0353982300884956, "grad_norm": 0.45873793348961106, "learning_rate": 2.8309565011128732e-06, "loss": 0.1488, "step": 6095 }, { "epoch": 2.0357321756553683, "grad_norm": 0.47134127061918596, "learning_rate": 2.8292057119418994e-06, "loss": 0.1631, "step": 6096 }, { "epoch": 2.0360661212222406, "grad_norm": 0.4729789859407233, "learning_rate": 2.827455250694961e-06, "loss": 0.16, "step": 6097 }, { "epoch": 2.0364000667891133, "grad_norm": 0.5146567715009788, "learning_rate": 2.8257051176364903e-06, "loss": 0.1588, "step": 6098 }, { "epoch": 2.036734012355986, "grad_norm": 0.4937190219735824, "learning_rate": 2.8239553130308604e-06, "loss": 0.1733, "step": 6099 }, { "epoch": 2.0370679579228588, "grad_norm": 0.49973854857161226, "learning_rate": 2.8222058371424033e-06, "loss": 0.1609, "step": 6100 }, { "epoch": 2.037401903489731, "grad_norm": 0.5022894018079591, "learning_rate": 2.820456690235397e-06, "loss": 0.1618, "step": 6101 }, { "epoch": 2.0377358490566038, "grad_norm": 0.5307860067204769, "learning_rate": 2.8187078725740723e-06, "loss": 0.1717, "step": 6102 }, { "epoch": 2.0380697946234765, "grad_norm": 0.5005034713170023, "learning_rate": 2.8169593844226063e-06, "loss": 0.1656, "step": 6103 }, { "epoch": 2.0384037401903488, "grad_norm": 0.44826699148693205, "learning_rate": 2.815211226045131e-06, "loss": 0.1444, "step": 6104 }, { "epoch": 2.0387376857572215, "grad_norm": 0.4616708746298766, "learning_rate": 2.8134633977057236e-06, "loss": 0.1543, "step": 6105 }, { "epoch": 2.039071631324094, "grad_norm": 0.456410361967302, "learning_rate": 2.811715899668415e-06, "loss": 0.1539, "step": 6106 }, { "epoch": 2.039405576890967, "grad_norm": 0.47621252955779786, "learning_rate": 2.8099687321971887e-06, "loss": 0.1556, "step": 6107 }, { "epoch": 2.039739522457839, "grad_norm": 0.4543629974389159, "learning_rate": 2.80822189555597e-06, "loss": 0.1537, "step": 6108 }, { "epoch": 2.040073468024712, "grad_norm": 0.49525294210493037, "learning_rate": 2.8064753900086427e-06, "loss": 0.1654, "step": 6109 }, { "epoch": 2.0404074135915846, "grad_norm": 0.512395040030274, "learning_rate": 2.804729215819034e-06, "loss": 0.1712, "step": 6110 }, { "epoch": 2.0407413591584573, "grad_norm": 0.47930074814671675, "learning_rate": 2.8029833732509282e-06, "loss": 0.1545, "step": 6111 }, { "epoch": 2.0410753047253296, "grad_norm": 0.496750207975215, "learning_rate": 2.801237862568048e-06, "loss": 0.1538, "step": 6112 }, { "epoch": 2.0414092502922023, "grad_norm": 0.5533662779888088, "learning_rate": 2.799492684034083e-06, "loss": 0.159, "step": 6113 }, { "epoch": 2.041743195859075, "grad_norm": 0.5055832988689022, "learning_rate": 2.797747837912656e-06, "loss": 0.1607, "step": 6114 }, { "epoch": 2.0420771414259478, "grad_norm": 0.48537757955911165, "learning_rate": 2.796003324467351e-06, "loss": 0.1572, "step": 6115 }, { "epoch": 2.04241108699282, "grad_norm": 0.5055379856535059, "learning_rate": 2.794259143961693e-06, "loss": 0.1621, "step": 6116 }, { "epoch": 2.0427450325596928, "grad_norm": 0.5309785977167635, "learning_rate": 2.7925152966591627e-06, "loss": 0.168, "step": 6117 }, { "epoch": 2.0430789781265655, "grad_norm": 0.48438645896285526, "learning_rate": 2.7907717828231893e-06, "loss": 0.158, "step": 6118 }, { "epoch": 2.043412923693438, "grad_norm": 0.4991961469549531, "learning_rate": 2.7890286027171532e-06, "loss": 0.1591, "step": 6119 }, { "epoch": 2.0437468692603105, "grad_norm": 0.45423321499364905, "learning_rate": 2.7872857566043775e-06, "loss": 0.158, "step": 6120 }, { "epoch": 2.044080814827183, "grad_norm": 0.4865693553830601, "learning_rate": 2.7855432447481444e-06, "loss": 0.1649, "step": 6121 }, { "epoch": 2.044414760394056, "grad_norm": 0.5337114471576638, "learning_rate": 2.7838010674116767e-06, "loss": 0.1718, "step": 6122 }, { "epoch": 2.044748705960928, "grad_norm": 0.44948290522077555, "learning_rate": 2.7820592248581523e-06, "loss": 0.1487, "step": 6123 }, { "epoch": 2.045082651527801, "grad_norm": 0.4938115357940406, "learning_rate": 2.780317717350697e-06, "loss": 0.163, "step": 6124 }, { "epoch": 2.0454165970946736, "grad_norm": 0.49731409157250733, "learning_rate": 2.7785765451523896e-06, "loss": 0.165, "step": 6125 }, { "epoch": 2.0457505426615463, "grad_norm": 0.45450185170041324, "learning_rate": 2.7768357085262486e-06, "loss": 0.1488, "step": 6126 }, { "epoch": 2.0460844882284186, "grad_norm": 0.45184670089086093, "learning_rate": 2.7750952077352534e-06, "loss": 0.1578, "step": 6127 }, { "epoch": 2.0464184337952913, "grad_norm": 0.4948991525337002, "learning_rate": 2.7733550430423216e-06, "loss": 0.1658, "step": 6128 }, { "epoch": 2.046752379362164, "grad_norm": 0.5040836204485063, "learning_rate": 2.7716152147103292e-06, "loss": 0.1577, "step": 6129 }, { "epoch": 2.0470863249290367, "grad_norm": 0.5335238908571901, "learning_rate": 2.7698757230020986e-06, "loss": 0.1572, "step": 6130 }, { "epoch": 2.047420270495909, "grad_norm": 0.49828751734413135, "learning_rate": 2.7681365681803967e-06, "loss": 0.1657, "step": 6131 }, { "epoch": 2.0477542160627817, "grad_norm": 0.4999406939225242, "learning_rate": 2.7663977505079483e-06, "loss": 0.1687, "step": 6132 }, { "epoch": 2.0480881616296545, "grad_norm": 0.5085730563774729, "learning_rate": 2.764659270247417e-06, "loss": 0.1711, "step": 6133 }, { "epoch": 2.048422107196527, "grad_norm": 0.47688020503244877, "learning_rate": 2.7629211276614255e-06, "loss": 0.1589, "step": 6134 }, { "epoch": 2.0487560527633994, "grad_norm": 0.47192663378129107, "learning_rate": 2.761183323012534e-06, "loss": 0.1613, "step": 6135 }, { "epoch": 2.049089998330272, "grad_norm": 0.4848663154654449, "learning_rate": 2.7594458565632664e-06, "loss": 0.1578, "step": 6136 }, { "epoch": 2.049423943897145, "grad_norm": 0.5000980624774899, "learning_rate": 2.757708728576083e-06, "loss": 0.1556, "step": 6137 }, { "epoch": 2.049757889464017, "grad_norm": 0.47224990165416336, "learning_rate": 2.7559719393133987e-06, "loss": 0.1538, "step": 6138 }, { "epoch": 2.05009183503089, "grad_norm": 0.5215834737284637, "learning_rate": 2.754235489037575e-06, "loss": 0.1563, "step": 6139 }, { "epoch": 2.0504257805977626, "grad_norm": 0.4972320969795835, "learning_rate": 2.7524993780109254e-06, "loss": 0.1491, "step": 6140 }, { "epoch": 2.0507597261646353, "grad_norm": 0.49919640281659483, "learning_rate": 2.750763606495704e-06, "loss": 0.1581, "step": 6141 }, { "epoch": 2.0510936717315076, "grad_norm": 0.5143857980365772, "learning_rate": 2.7490281747541276e-06, "loss": 0.1712, "step": 6142 }, { "epoch": 2.0514276172983803, "grad_norm": 0.5088252050706469, "learning_rate": 2.747293083048348e-06, "loss": 0.1591, "step": 6143 }, { "epoch": 2.051761562865253, "grad_norm": 0.5077613718070135, "learning_rate": 2.7455583316404744e-06, "loss": 0.1673, "step": 6144 }, { "epoch": 2.0520955084321257, "grad_norm": 0.5097681301324266, "learning_rate": 2.743823920792559e-06, "loss": 0.1652, "step": 6145 }, { "epoch": 2.052429453998998, "grad_norm": 0.5389169316301308, "learning_rate": 2.742089850766607e-06, "loss": 0.1742, "step": 6146 }, { "epoch": 2.0527633995658707, "grad_norm": 0.48373136141051654, "learning_rate": 2.7403561218245654e-06, "loss": 0.1589, "step": 6147 }, { "epoch": 2.0530973451327434, "grad_norm": 0.4750482606605971, "learning_rate": 2.7386227342283423e-06, "loss": 0.1573, "step": 6148 }, { "epoch": 2.053431290699616, "grad_norm": 0.5005599752487294, "learning_rate": 2.73688968823978e-06, "loss": 0.1723, "step": 6149 }, { "epoch": 2.0537652362664884, "grad_norm": 0.48098693092080774, "learning_rate": 2.7351569841206792e-06, "loss": 0.1565, "step": 6150 }, { "epoch": 2.054099181833361, "grad_norm": 0.5329904578572746, "learning_rate": 2.733424622132782e-06, "loss": 0.1717, "step": 6151 }, { "epoch": 2.054433127400234, "grad_norm": 0.4760378622271746, "learning_rate": 2.7316926025377855e-06, "loss": 0.1588, "step": 6152 }, { "epoch": 2.054767072967106, "grad_norm": 0.4462805437410892, "learning_rate": 2.729960925597328e-06, "loss": 0.1476, "step": 6153 }, { "epoch": 2.055101018533979, "grad_norm": 0.49345870296933825, "learning_rate": 2.7282295915730016e-06, "loss": 0.1642, "step": 6154 }, { "epoch": 2.0554349641008516, "grad_norm": 0.48456295420893086, "learning_rate": 2.726498600726346e-06, "loss": 0.1632, "step": 6155 }, { "epoch": 2.0557689096677243, "grad_norm": 0.45486223885521615, "learning_rate": 2.7247679533188446e-06, "loss": 0.1519, "step": 6156 }, { "epoch": 2.0561028552345966, "grad_norm": 0.5200442659549483, "learning_rate": 2.723037649611936e-06, "loss": 0.1635, "step": 6157 }, { "epoch": 2.0564368008014693, "grad_norm": 0.4909804430537013, "learning_rate": 2.721307689866997e-06, "loss": 0.1635, "step": 6158 }, { "epoch": 2.056770746368342, "grad_norm": 0.4594697562211968, "learning_rate": 2.719578074345366e-06, "loss": 0.151, "step": 6159 }, { "epoch": 2.0571046919352147, "grad_norm": 0.5065496098738638, "learning_rate": 2.7178488033083163e-06, "loss": 0.1652, "step": 6160 }, { "epoch": 2.057438637502087, "grad_norm": 0.5436549815894377, "learning_rate": 2.7161198770170784e-06, "loss": 0.1702, "step": 6161 }, { "epoch": 2.0577725830689597, "grad_norm": 0.49925937165886347, "learning_rate": 2.714391295732822e-06, "loss": 0.1647, "step": 6162 }, { "epoch": 2.0581065286358324, "grad_norm": 0.4852644890591728, "learning_rate": 2.712663059716675e-06, "loss": 0.1458, "step": 6163 }, { "epoch": 2.058440474202705, "grad_norm": 0.48613041699076365, "learning_rate": 2.7109351692297015e-06, "loss": 0.1657, "step": 6164 }, { "epoch": 2.0587744197695774, "grad_norm": 0.4648594550935304, "learning_rate": 2.7092076245329273e-06, "loss": 0.1598, "step": 6165 }, { "epoch": 2.05910836533645, "grad_norm": 0.4584401434296162, "learning_rate": 2.7074804258873127e-06, "loss": 0.1527, "step": 6166 }, { "epoch": 2.059442310903323, "grad_norm": 0.48690854592041694, "learning_rate": 2.7057535735537754e-06, "loss": 0.1588, "step": 6167 }, { "epoch": 2.0597762564701956, "grad_norm": 0.4726518812957776, "learning_rate": 2.704027067793173e-06, "loss": 0.1596, "step": 6168 }, { "epoch": 2.060110202037068, "grad_norm": 0.4848631026115318, "learning_rate": 2.7023009088663176e-06, "loss": 0.1622, "step": 6169 }, { "epoch": 2.0604441476039406, "grad_norm": 0.5010288195268352, "learning_rate": 2.7005750970339607e-06, "loss": 0.1493, "step": 6170 }, { "epoch": 2.0607780931708133, "grad_norm": 0.5589263121127757, "learning_rate": 2.698849632556815e-06, "loss": 0.1801, "step": 6171 }, { "epoch": 2.0611120387376856, "grad_norm": 0.4738720262047763, "learning_rate": 2.697124515695524e-06, "loss": 0.1539, "step": 6172 }, { "epoch": 2.0614459843045583, "grad_norm": 0.4860672112480917, "learning_rate": 2.695399746710693e-06, "loss": 0.1598, "step": 6173 }, { "epoch": 2.061779929871431, "grad_norm": 0.5056145335563673, "learning_rate": 2.6936753258628643e-06, "loss": 0.1633, "step": 6174 }, { "epoch": 2.0621138754383037, "grad_norm": 0.526585125988662, "learning_rate": 2.691951253412536e-06, "loss": 0.1569, "step": 6175 }, { "epoch": 2.062447821005176, "grad_norm": 0.5141605422386901, "learning_rate": 2.6902275296201445e-06, "loss": 0.1628, "step": 6176 }, { "epoch": 2.0627817665720487, "grad_norm": 0.4813014518093507, "learning_rate": 2.688504154746082e-06, "loss": 0.1613, "step": 6177 }, { "epoch": 2.0631157121389214, "grad_norm": 0.4515762636761851, "learning_rate": 2.686781129050685e-06, "loss": 0.1475, "step": 6178 }, { "epoch": 2.063449657705794, "grad_norm": 0.4709359444645056, "learning_rate": 2.685058452794235e-06, "loss": 0.1603, "step": 6179 }, { "epoch": 2.0637836032726664, "grad_norm": 0.4820755120947043, "learning_rate": 2.6833361262369644e-06, "loss": 0.1532, "step": 6180 }, { "epoch": 2.064117548839539, "grad_norm": 0.45748798633211313, "learning_rate": 2.681614149639048e-06, "loss": 0.1603, "step": 6181 }, { "epoch": 2.064451494406412, "grad_norm": 0.47928383265863794, "learning_rate": 2.679892523260612e-06, "loss": 0.1565, "step": 6182 }, { "epoch": 2.0647854399732846, "grad_norm": 0.4912860713391019, "learning_rate": 2.6781712473617293e-06, "loss": 0.1631, "step": 6183 }, { "epoch": 2.065119385540157, "grad_norm": 0.4937495874322415, "learning_rate": 2.6764503222024202e-06, "loss": 0.1631, "step": 6184 }, { "epoch": 2.0654533311070296, "grad_norm": 0.5250388336695911, "learning_rate": 2.674729748042647e-06, "loss": 0.1712, "step": 6185 }, { "epoch": 2.0657872766739023, "grad_norm": 0.47623079436105503, "learning_rate": 2.673009525142326e-06, "loss": 0.1564, "step": 6186 }, { "epoch": 2.0661212222407745, "grad_norm": 0.4874432578932814, "learning_rate": 2.6712896537613143e-06, "loss": 0.1579, "step": 6187 }, { "epoch": 2.0664551678076473, "grad_norm": 0.45987158697491903, "learning_rate": 2.6695701341594193e-06, "loss": 0.1505, "step": 6188 }, { "epoch": 2.06678911337452, "grad_norm": 0.44396258874408284, "learning_rate": 2.667850966596396e-06, "loss": 0.1597, "step": 6189 }, { "epoch": 2.0671230589413927, "grad_norm": 0.47997705349381775, "learning_rate": 2.6661321513319467e-06, "loss": 0.1575, "step": 6190 }, { "epoch": 2.067457004508265, "grad_norm": 0.4639530958941431, "learning_rate": 2.6644136886257138e-06, "loss": 0.1554, "step": 6191 }, { "epoch": 2.0677909500751377, "grad_norm": 0.47671499247486326, "learning_rate": 2.6626955787372962e-06, "loss": 0.1528, "step": 6192 }, { "epoch": 2.0681248956420104, "grad_norm": 0.5172377476601987, "learning_rate": 2.6609778219262296e-06, "loss": 0.1572, "step": 6193 }, { "epoch": 2.068458841208883, "grad_norm": 0.46150608992405706, "learning_rate": 2.659260418452005e-06, "loss": 0.1508, "step": 6194 }, { "epoch": 2.0687927867757554, "grad_norm": 0.4864260006069014, "learning_rate": 2.6575433685740547e-06, "loss": 0.1503, "step": 6195 }, { "epoch": 2.069126732342628, "grad_norm": 0.47462807507812255, "learning_rate": 2.655826672551762e-06, "loss": 0.1544, "step": 6196 }, { "epoch": 2.069460677909501, "grad_norm": 0.5047881595787677, "learning_rate": 2.6541103306444516e-06, "loss": 0.1627, "step": 6197 }, { "epoch": 2.0697946234763736, "grad_norm": 0.4847110255829372, "learning_rate": 2.6523943431113985e-06, "loss": 0.1575, "step": 6198 }, { "epoch": 2.070128569043246, "grad_norm": 0.5684634058179412, "learning_rate": 2.6506787102118204e-06, "loss": 0.1793, "step": 6199 }, { "epoch": 2.0704625146101185, "grad_norm": 0.4668023927238211, "learning_rate": 2.6489634322048853e-06, "loss": 0.1475, "step": 6200 }, { "epoch": 2.0707964601769913, "grad_norm": 0.4823615249413857, "learning_rate": 2.647248509349708e-06, "loss": 0.1591, "step": 6201 }, { "epoch": 2.0711304057438635, "grad_norm": 0.5284323022808656, "learning_rate": 2.645533941905345e-06, "loss": 0.1711, "step": 6202 }, { "epoch": 2.0714643513107363, "grad_norm": 0.4629106304307049, "learning_rate": 2.6438197301308045e-06, "loss": 0.153, "step": 6203 }, { "epoch": 2.071798296877609, "grad_norm": 0.4756702290381628, "learning_rate": 2.6421058742850346e-06, "loss": 0.1595, "step": 6204 }, { "epoch": 2.0721322424444817, "grad_norm": 0.4651028891547631, "learning_rate": 2.6403923746269368e-06, "loss": 0.1439, "step": 6205 }, { "epoch": 2.072466188011354, "grad_norm": 0.5038789264372514, "learning_rate": 2.638679231415353e-06, "loss": 0.1612, "step": 6206 }, { "epoch": 2.0728001335782267, "grad_norm": 0.465142161854533, "learning_rate": 2.636966444909077e-06, "loss": 0.1581, "step": 6207 }, { "epoch": 2.0731340791450994, "grad_norm": 0.471800296558469, "learning_rate": 2.635254015366842e-06, "loss": 0.1617, "step": 6208 }, { "epoch": 2.073468024711972, "grad_norm": 0.4929302779325763, "learning_rate": 2.633541943047334e-06, "loss": 0.1617, "step": 6209 }, { "epoch": 2.0738019702788444, "grad_norm": 0.5009578484887693, "learning_rate": 2.6318302282091772e-06, "loss": 0.1582, "step": 6210 }, { "epoch": 2.074135915845717, "grad_norm": 0.4885347046833819, "learning_rate": 2.6301188711109494e-06, "loss": 0.1608, "step": 6211 }, { "epoch": 2.07446986141259, "grad_norm": 0.46827738776381345, "learning_rate": 2.6284078720111693e-06, "loss": 0.1463, "step": 6212 }, { "epoch": 2.0748038069794625, "grad_norm": 0.548235859732826, "learning_rate": 2.626697231168308e-06, "loss": 0.1672, "step": 6213 }, { "epoch": 2.075137752546335, "grad_norm": 0.4807495227938576, "learning_rate": 2.624986948840772e-06, "loss": 0.1597, "step": 6214 }, { "epoch": 2.0754716981132075, "grad_norm": 0.5208785862675631, "learning_rate": 2.6232770252869243e-06, "loss": 0.1652, "step": 6215 }, { "epoch": 2.0758056436800802, "grad_norm": 0.5392394769474849, "learning_rate": 2.6215674607650653e-06, "loss": 0.1674, "step": 6216 }, { "epoch": 2.076139589246953, "grad_norm": 0.4812349060192288, "learning_rate": 2.619858255533446e-06, "loss": 0.1555, "step": 6217 }, { "epoch": 2.0764735348138252, "grad_norm": 0.4772963896311151, "learning_rate": 2.6181494098502626e-06, "loss": 0.1508, "step": 6218 }, { "epoch": 2.076807480380698, "grad_norm": 0.4820130088588004, "learning_rate": 2.616440923973659e-06, "loss": 0.1579, "step": 6219 }, { "epoch": 2.0771414259475707, "grad_norm": 0.47326464893761655, "learning_rate": 2.6147327981617167e-06, "loss": 0.156, "step": 6220 }, { "epoch": 2.077475371514443, "grad_norm": 0.4814515104395897, "learning_rate": 2.613025032672472e-06, "loss": 0.1565, "step": 6221 }, { "epoch": 2.0778093170813157, "grad_norm": 0.484941581282741, "learning_rate": 2.611317627763901e-06, "loss": 0.1559, "step": 6222 }, { "epoch": 2.0781432626481884, "grad_norm": 0.46578841936370874, "learning_rate": 2.609610583693928e-06, "loss": 0.1565, "step": 6223 }, { "epoch": 2.078477208215061, "grad_norm": 0.5122244564079962, "learning_rate": 2.6079039007204238e-06, "loss": 0.1628, "step": 6224 }, { "epoch": 2.0788111537819334, "grad_norm": 0.4916632187074603, "learning_rate": 2.6061975791011996e-06, "loss": 0.1529, "step": 6225 }, { "epoch": 2.079145099348806, "grad_norm": 0.4816831224754778, "learning_rate": 2.6044916190940194e-06, "loss": 0.1592, "step": 6226 }, { "epoch": 2.079479044915679, "grad_norm": 0.4506436759636466, "learning_rate": 2.6027860209565835e-06, "loss": 0.145, "step": 6227 }, { "epoch": 2.0798129904825515, "grad_norm": 0.5060128940585642, "learning_rate": 2.6010807849465468e-06, "loss": 0.1579, "step": 6228 }, { "epoch": 2.080146936049424, "grad_norm": 0.5398542683218175, "learning_rate": 2.5993759113215032e-06, "loss": 0.178, "step": 6229 }, { "epoch": 2.0804808816162965, "grad_norm": 0.46194809764874106, "learning_rate": 2.5976714003389963e-06, "loss": 0.1558, "step": 6230 }, { "epoch": 2.0808148271831692, "grad_norm": 0.5504935947232371, "learning_rate": 2.5959672522565095e-06, "loss": 0.1697, "step": 6231 }, { "epoch": 2.081148772750042, "grad_norm": 0.5461299363549844, "learning_rate": 2.594263467331477e-06, "loss": 0.1653, "step": 6232 }, { "epoch": 2.0814827183169142, "grad_norm": 0.5349505003346677, "learning_rate": 2.592560045821273e-06, "loss": 0.1631, "step": 6233 }, { "epoch": 2.081816663883787, "grad_norm": 0.5028722732399603, "learning_rate": 2.5908569879832223e-06, "loss": 0.1587, "step": 6234 }, { "epoch": 2.0821506094506597, "grad_norm": 0.4847488245991641, "learning_rate": 2.5891542940745873e-06, "loss": 0.1645, "step": 6235 }, { "epoch": 2.082484555017532, "grad_norm": 0.5179191384302583, "learning_rate": 2.5874519643525864e-06, "loss": 0.1652, "step": 6236 }, { "epoch": 2.0828185005844047, "grad_norm": 0.5235684818125339, "learning_rate": 2.5857499990743706e-06, "loss": 0.1662, "step": 6237 }, { "epoch": 2.0831524461512774, "grad_norm": 0.4954099960770102, "learning_rate": 2.584048398497047e-06, "loss": 0.1576, "step": 6238 }, { "epoch": 2.08348639171815, "grad_norm": 0.5232717340566326, "learning_rate": 2.5823471628776574e-06, "loss": 0.1611, "step": 6239 }, { "epoch": 2.0838203372850224, "grad_norm": 0.518556939706427, "learning_rate": 2.5806462924731955e-06, "loss": 0.1651, "step": 6240 }, { "epoch": 2.084154282851895, "grad_norm": 0.4616700212286318, "learning_rate": 2.5789457875405986e-06, "loss": 0.1516, "step": 6241 }, { "epoch": 2.084488228418768, "grad_norm": 0.5116149450430864, "learning_rate": 2.57724564833675e-06, "loss": 0.1602, "step": 6242 }, { "epoch": 2.0848221739856405, "grad_norm": 0.4893670944675162, "learning_rate": 2.5755458751184705e-06, "loss": 0.1565, "step": 6243 }, { "epoch": 2.085156119552513, "grad_norm": 0.4520354757290471, "learning_rate": 2.5738464681425356e-06, "loss": 0.1531, "step": 6244 }, { "epoch": 2.0854900651193855, "grad_norm": 0.5233378545227989, "learning_rate": 2.5721474276656566e-06, "loss": 0.1656, "step": 6245 }, { "epoch": 2.0858240106862582, "grad_norm": 0.5282369500802311, "learning_rate": 2.5704487539444956e-06, "loss": 0.17, "step": 6246 }, { "epoch": 2.086157956253131, "grad_norm": 0.572079378431354, "learning_rate": 2.5687504472356596e-06, "loss": 0.1673, "step": 6247 }, { "epoch": 2.086491901820003, "grad_norm": 0.5136764358982354, "learning_rate": 2.5670525077956944e-06, "loss": 0.1672, "step": 6248 }, { "epoch": 2.086825847386876, "grad_norm": 0.4780306930943609, "learning_rate": 2.5653549358810957e-06, "loss": 0.1512, "step": 6249 }, { "epoch": 2.0871597929537486, "grad_norm": 0.5351708052915466, "learning_rate": 2.563657731748299e-06, "loss": 0.1616, "step": 6250 }, { "epoch": 2.087493738520621, "grad_norm": 0.48212931576855894, "learning_rate": 2.5619608956536895e-06, "loss": 0.1541, "step": 6251 }, { "epoch": 2.0878276840874936, "grad_norm": 0.4882011252089522, "learning_rate": 2.5602644278535937e-06, "loss": 0.1689, "step": 6252 }, { "epoch": 2.0881616296543664, "grad_norm": 0.47356586066482304, "learning_rate": 2.558568328604285e-06, "loss": 0.1535, "step": 6253 }, { "epoch": 2.088495575221239, "grad_norm": 0.5437862879169038, "learning_rate": 2.5568725981619747e-06, "loss": 0.1729, "step": 6254 }, { "epoch": 2.0888295207881113, "grad_norm": 0.5035531617051279, "learning_rate": 2.5551772367828276e-06, "loss": 0.1774, "step": 6255 }, { "epoch": 2.089163466354984, "grad_norm": 0.4478316519506567, "learning_rate": 2.5534822447229436e-06, "loss": 0.146, "step": 6256 }, { "epoch": 2.089497411921857, "grad_norm": 0.5394101004610315, "learning_rate": 2.551787622238376e-06, "loss": 0.1699, "step": 6257 }, { "epoch": 2.0898313574887295, "grad_norm": 0.49097878747683316, "learning_rate": 2.5500933695851104e-06, "loss": 0.155, "step": 6258 }, { "epoch": 2.0901653030556018, "grad_norm": 0.5146220478613275, "learning_rate": 2.548399487019092e-06, "loss": 0.1591, "step": 6259 }, { "epoch": 2.0904992486224745, "grad_norm": 0.5347843554604156, "learning_rate": 2.5467059747961953e-06, "loss": 0.1591, "step": 6260 }, { "epoch": 2.090833194189347, "grad_norm": 0.5233614282885114, "learning_rate": 2.54501283317225e-06, "loss": 0.1649, "step": 6261 }, { "epoch": 2.09116713975622, "grad_norm": 0.5146286619898135, "learning_rate": 2.5433200624030212e-06, "loss": 0.161, "step": 6262 }, { "epoch": 2.091501085323092, "grad_norm": 0.5020319234607654, "learning_rate": 2.541627662744225e-06, "loss": 0.1574, "step": 6263 }, { "epoch": 2.091835030889965, "grad_norm": 0.5004055836984991, "learning_rate": 2.5399356344515138e-06, "loss": 0.1572, "step": 6264 }, { "epoch": 2.0921689764568376, "grad_norm": 0.4979420565995763, "learning_rate": 2.538243977780494e-06, "loss": 0.1564, "step": 6265 }, { "epoch": 2.0925029220237104, "grad_norm": 0.51833341034248, "learning_rate": 2.5365526929867056e-06, "loss": 0.1628, "step": 6266 }, { "epoch": 2.0928368675905826, "grad_norm": 0.5459444259176912, "learning_rate": 2.534861780325642e-06, "loss": 0.1555, "step": 6267 }, { "epoch": 2.0931708131574553, "grad_norm": 0.4788442607046698, "learning_rate": 2.53317124005273e-06, "loss": 0.1499, "step": 6268 }, { "epoch": 2.093504758724328, "grad_norm": 0.5008550202971368, "learning_rate": 2.5314810724233502e-06, "loss": 0.1683, "step": 6269 }, { "epoch": 2.0938387042912003, "grad_norm": 0.522704888977689, "learning_rate": 2.529791277692818e-06, "loss": 0.1655, "step": 6270 }, { "epoch": 2.094172649858073, "grad_norm": 0.507328669366408, "learning_rate": 2.5281018561163996e-06, "loss": 0.1635, "step": 6271 }, { "epoch": 2.0945065954249458, "grad_norm": 0.5081138020513082, "learning_rate": 2.5264128079493033e-06, "loss": 0.1694, "step": 6272 }, { "epoch": 2.0948405409918185, "grad_norm": 0.4772258614297116, "learning_rate": 2.524724133446676e-06, "loss": 0.1616, "step": 6273 }, { "epoch": 2.0951744865586908, "grad_norm": 0.495524661859938, "learning_rate": 2.523035832863614e-06, "loss": 0.1641, "step": 6274 }, { "epoch": 2.0955084321255635, "grad_norm": 0.4529427960497026, "learning_rate": 2.521347906455154e-06, "loss": 0.1517, "step": 6275 }, { "epoch": 2.095842377692436, "grad_norm": 0.511282048473958, "learning_rate": 2.5196603544762804e-06, "loss": 0.1628, "step": 6276 }, { "epoch": 2.096176323259309, "grad_norm": 0.4720720037080335, "learning_rate": 2.5179731771819133e-06, "loss": 0.1573, "step": 6277 }, { "epoch": 2.096510268826181, "grad_norm": 0.46615925544747905, "learning_rate": 2.5162863748269247e-06, "loss": 0.1518, "step": 6278 }, { "epoch": 2.096844214393054, "grad_norm": 0.5095981778636587, "learning_rate": 2.514599947666122e-06, "loss": 0.1593, "step": 6279 }, { "epoch": 2.0971781599599266, "grad_norm": 0.5176486525595663, "learning_rate": 2.5129138959542633e-06, "loss": 0.1702, "step": 6280 }, { "epoch": 2.0975121055267993, "grad_norm": 0.476234499249456, "learning_rate": 2.5112282199460415e-06, "loss": 0.1543, "step": 6281 }, { "epoch": 2.0978460510936716, "grad_norm": 0.522206312703102, "learning_rate": 2.5095429198961056e-06, "loss": 0.173, "step": 6282 }, { "epoch": 2.0981799966605443, "grad_norm": 0.5078914776921112, "learning_rate": 2.507857996059034e-06, "loss": 0.1628, "step": 6283 }, { "epoch": 2.098513942227417, "grad_norm": 0.48432480768589903, "learning_rate": 2.5061734486893574e-06, "loss": 0.1541, "step": 6284 }, { "epoch": 2.0988478877942893, "grad_norm": 0.49121279758470837, "learning_rate": 2.504489278041544e-06, "loss": 0.1597, "step": 6285 }, { "epoch": 2.099181833361162, "grad_norm": 0.47385292608458507, "learning_rate": 2.5028054843700102e-06, "loss": 0.1531, "step": 6286 }, { "epoch": 2.0995157789280348, "grad_norm": 0.4841879216861, "learning_rate": 2.501122067929108e-06, "loss": 0.1517, "step": 6287 }, { "epoch": 2.0998497244949075, "grad_norm": 0.479421292773347, "learning_rate": 2.4994390289731446e-06, "loss": 0.1551, "step": 6288 }, { "epoch": 2.1001836700617798, "grad_norm": 0.5527499779534556, "learning_rate": 2.497756367756357e-06, "loss": 0.1618, "step": 6289 }, { "epoch": 2.1005176156286525, "grad_norm": 0.4913185942010325, "learning_rate": 2.496074084532935e-06, "loss": 0.154, "step": 6290 }, { "epoch": 2.100851561195525, "grad_norm": 0.5031384055965269, "learning_rate": 2.4943921795570033e-06, "loss": 0.1479, "step": 6291 }, { "epoch": 2.101185506762398, "grad_norm": 0.49927324992800404, "learning_rate": 2.4927106530826372e-06, "loss": 0.1658, "step": 6292 }, { "epoch": 2.10151945232927, "grad_norm": 0.5309889957366104, "learning_rate": 2.491029505363848e-06, "loss": 0.1422, "step": 6293 }, { "epoch": 2.101853397896143, "grad_norm": 0.49560503238802656, "learning_rate": 2.489348736654593e-06, "loss": 0.1625, "step": 6294 }, { "epoch": 2.1021873434630156, "grad_norm": 0.4782744370282557, "learning_rate": 2.4876683472087767e-06, "loss": 0.1508, "step": 6295 }, { "epoch": 2.1025212890298883, "grad_norm": 0.4648251202295055, "learning_rate": 2.4859883372802357e-06, "loss": 0.1514, "step": 6296 }, { "epoch": 2.1028552345967606, "grad_norm": 0.47761287277219516, "learning_rate": 2.484308707122758e-06, "loss": 0.1469, "step": 6297 }, { "epoch": 2.1031891801636333, "grad_norm": 0.5120606241455523, "learning_rate": 2.4826294569900725e-06, "loss": 0.1599, "step": 6298 }, { "epoch": 2.103523125730506, "grad_norm": 0.5192893295764826, "learning_rate": 2.4809505871358476e-06, "loss": 0.1647, "step": 6299 }, { "epoch": 2.1038570712973783, "grad_norm": 0.49615949952418487, "learning_rate": 2.4792720978136967e-06, "loss": 0.1615, "step": 6300 }, { "epoch": 2.104191016864251, "grad_norm": 0.5171662212170216, "learning_rate": 2.4775939892771787e-06, "loss": 0.1544, "step": 6301 }, { "epoch": 2.1045249624311237, "grad_norm": 0.5549893785210516, "learning_rate": 2.4759162617797873e-06, "loss": 0.1687, "step": 6302 }, { "epoch": 2.1048589079979965, "grad_norm": 0.5230583873016571, "learning_rate": 2.4742389155749657e-06, "loss": 0.1651, "step": 6303 }, { "epoch": 2.1051928535648687, "grad_norm": 0.4862306544241593, "learning_rate": 2.472561950916094e-06, "loss": 0.1553, "step": 6304 }, { "epoch": 2.1055267991317415, "grad_norm": 0.5309377784725847, "learning_rate": 2.4708853680565e-06, "loss": 0.1652, "step": 6305 }, { "epoch": 2.105860744698614, "grad_norm": 0.5055232116428447, "learning_rate": 2.4692091672494494e-06, "loss": 0.163, "step": 6306 }, { "epoch": 2.106194690265487, "grad_norm": 0.5642390943911669, "learning_rate": 2.4675333487481558e-06, "loss": 0.1711, "step": 6307 }, { "epoch": 2.106528635832359, "grad_norm": 0.461679532517826, "learning_rate": 2.4658579128057665e-06, "loss": 0.1534, "step": 6308 }, { "epoch": 2.106862581399232, "grad_norm": 0.5100394097368949, "learning_rate": 2.4641828596753803e-06, "loss": 0.1637, "step": 6309 }, { "epoch": 2.1071965269661046, "grad_norm": 0.46839005082728324, "learning_rate": 2.4625081896100294e-06, "loss": 0.1535, "step": 6310 }, { "epoch": 2.1075304725329773, "grad_norm": 0.4964670435707832, "learning_rate": 2.4608339028626943e-06, "loss": 0.1647, "step": 6311 }, { "epoch": 2.1078644180998496, "grad_norm": 0.48855274829211875, "learning_rate": 2.4591599996862957e-06, "loss": 0.1639, "step": 6312 }, { "epoch": 2.1081983636667223, "grad_norm": 0.5450080590558118, "learning_rate": 2.457486480333699e-06, "loss": 0.174, "step": 6313 }, { "epoch": 2.108532309233595, "grad_norm": 0.5224423801079273, "learning_rate": 2.4558133450577044e-06, "loss": 0.1577, "step": 6314 }, { "epoch": 2.1088662548004677, "grad_norm": 0.5174969356229879, "learning_rate": 2.4541405941110626e-06, "loss": 0.162, "step": 6315 }, { "epoch": 2.10920020036734, "grad_norm": 0.4964356122182152, "learning_rate": 2.452468227746459e-06, "loss": 0.1575, "step": 6316 }, { "epoch": 2.1095341459342127, "grad_norm": 0.5613011623009709, "learning_rate": 2.4507962462165254e-06, "loss": 0.1669, "step": 6317 }, { "epoch": 2.1098680915010855, "grad_norm": 0.47625742049767633, "learning_rate": 2.449124649773835e-06, "loss": 0.1559, "step": 6318 }, { "epoch": 2.1102020370679577, "grad_norm": 0.6372927108132521, "learning_rate": 2.4474534386709036e-06, "loss": 0.1766, "step": 6319 }, { "epoch": 2.1105359826348304, "grad_norm": 0.5062800748969212, "learning_rate": 2.4457826131601835e-06, "loss": 0.1498, "step": 6320 }, { "epoch": 2.110869928201703, "grad_norm": 0.5307208312611601, "learning_rate": 2.444112173494077e-06, "loss": 0.1615, "step": 6321 }, { "epoch": 2.111203873768576, "grad_norm": 0.5407648531518704, "learning_rate": 2.4424421199249194e-06, "loss": 0.1691, "step": 6322 }, { "epoch": 2.111537819335448, "grad_norm": 0.5343008151179298, "learning_rate": 2.440772452704993e-06, "loss": 0.1642, "step": 6323 }, { "epoch": 2.111871764902321, "grad_norm": 0.49431699281947467, "learning_rate": 2.4391031720865246e-06, "loss": 0.1634, "step": 6324 }, { "epoch": 2.1122057104691936, "grad_norm": 0.46526916827882536, "learning_rate": 2.4374342783216732e-06, "loss": 0.1483, "step": 6325 }, { "epoch": 2.1125396560360663, "grad_norm": 0.5278048019270066, "learning_rate": 2.435765771662549e-06, "loss": 0.1569, "step": 6326 }, { "epoch": 2.1128736016029386, "grad_norm": 0.48140891039036754, "learning_rate": 2.4340976523611957e-06, "loss": 0.1496, "step": 6327 }, { "epoch": 2.1132075471698113, "grad_norm": 0.49088279957734005, "learning_rate": 2.4324299206696057e-06, "loss": 0.1609, "step": 6328 }, { "epoch": 2.113541492736684, "grad_norm": 0.49893234907819617, "learning_rate": 2.4307625768397077e-06, "loss": 0.164, "step": 6329 }, { "epoch": 2.1138754383035567, "grad_norm": 0.5409361980416141, "learning_rate": 2.4290956211233757e-06, "loss": 0.1665, "step": 6330 }, { "epoch": 2.114209383870429, "grad_norm": 0.5101008229874319, "learning_rate": 2.42742905377242e-06, "loss": 0.1529, "step": 6331 }, { "epoch": 2.1145433294373017, "grad_norm": 0.5238806985611678, "learning_rate": 2.4257628750385987e-06, "loss": 0.1633, "step": 6332 }, { "epoch": 2.1148772750041744, "grad_norm": 0.4728995120386821, "learning_rate": 2.424097085173604e-06, "loss": 0.1476, "step": 6333 }, { "epoch": 2.1152112205710467, "grad_norm": 0.5183928863262791, "learning_rate": 2.4224316844290747e-06, "loss": 0.1603, "step": 6334 }, { "epoch": 2.1155451661379194, "grad_norm": 0.5244171017941279, "learning_rate": 2.4207666730565893e-06, "loss": 0.1601, "step": 6335 }, { "epoch": 2.115879111704792, "grad_norm": 0.483669792524147, "learning_rate": 2.4191020513076697e-06, "loss": 0.1643, "step": 6336 }, { "epoch": 2.116213057271665, "grad_norm": 0.5036617136208132, "learning_rate": 2.4174378194337715e-06, "loss": 0.1617, "step": 6337 }, { "epoch": 2.116547002838537, "grad_norm": 0.5000061906999408, "learning_rate": 2.4157739776863023e-06, "loss": 0.16, "step": 6338 }, { "epoch": 2.11688094840541, "grad_norm": 0.48562974176186696, "learning_rate": 2.4141105263166e-06, "loss": 0.1594, "step": 6339 }, { "epoch": 2.1172148939722826, "grad_norm": 0.4796849682215605, "learning_rate": 2.41244746557595e-06, "loss": 0.152, "step": 6340 }, { "epoch": 2.1175488395391553, "grad_norm": 0.45539955587257097, "learning_rate": 2.4107847957155784e-06, "loss": 0.157, "step": 6341 }, { "epoch": 2.1178827851060276, "grad_norm": 0.46438375956752614, "learning_rate": 2.409122516986652e-06, "loss": 0.1549, "step": 6342 }, { "epoch": 2.1182167306729003, "grad_norm": 0.4514297641602212, "learning_rate": 2.4074606296402735e-06, "loss": 0.1519, "step": 6343 }, { "epoch": 2.118550676239773, "grad_norm": 0.4859800183560971, "learning_rate": 2.405799133927496e-06, "loss": 0.1593, "step": 6344 }, { "epoch": 2.1188846218066457, "grad_norm": 0.5005344670210375, "learning_rate": 2.404138030099303e-06, "loss": 0.1597, "step": 6345 }, { "epoch": 2.119218567373518, "grad_norm": 0.48375070616490734, "learning_rate": 2.4024773184066253e-06, "loss": 0.1665, "step": 6346 }, { "epoch": 2.1195525129403907, "grad_norm": 0.472840448624495, "learning_rate": 2.4008169991003356e-06, "loss": 0.1452, "step": 6347 }, { "epoch": 2.1198864585072634, "grad_norm": 0.49084422815911033, "learning_rate": 2.3991570724312405e-06, "loss": 0.1528, "step": 6348 }, { "epoch": 2.1202204040741357, "grad_norm": 0.4692583452343208, "learning_rate": 2.3974975386500958e-06, "loss": 0.155, "step": 6349 }, { "epoch": 2.1205543496410084, "grad_norm": 0.5062663536376966, "learning_rate": 2.3958383980075896e-06, "loss": 0.1633, "step": 6350 }, { "epoch": 2.120888295207881, "grad_norm": 0.5403727659874564, "learning_rate": 2.394179650754358e-06, "loss": 0.1671, "step": 6351 }, { "epoch": 2.121222240774754, "grad_norm": 0.5235630447457905, "learning_rate": 2.3925212971409688e-06, "loss": 0.1622, "step": 6352 }, { "epoch": 2.121556186341626, "grad_norm": 0.4699026763388292, "learning_rate": 2.3908633374179436e-06, "loss": 0.1518, "step": 6353 }, { "epoch": 2.121890131908499, "grad_norm": 0.5167884671729314, "learning_rate": 2.3892057718357308e-06, "loss": 0.1604, "step": 6354 }, { "epoch": 2.1222240774753716, "grad_norm": 0.4623642417869892, "learning_rate": 2.3875486006447294e-06, "loss": 0.1491, "step": 6355 }, { "epoch": 2.1225580230422443, "grad_norm": 0.5158989070059895, "learning_rate": 2.3858918240952703e-06, "loss": 0.1576, "step": 6356 }, { "epoch": 2.1228919686091166, "grad_norm": 0.5110491439871296, "learning_rate": 2.384235442437632e-06, "loss": 0.1651, "step": 6357 }, { "epoch": 2.1232259141759893, "grad_norm": 0.5119404415532335, "learning_rate": 2.3825794559220296e-06, "loss": 0.1729, "step": 6358 }, { "epoch": 2.123559859742862, "grad_norm": 0.5501830648498629, "learning_rate": 2.380923864798621e-06, "loss": 0.1692, "step": 6359 }, { "epoch": 2.1238938053097347, "grad_norm": 0.5038516827919598, "learning_rate": 2.3792686693174993e-06, "loss": 0.1627, "step": 6360 }, { "epoch": 2.124227750876607, "grad_norm": 0.5332702528419736, "learning_rate": 2.3776138697287055e-06, "loss": 0.1651, "step": 6361 }, { "epoch": 2.1245616964434797, "grad_norm": 0.4763237636455076, "learning_rate": 2.3759594662822122e-06, "loss": 0.1436, "step": 6362 }, { "epoch": 2.1248956420103524, "grad_norm": 0.5286176751132988, "learning_rate": 2.3743054592279386e-06, "loss": 0.1605, "step": 6363 }, { "epoch": 2.125229587577225, "grad_norm": 0.47547415889114697, "learning_rate": 2.372651848815742e-06, "loss": 0.1473, "step": 6364 }, { "epoch": 2.1255635331440974, "grad_norm": 0.4533019654138708, "learning_rate": 2.370998635295421e-06, "loss": 0.1531, "step": 6365 }, { "epoch": 2.12589747871097, "grad_norm": 0.45228899024811703, "learning_rate": 2.3693458189167106e-06, "loss": 0.1517, "step": 6366 }, { "epoch": 2.126231424277843, "grad_norm": 0.5400654982070013, "learning_rate": 2.3676933999292905e-06, "loss": 0.1697, "step": 6367 }, { "epoch": 2.126565369844715, "grad_norm": 0.5439371706519016, "learning_rate": 2.366041378582775e-06, "loss": 0.171, "step": 6368 }, { "epoch": 2.126899315411588, "grad_norm": 0.5018152963433691, "learning_rate": 2.364389755126723e-06, "loss": 0.1528, "step": 6369 }, { "epoch": 2.1272332609784605, "grad_norm": 0.487011525737873, "learning_rate": 2.3627385298106344e-06, "loss": 0.1598, "step": 6370 }, { "epoch": 2.1275672065453333, "grad_norm": 0.5186675987624135, "learning_rate": 2.361087702883941e-06, "loss": 0.1655, "step": 6371 }, { "epoch": 2.1279011521122055, "grad_norm": 0.4740219296383837, "learning_rate": 2.359437274596024e-06, "loss": 0.1535, "step": 6372 }, { "epoch": 2.1282350976790783, "grad_norm": 0.4723892674328739, "learning_rate": 2.357787245196197e-06, "loss": 0.1461, "step": 6373 }, { "epoch": 2.128569043245951, "grad_norm": 0.4653901182497193, "learning_rate": 2.3561376149337188e-06, "loss": 0.1542, "step": 6374 }, { "epoch": 2.1289029888128237, "grad_norm": 0.5055198140337246, "learning_rate": 2.3544883840577815e-06, "loss": 0.1614, "step": 6375 }, { "epoch": 2.129236934379696, "grad_norm": 0.49423346871659607, "learning_rate": 2.352839552817527e-06, "loss": 0.1606, "step": 6376 }, { "epoch": 2.1295708799465687, "grad_norm": 0.5028984209451132, "learning_rate": 2.3511911214620255e-06, "loss": 0.1524, "step": 6377 }, { "epoch": 2.1299048255134414, "grad_norm": 0.534742967358469, "learning_rate": 2.3495430902402956e-06, "loss": 0.1685, "step": 6378 }, { "epoch": 2.1302387710803137, "grad_norm": 0.5180111072516133, "learning_rate": 2.3478954594012884e-06, "loss": 0.1613, "step": 6379 }, { "epoch": 2.1305727166471864, "grad_norm": 0.4735330750800575, "learning_rate": 2.346248229193901e-06, "loss": 0.1488, "step": 6380 }, { "epoch": 2.130906662214059, "grad_norm": 0.5283937925064244, "learning_rate": 2.344601399866962e-06, "loss": 0.1635, "step": 6381 }, { "epoch": 2.131240607780932, "grad_norm": 0.5098981708034045, "learning_rate": 2.342954971669252e-06, "loss": 0.1621, "step": 6382 }, { "epoch": 2.131574553347804, "grad_norm": 0.5297193277625377, "learning_rate": 2.341308944849477e-06, "loss": 0.1697, "step": 6383 }, { "epoch": 2.131908498914677, "grad_norm": 0.4851259261560039, "learning_rate": 2.3396633196562924e-06, "loss": 0.154, "step": 6384 }, { "epoch": 2.1322424444815495, "grad_norm": 0.5309045479819359, "learning_rate": 2.3380180963382866e-06, "loss": 0.1625, "step": 6385 }, { "epoch": 2.1325763900484223, "grad_norm": 0.529874878718374, "learning_rate": 2.3363732751439926e-06, "loss": 0.1707, "step": 6386 }, { "epoch": 2.1329103356152945, "grad_norm": 0.5019851431122059, "learning_rate": 2.334728856321875e-06, "loss": 0.1647, "step": 6387 }, { "epoch": 2.1332442811821672, "grad_norm": 0.5565660036653327, "learning_rate": 2.33308484012035e-06, "loss": 0.1724, "step": 6388 }, { "epoch": 2.13357822674904, "grad_norm": 0.4636960099887346, "learning_rate": 2.33144122678776e-06, "loss": 0.1483, "step": 6389 }, { "epoch": 2.1339121723159127, "grad_norm": 0.4886503505718227, "learning_rate": 2.3297980165723953e-06, "loss": 0.1556, "step": 6390 }, { "epoch": 2.134246117882785, "grad_norm": 0.5243203293316878, "learning_rate": 2.3281552097224798e-06, "loss": 0.1692, "step": 6391 }, { "epoch": 2.1345800634496577, "grad_norm": 0.5512527072406636, "learning_rate": 2.326512806486181e-06, "loss": 0.1702, "step": 6392 }, { "epoch": 2.1349140090165304, "grad_norm": 0.4869770099606462, "learning_rate": 2.3248708071116005e-06, "loss": 0.1604, "step": 6393 }, { "epoch": 2.135247954583403, "grad_norm": 0.4892570672975904, "learning_rate": 2.323229211846783e-06, "loss": 0.1547, "step": 6394 }, { "epoch": 2.1355819001502754, "grad_norm": 0.49250010065010924, "learning_rate": 2.3215880209397133e-06, "loss": 0.1537, "step": 6395 }, { "epoch": 2.135915845717148, "grad_norm": 0.47927021514041757, "learning_rate": 2.319947234638308e-06, "loss": 0.1532, "step": 6396 }, { "epoch": 2.136249791284021, "grad_norm": 0.5035140794080505, "learning_rate": 2.3183068531904317e-06, "loss": 0.1624, "step": 6397 }, { "epoch": 2.136583736850893, "grad_norm": 0.5127475929816925, "learning_rate": 2.3166668768438772e-06, "loss": 0.1566, "step": 6398 }, { "epoch": 2.136917682417766, "grad_norm": 0.5021756770030403, "learning_rate": 2.31502730584639e-06, "loss": 0.16, "step": 6399 }, { "epoch": 2.1372516279846385, "grad_norm": 0.49666732574803907, "learning_rate": 2.313388140445641e-06, "loss": 0.1579, "step": 6400 }, { "epoch": 2.1375855735515112, "grad_norm": 0.48350078859534124, "learning_rate": 2.311749380889249e-06, "loss": 0.1654, "step": 6401 }, { "epoch": 2.1379195191183835, "grad_norm": 0.5439150444830929, "learning_rate": 2.310111027424764e-06, "loss": 0.1725, "step": 6402 }, { "epoch": 2.1382534646852562, "grad_norm": 0.48456783877805504, "learning_rate": 2.308473080299683e-06, "loss": 0.1408, "step": 6403 }, { "epoch": 2.138587410252129, "grad_norm": 0.4718955603006833, "learning_rate": 2.3068355397614313e-06, "loss": 0.1574, "step": 6404 }, { "epoch": 2.1389213558190017, "grad_norm": 0.5054752478801913, "learning_rate": 2.3051984060573855e-06, "loss": 0.1601, "step": 6405 }, { "epoch": 2.139255301385874, "grad_norm": 0.4728648535556213, "learning_rate": 2.303561679434849e-06, "loss": 0.1509, "step": 6406 }, { "epoch": 2.1395892469527467, "grad_norm": 0.527707051909604, "learning_rate": 2.3019253601410725e-06, "loss": 0.1704, "step": 6407 }, { "epoch": 2.1399231925196194, "grad_norm": 0.47206826548069847, "learning_rate": 2.300289448423237e-06, "loss": 0.1479, "step": 6408 }, { "epoch": 2.140257138086492, "grad_norm": 0.5071150172469431, "learning_rate": 2.2986539445284705e-06, "loss": 0.1628, "step": 6409 }, { "epoch": 2.1405910836533644, "grad_norm": 0.5114902739240026, "learning_rate": 2.2970188487038293e-06, "loss": 0.1616, "step": 6410 }, { "epoch": 2.140925029220237, "grad_norm": 0.5143644585016027, "learning_rate": 2.295384161196321e-06, "loss": 0.1639, "step": 6411 }, { "epoch": 2.14125897478711, "grad_norm": 0.47396643773274283, "learning_rate": 2.293749882252879e-06, "loss": 0.1453, "step": 6412 }, { "epoch": 2.1415929203539825, "grad_norm": 0.5070902818006285, "learning_rate": 2.2921160121203847e-06, "loss": 0.1617, "step": 6413 }, { "epoch": 2.141926865920855, "grad_norm": 0.5206899156340429, "learning_rate": 2.290482551045649e-06, "loss": 0.1673, "step": 6414 }, { "epoch": 2.1422608114877275, "grad_norm": 0.4910791307640534, "learning_rate": 2.2888494992754294e-06, "loss": 0.163, "step": 6415 }, { "epoch": 2.1425947570546002, "grad_norm": 0.49078588542590307, "learning_rate": 2.2872168570564136e-06, "loss": 0.1597, "step": 6416 }, { "epoch": 2.1429287026214725, "grad_norm": 0.48656902115080175, "learning_rate": 2.2855846246352335e-06, "loss": 0.1573, "step": 6417 }, { "epoch": 2.143262648188345, "grad_norm": 0.5285475116853615, "learning_rate": 2.2839528022584596e-06, "loss": 0.1678, "step": 6418 }, { "epoch": 2.143596593755218, "grad_norm": 0.5097327251095314, "learning_rate": 2.2823213901725927e-06, "loss": 0.1722, "step": 6419 }, { "epoch": 2.1439305393220907, "grad_norm": 0.5020115119724882, "learning_rate": 2.2806903886240815e-06, "loss": 0.1567, "step": 6420 }, { "epoch": 2.144264484888963, "grad_norm": 0.5005087225398663, "learning_rate": 2.2790597978593044e-06, "loss": 0.1608, "step": 6421 }, { "epoch": 2.1445984304558356, "grad_norm": 0.4972452001072568, "learning_rate": 2.2774296181245825e-06, "loss": 0.1708, "step": 6422 }, { "epoch": 2.1449323760227084, "grad_norm": 0.5307429371345069, "learning_rate": 2.275799849666174e-06, "loss": 0.1578, "step": 6423 }, { "epoch": 2.145266321589581, "grad_norm": 0.5104050954727964, "learning_rate": 2.274170492730277e-06, "loss": 0.1669, "step": 6424 }, { "epoch": 2.1456002671564534, "grad_norm": 0.5447982842445855, "learning_rate": 2.27254154756302e-06, "loss": 0.1671, "step": 6425 }, { "epoch": 2.145934212723326, "grad_norm": 0.539396360023142, "learning_rate": 2.2709130144104795e-06, "loss": 0.1722, "step": 6426 }, { "epoch": 2.146268158290199, "grad_norm": 0.48061812916772473, "learning_rate": 2.26928489351866e-06, "loss": 0.1572, "step": 6427 }, { "epoch": 2.146602103857071, "grad_norm": 0.5558718837443882, "learning_rate": 2.267657185133511e-06, "loss": 0.1679, "step": 6428 }, { "epoch": 2.146936049423944, "grad_norm": 0.4912517189710352, "learning_rate": 2.2660298895009157e-06, "loss": 0.1533, "step": 6429 }, { "epoch": 2.1472699949908165, "grad_norm": 0.5449170299577375, "learning_rate": 2.2644030068666993e-06, "loss": 0.1701, "step": 6430 }, { "epoch": 2.147603940557689, "grad_norm": 0.5249455334713858, "learning_rate": 2.2627765374766175e-06, "loss": 0.1519, "step": 6431 }, { "epoch": 2.1479378861245615, "grad_norm": 0.5490812081136831, "learning_rate": 2.2611504815763715e-06, "loss": 0.175, "step": 6432 }, { "epoch": 2.148271831691434, "grad_norm": 0.501323784094873, "learning_rate": 2.259524839411592e-06, "loss": 0.1545, "step": 6433 }, { "epoch": 2.148605777258307, "grad_norm": 0.46873001297372796, "learning_rate": 2.2578996112278535e-06, "loss": 0.1472, "step": 6434 }, { "epoch": 2.1489397228251796, "grad_norm": 0.5482354098239963, "learning_rate": 2.2562747972706663e-06, "loss": 0.17, "step": 6435 }, { "epoch": 2.149273668392052, "grad_norm": 0.5127389817917001, "learning_rate": 2.254650397785479e-06, "loss": 0.1563, "step": 6436 }, { "epoch": 2.1496076139589246, "grad_norm": 0.5439560842821645, "learning_rate": 2.253026413017672e-06, "loss": 0.1631, "step": 6437 }, { "epoch": 2.1499415595257974, "grad_norm": 0.5200199301047186, "learning_rate": 2.2514028432125722e-06, "loss": 0.1587, "step": 6438 }, { "epoch": 2.15027550509267, "grad_norm": 0.48845583828397887, "learning_rate": 2.249779688615435e-06, "loss": 0.1572, "step": 6439 }, { "epoch": 2.1506094506595423, "grad_norm": 0.4553983917786923, "learning_rate": 2.248156949471459e-06, "loss": 0.1534, "step": 6440 }, { "epoch": 2.150943396226415, "grad_norm": 0.4848004378929125, "learning_rate": 2.2465346260257786e-06, "loss": 0.1607, "step": 6441 }, { "epoch": 2.151277341793288, "grad_norm": 0.49347866729377543, "learning_rate": 2.2449127185234626e-06, "loss": 0.1541, "step": 6442 }, { "epoch": 2.1516112873601605, "grad_norm": 0.5741045672451875, "learning_rate": 2.2432912272095227e-06, "loss": 0.1637, "step": 6443 }, { "epoch": 2.1519452329270328, "grad_norm": 0.5120871107028994, "learning_rate": 2.2416701523288997e-06, "loss": 0.1607, "step": 6444 }, { "epoch": 2.1522791784939055, "grad_norm": 0.4437091895118815, "learning_rate": 2.240049494126479e-06, "loss": 0.1463, "step": 6445 }, { "epoch": 2.152613124060778, "grad_norm": 0.4871523787141582, "learning_rate": 2.238429252847079e-06, "loss": 0.1627, "step": 6446 }, { "epoch": 2.1529470696276505, "grad_norm": 0.49786741973353504, "learning_rate": 2.2368094287354586e-06, "loss": 0.162, "step": 6447 }, { "epoch": 2.153281015194523, "grad_norm": 0.5147125301457852, "learning_rate": 2.2351900220363083e-06, "loss": 0.1634, "step": 6448 }, { "epoch": 2.153614960761396, "grad_norm": 0.5019185934700248, "learning_rate": 2.2335710329942613e-06, "loss": 0.1608, "step": 6449 }, { "epoch": 2.1539489063282686, "grad_norm": 0.5206661699341952, "learning_rate": 2.2319524618538814e-06, "loss": 0.1593, "step": 6450 }, { "epoch": 2.154282851895141, "grad_norm": 0.5048674347182872, "learning_rate": 2.2303343088596753e-06, "loss": 0.1581, "step": 6451 }, { "epoch": 2.1546167974620136, "grad_norm": 0.48713040989661927, "learning_rate": 2.2287165742560828e-06, "loss": 0.1578, "step": 6452 }, { "epoch": 2.1549507430288863, "grad_norm": 0.49585282458056723, "learning_rate": 2.227099258287485e-06, "loss": 0.157, "step": 6453 }, { "epoch": 2.155284688595759, "grad_norm": 0.5328333267249918, "learning_rate": 2.2254823611981926e-06, "loss": 0.1658, "step": 6454 }, { "epoch": 2.1556186341626313, "grad_norm": 0.4931587019632607, "learning_rate": 2.2238658832324593e-06, "loss": 0.1547, "step": 6455 }, { "epoch": 2.155952579729504, "grad_norm": 0.4506269112296143, "learning_rate": 2.222249824634471e-06, "loss": 0.1439, "step": 6456 }, { "epoch": 2.1562865252963768, "grad_norm": 0.5495482012766242, "learning_rate": 2.220634185648354e-06, "loss": 0.1773, "step": 6457 }, { "epoch": 2.1566204708632495, "grad_norm": 0.483556197943497, "learning_rate": 2.2190189665181684e-06, "loss": 0.1548, "step": 6458 }, { "epoch": 2.1569544164301218, "grad_norm": 0.504658616614687, "learning_rate": 2.2174041674879152e-06, "loss": 0.1605, "step": 6459 }, { "epoch": 2.1572883619969945, "grad_norm": 0.47666557877987864, "learning_rate": 2.2157897888015247e-06, "loss": 0.1489, "step": 6460 }, { "epoch": 2.157622307563867, "grad_norm": 0.5125882112106799, "learning_rate": 2.214175830702871e-06, "loss": 0.1576, "step": 6461 }, { "epoch": 2.15795625313074, "grad_norm": 0.47157304371108405, "learning_rate": 2.2125622934357588e-06, "loss": 0.1528, "step": 6462 }, { "epoch": 2.158290198697612, "grad_norm": 0.5841106675855774, "learning_rate": 2.210949177243933e-06, "loss": 0.1491, "step": 6463 }, { "epoch": 2.158624144264485, "grad_norm": 0.4945144495146848, "learning_rate": 2.209336482371076e-06, "loss": 0.161, "step": 6464 }, { "epoch": 2.1589580898313576, "grad_norm": 0.5245463934666308, "learning_rate": 2.2077242090608e-06, "loss": 0.1638, "step": 6465 }, { "epoch": 2.15929203539823, "grad_norm": 0.5189920938025878, "learning_rate": 2.206112357556662e-06, "loss": 0.1619, "step": 6466 }, { "epoch": 2.1596259809651026, "grad_norm": 0.4710356006327489, "learning_rate": 2.2045009281021486e-06, "loss": 0.1492, "step": 6467 }, { "epoch": 2.1599599265319753, "grad_norm": 0.5391720083168611, "learning_rate": 2.202889920940685e-06, "loss": 0.1673, "step": 6468 }, { "epoch": 2.160293872098848, "grad_norm": 0.5300999004989326, "learning_rate": 2.2012793363156337e-06, "loss": 0.1662, "step": 6469 }, { "epoch": 2.1606278176657203, "grad_norm": 0.545770302502262, "learning_rate": 2.199669174470295e-06, "loss": 0.177, "step": 6470 }, { "epoch": 2.160961763232593, "grad_norm": 0.5544787542265645, "learning_rate": 2.1980594356478977e-06, "loss": 0.1717, "step": 6471 }, { "epoch": 2.1612957087994658, "grad_norm": 0.47765190280150527, "learning_rate": 2.196450120091617e-06, "loss": 0.1532, "step": 6472 }, { "epoch": 2.1616296543663385, "grad_norm": 0.48763213938191263, "learning_rate": 2.194841228044554e-06, "loss": 0.1495, "step": 6473 }, { "epoch": 2.1619635999332107, "grad_norm": 0.47668729251630615, "learning_rate": 2.1932327597497537e-06, "loss": 0.1552, "step": 6474 }, { "epoch": 2.1622975455000835, "grad_norm": 0.509947791860357, "learning_rate": 2.1916247154501937e-06, "loss": 0.1653, "step": 6475 }, { "epoch": 2.162631491066956, "grad_norm": 0.5242288336740697, "learning_rate": 2.190017095388789e-06, "loss": 0.167, "step": 6476 }, { "epoch": 2.1629654366338285, "grad_norm": 0.4842299004185554, "learning_rate": 2.1884098998083867e-06, "loss": 0.1544, "step": 6477 }, { "epoch": 2.163299382200701, "grad_norm": 0.5657409384245479, "learning_rate": 2.1868031289517773e-06, "loss": 0.17, "step": 6478 }, { "epoch": 2.163633327767574, "grad_norm": 0.5057846468272276, "learning_rate": 2.1851967830616773e-06, "loss": 0.1608, "step": 6479 }, { "epoch": 2.1639672733344466, "grad_norm": 0.5045992465263685, "learning_rate": 2.1835908623807462e-06, "loss": 0.1587, "step": 6480 }, { "epoch": 2.164301218901319, "grad_norm": 0.5337740972894388, "learning_rate": 2.1819853671515774e-06, "loss": 0.1659, "step": 6481 }, { "epoch": 2.1646351644681916, "grad_norm": 0.5210496803965817, "learning_rate": 2.180380297616702e-06, "loss": 0.1591, "step": 6482 }, { "epoch": 2.1649691100350643, "grad_norm": 0.5248088157313302, "learning_rate": 2.178775654018581e-06, "loss": 0.1624, "step": 6483 }, { "epoch": 2.165303055601937, "grad_norm": 0.5262130819709666, "learning_rate": 2.177171436599618e-06, "loss": 0.1688, "step": 6484 }, { "epoch": 2.1656370011688093, "grad_norm": 0.5361036965685855, "learning_rate": 2.1755676456021454e-06, "loss": 0.1605, "step": 6485 }, { "epoch": 2.165970946735682, "grad_norm": 0.4938672738227419, "learning_rate": 2.173964281268436e-06, "loss": 0.1545, "step": 6486 }, { "epoch": 2.1663048923025547, "grad_norm": 0.5366176125755003, "learning_rate": 2.1723613438407e-06, "loss": 0.1582, "step": 6487 }, { "epoch": 2.1666388378694275, "grad_norm": 0.5677703842430186, "learning_rate": 2.170758833561075e-06, "loss": 0.1607, "step": 6488 }, { "epoch": 2.1669727834362997, "grad_norm": 0.5831020985138208, "learning_rate": 2.1691567506716433e-06, "loss": 0.1692, "step": 6489 }, { "epoch": 2.1673067290031724, "grad_norm": 0.5308398773677225, "learning_rate": 2.1675550954144147e-06, "loss": 0.1566, "step": 6490 }, { "epoch": 2.167640674570045, "grad_norm": 0.47316973548002056, "learning_rate": 2.1659538680313403e-06, "loss": 0.1487, "step": 6491 }, { "epoch": 2.167974620136918, "grad_norm": 0.5451574177675723, "learning_rate": 2.1643530687643036e-06, "loss": 0.1648, "step": 6492 }, { "epoch": 2.16830856570379, "grad_norm": 0.559874991831784, "learning_rate": 2.1627526978551265e-06, "loss": 0.1522, "step": 6493 }, { "epoch": 2.168642511270663, "grad_norm": 0.5653685743132745, "learning_rate": 2.1611527555455604e-06, "loss": 0.1705, "step": 6494 }, { "epoch": 2.1689764568375356, "grad_norm": 0.4836390569478203, "learning_rate": 2.159553242077298e-06, "loss": 0.1603, "step": 6495 }, { "epoch": 2.169310402404408, "grad_norm": 0.5285691812855493, "learning_rate": 2.1579541576919624e-06, "loss": 0.1624, "step": 6496 }, { "epoch": 2.1696443479712806, "grad_norm": 0.5499127677294646, "learning_rate": 2.1563555026311166e-06, "loss": 0.1644, "step": 6497 }, { "epoch": 2.1699782935381533, "grad_norm": 0.5111505875055791, "learning_rate": 2.154757277136251e-06, "loss": 0.1594, "step": 6498 }, { "epoch": 2.170312239105026, "grad_norm": 0.4626953791952365, "learning_rate": 2.153159481448805e-06, "loss": 0.1402, "step": 6499 }, { "epoch": 2.1706461846718983, "grad_norm": 0.5450349768292784, "learning_rate": 2.1515621158101372e-06, "loss": 0.1658, "step": 6500 }, { "epoch": 2.170980130238771, "grad_norm": 0.4631639748506876, "learning_rate": 2.1499651804615534e-06, "loss": 0.1496, "step": 6501 }, { "epoch": 2.1713140758056437, "grad_norm": 0.5041192964368789, "learning_rate": 2.148368675644285e-06, "loss": 0.1693, "step": 6502 }, { "epoch": 2.1716480213725164, "grad_norm": 0.5020294331070693, "learning_rate": 2.146772601599507e-06, "loss": 0.1599, "step": 6503 }, { "epoch": 2.1719819669393887, "grad_norm": 0.5070209282890003, "learning_rate": 2.1451769585683196e-06, "loss": 0.153, "step": 6504 }, { "epoch": 2.1723159125062614, "grad_norm": 0.5004602093524981, "learning_rate": 2.14358174679177e-06, "loss": 0.1534, "step": 6505 }, { "epoch": 2.172649858073134, "grad_norm": 0.4939325771882108, "learning_rate": 2.1419869665108303e-06, "loss": 0.1525, "step": 6506 }, { "epoch": 2.172983803640007, "grad_norm": 0.5500117190899586, "learning_rate": 2.140392617966412e-06, "loss": 0.1658, "step": 6507 }, { "epoch": 2.173317749206879, "grad_norm": 0.536575903530373, "learning_rate": 2.1387987013993583e-06, "loss": 0.1588, "step": 6508 }, { "epoch": 2.173651694773752, "grad_norm": 0.49710428559563785, "learning_rate": 2.137205217050452e-06, "loss": 0.1607, "step": 6509 }, { "epoch": 2.1739856403406246, "grad_norm": 0.46432099575708613, "learning_rate": 2.135612165160404e-06, "loss": 0.1476, "step": 6510 }, { "epoch": 2.1743195859074973, "grad_norm": 0.5301977692094965, "learning_rate": 2.1340195459698653e-06, "loss": 0.1652, "step": 6511 }, { "epoch": 2.1746535314743696, "grad_norm": 0.4947325177627652, "learning_rate": 2.1324273597194223e-06, "loss": 0.159, "step": 6512 }, { "epoch": 2.1749874770412423, "grad_norm": 0.4855377166862449, "learning_rate": 2.1308356066495893e-06, "loss": 0.1516, "step": 6513 }, { "epoch": 2.175321422608115, "grad_norm": 0.47997522170058093, "learning_rate": 2.1292442870008213e-06, "loss": 0.155, "step": 6514 }, { "epoch": 2.1756553681749873, "grad_norm": 0.5003823360484102, "learning_rate": 2.1276534010135053e-06, "loss": 0.1468, "step": 6515 }, { "epoch": 2.17598931374186, "grad_norm": 0.5190481271339713, "learning_rate": 2.1260629489279662e-06, "loss": 0.1649, "step": 6516 }, { "epoch": 2.1763232593087327, "grad_norm": 0.4604055659642761, "learning_rate": 2.1244729309844564e-06, "loss": 0.1495, "step": 6517 }, { "epoch": 2.1766572048756054, "grad_norm": 0.558870729021529, "learning_rate": 2.1228833474231703e-06, "loss": 0.1622, "step": 6518 }, { "epoch": 2.1769911504424777, "grad_norm": 0.48777199157089124, "learning_rate": 2.1212941984842295e-06, "loss": 0.1514, "step": 6519 }, { "epoch": 2.1773250960093504, "grad_norm": 0.49435336792484225, "learning_rate": 2.1197054844076975e-06, "loss": 0.1488, "step": 6520 }, { "epoch": 2.177659041576223, "grad_norm": 0.5453124726321699, "learning_rate": 2.118117205433563e-06, "loss": 0.167, "step": 6521 }, { "epoch": 2.177992987143096, "grad_norm": 0.4889668124604241, "learning_rate": 2.1165293618017612e-06, "loss": 0.1562, "step": 6522 }, { "epoch": 2.178326932709968, "grad_norm": 0.5060322916216825, "learning_rate": 2.1149419537521495e-06, "loss": 0.1636, "step": 6523 }, { "epoch": 2.178660878276841, "grad_norm": 0.572919121563999, "learning_rate": 2.1133549815245273e-06, "loss": 0.1706, "step": 6524 }, { "epoch": 2.1789948238437136, "grad_norm": 0.5270190608334708, "learning_rate": 2.1117684453586236e-06, "loss": 0.1531, "step": 6525 }, { "epoch": 2.179328769410586, "grad_norm": 0.5559601743872964, "learning_rate": 2.110182345494105e-06, "loss": 0.1719, "step": 6526 }, { "epoch": 2.1796627149774586, "grad_norm": 0.5192431372887695, "learning_rate": 2.1085966821705662e-06, "loss": 0.1595, "step": 6527 }, { "epoch": 2.1799966605443313, "grad_norm": 0.501123467668914, "learning_rate": 2.1070114556275473e-06, "loss": 0.1564, "step": 6528 }, { "epoch": 2.180330606111204, "grad_norm": 0.4801142572023988, "learning_rate": 2.1054266661045105e-06, "loss": 0.145, "step": 6529 }, { "epoch": 2.1806645516780763, "grad_norm": 0.5100950044547728, "learning_rate": 2.103842313840859e-06, "loss": 0.1618, "step": 6530 }, { "epoch": 2.180998497244949, "grad_norm": 0.49876239967556707, "learning_rate": 2.1022583990759265e-06, "loss": 0.1579, "step": 6531 }, { "epoch": 2.1813324428118217, "grad_norm": 0.5300376773967295, "learning_rate": 2.1006749220489834e-06, "loss": 0.1601, "step": 6532 }, { "epoch": 2.1816663883786944, "grad_norm": 0.5139517459957516, "learning_rate": 2.0990918829992307e-06, "loss": 0.1638, "step": 6533 }, { "epoch": 2.1820003339455667, "grad_norm": 0.4956864720621521, "learning_rate": 2.097509282165806e-06, "loss": 0.1678, "step": 6534 }, { "epoch": 2.1823342795124394, "grad_norm": 0.5514089951815736, "learning_rate": 2.0959271197877816e-06, "loss": 0.1699, "step": 6535 }, { "epoch": 2.182668225079312, "grad_norm": 0.529402674892044, "learning_rate": 2.0943453961041587e-06, "loss": 0.1583, "step": 6536 }, { "epoch": 2.183002170646185, "grad_norm": 0.5027930243812394, "learning_rate": 2.0927641113538764e-06, "loss": 0.1558, "step": 6537 }, { "epoch": 2.183336116213057, "grad_norm": 0.48247284060734513, "learning_rate": 2.0911832657758086e-06, "loss": 0.1504, "step": 6538 }, { "epoch": 2.18367006177993, "grad_norm": 0.553224289771926, "learning_rate": 2.089602859608757e-06, "loss": 0.1612, "step": 6539 }, { "epoch": 2.1840040073468026, "grad_norm": 0.5128855380439065, "learning_rate": 2.088022893091462e-06, "loss": 0.1688, "step": 6540 }, { "epoch": 2.1843379529136753, "grad_norm": 0.4987615340223241, "learning_rate": 2.086443366462598e-06, "loss": 0.1584, "step": 6541 }, { "epoch": 2.1846718984805475, "grad_norm": 0.45996958264592197, "learning_rate": 2.084864279960768e-06, "loss": 0.1414, "step": 6542 }, { "epoch": 2.1850058440474203, "grad_norm": 0.5060506307251191, "learning_rate": 2.0832856338245157e-06, "loss": 0.1662, "step": 6543 }, { "epoch": 2.185339789614293, "grad_norm": 0.5240449344599418, "learning_rate": 2.0817074282923087e-06, "loss": 0.17, "step": 6544 }, { "epoch": 2.1856737351811653, "grad_norm": 0.48387681849094627, "learning_rate": 2.080129663602557e-06, "loss": 0.1501, "step": 6545 }, { "epoch": 2.186007680748038, "grad_norm": 0.4837620146066662, "learning_rate": 2.0785523399935996e-06, "loss": 0.1548, "step": 6546 }, { "epoch": 2.1863416263149107, "grad_norm": 0.5719904583600143, "learning_rate": 2.076975457703712e-06, "loss": 0.1661, "step": 6547 }, { "epoch": 2.1866755718817834, "grad_norm": 0.5294141126429812, "learning_rate": 2.0753990169710973e-06, "loss": 0.1555, "step": 6548 }, { "epoch": 2.1870095174486557, "grad_norm": 0.5038994789068824, "learning_rate": 2.0738230180338993e-06, "loss": 0.1579, "step": 6549 }, { "epoch": 2.1873434630155284, "grad_norm": 0.5180801238233425, "learning_rate": 2.0722474611301868e-06, "loss": 0.1565, "step": 6550 }, { "epoch": 2.187677408582401, "grad_norm": 0.517550029930567, "learning_rate": 2.0706723464979687e-06, "loss": 0.1631, "step": 6551 }, { "epoch": 2.188011354149274, "grad_norm": 0.5632560056493515, "learning_rate": 2.0690976743751844e-06, "loss": 0.1737, "step": 6552 }, { "epoch": 2.188345299716146, "grad_norm": 0.5057496168693179, "learning_rate": 2.0675234449997085e-06, "loss": 0.1558, "step": 6553 }, { "epoch": 2.188679245283019, "grad_norm": 0.49701932491339085, "learning_rate": 2.065949658609343e-06, "loss": 0.1564, "step": 6554 }, { "epoch": 2.1890131908498915, "grad_norm": 0.49770206149039964, "learning_rate": 2.0643763154418304e-06, "loss": 0.1545, "step": 6555 }, { "epoch": 2.1893471364167643, "grad_norm": 0.5063235022252858, "learning_rate": 2.06280341573484e-06, "loss": 0.1538, "step": 6556 }, { "epoch": 2.1896810819836365, "grad_norm": 0.4810621047438368, "learning_rate": 2.0612309597259776e-06, "loss": 0.1518, "step": 6557 }, { "epoch": 2.1900150275505093, "grad_norm": 0.5329849457976076, "learning_rate": 2.059658947652784e-06, "loss": 0.1588, "step": 6558 }, { "epoch": 2.190348973117382, "grad_norm": 0.4806564305506935, "learning_rate": 2.058087379752725e-06, "loss": 0.1555, "step": 6559 }, { "epoch": 2.1906829186842547, "grad_norm": 0.5063446638868672, "learning_rate": 2.056516256263208e-06, "loss": 0.1567, "step": 6560 }, { "epoch": 2.191016864251127, "grad_norm": 0.5230077420968994, "learning_rate": 2.0549455774215705e-06, "loss": 0.156, "step": 6561 }, { "epoch": 2.1913508098179997, "grad_norm": 0.49869886677540565, "learning_rate": 2.0533753434650784e-06, "loss": 0.1652, "step": 6562 }, { "epoch": 2.1916847553848724, "grad_norm": 0.5172552699731698, "learning_rate": 2.0518055546309362e-06, "loss": 0.1523, "step": 6563 }, { "epoch": 2.1920187009517447, "grad_norm": 0.4894924237454037, "learning_rate": 2.0502362111562806e-06, "loss": 0.1578, "step": 6564 }, { "epoch": 2.1923526465186174, "grad_norm": 0.5085326999824232, "learning_rate": 2.048667313278176e-06, "loss": 0.1537, "step": 6565 }, { "epoch": 2.19268659208549, "grad_norm": 0.49822549747118766, "learning_rate": 2.0470988612336264e-06, "loss": 0.1536, "step": 6566 }, { "epoch": 2.193020537652363, "grad_norm": 0.5148739261403503, "learning_rate": 2.045530855259561e-06, "loss": 0.1637, "step": 6567 }, { "epoch": 2.193354483219235, "grad_norm": 0.5168387604417966, "learning_rate": 2.043963295592848e-06, "loss": 0.166, "step": 6568 }, { "epoch": 2.193688428786108, "grad_norm": 0.5076478055233222, "learning_rate": 2.042396182470285e-06, "loss": 0.1676, "step": 6569 }, { "epoch": 2.1940223743529805, "grad_norm": 0.4850803716080977, "learning_rate": 2.040829516128605e-06, "loss": 0.1507, "step": 6570 }, { "epoch": 2.1943563199198532, "grad_norm": 0.5096244454579222, "learning_rate": 2.0392632968044686e-06, "loss": 0.1642, "step": 6571 }, { "epoch": 2.1946902654867255, "grad_norm": 0.4942711483631219, "learning_rate": 2.0376975247344736e-06, "loss": 0.1559, "step": 6572 }, { "epoch": 2.1950242110535982, "grad_norm": 0.5207629645400701, "learning_rate": 2.0361322001551466e-06, "loss": 0.1721, "step": 6573 }, { "epoch": 2.195358156620471, "grad_norm": 0.45191543742406315, "learning_rate": 2.034567323302949e-06, "loss": 0.1337, "step": 6574 }, { "epoch": 2.1956921021873432, "grad_norm": 0.48335848242486656, "learning_rate": 2.0330028944142736e-06, "loss": 0.1551, "step": 6575 }, { "epoch": 2.196026047754216, "grad_norm": 0.4956348807447585, "learning_rate": 2.031438913725448e-06, "loss": 0.1504, "step": 6576 }, { "epoch": 2.1963599933210887, "grad_norm": 0.5681269369080285, "learning_rate": 2.0298753814727267e-06, "loss": 0.1531, "step": 6577 }, { "epoch": 2.1966939388879614, "grad_norm": 0.5291428022534318, "learning_rate": 2.028312297892303e-06, "loss": 0.1621, "step": 6578 }, { "epoch": 2.1970278844548337, "grad_norm": 0.5239390275772546, "learning_rate": 2.0267496632202953e-06, "loss": 0.166, "step": 6579 }, { "epoch": 2.1973618300217064, "grad_norm": 0.5409865518744863, "learning_rate": 2.0251874776927598e-06, "loss": 0.1643, "step": 6580 }, { "epoch": 2.197695775588579, "grad_norm": 0.5372965064486103, "learning_rate": 2.0236257415456833e-06, "loss": 0.1646, "step": 6581 }, { "epoch": 2.198029721155452, "grad_norm": 0.509285554214082, "learning_rate": 2.022064455014986e-06, "loss": 0.1623, "step": 6582 }, { "epoch": 2.198363666722324, "grad_norm": 0.757210617776752, "learning_rate": 2.0205036183365145e-06, "loss": 0.1749, "step": 6583 }, { "epoch": 2.198697612289197, "grad_norm": 0.4667375146591432, "learning_rate": 2.018943231746056e-06, "loss": 0.1455, "step": 6584 }, { "epoch": 2.1990315578560695, "grad_norm": 0.5693617648213951, "learning_rate": 2.0173832954793216e-06, "loss": 0.164, "step": 6585 }, { "epoch": 2.1993655034229422, "grad_norm": 0.49312895306196874, "learning_rate": 2.0158238097719597e-06, "loss": 0.1543, "step": 6586 }, { "epoch": 2.1996994489898145, "grad_norm": 0.5064476054065244, "learning_rate": 2.0142647748595502e-06, "loss": 0.1651, "step": 6587 }, { "epoch": 2.2000333945566872, "grad_norm": 0.5332088995867316, "learning_rate": 2.0127061909776e-06, "loss": 0.1661, "step": 6588 }, { "epoch": 2.20036734012356, "grad_norm": 0.47982702274764294, "learning_rate": 2.0111480583615566e-06, "loss": 0.1466, "step": 6589 }, { "epoch": 2.2007012856904327, "grad_norm": 0.4910497844263571, "learning_rate": 2.00959037724679e-06, "loss": 0.1515, "step": 6590 }, { "epoch": 2.201035231257305, "grad_norm": 0.5585938097349513, "learning_rate": 2.0080331478686087e-06, "loss": 0.1732, "step": 6591 }, { "epoch": 2.2013691768241777, "grad_norm": 0.4896922496577059, "learning_rate": 2.006476370462247e-06, "loss": 0.1557, "step": 6592 }, { "epoch": 2.2017031223910504, "grad_norm": 0.4576228652187881, "learning_rate": 2.0049200452628803e-06, "loss": 0.1437, "step": 6593 }, { "epoch": 2.2020370679579226, "grad_norm": 0.4734216591875249, "learning_rate": 2.0033641725056048e-06, "loss": 0.1487, "step": 6594 }, { "epoch": 2.2023710135247954, "grad_norm": 0.5060810259961187, "learning_rate": 2.001808752425457e-06, "loss": 0.1471, "step": 6595 }, { "epoch": 2.202704959091668, "grad_norm": 0.48329648438901496, "learning_rate": 2.000253785257398e-06, "loss": 0.1525, "step": 6596 }, { "epoch": 2.203038904658541, "grad_norm": 0.5357710590885335, "learning_rate": 1.998699271236326e-06, "loss": 0.1612, "step": 6597 }, { "epoch": 2.203372850225413, "grad_norm": 0.525150917969299, "learning_rate": 1.997145210597068e-06, "loss": 0.1697, "step": 6598 }, { "epoch": 2.203706795792286, "grad_norm": 0.47723396449168365, "learning_rate": 1.9955916035743855e-06, "loss": 0.1512, "step": 6599 }, { "epoch": 2.2040407413591585, "grad_norm": 0.47291493505957904, "learning_rate": 1.9940384504029647e-06, "loss": 0.1562, "step": 6600 }, { "epoch": 2.2043746869260312, "grad_norm": 0.5084750289652352, "learning_rate": 1.9924857513174324e-06, "loss": 0.1588, "step": 6601 }, { "epoch": 2.2047086324929035, "grad_norm": 0.5015177899122328, "learning_rate": 1.990933506552337e-06, "loss": 0.1591, "step": 6602 }, { "epoch": 2.205042578059776, "grad_norm": 0.5140862563797094, "learning_rate": 1.989381716342167e-06, "loss": 0.1669, "step": 6603 }, { "epoch": 2.205376523626649, "grad_norm": 0.5335787711446098, "learning_rate": 1.9878303809213367e-06, "loss": 0.1512, "step": 6604 }, { "epoch": 2.2057104691935217, "grad_norm": 0.5424751411330692, "learning_rate": 1.986279500524197e-06, "loss": 0.1657, "step": 6605 }, { "epoch": 2.206044414760394, "grad_norm": 0.5056230951340244, "learning_rate": 1.984729075385022e-06, "loss": 0.1596, "step": 6606 }, { "epoch": 2.2063783603272666, "grad_norm": 0.47996255878953, "learning_rate": 1.983179105738026e-06, "loss": 0.1582, "step": 6607 }, { "epoch": 2.2067123058941394, "grad_norm": 0.4911356123723574, "learning_rate": 1.9816295918173462e-06, "loss": 0.1534, "step": 6608 }, { "epoch": 2.207046251461012, "grad_norm": 0.465393956829766, "learning_rate": 1.9800805338570562e-06, "loss": 0.1601, "step": 6609 }, { "epoch": 2.2073801970278843, "grad_norm": 0.4979367096913454, "learning_rate": 1.9785319320911623e-06, "loss": 0.1463, "step": 6610 }, { "epoch": 2.207714142594757, "grad_norm": 0.486772494636018, "learning_rate": 1.9769837867535948e-06, "loss": 0.1557, "step": 6611 }, { "epoch": 2.20804808816163, "grad_norm": 0.487323805440558, "learning_rate": 1.9754360980782227e-06, "loss": 0.1563, "step": 6612 }, { "epoch": 2.208382033728502, "grad_norm": 0.5575220348139566, "learning_rate": 1.973888866298839e-06, "loss": 0.1676, "step": 6613 }, { "epoch": 2.2087159792953748, "grad_norm": 0.537349301156599, "learning_rate": 1.972342091649176e-06, "loss": 0.1667, "step": 6614 }, { "epoch": 2.2090499248622475, "grad_norm": 0.49476253066855386, "learning_rate": 1.9707957743628854e-06, "loss": 0.1534, "step": 6615 }, { "epoch": 2.20938387042912, "grad_norm": 0.5405416629754679, "learning_rate": 1.9692499146735646e-06, "loss": 0.1642, "step": 6616 }, { "epoch": 2.2097178159959925, "grad_norm": 0.5165616973685996, "learning_rate": 1.967704512814728e-06, "loss": 0.164, "step": 6617 }, { "epoch": 2.210051761562865, "grad_norm": 0.524024480111212, "learning_rate": 1.966159569019831e-06, "loss": 0.1629, "step": 6618 }, { "epoch": 2.210385707129738, "grad_norm": 0.528009150373322, "learning_rate": 1.9646150835222517e-06, "loss": 0.1641, "step": 6619 }, { "epoch": 2.2107196526966106, "grad_norm": 0.48772901021730125, "learning_rate": 1.9630710565553063e-06, "loss": 0.1532, "step": 6620 }, { "epoch": 2.211053598263483, "grad_norm": 0.4371984282049327, "learning_rate": 1.9615274883522327e-06, "loss": 0.1386, "step": 6621 }, { "epoch": 2.2113875438303556, "grad_norm": 0.5506478629606789, "learning_rate": 1.9599843791462123e-06, "loss": 0.1703, "step": 6622 }, { "epoch": 2.2117214893972283, "grad_norm": 0.5183244278342396, "learning_rate": 1.958441729170345e-06, "loss": 0.1558, "step": 6623 }, { "epoch": 2.2120554349641006, "grad_norm": 0.4653062989884138, "learning_rate": 1.9568995386576695e-06, "loss": 0.148, "step": 6624 }, { "epoch": 2.2123893805309733, "grad_norm": 0.49138499728836343, "learning_rate": 1.9553578078411476e-06, "loss": 0.1541, "step": 6625 }, { "epoch": 2.212723326097846, "grad_norm": 0.5254863963640038, "learning_rate": 1.953816536953681e-06, "loss": 0.1546, "step": 6626 }, { "epoch": 2.2130572716647188, "grad_norm": 0.5206964772367224, "learning_rate": 1.95227572622809e-06, "loss": 0.1621, "step": 6627 }, { "epoch": 2.213391217231591, "grad_norm": 0.563970641509512, "learning_rate": 1.95073537589714e-06, "loss": 0.1722, "step": 6628 }, { "epoch": 2.2137251627984638, "grad_norm": 0.4877735248249902, "learning_rate": 1.949195486193514e-06, "loss": 0.1466, "step": 6629 }, { "epoch": 2.2140591083653365, "grad_norm": 0.5032630664511833, "learning_rate": 1.9476560573498332e-06, "loss": 0.1651, "step": 6630 }, { "epoch": 2.214393053932209, "grad_norm": 0.5375345394822361, "learning_rate": 1.946117089598644e-06, "loss": 0.1737, "step": 6631 }, { "epoch": 2.2147269994990815, "grad_norm": 0.4730562669425479, "learning_rate": 1.9445785831724274e-06, "loss": 0.1511, "step": 6632 }, { "epoch": 2.215060945065954, "grad_norm": 0.5274486320082097, "learning_rate": 1.943040538303591e-06, "loss": 0.1596, "step": 6633 }, { "epoch": 2.215394890632827, "grad_norm": 0.5020720245794152, "learning_rate": 1.9415029552244758e-06, "loss": 0.1585, "step": 6634 }, { "epoch": 2.2157288361996996, "grad_norm": 0.5225443854090014, "learning_rate": 1.939965834167354e-06, "loss": 0.1642, "step": 6635 }, { "epoch": 2.216062781766572, "grad_norm": 0.5246272127996099, "learning_rate": 1.9384291753644215e-06, "loss": 0.1531, "step": 6636 }, { "epoch": 2.2163967273334446, "grad_norm": 0.4890983704136873, "learning_rate": 1.9368929790478126e-06, "loss": 0.155, "step": 6637 }, { "epoch": 2.2167306729003173, "grad_norm": 0.5091358082023, "learning_rate": 1.935357245449583e-06, "loss": 0.1508, "step": 6638 }, { "epoch": 2.21706461846719, "grad_norm": 0.5688713170703724, "learning_rate": 1.9338219748017297e-06, "loss": 0.1745, "step": 6639 }, { "epoch": 2.2173985640340623, "grad_norm": 0.5001120410713611, "learning_rate": 1.932287167336168e-06, "loss": 0.1617, "step": 6640 }, { "epoch": 2.217732509600935, "grad_norm": 0.5223274403848096, "learning_rate": 1.9307528232847533e-06, "loss": 0.1588, "step": 6641 }, { "epoch": 2.2180664551678078, "grad_norm": 0.49231056426484826, "learning_rate": 1.9292189428792617e-06, "loss": 0.1453, "step": 6642 }, { "epoch": 2.21840040073468, "grad_norm": 0.5447393922671808, "learning_rate": 1.927685526351408e-06, "loss": 0.1614, "step": 6643 }, { "epoch": 2.2187343463015528, "grad_norm": 0.5130642995180773, "learning_rate": 1.9261525739328273e-06, "loss": 0.1539, "step": 6644 }, { "epoch": 2.2190682918684255, "grad_norm": 0.4673620218126964, "learning_rate": 1.924620085855097e-06, "loss": 0.1416, "step": 6645 }, { "epoch": 2.219402237435298, "grad_norm": 0.5239963893912938, "learning_rate": 1.923088062349713e-06, "loss": 0.1574, "step": 6646 }, { "epoch": 2.2197361830021705, "grad_norm": 0.5192341161140694, "learning_rate": 1.9215565036481083e-06, "loss": 0.1606, "step": 6647 }, { "epoch": 2.220070128569043, "grad_norm": 0.520663322092088, "learning_rate": 1.920025409981639e-06, "loss": 0.1663, "step": 6648 }, { "epoch": 2.220404074135916, "grad_norm": 0.5032776512637247, "learning_rate": 1.918494781581599e-06, "loss": 0.158, "step": 6649 }, { "epoch": 2.2207380197027886, "grad_norm": 0.4713101365681985, "learning_rate": 1.9169646186792025e-06, "loss": 0.1467, "step": 6650 }, { "epoch": 2.221071965269661, "grad_norm": 0.46675389053456917, "learning_rate": 1.9154349215056052e-06, "loss": 0.1546, "step": 6651 }, { "epoch": 2.2214059108365336, "grad_norm": 0.5200622300655069, "learning_rate": 1.9139056902918805e-06, "loss": 0.1665, "step": 6652 }, { "epoch": 2.2217398564034063, "grad_norm": 0.48668102810544783, "learning_rate": 1.912376925269041e-06, "loss": 0.1544, "step": 6653 }, { "epoch": 2.222073801970279, "grad_norm": 0.5121790498856402, "learning_rate": 1.910848626668021e-06, "loss": 0.1533, "step": 6654 }, { "epoch": 2.2224077475371513, "grad_norm": 0.4908382616540141, "learning_rate": 1.9093207947196908e-06, "loss": 0.1604, "step": 6655 }, { "epoch": 2.222741693104024, "grad_norm": 0.4999744558438863, "learning_rate": 1.9077934296548445e-06, "loss": 0.1542, "step": 6656 }, { "epoch": 2.2230756386708967, "grad_norm": 0.5256425135871804, "learning_rate": 1.9062665317042106e-06, "loss": 0.1519, "step": 6657 }, { "epoch": 2.2234095842377695, "grad_norm": 0.4849691863222047, "learning_rate": 1.9047401010984456e-06, "loss": 0.1504, "step": 6658 }, { "epoch": 2.2237435298046417, "grad_norm": 0.4791081942937301, "learning_rate": 1.9032141380681329e-06, "loss": 0.1499, "step": 6659 }, { "epoch": 2.2240774753715145, "grad_norm": 0.5202987778444312, "learning_rate": 1.9016886428437893e-06, "loss": 0.1667, "step": 6660 }, { "epoch": 2.224411420938387, "grad_norm": 0.4901000495769656, "learning_rate": 1.9001636156558562e-06, "loss": 0.1565, "step": 6661 }, { "epoch": 2.2247453665052594, "grad_norm": 0.4823202571420205, "learning_rate": 1.8986390567347085e-06, "loss": 0.1529, "step": 6662 }, { "epoch": 2.225079312072132, "grad_norm": 0.5025149193666405, "learning_rate": 1.8971149663106482e-06, "loss": 0.1572, "step": 6663 }, { "epoch": 2.225413257639005, "grad_norm": 0.4810470147277487, "learning_rate": 1.8955913446139096e-06, "loss": 0.1569, "step": 6664 }, { "epoch": 2.2257472032058776, "grad_norm": 0.46343686657363625, "learning_rate": 1.8940681918746495e-06, "loss": 0.1452, "step": 6665 }, { "epoch": 2.22608114877275, "grad_norm": 0.47263048459042295, "learning_rate": 1.8925455083229622e-06, "loss": 0.1542, "step": 6666 }, { "epoch": 2.2264150943396226, "grad_norm": 0.48227650724640997, "learning_rate": 1.891023294188863e-06, "loss": 0.1566, "step": 6667 }, { "epoch": 2.2267490399064953, "grad_norm": 0.5703992870293059, "learning_rate": 1.8895015497023022e-06, "loss": 0.1628, "step": 6668 }, { "epoch": 2.227082985473368, "grad_norm": 0.5171876577597354, "learning_rate": 1.8879802750931574e-06, "loss": 0.1594, "step": 6669 }, { "epoch": 2.2274169310402403, "grad_norm": 0.4918589655419636, "learning_rate": 1.886459470591237e-06, "loss": 0.1492, "step": 6670 }, { "epoch": 2.227750876607113, "grad_norm": 0.535274808727, "learning_rate": 1.8849391364262721e-06, "loss": 0.1616, "step": 6671 }, { "epoch": 2.2280848221739857, "grad_norm": 0.4918788564052449, "learning_rate": 1.883419272827931e-06, "loss": 0.1544, "step": 6672 }, { "epoch": 2.228418767740858, "grad_norm": 0.5305209595696436, "learning_rate": 1.881899880025802e-06, "loss": 0.1537, "step": 6673 }, { "epoch": 2.2287527133077307, "grad_norm": 0.5543131551389201, "learning_rate": 1.8803809582494143e-06, "loss": 0.1598, "step": 6674 }, { "epoch": 2.2290866588746034, "grad_norm": 0.5080012549587938, "learning_rate": 1.878862507728213e-06, "loss": 0.1602, "step": 6675 }, { "epoch": 2.229420604441476, "grad_norm": 0.48004398539465265, "learning_rate": 1.877344528691582e-06, "loss": 0.147, "step": 6676 }, { "epoch": 2.2297545500083484, "grad_norm": 0.5318763956775434, "learning_rate": 1.8758270213688263e-06, "loss": 0.1656, "step": 6677 }, { "epoch": 2.230088495575221, "grad_norm": 0.527960686138271, "learning_rate": 1.8743099859891866e-06, "loss": 0.1584, "step": 6678 }, { "epoch": 2.230422441142094, "grad_norm": 0.4634772222288885, "learning_rate": 1.8727934227818255e-06, "loss": 0.1459, "step": 6679 }, { "epoch": 2.2307563867089666, "grad_norm": 0.5262489510400422, "learning_rate": 1.8712773319758398e-06, "loss": 0.1623, "step": 6680 }, { "epoch": 2.231090332275839, "grad_norm": 0.49489280932867763, "learning_rate": 1.8697617138002545e-06, "loss": 0.1478, "step": 6681 }, { "epoch": 2.2314242778427116, "grad_norm": 0.5031062159733811, "learning_rate": 1.8682465684840178e-06, "loss": 0.1564, "step": 6682 }, { "epoch": 2.2317582234095843, "grad_norm": 0.4953788465486049, "learning_rate": 1.8667318962560137e-06, "loss": 0.1626, "step": 6683 }, { "epoch": 2.232092168976457, "grad_norm": 0.5329151547726576, "learning_rate": 1.865217697345048e-06, "loss": 0.1674, "step": 6684 }, { "epoch": 2.2324261145433293, "grad_norm": 0.5510845744808649, "learning_rate": 1.86370397197986e-06, "loss": 0.1663, "step": 6685 }, { "epoch": 2.232760060110202, "grad_norm": 0.5461167706937765, "learning_rate": 1.8621907203891159e-06, "loss": 0.1704, "step": 6686 }, { "epoch": 2.2330940056770747, "grad_norm": 0.5425557341551985, "learning_rate": 1.8606779428014116e-06, "loss": 0.1778, "step": 6687 }, { "epoch": 2.2334279512439474, "grad_norm": 0.5394947897686756, "learning_rate": 1.8591656394452667e-06, "loss": 0.1649, "step": 6688 }, { "epoch": 2.2337618968108197, "grad_norm": 0.4755957408400034, "learning_rate": 1.8576538105491359e-06, "loss": 0.1519, "step": 6689 }, { "epoch": 2.2340958423776924, "grad_norm": 0.49279914369472505, "learning_rate": 1.8561424563413949e-06, "loss": 0.1529, "step": 6690 }, { "epoch": 2.234429787944565, "grad_norm": 0.495172322606566, "learning_rate": 1.8546315770503537e-06, "loss": 0.153, "step": 6691 }, { "epoch": 2.2347637335114374, "grad_norm": 0.4707511082921486, "learning_rate": 1.8531211729042486e-06, "loss": 0.1505, "step": 6692 }, { "epoch": 2.23509767907831, "grad_norm": 0.5004010993757639, "learning_rate": 1.8516112441312451e-06, "loss": 0.1559, "step": 6693 }, { "epoch": 2.235431624645183, "grad_norm": 0.5388923994203958, "learning_rate": 1.8501017909594327e-06, "loss": 0.1623, "step": 6694 }, { "epoch": 2.2357655702120556, "grad_norm": 0.5725007414524714, "learning_rate": 1.8485928136168353e-06, "loss": 0.1733, "step": 6695 }, { "epoch": 2.236099515778928, "grad_norm": 0.5413884294057945, "learning_rate": 1.8470843123313982e-06, "loss": 0.1526, "step": 6696 }, { "epoch": 2.2364334613458006, "grad_norm": 0.5215501259534671, "learning_rate": 1.8455762873309995e-06, "loss": 0.1599, "step": 6697 }, { "epoch": 2.2367674069126733, "grad_norm": 0.5434730278673744, "learning_rate": 1.844068738843446e-06, "loss": 0.1697, "step": 6698 }, { "epoch": 2.237101352479546, "grad_norm": 0.8447780480576517, "learning_rate": 1.8425616670964702e-06, "loss": 0.1591, "step": 6699 }, { "epoch": 2.2374352980464183, "grad_norm": 0.4889453910112141, "learning_rate": 1.8410550723177306e-06, "loss": 0.148, "step": 6700 }, { "epoch": 2.237769243613291, "grad_norm": 0.5348087835055323, "learning_rate": 1.8395489547348193e-06, "loss": 0.1586, "step": 6701 }, { "epoch": 2.2381031891801637, "grad_norm": 0.509290022703682, "learning_rate": 1.8380433145752502e-06, "loss": 0.159, "step": 6702 }, { "epoch": 2.2384371347470364, "grad_norm": 0.5065409444970693, "learning_rate": 1.8365381520664695e-06, "loss": 0.1551, "step": 6703 }, { "epoch": 2.2387710803139087, "grad_norm": 0.5002223281275832, "learning_rate": 1.8350334674358505e-06, "loss": 0.1524, "step": 6704 }, { "epoch": 2.2391050258807814, "grad_norm": 0.521909205621699, "learning_rate": 1.8335292609106914e-06, "loss": 0.1507, "step": 6705 }, { "epoch": 2.239438971447654, "grad_norm": 0.5051898762790804, "learning_rate": 1.8320255327182224e-06, "loss": 0.1597, "step": 6706 }, { "epoch": 2.239772917014527, "grad_norm": 0.46375293585113725, "learning_rate": 1.8305222830855973e-06, "loss": 0.1426, "step": 6707 }, { "epoch": 2.240106862581399, "grad_norm": 0.45694266168008607, "learning_rate": 1.8290195122399007e-06, "loss": 0.141, "step": 6708 }, { "epoch": 2.240440808148272, "grad_norm": 0.5090966358483844, "learning_rate": 1.8275172204081437e-06, "loss": 0.1543, "step": 6709 }, { "epoch": 2.2407747537151446, "grad_norm": 0.5076709222116111, "learning_rate": 1.826015407817267e-06, "loss": 0.1548, "step": 6710 }, { "epoch": 2.241108699282017, "grad_norm": 0.553645802415818, "learning_rate": 1.8245140746941336e-06, "loss": 0.1673, "step": 6711 }, { "epoch": 2.2414426448488896, "grad_norm": 0.5243245790384291, "learning_rate": 1.823013221265541e-06, "loss": 0.1592, "step": 6712 }, { "epoch": 2.2417765904157623, "grad_norm": 0.5330623020965718, "learning_rate": 1.8215128477582077e-06, "loss": 0.1623, "step": 6713 }, { "epoch": 2.242110535982635, "grad_norm": 0.5257624493735769, "learning_rate": 1.8200129543987843e-06, "loss": 0.1633, "step": 6714 }, { "epoch": 2.2424444815495073, "grad_norm": 0.5021319140625929, "learning_rate": 1.818513541413847e-06, "loss": 0.1648, "step": 6715 }, { "epoch": 2.24277842711638, "grad_norm": 0.5189532423799506, "learning_rate": 1.8170146090299018e-06, "loss": 0.1596, "step": 6716 }, { "epoch": 2.2431123726832527, "grad_norm": 0.5187290309316134, "learning_rate": 1.8155161574733772e-06, "loss": 0.1553, "step": 6717 }, { "epoch": 2.2434463182501254, "grad_norm": 0.5275964325368073, "learning_rate": 1.8140181869706341e-06, "loss": 0.1555, "step": 6718 }, { "epoch": 2.2437802638169977, "grad_norm": 0.5053082731154795, "learning_rate": 1.812520697747956e-06, "loss": 0.1592, "step": 6719 }, { "epoch": 2.2441142093838704, "grad_norm": 0.539300185313967, "learning_rate": 1.8110236900315582e-06, "loss": 0.1582, "step": 6720 }, { "epoch": 2.244448154950743, "grad_norm": 0.5128000481869541, "learning_rate": 1.8095271640475802e-06, "loss": 0.1615, "step": 6721 }, { "epoch": 2.2447821005176154, "grad_norm": 0.5339868466354106, "learning_rate": 1.8080311200220935e-06, "loss": 0.1661, "step": 6722 }, { "epoch": 2.245116046084488, "grad_norm": 0.5083433012779742, "learning_rate": 1.8065355581810878e-06, "loss": 0.1551, "step": 6723 }, { "epoch": 2.245449991651361, "grad_norm": 0.5628766905570722, "learning_rate": 1.80504047875049e-06, "loss": 0.1724, "step": 6724 }, { "epoch": 2.2457839372182336, "grad_norm": 0.5084207068862423, "learning_rate": 1.8035458819561453e-06, "loss": 0.158, "step": 6725 }, { "epoch": 2.246117882785106, "grad_norm": 0.5065025201452854, "learning_rate": 1.8020517680238326e-06, "loss": 0.1598, "step": 6726 }, { "epoch": 2.2464518283519785, "grad_norm": 0.4990795520486874, "learning_rate": 1.8005581371792564e-06, "loss": 0.1525, "step": 6727 }, { "epoch": 2.2467857739188513, "grad_norm": 0.5080788566646063, "learning_rate": 1.799064989648044e-06, "loss": 0.1528, "step": 6728 }, { "epoch": 2.247119719485724, "grad_norm": 0.4974786247986726, "learning_rate": 1.797572325655756e-06, "loss": 0.1557, "step": 6729 }, { "epoch": 2.2474536650525963, "grad_norm": 0.47239598611834804, "learning_rate": 1.7960801454278742e-06, "loss": 0.1466, "step": 6730 }, { "epoch": 2.247787610619469, "grad_norm": 0.4902956696179493, "learning_rate": 1.7945884491898119e-06, "loss": 0.1496, "step": 6731 }, { "epoch": 2.2481215561863417, "grad_norm": 0.4867643676693888, "learning_rate": 1.7930972371669064e-06, "loss": 0.153, "step": 6732 }, { "epoch": 2.2484555017532144, "grad_norm": 0.49740646202701094, "learning_rate": 1.791606509584425e-06, "loss": 0.1581, "step": 6733 }, { "epoch": 2.2487894473200867, "grad_norm": 0.4636440286036032, "learning_rate": 1.7901162666675564e-06, "loss": 0.1425, "step": 6734 }, { "epoch": 2.2491233928869594, "grad_norm": 0.5257749950431144, "learning_rate": 1.7886265086414222e-06, "loss": 0.1714, "step": 6735 }, { "epoch": 2.249457338453832, "grad_norm": 0.5635419552815703, "learning_rate": 1.7871372357310651e-06, "loss": 0.1699, "step": 6736 }, { "epoch": 2.249791284020705, "grad_norm": 0.6170420468842089, "learning_rate": 1.7856484481614605e-06, "loss": 0.1536, "step": 6737 }, { "epoch": 2.250125229587577, "grad_norm": 0.5167276951617383, "learning_rate": 1.784160146157502e-06, "loss": 0.1624, "step": 6738 }, { "epoch": 2.25045917515445, "grad_norm": 0.54841599016874, "learning_rate": 1.7826723299440224e-06, "loss": 0.1693, "step": 6739 }, { "epoch": 2.2507931207213225, "grad_norm": 0.45416893488770504, "learning_rate": 1.7811849997457681e-06, "loss": 0.1435, "step": 6740 }, { "epoch": 2.251127066288195, "grad_norm": 0.5136555475743064, "learning_rate": 1.779698155787422e-06, "loss": 0.1577, "step": 6741 }, { "epoch": 2.2514610118550675, "grad_norm": 0.4719315935157619, "learning_rate": 1.7782117982935854e-06, "loss": 0.1522, "step": 6742 }, { "epoch": 2.2517949574219402, "grad_norm": 0.5301261871177757, "learning_rate": 1.7767259274887937e-06, "loss": 0.163, "step": 6743 }, { "epoch": 2.252128902988813, "grad_norm": 0.5431816850628656, "learning_rate": 1.7752405435975002e-06, "loss": 0.1704, "step": 6744 }, { "epoch": 2.2524628485556852, "grad_norm": 0.49674970018012937, "learning_rate": 1.7737556468440964e-06, "loss": 0.1541, "step": 6745 }, { "epoch": 2.252796794122558, "grad_norm": 0.5211417957528897, "learning_rate": 1.7722712374528877e-06, "loss": 0.1613, "step": 6746 }, { "epoch": 2.2531307396894307, "grad_norm": 0.5097312100588599, "learning_rate": 1.7707873156481158e-06, "loss": 0.1553, "step": 6747 }, { "epoch": 2.2534646852563034, "grad_norm": 0.5066032252735806, "learning_rate": 1.7693038816539416e-06, "loss": 0.1571, "step": 6748 }, { "epoch": 2.2537986308231757, "grad_norm": 0.5454789116485014, "learning_rate": 1.767820935694457e-06, "loss": 0.1717, "step": 6749 }, { "epoch": 2.2541325763900484, "grad_norm": 0.524425253837954, "learning_rate": 1.7663384779936764e-06, "loss": 0.1659, "step": 6750 }, { "epoch": 2.254466521956921, "grad_norm": 0.49825581432779875, "learning_rate": 1.7648565087755442e-06, "loss": 0.1494, "step": 6751 }, { "epoch": 2.2548004675237934, "grad_norm": 0.5309709923110758, "learning_rate": 1.76337502826393e-06, "loss": 0.1589, "step": 6752 }, { "epoch": 2.255134413090666, "grad_norm": 0.5173890025549714, "learning_rate": 1.7618940366826266e-06, "loss": 0.1596, "step": 6753 }, { "epoch": 2.255468358657539, "grad_norm": 0.538202002454078, "learning_rate": 1.7604135342553564e-06, "loss": 0.1688, "step": 6754 }, { "epoch": 2.2558023042244115, "grad_norm": 0.5511066879412111, "learning_rate": 1.7589335212057663e-06, "loss": 0.1716, "step": 6755 }, { "epoch": 2.2561362497912842, "grad_norm": 0.49185964257268533, "learning_rate": 1.7574539977574323e-06, "loss": 0.1553, "step": 6756 }, { "epoch": 2.2564701953581565, "grad_norm": 0.5284689435240054, "learning_rate": 1.7559749641338497e-06, "loss": 0.157, "step": 6757 }, { "epoch": 2.2568041409250292, "grad_norm": 0.5137649551069989, "learning_rate": 1.7544964205584476e-06, "loss": 0.1509, "step": 6758 }, { "epoch": 2.257138086491902, "grad_norm": 0.5155409321355809, "learning_rate": 1.7530183672545743e-06, "loss": 0.1557, "step": 6759 }, { "epoch": 2.2574720320587742, "grad_norm": 0.5312821280939001, "learning_rate": 1.7515408044455102e-06, "loss": 0.1541, "step": 6760 }, { "epoch": 2.257805977625647, "grad_norm": 0.5410274974402473, "learning_rate": 1.7500637323544534e-06, "loss": 0.1625, "step": 6761 }, { "epoch": 2.2581399231925197, "grad_norm": 0.5369298073405431, "learning_rate": 1.74858715120454e-06, "loss": 0.1623, "step": 6762 }, { "epoch": 2.2584738687593924, "grad_norm": 0.5330721160665443, "learning_rate": 1.7471110612188203e-06, "loss": 0.1667, "step": 6763 }, { "epoch": 2.2588078143262647, "grad_norm": 0.5564739677028375, "learning_rate": 1.7456354626202775e-06, "loss": 0.1622, "step": 6764 }, { "epoch": 2.2591417598931374, "grad_norm": 0.5025246524131759, "learning_rate": 1.7441603556318155e-06, "loss": 0.1591, "step": 6765 }, { "epoch": 2.25947570546001, "grad_norm": 0.5102844772368513, "learning_rate": 1.74268574047627e-06, "loss": 0.1605, "step": 6766 }, { "epoch": 2.259809651026883, "grad_norm": 0.528306743042957, "learning_rate": 1.7412116173763931e-06, "loss": 0.1564, "step": 6767 }, { "epoch": 2.260143596593755, "grad_norm": 0.5076408569071325, "learning_rate": 1.7397379865548758e-06, "loss": 0.1603, "step": 6768 }, { "epoch": 2.260477542160628, "grad_norm": 0.5293441364353967, "learning_rate": 1.7382648482343229e-06, "loss": 0.1626, "step": 6769 }, { "epoch": 2.2608114877275005, "grad_norm": 0.5160283033863646, "learning_rate": 1.7367922026372713e-06, "loss": 0.1491, "step": 6770 }, { "epoch": 2.261145433294373, "grad_norm": 0.5243537166726169, "learning_rate": 1.7353200499861794e-06, "loss": 0.1659, "step": 6771 }, { "epoch": 2.2614793788612455, "grad_norm": 0.46787144815206605, "learning_rate": 1.733848390503436e-06, "loss": 0.1473, "step": 6772 }, { "epoch": 2.261813324428118, "grad_norm": 0.5408034010548239, "learning_rate": 1.732377224411349e-06, "loss": 0.1628, "step": 6773 }, { "epoch": 2.262147269994991, "grad_norm": 0.5580667063910909, "learning_rate": 1.7309065519321572e-06, "loss": 0.1675, "step": 6774 }, { "epoch": 2.2624812155618637, "grad_norm": 0.5029465093966058, "learning_rate": 1.729436373288025e-06, "loss": 0.1554, "step": 6775 }, { "epoch": 2.262815161128736, "grad_norm": 0.515217227344929, "learning_rate": 1.7279666887010361e-06, "loss": 0.1618, "step": 6776 }, { "epoch": 2.2631491066956086, "grad_norm": 0.5002295957612295, "learning_rate": 1.726497498393206e-06, "loss": 0.1531, "step": 6777 }, { "epoch": 2.2634830522624814, "grad_norm": 0.5206621476137773, "learning_rate": 1.7250288025864747e-06, "loss": 0.17, "step": 6778 }, { "epoch": 2.2638169978293536, "grad_norm": 0.5521129732067034, "learning_rate": 1.7235606015027029e-06, "loss": 0.1707, "step": 6779 }, { "epoch": 2.2641509433962264, "grad_norm": 0.5102176164588842, "learning_rate": 1.7220928953636812e-06, "loss": 0.1572, "step": 6780 }, { "epoch": 2.264484888963099, "grad_norm": 0.5209070051551632, "learning_rate": 1.7206256843911252e-06, "loss": 0.1541, "step": 6781 }, { "epoch": 2.264818834529972, "grad_norm": 0.48372276583702023, "learning_rate": 1.7191589688066706e-06, "loss": 0.1577, "step": 6782 }, { "epoch": 2.265152780096844, "grad_norm": 0.5580522540630803, "learning_rate": 1.7176927488318868e-06, "loss": 0.1672, "step": 6783 }, { "epoch": 2.265486725663717, "grad_norm": 0.5402796189908685, "learning_rate": 1.7162270246882595e-06, "loss": 0.1607, "step": 6784 }, { "epoch": 2.2658206712305895, "grad_norm": 0.48998808382575204, "learning_rate": 1.7147617965972052e-06, "loss": 0.1558, "step": 6785 }, { "epoch": 2.266154616797462, "grad_norm": 0.4631863279760277, "learning_rate": 1.7132970647800639e-06, "loss": 0.1402, "step": 6786 }, { "epoch": 2.2664885623643345, "grad_norm": 0.5319179193859932, "learning_rate": 1.7118328294581028e-06, "loss": 0.1637, "step": 6787 }, { "epoch": 2.266822507931207, "grad_norm": 0.5057429440108493, "learning_rate": 1.7103690908525072e-06, "loss": 0.1508, "step": 6788 }, { "epoch": 2.26715645349808, "grad_norm": 0.5396050693212004, "learning_rate": 1.7089058491843967e-06, "loss": 0.1644, "step": 6789 }, { "epoch": 2.267490399064952, "grad_norm": 0.5070077732229445, "learning_rate": 1.7074431046748075e-06, "loss": 0.1552, "step": 6790 }, { "epoch": 2.267824344631825, "grad_norm": 0.5506435678104178, "learning_rate": 1.7059808575447057e-06, "loss": 0.1587, "step": 6791 }, { "epoch": 2.2681582901986976, "grad_norm": 0.5180851984442151, "learning_rate": 1.7045191080149815e-06, "loss": 0.1545, "step": 6792 }, { "epoch": 2.2684922357655704, "grad_norm": 0.503753635614517, "learning_rate": 1.7030578563064504e-06, "loss": 0.1575, "step": 6793 }, { "epoch": 2.2688261813324426, "grad_norm": 0.5181212723409958, "learning_rate": 1.7015971026398487e-06, "loss": 0.1563, "step": 6794 }, { "epoch": 2.2691601268993153, "grad_norm": 0.47584629353603225, "learning_rate": 1.7001368472358442e-06, "loss": 0.149, "step": 6795 }, { "epoch": 2.269494072466188, "grad_norm": 0.5001463375155726, "learning_rate": 1.6986770903150213e-06, "loss": 0.1497, "step": 6796 }, { "epoch": 2.269828018033061, "grad_norm": 0.5233528316290307, "learning_rate": 1.697217832097896e-06, "loss": 0.1543, "step": 6797 }, { "epoch": 2.270161963599933, "grad_norm": 0.5574253388658429, "learning_rate": 1.6957590728049078e-06, "loss": 0.1696, "step": 6798 }, { "epoch": 2.2704959091668058, "grad_norm": 0.5284625902947807, "learning_rate": 1.6943008126564164e-06, "loss": 0.1522, "step": 6799 }, { "epoch": 2.2708298547336785, "grad_norm": 0.5126830539761008, "learning_rate": 1.6928430518727102e-06, "loss": 0.159, "step": 6800 }, { "epoch": 2.2711638003005508, "grad_norm": 0.5004642345379817, "learning_rate": 1.6913857906740033e-06, "loss": 0.1561, "step": 6801 }, { "epoch": 2.2714977458674235, "grad_norm": 0.5114750316659729, "learning_rate": 1.6899290292804288e-06, "loss": 0.1614, "step": 6802 }, { "epoch": 2.271831691434296, "grad_norm": 0.5395301012866076, "learning_rate": 1.6884727679120493e-06, "loss": 0.1575, "step": 6803 }, { "epoch": 2.272165637001169, "grad_norm": 0.5343051941291189, "learning_rate": 1.687017006788852e-06, "loss": 0.1649, "step": 6804 }, { "epoch": 2.2724995825680416, "grad_norm": 0.49336411451085405, "learning_rate": 1.6855617461307427e-06, "loss": 0.1535, "step": 6805 }, { "epoch": 2.272833528134914, "grad_norm": 0.5168391221141233, "learning_rate": 1.6841069861575598e-06, "loss": 0.1534, "step": 6806 }, { "epoch": 2.2731674737017866, "grad_norm": 0.49208671517873626, "learning_rate": 1.6826527270890587e-06, "loss": 0.1514, "step": 6807 }, { "epoch": 2.2735014192686593, "grad_norm": 0.4972768325081571, "learning_rate": 1.6811989691449232e-06, "loss": 0.1572, "step": 6808 }, { "epoch": 2.2738353648355316, "grad_norm": 0.48863264662669803, "learning_rate": 1.6797457125447614e-06, "loss": 0.1481, "step": 6809 }, { "epoch": 2.2741693104024043, "grad_norm": 0.5221620981515211, "learning_rate": 1.678292957508106e-06, "loss": 0.1627, "step": 6810 }, { "epoch": 2.274503255969277, "grad_norm": 0.519752893108589, "learning_rate": 1.6768407042544093e-06, "loss": 0.1641, "step": 6811 }, { "epoch": 2.2748372015361498, "grad_norm": 0.5210756938106716, "learning_rate": 1.6753889530030554e-06, "loss": 0.156, "step": 6812 }, { "epoch": 2.275171147103022, "grad_norm": 0.5504910072599424, "learning_rate": 1.673937703973344e-06, "loss": 0.166, "step": 6813 }, { "epoch": 2.2755050926698948, "grad_norm": 0.4910304113541981, "learning_rate": 1.6724869573845054e-06, "loss": 0.1502, "step": 6814 }, { "epoch": 2.2758390382367675, "grad_norm": 0.5560014050143508, "learning_rate": 1.6710367134556926e-06, "loss": 0.1667, "step": 6815 }, { "epoch": 2.27617298380364, "grad_norm": 0.5421536902759155, "learning_rate": 1.6695869724059827e-06, "loss": 0.1513, "step": 6816 }, { "epoch": 2.2765069293705125, "grad_norm": 0.5104732647871095, "learning_rate": 1.6681377344543737e-06, "loss": 0.1632, "step": 6817 }, { "epoch": 2.276840874937385, "grad_norm": 0.5161082704973411, "learning_rate": 1.6666889998197927e-06, "loss": 0.1588, "step": 6818 }, { "epoch": 2.277174820504258, "grad_norm": 0.5211932452386149, "learning_rate": 1.6652407687210853e-06, "loss": 0.1645, "step": 6819 }, { "epoch": 2.27750876607113, "grad_norm": 0.47349743298012514, "learning_rate": 1.6637930413770249e-06, "loss": 0.1438, "step": 6820 }, { "epoch": 2.277842711638003, "grad_norm": 0.5511636785563299, "learning_rate": 1.6623458180063084e-06, "loss": 0.1632, "step": 6821 }, { "epoch": 2.2781766572048756, "grad_norm": 0.5180653642051466, "learning_rate": 1.6608990988275575e-06, "loss": 0.1581, "step": 6822 }, { "epoch": 2.2785106027717483, "grad_norm": 0.48902595955559913, "learning_rate": 1.6594528840593128e-06, "loss": 0.1577, "step": 6823 }, { "epoch": 2.278844548338621, "grad_norm": 0.5283771989671097, "learning_rate": 1.6580071739200448e-06, "loss": 0.1637, "step": 6824 }, { "epoch": 2.2791784939054933, "grad_norm": 0.4798685050156958, "learning_rate": 1.6565619686281425e-06, "loss": 0.1425, "step": 6825 }, { "epoch": 2.279512439472366, "grad_norm": 0.5262706305004466, "learning_rate": 1.6551172684019224e-06, "loss": 0.1572, "step": 6826 }, { "epoch": 2.2798463850392388, "grad_norm": 0.5158131133227103, "learning_rate": 1.6536730734596257e-06, "loss": 0.1578, "step": 6827 }, { "epoch": 2.280180330606111, "grad_norm": 0.5469011331347828, "learning_rate": 1.652229384019411e-06, "loss": 0.171, "step": 6828 }, { "epoch": 2.2805142761729837, "grad_norm": 0.5075499952775651, "learning_rate": 1.650786200299368e-06, "loss": 0.1542, "step": 6829 }, { "epoch": 2.2808482217398565, "grad_norm": 0.5425927659062526, "learning_rate": 1.6493435225175042e-06, "loss": 0.1612, "step": 6830 }, { "epoch": 2.281182167306729, "grad_norm": 0.46335339234518974, "learning_rate": 1.6479013508917552e-06, "loss": 0.1502, "step": 6831 }, { "epoch": 2.2815161128736015, "grad_norm": 0.48052008185076805, "learning_rate": 1.6464596856399734e-06, "loss": 0.1493, "step": 6832 }, { "epoch": 2.281850058440474, "grad_norm": 0.48377398516358, "learning_rate": 1.6450185269799462e-06, "loss": 0.1529, "step": 6833 }, { "epoch": 2.282184004007347, "grad_norm": 0.5249199169347003, "learning_rate": 1.6435778751293723e-06, "loss": 0.1536, "step": 6834 }, { "epoch": 2.2825179495742196, "grad_norm": 0.49911936444525085, "learning_rate": 1.6421377303058829e-06, "loss": 0.1509, "step": 6835 }, { "epoch": 2.282851895141092, "grad_norm": 0.5020572435479131, "learning_rate": 1.640698092727025e-06, "loss": 0.1572, "step": 6836 }, { "epoch": 2.2831858407079646, "grad_norm": 0.5029497729673867, "learning_rate": 1.639258962610275e-06, "loss": 0.1536, "step": 6837 }, { "epoch": 2.2835197862748373, "grad_norm": 0.4852289148568792, "learning_rate": 1.6378203401730303e-06, "loss": 0.163, "step": 6838 }, { "epoch": 2.2838537318417096, "grad_norm": 0.5788851829372151, "learning_rate": 1.6363822256326128e-06, "loss": 0.1653, "step": 6839 }, { "epoch": 2.2841876774085823, "grad_norm": 0.5116983762288065, "learning_rate": 1.6349446192062635e-06, "loss": 0.1602, "step": 6840 }, { "epoch": 2.284521622975455, "grad_norm": 0.5586774089313628, "learning_rate": 1.633507521111154e-06, "loss": 0.1619, "step": 6841 }, { "epoch": 2.2848555685423277, "grad_norm": 0.5331816877090615, "learning_rate": 1.6320709315643708e-06, "loss": 0.1701, "step": 6842 }, { "epoch": 2.2851895141092, "grad_norm": 0.49596637489137746, "learning_rate": 1.6306348507829294e-06, "loss": 0.1521, "step": 6843 }, { "epoch": 2.2855234596760727, "grad_norm": 0.5525180134705676, "learning_rate": 1.6291992789837669e-06, "loss": 0.1614, "step": 6844 }, { "epoch": 2.2858574052429455, "grad_norm": 0.5308161541932059, "learning_rate": 1.6277642163837444e-06, "loss": 0.1607, "step": 6845 }, { "epoch": 2.286191350809818, "grad_norm": 0.49825308016711456, "learning_rate": 1.6263296631996422e-06, "loss": 0.1527, "step": 6846 }, { "epoch": 2.2865252963766904, "grad_norm": 0.4718547788653712, "learning_rate": 1.6248956196481701e-06, "loss": 0.148, "step": 6847 }, { "epoch": 2.286859241943563, "grad_norm": 0.5382050275600502, "learning_rate": 1.6234620859459537e-06, "loss": 0.1613, "step": 6848 }, { "epoch": 2.287193187510436, "grad_norm": 0.49960602289495415, "learning_rate": 1.6220290623095463e-06, "loss": 0.1491, "step": 6849 }, { "epoch": 2.287527133077308, "grad_norm": 0.5156720699510375, "learning_rate": 1.6205965489554248e-06, "loss": 0.1561, "step": 6850 }, { "epoch": 2.287861078644181, "grad_norm": 0.5025226699485877, "learning_rate": 1.619164546099985e-06, "loss": 0.1593, "step": 6851 }, { "epoch": 2.2881950242110536, "grad_norm": 0.5185730184204812, "learning_rate": 1.6177330539595493e-06, "loss": 0.15, "step": 6852 }, { "epoch": 2.2885289697779263, "grad_norm": 0.48338359238850986, "learning_rate": 1.6163020727503592e-06, "loss": 0.1486, "step": 6853 }, { "epoch": 2.288862915344799, "grad_norm": 0.5446527503447174, "learning_rate": 1.6148716026885847e-06, "loss": 0.1715, "step": 6854 }, { "epoch": 2.2891968609116713, "grad_norm": 0.5200794562046593, "learning_rate": 1.61344164399031e-06, "loss": 0.1543, "step": 6855 }, { "epoch": 2.289530806478544, "grad_norm": 0.47904725129674774, "learning_rate": 1.6120121968715535e-06, "loss": 0.1538, "step": 6856 }, { "epoch": 2.2898647520454167, "grad_norm": 0.544473388151401, "learning_rate": 1.6105832615482453e-06, "loss": 0.1712, "step": 6857 }, { "epoch": 2.290198697612289, "grad_norm": 0.4891963514385171, "learning_rate": 1.609154838236246e-06, "loss": 0.15, "step": 6858 }, { "epoch": 2.2905326431791617, "grad_norm": 0.5130192652682918, "learning_rate": 1.6077269271513328e-06, "loss": 0.1616, "step": 6859 }, { "epoch": 2.2908665887460344, "grad_norm": 0.5118177941331495, "learning_rate": 1.606299528509212e-06, "loss": 0.1592, "step": 6860 }, { "epoch": 2.291200534312907, "grad_norm": 0.49636585798317306, "learning_rate": 1.604872642525503e-06, "loss": 0.1486, "step": 6861 }, { "epoch": 2.2915344798797794, "grad_norm": 0.5108213993992322, "learning_rate": 1.6034462694157615e-06, "loss": 0.1589, "step": 6862 }, { "epoch": 2.291868425446652, "grad_norm": 0.5216001516467106, "learning_rate": 1.6020204093954523e-06, "loss": 0.1618, "step": 6863 }, { "epoch": 2.292202371013525, "grad_norm": 0.4612499122336571, "learning_rate": 1.6005950626799716e-06, "loss": 0.1478, "step": 6864 }, { "epoch": 2.2925363165803976, "grad_norm": 0.5496928863070641, "learning_rate": 1.5991702294846318e-06, "loss": 0.169, "step": 6865 }, { "epoch": 2.29287026214727, "grad_norm": 0.519491010757705, "learning_rate": 1.597745910024674e-06, "loss": 0.1535, "step": 6866 }, { "epoch": 2.2932042077141426, "grad_norm": 0.5395166624198875, "learning_rate": 1.5963221045152537e-06, "loss": 0.1679, "step": 6867 }, { "epoch": 2.2935381532810153, "grad_norm": 0.49544413691093875, "learning_rate": 1.5948988131714594e-06, "loss": 0.1502, "step": 6868 }, { "epoch": 2.2938720988478876, "grad_norm": 0.5089430397982071, "learning_rate": 1.593476036208292e-06, "loss": 0.1584, "step": 6869 }, { "epoch": 2.2942060444147603, "grad_norm": 0.4761749830623971, "learning_rate": 1.5920537738406811e-06, "loss": 0.1438, "step": 6870 }, { "epoch": 2.294539989981633, "grad_norm": 0.5128220624654717, "learning_rate": 1.5906320262834735e-06, "loss": 0.1582, "step": 6871 }, { "epoch": 2.2948739355485057, "grad_norm": 0.4769933038550351, "learning_rate": 1.5892107937514424e-06, "loss": 0.1482, "step": 6872 }, { "epoch": 2.2952078811153784, "grad_norm": 0.49727500860357027, "learning_rate": 1.587790076459283e-06, "loss": 0.1569, "step": 6873 }, { "epoch": 2.2955418266822507, "grad_norm": 0.5198335573936426, "learning_rate": 1.5863698746216082e-06, "loss": 0.1573, "step": 6874 }, { "epoch": 2.2958757722491234, "grad_norm": 0.5199506171376683, "learning_rate": 1.58495018845296e-06, "loss": 0.1652, "step": 6875 }, { "epoch": 2.296209717815996, "grad_norm": 0.5041888828958186, "learning_rate": 1.5835310181677954e-06, "loss": 0.1588, "step": 6876 }, { "epoch": 2.2965436633828684, "grad_norm": 0.5595007893689801, "learning_rate": 1.5821123639804992e-06, "loss": 0.1639, "step": 6877 }, { "epoch": 2.296877608949741, "grad_norm": 0.49220921014348185, "learning_rate": 1.5806942261053715e-06, "loss": 0.1582, "step": 6878 }, { "epoch": 2.297211554516614, "grad_norm": 0.5223549427049144, "learning_rate": 1.5792766047566455e-06, "loss": 0.1577, "step": 6879 }, { "epoch": 2.2975455000834866, "grad_norm": 0.4778172847309024, "learning_rate": 1.5778595001484648e-06, "loss": 0.1468, "step": 6880 }, { "epoch": 2.297879445650359, "grad_norm": 0.5503150749267659, "learning_rate": 1.5764429124949022e-06, "loss": 0.1622, "step": 6881 }, { "epoch": 2.2982133912172316, "grad_norm": 0.48813966328418384, "learning_rate": 1.5750268420099468e-06, "loss": 0.1537, "step": 6882 }, { "epoch": 2.2985473367841043, "grad_norm": 0.48710079923452626, "learning_rate": 1.5736112889075167e-06, "loss": 0.1499, "step": 6883 }, { "epoch": 2.298881282350977, "grad_norm": 0.48893154112588416, "learning_rate": 1.5721962534014424e-06, "loss": 0.1566, "step": 6884 }, { "epoch": 2.2992152279178493, "grad_norm": 0.4801053992347205, "learning_rate": 1.5707817357054882e-06, "loss": 0.15, "step": 6885 }, { "epoch": 2.299549173484722, "grad_norm": 0.5580430430657689, "learning_rate": 1.5693677360333293e-06, "loss": 0.1615, "step": 6886 }, { "epoch": 2.2998831190515947, "grad_norm": 0.530284558249108, "learning_rate": 1.56795425459857e-06, "loss": 0.1566, "step": 6887 }, { "epoch": 2.300217064618467, "grad_norm": 0.5378779347275838, "learning_rate": 1.5665412916147298e-06, "loss": 0.1532, "step": 6888 }, { "epoch": 2.3005510101853397, "grad_norm": 0.4847888072829989, "learning_rate": 1.5651288472952564e-06, "loss": 0.1509, "step": 6889 }, { "epoch": 2.3008849557522124, "grad_norm": 0.5658711305785874, "learning_rate": 1.563716921853512e-06, "loss": 0.162, "step": 6890 }, { "epoch": 2.301218901319085, "grad_norm": 0.5439624589912487, "learning_rate": 1.562305515502791e-06, "loss": 0.1656, "step": 6891 }, { "epoch": 2.3015528468859574, "grad_norm": 0.555316715494243, "learning_rate": 1.5608946284562977e-06, "loss": 0.1656, "step": 6892 }, { "epoch": 2.30188679245283, "grad_norm": 0.5347395221649406, "learning_rate": 1.559484260927166e-06, "loss": 0.1592, "step": 6893 }, { "epoch": 2.302220738019703, "grad_norm": 0.557941240584311, "learning_rate": 1.5580744131284464e-06, "loss": 0.1677, "step": 6894 }, { "epoch": 2.3025546835865756, "grad_norm": 0.5276421760511721, "learning_rate": 1.5566650852731151e-06, "loss": 0.1685, "step": 6895 }, { "epoch": 2.302888629153448, "grad_norm": 0.5020349892960838, "learning_rate": 1.5552562775740654e-06, "loss": 0.1594, "step": 6896 }, { "epoch": 2.3032225747203205, "grad_norm": 0.5293814651715417, "learning_rate": 1.5538479902441156e-06, "loss": 0.1678, "step": 6897 }, { "epoch": 2.3035565202871933, "grad_norm": 0.48716428596425015, "learning_rate": 1.5524402234960056e-06, "loss": 0.147, "step": 6898 }, { "epoch": 2.3038904658540655, "grad_norm": 0.48127955691998964, "learning_rate": 1.5510329775423916e-06, "loss": 0.1542, "step": 6899 }, { "epoch": 2.3042244114209383, "grad_norm": 0.49897847992147765, "learning_rate": 1.5496262525958583e-06, "loss": 0.1628, "step": 6900 }, { "epoch": 2.304558356987811, "grad_norm": 0.48569413278480206, "learning_rate": 1.5482200488689054e-06, "loss": 0.1399, "step": 6901 }, { "epoch": 2.3048923025546837, "grad_norm": 0.5315498993933447, "learning_rate": 1.5468143665739565e-06, "loss": 0.1608, "step": 6902 }, { "epoch": 2.3052262481215564, "grad_norm": 0.5276387365311821, "learning_rate": 1.5454092059233583e-06, "loss": 0.1563, "step": 6903 }, { "epoch": 2.3055601936884287, "grad_norm": 0.5473811887845702, "learning_rate": 1.5440045671293774e-06, "loss": 0.1657, "step": 6904 }, { "epoch": 2.3058941392553014, "grad_norm": 0.519692883836073, "learning_rate": 1.542600450404198e-06, "loss": 0.1621, "step": 6905 }, { "epoch": 2.306228084822174, "grad_norm": 0.5274237692456049, "learning_rate": 1.5411968559599317e-06, "loss": 0.1502, "step": 6906 }, { "epoch": 2.3065620303890464, "grad_norm": 0.5165181375891817, "learning_rate": 1.5397937840086048e-06, "loss": 0.167, "step": 6907 }, { "epoch": 2.306895975955919, "grad_norm": 0.5392613904904744, "learning_rate": 1.5383912347621693e-06, "loss": 0.1609, "step": 6908 }, { "epoch": 2.307229921522792, "grad_norm": 0.5352550993399279, "learning_rate": 1.5369892084324972e-06, "loss": 0.157, "step": 6909 }, { "epoch": 2.3075638670896645, "grad_norm": 0.5132398793438658, "learning_rate": 1.5355877052313822e-06, "loss": 0.162, "step": 6910 }, { "epoch": 2.307897812656537, "grad_norm": 0.5059045512819826, "learning_rate": 1.534186725370535e-06, "loss": 0.1569, "step": 6911 }, { "epoch": 2.3082317582234095, "grad_norm": 0.5241489880536726, "learning_rate": 1.532786269061593e-06, "loss": 0.1645, "step": 6912 }, { "epoch": 2.3085657037902823, "grad_norm": 0.5862344966512784, "learning_rate": 1.531386336516107e-06, "loss": 0.1482, "step": 6913 }, { "epoch": 2.308899649357155, "grad_norm": 0.4937850033656664, "learning_rate": 1.52998692794556e-06, "loss": 0.16, "step": 6914 }, { "epoch": 2.3092335949240272, "grad_norm": 0.5025478647760935, "learning_rate": 1.5285880435613438e-06, "loss": 0.1536, "step": 6915 }, { "epoch": 2.3095675404909, "grad_norm": 0.5395976195462507, "learning_rate": 1.5271896835747795e-06, "loss": 0.1602, "step": 6916 }, { "epoch": 2.3099014860577727, "grad_norm": 0.50885180620015, "learning_rate": 1.5257918481971028e-06, "loss": 0.1584, "step": 6917 }, { "epoch": 2.310235431624645, "grad_norm": 0.49907541237904407, "learning_rate": 1.524394537639477e-06, "loss": 0.1468, "step": 6918 }, { "epoch": 2.3105693771915177, "grad_norm": 0.520098742948566, "learning_rate": 1.5229977521129785e-06, "loss": 0.152, "step": 6919 }, { "epoch": 2.3109033227583904, "grad_norm": 0.555034188223172, "learning_rate": 1.5216014918286097e-06, "loss": 0.1728, "step": 6920 }, { "epoch": 2.311237268325263, "grad_norm": 0.5277476198952158, "learning_rate": 1.5202057569972945e-06, "loss": 0.1559, "step": 6921 }, { "epoch": 2.311571213892136, "grad_norm": 0.5082237210587632, "learning_rate": 1.518810547829871e-06, "loss": 0.1546, "step": 6922 }, { "epoch": 2.311905159459008, "grad_norm": 0.5498238792531575, "learning_rate": 1.517415864537105e-06, "loss": 0.1672, "step": 6923 }, { "epoch": 2.312239105025881, "grad_norm": 0.5102413698777625, "learning_rate": 1.516021707329678e-06, "loss": 0.1639, "step": 6924 }, { "epoch": 2.3125730505927535, "grad_norm": 0.49159724301878027, "learning_rate": 1.5146280764181942e-06, "loss": 0.1535, "step": 6925 }, { "epoch": 2.312906996159626, "grad_norm": 0.5166047110747541, "learning_rate": 1.5132349720131783e-06, "loss": 0.1588, "step": 6926 }, { "epoch": 2.3132409417264985, "grad_norm": 0.5055406381133966, "learning_rate": 1.511842394325077e-06, "loss": 0.1483, "step": 6927 }, { "epoch": 2.3135748872933712, "grad_norm": 0.5191101090446806, "learning_rate": 1.5104503435642526e-06, "loss": 0.158, "step": 6928 }, { "epoch": 2.313908832860244, "grad_norm": 0.5014849155315904, "learning_rate": 1.5090588199409927e-06, "loss": 0.159, "step": 6929 }, { "epoch": 2.3142427784271162, "grad_norm": 0.5270760259726192, "learning_rate": 1.5076678236655018e-06, "loss": 0.1663, "step": 6930 }, { "epoch": 2.314576723993989, "grad_norm": 0.5265525170252408, "learning_rate": 1.5062773549479064e-06, "loss": 0.1674, "step": 6931 }, { "epoch": 2.3149106695608617, "grad_norm": 0.4754623491282869, "learning_rate": 1.504887413998254e-06, "loss": 0.1517, "step": 6932 }, { "epoch": 2.3152446151277344, "grad_norm": 0.4902506423363311, "learning_rate": 1.5034980010265127e-06, "loss": 0.1553, "step": 6933 }, { "epoch": 2.3155785606946067, "grad_norm": 0.5575953129911351, "learning_rate": 1.5021091162425672e-06, "loss": 0.1666, "step": 6934 }, { "epoch": 2.3159125062614794, "grad_norm": 0.5099824545247369, "learning_rate": 1.5007207598562268e-06, "loss": 0.1579, "step": 6935 }, { "epoch": 2.316246451828352, "grad_norm": 0.539874027836098, "learning_rate": 1.4993329320772177e-06, "loss": 0.1662, "step": 6936 }, { "epoch": 2.3165803973952244, "grad_norm": 0.5295612734416608, "learning_rate": 1.4979456331151875e-06, "loss": 0.1597, "step": 6937 }, { "epoch": 2.316914342962097, "grad_norm": 0.5894263333785696, "learning_rate": 1.4965588631797052e-06, "loss": 0.159, "step": 6938 }, { "epoch": 2.31724828852897, "grad_norm": 0.4741039633929316, "learning_rate": 1.4951726224802593e-06, "loss": 0.1494, "step": 6939 }, { "epoch": 2.3175822340958425, "grad_norm": 0.4862856197350301, "learning_rate": 1.493786911226256e-06, "loss": 0.1569, "step": 6940 }, { "epoch": 2.317916179662715, "grad_norm": 0.5529752111878695, "learning_rate": 1.492401729627025e-06, "loss": 0.1608, "step": 6941 }, { "epoch": 2.3182501252295875, "grad_norm": 0.5507021790387356, "learning_rate": 1.491017077891812e-06, "loss": 0.168, "step": 6942 }, { "epoch": 2.3185840707964602, "grad_norm": 0.5125603052330294, "learning_rate": 1.4896329562297863e-06, "loss": 0.1602, "step": 6943 }, { "epoch": 2.318918016363333, "grad_norm": 0.5257969566496764, "learning_rate": 1.4882493648500373e-06, "loss": 0.1576, "step": 6944 }, { "epoch": 2.319251961930205, "grad_norm": 0.5381499105664779, "learning_rate": 1.48686630396157e-06, "loss": 0.1683, "step": 6945 }, { "epoch": 2.319585907497078, "grad_norm": 0.5422363917617385, "learning_rate": 1.4854837737733147e-06, "loss": 0.1679, "step": 6946 }, { "epoch": 2.3199198530639507, "grad_norm": 0.5641296285837942, "learning_rate": 1.484101774494116e-06, "loss": 0.1722, "step": 6947 }, { "epoch": 2.320253798630823, "grad_norm": 0.5253053775187845, "learning_rate": 1.4827203063327427e-06, "loss": 0.1608, "step": 6948 }, { "epoch": 2.3205877441976956, "grad_norm": 0.4501898716795521, "learning_rate": 1.4813393694978812e-06, "loss": 0.1527, "step": 6949 }, { "epoch": 2.3209216897645684, "grad_norm": 0.4766070843342987, "learning_rate": 1.479958964198141e-06, "loss": 0.1473, "step": 6950 }, { "epoch": 2.321255635331441, "grad_norm": 0.5543755055251879, "learning_rate": 1.4785790906420445e-06, "loss": 0.1729, "step": 6951 }, { "epoch": 2.321589580898314, "grad_norm": 0.47207588022242153, "learning_rate": 1.4771997490380414e-06, "loss": 0.1449, "step": 6952 }, { "epoch": 2.321923526465186, "grad_norm": 0.5489402475099499, "learning_rate": 1.4758209395944945e-06, "loss": 0.1666, "step": 6953 }, { "epoch": 2.322257472032059, "grad_norm": 0.4807129427731627, "learning_rate": 1.47444266251969e-06, "loss": 0.1518, "step": 6954 }, { "epoch": 2.3225914175989315, "grad_norm": 0.554604022659153, "learning_rate": 1.4730649180218337e-06, "loss": 0.1757, "step": 6955 }, { "epoch": 2.322925363165804, "grad_norm": 0.5348203406510965, "learning_rate": 1.4716877063090517e-06, "loss": 0.1606, "step": 6956 }, { "epoch": 2.3232593087326765, "grad_norm": 0.525792610223351, "learning_rate": 1.4703110275893846e-06, "loss": 0.1645, "step": 6957 }, { "epoch": 2.323593254299549, "grad_norm": 0.5191996867761393, "learning_rate": 1.4689348820707988e-06, "loss": 0.1601, "step": 6958 }, { "epoch": 2.323927199866422, "grad_norm": 0.5021670260891927, "learning_rate": 1.4675592699611741e-06, "loss": 0.157, "step": 6959 }, { "epoch": 2.324261145433294, "grad_norm": 0.5189366470501476, "learning_rate": 1.4661841914683156e-06, "loss": 0.1561, "step": 6960 }, { "epoch": 2.324595091000167, "grad_norm": 0.5367046655871626, "learning_rate": 1.464809646799944e-06, "loss": 0.1705, "step": 6961 }, { "epoch": 2.3249290365670396, "grad_norm": 0.46937388491530546, "learning_rate": 1.463435636163702e-06, "loss": 0.1535, "step": 6962 }, { "epoch": 2.3252629821339124, "grad_norm": 0.482195068948941, "learning_rate": 1.4620621597671476e-06, "loss": 0.1498, "step": 6963 }, { "epoch": 2.3255969277007846, "grad_norm": 0.4782325808602372, "learning_rate": 1.4606892178177633e-06, "loss": 0.1492, "step": 6964 }, { "epoch": 2.3259308732676574, "grad_norm": 0.49949896473890126, "learning_rate": 1.459316810522945e-06, "loss": 0.1541, "step": 6965 }, { "epoch": 2.32626481883453, "grad_norm": 0.5514134284700918, "learning_rate": 1.457944938090013e-06, "loss": 0.1627, "step": 6966 }, { "epoch": 2.3265987644014023, "grad_norm": 0.5122505150543959, "learning_rate": 1.456573600726206e-06, "loss": 0.1582, "step": 6967 }, { "epoch": 2.326932709968275, "grad_norm": 0.5155388868093768, "learning_rate": 1.4552027986386775e-06, "loss": 0.1605, "step": 6968 }, { "epoch": 2.3272666555351478, "grad_norm": 0.5319645910926085, "learning_rate": 1.453832532034506e-06, "loss": 0.1672, "step": 6969 }, { "epoch": 2.3276006011020205, "grad_norm": 0.5212564670307227, "learning_rate": 1.4524628011206843e-06, "loss": 0.1606, "step": 6970 }, { "epoch": 2.327934546668893, "grad_norm": 0.5197062994619243, "learning_rate": 1.4510936061041269e-06, "loss": 0.1586, "step": 6971 }, { "epoch": 2.3282684922357655, "grad_norm": 0.5793340057434153, "learning_rate": 1.449724947191668e-06, "loss": 0.1625, "step": 6972 }, { "epoch": 2.328602437802638, "grad_norm": 0.5080639803068403, "learning_rate": 1.4483568245900597e-06, "loss": 0.1554, "step": 6973 }, { "epoch": 2.328936383369511, "grad_norm": 0.5268549200084457, "learning_rate": 1.4469892385059713e-06, "loss": 0.1655, "step": 6974 }, { "epoch": 2.329270328936383, "grad_norm": 0.4921015168951849, "learning_rate": 1.4456221891459953e-06, "loss": 0.1559, "step": 6975 }, { "epoch": 2.329604274503256, "grad_norm": 0.507333813102178, "learning_rate": 1.4442556767166371e-06, "loss": 0.1478, "step": 6976 }, { "epoch": 2.3299382200701286, "grad_norm": 0.4805039087114265, "learning_rate": 1.4428897014243288e-06, "loss": 0.143, "step": 6977 }, { "epoch": 2.3302721656370013, "grad_norm": 0.525864995462506, "learning_rate": 1.4415242634754107e-06, "loss": 0.1564, "step": 6978 }, { "epoch": 2.3306061112038736, "grad_norm": 0.48453778085656507, "learning_rate": 1.4401593630761562e-06, "loss": 0.153, "step": 6979 }, { "epoch": 2.3309400567707463, "grad_norm": 0.5560825048911766, "learning_rate": 1.4387950004327434e-06, "loss": 0.1696, "step": 6980 }, { "epoch": 2.331274002337619, "grad_norm": 0.4999034443705413, "learning_rate": 1.4374311757512798e-06, "loss": 0.1489, "step": 6981 }, { "epoch": 2.3316079479044918, "grad_norm": 0.48674712053155916, "learning_rate": 1.4360678892377833e-06, "loss": 0.1558, "step": 6982 }, { "epoch": 2.331941893471364, "grad_norm": 0.5376662163779152, "learning_rate": 1.434705141098197e-06, "loss": 0.1618, "step": 6983 }, { "epoch": 2.3322758390382368, "grad_norm": 0.49634944316675383, "learning_rate": 1.4333429315383768e-06, "loss": 0.1536, "step": 6984 }, { "epoch": 2.3326097846051095, "grad_norm": 0.5261178930730273, "learning_rate": 1.4319812607641055e-06, "loss": 0.1491, "step": 6985 }, { "epoch": 2.3329437301719818, "grad_norm": 0.4925740465169854, "learning_rate": 1.4306201289810756e-06, "loss": 0.1542, "step": 6986 }, { "epoch": 2.3332776757388545, "grad_norm": 0.5190410270947525, "learning_rate": 1.4292595363949047e-06, "loss": 0.1499, "step": 6987 }, { "epoch": 2.333611621305727, "grad_norm": 0.5062847873051785, "learning_rate": 1.4278994832111232e-06, "loss": 0.1559, "step": 6988 }, { "epoch": 2.3339455668726, "grad_norm": 0.5152470669811914, "learning_rate": 1.4265399696351867e-06, "loss": 0.1543, "step": 6989 }, { "epoch": 2.334279512439472, "grad_norm": 0.5110787108439315, "learning_rate": 1.4251809958724623e-06, "loss": 0.1607, "step": 6990 }, { "epoch": 2.334613458006345, "grad_norm": 0.4988509374702272, "learning_rate": 1.4238225621282403e-06, "loss": 0.1588, "step": 6991 }, { "epoch": 2.3349474035732176, "grad_norm": 0.5005557600957211, "learning_rate": 1.4224646686077303e-06, "loss": 0.1569, "step": 6992 }, { "epoch": 2.3352813491400903, "grad_norm": 0.48763438079425747, "learning_rate": 1.4211073155160544e-06, "loss": 0.158, "step": 6993 }, { "epoch": 2.3356152947069626, "grad_norm": 0.5314112983450026, "learning_rate": 1.4197505030582588e-06, "loss": 0.164, "step": 6994 }, { "epoch": 2.3359492402738353, "grad_norm": 0.502506767280615, "learning_rate": 1.4183942314393056e-06, "loss": 0.1553, "step": 6995 }, { "epoch": 2.336283185840708, "grad_norm": 0.514455411162069, "learning_rate": 1.4170385008640774e-06, "loss": 0.1561, "step": 6996 }, { "epoch": 2.3366171314075803, "grad_norm": 0.5256261869279392, "learning_rate": 1.4156833115373702e-06, "loss": 0.1522, "step": 6997 }, { "epoch": 2.336951076974453, "grad_norm": 0.5419599369164668, "learning_rate": 1.4143286636639043e-06, "loss": 0.1648, "step": 6998 }, { "epoch": 2.3372850225413258, "grad_norm": 0.4543607056514028, "learning_rate": 1.4129745574483123e-06, "loss": 0.1443, "step": 6999 }, { "epoch": 2.3376189681081985, "grad_norm": 0.5119302019907201, "learning_rate": 1.4116209930951508e-06, "loss": 0.1589, "step": 7000 }, { "epoch": 2.337952913675071, "grad_norm": 0.5143893370915871, "learning_rate": 1.4102679708088867e-06, "loss": 0.1617, "step": 7001 }, { "epoch": 2.3382868592419435, "grad_norm": 0.4951445956972631, "learning_rate": 1.4089154907939162e-06, "loss": 0.1552, "step": 7002 }, { "epoch": 2.338620804808816, "grad_norm": 0.4944981712221224, "learning_rate": 1.4075635532545435e-06, "loss": 0.1471, "step": 7003 }, { "epoch": 2.338954750375689, "grad_norm": 0.5217606879242871, "learning_rate": 1.4062121583949967e-06, "loss": 0.1527, "step": 7004 }, { "epoch": 2.339288695942561, "grad_norm": 0.5372747665208512, "learning_rate": 1.4048613064194178e-06, "loss": 0.1639, "step": 7005 }, { "epoch": 2.339622641509434, "grad_norm": 0.5084540072164366, "learning_rate": 1.4035109975318712e-06, "loss": 0.1522, "step": 7006 }, { "epoch": 2.3399565870763066, "grad_norm": 0.4915624674536933, "learning_rate": 1.4021612319363326e-06, "loss": 0.1475, "step": 7007 }, { "epoch": 2.3402905326431793, "grad_norm": 0.5141788309732952, "learning_rate": 1.4008120098367062e-06, "loss": 0.1611, "step": 7008 }, { "epoch": 2.3406244782100516, "grad_norm": 0.5340670488732328, "learning_rate": 1.3994633314368034e-06, "loss": 0.1635, "step": 7009 }, { "epoch": 2.3409584237769243, "grad_norm": 0.4839970244154489, "learning_rate": 1.3981151969403606e-06, "loss": 0.1458, "step": 7010 }, { "epoch": 2.341292369343797, "grad_norm": 0.4786078131814078, "learning_rate": 1.3967676065510266e-06, "loss": 0.145, "step": 7011 }, { "epoch": 2.3416263149106697, "grad_norm": 0.47065456298226127, "learning_rate": 1.3954205604723742e-06, "loss": 0.1455, "step": 7012 }, { "epoch": 2.341960260477542, "grad_norm": 0.550572446756861, "learning_rate": 1.3940740589078872e-06, "loss": 0.1606, "step": 7013 }, { "epoch": 2.3422942060444147, "grad_norm": 0.5654689482348924, "learning_rate": 1.3927281020609712e-06, "loss": 0.172, "step": 7014 }, { "epoch": 2.3426281516112875, "grad_norm": 0.5893082288878831, "learning_rate": 1.391382690134952e-06, "loss": 0.1689, "step": 7015 }, { "epoch": 2.3429620971781597, "grad_norm": 0.532586165834147, "learning_rate": 1.3900378233330658e-06, "loss": 0.1589, "step": 7016 }, { "epoch": 2.3432960427450324, "grad_norm": 0.5434759431816953, "learning_rate": 1.3886935018584719e-06, "loss": 0.1561, "step": 7017 }, { "epoch": 2.343629988311905, "grad_norm": 0.4986753275861255, "learning_rate": 1.3873497259142483e-06, "loss": 0.1576, "step": 7018 }, { "epoch": 2.343963933878778, "grad_norm": 0.4850699716446126, "learning_rate": 1.3860064957033847e-06, "loss": 0.1565, "step": 7019 }, { "epoch": 2.3442978794456506, "grad_norm": 0.4626717680688309, "learning_rate": 1.384663811428793e-06, "loss": 0.1403, "step": 7020 }, { "epoch": 2.344631825012523, "grad_norm": 0.5748882353001997, "learning_rate": 1.3833216732933035e-06, "loss": 0.1677, "step": 7021 }, { "epoch": 2.3449657705793956, "grad_norm": 0.4886924233962931, "learning_rate": 1.3819800814996587e-06, "loss": 0.1476, "step": 7022 }, { "epoch": 2.3452997161462683, "grad_norm": 0.5189394050352707, "learning_rate": 1.3806390362505251e-06, "loss": 0.1625, "step": 7023 }, { "epoch": 2.3456336617131406, "grad_norm": 0.4929703711875753, "learning_rate": 1.3792985377484796e-06, "loss": 0.153, "step": 7024 }, { "epoch": 2.3459676072800133, "grad_norm": 0.5074258924655396, "learning_rate": 1.3779585861960226e-06, "loss": 0.1479, "step": 7025 }, { "epoch": 2.346301552846886, "grad_norm": 0.5344322232369463, "learning_rate": 1.3766191817955699e-06, "loss": 0.1666, "step": 7026 }, { "epoch": 2.3466354984137587, "grad_norm": 0.5450383787251549, "learning_rate": 1.3752803247494545e-06, "loss": 0.1697, "step": 7027 }, { "epoch": 2.346969443980631, "grad_norm": 0.5356447852167889, "learning_rate": 1.3739420152599247e-06, "loss": 0.1625, "step": 7028 }, { "epoch": 2.3473033895475037, "grad_norm": 0.49739392807914606, "learning_rate": 1.37260425352915e-06, "loss": 0.1442, "step": 7029 }, { "epoch": 2.3476373351143764, "grad_norm": 0.4677708828080009, "learning_rate": 1.3712670397592127e-06, "loss": 0.1455, "step": 7030 }, { "epoch": 2.347971280681249, "grad_norm": 0.531422867823814, "learning_rate": 1.3699303741521158e-06, "loss": 0.1503, "step": 7031 }, { "epoch": 2.3483052262481214, "grad_norm": 0.5219314248338266, "learning_rate": 1.3685942569097793e-06, "loss": 0.1666, "step": 7032 }, { "epoch": 2.348639171814994, "grad_norm": 0.5289721391712382, "learning_rate": 1.3672586882340393e-06, "loss": 0.1646, "step": 7033 }, { "epoch": 2.348973117381867, "grad_norm": 0.4755425996576312, "learning_rate": 1.3659236683266475e-06, "loss": 0.1514, "step": 7034 }, { "epoch": 2.349307062948739, "grad_norm": 0.49561200390500754, "learning_rate": 1.3645891973892772e-06, "loss": 0.1493, "step": 7035 }, { "epoch": 2.349641008515612, "grad_norm": 0.5361377733253501, "learning_rate": 1.3632552756235124e-06, "loss": 0.1635, "step": 7036 }, { "epoch": 2.3499749540824846, "grad_norm": 0.5245007107369157, "learning_rate": 1.3619219032308594e-06, "loss": 0.1621, "step": 7037 }, { "epoch": 2.3503088996493573, "grad_norm": 0.5233641249374307, "learning_rate": 1.3605890804127415e-06, "loss": 0.1567, "step": 7038 }, { "epoch": 2.3506428452162296, "grad_norm": 0.529716471906781, "learning_rate": 1.3592568073704943e-06, "loss": 0.1567, "step": 7039 }, { "epoch": 2.3509767907831023, "grad_norm": 0.5282219003779071, "learning_rate": 1.3579250843053747e-06, "loss": 0.1522, "step": 7040 }, { "epoch": 2.351310736349975, "grad_norm": 0.5234598148920355, "learning_rate": 1.3565939114185568e-06, "loss": 0.1503, "step": 7041 }, { "epoch": 2.3516446819168477, "grad_norm": 0.6006249908501967, "learning_rate": 1.3552632889111266e-06, "loss": 0.1674, "step": 7042 }, { "epoch": 2.35197862748372, "grad_norm": 0.5012523499275392, "learning_rate": 1.3539332169840918e-06, "loss": 0.1537, "step": 7043 }, { "epoch": 2.3523125730505927, "grad_norm": 0.47883312000552264, "learning_rate": 1.3526036958383777e-06, "loss": 0.1444, "step": 7044 }, { "epoch": 2.3526465186174654, "grad_norm": 0.5046804575270472, "learning_rate": 1.35127472567482e-06, "loss": 0.1516, "step": 7045 }, { "epoch": 2.3529804641843377, "grad_norm": 0.4807311309572237, "learning_rate": 1.3499463066941787e-06, "loss": 0.1466, "step": 7046 }, { "epoch": 2.3533144097512104, "grad_norm": 0.5251926780869718, "learning_rate": 1.3486184390971246e-06, "loss": 0.1545, "step": 7047 }, { "epoch": 2.353648355318083, "grad_norm": 0.5442022137332256, "learning_rate": 1.347291123084249e-06, "loss": 0.1689, "step": 7048 }, { "epoch": 2.353982300884956, "grad_norm": 0.5188350766445398, "learning_rate": 1.3459643588560583e-06, "loss": 0.1597, "step": 7049 }, { "epoch": 2.3543162464518286, "grad_norm": 0.47155341824463987, "learning_rate": 1.3446381466129777e-06, "loss": 0.1477, "step": 7050 }, { "epoch": 2.354650192018701, "grad_norm": 0.5474209459200585, "learning_rate": 1.3433124865553437e-06, "loss": 0.1697, "step": 7051 }, { "epoch": 2.3549841375855736, "grad_norm": 0.5271510936720203, "learning_rate": 1.3419873788834164e-06, "loss": 0.1657, "step": 7052 }, { "epoch": 2.3553180831524463, "grad_norm": 0.5277962735942314, "learning_rate": 1.3406628237973662e-06, "loss": 0.1608, "step": 7053 }, { "epoch": 2.3556520287193186, "grad_norm": 0.5556972174352737, "learning_rate": 1.339338821497283e-06, "loss": 0.1625, "step": 7054 }, { "epoch": 2.3559859742861913, "grad_norm": 0.5248699594279046, "learning_rate": 1.3380153721831745e-06, "loss": 0.1619, "step": 7055 }, { "epoch": 2.356319919853064, "grad_norm": 0.505305439890515, "learning_rate": 1.3366924760549632e-06, "loss": 0.1605, "step": 7056 }, { "epoch": 2.3566538654199367, "grad_norm": 0.509624361874994, "learning_rate": 1.3353701333124863e-06, "loss": 0.1558, "step": 7057 }, { "epoch": 2.356987810986809, "grad_norm": 0.5139996192629374, "learning_rate": 1.3340483441555024e-06, "loss": 0.1569, "step": 7058 }, { "epoch": 2.3573217565536817, "grad_norm": 0.50576323330744, "learning_rate": 1.3327271087836792e-06, "loss": 0.1528, "step": 7059 }, { "epoch": 2.3576557021205544, "grad_norm": 0.4936700282126162, "learning_rate": 1.331406427396607e-06, "loss": 0.1513, "step": 7060 }, { "epoch": 2.357989647687427, "grad_norm": 0.5475069928862222, "learning_rate": 1.3300863001937902e-06, "loss": 0.1588, "step": 7061 }, { "epoch": 2.3583235932542994, "grad_norm": 0.5259688821837174, "learning_rate": 1.3287667273746513e-06, "loss": 0.1646, "step": 7062 }, { "epoch": 2.358657538821172, "grad_norm": 0.6091072760868147, "learning_rate": 1.3274477091385241e-06, "loss": 0.1632, "step": 7063 }, { "epoch": 2.358991484388045, "grad_norm": 0.49664916653441177, "learning_rate": 1.3261292456846648e-06, "loss": 0.1483, "step": 7064 }, { "epoch": 2.359325429954917, "grad_norm": 0.5428777150582343, "learning_rate": 1.3248113372122395e-06, "loss": 0.1596, "step": 7065 }, { "epoch": 2.35965937552179, "grad_norm": 0.5426577923297033, "learning_rate": 1.3234939839203358e-06, "loss": 0.1602, "step": 7066 }, { "epoch": 2.3599933210886626, "grad_norm": 0.5188785999223405, "learning_rate": 1.3221771860079569e-06, "loss": 0.1591, "step": 7067 }, { "epoch": 2.3603272666555353, "grad_norm": 0.5086994931226321, "learning_rate": 1.3208609436740178e-06, "loss": 0.1522, "step": 7068 }, { "epoch": 2.360661212222408, "grad_norm": 0.47362864053642506, "learning_rate": 1.3195452571173551e-06, "loss": 0.1467, "step": 7069 }, { "epoch": 2.3609951577892803, "grad_norm": 0.5264254283456309, "learning_rate": 1.3182301265367154e-06, "loss": 0.1555, "step": 7070 }, { "epoch": 2.361329103356153, "grad_norm": 0.4742773192417172, "learning_rate": 1.3169155521307664e-06, "loss": 0.1433, "step": 7071 }, { "epoch": 2.3616630489230257, "grad_norm": 0.5145066895595479, "learning_rate": 1.3156015340980904e-06, "loss": 0.1574, "step": 7072 }, { "epoch": 2.361996994489898, "grad_norm": 0.5604203763870581, "learning_rate": 1.3142880726371865e-06, "loss": 0.1657, "step": 7073 }, { "epoch": 2.3623309400567707, "grad_norm": 0.53274867372277, "learning_rate": 1.312975167946466e-06, "loss": 0.1718, "step": 7074 }, { "epoch": 2.3626648856236434, "grad_norm": 0.53957363798226, "learning_rate": 1.3116628202242603e-06, "loss": 0.1594, "step": 7075 }, { "epoch": 2.362998831190516, "grad_norm": 0.5006375891688938, "learning_rate": 1.3103510296688137e-06, "loss": 0.1598, "step": 7076 }, { "epoch": 2.3633327767573884, "grad_norm": 0.48159573025330465, "learning_rate": 1.309039796478288e-06, "loss": 0.1495, "step": 7077 }, { "epoch": 2.363666722324261, "grad_norm": 0.5301067970927092, "learning_rate": 1.307729120850761e-06, "loss": 0.1578, "step": 7078 }, { "epoch": 2.364000667891134, "grad_norm": 0.5357552915413548, "learning_rate": 1.306419002984226e-06, "loss": 0.1566, "step": 7079 }, { "epoch": 2.3643346134580066, "grad_norm": 0.5482638058241193, "learning_rate": 1.3051094430765905e-06, "loss": 0.1611, "step": 7080 }, { "epoch": 2.364668559024879, "grad_norm": 0.5096740195373038, "learning_rate": 1.3038004413256805e-06, "loss": 0.1604, "step": 7081 }, { "epoch": 2.3650025045917515, "grad_norm": 0.470098411066193, "learning_rate": 1.3024919979292338e-06, "loss": 0.1474, "step": 7082 }, { "epoch": 2.3653364501586243, "grad_norm": 0.538932422734086, "learning_rate": 1.3011841130849079e-06, "loss": 0.1491, "step": 7083 }, { "epoch": 2.3656703957254965, "grad_norm": 0.48937676072754, "learning_rate": 1.2998767869902733e-06, "loss": 0.159, "step": 7084 }, { "epoch": 2.3660043412923693, "grad_norm": 0.49651132883192295, "learning_rate": 1.2985700198428197e-06, "loss": 0.1492, "step": 7085 }, { "epoch": 2.366338286859242, "grad_norm": 0.5336066250058544, "learning_rate": 1.2972638118399456e-06, "loss": 0.1509, "step": 7086 }, { "epoch": 2.3666722324261147, "grad_norm": 0.5022650378372203, "learning_rate": 1.2959581631789725e-06, "loss": 0.1521, "step": 7087 }, { "epoch": 2.367006177992987, "grad_norm": 0.5256152756288829, "learning_rate": 1.2946530740571316e-06, "loss": 0.1561, "step": 7088 }, { "epoch": 2.3673401235598597, "grad_norm": 0.5106806485581995, "learning_rate": 1.293348544671572e-06, "loss": 0.1584, "step": 7089 }, { "epoch": 2.3676740691267324, "grad_norm": 0.4815349175488414, "learning_rate": 1.2920445752193617e-06, "loss": 0.1494, "step": 7090 }, { "epoch": 2.368008014693605, "grad_norm": 0.49516867886375865, "learning_rate": 1.2907411658974756e-06, "loss": 0.1536, "step": 7091 }, { "epoch": 2.3683419602604774, "grad_norm": 0.5133737571155627, "learning_rate": 1.2894383169028134e-06, "loss": 0.1514, "step": 7092 }, { "epoch": 2.36867590582735, "grad_norm": 0.5047295137569292, "learning_rate": 1.2881360284321825e-06, "loss": 0.1537, "step": 7093 }, { "epoch": 2.369009851394223, "grad_norm": 0.5233980435478595, "learning_rate": 1.2868343006823113e-06, "loss": 0.1617, "step": 7094 }, { "epoch": 2.369343796961095, "grad_norm": 0.47283027887814555, "learning_rate": 1.2855331338498377e-06, "loss": 0.1528, "step": 7095 }, { "epoch": 2.369677742527968, "grad_norm": 0.5649361822819848, "learning_rate": 1.2842325281313233e-06, "loss": 0.1699, "step": 7096 }, { "epoch": 2.3700116880948405, "grad_norm": 0.5228406368655593, "learning_rate": 1.282932483723236e-06, "loss": 0.1571, "step": 7097 }, { "epoch": 2.3703456336617132, "grad_norm": 0.528258600013772, "learning_rate": 1.2816330008219656e-06, "loss": 0.1673, "step": 7098 }, { "epoch": 2.370679579228586, "grad_norm": 0.5126074736580588, "learning_rate": 1.280334079623811e-06, "loss": 0.1586, "step": 7099 }, { "epoch": 2.3710135247954582, "grad_norm": 0.5209165501480114, "learning_rate": 1.2790357203249931e-06, "loss": 0.1612, "step": 7100 }, { "epoch": 2.371347470362331, "grad_norm": 0.5650474597014316, "learning_rate": 1.2777379231216391e-06, "loss": 0.1616, "step": 7101 }, { "epoch": 2.3716814159292037, "grad_norm": 0.5315493028470518, "learning_rate": 1.2764406882098035e-06, "loss": 0.1576, "step": 7102 }, { "epoch": 2.372015361496076, "grad_norm": 0.511469997004763, "learning_rate": 1.2751440157854439e-06, "loss": 0.1546, "step": 7103 }, { "epoch": 2.3723493070629487, "grad_norm": 0.5397007074652154, "learning_rate": 1.2738479060444408e-06, "loss": 0.1623, "step": 7104 }, { "epoch": 2.3726832526298214, "grad_norm": 0.47300263395113906, "learning_rate": 1.2725523591825845e-06, "loss": 0.1443, "step": 7105 }, { "epoch": 2.373017198196694, "grad_norm": 0.49067687314657127, "learning_rate": 1.2712573753955842e-06, "loss": 0.1523, "step": 7106 }, { "epoch": 2.3733511437635664, "grad_norm": 0.5354579285385755, "learning_rate": 1.2699629548790599e-06, "loss": 0.1634, "step": 7107 }, { "epoch": 2.373685089330439, "grad_norm": 0.5125476972638018, "learning_rate": 1.2686690978285533e-06, "loss": 0.1618, "step": 7108 }, { "epoch": 2.374019034897312, "grad_norm": 0.5010154309189855, "learning_rate": 1.267375804439513e-06, "loss": 0.1514, "step": 7109 }, { "epoch": 2.3743529804641845, "grad_norm": 0.4951422731618649, "learning_rate": 1.2660830749073093e-06, "loss": 0.1505, "step": 7110 }, { "epoch": 2.374686926031057, "grad_norm": 0.5662049963465066, "learning_rate": 1.2647909094272215e-06, "loss": 0.1686, "step": 7111 }, { "epoch": 2.3750208715979295, "grad_norm": 0.496822761224137, "learning_rate": 1.2634993081944469e-06, "loss": 0.15, "step": 7112 }, { "epoch": 2.3753548171648022, "grad_norm": 0.48684393051910746, "learning_rate": 1.2622082714040995e-06, "loss": 0.1555, "step": 7113 }, { "epoch": 2.3756887627316745, "grad_norm": 0.51894267837415, "learning_rate": 1.2609177992512022e-06, "loss": 0.1644, "step": 7114 }, { "epoch": 2.3760227082985472, "grad_norm": 0.5292677495026286, "learning_rate": 1.2596278919306993e-06, "loss": 0.1585, "step": 7115 }, { "epoch": 2.37635665386542, "grad_norm": 0.48845207606014346, "learning_rate": 1.2583385496374428e-06, "loss": 0.152, "step": 7116 }, { "epoch": 2.3766905994322927, "grad_norm": 0.5379775111433409, "learning_rate": 1.2570497725662067e-06, "loss": 0.1645, "step": 7117 }, { "epoch": 2.3770245449991654, "grad_norm": 0.5055506542870268, "learning_rate": 1.2557615609116713e-06, "loss": 0.1555, "step": 7118 }, { "epoch": 2.3773584905660377, "grad_norm": 0.4733144294957525, "learning_rate": 1.254473914868442e-06, "loss": 0.1484, "step": 7119 }, { "epoch": 2.3776924361329104, "grad_norm": 0.5299212758598031, "learning_rate": 1.2531868346310288e-06, "loss": 0.16, "step": 7120 }, { "epoch": 2.378026381699783, "grad_norm": 0.5284052191730892, "learning_rate": 1.2519003203938628e-06, "loss": 0.1529, "step": 7121 }, { "epoch": 2.3783603272666554, "grad_norm": 0.5238906543650232, "learning_rate": 1.2506143723512842e-06, "loss": 0.1557, "step": 7122 }, { "epoch": 2.378694272833528, "grad_norm": 0.5335370952252194, "learning_rate": 1.2493289906975543e-06, "loss": 0.1533, "step": 7123 }, { "epoch": 2.379028218400401, "grad_norm": 0.5786503237493286, "learning_rate": 1.2480441756268397e-06, "loss": 0.1715, "step": 7124 }, { "epoch": 2.3793621639672735, "grad_norm": 0.4843155928013435, "learning_rate": 1.2467599273332332e-06, "loss": 0.156, "step": 7125 }, { "epoch": 2.379696109534146, "grad_norm": 0.5227159067590956, "learning_rate": 1.245476246010731e-06, "loss": 0.1642, "step": 7126 }, { "epoch": 2.3800300551010185, "grad_norm": 0.4944621882634703, "learning_rate": 1.244193131853252e-06, "loss": 0.1475, "step": 7127 }, { "epoch": 2.3803640006678912, "grad_norm": 0.5358983175601851, "learning_rate": 1.2429105850546213e-06, "loss": 0.1548, "step": 7128 }, { "epoch": 2.380697946234764, "grad_norm": 0.5032449469880587, "learning_rate": 1.241628605808587e-06, "loss": 0.1589, "step": 7129 }, { "epoch": 2.381031891801636, "grad_norm": 0.5243288444558761, "learning_rate": 1.2403471943088018e-06, "loss": 0.1556, "step": 7130 }, { "epoch": 2.381365837368509, "grad_norm": 0.5599001289068225, "learning_rate": 1.239066350748845e-06, "loss": 0.1747, "step": 7131 }, { "epoch": 2.3816997829353816, "grad_norm": 0.5533477090829723, "learning_rate": 1.2377860753221976e-06, "loss": 0.1721, "step": 7132 }, { "epoch": 2.382033728502254, "grad_norm": 0.551804825022015, "learning_rate": 1.236506368222264e-06, "loss": 0.1633, "step": 7133 }, { "epoch": 2.3823676740691266, "grad_norm": 0.5217476920386144, "learning_rate": 1.235227229642355e-06, "loss": 0.156, "step": 7134 }, { "epoch": 2.3827016196359994, "grad_norm": 0.5338330593096421, "learning_rate": 1.2339486597757038e-06, "loss": 0.1665, "step": 7135 }, { "epoch": 2.383035565202872, "grad_norm": 0.5231766549666796, "learning_rate": 1.2326706588154496e-06, "loss": 0.1622, "step": 7136 }, { "epoch": 2.3833695107697443, "grad_norm": 0.5193547058090123, "learning_rate": 1.2313932269546518e-06, "loss": 0.1553, "step": 7137 }, { "epoch": 2.383703456336617, "grad_norm": 0.5268208854231688, "learning_rate": 1.2301163643862817e-06, "loss": 0.1584, "step": 7138 }, { "epoch": 2.38403740190349, "grad_norm": 0.509012014482607, "learning_rate": 1.2288400713032227e-06, "loss": 0.1524, "step": 7139 }, { "epoch": 2.3843713474703625, "grad_norm": 0.5645944044633189, "learning_rate": 1.2275643478982762e-06, "loss": 0.1684, "step": 7140 }, { "epoch": 2.3847052930372348, "grad_norm": 0.5293226597996646, "learning_rate": 1.2262891943641526e-06, "loss": 0.1536, "step": 7141 }, { "epoch": 2.3850392386041075, "grad_norm": 0.567921584560172, "learning_rate": 1.2250146108934802e-06, "loss": 0.1659, "step": 7142 }, { "epoch": 2.38537318417098, "grad_norm": 0.5097541913627487, "learning_rate": 1.2237405976787997e-06, "loss": 0.1544, "step": 7143 }, { "epoch": 2.3857071297378525, "grad_norm": 0.5253260119018623, "learning_rate": 1.2224671549125673e-06, "loss": 0.1627, "step": 7144 }, { "epoch": 2.386041075304725, "grad_norm": 0.5148031038721778, "learning_rate": 1.2211942827871486e-06, "loss": 0.1573, "step": 7145 }, { "epoch": 2.386375020871598, "grad_norm": 0.515311953727318, "learning_rate": 1.2199219814948294e-06, "loss": 0.154, "step": 7146 }, { "epoch": 2.3867089664384706, "grad_norm": 0.5236022358304645, "learning_rate": 1.218650251227802e-06, "loss": 0.1574, "step": 7147 }, { "epoch": 2.3870429120053434, "grad_norm": 0.5614360461378991, "learning_rate": 1.2173790921781786e-06, "loss": 0.1605, "step": 7148 }, { "epoch": 2.3873768575722156, "grad_norm": 0.5241197638766266, "learning_rate": 1.2161085045379818e-06, "loss": 0.155, "step": 7149 }, { "epoch": 2.3877108031390883, "grad_norm": 0.5220473602267451, "learning_rate": 1.214838488499151e-06, "loss": 0.1545, "step": 7150 }, { "epoch": 2.388044748705961, "grad_norm": 0.5446184370250129, "learning_rate": 1.2135690442535335e-06, "loss": 0.1576, "step": 7151 }, { "epoch": 2.3883786942728333, "grad_norm": 0.5369404959958738, "learning_rate": 1.2123001719928972e-06, "loss": 0.1645, "step": 7152 }, { "epoch": 2.388712639839706, "grad_norm": 0.5412541896274068, "learning_rate": 1.211031871908916e-06, "loss": 0.1587, "step": 7153 }, { "epoch": 2.3890465854065788, "grad_norm": 0.5880386251671798, "learning_rate": 1.2097641441931868e-06, "loss": 0.1785, "step": 7154 }, { "epoch": 2.3893805309734515, "grad_norm": 0.5251849578213053, "learning_rate": 1.2084969890372111e-06, "loss": 0.1567, "step": 7155 }, { "epoch": 2.3897144765403238, "grad_norm": 0.5583506662971707, "learning_rate": 1.2072304066324103e-06, "loss": 0.1698, "step": 7156 }, { "epoch": 2.3900484221071965, "grad_norm": 0.5335369655122735, "learning_rate": 1.205964397170113e-06, "loss": 0.1592, "step": 7157 }, { "epoch": 2.390382367674069, "grad_norm": 0.5418466727615724, "learning_rate": 1.2046989608415682e-06, "loss": 0.1598, "step": 7158 }, { "epoch": 2.390716313240942, "grad_norm": 0.4969750469808351, "learning_rate": 1.2034340978379328e-06, "loss": 0.1478, "step": 7159 }, { "epoch": 2.391050258807814, "grad_norm": 0.6032598631872019, "learning_rate": 1.2021698083502797e-06, "loss": 0.172, "step": 7160 }, { "epoch": 2.391384204374687, "grad_norm": 0.5109947419002258, "learning_rate": 1.2009060925695965e-06, "loss": 0.146, "step": 7161 }, { "epoch": 2.3917181499415596, "grad_norm": 0.5009788689546751, "learning_rate": 1.1996429506867797e-06, "loss": 0.1501, "step": 7162 }, { "epoch": 2.392052095508432, "grad_norm": 0.5335564451937221, "learning_rate": 1.1983803828926438e-06, "loss": 0.1633, "step": 7163 }, { "epoch": 2.3923860410753046, "grad_norm": 0.5471286776914387, "learning_rate": 1.1971183893779125e-06, "loss": 0.1637, "step": 7164 }, { "epoch": 2.3927199866421773, "grad_norm": 0.5296431778636541, "learning_rate": 1.1958569703332262e-06, "loss": 0.1612, "step": 7165 }, { "epoch": 2.39305393220905, "grad_norm": 0.5313318060463695, "learning_rate": 1.1945961259491368e-06, "loss": 0.1544, "step": 7166 }, { "epoch": 2.3933878777759228, "grad_norm": 0.5573335599867503, "learning_rate": 1.1933358564161108e-06, "loss": 0.1561, "step": 7167 }, { "epoch": 2.393721823342795, "grad_norm": 0.5278835623914241, "learning_rate": 1.1920761619245246e-06, "loss": 0.1661, "step": 7168 }, { "epoch": 2.3940557689096678, "grad_norm": 0.5286543182285065, "learning_rate": 1.1908170426646726e-06, "loss": 0.1576, "step": 7169 }, { "epoch": 2.3943897144765405, "grad_norm": 0.4797109888500716, "learning_rate": 1.189558498826756e-06, "loss": 0.1383, "step": 7170 }, { "epoch": 2.3947236600434127, "grad_norm": 0.5215268741066937, "learning_rate": 1.1883005306008955e-06, "loss": 0.1531, "step": 7171 }, { "epoch": 2.3950576056102855, "grad_norm": 0.4951612997717505, "learning_rate": 1.1870431381771203e-06, "loss": 0.152, "step": 7172 }, { "epoch": 2.395391551177158, "grad_norm": 0.5105466271334883, "learning_rate": 1.185786321745377e-06, "loss": 0.1489, "step": 7173 }, { "epoch": 2.395725496744031, "grad_norm": 0.557204326375292, "learning_rate": 1.1845300814955192e-06, "loss": 0.1576, "step": 7174 }, { "epoch": 2.396059442310903, "grad_norm": 0.5630209033895921, "learning_rate": 1.18327441761732e-06, "loss": 0.1585, "step": 7175 }, { "epoch": 2.396393387877776, "grad_norm": 0.5298445211408632, "learning_rate": 1.1820193303004584e-06, "loss": 0.1627, "step": 7176 }, { "epoch": 2.3967273334446486, "grad_norm": 0.46722816889909324, "learning_rate": 1.1807648197345327e-06, "loss": 0.1376, "step": 7177 }, { "epoch": 2.3970612790115213, "grad_norm": 0.5325098603893473, "learning_rate": 1.1795108861090515e-06, "loss": 0.1551, "step": 7178 }, { "epoch": 2.3973952245783936, "grad_norm": 0.5301349354335696, "learning_rate": 1.1782575296134363e-06, "loss": 0.1519, "step": 7179 }, { "epoch": 2.3977291701452663, "grad_norm": 0.5145757838651948, "learning_rate": 1.1770047504370197e-06, "loss": 0.1538, "step": 7180 }, { "epoch": 2.398063115712139, "grad_norm": 0.5300739981678517, "learning_rate": 1.1757525487690513e-06, "loss": 0.1541, "step": 7181 }, { "epoch": 2.3983970612790113, "grad_norm": 0.5601765735049783, "learning_rate": 1.1745009247986882e-06, "loss": 0.1669, "step": 7182 }, { "epoch": 2.398731006845884, "grad_norm": 0.5090285498231324, "learning_rate": 1.1732498787150044e-06, "loss": 0.1448, "step": 7183 }, { "epoch": 2.3990649524127567, "grad_norm": 0.5003809889361199, "learning_rate": 1.171999410706986e-06, "loss": 0.1451, "step": 7184 }, { "epoch": 2.3993988979796295, "grad_norm": 0.5417432938953705, "learning_rate": 1.1707495209635283e-06, "loss": 0.1557, "step": 7185 }, { "epoch": 2.3997328435465017, "grad_norm": 0.5604087075100392, "learning_rate": 1.1695002096734454e-06, "loss": 0.1657, "step": 7186 }, { "epoch": 2.4000667891133745, "grad_norm": 0.5283839371353753, "learning_rate": 1.1682514770254567e-06, "loss": 0.1552, "step": 7187 }, { "epoch": 2.400400734680247, "grad_norm": 0.5167426973502384, "learning_rate": 1.1670033232081995e-06, "loss": 0.1596, "step": 7188 }, { "epoch": 2.40073468024712, "grad_norm": 0.4966963801218271, "learning_rate": 1.1657557484102228e-06, "loss": 0.1551, "step": 7189 }, { "epoch": 2.401068625813992, "grad_norm": 0.5768210088793992, "learning_rate": 1.1645087528199883e-06, "loss": 0.1669, "step": 7190 }, { "epoch": 2.401402571380865, "grad_norm": 0.5000691629042625, "learning_rate": 1.1632623366258666e-06, "loss": 0.1472, "step": 7191 }, { "epoch": 2.4017365169477376, "grad_norm": 0.5345663390184785, "learning_rate": 1.162016500016147e-06, "loss": 0.1532, "step": 7192 }, { "epoch": 2.40207046251461, "grad_norm": 0.5230805429006006, "learning_rate": 1.1607712431790242e-06, "loss": 0.1613, "step": 7193 }, { "epoch": 2.4024044080814826, "grad_norm": 0.47784139221190725, "learning_rate": 1.15952656630261e-06, "loss": 0.1378, "step": 7194 }, { "epoch": 2.4027383536483553, "grad_norm": 0.5413001588245746, "learning_rate": 1.158282469574929e-06, "loss": 0.16, "step": 7195 }, { "epoch": 2.403072299215228, "grad_norm": 0.5172176939927573, "learning_rate": 1.1570389531839165e-06, "loss": 0.1449, "step": 7196 }, { "epoch": 2.4034062447821007, "grad_norm": 0.5126788675251615, "learning_rate": 1.1557960173174183e-06, "loss": 0.1583, "step": 7197 }, { "epoch": 2.403740190348973, "grad_norm": 0.5071441617182192, "learning_rate": 1.154553662163197e-06, "loss": 0.1485, "step": 7198 }, { "epoch": 2.4040741359158457, "grad_norm": 0.5710809229470581, "learning_rate": 1.1533118879089227e-06, "loss": 0.1681, "step": 7199 }, { "epoch": 2.4044080814827185, "grad_norm": 0.5293626105834608, "learning_rate": 1.1520706947421806e-06, "loss": 0.149, "step": 7200 }, { "epoch": 2.4047420270495907, "grad_norm": 0.5452941336660879, "learning_rate": 1.1508300828504682e-06, "loss": 0.1697, "step": 7201 }, { "epoch": 2.4050759726164634, "grad_norm": 0.5166114823495399, "learning_rate": 1.1495900524211955e-06, "loss": 0.1532, "step": 7202 }, { "epoch": 2.405409918183336, "grad_norm": 0.5471123464355846, "learning_rate": 1.1483506036416814e-06, "loss": 0.1664, "step": 7203 }, { "epoch": 2.405743863750209, "grad_norm": 0.5141430169803471, "learning_rate": 1.1471117366991613e-06, "loss": 0.1555, "step": 7204 }, { "epoch": 2.406077809317081, "grad_norm": 0.46243845794224187, "learning_rate": 1.1458734517807785e-06, "loss": 0.1418, "step": 7205 }, { "epoch": 2.406411754883954, "grad_norm": 0.5162303747719997, "learning_rate": 1.1446357490735921e-06, "loss": 0.1535, "step": 7206 }, { "epoch": 2.4067457004508266, "grad_norm": 0.48927363828819215, "learning_rate": 1.143398628764572e-06, "loss": 0.1525, "step": 7207 }, { "epoch": 2.4070796460176993, "grad_norm": 0.5390800009438081, "learning_rate": 1.1421620910405977e-06, "loss": 0.1674, "step": 7208 }, { "epoch": 2.4074135915845716, "grad_norm": 0.4994447846500062, "learning_rate": 1.1409261360884661e-06, "loss": 0.1584, "step": 7209 }, { "epoch": 2.4077475371514443, "grad_norm": 0.5197712049684485, "learning_rate": 1.1396907640948785e-06, "loss": 0.1478, "step": 7210 }, { "epoch": 2.408081482718317, "grad_norm": 0.5135963024209774, "learning_rate": 1.1384559752464553e-06, "loss": 0.1488, "step": 7211 }, { "epoch": 2.4084154282851893, "grad_norm": 0.531386611449885, "learning_rate": 1.137221769729725e-06, "loss": 0.153, "step": 7212 }, { "epoch": 2.408749373852062, "grad_norm": 0.5578052539668118, "learning_rate": 1.1359881477311301e-06, "loss": 0.1591, "step": 7213 }, { "epoch": 2.4090833194189347, "grad_norm": 0.5529075972559461, "learning_rate": 1.1347551094370224e-06, "loss": 0.1704, "step": 7214 }, { "epoch": 2.4094172649858074, "grad_norm": 0.5441030100011386, "learning_rate": 1.1335226550336676e-06, "loss": 0.1587, "step": 7215 }, { "epoch": 2.40975121055268, "grad_norm": 0.485355416601119, "learning_rate": 1.1322907847072411e-06, "loss": 0.1419, "step": 7216 }, { "epoch": 2.4100851561195524, "grad_norm": 0.5148035517585307, "learning_rate": 1.1310594986438339e-06, "loss": 0.1503, "step": 7217 }, { "epoch": 2.410419101686425, "grad_norm": 0.5021996340564928, "learning_rate": 1.129828797029442e-06, "loss": 0.1545, "step": 7218 }, { "epoch": 2.410753047253298, "grad_norm": 0.5582216534507548, "learning_rate": 1.128598680049982e-06, "loss": 0.1584, "step": 7219 }, { "epoch": 2.41108699282017, "grad_norm": 0.5244566063420066, "learning_rate": 1.1273691478912752e-06, "loss": 0.1464, "step": 7220 }, { "epoch": 2.411420938387043, "grad_norm": 0.5195593318131434, "learning_rate": 1.1261402007390587e-06, "loss": 0.1592, "step": 7221 }, { "epoch": 2.4117548839539156, "grad_norm": 0.5493008998470231, "learning_rate": 1.1249118387789764e-06, "loss": 0.1627, "step": 7222 }, { "epoch": 2.4120888295207883, "grad_norm": 0.525214705071319, "learning_rate": 1.12368406219659e-06, "loss": 0.1606, "step": 7223 }, { "epoch": 2.4124227750876606, "grad_norm": 0.4939289053178912, "learning_rate": 1.1224568711773653e-06, "loss": 0.151, "step": 7224 }, { "epoch": 2.4127567206545333, "grad_norm": 0.48455079432812664, "learning_rate": 1.1212302659066898e-06, "loss": 0.1501, "step": 7225 }, { "epoch": 2.413090666221406, "grad_norm": 0.5156418088336056, "learning_rate": 1.1200042465698518e-06, "loss": 0.151, "step": 7226 }, { "epoch": 2.4134246117882787, "grad_norm": 0.5148384016761979, "learning_rate": 1.1187788133520594e-06, "loss": 0.1642, "step": 7227 }, { "epoch": 2.413758557355151, "grad_norm": 0.5138997748746464, "learning_rate": 1.1175539664384261e-06, "loss": 0.1587, "step": 7228 }, { "epoch": 2.4140925029220237, "grad_norm": 0.519066569925353, "learning_rate": 1.1163297060139815e-06, "loss": 0.1533, "step": 7229 }, { "epoch": 2.4144264484888964, "grad_norm": 0.49248902007645573, "learning_rate": 1.1151060322636625e-06, "loss": 0.1506, "step": 7230 }, { "epoch": 2.4147603940557687, "grad_norm": 0.5312541778053379, "learning_rate": 1.1138829453723204e-06, "loss": 0.1593, "step": 7231 }, { "epoch": 2.4150943396226414, "grad_norm": 0.5018480303953734, "learning_rate": 1.112660445524718e-06, "loss": 0.1461, "step": 7232 }, { "epoch": 2.415428285189514, "grad_norm": 0.5226338743612154, "learning_rate": 1.1114385329055262e-06, "loss": 0.1608, "step": 7233 }, { "epoch": 2.415762230756387, "grad_norm": 0.5635333501894039, "learning_rate": 1.1102172076993301e-06, "loss": 0.1633, "step": 7234 }, { "epoch": 2.416096176323259, "grad_norm": 0.5602180181468333, "learning_rate": 1.1089964700906257e-06, "loss": 0.1655, "step": 7235 }, { "epoch": 2.416430121890132, "grad_norm": 0.5756321012974656, "learning_rate": 1.1077763202638208e-06, "loss": 0.1613, "step": 7236 }, { "epoch": 2.4167640674570046, "grad_norm": 0.5029427815181746, "learning_rate": 1.106556758403231e-06, "loss": 0.1515, "step": 7237 }, { "epoch": 2.4170980130238773, "grad_norm": 0.5262956548357789, "learning_rate": 1.105337784693088e-06, "loss": 0.1634, "step": 7238 }, { "epoch": 2.4174319585907496, "grad_norm": 0.5626068139042897, "learning_rate": 1.1041193993175293e-06, "loss": 0.1579, "step": 7239 }, { "epoch": 2.4177659041576223, "grad_norm": 0.5082947467016112, "learning_rate": 1.1029016024606093e-06, "loss": 0.1532, "step": 7240 }, { "epoch": 2.418099849724495, "grad_norm": 0.5297964029071952, "learning_rate": 1.101684394306286e-06, "loss": 0.1582, "step": 7241 }, { "epoch": 2.4184337952913673, "grad_norm": 0.5296061030264333, "learning_rate": 1.100467775038439e-06, "loss": 0.1555, "step": 7242 }, { "epoch": 2.41876774085824, "grad_norm": 0.5951302915676688, "learning_rate": 1.099251744840849e-06, "loss": 0.1609, "step": 7243 }, { "epoch": 2.4191016864251127, "grad_norm": 0.5448858703349814, "learning_rate": 1.0980363038972141e-06, "loss": 0.1512, "step": 7244 }, { "epoch": 2.4194356319919854, "grad_norm": 0.5240412587293782, "learning_rate": 1.096821452391138e-06, "loss": 0.1472, "step": 7245 }, { "epoch": 2.419769577558858, "grad_norm": 0.5340001181165048, "learning_rate": 1.0956071905061415e-06, "loss": 0.1524, "step": 7246 }, { "epoch": 2.4201035231257304, "grad_norm": 0.5621964258233278, "learning_rate": 1.0943935184256487e-06, "loss": 0.162, "step": 7247 }, { "epoch": 2.420437468692603, "grad_norm": 0.49430836108102416, "learning_rate": 1.093180436333005e-06, "loss": 0.1499, "step": 7248 }, { "epoch": 2.420771414259476, "grad_norm": 0.49277358371660024, "learning_rate": 1.091967944411456e-06, "loss": 0.1503, "step": 7249 }, { "epoch": 2.421105359826348, "grad_norm": 0.5566152601905626, "learning_rate": 1.0907560428441666e-06, "loss": 0.163, "step": 7250 }, { "epoch": 2.421439305393221, "grad_norm": 0.5354411967166895, "learning_rate": 1.0895447318142043e-06, "loss": 0.1592, "step": 7251 }, { "epoch": 2.4217732509600935, "grad_norm": 0.5178968638492157, "learning_rate": 1.0883340115045566e-06, "loss": 0.1555, "step": 7252 }, { "epoch": 2.4221071965269663, "grad_norm": 0.5064556197403924, "learning_rate": 1.0871238820981133e-06, "loss": 0.1553, "step": 7253 }, { "epoch": 2.4224411420938385, "grad_norm": 0.513639948844901, "learning_rate": 1.0859143437776803e-06, "loss": 0.1611, "step": 7254 }, { "epoch": 2.4227750876607113, "grad_norm": 0.539271199077283, "learning_rate": 1.0847053967259736e-06, "loss": 0.1529, "step": 7255 }, { "epoch": 2.423109033227584, "grad_norm": 0.52303917750466, "learning_rate": 1.0834970411256167e-06, "loss": 0.1611, "step": 7256 }, { "epoch": 2.4234429787944567, "grad_norm": 0.5294641295234923, "learning_rate": 1.082289277159147e-06, "loss": 0.1598, "step": 7257 }, { "epoch": 2.423776924361329, "grad_norm": 0.550231135188987, "learning_rate": 1.0810821050090132e-06, "loss": 0.1721, "step": 7258 }, { "epoch": 2.4241108699282017, "grad_norm": 0.6018154425280414, "learning_rate": 1.0798755248575694e-06, "loss": 0.1592, "step": 7259 }, { "epoch": 2.4244448154950744, "grad_norm": 0.5102466501385734, "learning_rate": 1.078669536887086e-06, "loss": 0.1615, "step": 7260 }, { "epoch": 2.4247787610619467, "grad_norm": 0.5042309590141728, "learning_rate": 1.077464141279742e-06, "loss": 0.1518, "step": 7261 }, { "epoch": 2.4251127066288194, "grad_norm": 0.5388065888923106, "learning_rate": 1.0762593382176244e-06, "loss": 0.1687, "step": 7262 }, { "epoch": 2.425446652195692, "grad_norm": 0.48205881440780235, "learning_rate": 1.0750551278827365e-06, "loss": 0.1533, "step": 7263 }, { "epoch": 2.425780597762565, "grad_norm": 0.5594387794673621, "learning_rate": 1.073851510456984e-06, "loss": 0.1672, "step": 7264 }, { "epoch": 2.4261145433294375, "grad_norm": 0.4905063271692703, "learning_rate": 1.0726484861221902e-06, "loss": 0.149, "step": 7265 }, { "epoch": 2.42644848889631, "grad_norm": 0.5205108609752576, "learning_rate": 1.0714460550600859e-06, "loss": 0.1485, "step": 7266 }, { "epoch": 2.4267824344631825, "grad_norm": 0.5231560925068052, "learning_rate": 1.0702442174523132e-06, "loss": 0.1552, "step": 7267 }, { "epoch": 2.4271163800300553, "grad_norm": 0.4769453332959195, "learning_rate": 1.0690429734804214e-06, "loss": 0.1434, "step": 7268 }, { "epoch": 2.4274503255969275, "grad_norm": 0.5312419128658573, "learning_rate": 1.0678423233258755e-06, "loss": 0.1514, "step": 7269 }, { "epoch": 2.4277842711638002, "grad_norm": 0.5457120778252545, "learning_rate": 1.0666422671700438e-06, "loss": 0.1529, "step": 7270 }, { "epoch": 2.428118216730673, "grad_norm": 0.5443040289592945, "learning_rate": 1.065442805194214e-06, "loss": 0.1643, "step": 7271 }, { "epoch": 2.4284521622975457, "grad_norm": 0.5274373283039757, "learning_rate": 1.0642439375795748e-06, "loss": 0.1488, "step": 7272 }, { "epoch": 2.428786107864418, "grad_norm": 0.5390642593642723, "learning_rate": 1.0630456645072324e-06, "loss": 0.1586, "step": 7273 }, { "epoch": 2.4291200534312907, "grad_norm": 0.5089151199607201, "learning_rate": 1.0618479861581971e-06, "loss": 0.1537, "step": 7274 }, { "epoch": 2.4294539989981634, "grad_norm": 0.5258576186084203, "learning_rate": 1.060650902713395e-06, "loss": 0.1556, "step": 7275 }, { "epoch": 2.429787944565036, "grad_norm": 0.5113060552599415, "learning_rate": 1.0594544143536572e-06, "loss": 0.1517, "step": 7276 }, { "epoch": 2.4301218901319084, "grad_norm": 0.5072388702252578, "learning_rate": 1.0582585212597286e-06, "loss": 0.1499, "step": 7277 }, { "epoch": 2.430455835698781, "grad_norm": 0.5557065639821541, "learning_rate": 1.0570632236122641e-06, "loss": 0.1621, "step": 7278 }, { "epoch": 2.430789781265654, "grad_norm": 0.4741104501250161, "learning_rate": 1.0558685215918246e-06, "loss": 0.139, "step": 7279 }, { "epoch": 2.431123726832526, "grad_norm": 0.5313610251633167, "learning_rate": 1.0546744153788858e-06, "loss": 0.1554, "step": 7280 }, { "epoch": 2.431457672399399, "grad_norm": 0.5144511403714003, "learning_rate": 1.0534809051538324e-06, "loss": 0.1471, "step": 7281 }, { "epoch": 2.4317916179662715, "grad_norm": 0.5905335390702079, "learning_rate": 1.0522879910969563e-06, "loss": 0.1742, "step": 7282 }, { "epoch": 2.4321255635331442, "grad_norm": 0.49916027008308816, "learning_rate": 1.0510956733884614e-06, "loss": 0.1487, "step": 7283 }, { "epoch": 2.4324595091000165, "grad_norm": 0.5277947791001935, "learning_rate": 1.0499039522084637e-06, "loss": 0.1534, "step": 7284 }, { "epoch": 2.4327934546668892, "grad_norm": 0.5430657315225573, "learning_rate": 1.0487128277369829e-06, "loss": 0.1649, "step": 7285 }, { "epoch": 2.433127400233762, "grad_norm": 0.539630403103098, "learning_rate": 1.0475223001539564e-06, "loss": 0.1602, "step": 7286 }, { "epoch": 2.4334613458006347, "grad_norm": 0.5264937727044785, "learning_rate": 1.0463323696392236e-06, "loss": 0.1561, "step": 7287 }, { "epoch": 2.433795291367507, "grad_norm": 0.47187721488243567, "learning_rate": 1.0451430363725395e-06, "loss": 0.1526, "step": 7288 }, { "epoch": 2.4341292369343797, "grad_norm": 0.532862403776023, "learning_rate": 1.043954300533566e-06, "loss": 0.1615, "step": 7289 }, { "epoch": 2.4344631825012524, "grad_norm": 0.540835650588637, "learning_rate": 1.0427661623018786e-06, "loss": 0.1648, "step": 7290 }, { "epoch": 2.4347971280681246, "grad_norm": 0.5395899185409637, "learning_rate": 1.0415786218569557e-06, "loss": 0.1655, "step": 7291 }, { "epoch": 2.4351310736349974, "grad_norm": 0.5003448121019961, "learning_rate": 1.0403916793781922e-06, "loss": 0.1498, "step": 7292 }, { "epoch": 2.43546501920187, "grad_norm": 0.4713027523101309, "learning_rate": 1.0392053350448867e-06, "loss": 0.14, "step": 7293 }, { "epoch": 2.435798964768743, "grad_norm": 0.49901626838835894, "learning_rate": 1.0380195890362527e-06, "loss": 0.1591, "step": 7294 }, { "epoch": 2.4361329103356155, "grad_norm": 0.4962709060760146, "learning_rate": 1.0368344415314101e-06, "loss": 0.1559, "step": 7295 }, { "epoch": 2.436466855902488, "grad_norm": 0.5031119480742257, "learning_rate": 1.0356498927093916e-06, "loss": 0.1496, "step": 7296 }, { "epoch": 2.4368008014693605, "grad_norm": 0.4904804267379514, "learning_rate": 1.0344659427491343e-06, "loss": 0.1566, "step": 7297 }, { "epoch": 2.4371347470362332, "grad_norm": 0.5219797130767747, "learning_rate": 1.0332825918294898e-06, "loss": 0.1554, "step": 7298 }, { "epoch": 2.4374686926031055, "grad_norm": 0.5189968707324387, "learning_rate": 1.0320998401292154e-06, "loss": 0.1472, "step": 7299 }, { "epoch": 2.437802638169978, "grad_norm": 0.5253702042666821, "learning_rate": 1.0309176878269806e-06, "loss": 0.1489, "step": 7300 }, { "epoch": 2.438136583736851, "grad_norm": 0.49138094461283727, "learning_rate": 1.0297361351013646e-06, "loss": 0.1465, "step": 7301 }, { "epoch": 2.4384705293037237, "grad_norm": 0.5309848647784869, "learning_rate": 1.028555182130853e-06, "loss": 0.1605, "step": 7302 }, { "epoch": 2.438804474870596, "grad_norm": 0.538951743332172, "learning_rate": 1.027374829093843e-06, "loss": 0.1658, "step": 7303 }, { "epoch": 2.4391384204374686, "grad_norm": 0.5411757444422154, "learning_rate": 1.0261950761686423e-06, "loss": 0.1581, "step": 7304 }, { "epoch": 2.4394723660043414, "grad_norm": 0.5200715406012469, "learning_rate": 1.0250159235334645e-06, "loss": 0.158, "step": 7305 }, { "epoch": 2.439806311571214, "grad_norm": 0.558830853232633, "learning_rate": 1.0238373713664351e-06, "loss": 0.1662, "step": 7306 }, { "epoch": 2.4401402571380864, "grad_norm": 0.4868370076213395, "learning_rate": 1.0226594198455903e-06, "loss": 0.146, "step": 7307 }, { "epoch": 2.440474202704959, "grad_norm": 0.49711149600547216, "learning_rate": 1.0214820691488698e-06, "loss": 0.1514, "step": 7308 }, { "epoch": 2.440808148271832, "grad_norm": 0.5147765431980813, "learning_rate": 1.02030531945413e-06, "loss": 0.1631, "step": 7309 }, { "epoch": 2.441142093838704, "grad_norm": 0.5618697583716376, "learning_rate": 1.0191291709391298e-06, "loss": 0.1437, "step": 7310 }, { "epoch": 2.441476039405577, "grad_norm": 0.5719539944950716, "learning_rate": 1.0179536237815413e-06, "loss": 0.1691, "step": 7311 }, { "epoch": 2.4418099849724495, "grad_norm": 0.5234878850474092, "learning_rate": 1.016778678158945e-06, "loss": 0.155, "step": 7312 }, { "epoch": 2.442143930539322, "grad_norm": 0.5452399489884633, "learning_rate": 1.015604334248832e-06, "loss": 0.1589, "step": 7313 }, { "epoch": 2.442477876106195, "grad_norm": 0.5534133198761949, "learning_rate": 1.0144305922285975e-06, "loss": 0.1523, "step": 7314 }, { "epoch": 2.442811821673067, "grad_norm": 0.5535102460042848, "learning_rate": 1.0132574522755518e-06, "loss": 0.1592, "step": 7315 }, { "epoch": 2.44314576723994, "grad_norm": 0.5204726820983054, "learning_rate": 1.0120849145669093e-06, "loss": 0.1534, "step": 7316 }, { "epoch": 2.4434797128068126, "grad_norm": 0.5244522290512145, "learning_rate": 1.010912979279796e-06, "loss": 0.161, "step": 7317 }, { "epoch": 2.443813658373685, "grad_norm": 0.4986635964399862, "learning_rate": 1.009741646591248e-06, "loss": 0.1537, "step": 7318 }, { "epoch": 2.4441476039405576, "grad_norm": 0.5206734432989935, "learning_rate": 1.0085709166782088e-06, "loss": 0.1612, "step": 7319 }, { "epoch": 2.4444815495074304, "grad_norm": 0.5336710529700994, "learning_rate": 1.0074007897175291e-06, "loss": 0.1531, "step": 7320 }, { "epoch": 2.444815495074303, "grad_norm": 0.5533500819209353, "learning_rate": 1.0062312658859723e-06, "loss": 0.1611, "step": 7321 }, { "epoch": 2.4451494406411753, "grad_norm": 0.5658983196079189, "learning_rate": 1.0050623453602075e-06, "loss": 0.1667, "step": 7322 }, { "epoch": 2.445483386208048, "grad_norm": 0.5287095854597083, "learning_rate": 1.0038940283168136e-06, "loss": 0.1505, "step": 7323 }, { "epoch": 2.4458173317749208, "grad_norm": 0.5375178265696032, "learning_rate": 1.0027263149322797e-06, "loss": 0.1637, "step": 7324 }, { "epoch": 2.4461512773417935, "grad_norm": 0.47445833611149235, "learning_rate": 1.001559205383003e-06, "loss": 0.1446, "step": 7325 }, { "epoch": 2.4464852229086658, "grad_norm": 0.4768203964078314, "learning_rate": 1.000392699845288e-06, "loss": 0.1345, "step": 7326 }, { "epoch": 2.4468191684755385, "grad_norm": 0.5206013386975504, "learning_rate": 9.992267984953503e-07, "loss": 0.1528, "step": 7327 }, { "epoch": 2.447153114042411, "grad_norm": 0.5182522042264855, "learning_rate": 9.98061501509311e-07, "loss": 0.1566, "step": 7328 }, { "epoch": 2.4474870596092835, "grad_norm": 0.505189403897211, "learning_rate": 9.968968090632032e-07, "loss": 0.1522, "step": 7329 }, { "epoch": 2.447821005176156, "grad_norm": 0.5362073600754579, "learning_rate": 9.957327213329687e-07, "loss": 0.1546, "step": 7330 }, { "epoch": 2.448154950743029, "grad_norm": 0.5102731648397336, "learning_rate": 9.945692384944544e-07, "loss": 0.1461, "step": 7331 }, { "epoch": 2.4484888963099016, "grad_norm": 0.5226654494134502, "learning_rate": 9.934063607234202e-07, "loss": 0.1532, "step": 7332 }, { "epoch": 2.448822841876774, "grad_norm": 0.5230643942856512, "learning_rate": 9.922440881955298e-07, "loss": 0.1523, "step": 7333 }, { "epoch": 2.4491567874436466, "grad_norm": 0.5655913741188585, "learning_rate": 9.910824210863611e-07, "loss": 0.1639, "step": 7334 }, { "epoch": 2.4494907330105193, "grad_norm": 0.5386421493301948, "learning_rate": 9.899213595713935e-07, "loss": 0.1628, "step": 7335 }, { "epoch": 2.449824678577392, "grad_norm": 0.49776939645132456, "learning_rate": 9.887609038260243e-07, "loss": 0.1549, "step": 7336 }, { "epoch": 2.4501586241442643, "grad_norm": 0.558716304445521, "learning_rate": 9.876010540255504e-07, "loss": 0.1635, "step": 7337 }, { "epoch": 2.450492569711137, "grad_norm": 0.4917972097451291, "learning_rate": 9.86441810345183e-07, "loss": 0.1527, "step": 7338 }, { "epoch": 2.4508265152780098, "grad_norm": 0.5377757832010998, "learning_rate": 9.852831729600365e-07, "loss": 0.162, "step": 7339 }, { "epoch": 2.451160460844882, "grad_norm": 0.4868757826124942, "learning_rate": 9.841251420451398e-07, "loss": 0.1444, "step": 7340 }, { "epoch": 2.4514944064117548, "grad_norm": 0.5439255876381551, "learning_rate": 9.829677177754231e-07, "loss": 0.1568, "step": 7341 }, { "epoch": 2.4518283519786275, "grad_norm": 0.5411457197387434, "learning_rate": 9.818109003257348e-07, "loss": 0.1649, "step": 7342 }, { "epoch": 2.4521622975455, "grad_norm": 0.5373406277835615, "learning_rate": 9.806546898708213e-07, "loss": 0.1587, "step": 7343 }, { "epoch": 2.452496243112373, "grad_norm": 0.5184857587685223, "learning_rate": 9.794990865853444e-07, "loss": 0.1553, "step": 7344 }, { "epoch": 2.452830188679245, "grad_norm": 0.4972829968409121, "learning_rate": 9.783440906438686e-07, "loss": 0.1518, "step": 7345 }, { "epoch": 2.453164134246118, "grad_norm": 0.5263298934429176, "learning_rate": 9.771897022208732e-07, "loss": 0.1605, "step": 7346 }, { "epoch": 2.4534980798129906, "grad_norm": 0.5039481262297244, "learning_rate": 9.760359214907372e-07, "loss": 0.1489, "step": 7347 }, { "epoch": 2.453832025379863, "grad_norm": 0.5540379369889635, "learning_rate": 9.74882748627759e-07, "loss": 0.1582, "step": 7348 }, { "epoch": 2.4541659709467356, "grad_norm": 0.5561596840406983, "learning_rate": 9.737301838061342e-07, "loss": 0.1647, "step": 7349 }, { "epoch": 2.4544999165136083, "grad_norm": 0.5045374898134211, "learning_rate": 9.725782271999744e-07, "loss": 0.1477, "step": 7350 }, { "epoch": 2.454833862080481, "grad_norm": 0.49671437127259194, "learning_rate": 9.714268789832937e-07, "loss": 0.1501, "step": 7351 }, { "epoch": 2.4551678076473533, "grad_norm": 0.5284559165780311, "learning_rate": 9.702761393300176e-07, "loss": 0.1578, "step": 7352 }, { "epoch": 2.455501753214226, "grad_norm": 0.5117383212099872, "learning_rate": 9.691260084139802e-07, "loss": 0.1496, "step": 7353 }, { "epoch": 2.4558356987810988, "grad_norm": 0.526036454760328, "learning_rate": 9.679764864089203e-07, "loss": 0.1562, "step": 7354 }, { "epoch": 2.4561696443479715, "grad_norm": 0.49859254666734404, "learning_rate": 9.668275734884885e-07, "loss": 0.1517, "step": 7355 }, { "epoch": 2.4565035899148437, "grad_norm": 0.5353355704871284, "learning_rate": 9.656792698262402e-07, "loss": 0.1527, "step": 7356 }, { "epoch": 2.4568375354817165, "grad_norm": 0.5062378256192873, "learning_rate": 9.645315755956413e-07, "loss": 0.1534, "step": 7357 }, { "epoch": 2.457171481048589, "grad_norm": 0.46223832301303525, "learning_rate": 9.633844909700618e-07, "loss": 0.1401, "step": 7358 }, { "epoch": 2.4575054266154615, "grad_norm": 0.5524054906187581, "learning_rate": 9.622380161227873e-07, "loss": 0.161, "step": 7359 }, { "epoch": 2.457839372182334, "grad_norm": 0.5056246623447084, "learning_rate": 9.61092151227002e-07, "loss": 0.1548, "step": 7360 }, { "epoch": 2.458173317749207, "grad_norm": 0.5121677065114587, "learning_rate": 9.599468964558051e-07, "loss": 0.1459, "step": 7361 }, { "epoch": 2.4585072633160796, "grad_norm": 0.5588974512360972, "learning_rate": 9.588022519821983e-07, "loss": 0.161, "step": 7362 }, { "epoch": 2.4588412088829523, "grad_norm": 0.5641228129018973, "learning_rate": 9.576582179790967e-07, "loss": 0.1607, "step": 7363 }, { "epoch": 2.4591751544498246, "grad_norm": 0.5565054266004965, "learning_rate": 9.565147946193149e-07, "loss": 0.1578, "step": 7364 }, { "epoch": 2.4595091000166973, "grad_norm": 0.5954752685742998, "learning_rate": 9.553719820755869e-07, "loss": 0.1652, "step": 7365 }, { "epoch": 2.45984304558357, "grad_norm": 0.5018003233841967, "learning_rate": 9.542297805205436e-07, "loss": 0.1422, "step": 7366 }, { "epoch": 2.4601769911504423, "grad_norm": 0.5505488375112121, "learning_rate": 9.530881901267308e-07, "loss": 0.1484, "step": 7367 }, { "epoch": 2.460510936717315, "grad_norm": 0.5195725678865587, "learning_rate": 9.519472110665967e-07, "loss": 0.1467, "step": 7368 }, { "epoch": 2.4608448822841877, "grad_norm": 0.5151373816065521, "learning_rate": 9.508068435125012e-07, "loss": 0.1553, "step": 7369 }, { "epoch": 2.4611788278510605, "grad_norm": 0.5281539463812102, "learning_rate": 9.496670876367076e-07, "loss": 0.1568, "step": 7370 }, { "epoch": 2.4615127734179327, "grad_norm": 0.5334649152791151, "learning_rate": 9.485279436113942e-07, "loss": 0.1671, "step": 7371 }, { "epoch": 2.4618467189848054, "grad_norm": 0.541217305277803, "learning_rate": 9.473894116086379e-07, "loss": 0.1582, "step": 7372 }, { "epoch": 2.462180664551678, "grad_norm": 0.4819910188513477, "learning_rate": 9.462514918004301e-07, "loss": 0.1401, "step": 7373 }, { "epoch": 2.462514610118551, "grad_norm": 0.5450310802132138, "learning_rate": 9.451141843586647e-07, "loss": 0.1556, "step": 7374 }, { "epoch": 2.462848555685423, "grad_norm": 0.5413919382336527, "learning_rate": 9.439774894551479e-07, "loss": 0.1603, "step": 7375 }, { "epoch": 2.463182501252296, "grad_norm": 0.5547122615105633, "learning_rate": 9.428414072615877e-07, "loss": 0.1618, "step": 7376 }, { "epoch": 2.4635164468191686, "grad_norm": 0.5160881731381368, "learning_rate": 9.417059379496047e-07, "loss": 0.1609, "step": 7377 }, { "epoch": 2.463850392386041, "grad_norm": 0.5382470923154085, "learning_rate": 9.40571081690726e-07, "loss": 0.1624, "step": 7378 }, { "epoch": 2.4641843379529136, "grad_norm": 0.5136862931411745, "learning_rate": 9.394368386563823e-07, "loss": 0.1537, "step": 7379 }, { "epoch": 2.4645182835197863, "grad_norm": 0.5130561945345334, "learning_rate": 9.383032090179173e-07, "loss": 0.1595, "step": 7380 }, { "epoch": 2.464852229086659, "grad_norm": 0.4979229387087476, "learning_rate": 9.371701929465759e-07, "loss": 0.1538, "step": 7381 }, { "epoch": 2.4651861746535313, "grad_norm": 0.5288085451267187, "learning_rate": 9.360377906135148e-07, "loss": 0.1653, "step": 7382 }, { "epoch": 2.465520120220404, "grad_norm": 0.5246843887272186, "learning_rate": 9.349060021897976e-07, "loss": 0.1599, "step": 7383 }, { "epoch": 2.4658540657872767, "grad_norm": 0.5128075936979838, "learning_rate": 9.337748278463948e-07, "loss": 0.1592, "step": 7384 }, { "epoch": 2.4661880113541494, "grad_norm": 0.5244124581207874, "learning_rate": 9.326442677541813e-07, "loss": 0.1576, "step": 7385 }, { "epoch": 2.4665219569210217, "grad_norm": 0.5204631164355119, "learning_rate": 9.31514322083944e-07, "loss": 0.153, "step": 7386 }, { "epoch": 2.4668559024878944, "grad_norm": 0.5045026609152344, "learning_rate": 9.303849910063717e-07, "loss": 0.1523, "step": 7387 }, { "epoch": 2.467189848054767, "grad_norm": 0.5230456092218726, "learning_rate": 9.292562746920647e-07, "loss": 0.1592, "step": 7388 }, { "epoch": 2.4675237936216394, "grad_norm": 0.5378631540624517, "learning_rate": 9.281281733115288e-07, "loss": 0.1546, "step": 7389 }, { "epoch": 2.467857739188512, "grad_norm": 0.5030886402668243, "learning_rate": 9.270006870351789e-07, "loss": 0.1416, "step": 7390 }, { "epoch": 2.468191684755385, "grad_norm": 0.5286770365980373, "learning_rate": 9.258738160333314e-07, "loss": 0.1533, "step": 7391 }, { "epoch": 2.4685256303222576, "grad_norm": 0.5174511810173182, "learning_rate": 9.247475604762168e-07, "loss": 0.1629, "step": 7392 }, { "epoch": 2.4688595758891303, "grad_norm": 0.5775562479058309, "learning_rate": 9.236219205339647e-07, "loss": 0.1591, "step": 7393 }, { "epoch": 2.4691935214560026, "grad_norm": 0.5530670682386785, "learning_rate": 9.224968963766223e-07, "loss": 0.1617, "step": 7394 }, { "epoch": 2.4695274670228753, "grad_norm": 0.5588841493266121, "learning_rate": 9.213724881741337e-07, "loss": 0.1697, "step": 7395 }, { "epoch": 2.469861412589748, "grad_norm": 0.496013975272932, "learning_rate": 9.202486960963559e-07, "loss": 0.1531, "step": 7396 }, { "epoch": 2.4701953581566203, "grad_norm": 0.5076710510224289, "learning_rate": 9.191255203130489e-07, "loss": 0.1587, "step": 7397 }, { "epoch": 2.470529303723493, "grad_norm": 0.5020408243905019, "learning_rate": 9.18002960993884e-07, "loss": 0.1518, "step": 7398 }, { "epoch": 2.4708632492903657, "grad_norm": 0.5348771472192876, "learning_rate": 9.168810183084348e-07, "loss": 0.1601, "step": 7399 }, { "epoch": 2.4711971948572384, "grad_norm": 0.5044766853939331, "learning_rate": 9.157596924261847e-07, "loss": 0.1542, "step": 7400 }, { "epoch": 2.4715311404241107, "grad_norm": 0.5757230051543882, "learning_rate": 9.146389835165248e-07, "loss": 0.1769, "step": 7401 }, { "epoch": 2.4718650859909834, "grad_norm": 0.5052922924503145, "learning_rate": 9.135188917487487e-07, "loss": 0.1507, "step": 7402 }, { "epoch": 2.472199031557856, "grad_norm": 0.5365350669677343, "learning_rate": 9.12399417292062e-07, "loss": 0.1488, "step": 7403 }, { "epoch": 2.472532977124729, "grad_norm": 0.507603981314568, "learning_rate": 9.112805603155716e-07, "loss": 0.1534, "step": 7404 }, { "epoch": 2.472866922691601, "grad_norm": 0.8258984417878298, "learning_rate": 9.101623209882965e-07, "loss": 0.157, "step": 7405 }, { "epoch": 2.473200868258474, "grad_norm": 0.5625468162092421, "learning_rate": 9.090446994791585e-07, "loss": 0.1589, "step": 7406 }, { "epoch": 2.4735348138253466, "grad_norm": 0.5349082547026425, "learning_rate": 9.079276959569899e-07, "loss": 0.1647, "step": 7407 }, { "epoch": 2.473868759392219, "grad_norm": 0.5270376007769813, "learning_rate": 9.068113105905235e-07, "loss": 0.155, "step": 7408 }, { "epoch": 2.4742027049590916, "grad_norm": 0.5257667676560155, "learning_rate": 9.056955435484061e-07, "loss": 0.1517, "step": 7409 }, { "epoch": 2.4745366505259643, "grad_norm": 0.5310131029906042, "learning_rate": 9.045803949991843e-07, "loss": 0.1627, "step": 7410 }, { "epoch": 2.474870596092837, "grad_norm": 0.5110165371528869, "learning_rate": 9.034658651113154e-07, "loss": 0.1547, "step": 7411 }, { "epoch": 2.4752045416597097, "grad_norm": 0.5240970004480227, "learning_rate": 9.023519540531633e-07, "loss": 0.1574, "step": 7412 }, { "epoch": 2.475538487226582, "grad_norm": 0.5146289857699795, "learning_rate": 9.01238661992998e-07, "loss": 0.1519, "step": 7413 }, { "epoch": 2.4758724327934547, "grad_norm": 0.5125323382835603, "learning_rate": 9.001259890989927e-07, "loss": 0.1452, "step": 7414 }, { "epoch": 2.4762063783603274, "grad_norm": 0.548528572666765, "learning_rate": 8.990139355392324e-07, "loss": 0.1633, "step": 7415 }, { "epoch": 2.4765403239271997, "grad_norm": 0.5426017894190176, "learning_rate": 8.979025014817039e-07, "loss": 0.1554, "step": 7416 }, { "epoch": 2.4768742694940724, "grad_norm": 0.5273871603200672, "learning_rate": 8.967916870943028e-07, "loss": 0.156, "step": 7417 }, { "epoch": 2.477208215060945, "grad_norm": 0.5015673721907438, "learning_rate": 8.956814925448309e-07, "loss": 0.1551, "step": 7418 }, { "epoch": 2.477542160627818, "grad_norm": 0.5351497045095686, "learning_rate": 8.945719180009977e-07, "loss": 0.1593, "step": 7419 }, { "epoch": 2.47787610619469, "grad_norm": 0.5248370885000369, "learning_rate": 8.934629636304149e-07, "loss": 0.1505, "step": 7420 }, { "epoch": 2.478210051761563, "grad_norm": 0.5879544927092775, "learning_rate": 8.923546296006058e-07, "loss": 0.166, "step": 7421 }, { "epoch": 2.4785439973284356, "grad_norm": 0.5316868901778447, "learning_rate": 8.912469160789944e-07, "loss": 0.1548, "step": 7422 }, { "epoch": 2.4788779428953083, "grad_norm": 0.5293767236125144, "learning_rate": 8.901398232329156e-07, "loss": 0.1555, "step": 7423 }, { "epoch": 2.4792118884621805, "grad_norm": 0.5766018928369575, "learning_rate": 8.890333512296095e-07, "loss": 0.1685, "step": 7424 }, { "epoch": 2.4795458340290533, "grad_norm": 0.49283680451273953, "learning_rate": 8.879275002362197e-07, "loss": 0.1481, "step": 7425 }, { "epoch": 2.479879779595926, "grad_norm": 0.7372173936319003, "learning_rate": 8.868222704198004e-07, "loss": 0.1562, "step": 7426 }, { "epoch": 2.4802137251627983, "grad_norm": 0.5586198661064684, "learning_rate": 8.857176619473068e-07, "loss": 0.1478, "step": 7427 }, { "epoch": 2.480547670729671, "grad_norm": 0.48785260397998287, "learning_rate": 8.846136749856044e-07, "loss": 0.1443, "step": 7428 }, { "epoch": 2.4808816162965437, "grad_norm": 0.5223348130739237, "learning_rate": 8.835103097014636e-07, "loss": 0.1563, "step": 7429 }, { "epoch": 2.4812155618634164, "grad_norm": 0.5289422818248141, "learning_rate": 8.824075662615617e-07, "loss": 0.1539, "step": 7430 }, { "epoch": 2.4815495074302887, "grad_norm": 0.5366617936145673, "learning_rate": 8.813054448324792e-07, "loss": 0.1564, "step": 7431 }, { "epoch": 2.4818834529971614, "grad_norm": 0.5091155974176387, "learning_rate": 8.80203945580706e-07, "loss": 0.1421, "step": 7432 }, { "epoch": 2.482217398564034, "grad_norm": 0.5162217183502404, "learning_rate": 8.791030686726349e-07, "loss": 0.1594, "step": 7433 }, { "epoch": 2.482551344130907, "grad_norm": 0.53574017718763, "learning_rate": 8.780028142745673e-07, "loss": 0.158, "step": 7434 }, { "epoch": 2.482885289697779, "grad_norm": 0.5170105505185464, "learning_rate": 8.769031825527097e-07, "loss": 0.1585, "step": 7435 }, { "epoch": 2.483219235264652, "grad_norm": 0.544046520845691, "learning_rate": 8.758041736731753e-07, "loss": 0.1589, "step": 7436 }, { "epoch": 2.4835531808315245, "grad_norm": 0.5531553610216963, "learning_rate": 8.747057878019799e-07, "loss": 0.1604, "step": 7437 }, { "epoch": 2.483887126398397, "grad_norm": 0.4998496915162335, "learning_rate": 8.736080251050505e-07, "loss": 0.1451, "step": 7438 }, { "epoch": 2.4842210719652695, "grad_norm": 0.5360141755777315, "learning_rate": 8.725108857482145e-07, "loss": 0.1601, "step": 7439 }, { "epoch": 2.4845550175321423, "grad_norm": 0.5323788988525391, "learning_rate": 8.714143698972083e-07, "loss": 0.155, "step": 7440 }, { "epoch": 2.484888963099015, "grad_norm": 0.48108964205325405, "learning_rate": 8.703184777176743e-07, "loss": 0.1426, "step": 7441 }, { "epoch": 2.4852229086658877, "grad_norm": 0.5182627973890735, "learning_rate": 8.692232093751613e-07, "loss": 0.1491, "step": 7442 }, { "epoch": 2.48555685423276, "grad_norm": 0.48494737578990094, "learning_rate": 8.68128565035119e-07, "loss": 0.1471, "step": 7443 }, { "epoch": 2.4858907997996327, "grad_norm": 0.49811481612701375, "learning_rate": 8.670345448629097e-07, "loss": 0.1515, "step": 7444 }, { "epoch": 2.4862247453665054, "grad_norm": 0.5458036809897917, "learning_rate": 8.659411490237951e-07, "loss": 0.1586, "step": 7445 }, { "epoch": 2.4865586909333777, "grad_norm": 0.5294276700638828, "learning_rate": 8.648483776829469e-07, "loss": 0.1663, "step": 7446 }, { "epoch": 2.4868926365002504, "grad_norm": 0.491083599496525, "learning_rate": 8.637562310054425e-07, "loss": 0.1483, "step": 7447 }, { "epoch": 2.487226582067123, "grad_norm": 0.551263065477862, "learning_rate": 8.626647091562612e-07, "loss": 0.1617, "step": 7448 }, { "epoch": 2.487560527633996, "grad_norm": 0.49702157711243133, "learning_rate": 8.61573812300292e-07, "loss": 0.1504, "step": 7449 }, { "epoch": 2.487894473200868, "grad_norm": 0.5200659870372368, "learning_rate": 8.604835406023254e-07, "loss": 0.1598, "step": 7450 }, { "epoch": 2.488228418767741, "grad_norm": 0.49403596255559235, "learning_rate": 8.593938942270613e-07, "loss": 0.1489, "step": 7451 }, { "epoch": 2.4885623643346135, "grad_norm": 0.5572892670064632, "learning_rate": 8.583048733391036e-07, "loss": 0.1634, "step": 7452 }, { "epoch": 2.4888963099014862, "grad_norm": 0.524050270984186, "learning_rate": 8.57216478102963e-07, "loss": 0.152, "step": 7453 }, { "epoch": 2.4892302554683585, "grad_norm": 0.5450655789511571, "learning_rate": 8.561287086830516e-07, "loss": 0.1578, "step": 7454 }, { "epoch": 2.4895642010352312, "grad_norm": 0.4938587213270162, "learning_rate": 8.550415652436927e-07, "loss": 0.1541, "step": 7455 }, { "epoch": 2.489898146602104, "grad_norm": 0.5114068685962905, "learning_rate": 8.539550479491093e-07, "loss": 0.151, "step": 7456 }, { "epoch": 2.4902320921689762, "grad_norm": 0.5449036600871238, "learning_rate": 8.528691569634357e-07, "loss": 0.1687, "step": 7457 }, { "epoch": 2.490566037735849, "grad_norm": 0.5858119072813122, "learning_rate": 8.517838924507039e-07, "loss": 0.1708, "step": 7458 }, { "epoch": 2.4908999833027217, "grad_norm": 0.5347934032616631, "learning_rate": 8.50699254574861e-07, "loss": 0.1602, "step": 7459 }, { "epoch": 2.4912339288695944, "grad_norm": 0.511032385630495, "learning_rate": 8.496152434997518e-07, "loss": 0.1505, "step": 7460 }, { "epoch": 2.491567874436467, "grad_norm": 0.5590357970061134, "learning_rate": 8.485318593891295e-07, "loss": 0.1422, "step": 7461 }, { "epoch": 2.4919018200033394, "grad_norm": 0.5920055950996739, "learning_rate": 8.474491024066512e-07, "loss": 0.1667, "step": 7462 }, { "epoch": 2.492235765570212, "grad_norm": 0.5492390339266932, "learning_rate": 8.463669727158819e-07, "loss": 0.1587, "step": 7463 }, { "epoch": 2.492569711137085, "grad_norm": 0.526164253031194, "learning_rate": 8.45285470480286e-07, "loss": 0.1534, "step": 7464 }, { "epoch": 2.492903656703957, "grad_norm": 0.5364159996478518, "learning_rate": 8.442045958632428e-07, "loss": 0.1597, "step": 7465 }, { "epoch": 2.49323760227083, "grad_norm": 0.5481814028546967, "learning_rate": 8.431243490280267e-07, "loss": 0.1607, "step": 7466 }, { "epoch": 2.4935715478377025, "grad_norm": 0.5497464097584005, "learning_rate": 8.420447301378249e-07, "loss": 0.1603, "step": 7467 }, { "epoch": 2.4939054934045752, "grad_norm": 0.49499461566929853, "learning_rate": 8.409657393557236e-07, "loss": 0.1532, "step": 7468 }, { "epoch": 2.4942394389714475, "grad_norm": 0.5280999383444667, "learning_rate": 8.39887376844718e-07, "loss": 0.1513, "step": 7469 }, { "epoch": 2.4945733845383202, "grad_norm": 0.541491593990084, "learning_rate": 8.388096427677095e-07, "loss": 0.1575, "step": 7470 }, { "epoch": 2.494907330105193, "grad_norm": 0.5567512139651408, "learning_rate": 8.377325372874995e-07, "loss": 0.1587, "step": 7471 }, { "epoch": 2.4952412756720657, "grad_norm": 0.4789075537156718, "learning_rate": 8.366560605668006e-07, "loss": 0.1466, "step": 7472 }, { "epoch": 2.495575221238938, "grad_norm": 0.5241227372625797, "learning_rate": 8.355802127682238e-07, "loss": 0.1575, "step": 7473 }, { "epoch": 2.4959091668058107, "grad_norm": 0.5261817341228623, "learning_rate": 8.345049940542904e-07, "loss": 0.1473, "step": 7474 }, { "epoch": 2.4962431123726834, "grad_norm": 0.5125544170513542, "learning_rate": 8.334304045874248e-07, "loss": 0.1622, "step": 7475 }, { "epoch": 2.4965770579395556, "grad_norm": 0.4886019125041492, "learning_rate": 8.323564445299575e-07, "loss": 0.1477, "step": 7476 }, { "epoch": 2.4969110035064284, "grad_norm": 0.46844423797008194, "learning_rate": 8.312831140441207e-07, "loss": 0.1444, "step": 7477 }, { "epoch": 2.497244949073301, "grad_norm": 0.5183590771956148, "learning_rate": 8.302104132920552e-07, "loss": 0.1565, "step": 7478 }, { "epoch": 2.497578894640174, "grad_norm": 0.5577300530187361, "learning_rate": 8.291383424358041e-07, "loss": 0.1617, "step": 7479 }, { "epoch": 2.497912840207046, "grad_norm": 0.5636308968550843, "learning_rate": 8.280669016373172e-07, "loss": 0.1691, "step": 7480 }, { "epoch": 2.498246785773919, "grad_norm": 0.5084283575774319, "learning_rate": 8.269960910584457e-07, "loss": 0.155, "step": 7481 }, { "epoch": 2.4985807313407915, "grad_norm": 0.5246190756503197, "learning_rate": 8.259259108609524e-07, "loss": 0.1592, "step": 7482 }, { "epoch": 2.4989146769076642, "grad_norm": 0.4765757170409379, "learning_rate": 8.248563612064969e-07, "loss": 0.1451, "step": 7483 }, { "epoch": 2.4992486224745365, "grad_norm": 0.5183633745167002, "learning_rate": 8.237874422566505e-07, "loss": 0.1571, "step": 7484 }, { "epoch": 2.499582568041409, "grad_norm": 0.49644317659578663, "learning_rate": 8.227191541728829e-07, "loss": 0.1486, "step": 7485 }, { "epoch": 2.499916513608282, "grad_norm": 0.5302602187154111, "learning_rate": 8.21651497116574e-07, "loss": 0.1544, "step": 7486 }, { "epoch": 2.500250459175154, "grad_norm": 0.5532769666070599, "learning_rate": 8.205844712490024e-07, "loss": 0.1677, "step": 7487 }, { "epoch": 2.500584404742027, "grad_norm": 0.555633213185817, "learning_rate": 8.195180767313604e-07, "loss": 0.1645, "step": 7488 }, { "epoch": 2.5009183503088996, "grad_norm": 0.5607026487442186, "learning_rate": 8.184523137247346e-07, "loss": 0.1576, "step": 7489 }, { "epoch": 2.5012522958757724, "grad_norm": 0.5288814062908609, "learning_rate": 8.173871823901247e-07, "loss": 0.1486, "step": 7490 }, { "epoch": 2.501586241442645, "grad_norm": 0.5171926986273441, "learning_rate": 8.16322682888428e-07, "loss": 0.1588, "step": 7491 }, { "epoch": 2.5019201870095173, "grad_norm": 0.5293127044915098, "learning_rate": 8.15258815380453e-07, "loss": 0.1583, "step": 7492 }, { "epoch": 2.50225413257639, "grad_norm": 0.5176749228194966, "learning_rate": 8.141955800269058e-07, "loss": 0.1549, "step": 7493 }, { "epoch": 2.502588078143263, "grad_norm": 0.5159921514636313, "learning_rate": 8.131329769884027e-07, "loss": 0.1512, "step": 7494 }, { "epoch": 2.502922023710135, "grad_norm": 0.5257063522299663, "learning_rate": 8.120710064254634e-07, "loss": 0.1536, "step": 7495 }, { "epoch": 2.5032559692770078, "grad_norm": 0.5264810763788741, "learning_rate": 8.110096684985086e-07, "loss": 0.1622, "step": 7496 }, { "epoch": 2.5035899148438805, "grad_norm": 0.5639565998252986, "learning_rate": 8.099489633678676e-07, "loss": 0.1718, "step": 7497 }, { "epoch": 2.503923860410753, "grad_norm": 0.5502371897141942, "learning_rate": 8.088888911937726e-07, "loss": 0.1676, "step": 7498 }, { "epoch": 2.5042578059776255, "grad_norm": 0.6154170180004221, "learning_rate": 8.078294521363584e-07, "loss": 0.1745, "step": 7499 }, { "epoch": 2.504591751544498, "grad_norm": 0.5371791465358621, "learning_rate": 8.067706463556663e-07, "loss": 0.1613, "step": 7500 }, { "epoch": 2.504925697111371, "grad_norm": 0.5207840621091431, "learning_rate": 8.057124740116434e-07, "loss": 0.1594, "step": 7501 }, { "epoch": 2.5052596426782436, "grad_norm": 0.4840124847038789, "learning_rate": 8.046549352641359e-07, "loss": 0.1549, "step": 7502 }, { "epoch": 2.505593588245116, "grad_norm": 0.4843225011059664, "learning_rate": 8.035980302729008e-07, "loss": 0.1444, "step": 7503 }, { "epoch": 2.5059275338119886, "grad_norm": 0.5015309666946438, "learning_rate": 8.025417591975926e-07, "loss": 0.1495, "step": 7504 }, { "epoch": 2.5062614793788613, "grad_norm": 0.5588866802180681, "learning_rate": 8.014861221977749e-07, "loss": 0.1632, "step": 7505 }, { "epoch": 2.5065954249457336, "grad_norm": 0.5258734952648931, "learning_rate": 8.004311194329145e-07, "loss": 0.1564, "step": 7506 }, { "epoch": 2.5069293705126063, "grad_norm": 0.5018390452316217, "learning_rate": 7.993767510623834e-07, "loss": 0.1539, "step": 7507 }, { "epoch": 2.507263316079479, "grad_norm": 0.5238239925463696, "learning_rate": 7.983230172454531e-07, "loss": 0.1519, "step": 7508 }, { "epoch": 2.5075972616463518, "grad_norm": 0.5573481196677725, "learning_rate": 7.972699181413058e-07, "loss": 0.1591, "step": 7509 }, { "epoch": 2.5079312072132245, "grad_norm": 0.5166173807653006, "learning_rate": 7.962174539090201e-07, "loss": 0.1494, "step": 7510 }, { "epoch": 2.5082651527800968, "grad_norm": 0.5055095687381181, "learning_rate": 7.951656247075884e-07, "loss": 0.1519, "step": 7511 }, { "epoch": 2.5085990983469695, "grad_norm": 0.5560200453792518, "learning_rate": 7.941144306958986e-07, "loss": 0.1676, "step": 7512 }, { "epoch": 2.508933043913842, "grad_norm": 0.539443573117671, "learning_rate": 7.930638720327477e-07, "loss": 0.1644, "step": 7513 }, { "epoch": 2.5092669894807145, "grad_norm": 0.5078255433817627, "learning_rate": 7.920139488768325e-07, "loss": 0.1542, "step": 7514 }, { "epoch": 2.509600935047587, "grad_norm": 0.5588296693905245, "learning_rate": 7.909646613867594e-07, "loss": 0.1689, "step": 7515 }, { "epoch": 2.50993488061446, "grad_norm": 0.5243310356959708, "learning_rate": 7.899160097210329e-07, "loss": 0.1535, "step": 7516 }, { "epoch": 2.510268826181332, "grad_norm": 0.4984528436569994, "learning_rate": 7.888679940380644e-07, "loss": 0.1573, "step": 7517 }, { "epoch": 2.510602771748205, "grad_norm": 0.4913467718422485, "learning_rate": 7.87820614496172e-07, "loss": 0.1558, "step": 7518 }, { "epoch": 2.5109367173150776, "grad_norm": 0.5518878812557201, "learning_rate": 7.867738712535711e-07, "loss": 0.1622, "step": 7519 }, { "epoch": 2.5112706628819503, "grad_norm": 0.5117546474410686, "learning_rate": 7.857277644683858e-07, "loss": 0.151, "step": 7520 }, { "epoch": 2.511604608448823, "grad_norm": 0.5716066786013106, "learning_rate": 7.846822942986449e-07, "loss": 0.1585, "step": 7521 }, { "epoch": 2.5119385540156953, "grad_norm": 0.526200387575101, "learning_rate": 7.836374609022756e-07, "loss": 0.146, "step": 7522 }, { "epoch": 2.512272499582568, "grad_norm": 0.5237426581490771, "learning_rate": 7.825932644371137e-07, "loss": 0.1565, "step": 7523 }, { "epoch": 2.5126064451494408, "grad_norm": 0.5157049326498191, "learning_rate": 7.815497050608989e-07, "loss": 0.1586, "step": 7524 }, { "epoch": 2.512940390716313, "grad_norm": 0.5075710985330287, "learning_rate": 7.805067829312707e-07, "loss": 0.1526, "step": 7525 }, { "epoch": 2.5132743362831858, "grad_norm": 0.5214226974333179, "learning_rate": 7.79464498205777e-07, "loss": 0.153, "step": 7526 }, { "epoch": 2.5136082818500585, "grad_norm": 0.519026372135375, "learning_rate": 7.78422851041865e-07, "loss": 0.1557, "step": 7527 }, { "epoch": 2.513942227416931, "grad_norm": 0.5682299743488699, "learning_rate": 7.773818415968887e-07, "loss": 0.1679, "step": 7528 }, { "epoch": 2.514276172983804, "grad_norm": 0.527132465649919, "learning_rate": 7.763414700281053e-07, "loss": 0.1512, "step": 7529 }, { "epoch": 2.514610118550676, "grad_norm": 0.5662292574706226, "learning_rate": 7.753017364926757e-07, "loss": 0.1618, "step": 7530 }, { "epoch": 2.514944064117549, "grad_norm": 0.5394910775395201, "learning_rate": 7.742626411476617e-07, "loss": 0.1461, "step": 7531 }, { "epoch": 2.5152780096844216, "grad_norm": 0.5124017265010042, "learning_rate": 7.732241841500332e-07, "loss": 0.1497, "step": 7532 }, { "epoch": 2.515611955251294, "grad_norm": 0.5236527981564442, "learning_rate": 7.721863656566597e-07, "loss": 0.1495, "step": 7533 }, { "epoch": 2.5159459008181666, "grad_norm": 0.45904066998357373, "learning_rate": 7.711491858243164e-07, "loss": 0.1397, "step": 7534 }, { "epoch": 2.5162798463850393, "grad_norm": 0.5023271030916909, "learning_rate": 7.701126448096813e-07, "loss": 0.1515, "step": 7535 }, { "epoch": 2.5166137919519116, "grad_norm": 0.5461686273133802, "learning_rate": 7.69076742769338e-07, "loss": 0.1552, "step": 7536 }, { "epoch": 2.5169477375187843, "grad_norm": 0.4674555571007729, "learning_rate": 7.68041479859769e-07, "loss": 0.1399, "step": 7537 }, { "epoch": 2.517281683085657, "grad_norm": 0.5515888155967048, "learning_rate": 7.670068562373656e-07, "loss": 0.1637, "step": 7538 }, { "epoch": 2.5176156286525297, "grad_norm": 0.5957056156113891, "learning_rate": 7.65972872058417e-07, "loss": 0.1634, "step": 7539 }, { "epoch": 2.5179495742194025, "grad_norm": 0.5036132612130619, "learning_rate": 7.6493952747912e-07, "loss": 0.1535, "step": 7540 }, { "epoch": 2.5182835197862747, "grad_norm": 0.5190668427338941, "learning_rate": 7.639068226555751e-07, "loss": 0.1519, "step": 7541 }, { "epoch": 2.5186174653531475, "grad_norm": 0.5037790938432386, "learning_rate": 7.628747577437817e-07, "loss": 0.1524, "step": 7542 }, { "epoch": 2.51895141092002, "grad_norm": 0.5004546836079737, "learning_rate": 7.618433328996466e-07, "loss": 0.1532, "step": 7543 }, { "epoch": 2.5192853564868924, "grad_norm": 0.5501512241433032, "learning_rate": 7.608125482789802e-07, "loss": 0.1653, "step": 7544 }, { "epoch": 2.519619302053765, "grad_norm": 0.6007238389340253, "learning_rate": 7.597824040374918e-07, "loss": 0.1758, "step": 7545 }, { "epoch": 2.519953247620638, "grad_norm": 0.49556874191589473, "learning_rate": 7.587529003307981e-07, "loss": 0.1545, "step": 7546 }, { "epoch": 2.5202871931875106, "grad_norm": 0.5251067061399489, "learning_rate": 7.57724037314419e-07, "loss": 0.1582, "step": 7547 }, { "epoch": 2.520621138754383, "grad_norm": 0.5381560130179013, "learning_rate": 7.566958151437743e-07, "loss": 0.159, "step": 7548 }, { "epoch": 2.5209550843212556, "grad_norm": 0.4986522998380296, "learning_rate": 7.556682339741911e-07, "loss": 0.1498, "step": 7549 }, { "epoch": 2.5212890298881283, "grad_norm": 0.4996807633077003, "learning_rate": 7.546412939608955e-07, "loss": 0.1471, "step": 7550 }, { "epoch": 2.521622975455001, "grad_norm": 0.6054073039657178, "learning_rate": 7.5361499525902e-07, "loss": 0.1613, "step": 7551 }, { "epoch": 2.5219569210218733, "grad_norm": 0.48971374377963084, "learning_rate": 7.525893380235988e-07, "loss": 0.1441, "step": 7552 }, { "epoch": 2.522290866588746, "grad_norm": 0.5391665062112588, "learning_rate": 7.515643224095709e-07, "loss": 0.1617, "step": 7553 }, { "epoch": 2.5226248121556187, "grad_norm": 0.5135011269872816, "learning_rate": 7.505399485717746e-07, "loss": 0.1564, "step": 7554 }, { "epoch": 2.522958757722491, "grad_norm": 0.5283825677531684, "learning_rate": 7.495162166649561e-07, "loss": 0.1627, "step": 7555 }, { "epoch": 2.5232927032893637, "grad_norm": 0.5178565756705336, "learning_rate": 7.484931268437595e-07, "loss": 0.1548, "step": 7556 }, { "epoch": 2.5236266488562364, "grad_norm": 0.5152821898488568, "learning_rate": 7.474706792627362e-07, "loss": 0.1573, "step": 7557 }, { "epoch": 2.523960594423109, "grad_norm": 0.5314921955835342, "learning_rate": 7.464488740763387e-07, "loss": 0.152, "step": 7558 }, { "epoch": 2.524294539989982, "grad_norm": 0.4911118017182647, "learning_rate": 7.454277114389241e-07, "loss": 0.1472, "step": 7559 }, { "epoch": 2.524628485556854, "grad_norm": 0.5249373913173525, "learning_rate": 7.444071915047479e-07, "loss": 0.1576, "step": 7560 }, { "epoch": 2.524962431123727, "grad_norm": 0.5337994119643109, "learning_rate": 7.433873144279751e-07, "loss": 0.1643, "step": 7561 }, { "epoch": 2.5252963766905996, "grad_norm": 0.6119106329771593, "learning_rate": 7.42368080362667e-07, "loss": 0.1687, "step": 7562 }, { "epoch": 2.525630322257472, "grad_norm": 0.5577030632990558, "learning_rate": 7.413494894627926e-07, "loss": 0.1777, "step": 7563 }, { "epoch": 2.5259642678243446, "grad_norm": 0.5203702420065588, "learning_rate": 7.403315418822215e-07, "loss": 0.1485, "step": 7564 }, { "epoch": 2.5262982133912173, "grad_norm": 0.5038351465089509, "learning_rate": 7.393142377747287e-07, "loss": 0.1475, "step": 7565 }, { "epoch": 2.5266321589580896, "grad_norm": 0.48326180757043785, "learning_rate": 7.382975772939866e-07, "loss": 0.1471, "step": 7566 }, { "epoch": 2.5269661045249623, "grad_norm": 0.5366216732331346, "learning_rate": 7.372815605935763e-07, "loss": 0.1617, "step": 7567 }, { "epoch": 2.527300050091835, "grad_norm": 0.5061743583902273, "learning_rate": 7.362661878269772e-07, "loss": 0.1571, "step": 7568 }, { "epoch": 2.5276339956587077, "grad_norm": 0.5307042051406543, "learning_rate": 7.352514591475746e-07, "loss": 0.1586, "step": 7569 }, { "epoch": 2.5279679412255804, "grad_norm": 0.680745479156831, "learning_rate": 7.342373747086557e-07, "loss": 0.1666, "step": 7570 }, { "epoch": 2.5283018867924527, "grad_norm": 0.5242025885825541, "learning_rate": 7.332239346634079e-07, "loss": 0.1573, "step": 7571 }, { "epoch": 2.5286358323593254, "grad_norm": 0.5393298239285648, "learning_rate": 7.322111391649261e-07, "loss": 0.1593, "step": 7572 }, { "epoch": 2.528969777926198, "grad_norm": 0.5185798461075484, "learning_rate": 7.311989883662018e-07, "loss": 0.1588, "step": 7573 }, { "epoch": 2.5293037234930704, "grad_norm": 0.592503504928077, "learning_rate": 7.301874824201349e-07, "loss": 0.1737, "step": 7574 }, { "epoch": 2.529637669059943, "grad_norm": 0.5289070259578227, "learning_rate": 7.29176621479522e-07, "loss": 0.1541, "step": 7575 }, { "epoch": 2.529971614626816, "grad_norm": 0.5399286735957974, "learning_rate": 7.2816640569707e-07, "loss": 0.154, "step": 7576 }, { "epoch": 2.5303055601936886, "grad_norm": 0.5254361535936164, "learning_rate": 7.271568352253804e-07, "loss": 0.1564, "step": 7577 }, { "epoch": 2.5306395057605613, "grad_norm": 0.555206381458067, "learning_rate": 7.261479102169627e-07, "loss": 0.1725, "step": 7578 }, { "epoch": 2.5309734513274336, "grad_norm": 0.5290307988418901, "learning_rate": 7.251396308242259e-07, "loss": 0.1484, "step": 7579 }, { "epoch": 2.5313073968943063, "grad_norm": 0.504515982522606, "learning_rate": 7.241319971994831e-07, "loss": 0.1496, "step": 7580 }, { "epoch": 2.531641342461179, "grad_norm": 0.49559680913923954, "learning_rate": 7.231250094949472e-07, "loss": 0.1461, "step": 7581 }, { "epoch": 2.5319752880280513, "grad_norm": 0.5978243995818697, "learning_rate": 7.221186678627389e-07, "loss": 0.1729, "step": 7582 }, { "epoch": 2.532309233594924, "grad_norm": 0.5606973816163814, "learning_rate": 7.211129724548754e-07, "loss": 0.1581, "step": 7583 }, { "epoch": 2.5326431791617967, "grad_norm": 0.5306020480428629, "learning_rate": 7.201079234232805e-07, "loss": 0.1624, "step": 7584 }, { "epoch": 2.532977124728669, "grad_norm": 0.5223762718058418, "learning_rate": 7.191035209197772e-07, "loss": 0.1497, "step": 7585 }, { "epoch": 2.5333110702955417, "grad_norm": 0.540345518499979, "learning_rate": 7.180997650960936e-07, "loss": 0.16, "step": 7586 }, { "epoch": 2.5336450158624144, "grad_norm": 0.5450407713397211, "learning_rate": 7.170966561038561e-07, "loss": 0.1657, "step": 7587 }, { "epoch": 2.533978961429287, "grad_norm": 0.5391559776735416, "learning_rate": 7.160941940946009e-07, "loss": 0.1577, "step": 7588 }, { "epoch": 2.53431290699616, "grad_norm": 0.5230788079516275, "learning_rate": 7.150923792197579e-07, "loss": 0.1556, "step": 7589 }, { "epoch": 2.534646852563032, "grad_norm": 0.5216974254081449, "learning_rate": 7.140912116306648e-07, "loss": 0.1486, "step": 7590 }, { "epoch": 2.534980798129905, "grad_norm": 0.6321980599276655, "learning_rate": 7.130906914785585e-07, "loss": 0.1547, "step": 7591 }, { "epoch": 2.5353147436967776, "grad_norm": 0.506483027802621, "learning_rate": 7.120908189145798e-07, "loss": 0.1447, "step": 7592 }, { "epoch": 2.53564868926365, "grad_norm": 0.5169580171339891, "learning_rate": 7.110915940897722e-07, "loss": 0.1485, "step": 7593 }, { "epoch": 2.5359826348305226, "grad_norm": 0.5278085164861606, "learning_rate": 7.100930171550785e-07, "loss": 0.1552, "step": 7594 }, { "epoch": 2.5363165803973953, "grad_norm": 0.5350544885579419, "learning_rate": 7.090950882613479e-07, "loss": 0.1638, "step": 7595 }, { "epoch": 2.536650525964268, "grad_norm": 0.5060006641486688, "learning_rate": 7.08097807559327e-07, "loss": 0.148, "step": 7596 }, { "epoch": 2.5369844715311403, "grad_norm": 0.5396958297533546, "learning_rate": 7.071011751996687e-07, "loss": 0.1581, "step": 7597 }, { "epoch": 2.537318417098013, "grad_norm": 0.556091790600878, "learning_rate": 7.061051913329231e-07, "loss": 0.1697, "step": 7598 }, { "epoch": 2.5376523626648857, "grad_norm": 0.5181420886400087, "learning_rate": 7.051098561095493e-07, "loss": 0.1489, "step": 7599 }, { "epoch": 2.5379863082317584, "grad_norm": 0.4768470665798708, "learning_rate": 7.041151696799014e-07, "loss": 0.1363, "step": 7600 }, { "epoch": 2.5383202537986307, "grad_norm": 0.5196161421818665, "learning_rate": 7.031211321942405e-07, "loss": 0.1533, "step": 7601 }, { "epoch": 2.5386541993655034, "grad_norm": 0.5232659742245588, "learning_rate": 7.021277438027258e-07, "loss": 0.1568, "step": 7602 }, { "epoch": 2.538988144932376, "grad_norm": 0.5176128865084071, "learning_rate": 7.011350046554227e-07, "loss": 0.1542, "step": 7603 }, { "epoch": 2.5393220904992484, "grad_norm": 0.5651500840876793, "learning_rate": 7.001429149022915e-07, "loss": 0.1647, "step": 7604 }, { "epoch": 2.539656036066121, "grad_norm": 0.5098263760222316, "learning_rate": 6.991514746932048e-07, "loss": 0.1498, "step": 7605 }, { "epoch": 2.539989981632994, "grad_norm": 0.536465839742826, "learning_rate": 6.981606841779281e-07, "loss": 0.1533, "step": 7606 }, { "epoch": 2.5403239271998665, "grad_norm": 0.5226597424328394, "learning_rate": 6.971705435061333e-07, "loss": 0.1516, "step": 7607 }, { "epoch": 2.5406578727667393, "grad_norm": 0.5804541135430479, "learning_rate": 6.96181052827391e-07, "loss": 0.165, "step": 7608 }, { "epoch": 2.5409918183336115, "grad_norm": 0.5237353158113098, "learning_rate": 6.951922122911775e-07, "loss": 0.1538, "step": 7609 }, { "epoch": 2.5413257639004843, "grad_norm": 0.5080706830627284, "learning_rate": 6.942040220468654e-07, "loss": 0.148, "step": 7610 }, { "epoch": 2.541659709467357, "grad_norm": 0.5140049188783807, "learning_rate": 6.932164822437371e-07, "loss": 0.1557, "step": 7611 }, { "epoch": 2.5419936550342292, "grad_norm": 0.5930589485845684, "learning_rate": 6.922295930309691e-07, "loss": 0.165, "step": 7612 }, { "epoch": 2.542327600601102, "grad_norm": 0.549299614539444, "learning_rate": 6.912433545576446e-07, "loss": 0.1579, "step": 7613 }, { "epoch": 2.5426615461679747, "grad_norm": 0.562070790263285, "learning_rate": 6.90257766972744e-07, "loss": 0.1699, "step": 7614 }, { "epoch": 2.542995491734847, "grad_norm": 0.5227209625996607, "learning_rate": 6.892728304251544e-07, "loss": 0.1612, "step": 7615 }, { "epoch": 2.5433294373017197, "grad_norm": 0.5487714357697593, "learning_rate": 6.8828854506366e-07, "loss": 0.16, "step": 7616 }, { "epoch": 2.5436633828685924, "grad_norm": 0.5139354728097603, "learning_rate": 6.873049110369495e-07, "loss": 0.1473, "step": 7617 }, { "epoch": 2.543997328435465, "grad_norm": 0.495487042816524, "learning_rate": 6.863219284936135e-07, "loss": 0.143, "step": 7618 }, { "epoch": 2.544331274002338, "grad_norm": 0.5240554996595517, "learning_rate": 6.853395975821414e-07, "loss": 0.158, "step": 7619 }, { "epoch": 2.54466521956921, "grad_norm": 0.5291080480147916, "learning_rate": 6.843579184509275e-07, "loss": 0.1543, "step": 7620 }, { "epoch": 2.544999165136083, "grad_norm": 0.49305356830405517, "learning_rate": 6.833768912482636e-07, "loss": 0.1466, "step": 7621 }, { "epoch": 2.5453331107029555, "grad_norm": 0.4901391626064939, "learning_rate": 6.823965161223472e-07, "loss": 0.142, "step": 7622 }, { "epoch": 2.545667056269828, "grad_norm": 0.49462502258324764, "learning_rate": 6.814167932212751e-07, "loss": 0.1467, "step": 7623 }, { "epoch": 2.5460010018367005, "grad_norm": 0.5141470034471365, "learning_rate": 6.804377226930469e-07, "loss": 0.1573, "step": 7624 }, { "epoch": 2.5463349474035732, "grad_norm": 0.5089120455293014, "learning_rate": 6.794593046855613e-07, "loss": 0.1534, "step": 7625 }, { "epoch": 2.546668892970446, "grad_norm": 0.5090069224737189, "learning_rate": 6.784815393466215e-07, "loss": 0.1551, "step": 7626 }, { "epoch": 2.5470028385373187, "grad_norm": 0.518767697913172, "learning_rate": 6.775044268239278e-07, "loss": 0.1463, "step": 7627 }, { "epoch": 2.547336784104191, "grad_norm": 0.5376965894711552, "learning_rate": 6.765279672650865e-07, "loss": 0.1537, "step": 7628 }, { "epoch": 2.5476707296710637, "grad_norm": 0.5257888120052993, "learning_rate": 6.75552160817603e-07, "loss": 0.1505, "step": 7629 }, { "epoch": 2.5480046752379364, "grad_norm": 0.5316600959696877, "learning_rate": 6.745770076288854e-07, "loss": 0.1562, "step": 7630 }, { "epoch": 2.5483386208048087, "grad_norm": 0.5346487077712377, "learning_rate": 6.736025078462399e-07, "loss": 0.1618, "step": 7631 }, { "epoch": 2.5486725663716814, "grad_norm": 0.5060699607051514, "learning_rate": 6.726286616168781e-07, "loss": 0.1487, "step": 7632 }, { "epoch": 2.549006511938554, "grad_norm": 0.5685768568375997, "learning_rate": 6.716554690879085e-07, "loss": 0.1582, "step": 7633 }, { "epoch": 2.5493404575054264, "grad_norm": 0.5704678810184574, "learning_rate": 6.706829304063467e-07, "loss": 0.1697, "step": 7634 }, { "epoch": 2.549674403072299, "grad_norm": 0.5273554677422916, "learning_rate": 6.697110457191031e-07, "loss": 0.1525, "step": 7635 }, { "epoch": 2.550008348639172, "grad_norm": 0.5291739725258179, "learning_rate": 6.687398151729951e-07, "loss": 0.1556, "step": 7636 }, { "epoch": 2.5503422942060445, "grad_norm": 0.49722507845623964, "learning_rate": 6.677692389147355e-07, "loss": 0.1444, "step": 7637 }, { "epoch": 2.5506762397729172, "grad_norm": 0.5218911341868374, "learning_rate": 6.667993170909437e-07, "loss": 0.1534, "step": 7638 }, { "epoch": 2.5510101853397895, "grad_norm": 0.4754035667843285, "learning_rate": 6.658300498481363e-07, "loss": 0.143, "step": 7639 }, { "epoch": 2.5513441309066622, "grad_norm": 0.5646948692153895, "learning_rate": 6.648614373327328e-07, "loss": 0.1604, "step": 7640 }, { "epoch": 2.551678076473535, "grad_norm": 0.5619083057889515, "learning_rate": 6.638934796910545e-07, "loss": 0.1614, "step": 7641 }, { "epoch": 2.5520120220404072, "grad_norm": 0.5161418977249806, "learning_rate": 6.629261770693213e-07, "loss": 0.1586, "step": 7642 }, { "epoch": 2.55234596760728, "grad_norm": 0.5378163686695885, "learning_rate": 6.619595296136577e-07, "loss": 0.1624, "step": 7643 }, { "epoch": 2.5526799131741527, "grad_norm": 0.4863809933072381, "learning_rate": 6.609935374700849e-07, "loss": 0.1483, "step": 7644 }, { "epoch": 2.5530138587410254, "grad_norm": 0.5311657608751162, "learning_rate": 6.600282007845277e-07, "loss": 0.158, "step": 7645 }, { "epoch": 2.5533478043078977, "grad_norm": 0.5304450691241134, "learning_rate": 6.590635197028128e-07, "loss": 0.16, "step": 7646 }, { "epoch": 2.5536817498747704, "grad_norm": 0.5094024551256224, "learning_rate": 6.580994943706675e-07, "loss": 0.161, "step": 7647 }, { "epoch": 2.554015695441643, "grad_norm": 0.4882401458282333, "learning_rate": 6.571361249337161e-07, "loss": 0.1455, "step": 7648 }, { "epoch": 2.554349641008516, "grad_norm": 0.5183521869165364, "learning_rate": 6.561734115374901e-07, "loss": 0.1589, "step": 7649 }, { "epoch": 2.554683586575388, "grad_norm": 0.4919516392113919, "learning_rate": 6.552113543274158e-07, "loss": 0.1466, "step": 7650 }, { "epoch": 2.555017532142261, "grad_norm": 0.4819998758087338, "learning_rate": 6.54249953448825e-07, "loss": 0.1441, "step": 7651 }, { "epoch": 2.5553514777091335, "grad_norm": 0.5147802225320831, "learning_rate": 6.532892090469484e-07, "loss": 0.1563, "step": 7652 }, { "epoch": 2.555685423276006, "grad_norm": 0.5320623190525292, "learning_rate": 6.52329121266918e-07, "loss": 0.1685, "step": 7653 }, { "epoch": 2.5560193688428785, "grad_norm": 0.5253504225252258, "learning_rate": 6.513696902537653e-07, "loss": 0.166, "step": 7654 }, { "epoch": 2.556353314409751, "grad_norm": 0.5296212624005909, "learning_rate": 6.504109161524257e-07, "loss": 0.1573, "step": 7655 }, { "epoch": 2.556687259976624, "grad_norm": 0.5441043227921218, "learning_rate": 6.494527991077304e-07, "loss": 0.162, "step": 7656 }, { "epoch": 2.5570212055434967, "grad_norm": 0.5589909412419124, "learning_rate": 6.484953392644161e-07, "loss": 0.1505, "step": 7657 }, { "epoch": 2.557355151110369, "grad_norm": 0.5117170772974657, "learning_rate": 6.475385367671183e-07, "loss": 0.1514, "step": 7658 }, { "epoch": 2.5576890966772416, "grad_norm": 0.534672576883815, "learning_rate": 6.465823917603742e-07, "loss": 0.1478, "step": 7659 }, { "epoch": 2.5580230422441144, "grad_norm": 0.5313783504428939, "learning_rate": 6.456269043886182e-07, "loss": 0.1628, "step": 7660 }, { "epoch": 2.5583569878109866, "grad_norm": 0.5160524410564582, "learning_rate": 6.446720747961904e-07, "loss": 0.1531, "step": 7661 }, { "epoch": 2.5586909333778594, "grad_norm": 0.47096063309538416, "learning_rate": 6.437179031273272e-07, "loss": 0.1446, "step": 7662 }, { "epoch": 2.559024878944732, "grad_norm": 0.537685199712028, "learning_rate": 6.427643895261687e-07, "loss": 0.1578, "step": 7663 }, { "epoch": 2.5593588245116043, "grad_norm": 0.5315214663957065, "learning_rate": 6.418115341367543e-07, "loss": 0.1524, "step": 7664 }, { "epoch": 2.559692770078477, "grad_norm": 0.5027992814477966, "learning_rate": 6.408593371030231e-07, "loss": 0.1503, "step": 7665 }, { "epoch": 2.56002671564535, "grad_norm": 0.5622015188810098, "learning_rate": 6.399077985688168e-07, "loss": 0.1647, "step": 7666 }, { "epoch": 2.5603606612122225, "grad_norm": 0.5444389360370473, "learning_rate": 6.389569186778754e-07, "loss": 0.1541, "step": 7667 }, { "epoch": 2.560694606779095, "grad_norm": 0.4900284290776257, "learning_rate": 6.38006697573842e-07, "loss": 0.1419, "step": 7668 }, { "epoch": 2.5610285523459675, "grad_norm": 0.5383726943088658, "learning_rate": 6.370571354002553e-07, "loss": 0.1616, "step": 7669 }, { "epoch": 2.56136249791284, "grad_norm": 0.5003686273859252, "learning_rate": 6.361082323005624e-07, "loss": 0.1542, "step": 7670 }, { "epoch": 2.561696443479713, "grad_norm": 0.5788182480274164, "learning_rate": 6.351599884181037e-07, "loss": 0.1705, "step": 7671 }, { "epoch": 2.562030389046585, "grad_norm": 0.5398324783213839, "learning_rate": 6.342124038961234e-07, "loss": 0.1549, "step": 7672 }, { "epoch": 2.562364334613458, "grad_norm": 0.5253027875595168, "learning_rate": 6.332654788777642e-07, "loss": 0.1559, "step": 7673 }, { "epoch": 2.5626982801803306, "grad_norm": 0.5641699534412207, "learning_rate": 6.323192135060713e-07, "loss": 0.1604, "step": 7674 }, { "epoch": 2.5630322257472034, "grad_norm": 0.5536198139751572, "learning_rate": 6.31373607923989e-07, "loss": 0.1575, "step": 7675 }, { "epoch": 2.563366171314076, "grad_norm": 0.5608823102074348, "learning_rate": 6.304286622743627e-07, "loss": 0.1548, "step": 7676 }, { "epoch": 2.5637001168809483, "grad_norm": 0.5147008886535758, "learning_rate": 6.294843766999364e-07, "loss": 0.1644, "step": 7677 }, { "epoch": 2.564034062447821, "grad_norm": 0.5051122394513354, "learning_rate": 6.285407513433572e-07, "loss": 0.1504, "step": 7678 }, { "epoch": 2.564368008014694, "grad_norm": 0.5089827237941057, "learning_rate": 6.275977863471683e-07, "loss": 0.1541, "step": 7679 }, { "epoch": 2.564701953581566, "grad_norm": 0.5833479789208602, "learning_rate": 6.266554818538173e-07, "loss": 0.1733, "step": 7680 }, { "epoch": 2.5650358991484388, "grad_norm": 0.49383382112180885, "learning_rate": 6.257138380056505e-07, "loss": 0.1437, "step": 7681 }, { "epoch": 2.5653698447153115, "grad_norm": 0.5396466133422341, "learning_rate": 6.24772854944915e-07, "loss": 0.164, "step": 7682 }, { "epoch": 2.5657037902821838, "grad_norm": 0.5253514663001958, "learning_rate": 6.238325328137552e-07, "loss": 0.1444, "step": 7683 }, { "epoch": 2.5660377358490565, "grad_norm": 0.5067795635086644, "learning_rate": 6.228928717542205e-07, "loss": 0.1549, "step": 7684 }, { "epoch": 2.566371681415929, "grad_norm": 0.5002550750718825, "learning_rate": 6.219538719082546e-07, "loss": 0.1482, "step": 7685 }, { "epoch": 2.566705626982802, "grad_norm": 0.5179834571022827, "learning_rate": 6.210155334177064e-07, "loss": 0.1456, "step": 7686 }, { "epoch": 2.5670395725496746, "grad_norm": 0.5463570828984406, "learning_rate": 6.200778564243237e-07, "loss": 0.1557, "step": 7687 }, { "epoch": 2.567373518116547, "grad_norm": 0.533683576059173, "learning_rate": 6.19140841069752e-07, "loss": 0.1511, "step": 7688 }, { "epoch": 2.5677074636834196, "grad_norm": 0.5512649791188792, "learning_rate": 6.1820448749554e-07, "loss": 0.1568, "step": 7689 }, { "epoch": 2.5680414092502923, "grad_norm": 0.5157111237491274, "learning_rate": 6.172687958431328e-07, "loss": 0.1499, "step": 7690 }, { "epoch": 2.5683753548171646, "grad_norm": 0.5054894343421307, "learning_rate": 6.163337662538793e-07, "loss": 0.1648, "step": 7691 }, { "epoch": 2.5687093003840373, "grad_norm": 0.5738044330278376, "learning_rate": 6.153993988690266e-07, "loss": 0.1696, "step": 7692 }, { "epoch": 2.56904324595091, "grad_norm": 0.5139856319427727, "learning_rate": 6.144656938297227e-07, "loss": 0.1616, "step": 7693 }, { "epoch": 2.5693771915177828, "grad_norm": 0.529624079244088, "learning_rate": 6.135326512770124e-07, "loss": 0.1554, "step": 7694 }, { "epoch": 2.569711137084655, "grad_norm": 0.532186839181486, "learning_rate": 6.126002713518453e-07, "loss": 0.1634, "step": 7695 }, { "epoch": 2.5700450826515278, "grad_norm": 0.4761868956330876, "learning_rate": 6.116685541950663e-07, "loss": 0.1485, "step": 7696 }, { "epoch": 2.5703790282184005, "grad_norm": 0.5616544286220363, "learning_rate": 6.107374999474236e-07, "loss": 0.1708, "step": 7697 }, { "epoch": 2.570712973785273, "grad_norm": 0.5155203837062691, "learning_rate": 6.098071087495621e-07, "loss": 0.1484, "step": 7698 }, { "epoch": 2.5710469193521455, "grad_norm": 0.5225906206355545, "learning_rate": 6.088773807420312e-07, "loss": 0.1591, "step": 7699 }, { "epoch": 2.571380864919018, "grad_norm": 0.5466633655649673, "learning_rate": 6.07948316065275e-07, "loss": 0.1612, "step": 7700 }, { "epoch": 2.571714810485891, "grad_norm": 0.5276109996287601, "learning_rate": 6.070199148596411e-07, "loss": 0.1543, "step": 7701 }, { "epoch": 2.572048756052763, "grad_norm": 0.5516155736454269, "learning_rate": 6.060921772653738e-07, "loss": 0.1618, "step": 7702 }, { "epoch": 2.572382701619636, "grad_norm": 0.4830812549050556, "learning_rate": 6.051651034226208e-07, "loss": 0.1473, "step": 7703 }, { "epoch": 2.5727166471865086, "grad_norm": 0.5248643245059543, "learning_rate": 6.042386934714245e-07, "loss": 0.156, "step": 7704 }, { "epoch": 2.5730505927533813, "grad_norm": 0.5189440442245319, "learning_rate": 6.03312947551734e-07, "loss": 0.153, "step": 7705 }, { "epoch": 2.573384538320254, "grad_norm": 0.5311742748839656, "learning_rate": 6.02387865803391e-07, "loss": 0.1588, "step": 7706 }, { "epoch": 2.5737184838871263, "grad_norm": 0.5306508792963632, "learning_rate": 6.014634483661419e-07, "loss": 0.1582, "step": 7707 }, { "epoch": 2.574052429453999, "grad_norm": 0.5187201844876844, "learning_rate": 6.005396953796294e-07, "loss": 0.1506, "step": 7708 }, { "epoch": 2.5743863750208718, "grad_norm": 0.50422011790657, "learning_rate": 5.996166069833976e-07, "loss": 0.1513, "step": 7709 }, { "epoch": 2.574720320587744, "grad_norm": 0.5131733480453597, "learning_rate": 5.986941833168913e-07, "loss": 0.1477, "step": 7710 }, { "epoch": 2.5750542661546167, "grad_norm": 0.5346311131575376, "learning_rate": 5.97772424519451e-07, "loss": 0.1571, "step": 7711 }, { "epoch": 2.5753882117214895, "grad_norm": 0.5096595724529045, "learning_rate": 5.96851330730322e-07, "loss": 0.1472, "step": 7712 }, { "epoch": 2.5757221572883617, "grad_norm": 0.4972298467703904, "learning_rate": 5.959309020886433e-07, "loss": 0.1452, "step": 7713 }, { "epoch": 2.5760561028552345, "grad_norm": 0.5252331142466029, "learning_rate": 5.950111387334584e-07, "loss": 0.1486, "step": 7714 }, { "epoch": 2.576390048422107, "grad_norm": 0.5088947881046305, "learning_rate": 5.940920408037081e-07, "loss": 0.1479, "step": 7715 }, { "epoch": 2.57672399398898, "grad_norm": 0.5705717088562303, "learning_rate": 5.93173608438234e-07, "loss": 0.1641, "step": 7716 }, { "epoch": 2.5770579395558526, "grad_norm": 0.5117709453123926, "learning_rate": 5.92255841775774e-07, "loss": 0.1552, "step": 7717 }, { "epoch": 2.577391885122725, "grad_norm": 0.5272255582904503, "learning_rate": 5.913387409549693e-07, "loss": 0.1486, "step": 7718 }, { "epoch": 2.5777258306895976, "grad_norm": 0.5500948968492612, "learning_rate": 5.904223061143577e-07, "loss": 0.1614, "step": 7719 }, { "epoch": 2.5780597762564703, "grad_norm": 0.5422871702657105, "learning_rate": 5.895065373923781e-07, "loss": 0.1612, "step": 7720 }, { "epoch": 2.5783937218233426, "grad_norm": 0.5311152125105255, "learning_rate": 5.885914349273664e-07, "loss": 0.1588, "step": 7721 }, { "epoch": 2.5787276673902153, "grad_norm": 0.5100562305085944, "learning_rate": 5.876769988575631e-07, "loss": 0.1479, "step": 7722 }, { "epoch": 2.579061612957088, "grad_norm": 0.5539085439821296, "learning_rate": 5.867632293211011e-07, "loss": 0.1611, "step": 7723 }, { "epoch": 2.5793955585239607, "grad_norm": 0.5398868308395143, "learning_rate": 5.85850126456019e-07, "loss": 0.1503, "step": 7724 }, { "epoch": 2.5797295040908335, "grad_norm": 0.5099924237947704, "learning_rate": 5.84937690400249e-07, "loss": 0.1465, "step": 7725 }, { "epoch": 2.5800634496577057, "grad_norm": 0.5346486269242904, "learning_rate": 5.840259212916277e-07, "loss": 0.1515, "step": 7726 }, { "epoch": 2.5803973952245784, "grad_norm": 0.5473017465597664, "learning_rate": 5.831148192678853e-07, "loss": 0.1598, "step": 7727 }, { "epoch": 2.580731340791451, "grad_norm": 0.5494409248977583, "learning_rate": 5.822043844666586e-07, "loss": 0.1605, "step": 7728 }, { "epoch": 2.5810652863583234, "grad_norm": 0.5465152627591858, "learning_rate": 5.812946170254763e-07, "loss": 0.1607, "step": 7729 }, { "epoch": 2.581399231925196, "grad_norm": 0.547006910687151, "learning_rate": 5.803855170817718e-07, "loss": 0.1538, "step": 7730 }, { "epoch": 2.581733177492069, "grad_norm": 0.5075310278681255, "learning_rate": 5.794770847728736e-07, "loss": 0.147, "step": 7731 }, { "epoch": 2.582067123058941, "grad_norm": 0.5263962669768227, "learning_rate": 5.785693202360121e-07, "loss": 0.155, "step": 7732 }, { "epoch": 2.582401068625814, "grad_norm": 0.5384871321201414, "learning_rate": 5.776622236083146e-07, "loss": 0.1469, "step": 7733 }, { "epoch": 2.5827350141926866, "grad_norm": 0.5167209940597584, "learning_rate": 5.767557950268099e-07, "loss": 0.1554, "step": 7734 }, { "epoch": 2.5830689597595593, "grad_norm": 0.5339870635390116, "learning_rate": 5.758500346284252e-07, "loss": 0.1633, "step": 7735 }, { "epoch": 2.583402905326432, "grad_norm": 0.5639288536519077, "learning_rate": 5.749449425499843e-07, "loss": 0.1646, "step": 7736 }, { "epoch": 2.5837368508933043, "grad_norm": 0.5703969992909854, "learning_rate": 5.740405189282134e-07, "loss": 0.1692, "step": 7737 }, { "epoch": 2.584070796460177, "grad_norm": 0.49620284954011423, "learning_rate": 5.73136763899737e-07, "loss": 0.1574, "step": 7738 }, { "epoch": 2.5844047420270497, "grad_norm": 0.5336886804139199, "learning_rate": 5.722336776010756e-07, "loss": 0.158, "step": 7739 }, { "epoch": 2.584738687593922, "grad_norm": 0.564930073801695, "learning_rate": 5.713312601686533e-07, "loss": 0.1654, "step": 7740 }, { "epoch": 2.5850726331607947, "grad_norm": 0.5116232221341999, "learning_rate": 5.704295117387904e-07, "loss": 0.1563, "step": 7741 }, { "epoch": 2.5854065787276674, "grad_norm": 0.5427584437481574, "learning_rate": 5.695284324477052e-07, "loss": 0.1608, "step": 7742 }, { "epoch": 2.58574052429454, "grad_norm": 0.49116718894623634, "learning_rate": 5.686280224315189e-07, "loss": 0.1455, "step": 7743 }, { "epoch": 2.5860744698614124, "grad_norm": 0.4711470768494421, "learning_rate": 5.677282818262464e-07, "loss": 0.1355, "step": 7744 }, { "epoch": 2.586408415428285, "grad_norm": 0.5313007777846921, "learning_rate": 5.668292107678048e-07, "loss": 0.1594, "step": 7745 }, { "epoch": 2.586742360995158, "grad_norm": 0.5555662849152072, "learning_rate": 5.659308093920101e-07, "loss": 0.1595, "step": 7746 }, { "epoch": 2.5870763065620306, "grad_norm": 0.5467008793166378, "learning_rate": 5.650330778345776e-07, "loss": 0.1571, "step": 7747 }, { "epoch": 2.587410252128903, "grad_norm": 0.5230866932851201, "learning_rate": 5.641360162311171e-07, "loss": 0.1468, "step": 7748 }, { "epoch": 2.5877441976957756, "grad_norm": 0.49357033180205706, "learning_rate": 5.632396247171429e-07, "loss": 0.1513, "step": 7749 }, { "epoch": 2.5880781432626483, "grad_norm": 0.5286758416649487, "learning_rate": 5.623439034280625e-07, "loss": 0.1546, "step": 7750 }, { "epoch": 2.5884120888295206, "grad_norm": 0.5572992513940127, "learning_rate": 5.614488524991896e-07, "loss": 0.1607, "step": 7751 }, { "epoch": 2.5887460343963933, "grad_norm": 0.5053585297967675, "learning_rate": 5.605544720657286e-07, "loss": 0.1557, "step": 7752 }, { "epoch": 2.589079979963266, "grad_norm": 0.5642451185748008, "learning_rate": 5.596607622627887e-07, "loss": 0.1607, "step": 7753 }, { "epoch": 2.5894139255301387, "grad_norm": 0.5401260030717812, "learning_rate": 5.587677232253725e-07, "loss": 0.1525, "step": 7754 }, { "epoch": 2.5897478710970114, "grad_norm": 0.4954060068895129, "learning_rate": 5.57875355088387e-07, "loss": 0.1487, "step": 7755 }, { "epoch": 2.5900818166638837, "grad_norm": 0.5493464971995057, "learning_rate": 5.569836579866316e-07, "loss": 0.1593, "step": 7756 }, { "epoch": 2.5904157622307564, "grad_norm": 0.5442184853873584, "learning_rate": 5.560926320548105e-07, "loss": 0.1657, "step": 7757 }, { "epoch": 2.590749707797629, "grad_norm": 0.5006345373798716, "learning_rate": 5.552022774275228e-07, "loss": 0.153, "step": 7758 }, { "epoch": 2.5910836533645014, "grad_norm": 0.5469704888693632, "learning_rate": 5.543125942392664e-07, "loss": 0.1609, "step": 7759 }, { "epoch": 2.591417598931374, "grad_norm": 0.494422781019865, "learning_rate": 5.534235826244389e-07, "loss": 0.1493, "step": 7760 }, { "epoch": 2.591751544498247, "grad_norm": 0.5474099861552523, "learning_rate": 5.525352427173369e-07, "loss": 0.1608, "step": 7761 }, { "epoch": 2.592085490065119, "grad_norm": 0.5147734733031684, "learning_rate": 5.516475746521527e-07, "loss": 0.1536, "step": 7762 }, { "epoch": 2.592419435631992, "grad_norm": 0.5449219405525347, "learning_rate": 5.507605785629794e-07, "loss": 0.1661, "step": 7763 }, { "epoch": 2.5927533811988646, "grad_norm": 0.510727938826208, "learning_rate": 5.498742545838104e-07, "loss": 0.1602, "step": 7764 }, { "epoch": 2.5930873267657373, "grad_norm": 0.49424743101460045, "learning_rate": 5.48988602848533e-07, "loss": 0.1438, "step": 7765 }, { "epoch": 2.59342127233261, "grad_norm": 0.5161401437755033, "learning_rate": 5.481036234909365e-07, "loss": 0.1544, "step": 7766 }, { "epoch": 2.5937552178994823, "grad_norm": 0.5239447561809009, "learning_rate": 5.472193166447065e-07, "loss": 0.1617, "step": 7767 }, { "epoch": 2.594089163466355, "grad_norm": 0.48449270455326193, "learning_rate": 5.463356824434285e-07, "loss": 0.1475, "step": 7768 }, { "epoch": 2.5944231090332277, "grad_norm": 0.5139041345004897, "learning_rate": 5.454527210205857e-07, "loss": 0.1523, "step": 7769 }, { "epoch": 2.5947570546001, "grad_norm": 0.5735900578841117, "learning_rate": 5.445704325095613e-07, "loss": 0.1689, "step": 7770 }, { "epoch": 2.5950910001669727, "grad_norm": 0.5461313985515455, "learning_rate": 5.436888170436327e-07, "loss": 0.1595, "step": 7771 }, { "epoch": 2.5954249457338454, "grad_norm": 0.5188701878410461, "learning_rate": 5.428078747559806e-07, "loss": 0.1524, "step": 7772 }, { "epoch": 2.595758891300718, "grad_norm": 0.5380374188365196, "learning_rate": 5.419276057796802e-07, "loss": 0.1522, "step": 7773 }, { "epoch": 2.596092836867591, "grad_norm": 0.5483299645095472, "learning_rate": 5.410480102477067e-07, "loss": 0.1706, "step": 7774 }, { "epoch": 2.596426782434463, "grad_norm": 0.5307610952040726, "learning_rate": 5.401690882929333e-07, "loss": 0.1538, "step": 7775 }, { "epoch": 2.596760728001336, "grad_norm": 0.51063905487827, "learning_rate": 5.392908400481334e-07, "loss": 0.1476, "step": 7776 }, { "epoch": 2.5970946735682086, "grad_norm": 0.4993184207547867, "learning_rate": 5.384132656459745e-07, "loss": 0.1525, "step": 7777 }, { "epoch": 2.597428619135081, "grad_norm": 0.5401423593539602, "learning_rate": 5.375363652190257e-07, "loss": 0.1602, "step": 7778 }, { "epoch": 2.5977625647019535, "grad_norm": 0.5034998004381569, "learning_rate": 5.366601388997522e-07, "loss": 0.1438, "step": 7779 }, { "epoch": 2.5980965102688263, "grad_norm": 0.48936768353800086, "learning_rate": 5.357845868205191e-07, "loss": 0.1464, "step": 7780 }, { "epoch": 2.5984304558356985, "grad_norm": 0.5169519189655872, "learning_rate": 5.34909709113589e-07, "loss": 0.1577, "step": 7781 }, { "epoch": 2.5987644014025713, "grad_norm": 0.5433344403290058, "learning_rate": 5.340355059111213e-07, "loss": 0.1594, "step": 7782 }, { "epoch": 2.599098346969444, "grad_norm": 0.5137287693888043, "learning_rate": 5.331619773451757e-07, "loss": 0.1555, "step": 7783 }, { "epoch": 2.5994322925363167, "grad_norm": 0.543531749495905, "learning_rate": 5.32289123547709e-07, "loss": 0.1582, "step": 7784 }, { "epoch": 2.5997662381031894, "grad_norm": 0.5571790592000054, "learning_rate": 5.314169446505757e-07, "loss": 0.1595, "step": 7785 }, { "epoch": 2.6001001836700617, "grad_norm": 0.5359827903002512, "learning_rate": 5.305454407855282e-07, "loss": 0.1579, "step": 7786 }, { "epoch": 2.6004341292369344, "grad_norm": 0.5327339424957118, "learning_rate": 5.296746120842189e-07, "loss": 0.1504, "step": 7787 }, { "epoch": 2.600768074803807, "grad_norm": 0.532171748237851, "learning_rate": 5.288044586781955e-07, "loss": 0.161, "step": 7788 }, { "epoch": 2.6011020203706794, "grad_norm": 0.5941419346349204, "learning_rate": 5.279349806989054e-07, "loss": 0.1709, "step": 7789 }, { "epoch": 2.601435965937552, "grad_norm": 0.5568723344691379, "learning_rate": 5.270661782776931e-07, "loss": 0.1656, "step": 7790 }, { "epoch": 2.601769911504425, "grad_norm": 0.5382829971872203, "learning_rate": 5.26198051545801e-07, "loss": 0.1664, "step": 7791 }, { "epoch": 2.6021038570712975, "grad_norm": 0.4687505363311479, "learning_rate": 5.253306006343706e-07, "loss": 0.1445, "step": 7792 }, { "epoch": 2.60243780263817, "grad_norm": 0.5365972383720856, "learning_rate": 5.244638256744422e-07, "loss": 0.1563, "step": 7793 }, { "epoch": 2.6027717482050425, "grad_norm": 0.531629913868646, "learning_rate": 5.235977267969489e-07, "loss": 0.1545, "step": 7794 }, { "epoch": 2.6031056937719153, "grad_norm": 0.5512052735448711, "learning_rate": 5.227323041327281e-07, "loss": 0.1651, "step": 7795 }, { "epoch": 2.603439639338788, "grad_norm": 0.5192438240687594, "learning_rate": 5.218675578125099e-07, "loss": 0.1593, "step": 7796 }, { "epoch": 2.6037735849056602, "grad_norm": 0.5341457063593223, "learning_rate": 5.210034879669257e-07, "loss": 0.1606, "step": 7797 }, { "epoch": 2.604107530472533, "grad_norm": 0.4849767604435831, "learning_rate": 5.201400947265029e-07, "loss": 0.1402, "step": 7798 }, { "epoch": 2.6044414760394057, "grad_norm": 0.5178804361452914, "learning_rate": 5.192773782216681e-07, "loss": 0.1586, "step": 7799 }, { "epoch": 2.604775421606278, "grad_norm": 0.5509222801070119, "learning_rate": 5.184153385827434e-07, "loss": 0.1608, "step": 7800 }, { "epoch": 2.6051093671731507, "grad_norm": 0.5055391926707585, "learning_rate": 5.175539759399518e-07, "loss": 0.149, "step": 7801 }, { "epoch": 2.6054433127400234, "grad_norm": 0.4990687566409661, "learning_rate": 5.166932904234101e-07, "loss": 0.1407, "step": 7802 }, { "epoch": 2.605777258306896, "grad_norm": 0.5445711995101518, "learning_rate": 5.158332821631362e-07, "loss": 0.1567, "step": 7803 }, { "epoch": 2.606111203873769, "grad_norm": 0.5676814520874788, "learning_rate": 5.149739512890445e-07, "loss": 0.1597, "step": 7804 }, { "epoch": 2.606445149440641, "grad_norm": 0.49071635784943063, "learning_rate": 5.141152979309477e-07, "loss": 0.1433, "step": 7805 }, { "epoch": 2.606779095007514, "grad_norm": 0.5593982162095931, "learning_rate": 5.132573222185539e-07, "loss": 0.1624, "step": 7806 }, { "epoch": 2.6071130405743865, "grad_norm": 0.5026472702629329, "learning_rate": 5.124000242814725e-07, "loss": 0.1545, "step": 7807 }, { "epoch": 2.607446986141259, "grad_norm": 0.5215743662653198, "learning_rate": 5.115434042492057e-07, "loss": 0.1497, "step": 7808 }, { "epoch": 2.6077809317081315, "grad_norm": 0.5329900153400169, "learning_rate": 5.106874622511576e-07, "loss": 0.1571, "step": 7809 }, { "epoch": 2.6081148772750042, "grad_norm": 0.5110805817757201, "learning_rate": 5.098321984166293e-07, "loss": 0.1449, "step": 7810 }, { "epoch": 2.6084488228418765, "grad_norm": 0.5610884253526799, "learning_rate": 5.089776128748169e-07, "loss": 0.1589, "step": 7811 }, { "epoch": 2.6087827684087492, "grad_norm": 0.5378244387007248, "learning_rate": 5.081237057548166e-07, "loss": 0.157, "step": 7812 }, { "epoch": 2.609116713975622, "grad_norm": 0.5931449426137503, "learning_rate": 5.072704771856201e-07, "loss": 0.1687, "step": 7813 }, { "epoch": 2.6094506595424947, "grad_norm": 0.4805946751761879, "learning_rate": 5.06417927296119e-07, "loss": 0.1457, "step": 7814 }, { "epoch": 2.6097846051093674, "grad_norm": 0.4980998435747012, "learning_rate": 5.055660562150983e-07, "loss": 0.1444, "step": 7815 }, { "epoch": 2.6101185506762397, "grad_norm": 0.531313199842911, "learning_rate": 5.047148640712468e-07, "loss": 0.1558, "step": 7816 }, { "epoch": 2.6104524962431124, "grad_norm": 0.5085288422637632, "learning_rate": 5.038643509931446e-07, "loss": 0.156, "step": 7817 }, { "epoch": 2.610786441809985, "grad_norm": 0.5819241309614197, "learning_rate": 5.030145171092732e-07, "loss": 0.1652, "step": 7818 }, { "epoch": 2.6111203873768574, "grad_norm": 0.5511100906751708, "learning_rate": 5.021653625480089e-07, "loss": 0.1678, "step": 7819 }, { "epoch": 2.61145433294373, "grad_norm": 0.5176029698496152, "learning_rate": 5.013168874376273e-07, "loss": 0.1526, "step": 7820 }, { "epoch": 2.611788278510603, "grad_norm": 0.6173691909315729, "learning_rate": 5.004690919062983e-07, "loss": 0.1704, "step": 7821 }, { "epoch": 2.6121222240774755, "grad_norm": 0.539288615553018, "learning_rate": 4.996219760820947e-07, "loss": 0.1529, "step": 7822 }, { "epoch": 2.6124561696443482, "grad_norm": 0.5481896336994918, "learning_rate": 4.987755400929817e-07, "loss": 0.1596, "step": 7823 }, { "epoch": 2.6127901152112205, "grad_norm": 0.5191460081896047, "learning_rate": 4.97929784066824e-07, "loss": 0.151, "step": 7824 }, { "epoch": 2.6131240607780932, "grad_norm": 0.5264355185799506, "learning_rate": 4.970847081313818e-07, "loss": 0.1561, "step": 7825 }, { "epoch": 2.613458006344966, "grad_norm": 0.5277097383950515, "learning_rate": 4.962403124143156e-07, "loss": 0.1513, "step": 7826 }, { "epoch": 2.613791951911838, "grad_norm": 0.5003241442742133, "learning_rate": 4.953965970431779e-07, "loss": 0.153, "step": 7827 }, { "epoch": 2.614125897478711, "grad_norm": 0.5230613526678042, "learning_rate": 4.945535621454268e-07, "loss": 0.1479, "step": 7828 }, { "epoch": 2.6144598430455837, "grad_norm": 0.5437893134594357, "learning_rate": 4.937112078484086e-07, "loss": 0.1573, "step": 7829 }, { "epoch": 2.614793788612456, "grad_norm": 0.5504021915641389, "learning_rate": 4.928695342793733e-07, "loss": 0.1626, "step": 7830 }, { "epoch": 2.6151277341793286, "grad_norm": 0.5153465880324457, "learning_rate": 4.92028541565464e-07, "loss": 0.1548, "step": 7831 }, { "epoch": 2.6154616797462014, "grad_norm": 0.5340166597075059, "learning_rate": 4.911882298337228e-07, "loss": 0.1628, "step": 7832 }, { "epoch": 2.615795625313074, "grad_norm": 0.5363461554034484, "learning_rate": 4.903485992110901e-07, "loss": 0.1536, "step": 7833 }, { "epoch": 2.616129570879947, "grad_norm": 0.5255258415056727, "learning_rate": 4.895096498243995e-07, "loss": 0.1517, "step": 7834 }, { "epoch": 2.616463516446819, "grad_norm": 0.5926188747420524, "learning_rate": 4.886713818003874e-07, "loss": 0.1681, "step": 7835 }, { "epoch": 2.616797462013692, "grad_norm": 0.5227398551663379, "learning_rate": 4.878337952656809e-07, "loss": 0.1522, "step": 7836 }, { "epoch": 2.6171314075805645, "grad_norm": 0.5447007227711369, "learning_rate": 4.869968903468092e-07, "loss": 0.1609, "step": 7837 }, { "epoch": 2.617465353147437, "grad_norm": 0.5237554693670284, "learning_rate": 4.861606671701946e-07, "loss": 0.1559, "step": 7838 }, { "epoch": 2.6177992987143095, "grad_norm": 0.5797396060428281, "learning_rate": 4.853251258621621e-07, "loss": 0.1723, "step": 7839 }, { "epoch": 2.618133244281182, "grad_norm": 0.5201124138243138, "learning_rate": 4.844902665489265e-07, "loss": 0.159, "step": 7840 }, { "epoch": 2.618467189848055, "grad_norm": 0.514676268580547, "learning_rate": 4.836560893566056e-07, "loss": 0.1527, "step": 7841 }, { "epoch": 2.618801135414927, "grad_norm": 0.5207632098157826, "learning_rate": 4.828225944112097e-07, "loss": 0.1545, "step": 7842 }, { "epoch": 2.6191350809818, "grad_norm": 0.5262119712435508, "learning_rate": 4.819897818386499e-07, "loss": 0.1511, "step": 7843 }, { "epoch": 2.6194690265486726, "grad_norm": 0.5143227879462103, "learning_rate": 4.811576517647299e-07, "loss": 0.148, "step": 7844 }, { "epoch": 2.6198029721155454, "grad_norm": 0.5622213956904916, "learning_rate": 4.803262043151557e-07, "loss": 0.1617, "step": 7845 }, { "epoch": 2.6201369176824176, "grad_norm": 0.5978754119797444, "learning_rate": 4.794954396155249e-07, "loss": 0.1688, "step": 7846 }, { "epoch": 2.6204708632492903, "grad_norm": 0.5177420641465259, "learning_rate": 4.786653577913364e-07, "loss": 0.1503, "step": 7847 }, { "epoch": 2.620804808816163, "grad_norm": 0.5276437691064124, "learning_rate": 4.77835958967981e-07, "loss": 0.1479, "step": 7848 }, { "epoch": 2.6211387543830353, "grad_norm": 0.5363122643509928, "learning_rate": 4.770072432707523e-07, "loss": 0.1486, "step": 7849 }, { "epoch": 2.621472699949908, "grad_norm": 0.5652686462454588, "learning_rate": 4.761792108248342e-07, "loss": 0.1667, "step": 7850 }, { "epoch": 2.6218066455167808, "grad_norm": 0.5305584330166254, "learning_rate": 4.753518617553138e-07, "loss": 0.1591, "step": 7851 }, { "epoch": 2.6221405910836535, "grad_norm": 0.5253770844239304, "learning_rate": 4.745251961871705e-07, "loss": 0.156, "step": 7852 }, { "epoch": 2.622474536650526, "grad_norm": 0.5468790587793163, "learning_rate": 4.736992142452823e-07, "loss": 0.1598, "step": 7853 }, { "epoch": 2.6228084822173985, "grad_norm": 0.5425759314571648, "learning_rate": 4.728739160544227e-07, "loss": 0.153, "step": 7854 }, { "epoch": 2.623142427784271, "grad_norm": 0.5271469526144371, "learning_rate": 4.720493017392641e-07, "loss": 0.152, "step": 7855 }, { "epoch": 2.623476373351144, "grad_norm": 0.49647538460886176, "learning_rate": 4.712253714243725e-07, "loss": 0.1552, "step": 7856 }, { "epoch": 2.623810318918016, "grad_norm": 0.5114529568024085, "learning_rate": 4.7040212523421335e-07, "loss": 0.1582, "step": 7857 }, { "epoch": 2.624144264484889, "grad_norm": 0.5079040388836952, "learning_rate": 4.695795632931477e-07, "loss": 0.1553, "step": 7858 }, { "epoch": 2.6244782100517616, "grad_norm": 0.48829885332335254, "learning_rate": 4.687576857254328e-07, "loss": 0.1446, "step": 7859 }, { "epoch": 2.624812155618634, "grad_norm": 0.4728253401593144, "learning_rate": 4.679364926552238e-07, "loss": 0.1451, "step": 7860 }, { "epoch": 2.6251461011855066, "grad_norm": 0.49875372399465623, "learning_rate": 4.671159842065698e-07, "loss": 0.1521, "step": 7861 }, { "epoch": 2.6254800467523793, "grad_norm": 0.5076996510828747, "learning_rate": 4.662961605034194e-07, "loss": 0.1442, "step": 7862 }, { "epoch": 2.625813992319252, "grad_norm": 0.5122407403110252, "learning_rate": 4.654770216696169e-07, "loss": 0.15, "step": 7863 }, { "epoch": 2.6261479378861248, "grad_norm": 0.5321463370987862, "learning_rate": 4.646585678289034e-07, "loss": 0.1531, "step": 7864 }, { "epoch": 2.626481883452997, "grad_norm": 0.5579442129363771, "learning_rate": 4.6384079910491376e-07, "loss": 0.1634, "step": 7865 }, { "epoch": 2.6268158290198698, "grad_norm": 0.5191857872148999, "learning_rate": 4.630237156211842e-07, "loss": 0.1521, "step": 7866 }, { "epoch": 2.6271497745867425, "grad_norm": 0.537176066982126, "learning_rate": 4.6220731750114267e-07, "loss": 0.1581, "step": 7867 }, { "epoch": 2.6274837201536148, "grad_norm": 0.5506531503517323, "learning_rate": 4.6139160486811663e-07, "loss": 0.1521, "step": 7868 }, { "epoch": 2.6278176657204875, "grad_norm": 0.48213991422421537, "learning_rate": 4.605765778453292e-07, "loss": 0.1415, "step": 7869 }, { "epoch": 2.62815161128736, "grad_norm": 0.6021241570026848, "learning_rate": 4.597622365559007e-07, "loss": 0.1767, "step": 7870 }, { "epoch": 2.628485556854233, "grad_norm": 0.5383826080314704, "learning_rate": 4.5894858112284445e-07, "loss": 0.1555, "step": 7871 }, { "epoch": 2.6288195024211056, "grad_norm": 0.5133156672319394, "learning_rate": 4.581356116690755e-07, "loss": 0.1429, "step": 7872 }, { "epoch": 2.629153447987978, "grad_norm": 0.45845178214461585, "learning_rate": 4.573233283173989e-07, "loss": 0.1351, "step": 7873 }, { "epoch": 2.6294873935548506, "grad_norm": 0.5028748544138391, "learning_rate": 4.5651173119052427e-07, "loss": 0.1568, "step": 7874 }, { "epoch": 2.6298213391217233, "grad_norm": 0.523847639748306, "learning_rate": 4.5570082041104915e-07, "loss": 0.1592, "step": 7875 }, { "epoch": 2.6301552846885956, "grad_norm": 0.5533083847144648, "learning_rate": 4.5489059610147323e-07, "loss": 0.1642, "step": 7876 }, { "epoch": 2.6304892302554683, "grad_norm": 0.5741688540753057, "learning_rate": 4.5408105838418924e-07, "loss": 0.1677, "step": 7877 }, { "epoch": 2.630823175822341, "grad_norm": 0.5274978341726365, "learning_rate": 4.5327220738148823e-07, "loss": 0.1568, "step": 7878 }, { "epoch": 2.6311571213892133, "grad_norm": 0.49812528970220843, "learning_rate": 4.524640432155558e-07, "loss": 0.1485, "step": 7879 }, { "epoch": 2.631491066956086, "grad_norm": 0.5285848686406707, "learning_rate": 4.516565660084754e-07, "loss": 0.1573, "step": 7880 }, { "epoch": 2.6318250125229588, "grad_norm": 0.544151028133756, "learning_rate": 4.5084977588222613e-07, "loss": 0.1612, "step": 7881 }, { "epoch": 2.6321589580898315, "grad_norm": 0.5246947358977803, "learning_rate": 4.500436729586821e-07, "loss": 0.1584, "step": 7882 }, { "epoch": 2.632492903656704, "grad_norm": 0.5428300093923333, "learning_rate": 4.4923825735961604e-07, "loss": 0.165, "step": 7883 }, { "epoch": 2.6328268492235765, "grad_norm": 0.4968719145895019, "learning_rate": 4.484335292066938e-07, "loss": 0.1451, "step": 7884 }, { "epoch": 2.633160794790449, "grad_norm": 0.5269566759015472, "learning_rate": 4.476294886214799e-07, "loss": 0.1479, "step": 7885 }, { "epoch": 2.633494740357322, "grad_norm": 0.5511501631755724, "learning_rate": 4.468261357254339e-07, "loss": 0.1517, "step": 7886 }, { "epoch": 2.633828685924194, "grad_norm": 0.5047376296992027, "learning_rate": 4.46023470639913e-07, "loss": 0.1522, "step": 7887 }, { "epoch": 2.634162631491067, "grad_norm": 0.5883433464805333, "learning_rate": 4.452214934861676e-07, "loss": 0.165, "step": 7888 }, { "epoch": 2.6344965770579396, "grad_norm": 0.5735857232549988, "learning_rate": 4.4442020438534737e-07, "loss": 0.1584, "step": 7889 }, { "epoch": 2.6348305226248123, "grad_norm": 0.5085681511536392, "learning_rate": 4.436196034584944e-07, "loss": 0.1553, "step": 7890 }, { "epoch": 2.6351644681916846, "grad_norm": 0.5134750451469148, "learning_rate": 4.4281969082654976e-07, "loss": 0.1521, "step": 7891 }, { "epoch": 2.6354984137585573, "grad_norm": 0.5501752039523184, "learning_rate": 4.4202046661035e-07, "loss": 0.1586, "step": 7892 }, { "epoch": 2.63583235932543, "grad_norm": 0.4935318484140184, "learning_rate": 4.4122193093062815e-07, "loss": 0.1499, "step": 7893 }, { "epoch": 2.6361663048923027, "grad_norm": 0.5381149489809288, "learning_rate": 4.4042408390801097e-07, "loss": 0.1524, "step": 7894 }, { "epoch": 2.636500250459175, "grad_norm": 0.48618271758439613, "learning_rate": 4.3962692566302366e-07, "loss": 0.1459, "step": 7895 }, { "epoch": 2.6368341960260477, "grad_norm": 0.5542917107201565, "learning_rate": 4.38830456316085e-07, "loss": 0.1621, "step": 7896 }, { "epoch": 2.6371681415929205, "grad_norm": 0.5934052591504712, "learning_rate": 4.38034675987512e-07, "loss": 0.1693, "step": 7897 }, { "epoch": 2.6375020871597927, "grad_norm": 0.6050578302721633, "learning_rate": 4.372395847975164e-07, "loss": 0.1661, "step": 7898 }, { "epoch": 2.6378360327266654, "grad_norm": 0.5416419907657133, "learning_rate": 4.364451828662075e-07, "loss": 0.1546, "step": 7899 }, { "epoch": 2.638169978293538, "grad_norm": 0.5494613936282062, "learning_rate": 4.356514703135867e-07, "loss": 0.1629, "step": 7900 }, { "epoch": 2.638503923860411, "grad_norm": 0.5203439914354697, "learning_rate": 4.348584472595557e-07, "loss": 0.1557, "step": 7901 }, { "epoch": 2.6388378694272836, "grad_norm": 0.5468240192643921, "learning_rate": 4.3406611382390826e-07, "loss": 0.1554, "step": 7902 }, { "epoch": 2.639171814994156, "grad_norm": 0.5255586767892553, "learning_rate": 4.3327447012633695e-07, "loss": 0.1536, "step": 7903 }, { "epoch": 2.6395057605610286, "grad_norm": 0.5141342995815591, "learning_rate": 4.324835162864283e-07, "loss": 0.1467, "step": 7904 }, { "epoch": 2.6398397061279013, "grad_norm": 0.5304691027531064, "learning_rate": 4.31693252423665e-07, "loss": 0.1543, "step": 7905 }, { "epoch": 2.6401736516947736, "grad_norm": 0.5023980897506727, "learning_rate": 4.3090367865742666e-07, "loss": 0.1553, "step": 7906 }, { "epoch": 2.6405075972616463, "grad_norm": 0.5085208473006091, "learning_rate": 4.3011479510698615e-07, "loss": 0.1499, "step": 7907 }, { "epoch": 2.640841542828519, "grad_norm": 0.5327667092975095, "learning_rate": 4.293266018915149e-07, "loss": 0.1558, "step": 7908 }, { "epoch": 2.6411754883953913, "grad_norm": 0.5607038079211306, "learning_rate": 4.2853909913007807e-07, "loss": 0.1694, "step": 7909 }, { "epoch": 2.641509433962264, "grad_norm": 0.5499665080788384, "learning_rate": 4.277522869416384e-07, "loss": 0.1645, "step": 7910 }, { "epoch": 2.6418433795291367, "grad_norm": 0.5290539398893629, "learning_rate": 4.269661654450513e-07, "loss": 0.1529, "step": 7911 }, { "epoch": 2.6421773250960094, "grad_norm": 0.49108126097882077, "learning_rate": 4.261807347590713e-07, "loss": 0.1364, "step": 7912 }, { "epoch": 2.642511270662882, "grad_norm": 0.5284309578302578, "learning_rate": 4.253959950023456e-07, "loss": 0.1559, "step": 7913 }, { "epoch": 2.6428452162297544, "grad_norm": 0.5074179173504525, "learning_rate": 4.246119462934195e-07, "loss": 0.1559, "step": 7914 }, { "epoch": 2.643179161796627, "grad_norm": 0.5065896426675138, "learning_rate": 4.238285887507315e-07, "loss": 0.155, "step": 7915 }, { "epoch": 2.6435131073635, "grad_norm": 0.5039285388619513, "learning_rate": 4.230459224926198e-07, "loss": 0.1531, "step": 7916 }, { "epoch": 2.643847052930372, "grad_norm": 0.5283799716103906, "learning_rate": 4.222639476373119e-07, "loss": 0.1448, "step": 7917 }, { "epoch": 2.644180998497245, "grad_norm": 0.5626566515484439, "learning_rate": 4.2148266430293627e-07, "loss": 0.154, "step": 7918 }, { "epoch": 2.6445149440641176, "grad_norm": 0.5156261937650805, "learning_rate": 4.207020726075145e-07, "loss": 0.1585, "step": 7919 }, { "epoch": 2.6448488896309903, "grad_norm": 0.5440110225549866, "learning_rate": 4.199221726689634e-07, "loss": 0.1593, "step": 7920 }, { "epoch": 2.645182835197863, "grad_norm": 0.570266964313994, "learning_rate": 4.191429646050971e-07, "loss": 0.1671, "step": 7921 }, { "epoch": 2.6455167807647353, "grad_norm": 0.5221227396321205, "learning_rate": 4.1836444853362465e-07, "loss": 0.1506, "step": 7922 }, { "epoch": 2.645850726331608, "grad_norm": 0.497219251017224, "learning_rate": 4.1758662457214884e-07, "loss": 0.1373, "step": 7923 }, { "epoch": 2.6461846718984807, "grad_norm": 0.541261652338555, "learning_rate": 4.1680949283816996e-07, "loss": 0.1576, "step": 7924 }, { "epoch": 2.646518617465353, "grad_norm": 0.6069496137295698, "learning_rate": 4.160330534490814e-07, "loss": 0.158, "step": 7925 }, { "epoch": 2.6468525630322257, "grad_norm": 0.5806445366255634, "learning_rate": 4.152573065221749e-07, "loss": 0.1691, "step": 7926 }, { "epoch": 2.6471865085990984, "grad_norm": 0.5463692447613053, "learning_rate": 4.1448225217463724e-07, "loss": 0.1595, "step": 7927 }, { "epoch": 2.6475204541659707, "grad_norm": 0.493295458582906, "learning_rate": 4.1370789052354644e-07, "loss": 0.1481, "step": 7928 }, { "epoch": 2.6478543997328434, "grad_norm": 0.5120650298647899, "learning_rate": 4.129342216858817e-07, "loss": 0.1541, "step": 7929 }, { "epoch": 2.648188345299716, "grad_norm": 0.5501003904091634, "learning_rate": 4.1216124577851293e-07, "loss": 0.1646, "step": 7930 }, { "epoch": 2.648522290866589, "grad_norm": 0.5994137158090266, "learning_rate": 4.113889629182083e-07, "loss": 0.1743, "step": 7931 }, { "epoch": 2.6488562364334616, "grad_norm": 0.5321524927305665, "learning_rate": 4.106173732216295e-07, "loss": 0.1548, "step": 7932 }, { "epoch": 2.649190182000334, "grad_norm": 0.5392827869317124, "learning_rate": 4.0984647680533564e-07, "loss": 0.1551, "step": 7933 }, { "epoch": 2.6495241275672066, "grad_norm": 0.5489927227974719, "learning_rate": 4.090762737857784e-07, "loss": 0.1408, "step": 7934 }, { "epoch": 2.6498580731340793, "grad_norm": 0.5787246971378018, "learning_rate": 4.0830676427930646e-07, "loss": 0.1632, "step": 7935 }, { "epoch": 2.6501920187009516, "grad_norm": 0.5134979626899223, "learning_rate": 4.0753794840216296e-07, "loss": 0.1568, "step": 7936 }, { "epoch": 2.6505259642678243, "grad_norm": 0.5604056768059363, "learning_rate": 4.067698262704878e-07, "loss": 0.1671, "step": 7937 }, { "epoch": 2.650859909834697, "grad_norm": 0.5144901739013071, "learning_rate": 4.0600239800031136e-07, "loss": 0.1545, "step": 7938 }, { "epoch": 2.6511938554015697, "grad_norm": 0.5203460567839596, "learning_rate": 4.0523566370756774e-07, "loss": 0.1491, "step": 7939 }, { "epoch": 2.651527800968442, "grad_norm": 0.5067411197472471, "learning_rate": 4.044696235080775e-07, "loss": 0.1451, "step": 7940 }, { "epoch": 2.6518617465353147, "grad_norm": 0.5252416651013114, "learning_rate": 4.037042775175626e-07, "loss": 0.1578, "step": 7941 }, { "epoch": 2.6521956921021874, "grad_norm": 0.6042433021264649, "learning_rate": 4.0293962585163493e-07, "loss": 0.1583, "step": 7942 }, { "epoch": 2.65252963766906, "grad_norm": 0.49441861117276115, "learning_rate": 4.02175668625806e-07, "loss": 0.1454, "step": 7943 }, { "epoch": 2.6528635832359324, "grad_norm": 0.5240774832604242, "learning_rate": 4.014124059554786e-07, "loss": 0.1564, "step": 7944 }, { "epoch": 2.653197528802805, "grad_norm": 0.5320277327365568, "learning_rate": 4.006498379559559e-07, "loss": 0.1569, "step": 7945 }, { "epoch": 2.653531474369678, "grad_norm": 0.530128976732692, "learning_rate": 3.9988796474242977e-07, "loss": 0.149, "step": 7946 }, { "epoch": 2.65386541993655, "grad_norm": 0.5389535705555505, "learning_rate": 3.9912678642999134e-07, "loss": 0.1621, "step": 7947 }, { "epoch": 2.654199365503423, "grad_norm": 0.511151976632125, "learning_rate": 3.983663031336249e-07, "loss": 0.1574, "step": 7948 }, { "epoch": 2.6545333110702956, "grad_norm": 0.5122387147490539, "learning_rate": 3.976065149682112e-07, "loss": 0.1439, "step": 7949 }, { "epoch": 2.6548672566371683, "grad_norm": 0.5142207485921048, "learning_rate": 3.968474220485252e-07, "loss": 0.1555, "step": 7950 }, { "epoch": 2.655201202204041, "grad_norm": 0.6583635200461101, "learning_rate": 3.960890244892362e-07, "loss": 0.1672, "step": 7951 }, { "epoch": 2.6555351477709133, "grad_norm": 0.4891978157653677, "learning_rate": 3.953313224049099e-07, "loss": 0.151, "step": 7952 }, { "epoch": 2.655869093337786, "grad_norm": 0.535158030555081, "learning_rate": 3.945743159100046e-07, "loss": 0.1524, "step": 7953 }, { "epoch": 2.6562030389046587, "grad_norm": 0.5370561320841315, "learning_rate": 3.938180051188756e-07, "loss": 0.1574, "step": 7954 }, { "epoch": 2.656536984471531, "grad_norm": 0.5358457597336919, "learning_rate": 3.930623901457736e-07, "loss": 0.1575, "step": 7955 }, { "epoch": 2.6568709300384037, "grad_norm": 0.55410806790667, "learning_rate": 3.92307471104843e-07, "loss": 0.1572, "step": 7956 }, { "epoch": 2.6572048756052764, "grad_norm": 0.5379106010763365, "learning_rate": 3.915532481101225e-07, "loss": 0.1611, "step": 7957 }, { "epoch": 2.6575388211721487, "grad_norm": 0.5380414994383523, "learning_rate": 3.9079972127554657e-07, "loss": 0.1574, "step": 7958 }, { "epoch": 2.6578727667390214, "grad_norm": 0.5306946489832371, "learning_rate": 3.9004689071494406e-07, "loss": 0.1552, "step": 7959 }, { "epoch": 2.658206712305894, "grad_norm": 0.5083058729379549, "learning_rate": 3.8929475654203963e-07, "loss": 0.1509, "step": 7960 }, { "epoch": 2.658540657872767, "grad_norm": 0.5221341981384049, "learning_rate": 3.8854331887045016e-07, "loss": 0.1501, "step": 7961 }, { "epoch": 2.6588746034396396, "grad_norm": 0.5406700255249568, "learning_rate": 3.877925778136921e-07, "loss": 0.1588, "step": 7962 }, { "epoch": 2.659208549006512, "grad_norm": 0.5407476407616856, "learning_rate": 3.870425334851713e-07, "loss": 0.158, "step": 7963 }, { "epoch": 2.6595424945733845, "grad_norm": 0.5643950493663836, "learning_rate": 3.8629318599819224e-07, "loss": 0.1672, "step": 7964 }, { "epoch": 2.6598764401402573, "grad_norm": 0.4808623015760249, "learning_rate": 3.855445354659515e-07, "loss": 0.1474, "step": 7965 }, { "epoch": 2.6602103857071295, "grad_norm": 0.554313310672822, "learning_rate": 3.847965820015426e-07, "loss": 0.1649, "step": 7966 }, { "epoch": 2.6605443312740022, "grad_norm": 0.49048685231778866, "learning_rate": 3.8404932571795115e-07, "loss": 0.1489, "step": 7967 }, { "epoch": 2.660878276840875, "grad_norm": 0.5770239422265581, "learning_rate": 3.833027667280614e-07, "loss": 0.1627, "step": 7968 }, { "epoch": 2.6612122224077477, "grad_norm": 0.5259826441398753, "learning_rate": 3.825569051446476e-07, "loss": 0.1535, "step": 7969 }, { "epoch": 2.6615461679746204, "grad_norm": 0.5236874169599129, "learning_rate": 3.8181174108038286e-07, "loss": 0.164, "step": 7970 }, { "epoch": 2.6618801135414927, "grad_norm": 0.5653557919006781, "learning_rate": 3.810672746478317e-07, "loss": 0.1617, "step": 7971 }, { "epoch": 2.6622140591083654, "grad_norm": 0.5668011201139045, "learning_rate": 3.803235059594551e-07, "loss": 0.1694, "step": 7972 }, { "epoch": 2.662548004675238, "grad_norm": 0.5805953775265659, "learning_rate": 3.795804351276072e-07, "loss": 0.1654, "step": 7973 }, { "epoch": 2.6628819502421104, "grad_norm": 0.5111928242314996, "learning_rate": 3.788380622645382e-07, "loss": 0.1611, "step": 7974 }, { "epoch": 2.663215895808983, "grad_norm": 0.5324550619095361, "learning_rate": 3.780963874823934e-07, "loss": 0.152, "step": 7975 }, { "epoch": 2.663549841375856, "grad_norm": 0.533264878533902, "learning_rate": 3.773554108932093e-07, "loss": 0.1625, "step": 7976 }, { "epoch": 2.663883786942728, "grad_norm": 0.5447286073196415, "learning_rate": 3.7661513260892067e-07, "loss": 0.1549, "step": 7977 }, { "epoch": 2.664217732509601, "grad_norm": 0.5342876864651773, "learning_rate": 3.7587555274135544e-07, "loss": 0.1578, "step": 7978 }, { "epoch": 2.6645516780764735, "grad_norm": 0.5094628488311667, "learning_rate": 3.751366714022342e-07, "loss": 0.1491, "step": 7979 }, { "epoch": 2.6648856236433462, "grad_norm": 0.5346577689522607, "learning_rate": 3.7439848870317487e-07, "loss": 0.1602, "step": 7980 }, { "epoch": 2.665219569210219, "grad_norm": 0.5777180909450799, "learning_rate": 3.7366100475568935e-07, "loss": 0.1643, "step": 7981 }, { "epoch": 2.6655535147770912, "grad_norm": 0.552682877062851, "learning_rate": 3.7292421967118185e-07, "loss": 0.1615, "step": 7982 }, { "epoch": 2.665887460343964, "grad_norm": 0.5213827805993274, "learning_rate": 3.72188133560954e-07, "loss": 0.154, "step": 7983 }, { "epoch": 2.6662214059108367, "grad_norm": 0.5552413572866788, "learning_rate": 3.7145274653619776e-07, "loss": 0.158, "step": 7984 }, { "epoch": 2.666555351477709, "grad_norm": 0.4755073461749714, "learning_rate": 3.7071805870800395e-07, "loss": 0.145, "step": 7985 }, { "epoch": 2.6668892970445817, "grad_norm": 0.5444612344044898, "learning_rate": 3.6998407018735525e-07, "loss": 0.1573, "step": 7986 }, { "epoch": 2.6672232426114544, "grad_norm": 0.5403035052984416, "learning_rate": 3.6925078108513033e-07, "loss": 0.1538, "step": 7987 }, { "epoch": 2.667557188178327, "grad_norm": 0.4817666520309885, "learning_rate": 3.6851819151209947e-07, "loss": 0.1466, "step": 7988 }, { "epoch": 2.6678911337451994, "grad_norm": 0.5338765353416609, "learning_rate": 3.677863015789307e-07, "loss": 0.1503, "step": 7989 }, { "epoch": 2.668225079312072, "grad_norm": 0.4705920965256273, "learning_rate": 3.6705511139618177e-07, "loss": 0.1401, "step": 7990 }, { "epoch": 2.668559024878945, "grad_norm": 0.5343266008666374, "learning_rate": 3.66324621074311e-07, "loss": 0.1522, "step": 7991 }, { "epoch": 2.6688929704458175, "grad_norm": 0.5223355051647398, "learning_rate": 3.6559483072366506e-07, "loss": 0.1549, "step": 7992 }, { "epoch": 2.66922691601269, "grad_norm": 0.5378562856207899, "learning_rate": 3.6486574045448973e-07, "loss": 0.1561, "step": 7993 }, { "epoch": 2.6695608615795625, "grad_norm": 0.5167077818281667, "learning_rate": 3.6413735037691966e-07, "loss": 0.1433, "step": 7994 }, { "epoch": 2.6698948071464352, "grad_norm": 0.5341312147095918, "learning_rate": 3.634096606009896e-07, "loss": 0.1602, "step": 7995 }, { "epoch": 2.6702287527133075, "grad_norm": 0.5225832292402557, "learning_rate": 3.626826712366233e-07, "loss": 0.1424, "step": 7996 }, { "epoch": 2.6705626982801802, "grad_norm": 0.5189043060999742, "learning_rate": 3.6195638239364225e-07, "loss": 0.1518, "step": 7997 }, { "epoch": 2.670896643847053, "grad_norm": 0.5300387936707925, "learning_rate": 3.612307941817622e-07, "loss": 0.1572, "step": 7998 }, { "epoch": 2.6712305894139257, "grad_norm": 0.45974788723278426, "learning_rate": 3.605059067105887e-07, "loss": 0.1372, "step": 7999 }, { "epoch": 2.6715645349807984, "grad_norm": 0.5306840434506943, "learning_rate": 3.59781720089627e-07, "loss": 0.1524, "step": 8000 }, { "epoch": 2.6718984805476707, "grad_norm": 0.5618883169905055, "learning_rate": 3.5905823442827393e-07, "loss": 0.1654, "step": 8001 }, { "epoch": 2.6722324261145434, "grad_norm": 0.5110715136379484, "learning_rate": 3.583354498358188e-07, "loss": 0.1548, "step": 8002 }, { "epoch": 2.672566371681416, "grad_norm": 0.5165177851710339, "learning_rate": 3.576133664214476e-07, "loss": 0.1534, "step": 8003 }, { "epoch": 2.6729003172482884, "grad_norm": 0.5427360752665066, "learning_rate": 3.568919842942409e-07, "loss": 0.1529, "step": 8004 }, { "epoch": 2.673234262815161, "grad_norm": 0.5339557782418821, "learning_rate": 3.5617130356316977e-07, "loss": 0.1593, "step": 8005 }, { "epoch": 2.673568208382034, "grad_norm": 0.556828539859666, "learning_rate": 3.554513243371038e-07, "loss": 0.1574, "step": 8006 }, { "epoch": 2.673902153948906, "grad_norm": 0.538619853386981, "learning_rate": 3.5473204672480224e-07, "loss": 0.1667, "step": 8007 }, { "epoch": 2.674236099515779, "grad_norm": 0.5434513800104122, "learning_rate": 3.5401347083492077e-07, "loss": 0.1479, "step": 8008 }, { "epoch": 2.6745700450826515, "grad_norm": 0.5290721889738227, "learning_rate": 3.532955967760093e-07, "loss": 0.1585, "step": 8009 }, { "epoch": 2.674903990649524, "grad_norm": 0.49434759453802185, "learning_rate": 3.5257842465651226e-07, "loss": 0.1473, "step": 8010 }, { "epoch": 2.675237936216397, "grad_norm": 0.5113588034851804, "learning_rate": 3.5186195458476515e-07, "loss": 0.1493, "step": 8011 }, { "epoch": 2.675571881783269, "grad_norm": 0.5688152683995327, "learning_rate": 3.5114618666900023e-07, "loss": 0.1628, "step": 8012 }, { "epoch": 2.675905827350142, "grad_norm": 0.5039811736577918, "learning_rate": 3.5043112101734166e-07, "loss": 0.151, "step": 8013 }, { "epoch": 2.6762397729170146, "grad_norm": 0.4828551437518733, "learning_rate": 3.4971675773780913e-07, "loss": 0.1432, "step": 8014 }, { "epoch": 2.676573718483887, "grad_norm": 0.5549856306713297, "learning_rate": 3.490030969383157e-07, "loss": 0.1592, "step": 8015 }, { "epoch": 2.6769076640507596, "grad_norm": 0.5379246934070531, "learning_rate": 3.482901387266685e-07, "loss": 0.1618, "step": 8016 }, { "epoch": 2.6772416096176324, "grad_norm": 0.5072517308727558, "learning_rate": 3.475778832105681e-07, "loss": 0.1423, "step": 8017 }, { "epoch": 2.677575555184505, "grad_norm": 0.4998020934814151, "learning_rate": 3.468663304976089e-07, "loss": 0.1552, "step": 8018 }, { "epoch": 2.677909500751378, "grad_norm": 0.5309140304675917, "learning_rate": 3.4615548069527883e-07, "loss": 0.1557, "step": 8019 }, { "epoch": 2.67824344631825, "grad_norm": 0.5489735365073403, "learning_rate": 3.4544533391096093e-07, "loss": 0.154, "step": 8020 }, { "epoch": 2.678577391885123, "grad_norm": 0.6022169835721196, "learning_rate": 3.4473589025193155e-07, "loss": 0.1569, "step": 8021 }, { "epoch": 2.6789113374519955, "grad_norm": 0.5545056781397845, "learning_rate": 3.440271498253589e-07, "loss": 0.1612, "step": 8022 }, { "epoch": 2.6792452830188678, "grad_norm": 0.5777137091933007, "learning_rate": 3.433191127383079e-07, "loss": 0.1696, "step": 8023 }, { "epoch": 2.6795792285857405, "grad_norm": 0.496180545244153, "learning_rate": 3.4261177909773624e-07, "loss": 0.1493, "step": 8024 }, { "epoch": 2.679913174152613, "grad_norm": 0.5524161786976409, "learning_rate": 3.419051490104935e-07, "loss": 0.1599, "step": 8025 }, { "epoch": 2.6802471197194855, "grad_norm": 0.5548262424062345, "learning_rate": 3.4119922258332496e-07, "loss": 0.1652, "step": 8026 }, { "epoch": 2.680581065286358, "grad_norm": 0.5153735852895094, "learning_rate": 3.4049399992287067e-07, "loss": 0.1486, "step": 8027 }, { "epoch": 2.680915010853231, "grad_norm": 0.5127645749070047, "learning_rate": 3.3978948113566056e-07, "loss": 0.1538, "step": 8028 }, { "epoch": 2.6812489564201036, "grad_norm": 0.5397099844954941, "learning_rate": 3.390856663281228e-07, "loss": 0.1595, "step": 8029 }, { "epoch": 2.6815829019869764, "grad_norm": 0.5237062122977899, "learning_rate": 3.3838255560657453e-07, "loss": 0.1513, "step": 8030 }, { "epoch": 2.6819168475538486, "grad_norm": 0.5395864823693816, "learning_rate": 3.3768014907722966e-07, "loss": 0.1548, "step": 8031 }, { "epoch": 2.6822507931207213, "grad_norm": 0.4867347579548099, "learning_rate": 3.369784468461956e-07, "loss": 0.1428, "step": 8032 }, { "epoch": 2.682584738687594, "grad_norm": 0.5527869275755876, "learning_rate": 3.3627744901947313e-07, "loss": 0.1527, "step": 8033 }, { "epoch": 2.6829186842544663, "grad_norm": 0.5023108032433418, "learning_rate": 3.3557715570295523e-07, "loss": 0.1514, "step": 8034 }, { "epoch": 2.683252629821339, "grad_norm": 0.5360412608959365, "learning_rate": 3.3487756700243014e-07, "loss": 0.1604, "step": 8035 }, { "epoch": 2.6835865753882118, "grad_norm": 0.5424139704297755, "learning_rate": 3.341786830235777e-07, "loss": 0.1544, "step": 8036 }, { "epoch": 2.6839205209550845, "grad_norm": 0.5048209881726411, "learning_rate": 3.334805038719735e-07, "loss": 0.1545, "step": 8037 }, { "epoch": 2.6842544665219568, "grad_norm": 0.5486406304826595, "learning_rate": 3.3278302965308593e-07, "loss": 0.1686, "step": 8038 }, { "epoch": 2.6845884120888295, "grad_norm": 0.5300204256810848, "learning_rate": 3.3208626047227687e-07, "loss": 0.1601, "step": 8039 }, { "epoch": 2.684922357655702, "grad_norm": 0.5323499926681304, "learning_rate": 3.313901964348004e-07, "loss": 0.1554, "step": 8040 }, { "epoch": 2.685256303222575, "grad_norm": 0.5355346384229268, "learning_rate": 3.306948376458069e-07, "loss": 0.163, "step": 8041 }, { "epoch": 2.685590248789447, "grad_norm": 0.5435803333076292, "learning_rate": 3.3000018421033675e-07, "loss": 0.1511, "step": 8042 }, { "epoch": 2.68592419435632, "grad_norm": 0.5284470262839093, "learning_rate": 3.29306236233326e-07, "loss": 0.1517, "step": 8043 }, { "epoch": 2.6862581399231926, "grad_norm": 0.5375116338923487, "learning_rate": 3.286129938196048e-07, "loss": 0.149, "step": 8044 }, { "epoch": 2.686592085490065, "grad_norm": 0.5196788029953022, "learning_rate": 3.279204570738936e-07, "loss": 0.1494, "step": 8045 }, { "epoch": 2.6869260310569376, "grad_norm": 0.5989969976700711, "learning_rate": 3.272286261008095e-07, "loss": 0.1594, "step": 8046 }, { "epoch": 2.6872599766238103, "grad_norm": 0.533802596049426, "learning_rate": 3.2653750100486213e-07, "loss": 0.1606, "step": 8047 }, { "epoch": 2.687593922190683, "grad_norm": 0.5985362027338466, "learning_rate": 3.25847081890453e-07, "loss": 0.1645, "step": 8048 }, { "epoch": 2.6879278677575558, "grad_norm": 0.5123641039133764, "learning_rate": 3.251573688618781e-07, "loss": 0.1541, "step": 8049 }, { "epoch": 2.688261813324428, "grad_norm": 0.5633459684436906, "learning_rate": 3.2446836202332854e-07, "loss": 0.1641, "step": 8050 }, { "epoch": 2.6885957588913008, "grad_norm": 0.5383588555865818, "learning_rate": 3.237800614788844e-07, "loss": 0.1596, "step": 8051 }, { "epoch": 2.6889297044581735, "grad_norm": 0.5287494546807463, "learning_rate": 3.230924673325231e-07, "loss": 0.1483, "step": 8052 }, { "epoch": 2.6892636500250457, "grad_norm": 0.5349375464782349, "learning_rate": 3.2240557968811315e-07, "loss": 0.1511, "step": 8053 }, { "epoch": 2.6895975955919185, "grad_norm": 0.5826177066382261, "learning_rate": 3.217193986494177e-07, "loss": 0.1605, "step": 8054 }, { "epoch": 2.689931541158791, "grad_norm": 0.5289988608050177, "learning_rate": 3.2103392432009105e-07, "loss": 0.1425, "step": 8055 }, { "epoch": 2.6902654867256635, "grad_norm": 0.5460451950316173, "learning_rate": 3.203491568036843e-07, "loss": 0.1566, "step": 8056 }, { "epoch": 2.690599432292536, "grad_norm": 0.47868697237246965, "learning_rate": 3.196650962036374e-07, "loss": 0.148, "step": 8057 }, { "epoch": 2.690933377859409, "grad_norm": 0.5647617020216217, "learning_rate": 3.189817426232883e-07, "loss": 0.1633, "step": 8058 }, { "epoch": 2.6912673234262816, "grad_norm": 0.5517400532178436, "learning_rate": 3.182990961658633e-07, "loss": 0.1611, "step": 8059 }, { "epoch": 2.6916012689931543, "grad_norm": 0.5601119667361114, "learning_rate": 3.1761715693448546e-07, "loss": 0.1586, "step": 8060 }, { "epoch": 2.6919352145600266, "grad_norm": 0.5071398052612933, "learning_rate": 3.1693592503216795e-07, "loss": 0.1504, "step": 8061 }, { "epoch": 2.6922691601268993, "grad_norm": 0.5063705615598098, "learning_rate": 3.162554005618218e-07, "loss": 0.1574, "step": 8062 }, { "epoch": 2.692603105693772, "grad_norm": 0.5263427681179844, "learning_rate": 3.155755836262464e-07, "loss": 0.1428, "step": 8063 }, { "epoch": 2.6929370512606443, "grad_norm": 0.5440253785171091, "learning_rate": 3.148964743281363e-07, "loss": 0.1609, "step": 8064 }, { "epoch": 2.693270996827517, "grad_norm": 0.5213216657932812, "learning_rate": 3.1421807277007885e-07, "loss": 0.1525, "step": 8065 }, { "epoch": 2.6936049423943897, "grad_norm": 0.5149269451276092, "learning_rate": 3.1354037905455547e-07, "loss": 0.1546, "step": 8066 }, { "epoch": 2.6939388879612625, "grad_norm": 0.5446427194292313, "learning_rate": 3.1286339328393755e-07, "loss": 0.166, "step": 8067 }, { "epoch": 2.694272833528135, "grad_norm": 0.5545769562726003, "learning_rate": 3.1218711556049494e-07, "loss": 0.1616, "step": 8068 }, { "epoch": 2.6946067790950075, "grad_norm": 0.5233064501341601, "learning_rate": 3.115115459863849e-07, "loss": 0.1564, "step": 8069 }, { "epoch": 2.69494072466188, "grad_norm": 0.5760191622579325, "learning_rate": 3.108366846636618e-07, "loss": 0.162, "step": 8070 }, { "epoch": 2.695274670228753, "grad_norm": 0.52977035763498, "learning_rate": 3.101625316942697e-07, "loss": 0.1517, "step": 8071 }, { "epoch": 2.695608615795625, "grad_norm": 0.5592586244479276, "learning_rate": 3.094890871800488e-07, "loss": 0.1644, "step": 8072 }, { "epoch": 2.695942561362498, "grad_norm": 0.5035640831811913, "learning_rate": 3.0881635122273047e-07, "loss": 0.1459, "step": 8073 }, { "epoch": 2.6962765069293706, "grad_norm": 0.5410337877985646, "learning_rate": 3.0814432392393847e-07, "loss": 0.1599, "step": 8074 }, { "epoch": 2.696610452496243, "grad_norm": 0.5273773774689604, "learning_rate": 3.074730053851921e-07, "loss": 0.1553, "step": 8075 }, { "epoch": 2.6969443980631156, "grad_norm": 0.48100970564002676, "learning_rate": 3.068023957078997e-07, "loss": 0.1479, "step": 8076 }, { "epoch": 2.6972783436299883, "grad_norm": 0.49067868878960497, "learning_rate": 3.061324949933675e-07, "loss": 0.1342, "step": 8077 }, { "epoch": 2.697612289196861, "grad_norm": 0.5583056416030368, "learning_rate": 3.054633033427884e-07, "loss": 0.1661, "step": 8078 }, { "epoch": 2.6979462347637337, "grad_norm": 0.5215917195872181, "learning_rate": 3.0479482085725545e-07, "loss": 0.1538, "step": 8079 }, { "epoch": 2.698280180330606, "grad_norm": 0.5729067492916314, "learning_rate": 3.0412704763774836e-07, "loss": 0.1695, "step": 8080 }, { "epoch": 2.6986141258974787, "grad_norm": 0.5332376143310963, "learning_rate": 3.034599837851432e-07, "loss": 0.1607, "step": 8081 }, { "epoch": 2.6989480714643515, "grad_norm": 0.4942390417812226, "learning_rate": 3.027936294002071e-07, "loss": 0.1476, "step": 8082 }, { "epoch": 2.6992820170312237, "grad_norm": 0.5414771162644846, "learning_rate": 3.021279845836017e-07, "loss": 0.1559, "step": 8083 }, { "epoch": 2.6996159625980964, "grad_norm": 0.5527997876111395, "learning_rate": 3.0146304943587833e-07, "loss": 0.1595, "step": 8084 }, { "epoch": 2.699949908164969, "grad_norm": 0.5062233776814364, "learning_rate": 3.007988240574866e-07, "loss": 0.1522, "step": 8085 }, { "epoch": 2.7002838537318414, "grad_norm": 0.5204497543953683, "learning_rate": 3.0013530854876296e-07, "loss": 0.1471, "step": 8086 }, { "epoch": 2.700617799298714, "grad_norm": 0.5209478565356521, "learning_rate": 2.9947250300994046e-07, "loss": 0.1586, "step": 8087 }, { "epoch": 2.700951744865587, "grad_norm": 0.5312789973530363, "learning_rate": 2.98810407541143e-07, "loss": 0.1588, "step": 8088 }, { "epoch": 2.7012856904324596, "grad_norm": 0.5075938953896761, "learning_rate": 2.9814902224238886e-07, "loss": 0.1484, "step": 8089 }, { "epoch": 2.7016196359993323, "grad_norm": 0.5160275474594337, "learning_rate": 2.974883472135859e-07, "loss": 0.147, "step": 8090 }, { "epoch": 2.7019535815662046, "grad_norm": 0.5232544001518299, "learning_rate": 2.968283825545398e-07, "loss": 0.1496, "step": 8091 }, { "epoch": 2.7022875271330773, "grad_norm": 0.5034655279070774, "learning_rate": 2.961691283649437e-07, "loss": 0.1468, "step": 8092 }, { "epoch": 2.70262147269995, "grad_norm": 0.5510510157293431, "learning_rate": 2.955105847443873e-07, "loss": 0.1611, "step": 8093 }, { "epoch": 2.7029554182668223, "grad_norm": 0.5315473818061955, "learning_rate": 2.9485275179235e-07, "loss": 0.15, "step": 8094 }, { "epoch": 2.703289363833695, "grad_norm": 0.536253149180482, "learning_rate": 2.9419562960820656e-07, "loss": 0.1589, "step": 8095 }, { "epoch": 2.7036233094005677, "grad_norm": 0.5306591757905975, "learning_rate": 2.9353921829122167e-07, "loss": 0.153, "step": 8096 }, { "epoch": 2.7039572549674404, "grad_norm": 0.5436819952810656, "learning_rate": 2.928835179405548e-07, "loss": 0.1599, "step": 8097 }, { "epoch": 2.704291200534313, "grad_norm": 0.5402673068926906, "learning_rate": 2.922285286552579e-07, "loss": 0.1529, "step": 8098 }, { "epoch": 2.7046251461011854, "grad_norm": 0.5004065904993527, "learning_rate": 2.915742505342728e-07, "loss": 0.1487, "step": 8099 }, { "epoch": 2.704959091668058, "grad_norm": 0.5229279567002475, "learning_rate": 2.9092068367643776e-07, "loss": 0.1545, "step": 8100 }, { "epoch": 2.705293037234931, "grad_norm": 0.48729652498503284, "learning_rate": 2.902678281804805e-07, "loss": 0.1414, "step": 8101 }, { "epoch": 2.705626982801803, "grad_norm": 0.4747083845884613, "learning_rate": 2.896156841450232e-07, "loss": 0.1441, "step": 8102 }, { "epoch": 2.705960928368676, "grad_norm": 0.5342365800558092, "learning_rate": 2.8896425166857976e-07, "loss": 0.1543, "step": 8103 }, { "epoch": 2.7062948739355486, "grad_norm": 0.5292850700939392, "learning_rate": 2.8831353084955717e-07, "loss": 0.1537, "step": 8104 }, { "epoch": 2.706628819502421, "grad_norm": 0.5500522283182325, "learning_rate": 2.8766352178625387e-07, "loss": 0.1616, "step": 8105 }, { "epoch": 2.7069627650692936, "grad_norm": 0.5134535972496134, "learning_rate": 2.87014224576862e-07, "loss": 0.1523, "step": 8106 }, { "epoch": 2.7072967106361663, "grad_norm": 0.5471782056045605, "learning_rate": 2.863656393194636e-07, "loss": 0.1475, "step": 8107 }, { "epoch": 2.707630656203039, "grad_norm": 0.5110760153803138, "learning_rate": 2.8571776611203804e-07, "loss": 0.1513, "step": 8108 }, { "epoch": 2.7079646017699117, "grad_norm": 0.5385910497219654, "learning_rate": 2.850706050524521e-07, "loss": 0.1604, "step": 8109 }, { "epoch": 2.708298547336784, "grad_norm": 0.5334396636556735, "learning_rate": 2.844241562384686e-07, "loss": 0.1556, "step": 8110 }, { "epoch": 2.7086324929036567, "grad_norm": 0.5204610290097634, "learning_rate": 2.8377841976773955e-07, "loss": 0.1543, "step": 8111 }, { "epoch": 2.7089664384705294, "grad_norm": 0.5496955299983243, "learning_rate": 2.83133395737813e-07, "loss": 0.1698, "step": 8112 }, { "epoch": 2.7093003840374017, "grad_norm": 0.5536709752852906, "learning_rate": 2.824890842461242e-07, "loss": 0.1603, "step": 8113 }, { "epoch": 2.7096343296042744, "grad_norm": 0.5010653680915442, "learning_rate": 2.818454853900082e-07, "loss": 0.1461, "step": 8114 }, { "epoch": 2.709968275171147, "grad_norm": 0.5267900896237233, "learning_rate": 2.8120259926668505e-07, "loss": 0.152, "step": 8115 }, { "epoch": 2.71030222073802, "grad_norm": 0.5478437369126266, "learning_rate": 2.8056042597327196e-07, "loss": 0.1687, "step": 8116 }, { "epoch": 2.7106361663048926, "grad_norm": 0.5359815228385162, "learning_rate": 2.799189656067758e-07, "loss": 0.156, "step": 8117 }, { "epoch": 2.710970111871765, "grad_norm": 0.5151592514466387, "learning_rate": 2.792782182640974e-07, "loss": 0.153, "step": 8118 }, { "epoch": 2.7113040574386376, "grad_norm": 0.5344553467307052, "learning_rate": 2.7863818404202823e-07, "loss": 0.16, "step": 8119 }, { "epoch": 2.7116380030055103, "grad_norm": 0.5700785819621733, "learning_rate": 2.7799886303725376e-07, "loss": 0.1635, "step": 8120 }, { "epoch": 2.7119719485723826, "grad_norm": 0.5333711626924268, "learning_rate": 2.7736025534635115e-07, "loss": 0.1471, "step": 8121 }, { "epoch": 2.7123058941392553, "grad_norm": 0.5161369376262971, "learning_rate": 2.767223610657888e-07, "loss": 0.1532, "step": 8122 }, { "epoch": 2.712639839706128, "grad_norm": 0.579164591351255, "learning_rate": 2.7608518029192897e-07, "loss": 0.1704, "step": 8123 }, { "epoch": 2.7129737852730003, "grad_norm": 0.556780216361864, "learning_rate": 2.7544871312102485e-07, "loss": 0.1614, "step": 8124 }, { "epoch": 2.713307730839873, "grad_norm": 0.5134496409120298, "learning_rate": 2.7481295964922216e-07, "loss": 0.1524, "step": 8125 }, { "epoch": 2.7136416764067457, "grad_norm": 0.5205512225709891, "learning_rate": 2.7417791997255916e-07, "loss": 0.1511, "step": 8126 }, { "epoch": 2.7139756219736184, "grad_norm": 0.5524009827531768, "learning_rate": 2.735435941869663e-07, "loss": 0.1591, "step": 8127 }, { "epoch": 2.714309567540491, "grad_norm": 0.5320081308305175, "learning_rate": 2.7290998238826584e-07, "loss": 0.1518, "step": 8128 }, { "epoch": 2.7146435131073634, "grad_norm": 0.5274646254287043, "learning_rate": 2.7227708467217227e-07, "loss": 0.1586, "step": 8129 }, { "epoch": 2.714977458674236, "grad_norm": 0.5314673716570912, "learning_rate": 2.71644901134292e-07, "loss": 0.1594, "step": 8130 }, { "epoch": 2.715311404241109, "grad_norm": 0.5680112090581357, "learning_rate": 2.7101343187012354e-07, "loss": 0.166, "step": 8131 }, { "epoch": 2.715645349807981, "grad_norm": 0.5302166166756883, "learning_rate": 2.7038267697505894e-07, "loss": 0.1601, "step": 8132 }, { "epoch": 2.715979295374854, "grad_norm": 0.5334984611340821, "learning_rate": 2.697526365443803e-07, "loss": 0.1538, "step": 8133 }, { "epoch": 2.7163132409417265, "grad_norm": 0.533924310055932, "learning_rate": 2.691233106732627e-07, "loss": 0.1499, "step": 8134 }, { "epoch": 2.716647186508599, "grad_norm": 0.5165454426854755, "learning_rate": 2.684946994567733e-07, "loss": 0.1507, "step": 8135 }, { "epoch": 2.7169811320754715, "grad_norm": 0.5319683743964586, "learning_rate": 2.678668029898712e-07, "loss": 0.1534, "step": 8136 }, { "epoch": 2.7173150776423443, "grad_norm": 0.49055927054446136, "learning_rate": 2.672396213674072e-07, "loss": 0.1473, "step": 8137 }, { "epoch": 2.717649023209217, "grad_norm": 0.5013407224064176, "learning_rate": 2.66613154684125e-07, "loss": 0.1515, "step": 8138 }, { "epoch": 2.7179829687760897, "grad_norm": 0.557314050415258, "learning_rate": 2.659874030346604e-07, "loss": 0.16, "step": 8139 }, { "epoch": 2.718316914342962, "grad_norm": 0.5188704409179276, "learning_rate": 2.653623665135391e-07, "loss": 0.1485, "step": 8140 }, { "epoch": 2.7186508599098347, "grad_norm": 0.4733489482475054, "learning_rate": 2.6473804521518097e-07, "loss": 0.1369, "step": 8141 }, { "epoch": 2.7189848054767074, "grad_norm": 0.5158640174213729, "learning_rate": 2.641144392338968e-07, "loss": 0.1615, "step": 8142 }, { "epoch": 2.7193187510435797, "grad_norm": 0.5175936291666983, "learning_rate": 2.6349154866389e-07, "loss": 0.1487, "step": 8143 }, { "epoch": 2.7196526966104524, "grad_norm": 0.4851398068455481, "learning_rate": 2.6286937359925545e-07, "loss": 0.1484, "step": 8144 }, { "epoch": 2.719986642177325, "grad_norm": 0.4612050712346406, "learning_rate": 2.622479141339801e-07, "loss": 0.1426, "step": 8145 }, { "epoch": 2.720320587744198, "grad_norm": 0.5103788486703124, "learning_rate": 2.6162717036194274e-07, "loss": 0.1545, "step": 8146 }, { "epoch": 2.7206545333110705, "grad_norm": 0.5508718255938416, "learning_rate": 2.610071423769128e-07, "loss": 0.1552, "step": 8147 }, { "epoch": 2.720988478877943, "grad_norm": 0.5482206957546422, "learning_rate": 2.603878302725543e-07, "loss": 0.162, "step": 8148 }, { "epoch": 2.7213224244448155, "grad_norm": 0.5354816153222969, "learning_rate": 2.5976923414242126e-07, "loss": 0.1591, "step": 8149 }, { "epoch": 2.7216563700116883, "grad_norm": 0.508383617507333, "learning_rate": 2.5915135407996005e-07, "loss": 0.1486, "step": 8150 }, { "epoch": 2.7219903155785605, "grad_norm": 0.4846830693165029, "learning_rate": 2.585341901785082e-07, "loss": 0.1463, "step": 8151 }, { "epoch": 2.7223242611454332, "grad_norm": 0.5589614124312149, "learning_rate": 2.579177425312962e-07, "loss": 0.1631, "step": 8152 }, { "epoch": 2.722658206712306, "grad_norm": 0.5085255581901547, "learning_rate": 2.5730201123144503e-07, "loss": 0.156, "step": 8153 }, { "epoch": 2.7229921522791782, "grad_norm": 0.5678891288995872, "learning_rate": 2.566869963719681e-07, "loss": 0.1583, "step": 8154 }, { "epoch": 2.723326097846051, "grad_norm": 0.5160552742462742, "learning_rate": 2.5607269804577174e-07, "loss": 0.1487, "step": 8155 }, { "epoch": 2.7236600434129237, "grad_norm": 0.5679998884899234, "learning_rate": 2.5545911634565266e-07, "loss": 0.1675, "step": 8156 }, { "epoch": 2.7239939889797964, "grad_norm": 0.49259962902633603, "learning_rate": 2.5484625136429854e-07, "loss": 0.1528, "step": 8157 }, { "epoch": 2.724327934546669, "grad_norm": 0.5321498066466733, "learning_rate": 2.5423410319429075e-07, "loss": 0.1574, "step": 8158 }, { "epoch": 2.7246618801135414, "grad_norm": 0.5491822405598497, "learning_rate": 2.5362267192810095e-07, "loss": 0.1565, "step": 8159 }, { "epoch": 2.724995825680414, "grad_norm": 0.5484143142827499, "learning_rate": 2.530119576580936e-07, "loss": 0.1648, "step": 8160 }, { "epoch": 2.725329771247287, "grad_norm": 0.5123547918351172, "learning_rate": 2.5240196047652377e-07, "loss": 0.1561, "step": 8161 }, { "epoch": 2.725663716814159, "grad_norm": 0.5079554577300277, "learning_rate": 2.5179268047553937e-07, "loss": 0.1473, "step": 8162 }, { "epoch": 2.725997662381032, "grad_norm": 0.5048876817838505, "learning_rate": 2.5118411774717857e-07, "loss": 0.1455, "step": 8163 }, { "epoch": 2.7263316079479045, "grad_norm": 0.5285246244585439, "learning_rate": 2.5057627238337324e-07, "loss": 0.1548, "step": 8164 }, { "epoch": 2.7266655535147772, "grad_norm": 0.5301222062922429, "learning_rate": 2.4996914447594334e-07, "loss": 0.1638, "step": 8165 }, { "epoch": 2.72699949908165, "grad_norm": 0.5486195673192618, "learning_rate": 2.493627341166044e-07, "loss": 0.1557, "step": 8166 }, { "epoch": 2.7273334446485222, "grad_norm": 0.5147618754850339, "learning_rate": 2.48757041396962e-07, "loss": 0.1524, "step": 8167 }, { "epoch": 2.727667390215395, "grad_norm": 0.47570026933007525, "learning_rate": 2.481520664085113e-07, "loss": 0.1414, "step": 8168 }, { "epoch": 2.7280013357822677, "grad_norm": 0.5695233117084313, "learning_rate": 2.4754780924264366e-07, "loss": 0.1671, "step": 8169 }, { "epoch": 2.72833528134914, "grad_norm": 0.5044921765852178, "learning_rate": 2.4694426999063657e-07, "loss": 0.1566, "step": 8170 }, { "epoch": 2.7286692269160127, "grad_norm": 0.5059445019609731, "learning_rate": 2.463414487436633e-07, "loss": 0.151, "step": 8171 }, { "epoch": 2.7290031724828854, "grad_norm": 0.5047425599919322, "learning_rate": 2.4573934559278646e-07, "loss": 0.1518, "step": 8172 }, { "epoch": 2.7293371180497576, "grad_norm": 0.5073492122785407, "learning_rate": 2.4513796062896166e-07, "loss": 0.1472, "step": 8173 }, { "epoch": 2.7296710636166304, "grad_norm": 0.5427657012511299, "learning_rate": 2.4453729394303404e-07, "loss": 0.1567, "step": 8174 }, { "epoch": 2.730005009183503, "grad_norm": 0.5360771115077887, "learning_rate": 2.439373456257427e-07, "loss": 0.1562, "step": 8175 }, { "epoch": 2.730338954750376, "grad_norm": 0.5371756975065656, "learning_rate": 2.433381157677156e-07, "loss": 0.1519, "step": 8176 }, { "epoch": 2.7306729003172485, "grad_norm": 0.5073709026677747, "learning_rate": 2.427396044594743e-07, "loss": 0.1501, "step": 8177 }, { "epoch": 2.731006845884121, "grad_norm": 0.6418391887464554, "learning_rate": 2.421418117914298e-07, "loss": 0.1538, "step": 8178 }, { "epoch": 2.7313407914509935, "grad_norm": 0.5285420352825599, "learning_rate": 2.415447378538871e-07, "loss": 0.1597, "step": 8179 }, { "epoch": 2.7316747370178662, "grad_norm": 0.594718310704362, "learning_rate": 2.409483827370407e-07, "loss": 0.1739, "step": 8180 }, { "epoch": 2.7320086825847385, "grad_norm": 0.4977407152777746, "learning_rate": 2.4035274653097797e-07, "loss": 0.1474, "step": 8181 }, { "epoch": 2.732342628151611, "grad_norm": 0.5529928738083278, "learning_rate": 2.3975782932567473e-07, "loss": 0.1595, "step": 8182 }, { "epoch": 2.732676573718484, "grad_norm": 0.5064571077259249, "learning_rate": 2.391636312110024e-07, "loss": 0.146, "step": 8183 }, { "epoch": 2.733010519285356, "grad_norm": 0.5539490252056876, "learning_rate": 2.385701522767192e-07, "loss": 0.1657, "step": 8184 }, { "epoch": 2.733344464852229, "grad_norm": 0.5679411279580505, "learning_rate": 2.3797739261247955e-07, "loss": 0.1631, "step": 8185 }, { "epoch": 2.7336784104191016, "grad_norm": 0.5833955890630944, "learning_rate": 2.3738535230782568e-07, "loss": 0.1643, "step": 8186 }, { "epoch": 2.7340123559859744, "grad_norm": 0.5280431470599531, "learning_rate": 2.3679403145219214e-07, "loss": 0.1478, "step": 8187 }, { "epoch": 2.734346301552847, "grad_norm": 0.5736288252761815, "learning_rate": 2.362034301349053e-07, "loss": 0.1657, "step": 8188 }, { "epoch": 2.7346802471197194, "grad_norm": 0.5530244579608455, "learning_rate": 2.3561354844518157e-07, "loss": 0.1642, "step": 8189 }, { "epoch": 2.735014192686592, "grad_norm": 0.5518902439512378, "learning_rate": 2.3502438647213132e-07, "loss": 0.1647, "step": 8190 }, { "epoch": 2.735348138253465, "grad_norm": 0.5247716871272328, "learning_rate": 2.3443594430475224e-07, "loss": 0.1542, "step": 8191 }, { "epoch": 2.735682083820337, "grad_norm": 0.5177961555436752, "learning_rate": 2.3384822203193714e-07, "loss": 0.1528, "step": 8192 }, { "epoch": 2.73601602938721, "grad_norm": 0.5447187428407865, "learning_rate": 2.332612197424672e-07, "loss": 0.159, "step": 8193 }, { "epoch": 2.7363499749540825, "grad_norm": 0.5400715115866794, "learning_rate": 2.32674937525017e-07, "loss": 0.1518, "step": 8194 }, { "epoch": 2.736683920520955, "grad_norm": 0.518061402756023, "learning_rate": 2.3208937546815026e-07, "loss": 0.1563, "step": 8195 }, { "epoch": 2.737017866087828, "grad_norm": 0.5073899376170143, "learning_rate": 2.3150453366032445e-07, "loss": 0.1533, "step": 8196 }, { "epoch": 2.7373518116547, "grad_norm": 0.5308916049545908, "learning_rate": 2.309204121898856e-07, "loss": 0.1586, "step": 8197 }, { "epoch": 2.737685757221573, "grad_norm": 0.5483262825685362, "learning_rate": 2.3033701114507313e-07, "loss": 0.1642, "step": 8198 }, { "epoch": 2.7380197027884456, "grad_norm": 0.5920765856051325, "learning_rate": 2.2975433061401541e-07, "loss": 0.1734, "step": 8199 }, { "epoch": 2.738353648355318, "grad_norm": 0.5495984257125057, "learning_rate": 2.2917237068473484e-07, "loss": 0.1591, "step": 8200 }, { "epoch": 2.7386875939221906, "grad_norm": 0.543004345814433, "learning_rate": 2.2859113144514055e-07, "loss": 0.1581, "step": 8201 }, { "epoch": 2.7390215394890634, "grad_norm": 0.49894758949368256, "learning_rate": 2.2801061298303895e-07, "loss": 0.154, "step": 8202 }, { "epoch": 2.7393554850559356, "grad_norm": 0.5334926320488186, "learning_rate": 2.2743081538612154e-07, "loss": 0.1525, "step": 8203 }, { "epoch": 2.7396894306228083, "grad_norm": 0.5413584792641948, "learning_rate": 2.268517387419761e-07, "loss": 0.1558, "step": 8204 }, { "epoch": 2.740023376189681, "grad_norm": 0.5686222507446349, "learning_rate": 2.2627338313807645e-07, "loss": 0.1636, "step": 8205 }, { "epoch": 2.7403573217565538, "grad_norm": 0.5348153988527491, "learning_rate": 2.2569574866179166e-07, "loss": 0.1592, "step": 8206 }, { "epoch": 2.7406912673234265, "grad_norm": 0.5642140442258513, "learning_rate": 2.2511883540037805e-07, "loss": 0.1573, "step": 8207 }, { "epoch": 2.7410252128902988, "grad_norm": 0.5024714154716956, "learning_rate": 2.2454264344098865e-07, "loss": 0.1479, "step": 8208 }, { "epoch": 2.7413591584571715, "grad_norm": 0.517979664407899, "learning_rate": 2.2396717287066106e-07, "loss": 0.145, "step": 8209 }, { "epoch": 2.741693104024044, "grad_norm": 0.5289911278223267, "learning_rate": 2.233924237763291e-07, "loss": 0.1537, "step": 8210 }, { "epoch": 2.7420270495909165, "grad_norm": 0.519553175252985, "learning_rate": 2.2281839624481328e-07, "loss": 0.1495, "step": 8211 }, { "epoch": 2.742360995157789, "grad_norm": 0.5335564407178233, "learning_rate": 2.222450903628287e-07, "loss": 0.1557, "step": 8212 }, { "epoch": 2.742694940724662, "grad_norm": 0.5617213957103794, "learning_rate": 2.2167250621697944e-07, "loss": 0.1529, "step": 8213 }, { "epoch": 2.7430288862915346, "grad_norm": 0.533394370268705, "learning_rate": 2.2110064389376017e-07, "loss": 0.1554, "step": 8214 }, { "epoch": 2.7433628318584073, "grad_norm": 0.5242839412788696, "learning_rate": 2.205295034795596e-07, "loss": 0.1545, "step": 8215 }, { "epoch": 2.7436967774252796, "grad_norm": 0.5180468545852273, "learning_rate": 2.1995908506065366e-07, "loss": 0.1562, "step": 8216 }, { "epoch": 2.7440307229921523, "grad_norm": 0.5525667020861501, "learning_rate": 2.1938938872321014e-07, "loss": 0.153, "step": 8217 }, { "epoch": 2.744364668559025, "grad_norm": 0.5713615936439899, "learning_rate": 2.1882041455329073e-07, "loss": 0.1606, "step": 8218 }, { "epoch": 2.7446986141258973, "grad_norm": 0.5264399545928933, "learning_rate": 2.1825216263684336e-07, "loss": 0.1556, "step": 8219 }, { "epoch": 2.74503255969277, "grad_norm": 0.4902137034825769, "learning_rate": 2.176846330597099e-07, "loss": 0.1515, "step": 8220 }, { "epoch": 2.7453665052596428, "grad_norm": 0.5132192422380915, "learning_rate": 2.1711782590762344e-07, "loss": 0.148, "step": 8221 }, { "epoch": 2.745700450826515, "grad_norm": 0.5177457173738288, "learning_rate": 2.165517412662055e-07, "loss": 0.1553, "step": 8222 }, { "epoch": 2.7460343963933878, "grad_norm": 0.5390903052995786, "learning_rate": 2.1598637922097098e-07, "loss": 0.1537, "step": 8223 }, { "epoch": 2.7463683419602605, "grad_norm": 0.5250086474438852, "learning_rate": 2.1542173985732274e-07, "loss": 0.1467, "step": 8224 }, { "epoch": 2.746702287527133, "grad_norm": 0.4842237891444192, "learning_rate": 2.148578232605575e-07, "loss": 0.1439, "step": 8225 }, { "epoch": 2.747036233094006, "grad_norm": 0.4949766102656577, "learning_rate": 2.14294629515861e-07, "loss": 0.1492, "step": 8226 }, { "epoch": 2.747370178660878, "grad_norm": 0.5230593113441051, "learning_rate": 2.137321587083119e-07, "loss": 0.1517, "step": 8227 }, { "epoch": 2.747704124227751, "grad_norm": 0.5123186979953736, "learning_rate": 2.1317041092287548e-07, "loss": 0.1467, "step": 8228 }, { "epoch": 2.7480380697946236, "grad_norm": 0.5123779178667591, "learning_rate": 2.126093862444123e-07, "loss": 0.1604, "step": 8229 }, { "epoch": 2.748372015361496, "grad_norm": 0.5225226900228949, "learning_rate": 2.1204908475767005e-07, "loss": 0.1521, "step": 8230 }, { "epoch": 2.7487059609283686, "grad_norm": 0.5023831154513653, "learning_rate": 2.114895065472905e-07, "loss": 0.1449, "step": 8231 }, { "epoch": 2.7490399064952413, "grad_norm": 0.5847888545067589, "learning_rate": 2.109306516978038e-07, "loss": 0.1677, "step": 8232 }, { "epoch": 2.7493738520621136, "grad_norm": 0.49554499992889467, "learning_rate": 2.1037252029363242e-07, "loss": 0.1469, "step": 8233 }, { "epoch": 2.7497077976289863, "grad_norm": 0.5556392349656654, "learning_rate": 2.098151124190867e-07, "loss": 0.164, "step": 8234 }, { "epoch": 2.750041743195859, "grad_norm": 0.5186943346251153, "learning_rate": 2.092584281583715e-07, "loss": 0.1501, "step": 8235 }, { "epoch": 2.7503756887627318, "grad_norm": 0.5265346475920051, "learning_rate": 2.0870246759557956e-07, "loss": 0.1491, "step": 8236 }, { "epoch": 2.7507096343296045, "grad_norm": 0.5111789881374325, "learning_rate": 2.0814723081469535e-07, "loss": 0.1546, "step": 8237 }, { "epoch": 2.7510435798964767, "grad_norm": 0.5401318445314278, "learning_rate": 2.0759271789959513e-07, "loss": 0.1534, "step": 8238 }, { "epoch": 2.7513775254633495, "grad_norm": 0.534377040910652, "learning_rate": 2.0703892893404299e-07, "loss": 0.1541, "step": 8239 }, { "epoch": 2.751711471030222, "grad_norm": 0.5721615250971579, "learning_rate": 2.064858640016959e-07, "loss": 0.1592, "step": 8240 }, { "epoch": 2.7520454165970945, "grad_norm": 0.5306616003142766, "learning_rate": 2.0593352318610093e-07, "loss": 0.1585, "step": 8241 }, { "epoch": 2.752379362163967, "grad_norm": 0.502808647723062, "learning_rate": 2.0538190657069523e-07, "loss": 0.1422, "step": 8242 }, { "epoch": 2.75271330773084, "grad_norm": 0.5580436841293485, "learning_rate": 2.048310142388077e-07, "loss": 0.161, "step": 8243 }, { "epoch": 2.7530472532977126, "grad_norm": 0.5351425052272484, "learning_rate": 2.0428084627365729e-07, "loss": 0.149, "step": 8244 }, { "epoch": 2.7533811988645853, "grad_norm": 0.4946587104254831, "learning_rate": 2.0373140275835203e-07, "loss": 0.1361, "step": 8245 }, { "epoch": 2.7537151444314576, "grad_norm": 0.5167893750141161, "learning_rate": 2.0318268377589323e-07, "loss": 0.1504, "step": 8246 }, { "epoch": 2.7540490899983303, "grad_norm": 0.550129526570199, "learning_rate": 2.026346894091702e-07, "loss": 0.1593, "step": 8247 }, { "epoch": 2.754383035565203, "grad_norm": 0.5486648519214529, "learning_rate": 2.0208741974096445e-07, "loss": 0.1584, "step": 8248 }, { "epoch": 2.7547169811320753, "grad_norm": 0.5266687534141231, "learning_rate": 2.0154087485394713e-07, "loss": 0.1468, "step": 8249 }, { "epoch": 2.755050926698948, "grad_norm": 0.5303620209898399, "learning_rate": 2.0099505483068216e-07, "loss": 0.1524, "step": 8250 }, { "epoch": 2.7553848722658207, "grad_norm": 0.5017401492165686, "learning_rate": 2.0044995975361914e-07, "loss": 0.1476, "step": 8251 }, { "epoch": 2.755718817832693, "grad_norm": 0.5512204576696247, "learning_rate": 1.9990558970510388e-07, "loss": 0.1627, "step": 8252 }, { "epoch": 2.7560527633995657, "grad_norm": 0.5328549457695625, "learning_rate": 1.9936194476736782e-07, "loss": 0.1469, "step": 8253 }, { "epoch": 2.7563867089664384, "grad_norm": 0.5332369435035683, "learning_rate": 1.9881902502253525e-07, "loss": 0.1586, "step": 8254 }, { "epoch": 2.756720654533311, "grad_norm": 0.5442681969901398, "learning_rate": 1.9827683055262114e-07, "loss": 0.1625, "step": 8255 }, { "epoch": 2.757054600100184, "grad_norm": 0.5073616134036689, "learning_rate": 1.977353614395311e-07, "loss": 0.1479, "step": 8256 }, { "epoch": 2.757388545667056, "grad_norm": 0.5378551125440103, "learning_rate": 1.971946177650591e-07, "loss": 0.1518, "step": 8257 }, { "epoch": 2.757722491233929, "grad_norm": 0.549121416484626, "learning_rate": 1.966545996108915e-07, "loss": 0.1548, "step": 8258 }, { "epoch": 2.7580564368008016, "grad_norm": 0.550399829623678, "learning_rate": 1.961153070586036e-07, "loss": 0.1606, "step": 8259 }, { "epoch": 2.758390382367674, "grad_norm": 0.5449800142650917, "learning_rate": 1.9557674018966244e-07, "loss": 0.1538, "step": 8260 }, { "epoch": 2.7587243279345466, "grad_norm": 0.5752625807188275, "learning_rate": 1.9503889908542572e-07, "loss": 0.1673, "step": 8261 }, { "epoch": 2.7590582735014193, "grad_norm": 0.5737368577234173, "learning_rate": 1.9450178382713957e-07, "loss": 0.1677, "step": 8262 }, { "epoch": 2.759392219068292, "grad_norm": 0.5051673849530389, "learning_rate": 1.9396539449594131e-07, "loss": 0.1503, "step": 8263 }, { "epoch": 2.7597261646351647, "grad_norm": 0.4914480531561124, "learning_rate": 1.9342973117286056e-07, "loss": 0.144, "step": 8264 }, { "epoch": 2.760060110202037, "grad_norm": 0.5631823006449259, "learning_rate": 1.9289479393881317e-07, "loss": 0.165, "step": 8265 }, { "epoch": 2.7603940557689097, "grad_norm": 0.5355383401496868, "learning_rate": 1.9236058287460946e-07, "loss": 0.1565, "step": 8266 }, { "epoch": 2.7607280013357824, "grad_norm": 0.5450111267966022, "learning_rate": 1.9182709806094823e-07, "loss": 0.1658, "step": 8267 }, { "epoch": 2.7610619469026547, "grad_norm": 0.5399961091071, "learning_rate": 1.9129433957841781e-07, "loss": 0.1513, "step": 8268 }, { "epoch": 2.7613958924695274, "grad_norm": 0.5322165741350056, "learning_rate": 1.907623075074988e-07, "loss": 0.1516, "step": 8269 }, { "epoch": 2.7617298380364, "grad_norm": 0.5481681573013868, "learning_rate": 1.9023100192855914e-07, "loss": 0.1608, "step": 8270 }, { "epoch": 2.7620637836032724, "grad_norm": 0.544718465373997, "learning_rate": 1.897004229218602e-07, "loss": 0.1524, "step": 8271 }, { "epoch": 2.762397729170145, "grad_norm": 0.5542344358536933, "learning_rate": 1.8917057056755172e-07, "loss": 0.1545, "step": 8272 }, { "epoch": 2.762731674737018, "grad_norm": 0.4983410315703037, "learning_rate": 1.8864144494567528e-07, "loss": 0.1503, "step": 8273 }, { "epoch": 2.7630656203038906, "grad_norm": 0.5290491211012668, "learning_rate": 1.881130461361591e-07, "loss": 0.1575, "step": 8274 }, { "epoch": 2.7633995658707633, "grad_norm": 0.5105092858702401, "learning_rate": 1.8758537421882662e-07, "loss": 0.1462, "step": 8275 }, { "epoch": 2.7637335114376356, "grad_norm": 0.5636688731155922, "learning_rate": 1.870584292733868e-07, "loss": 0.1596, "step": 8276 }, { "epoch": 2.7640674570045083, "grad_norm": 0.547183058318822, "learning_rate": 1.8653221137944155e-07, "loss": 0.1678, "step": 8277 }, { "epoch": 2.764401402571381, "grad_norm": 0.49128257503638373, "learning_rate": 1.8600672061648283e-07, "loss": 0.1442, "step": 8278 }, { "epoch": 2.7647353481382533, "grad_norm": 0.5670280593322085, "learning_rate": 1.8548195706389272e-07, "loss": 0.1703, "step": 8279 }, { "epoch": 2.765069293705126, "grad_norm": 0.4735398261466528, "learning_rate": 1.849579208009411e-07, "loss": 0.1331, "step": 8280 }, { "epoch": 2.7654032392719987, "grad_norm": 0.5617939142612081, "learning_rate": 1.844346119067919e-07, "loss": 0.167, "step": 8281 }, { "epoch": 2.765737184838871, "grad_norm": 0.5642449533519758, "learning_rate": 1.8391203046049522e-07, "loss": 0.1643, "step": 8282 }, { "epoch": 2.7660711304057437, "grad_norm": 0.5244528138173514, "learning_rate": 1.8339017654099344e-07, "loss": 0.1534, "step": 8283 }, { "epoch": 2.7664050759726164, "grad_norm": 0.5292603150792725, "learning_rate": 1.828690502271202e-07, "loss": 0.1611, "step": 8284 }, { "epoch": 2.766739021539489, "grad_norm": 0.5377384654986338, "learning_rate": 1.823486515975964e-07, "loss": 0.1575, "step": 8285 }, { "epoch": 2.767072967106362, "grad_norm": 0.5817154299132605, "learning_rate": 1.818289807310347e-07, "loss": 0.1533, "step": 8286 }, { "epoch": 2.767406912673234, "grad_norm": 0.5228418557171426, "learning_rate": 1.813100377059379e-07, "loss": 0.1563, "step": 8287 }, { "epoch": 2.767740858240107, "grad_norm": 0.5760879601371929, "learning_rate": 1.8079182260069773e-07, "loss": 0.1561, "step": 8288 }, { "epoch": 2.7680748038069796, "grad_norm": 0.49105535586314847, "learning_rate": 1.8027433549359764e-07, "loss": 0.1542, "step": 8289 }, { "epoch": 2.768408749373852, "grad_norm": 0.5680155038927197, "learning_rate": 1.7975757646280955e-07, "loss": 0.1668, "step": 8290 }, { "epoch": 2.7687426949407246, "grad_norm": 0.5082235918459275, "learning_rate": 1.792415455863955e-07, "loss": 0.1557, "step": 8291 }, { "epoch": 2.7690766405075973, "grad_norm": 0.5112458964828165, "learning_rate": 1.7872624294230924e-07, "loss": 0.1538, "step": 8292 }, { "epoch": 2.76941058607447, "grad_norm": 0.5128409883230706, "learning_rate": 1.7821166860839179e-07, "loss": 0.1502, "step": 8293 }, { "epoch": 2.7697445316413427, "grad_norm": 0.5237625902927323, "learning_rate": 1.7769782266237767e-07, "loss": 0.1508, "step": 8294 }, { "epoch": 2.770078477208215, "grad_norm": 0.4762761343236909, "learning_rate": 1.7718470518188645e-07, "loss": 0.1441, "step": 8295 }, { "epoch": 2.7704124227750877, "grad_norm": 0.5057082621318426, "learning_rate": 1.7667231624443393e-07, "loss": 0.1431, "step": 8296 }, { "epoch": 2.7707463683419604, "grad_norm": 0.5366015119904906, "learning_rate": 1.7616065592742038e-07, "loss": 0.1633, "step": 8297 }, { "epoch": 2.7710803139088327, "grad_norm": 0.5063382829156947, "learning_rate": 1.7564972430813899e-07, "loss": 0.1511, "step": 8298 }, { "epoch": 2.7714142594757054, "grad_norm": 0.584640844229602, "learning_rate": 1.751395214637708e-07, "loss": 0.165, "step": 8299 }, { "epoch": 2.771748205042578, "grad_norm": 0.51047234479896, "learning_rate": 1.7463004747138967e-07, "loss": 0.1502, "step": 8300 }, { "epoch": 2.7720821506094504, "grad_norm": 0.5145726980495454, "learning_rate": 1.7412130240795578e-07, "loss": 0.1496, "step": 8301 }, { "epoch": 2.772416096176323, "grad_norm": 0.5479476488710432, "learning_rate": 1.736132863503226e-07, "loss": 0.1591, "step": 8302 }, { "epoch": 2.772750041743196, "grad_norm": 0.5223980288241024, "learning_rate": 1.7310599937523153e-07, "loss": 0.16, "step": 8303 }, { "epoch": 2.7730839873100686, "grad_norm": 0.5203609186908762, "learning_rate": 1.7259944155931407e-07, "loss": 0.154, "step": 8304 }, { "epoch": 2.7734179328769413, "grad_norm": 0.5262438153454855, "learning_rate": 1.720936129790912e-07, "loss": 0.1534, "step": 8305 }, { "epoch": 2.7737518784438135, "grad_norm": 0.5572448507311574, "learning_rate": 1.7158851371097518e-07, "loss": 0.164, "step": 8306 }, { "epoch": 2.7740858240106863, "grad_norm": 0.5516458484449425, "learning_rate": 1.7108414383126658e-07, "loss": 0.1575, "step": 8307 }, { "epoch": 2.774419769577559, "grad_norm": 0.5305221309834752, "learning_rate": 1.7058050341615783e-07, "loss": 0.1504, "step": 8308 }, { "epoch": 2.7747537151444313, "grad_norm": 0.5465339511703754, "learning_rate": 1.7007759254172752e-07, "loss": 0.1597, "step": 8309 }, { "epoch": 2.775087660711304, "grad_norm": 0.5236311349194479, "learning_rate": 1.6957541128394817e-07, "loss": 0.151, "step": 8310 }, { "epoch": 2.7754216062781767, "grad_norm": 0.5794917232299244, "learning_rate": 1.6907395971867858e-07, "loss": 0.165, "step": 8311 }, { "epoch": 2.7757555518450494, "grad_norm": 0.5111090214492914, "learning_rate": 1.685732379216698e-07, "loss": 0.1537, "step": 8312 }, { "epoch": 2.776089497411922, "grad_norm": 0.5182270741000585, "learning_rate": 1.680732459685619e-07, "loss": 0.1485, "step": 8313 }, { "epoch": 2.7764234429787944, "grad_norm": 0.5299871536028107, "learning_rate": 1.6757398393488443e-07, "loss": 0.1615, "step": 8314 }, { "epoch": 2.776757388545667, "grad_norm": 0.5229619474271693, "learning_rate": 1.6707545189605657e-07, "loss": 0.1549, "step": 8315 }, { "epoch": 2.77709133411254, "grad_norm": 0.5312633993540525, "learning_rate": 1.6657764992738746e-07, "loss": 0.1508, "step": 8316 }, { "epoch": 2.777425279679412, "grad_norm": 0.5207042176983809, "learning_rate": 1.6608057810407586e-07, "loss": 0.1548, "step": 8317 }, { "epoch": 2.777759225246285, "grad_norm": 0.509134113266697, "learning_rate": 1.6558423650121003e-07, "loss": 0.149, "step": 8318 }, { "epoch": 2.7780931708131575, "grad_norm": 0.5635185316038204, "learning_rate": 1.6508862519376945e-07, "loss": 0.166, "step": 8319 }, { "epoch": 2.77842711638003, "grad_norm": 0.5039009942903678, "learning_rate": 1.6459374425662088e-07, "loss": 0.152, "step": 8320 }, { "epoch": 2.7787610619469025, "grad_norm": 0.5185011394458492, "learning_rate": 1.6409959376452289e-07, "loss": 0.1497, "step": 8321 }, { "epoch": 2.7790950075137753, "grad_norm": 0.5035741229676389, "learning_rate": 1.6360617379212185e-07, "loss": 0.147, "step": 8322 }, { "epoch": 2.779428953080648, "grad_norm": 0.5234105413015845, "learning_rate": 1.6311348441395535e-07, "loss": 0.1525, "step": 8323 }, { "epoch": 2.7797628986475207, "grad_norm": 0.5402213344741763, "learning_rate": 1.6262152570444777e-07, "loss": 0.1567, "step": 8324 }, { "epoch": 2.780096844214393, "grad_norm": 0.500714119408257, "learning_rate": 1.6213029773791912e-07, "loss": 0.1503, "step": 8325 }, { "epoch": 2.7804307897812657, "grad_norm": 0.5327017668640379, "learning_rate": 1.6163980058857164e-07, "loss": 0.159, "step": 8326 }, { "epoch": 2.7807647353481384, "grad_norm": 0.5552130569413134, "learning_rate": 1.6115003433050336e-07, "loss": 0.1523, "step": 8327 }, { "epoch": 2.7810986809150107, "grad_norm": 0.5036265248479774, "learning_rate": 1.6066099903769726e-07, "loss": 0.1443, "step": 8328 }, { "epoch": 2.7814326264818834, "grad_norm": 0.5280869369504387, "learning_rate": 1.6017269478402875e-07, "loss": 0.1594, "step": 8329 }, { "epoch": 2.781766572048756, "grad_norm": 0.5299118807830349, "learning_rate": 1.59685121643261e-07, "loss": 0.1493, "step": 8330 }, { "epoch": 2.7821005176156284, "grad_norm": 0.5115595941536983, "learning_rate": 1.5919827968904955e-07, "loss": 0.1498, "step": 8331 }, { "epoch": 2.782434463182501, "grad_norm": 0.5517706631282778, "learning_rate": 1.5871216899493612e-07, "loss": 0.1623, "step": 8332 }, { "epoch": 2.782768408749374, "grad_norm": 0.5322940156215018, "learning_rate": 1.5822678963435479e-07, "loss": 0.1637, "step": 8333 }, { "epoch": 2.7831023543162465, "grad_norm": 0.5279044532161087, "learning_rate": 1.5774214168062575e-07, "loss": 0.1544, "step": 8334 }, { "epoch": 2.7834362998831192, "grad_norm": 0.5358377006180688, "learning_rate": 1.5725822520696267e-07, "loss": 0.159, "step": 8335 }, { "epoch": 2.7837702454499915, "grad_norm": 0.5181269469955557, "learning_rate": 1.567750402864654e-07, "loss": 0.149, "step": 8336 }, { "epoch": 2.7841041910168642, "grad_norm": 0.5627060721708065, "learning_rate": 1.5629258699212613e-07, "loss": 0.1598, "step": 8337 }, { "epoch": 2.784438136583737, "grad_norm": 0.5672074227846221, "learning_rate": 1.5581086539682433e-07, "loss": 0.1647, "step": 8338 }, { "epoch": 2.7847720821506092, "grad_norm": 0.5532121190782273, "learning_rate": 1.5532987557332902e-07, "loss": 0.1606, "step": 8339 }, { "epoch": 2.785106027717482, "grad_norm": 0.5144356673510838, "learning_rate": 1.5484961759430095e-07, "loss": 0.1483, "step": 8340 }, { "epoch": 2.7854399732843547, "grad_norm": 0.5476944503372652, "learning_rate": 1.5437009153228766e-07, "loss": 0.1581, "step": 8341 }, { "epoch": 2.7857739188512274, "grad_norm": 0.5373962416791143, "learning_rate": 1.538912974597273e-07, "loss": 0.1578, "step": 8342 }, { "epoch": 2.7861078644181, "grad_norm": 0.5169478563051959, "learning_rate": 1.5341323544894758e-07, "loss": 0.1531, "step": 8343 }, { "epoch": 2.7864418099849724, "grad_norm": 0.5270574826065255, "learning_rate": 1.5293590557216577e-07, "loss": 0.1554, "step": 8344 }, { "epoch": 2.786775755551845, "grad_norm": 0.5322542069948488, "learning_rate": 1.5245930790148743e-07, "loss": 0.1533, "step": 8345 }, { "epoch": 2.787109701118718, "grad_norm": 0.5002626708460715, "learning_rate": 1.5198344250890894e-07, "loss": 0.1479, "step": 8346 }, { "epoch": 2.78744364668559, "grad_norm": 0.510836318152733, "learning_rate": 1.515083094663139e-07, "loss": 0.1564, "step": 8347 }, { "epoch": 2.787777592252463, "grad_norm": 0.5024767543651935, "learning_rate": 1.5103390884547931e-07, "loss": 0.1496, "step": 8348 }, { "epoch": 2.7881115378193355, "grad_norm": 0.5256020147012425, "learning_rate": 1.5056024071806674e-07, "loss": 0.156, "step": 8349 }, { "epoch": 2.788445483386208, "grad_norm": 0.5513860801672124, "learning_rate": 1.5008730515563064e-07, "loss": 0.1702, "step": 8350 }, { "epoch": 2.7887794289530805, "grad_norm": 0.5583302740203661, "learning_rate": 1.4961510222961216e-07, "loss": 0.1689, "step": 8351 }, { "epoch": 2.7891133745199532, "grad_norm": 0.5442873203295383, "learning_rate": 1.4914363201134486e-07, "loss": 0.1543, "step": 8352 }, { "epoch": 2.789447320086826, "grad_norm": 0.5524028432461694, "learning_rate": 1.4867289457204726e-07, "loss": 0.1726, "step": 8353 }, { "epoch": 2.7897812656536987, "grad_norm": 0.530554499586277, "learning_rate": 1.4820288998283304e-07, "loss": 0.1517, "step": 8354 }, { "epoch": 2.790115211220571, "grad_norm": 0.556591191428861, "learning_rate": 1.477336183146999e-07, "loss": 0.1557, "step": 8355 }, { "epoch": 2.7904491567874437, "grad_norm": 0.544705956885302, "learning_rate": 1.4726507963853776e-07, "loss": 0.1627, "step": 8356 }, { "epoch": 2.7907831023543164, "grad_norm": 0.5220600913224868, "learning_rate": 1.4679727402512334e-07, "loss": 0.1527, "step": 8357 }, { "epoch": 2.7911170479211886, "grad_norm": 0.52810589422968, "learning_rate": 1.4633020154512677e-07, "loss": 0.1551, "step": 8358 }, { "epoch": 2.7914509934880614, "grad_norm": 0.4970005355575271, "learning_rate": 1.458638622691022e-07, "loss": 0.1479, "step": 8359 }, { "epoch": 2.791784939054934, "grad_norm": 0.5212873276869456, "learning_rate": 1.4539825626749715e-07, "loss": 0.1541, "step": 8360 }, { "epoch": 2.792118884621807, "grad_norm": 0.5042382836183906, "learning_rate": 1.4493338361064646e-07, "loss": 0.1455, "step": 8361 }, { "epoch": 2.7924528301886795, "grad_norm": 0.5309456541518355, "learning_rate": 1.4446924436877507e-07, "loss": 0.1609, "step": 8362 }, { "epoch": 2.792786775755552, "grad_norm": 0.5118129734037632, "learning_rate": 1.4400583861199636e-07, "loss": 0.1541, "step": 8363 }, { "epoch": 2.7931207213224245, "grad_norm": 0.4853226783481245, "learning_rate": 1.4354316641031263e-07, "loss": 0.1543, "step": 8364 }, { "epoch": 2.7934546668892972, "grad_norm": 0.541481071236972, "learning_rate": 1.4308122783361688e-07, "loss": 0.1523, "step": 8365 }, { "epoch": 2.7937886124561695, "grad_norm": 0.5278220146033721, "learning_rate": 1.4262002295168997e-07, "loss": 0.1547, "step": 8366 }, { "epoch": 2.794122558023042, "grad_norm": 0.5135966918537538, "learning_rate": 1.4215955183420282e-07, "loss": 0.1502, "step": 8367 }, { "epoch": 2.794456503589915, "grad_norm": 0.4920830612333697, "learning_rate": 1.4169981455071368e-07, "loss": 0.1571, "step": 8368 }, { "epoch": 2.794790449156787, "grad_norm": 0.5148476681687206, "learning_rate": 1.4124081117067313e-07, "loss": 0.1553, "step": 8369 }, { "epoch": 2.79512439472366, "grad_norm": 0.5234172849783939, "learning_rate": 1.4078254176341788e-07, "loss": 0.1583, "step": 8370 }, { "epoch": 2.7954583402905326, "grad_norm": 0.5072703583829362, "learning_rate": 1.4032500639817426e-07, "loss": 0.154, "step": 8371 }, { "epoch": 2.7957922858574054, "grad_norm": 0.5591966083853513, "learning_rate": 1.3986820514405973e-07, "loss": 0.1635, "step": 8372 }, { "epoch": 2.796126231424278, "grad_norm": 0.5831281500644744, "learning_rate": 1.394121380700797e-07, "loss": 0.1666, "step": 8373 }, { "epoch": 2.7964601769911503, "grad_norm": 0.5324043094043531, "learning_rate": 1.3895680524512734e-07, "loss": 0.1529, "step": 8374 }, { "epoch": 2.796794122558023, "grad_norm": 0.5407819919549826, "learning_rate": 1.3850220673798655e-07, "loss": 0.1523, "step": 8375 }, { "epoch": 2.797128068124896, "grad_norm": 0.571743758557889, "learning_rate": 1.3804834261732957e-07, "loss": 0.1604, "step": 8376 }, { "epoch": 2.797462013691768, "grad_norm": 0.5261550026856082, "learning_rate": 1.3759521295171773e-07, "loss": 0.1579, "step": 8377 }, { "epoch": 2.7977959592586408, "grad_norm": 0.5077553496486351, "learning_rate": 1.3714281780960237e-07, "loss": 0.1549, "step": 8378 }, { "epoch": 2.7981299048255135, "grad_norm": 0.5512498148977449, "learning_rate": 1.366911572593227e-07, "loss": 0.1462, "step": 8379 }, { "epoch": 2.7984638503923858, "grad_norm": 0.5278418783066358, "learning_rate": 1.3624023136910691e-07, "loss": 0.1527, "step": 8380 }, { "epoch": 2.7987977959592585, "grad_norm": 0.5720969297382702, "learning_rate": 1.3579004020707387e-07, "loss": 0.1616, "step": 8381 }, { "epoch": 2.799131741526131, "grad_norm": 0.5391404210914308, "learning_rate": 1.3534058384122862e-07, "loss": 0.1588, "step": 8382 }, { "epoch": 2.799465687093004, "grad_norm": 0.5391505004911138, "learning_rate": 1.3489186233946793e-07, "loss": 0.1586, "step": 8383 }, { "epoch": 2.7997996326598766, "grad_norm": 0.5405451852070394, "learning_rate": 1.3444387576957706e-07, "loss": 0.162, "step": 8384 }, { "epoch": 2.800133578226749, "grad_norm": 0.5523476563053142, "learning_rate": 1.33996624199228e-07, "loss": 0.1588, "step": 8385 }, { "epoch": 2.8004675237936216, "grad_norm": 0.520024555301227, "learning_rate": 1.335501076959844e-07, "loss": 0.1544, "step": 8386 }, { "epoch": 2.8008014693604943, "grad_norm": 0.5772233450893358, "learning_rate": 1.331043263272974e-07, "loss": 0.1648, "step": 8387 }, { "epoch": 2.8011354149273666, "grad_norm": 0.5240091069680041, "learning_rate": 1.3265928016050756e-07, "loss": 0.1468, "step": 8388 }, { "epoch": 2.8014693604942393, "grad_norm": 0.5134959826858604, "learning_rate": 1.3221496926284493e-07, "loss": 0.1523, "step": 8389 }, { "epoch": 2.801803306061112, "grad_norm": 0.53411701119722, "learning_rate": 1.3177139370142755e-07, "loss": 0.1533, "step": 8390 }, { "epoch": 2.8021372516279848, "grad_norm": 0.5451994225507716, "learning_rate": 1.3132855354326236e-07, "loss": 0.1565, "step": 8391 }, { "epoch": 2.8024711971948575, "grad_norm": 0.4988335998137933, "learning_rate": 1.3088644885524637e-07, "loss": 0.1479, "step": 8392 }, { "epoch": 2.8028051427617298, "grad_norm": 0.5470946888485676, "learning_rate": 1.3044507970416398e-07, "loss": 0.1557, "step": 8393 }, { "epoch": 2.8031390883286025, "grad_norm": 0.5434547017527317, "learning_rate": 1.3000444615668906e-07, "loss": 0.1557, "step": 8394 }, { "epoch": 2.803473033895475, "grad_norm": 0.5210177368074225, "learning_rate": 1.2956454827938557e-07, "loss": 0.1471, "step": 8395 }, { "epoch": 2.8038069794623475, "grad_norm": 0.506645320292903, "learning_rate": 1.291253861387043e-07, "loss": 0.1385, "step": 8396 }, { "epoch": 2.80414092502922, "grad_norm": 0.5153043838572172, "learning_rate": 1.28686959800986e-07, "loss": 0.1547, "step": 8397 }, { "epoch": 2.804474870596093, "grad_norm": 0.5433374471677337, "learning_rate": 1.2824926933246106e-07, "loss": 0.1577, "step": 8398 }, { "epoch": 2.804808816162965, "grad_norm": 0.5741759511423757, "learning_rate": 1.2781231479924606e-07, "loss": 0.1671, "step": 8399 }, { "epoch": 2.805142761729838, "grad_norm": 0.5348545400171368, "learning_rate": 1.2737609626734927e-07, "loss": 0.1571, "step": 8400 }, { "epoch": 2.8054767072967106, "grad_norm": 0.5347605161338985, "learning_rate": 1.269406138026663e-07, "loss": 0.1603, "step": 8401 }, { "epoch": 2.8058106528635833, "grad_norm": 0.5372346879839113, "learning_rate": 1.2650586747098238e-07, "loss": 0.1609, "step": 8402 }, { "epoch": 2.806144598430456, "grad_norm": 0.5184156394531372, "learning_rate": 1.2607185733797044e-07, "loss": 0.1531, "step": 8403 }, { "epoch": 2.8064785439973283, "grad_norm": 0.5193297902452196, "learning_rate": 1.2563858346919365e-07, "loss": 0.1495, "step": 8404 }, { "epoch": 2.806812489564201, "grad_norm": 0.4735671716020055, "learning_rate": 1.2520604593010189e-07, "loss": 0.139, "step": 8405 }, { "epoch": 2.8071464351310738, "grad_norm": 0.5452990496424134, "learning_rate": 1.247742447860356e-07, "loss": 0.1518, "step": 8406 }, { "epoch": 2.807480380697946, "grad_norm": 0.5197495861568605, "learning_rate": 1.2434318010222434e-07, "loss": 0.1531, "step": 8407 }, { "epoch": 2.8078143262648187, "grad_norm": 0.5682800571844647, "learning_rate": 1.2391285194378433e-07, "loss": 0.1558, "step": 8408 }, { "epoch": 2.8081482718316915, "grad_norm": 0.5476986226433352, "learning_rate": 1.2348326037572244e-07, "loss": 0.1642, "step": 8409 }, { "epoch": 2.808482217398564, "grad_norm": 0.5660502282746109, "learning_rate": 1.2305440546293236e-07, "loss": 0.1727, "step": 8410 }, { "epoch": 2.808816162965437, "grad_norm": 0.4987505875655572, "learning_rate": 1.2262628727019942e-07, "loss": 0.1414, "step": 8411 }, { "epoch": 2.809150108532309, "grad_norm": 0.5209194368951366, "learning_rate": 1.221989058621942e-07, "loss": 0.1512, "step": 8412 }, { "epoch": 2.809484054099182, "grad_norm": 0.5773338493631209, "learning_rate": 1.2177226130347886e-07, "loss": 0.1533, "step": 8413 }, { "epoch": 2.8098179996660546, "grad_norm": 0.49229443873769146, "learning_rate": 1.21346353658503e-07, "loss": 0.1488, "step": 8414 }, { "epoch": 2.810151945232927, "grad_norm": 0.5239314850265931, "learning_rate": 1.209211829916046e-07, "loss": 0.1632, "step": 8415 }, { "epoch": 2.8104858907997996, "grad_norm": 0.5733286509903617, "learning_rate": 1.204967493670106e-07, "loss": 0.1632, "step": 8416 }, { "epoch": 2.8108198363666723, "grad_norm": 0.5649617767440496, "learning_rate": 1.2007305284883696e-07, "loss": 0.1524, "step": 8417 }, { "epoch": 2.8111537819335446, "grad_norm": 0.5495402597096266, "learning_rate": 1.1965009350108747e-07, "loss": 0.1856, "step": 8418 }, { "epoch": 2.8114877275004173, "grad_norm": 0.4862318982701236, "learning_rate": 1.1922787138765656e-07, "loss": 0.1439, "step": 8419 }, { "epoch": 2.81182167306729, "grad_norm": 0.5452192079690045, "learning_rate": 1.188063865723238e-07, "loss": 0.1584, "step": 8420 }, { "epoch": 2.8121556186341627, "grad_norm": 0.5197784434274771, "learning_rate": 1.1838563911876155e-07, "loss": 0.1568, "step": 8421 }, { "epoch": 2.8124895642010355, "grad_norm": 0.5583439489780697, "learning_rate": 1.1796562909052734e-07, "loss": 0.1571, "step": 8422 }, { "epoch": 2.8128235097679077, "grad_norm": 0.5495414718376356, "learning_rate": 1.1754635655106928e-07, "loss": 0.1638, "step": 8423 }, { "epoch": 2.8131574553347805, "grad_norm": 0.5353757303775786, "learning_rate": 1.1712782156372226e-07, "loss": 0.1556, "step": 8424 }, { "epoch": 2.813491400901653, "grad_norm": 0.5316180867730131, "learning_rate": 1.167100241917124e-07, "loss": 0.1542, "step": 8425 }, { "epoch": 2.8138253464685254, "grad_norm": 0.5410694261321864, "learning_rate": 1.1629296449815197e-07, "loss": 0.1539, "step": 8426 }, { "epoch": 2.814159292035398, "grad_norm": 0.5255062069267001, "learning_rate": 1.1587664254604336e-07, "loss": 0.1558, "step": 8427 }, { "epoch": 2.814493237602271, "grad_norm": 0.5564039864164055, "learning_rate": 1.1546105839827626e-07, "loss": 0.1642, "step": 8428 }, { "epoch": 2.814827183169143, "grad_norm": 0.5451419332138157, "learning_rate": 1.150462121176299e-07, "loss": 0.1605, "step": 8429 }, { "epoch": 2.815161128736016, "grad_norm": 0.5608596813939348, "learning_rate": 1.1463210376677192e-07, "loss": 0.1678, "step": 8430 }, { "epoch": 2.8154950743028886, "grad_norm": 0.5426405423185976, "learning_rate": 1.1421873340825729e-07, "loss": 0.1549, "step": 8431 }, { "epoch": 2.8158290198697613, "grad_norm": 0.5354590434247339, "learning_rate": 1.1380610110453217e-07, "loss": 0.1545, "step": 8432 }, { "epoch": 2.816162965436634, "grad_norm": 0.5214195383604076, "learning_rate": 1.133942069179278e-07, "loss": 0.1524, "step": 8433 }, { "epoch": 2.8164969110035063, "grad_norm": 0.5846181741316001, "learning_rate": 1.1298305091066664e-07, "loss": 0.1571, "step": 8434 }, { "epoch": 2.816830856570379, "grad_norm": 0.5684128730306873, "learning_rate": 1.1257263314485844e-07, "loss": 0.1588, "step": 8435 }, { "epoch": 2.8171648021372517, "grad_norm": 0.5148921793629571, "learning_rate": 1.1216295368250196e-07, "loss": 0.1561, "step": 8436 }, { "epoch": 2.817498747704124, "grad_norm": 0.5536026332038109, "learning_rate": 1.1175401258548324e-07, "loss": 0.1539, "step": 8437 }, { "epoch": 2.8178326932709967, "grad_norm": 0.5272870234520881, "learning_rate": 1.1134580991557842e-07, "loss": 0.1623, "step": 8438 }, { "epoch": 2.8181666388378694, "grad_norm": 0.49663883935703573, "learning_rate": 1.1093834573445094e-07, "loss": 0.1494, "step": 8439 }, { "epoch": 2.818500584404742, "grad_norm": 0.5741127853098581, "learning_rate": 1.1053162010365326e-07, "loss": 0.1507, "step": 8440 }, { "epoch": 2.818834529971615, "grad_norm": 0.5546891679590489, "learning_rate": 1.1012563308462565e-07, "loss": 0.1574, "step": 8441 }, { "epoch": 2.819168475538487, "grad_norm": 0.5037622547843496, "learning_rate": 1.0972038473869795e-07, "loss": 0.151, "step": 8442 }, { "epoch": 2.81950242110536, "grad_norm": 0.5486643889881713, "learning_rate": 1.093158751270873e-07, "loss": 0.1506, "step": 8443 }, { "epoch": 2.8198363666722326, "grad_norm": 0.5590759169843443, "learning_rate": 1.0891210431089983e-07, "loss": 0.1634, "step": 8444 }, { "epoch": 2.820170312239105, "grad_norm": 0.5382380812540658, "learning_rate": 1.0850907235112895e-07, "loss": 0.1579, "step": 8445 }, { "epoch": 2.8205042578059776, "grad_norm": 0.5253716527065034, "learning_rate": 1.0810677930865876e-07, "loss": 0.1521, "step": 8446 }, { "epoch": 2.8208382033728503, "grad_norm": 0.48555707327772146, "learning_rate": 1.0770522524425898e-07, "loss": 0.1417, "step": 8447 }, { "epoch": 2.8211721489397226, "grad_norm": 0.5528063179335293, "learning_rate": 1.0730441021859106e-07, "loss": 0.1491, "step": 8448 }, { "epoch": 2.8215060945065953, "grad_norm": 0.5496044182417962, "learning_rate": 1.0690433429220049e-07, "loss": 0.1506, "step": 8449 }, { "epoch": 2.821840040073468, "grad_norm": 0.5346500000652207, "learning_rate": 1.0650499752552557e-07, "loss": 0.1539, "step": 8450 }, { "epoch": 2.8221739856403407, "grad_norm": 0.5479349408608167, "learning_rate": 1.0610639997888917e-07, "loss": 0.1557, "step": 8451 }, { "epoch": 2.8225079312072134, "grad_norm": 0.46744982444128813, "learning_rate": 1.0570854171250478e-07, "loss": 0.1374, "step": 8452 }, { "epoch": 2.8228418767740857, "grad_norm": 0.5441636040790427, "learning_rate": 1.0531142278647378e-07, "loss": 0.153, "step": 8453 }, { "epoch": 2.8231758223409584, "grad_norm": 0.5717208544928984, "learning_rate": 1.0491504326078483e-07, "loss": 0.1606, "step": 8454 }, { "epoch": 2.823509767907831, "grad_norm": 0.5434976551693014, "learning_rate": 1.0451940319531728e-07, "loss": 0.1495, "step": 8455 }, { "epoch": 2.8238437134747034, "grad_norm": 0.5563013385219229, "learning_rate": 1.0412450264983609e-07, "loss": 0.1582, "step": 8456 }, { "epoch": 2.824177659041576, "grad_norm": 0.5133006701112091, "learning_rate": 1.0373034168399521e-07, "loss": 0.1468, "step": 8457 }, { "epoch": 2.824511604608449, "grad_norm": 0.5368851278335857, "learning_rate": 1.0333692035733867e-07, "loss": 0.1577, "step": 8458 }, { "epoch": 2.8248455501753216, "grad_norm": 0.5658419408433192, "learning_rate": 1.0294423872929615e-07, "loss": 0.1458, "step": 8459 }, { "epoch": 2.8251794957421943, "grad_norm": 0.565524948708705, "learning_rate": 1.0255229685918744e-07, "loss": 0.1694, "step": 8460 }, { "epoch": 2.8255134413090666, "grad_norm": 0.5525057994914118, "learning_rate": 1.0216109480622017e-07, "loss": 0.1674, "step": 8461 }, { "epoch": 2.8258473868759393, "grad_norm": 0.7857522622361894, "learning_rate": 1.0177063262948927e-07, "loss": 0.1641, "step": 8462 }, { "epoch": 2.826181332442812, "grad_norm": 0.5695931540924961, "learning_rate": 1.0138091038797982e-07, "loss": 0.1592, "step": 8463 }, { "epoch": 2.8265152780096843, "grad_norm": 0.5376746632499131, "learning_rate": 1.0099192814056247e-07, "loss": 0.1663, "step": 8464 }, { "epoch": 2.826849223576557, "grad_norm": 0.4698801948441204, "learning_rate": 1.0060368594599856e-07, "loss": 0.136, "step": 8465 }, { "epoch": 2.8271831691434297, "grad_norm": 0.544981428758501, "learning_rate": 1.002161838629362e-07, "loss": 0.1525, "step": 8466 }, { "epoch": 2.827517114710302, "grad_norm": 0.5350947441046976, "learning_rate": 9.982942194991297e-08, "loss": 0.1503, "step": 8467 }, { "epoch": 2.8278510602771747, "grad_norm": 0.5659243272345094, "learning_rate": 9.94434002653527e-08, "loss": 0.1621, "step": 8468 }, { "epoch": 2.8281850058440474, "grad_norm": 0.4893884162320132, "learning_rate": 9.905811886756933e-08, "loss": 0.1444, "step": 8469 }, { "epoch": 2.82851895141092, "grad_norm": 0.5115181229873957, "learning_rate": 9.867357781476294e-08, "loss": 0.1536, "step": 8470 }, { "epoch": 2.828852896977793, "grad_norm": 0.5310645008445086, "learning_rate": 9.828977716502486e-08, "loss": 0.1527, "step": 8471 }, { "epoch": 2.829186842544665, "grad_norm": 0.5390370539150535, "learning_rate": 9.790671697633092e-08, "loss": 0.1607, "step": 8472 }, { "epoch": 2.829520788111538, "grad_norm": 0.5542118402025401, "learning_rate": 9.752439730654872e-08, "loss": 0.1641, "step": 8473 }, { "epoch": 2.8298547336784106, "grad_norm": 0.5423612496980644, "learning_rate": 9.714281821343041e-08, "loss": 0.1626, "step": 8474 }, { "epoch": 2.830188679245283, "grad_norm": 0.5342496957180441, "learning_rate": 9.676197975461876e-08, "loss": 0.1582, "step": 8475 }, { "epoch": 2.8305226248121556, "grad_norm": 0.49551719740241496, "learning_rate": 9.638188198764387e-08, "loss": 0.1514, "step": 8476 }, { "epoch": 2.8308565703790283, "grad_norm": 0.514845882985583, "learning_rate": 9.600252496992369e-08, "loss": 0.1455, "step": 8477 }, { "epoch": 2.8311905159459005, "grad_norm": 0.5246479501288792, "learning_rate": 9.562390875876515e-08, "loss": 0.1571, "step": 8478 }, { "epoch": 2.8315244615127733, "grad_norm": 0.5151252059582621, "learning_rate": 9.524603341136251e-08, "loss": 0.15, "step": 8479 }, { "epoch": 2.831858407079646, "grad_norm": 0.5659310966269068, "learning_rate": 9.486889898479734e-08, "loss": 0.156, "step": 8480 }, { "epoch": 2.8321923526465187, "grad_norm": 0.5146682132991759, "learning_rate": 9.449250553604184e-08, "loss": 0.1505, "step": 8481 }, { "epoch": 2.8325262982133914, "grad_norm": 0.4889795191225081, "learning_rate": 9.41168531219533e-08, "loss": 0.1508, "step": 8482 }, { "epoch": 2.8328602437802637, "grad_norm": 0.56495527958627, "learning_rate": 9.374194179927909e-08, "loss": 0.1577, "step": 8483 }, { "epoch": 2.8331941893471364, "grad_norm": 0.5605532903002794, "learning_rate": 9.336777162465449e-08, "loss": 0.1633, "step": 8484 }, { "epoch": 2.833528134914009, "grad_norm": 0.4893798584627006, "learning_rate": 9.299434265460095e-08, "loss": 0.1394, "step": 8485 }, { "epoch": 2.8338620804808814, "grad_norm": 0.5367020601261826, "learning_rate": 9.262165494553055e-08, "loss": 0.1521, "step": 8486 }, { "epoch": 2.834196026047754, "grad_norm": 0.5127849134301988, "learning_rate": 9.22497085537416e-08, "loss": 0.1504, "step": 8487 }, { "epoch": 2.834529971614627, "grad_norm": 0.5615166401386916, "learning_rate": 9.187850353542082e-08, "loss": 0.1581, "step": 8488 }, { "epoch": 2.8348639171814995, "grad_norm": 0.5169398545404523, "learning_rate": 9.150803994664337e-08, "loss": 0.1505, "step": 8489 }, { "epoch": 2.8351978627483723, "grad_norm": 0.5682736343819038, "learning_rate": 9.113831784337279e-08, "loss": 0.1531, "step": 8490 }, { "epoch": 2.8355318083152445, "grad_norm": 0.5340266016385549, "learning_rate": 9.076933728145832e-08, "loss": 0.151, "step": 8491 }, { "epoch": 2.8358657538821173, "grad_norm": 0.5585134485577232, "learning_rate": 9.040109831664035e-08, "loss": 0.1619, "step": 8492 }, { "epoch": 2.83619969944899, "grad_norm": 0.5725765003377334, "learning_rate": 9.003360100454495e-08, "loss": 0.1592, "step": 8493 }, { "epoch": 2.8365336450158622, "grad_norm": 0.534273889129343, "learning_rate": 8.966684540068659e-08, "loss": 0.1553, "step": 8494 }, { "epoch": 2.836867590582735, "grad_norm": 0.5171157368775522, "learning_rate": 8.930083156046931e-08, "loss": 0.1517, "step": 8495 }, { "epoch": 2.8372015361496077, "grad_norm": 0.5634182648212217, "learning_rate": 8.893555953918276e-08, "loss": 0.1567, "step": 8496 }, { "epoch": 2.83753548171648, "grad_norm": 0.5264929981389637, "learning_rate": 8.857102939200557e-08, "loss": 0.1459, "step": 8497 }, { "epoch": 2.8378694272833527, "grad_norm": 0.5735554438040473, "learning_rate": 8.820724117400536e-08, "loss": 0.1652, "step": 8498 }, { "epoch": 2.8382033728502254, "grad_norm": 0.5039614742487958, "learning_rate": 8.784419494013541e-08, "loss": 0.1444, "step": 8499 }, { "epoch": 2.838537318417098, "grad_norm": 0.5278294997838044, "learning_rate": 8.74818907452385e-08, "loss": 0.1614, "step": 8500 }, { "epoch": 2.838871263983971, "grad_norm": 0.510269423877444, "learning_rate": 8.712032864404529e-08, "loss": 0.1497, "step": 8501 }, { "epoch": 2.839205209550843, "grad_norm": 0.5285759359254864, "learning_rate": 8.675950869117323e-08, "loss": 0.1573, "step": 8502 }, { "epoch": 2.839539155117716, "grad_norm": 0.5220521008233078, "learning_rate": 8.639943094112868e-08, "loss": 0.1458, "step": 8503 }, { "epoch": 2.8398731006845885, "grad_norm": 0.5595841029538259, "learning_rate": 8.604009544830705e-08, "loss": 0.1601, "step": 8504 }, { "epoch": 2.840207046251461, "grad_norm": 0.5506637831429321, "learning_rate": 8.568150226698823e-08, "loss": 0.1599, "step": 8505 }, { "epoch": 2.8405409918183335, "grad_norm": 0.5610602819838691, "learning_rate": 8.532365145134226e-08, "loss": 0.1546, "step": 8506 }, { "epoch": 2.8408749373852062, "grad_norm": 0.5818644312959905, "learning_rate": 8.496654305542807e-08, "loss": 0.1728, "step": 8507 }, { "epoch": 2.841208882952079, "grad_norm": 0.5776511122322144, "learning_rate": 8.461017713318976e-08, "loss": 0.1654, "step": 8508 }, { "epoch": 2.8415428285189517, "grad_norm": 0.5189470178377121, "learning_rate": 8.425455373846147e-08, "loss": 0.1497, "step": 8509 }, { "epoch": 2.841876774085824, "grad_norm": 0.4788685040909344, "learning_rate": 8.38996729249636e-08, "loss": 0.1392, "step": 8510 }, { "epoch": 2.8422107196526967, "grad_norm": 0.5386980557967677, "learning_rate": 8.354553474630489e-08, "loss": 0.1554, "step": 8511 }, { "epoch": 2.8425446652195694, "grad_norm": 0.5621709868208091, "learning_rate": 8.319213925598258e-08, "loss": 0.1595, "step": 8512 }, { "epoch": 2.8428786107864417, "grad_norm": 0.5321301128110231, "learning_rate": 8.283948650738172e-08, "loss": 0.147, "step": 8513 }, { "epoch": 2.8432125563533144, "grad_norm": 0.521599077794686, "learning_rate": 8.248757655377415e-08, "loss": 0.1559, "step": 8514 }, { "epoch": 2.843546501920187, "grad_norm": 0.5471021279762034, "learning_rate": 8.213640944831957e-08, "loss": 0.1635, "step": 8515 }, { "epoch": 2.8438804474870594, "grad_norm": 0.5022501808867641, "learning_rate": 8.178598524406667e-08, "loss": 0.1464, "step": 8516 }, { "epoch": 2.844214393053932, "grad_norm": 0.5173838285389819, "learning_rate": 8.143630399395031e-08, "loss": 0.1457, "step": 8517 }, { "epoch": 2.844548338620805, "grad_norm": 0.5329604081610628, "learning_rate": 8.108736575079434e-08, "loss": 0.1615, "step": 8518 }, { "epoch": 2.8448822841876775, "grad_norm": 0.49406524837234916, "learning_rate": 8.073917056731106e-08, "loss": 0.1471, "step": 8519 }, { "epoch": 2.8452162297545502, "grad_norm": 0.503715390690022, "learning_rate": 8.039171849609728e-08, "loss": 0.1481, "step": 8520 }, { "epoch": 2.8455501753214225, "grad_norm": 0.5202325840935615, "learning_rate": 8.004500958964211e-08, "loss": 0.1472, "step": 8521 }, { "epoch": 2.8458841208882952, "grad_norm": 0.5296350657811596, "learning_rate": 7.969904390031812e-08, "loss": 0.1518, "step": 8522 }, { "epoch": 2.846218066455168, "grad_norm": 0.5206405247107793, "learning_rate": 7.935382148038794e-08, "loss": 0.1583, "step": 8523 }, { "epoch": 2.8465520120220402, "grad_norm": 0.5080001963788301, "learning_rate": 7.900934238200265e-08, "loss": 0.1458, "step": 8524 }, { "epoch": 2.846885957588913, "grad_norm": 0.5533271921936429, "learning_rate": 7.866560665719836e-08, "loss": 0.1606, "step": 8525 }, { "epoch": 2.8472199031557857, "grad_norm": 0.5516994173535836, "learning_rate": 7.832261435790078e-08, "loss": 0.1581, "step": 8526 }, { "epoch": 2.847553848722658, "grad_norm": 0.603291791816102, "learning_rate": 7.798036553592403e-08, "loss": 0.1708, "step": 8527 }, { "epoch": 2.8478877942895306, "grad_norm": 0.5226165249381783, "learning_rate": 7.763886024296729e-08, "loss": 0.1494, "step": 8528 }, { "epoch": 2.8482217398564034, "grad_norm": 0.4984085875281068, "learning_rate": 7.729809853061987e-08, "loss": 0.1477, "step": 8529 }, { "epoch": 2.848555685423276, "grad_norm": 0.5546006723134326, "learning_rate": 7.69580804503578e-08, "loss": 0.16, "step": 8530 }, { "epoch": 2.848889630990149, "grad_norm": 0.4951006251440752, "learning_rate": 7.661880605354444e-08, "loss": 0.1459, "step": 8531 }, { "epoch": 2.849223576557021, "grad_norm": 0.5475486717210286, "learning_rate": 7.628027539143156e-08, "loss": 0.155, "step": 8532 }, { "epoch": 2.849557522123894, "grad_norm": 0.4853112230938831, "learning_rate": 7.594248851515717e-08, "loss": 0.1446, "step": 8533 }, { "epoch": 2.8498914676907665, "grad_norm": 0.5277882995951618, "learning_rate": 7.560544547574988e-08, "loss": 0.157, "step": 8534 }, { "epoch": 2.850225413257639, "grad_norm": 0.5129719043043828, "learning_rate": 7.526914632412175e-08, "loss": 0.1493, "step": 8535 }, { "epoch": 2.8505593588245115, "grad_norm": 0.5254590538526176, "learning_rate": 7.493359111107712e-08, "loss": 0.1562, "step": 8536 }, { "epoch": 2.850893304391384, "grad_norm": 0.5457808147998661, "learning_rate": 7.459877988730325e-08, "loss": 0.1568, "step": 8537 }, { "epoch": 2.851227249958257, "grad_norm": 0.5225349529691248, "learning_rate": 7.42647127033791e-08, "loss": 0.1501, "step": 8538 }, { "epoch": 2.8515611955251297, "grad_norm": 0.5583061635314059, "learning_rate": 7.393138960976876e-08, "loss": 0.1536, "step": 8539 }, { "epoch": 2.851895141092002, "grad_norm": 0.5121547924256945, "learning_rate": 7.359881065682473e-08, "loss": 0.1438, "step": 8540 }, { "epoch": 2.8522290866588746, "grad_norm": 0.49864090386106596, "learning_rate": 7.32669758947857e-08, "loss": 0.144, "step": 8541 }, { "epoch": 2.8525630322257474, "grad_norm": 0.4893222689278706, "learning_rate": 7.29358853737816e-08, "loss": 0.146, "step": 8542 }, { "epoch": 2.8528969777926196, "grad_norm": 0.5348409656064933, "learning_rate": 7.260553914382573e-08, "loss": 0.149, "step": 8543 }, { "epoch": 2.8532309233594924, "grad_norm": 0.5435223373432201, "learning_rate": 7.227593725482207e-08, "loss": 0.1587, "step": 8544 }, { "epoch": 2.853564868926365, "grad_norm": 0.5217911226961017, "learning_rate": 7.194707975655912e-08, "loss": 0.1444, "step": 8545 }, { "epoch": 2.8538988144932373, "grad_norm": 0.6065711866534749, "learning_rate": 7.161896669871605e-08, "loss": 0.1735, "step": 8546 }, { "epoch": 2.85423276006011, "grad_norm": 0.5330691052638803, "learning_rate": 7.129159813085817e-08, "loss": 0.158, "step": 8547 }, { "epoch": 2.854566705626983, "grad_norm": 0.5350161629397456, "learning_rate": 7.096497410243819e-08, "loss": 0.1528, "step": 8548 }, { "epoch": 2.8549006511938555, "grad_norm": 0.5560545581651952, "learning_rate": 7.063909466279605e-08, "loss": 0.1656, "step": 8549 }, { "epoch": 2.855234596760728, "grad_norm": 0.585578949405154, "learning_rate": 7.031395986116019e-08, "loss": 0.1588, "step": 8550 }, { "epoch": 2.8555685423276005, "grad_norm": 0.5165515533972673, "learning_rate": 6.998956974664573e-08, "loss": 0.1473, "step": 8551 }, { "epoch": 2.855902487894473, "grad_norm": 0.5970286051761957, "learning_rate": 6.966592436825514e-08, "loss": 0.1683, "step": 8552 }, { "epoch": 2.856236433461346, "grad_norm": 0.5173373849701015, "learning_rate": 6.934302377488045e-08, "loss": 0.1488, "step": 8553 }, { "epoch": 2.856570379028218, "grad_norm": 0.5131753943094373, "learning_rate": 6.902086801529817e-08, "loss": 0.1519, "step": 8554 }, { "epoch": 2.856904324595091, "grad_norm": 0.5156084894747482, "learning_rate": 6.869945713817438e-08, "loss": 0.1485, "step": 8555 }, { "epoch": 2.8572382701619636, "grad_norm": 0.4849489841706419, "learning_rate": 6.837879119206192e-08, "loss": 0.1476, "step": 8556 }, { "epoch": 2.8575722157288364, "grad_norm": 0.6184373363815887, "learning_rate": 6.805887022540093e-08, "loss": 0.1686, "step": 8557 }, { "epoch": 2.857906161295709, "grad_norm": 0.5476203063313209, "learning_rate": 6.773969428651883e-08, "loss": 0.165, "step": 8558 }, { "epoch": 2.8582401068625813, "grad_norm": 0.5373994473998241, "learning_rate": 6.742126342363153e-08, "loss": 0.1498, "step": 8559 }, { "epoch": 2.858574052429454, "grad_norm": 0.5178688034759278, "learning_rate": 6.710357768484165e-08, "loss": 0.1571, "step": 8560 }, { "epoch": 2.8589079979963268, "grad_norm": 0.5039114208119598, "learning_rate": 6.67866371181397e-08, "loss": 0.1535, "step": 8561 }, { "epoch": 2.859241943563199, "grad_norm": 0.5332718448517993, "learning_rate": 6.647044177140293e-08, "loss": 0.1601, "step": 8562 }, { "epoch": 2.8595758891300718, "grad_norm": 0.5102172652244273, "learning_rate": 6.615499169239647e-08, "loss": 0.1399, "step": 8563 }, { "epoch": 2.8599098346969445, "grad_norm": 0.531015741793665, "learning_rate": 6.584028692877164e-08, "loss": 0.1588, "step": 8564 }, { "epoch": 2.8602437802638168, "grad_norm": 0.523805491269051, "learning_rate": 6.552632752807042e-08, "loss": 0.1589, "step": 8565 }, { "epoch": 2.8605777258306895, "grad_norm": 0.5326354340246765, "learning_rate": 6.52131135377182e-08, "loss": 0.1538, "step": 8566 }, { "epoch": 2.860911671397562, "grad_norm": 0.511949106766765, "learning_rate": 6.490064500503102e-08, "loss": 0.1453, "step": 8567 }, { "epoch": 2.861245616964435, "grad_norm": 0.5271829727987514, "learning_rate": 6.458892197721e-08, "loss": 0.1585, "step": 8568 }, { "epoch": 2.8615795625313076, "grad_norm": 0.5430974193562058, "learning_rate": 6.427794450134529e-08, "loss": 0.1573, "step": 8569 }, { "epoch": 2.86191350809818, "grad_norm": 0.5402436743016346, "learning_rate": 6.396771262441259e-08, "loss": 0.1587, "step": 8570 }, { "epoch": 2.8622474536650526, "grad_norm": 0.5228404957057184, "learning_rate": 6.365822639327724e-08, "loss": 0.1527, "step": 8571 }, { "epoch": 2.8625813992319253, "grad_norm": 0.5431852960693747, "learning_rate": 6.334948585469014e-08, "loss": 0.1516, "step": 8572 }, { "epoch": 2.8629153447987976, "grad_norm": 0.5535152802323873, "learning_rate": 6.304149105529067e-08, "loss": 0.1597, "step": 8573 }, { "epoch": 2.8632492903656703, "grad_norm": 0.503798876425636, "learning_rate": 6.273424204160438e-08, "loss": 0.1581, "step": 8574 }, { "epoch": 2.863583235932543, "grad_norm": 0.4954582075562573, "learning_rate": 6.242773886004583e-08, "loss": 0.148, "step": 8575 }, { "epoch": 2.8639171814994153, "grad_norm": 0.5227935230801231, "learning_rate": 6.212198155691518e-08, "loss": 0.1569, "step": 8576 }, { "epoch": 2.864251127066288, "grad_norm": 0.5072845345431894, "learning_rate": 6.181697017840049e-08, "loss": 0.1568, "step": 8577 }, { "epoch": 2.8645850726331608, "grad_norm": 0.5680395290888719, "learning_rate": 6.151270477057825e-08, "loss": 0.1665, "step": 8578 }, { "epoch": 2.8649190182000335, "grad_norm": 0.5723718892695402, "learning_rate": 6.120918537941001e-08, "loss": 0.1689, "step": 8579 }, { "epoch": 2.865252963766906, "grad_norm": 0.5543284280807889, "learning_rate": 6.090641205074743e-08, "loss": 0.164, "step": 8580 }, { "epoch": 2.8655869093337785, "grad_norm": 0.5035318018400274, "learning_rate": 6.060438483032671e-08, "loss": 0.142, "step": 8581 }, { "epoch": 2.865920854900651, "grad_norm": 0.5355651436216142, "learning_rate": 6.030310376377302e-08, "loss": 0.1612, "step": 8582 }, { "epoch": 2.866254800467524, "grad_norm": 0.48977033513131546, "learning_rate": 6.000256889659883e-08, "loss": 0.144, "step": 8583 }, { "epoch": 2.866588746034396, "grad_norm": 0.5436549484026989, "learning_rate": 5.97027802742034e-08, "loss": 0.168, "step": 8584 }, { "epoch": 2.866922691601269, "grad_norm": 0.572267212305895, "learning_rate": 5.940373794187326e-08, "loss": 0.1636, "step": 8585 }, { "epoch": 2.8672566371681416, "grad_norm": 0.5209482329083956, "learning_rate": 5.910544194478174e-08, "loss": 0.1509, "step": 8586 }, { "epoch": 2.8675905827350143, "grad_norm": 0.5484505549507601, "learning_rate": 5.880789232799e-08, "loss": 0.1565, "step": 8587 }, { "epoch": 2.867924528301887, "grad_norm": 0.5478787273088694, "learning_rate": 5.851108913644765e-08, "loss": 0.1641, "step": 8588 }, { "epoch": 2.8682584738687593, "grad_norm": 0.5522406436362908, "learning_rate": 5.821503241498882e-08, "loss": 0.1636, "step": 8589 }, { "epoch": 2.868592419435632, "grad_norm": 0.511907461284583, "learning_rate": 5.791972220833719e-08, "loss": 0.1552, "step": 8590 }, { "epoch": 2.8689263650025048, "grad_norm": 0.5152899425702656, "learning_rate": 5.762515856110262e-08, "loss": 0.1517, "step": 8591 }, { "epoch": 2.869260310569377, "grad_norm": 0.5258169726148847, "learning_rate": 5.7331341517782855e-08, "loss": 0.1528, "step": 8592 }, { "epoch": 2.8695942561362497, "grad_norm": 0.4713808899737005, "learning_rate": 5.703827112276128e-08, "loss": 0.1416, "step": 8593 }, { "epoch": 2.8699282017031225, "grad_norm": 0.5273185402336777, "learning_rate": 5.674594742031081e-08, "loss": 0.1601, "step": 8594 }, { "epoch": 2.8702621472699947, "grad_norm": 0.5220436578594836, "learning_rate": 5.6454370454589456e-08, "loss": 0.1562, "step": 8595 }, { "epoch": 2.8705960928368675, "grad_norm": 0.5221063380938278, "learning_rate": 5.6163540269644215e-08, "loss": 0.1518, "step": 8596 }, { "epoch": 2.87093003840374, "grad_norm": 0.4974464271881414, "learning_rate": 5.5873456909407706e-08, "loss": 0.1505, "step": 8597 }, { "epoch": 2.871263983970613, "grad_norm": 0.5046804081978551, "learning_rate": 5.5584120417701005e-08, "loss": 0.1511, "step": 8598 }, { "epoch": 2.8715979295374856, "grad_norm": 0.5053148608639023, "learning_rate": 5.529553083823136e-08, "loss": 0.1516, "step": 8599 }, { "epoch": 2.871931875104358, "grad_norm": 0.5777837178110677, "learning_rate": 5.50076882145939e-08, "loss": 0.1639, "step": 8600 }, { "epoch": 2.8722658206712306, "grad_norm": 0.5620172653957106, "learning_rate": 5.472059259027051e-08, "loss": 0.1632, "step": 8601 }, { "epoch": 2.8725997662381033, "grad_norm": 0.5347887245031796, "learning_rate": 5.44342440086304e-08, "loss": 0.1568, "step": 8602 }, { "epoch": 2.8729337118049756, "grad_norm": 0.5236727280193502, "learning_rate": 5.414864251293006e-08, "loss": 0.1484, "step": 8603 }, { "epoch": 2.8732676573718483, "grad_norm": 0.5322026712399329, "learning_rate": 5.386378814631277e-08, "loss": 0.1536, "step": 8604 }, { "epoch": 2.873601602938721, "grad_norm": 0.534740394002748, "learning_rate": 5.3579680951808545e-08, "loss": 0.1604, "step": 8605 }, { "epoch": 2.8739355485055937, "grad_norm": 0.5853877385693443, "learning_rate": 5.329632097233639e-08, "loss": 0.1597, "step": 8606 }, { "epoch": 2.8742694940724665, "grad_norm": 0.5011755113078209, "learning_rate": 5.3013708250700405e-08, "loss": 0.1491, "step": 8607 }, { "epoch": 2.8746034396393387, "grad_norm": 0.5460105318096763, "learning_rate": 5.2731842829591984e-08, "loss": 0.1603, "step": 8608 }, { "epoch": 2.8749373852062114, "grad_norm": 0.5535251387041829, "learning_rate": 5.2450724751592076e-08, "loss": 0.1646, "step": 8609 }, { "epoch": 2.875271330773084, "grad_norm": 0.5412106966495831, "learning_rate": 5.217035405916449e-08, "loss": 0.1524, "step": 8610 }, { "epoch": 2.8756052763399564, "grad_norm": 0.48812577783849903, "learning_rate": 5.1890730794664227e-08, "loss": 0.1438, "step": 8611 }, { "epoch": 2.875939221906829, "grad_norm": 0.5630739912686819, "learning_rate": 5.161185500033139e-08, "loss": 0.1621, "step": 8612 }, { "epoch": 2.876273167473702, "grad_norm": 0.5637930034961525, "learning_rate": 5.1333726718293396e-08, "loss": 0.1584, "step": 8613 }, { "epoch": 2.876607113040574, "grad_norm": 0.5413995620609211, "learning_rate": 5.105634599056386e-08, "loss": 0.1517, "step": 8614 }, { "epoch": 2.876941058607447, "grad_norm": 0.5284011452682914, "learning_rate": 5.077971285904593e-08, "loss": 0.1532, "step": 8615 }, { "epoch": 2.8772750041743196, "grad_norm": 0.5373211068578061, "learning_rate": 5.050382736552728e-08, "loss": 0.1567, "step": 8616 }, { "epoch": 2.8776089497411923, "grad_norm": 0.5164966070831943, "learning_rate": 5.022868955168403e-08, "loss": 0.1498, "step": 8617 }, { "epoch": 2.877942895308065, "grad_norm": 0.5016749655968777, "learning_rate": 4.995429945907848e-08, "loss": 0.148, "step": 8618 }, { "epoch": 2.8782768408749373, "grad_norm": 0.5350771758808319, "learning_rate": 4.968065712916137e-08, "loss": 0.1571, "step": 8619 }, { "epoch": 2.87861078644181, "grad_norm": 0.5315077919152822, "learning_rate": 4.940776260326907e-08, "loss": 0.149, "step": 8620 }, { "epoch": 2.8789447320086827, "grad_norm": 0.530178375587157, "learning_rate": 4.913561592262528e-08, "loss": 0.1555, "step": 8621 }, { "epoch": 2.879278677575555, "grad_norm": 0.5217846163655511, "learning_rate": 4.886421712834155e-08, "loss": 0.1571, "step": 8622 }, { "epoch": 2.8796126231424277, "grad_norm": 0.5115490799371355, "learning_rate": 4.859356626141509e-08, "loss": 0.1491, "step": 8623 }, { "epoch": 2.8799465687093004, "grad_norm": 0.5516763395859796, "learning_rate": 4.8323663362732084e-08, "loss": 0.166, "step": 8624 }, { "epoch": 2.8802805142761727, "grad_norm": 0.5599558340991891, "learning_rate": 4.8054508473063253e-08, "loss": 0.1685, "step": 8625 }, { "epoch": 2.8806144598430454, "grad_norm": 0.4982081440561017, "learning_rate": 4.778610163306885e-08, "loss": 0.1551, "step": 8626 }, { "epoch": 2.880948405409918, "grad_norm": 0.5606710703278645, "learning_rate": 4.751844288329366e-08, "loss": 0.1567, "step": 8627 }, { "epoch": 2.881282350976791, "grad_norm": 0.5087763751751658, "learning_rate": 4.72515322641709e-08, "loss": 0.1464, "step": 8628 }, { "epoch": 2.8816162965436636, "grad_norm": 0.552549417686956, "learning_rate": 4.6985369816021644e-08, "loss": 0.1565, "step": 8629 }, { "epoch": 2.881950242110536, "grad_norm": 0.49592917632561173, "learning_rate": 4.6719955579052064e-08, "loss": 0.1448, "step": 8630 }, { "epoch": 2.8822841876774086, "grad_norm": 0.5255353569803617, "learning_rate": 4.6455289593355656e-08, "loss": 0.1557, "step": 8631 }, { "epoch": 2.8826181332442813, "grad_norm": 0.5534574457295879, "learning_rate": 4.619137189891432e-08, "loss": 0.163, "step": 8632 }, { "epoch": 2.8829520788111536, "grad_norm": 0.5397072465178986, "learning_rate": 4.5928202535595044e-08, "loss": 0.157, "step": 8633 }, { "epoch": 2.8832860243780263, "grad_norm": 0.549349296130443, "learning_rate": 4.5665781543153266e-08, "loss": 0.1645, "step": 8634 }, { "epoch": 2.883619969944899, "grad_norm": 0.5420717576627341, "learning_rate": 4.54041089612306e-08, "loss": 0.1656, "step": 8635 }, { "epoch": 2.8839539155117717, "grad_norm": 0.5363732368933584, "learning_rate": 4.514318482935598e-08, "loss": 0.1617, "step": 8636 }, { "epoch": 2.8842878610786444, "grad_norm": 0.5350097682625145, "learning_rate": 4.488300918694455e-08, "loss": 0.1562, "step": 8637 }, { "epoch": 2.8846218066455167, "grad_norm": 0.5246389948891459, "learning_rate": 4.4623582073299864e-08, "loss": 0.158, "step": 8638 }, { "epoch": 2.8849557522123894, "grad_norm": 0.5436712211339741, "learning_rate": 4.4364903527610026e-08, "loss": 0.1545, "step": 8639 }, { "epoch": 2.885289697779262, "grad_norm": 0.5009484631759727, "learning_rate": 4.410697358895211e-08, "loss": 0.1464, "step": 8640 }, { "epoch": 2.8856236433461344, "grad_norm": 0.5290374848110081, "learning_rate": 4.384979229628994e-08, "loss": 0.1482, "step": 8641 }, { "epoch": 2.885957588913007, "grad_norm": 0.5504796169207347, "learning_rate": 4.359335968847356e-08, "loss": 0.1583, "step": 8642 }, { "epoch": 2.88629153447988, "grad_norm": 0.5589515588770203, "learning_rate": 4.333767580423976e-08, "loss": 0.1672, "step": 8643 }, { "epoch": 2.886625480046752, "grad_norm": 0.5394714452979373, "learning_rate": 4.3082740682213186e-08, "loss": 0.1581, "step": 8644 }, { "epoch": 2.886959425613625, "grad_norm": 0.5311277472998632, "learning_rate": 4.2828554360904165e-08, "loss": 0.16, "step": 8645 }, { "epoch": 2.8872933711804976, "grad_norm": 0.5214855770881811, "learning_rate": 4.25751168787103e-08, "loss": 0.1584, "step": 8646 }, { "epoch": 2.8876273167473703, "grad_norm": 0.5196864326489907, "learning_rate": 4.2322428273917635e-08, "loss": 0.1505, "step": 8647 }, { "epoch": 2.887961262314243, "grad_norm": 0.5395236365751814, "learning_rate": 4.2070488584696754e-08, "loss": 0.1583, "step": 8648 }, { "epoch": 2.8882952078811153, "grad_norm": 0.5281187130550312, "learning_rate": 4.18192978491061e-08, "loss": 0.157, "step": 8649 }, { "epoch": 2.888629153447988, "grad_norm": 0.5335230980635554, "learning_rate": 4.1568856105091424e-08, "loss": 0.1605, "step": 8650 }, { "epoch": 2.8889630990148607, "grad_norm": 0.5585441771953708, "learning_rate": 4.1319163390484693e-08, "loss": 0.1614, "step": 8651 }, { "epoch": 2.889297044581733, "grad_norm": 0.5469020513513515, "learning_rate": 4.107021974300407e-08, "loss": 0.1586, "step": 8652 }, { "epoch": 2.8896309901486057, "grad_norm": 0.507051026666806, "learning_rate": 4.082202520025724e-08, "loss": 0.151, "step": 8653 }, { "epoch": 2.8899649357154784, "grad_norm": 0.4997794435735754, "learning_rate": 4.0574579799735335e-08, "loss": 0.1458, "step": 8654 }, { "epoch": 2.890298881282351, "grad_norm": 0.530000754000752, "learning_rate": 4.0327883578819006e-08, "loss": 0.1558, "step": 8655 }, { "epoch": 2.890632826849224, "grad_norm": 0.5068361461273592, "learning_rate": 4.008193657477399e-08, "loss": 0.1485, "step": 8656 }, { "epoch": 2.890966772416096, "grad_norm": 0.5529734401920643, "learning_rate": 3.9836738824753364e-08, "loss": 0.1637, "step": 8657 }, { "epoch": 2.891300717982969, "grad_norm": 0.5096699557010229, "learning_rate": 3.959229036579748e-08, "loss": 0.1483, "step": 8658 }, { "epoch": 2.8916346635498416, "grad_norm": 0.4798650693812542, "learning_rate": 3.9348591234832926e-08, "loss": 0.1448, "step": 8659 }, { "epoch": 2.891968609116714, "grad_norm": 0.4990085154143496, "learning_rate": 3.9105641468673574e-08, "loss": 0.1561, "step": 8660 }, { "epoch": 2.8923025546835865, "grad_norm": 0.5224750928551145, "learning_rate": 3.886344110402007e-08, "loss": 0.1531, "step": 8661 }, { "epoch": 2.8926365002504593, "grad_norm": 0.5338496892667479, "learning_rate": 3.862199017745871e-08, "loss": 0.1585, "step": 8662 }, { "epoch": 2.8929704458173315, "grad_norm": 0.5134775255661097, "learning_rate": 3.838128872546421e-08, "loss": 0.1547, "step": 8663 }, { "epoch": 2.8933043913842043, "grad_norm": 0.5331918577725364, "learning_rate": 3.814133678439691e-08, "loss": 0.1587, "step": 8664 }, { "epoch": 2.893638336951077, "grad_norm": 0.517842146198162, "learning_rate": 3.790213439050561e-08, "loss": 0.1533, "step": 8665 }, { "epoch": 2.8939722825179497, "grad_norm": 0.5189498284952392, "learning_rate": 3.766368157992306e-08, "loss": 0.1523, "step": 8666 }, { "epoch": 2.8943062280848224, "grad_norm": 0.49860727440804087, "learning_rate": 3.7425978388671014e-08, "loss": 0.1468, "step": 8667 }, { "epoch": 2.8946401736516947, "grad_norm": 0.5297330753233082, "learning_rate": 3.718902485265741e-08, "loss": 0.1577, "step": 8668 }, { "epoch": 2.8949741192185674, "grad_norm": 0.5334255049213774, "learning_rate": 3.6952821007676943e-08, "loss": 0.1537, "step": 8669 }, { "epoch": 2.89530806478544, "grad_norm": 0.5289833683098253, "learning_rate": 3.671736688941108e-08, "loss": 0.152, "step": 8670 }, { "epoch": 2.8956420103523124, "grad_norm": 0.4915608352001022, "learning_rate": 3.6482662533426914e-08, "loss": 0.1436, "step": 8671 }, { "epoch": 2.895975955919185, "grad_norm": 0.5486892456552841, "learning_rate": 3.6248707975181096e-08, "loss": 0.1543, "step": 8672 }, { "epoch": 2.896309901486058, "grad_norm": 0.5503337548743263, "learning_rate": 3.601550325001313e-08, "loss": 0.1567, "step": 8673 }, { "epoch": 2.89664384705293, "grad_norm": 0.5034194587207542, "learning_rate": 3.578304839315316e-08, "loss": 0.1445, "step": 8674 }, { "epoch": 2.896977792619803, "grad_norm": 0.5146376386794662, "learning_rate": 3.5551343439715336e-08, "loss": 0.1441, "step": 8675 }, { "epoch": 2.8973117381866755, "grad_norm": 0.5207461435783136, "learning_rate": 3.5320388424701644e-08, "loss": 0.1455, "step": 8676 }, { "epoch": 2.8976456837535483, "grad_norm": 0.5822846711151547, "learning_rate": 3.50901833830003e-08, "loss": 0.1609, "step": 8677 }, { "epoch": 2.897979629320421, "grad_norm": 0.4881700169042898, "learning_rate": 3.4860728349386807e-08, "loss": 0.151, "step": 8678 }, { "epoch": 2.8983135748872932, "grad_norm": 0.5621883536149694, "learning_rate": 3.4632023358522894e-08, "loss": 0.1619, "step": 8679 }, { "epoch": 2.898647520454166, "grad_norm": 0.5110749883128612, "learning_rate": 3.440406844495758e-08, "loss": 0.1549, "step": 8680 }, { "epoch": 2.8989814660210387, "grad_norm": 0.4713601733022798, "learning_rate": 3.4176863643125e-08, "loss": 0.1366, "step": 8681 }, { "epoch": 2.899315411587911, "grad_norm": 0.5227456137198871, "learning_rate": 3.395040898734825e-08, "loss": 0.1552, "step": 8682 }, { "epoch": 2.8996493571547837, "grad_norm": 0.6136173309903541, "learning_rate": 3.372470451183496e-08, "loss": 0.1657, "step": 8683 }, { "epoch": 2.8999833027216564, "grad_norm": 0.5297325697192652, "learning_rate": 3.349975025068175e-08, "loss": 0.1527, "step": 8684 }, { "epoch": 2.900317248288529, "grad_norm": 0.6288686454375647, "learning_rate": 3.327554623786977e-08, "loss": 0.161, "step": 8685 }, { "epoch": 2.900651193855402, "grad_norm": 0.5269797630905554, "learning_rate": 3.305209250726804e-08, "loss": 0.1501, "step": 8686 }, { "epoch": 2.900985139422274, "grad_norm": 0.5554879926954489, "learning_rate": 3.282938909263122e-08, "loss": 0.1624, "step": 8687 }, { "epoch": 2.901319084989147, "grad_norm": 0.5208615726777216, "learning_rate": 3.2607436027601854e-08, "loss": 0.1534, "step": 8688 }, { "epoch": 2.9016530305560195, "grad_norm": 0.5420289935437654, "learning_rate": 3.238623334570812e-08, "loss": 0.1535, "step": 8689 }, { "epoch": 2.901986976122892, "grad_norm": 0.538732737361838, "learning_rate": 3.2165781080366054e-08, "loss": 0.1569, "step": 8690 }, { "epoch": 2.9023209216897645, "grad_norm": 0.5313485948505864, "learning_rate": 3.194607926487681e-08, "loss": 0.1586, "step": 8691 }, { "epoch": 2.9026548672566372, "grad_norm": 0.5356772127350496, "learning_rate": 3.1727127932429936e-08, "loss": 0.1568, "step": 8692 }, { "epoch": 2.9029888128235095, "grad_norm": 0.5169393673994584, "learning_rate": 3.150892711609899e-08, "loss": 0.1603, "step": 8693 }, { "epoch": 2.9033227583903822, "grad_norm": 0.4894913655189434, "learning_rate": 3.129147684884704e-08, "loss": 0.1385, "step": 8694 }, { "epoch": 2.903656703957255, "grad_norm": 0.53281078755593, "learning_rate": 3.107477716352225e-08, "loss": 0.1507, "step": 8695 }, { "epoch": 2.9039906495241277, "grad_norm": 0.5359916182265713, "learning_rate": 3.0858828092859564e-08, "loss": 0.1701, "step": 8696 }, { "epoch": 2.9043245950910004, "grad_norm": 0.5262814413620647, "learning_rate": 3.0643629669480644e-08, "loss": 0.1534, "step": 8697 }, { "epoch": 2.9046585406578727, "grad_norm": 0.5497355759559763, "learning_rate": 3.042918192589395e-08, "loss": 0.1596, "step": 8698 }, { "epoch": 2.9049924862247454, "grad_norm": 0.5412939441164044, "learning_rate": 3.021548489449355e-08, "loss": 0.1588, "step": 8699 }, { "epoch": 2.905326431791618, "grad_norm": 0.49003677617408303, "learning_rate": 3.000253860756197e-08, "loss": 0.1559, "step": 8700 }, { "epoch": 2.9056603773584904, "grad_norm": 0.5473785424325217, "learning_rate": 2.979034309726625e-08, "loss": 0.1618, "step": 8701 }, { "epoch": 2.905994322925363, "grad_norm": 0.5097771881556972, "learning_rate": 2.9578898395661858e-08, "loss": 0.1488, "step": 8702 }, { "epoch": 2.906328268492236, "grad_norm": 0.5149654857058888, "learning_rate": 2.9368204534689916e-08, "loss": 0.1468, "step": 8703 }, { "epoch": 2.9066622140591085, "grad_norm": 0.5259625454544891, "learning_rate": 2.915826154617718e-08, "loss": 0.1545, "step": 8704 }, { "epoch": 2.9069961596259812, "grad_norm": 0.48948710540364265, "learning_rate": 2.8949069461839952e-08, "loss": 0.1401, "step": 8705 }, { "epoch": 2.9073301051928535, "grad_norm": 0.5531487170068914, "learning_rate": 2.8740628313276842e-08, "loss": 0.1606, "step": 8706 }, { "epoch": 2.9076640507597262, "grad_norm": 0.5032223065794879, "learning_rate": 2.853293813197766e-08, "loss": 0.1505, "step": 8707 }, { "epoch": 2.907997996326599, "grad_norm": 0.5328715714346113, "learning_rate": 2.8325998949314536e-08, "loss": 0.1536, "step": 8708 }, { "epoch": 2.908331941893471, "grad_norm": 0.6225983310360557, "learning_rate": 2.811981079654913e-08, "loss": 0.1652, "step": 8709 }, { "epoch": 2.908665887460344, "grad_norm": 0.5526958652803692, "learning_rate": 2.7914373704827634e-08, "loss": 0.1601, "step": 8710 }, { "epoch": 2.9089998330272167, "grad_norm": 0.5632023342482809, "learning_rate": 2.7709687705185227e-08, "loss": 0.1673, "step": 8711 }, { "epoch": 2.909333778594089, "grad_norm": 0.5155594811296534, "learning_rate": 2.7505752828541065e-08, "loss": 0.1518, "step": 8712 }, { "epoch": 2.9096677241609616, "grad_norm": 0.5339483867535059, "learning_rate": 2.730256910570217e-08, "loss": 0.1641, "step": 8713 }, { "epoch": 2.9100016697278344, "grad_norm": 0.5461665859120574, "learning_rate": 2.7100136567361767e-08, "loss": 0.1576, "step": 8714 }, { "epoch": 2.910335615294707, "grad_norm": 0.5336837655705691, "learning_rate": 2.689845524409984e-08, "loss": 0.1493, "step": 8715 }, { "epoch": 2.91066956086158, "grad_norm": 0.4644299519054088, "learning_rate": 2.6697525166382575e-08, "loss": 0.1365, "step": 8716 }, { "epoch": 2.911003506428452, "grad_norm": 0.5243383754514328, "learning_rate": 2.649734636456236e-08, "loss": 0.1487, "step": 8717 }, { "epoch": 2.911337451995325, "grad_norm": 0.566255311577455, "learning_rate": 2.629791886888e-08, "loss": 0.1657, "step": 8718 }, { "epoch": 2.9116713975621975, "grad_norm": 0.5151497217026064, "learning_rate": 2.6099242709459737e-08, "loss": 0.1535, "step": 8719 }, { "epoch": 2.91200534312907, "grad_norm": 0.5286692237715818, "learning_rate": 2.5901317916314783e-08, "loss": 0.1574, "step": 8720 }, { "epoch": 2.9123392886959425, "grad_norm": 0.5286053495921429, "learning_rate": 2.5704144519344e-08, "loss": 0.1609, "step": 8721 }, { "epoch": 2.912673234262815, "grad_norm": 0.5265287526525314, "learning_rate": 2.5507722548332446e-08, "loss": 0.151, "step": 8722 }, { "epoch": 2.9130071798296875, "grad_norm": 0.5179557694097449, "learning_rate": 2.5312052032952505e-08, "loss": 0.1517, "step": 8723 }, { "epoch": 2.91334112539656, "grad_norm": 0.5090029191612081, "learning_rate": 2.5117133002762196e-08, "loss": 0.1455, "step": 8724 }, { "epoch": 2.913675070963433, "grad_norm": 0.5325582522074706, "learning_rate": 2.492296548720574e-08, "loss": 0.1581, "step": 8725 }, { "epoch": 2.9140090165303056, "grad_norm": 0.5281192943466827, "learning_rate": 2.4729549515615235e-08, "loss": 0.1561, "step": 8726 }, { "epoch": 2.9143429620971784, "grad_norm": 0.5038006994679268, "learning_rate": 2.453688511720842e-08, "loss": 0.1418, "step": 8727 }, { "epoch": 2.9146769076640506, "grad_norm": 0.5549235611646274, "learning_rate": 2.4344972321089234e-08, "loss": 0.1599, "step": 8728 }, { "epoch": 2.9150108532309233, "grad_norm": 0.5302992496522636, "learning_rate": 2.415381115624782e-08, "loss": 0.1574, "step": 8729 }, { "epoch": 2.915344798797796, "grad_norm": 0.5110893643807364, "learning_rate": 2.3963401651562747e-08, "loss": 0.1544, "step": 8730 }, { "epoch": 2.9156787443646683, "grad_norm": 0.48901569574272535, "learning_rate": 2.3773743835796558e-08, "loss": 0.148, "step": 8731 }, { "epoch": 2.916012689931541, "grad_norm": 0.5057738968119625, "learning_rate": 2.358483773759912e-08, "loss": 0.148, "step": 8732 }, { "epoch": 2.9163466354984138, "grad_norm": 0.5454906371257616, "learning_rate": 2.33966833855076e-08, "loss": 0.1649, "step": 8733 }, { "epoch": 2.9166805810652865, "grad_norm": 0.5538324525215251, "learning_rate": 2.320928080794482e-08, "loss": 0.1628, "step": 8734 }, { "epoch": 2.917014526632159, "grad_norm": 0.5046291060510665, "learning_rate": 2.3022630033219807e-08, "loss": 0.1389, "step": 8735 }, { "epoch": 2.9173484721990315, "grad_norm": 0.5542770135094989, "learning_rate": 2.2836731089528886e-08, "loss": 0.1655, "step": 8736 }, { "epoch": 2.917682417765904, "grad_norm": 0.5699978406392153, "learning_rate": 2.2651584004953485e-08, "loss": 0.1642, "step": 8737 }, { "epoch": 2.918016363332777, "grad_norm": 0.5293384260135789, "learning_rate": 2.2467188807462902e-08, "loss": 0.1564, "step": 8738 }, { "epoch": 2.918350308899649, "grad_norm": 0.551306712890341, "learning_rate": 2.2283545524912075e-08, "loss": 0.1614, "step": 8739 }, { "epoch": 2.918684254466522, "grad_norm": 0.5530682179532525, "learning_rate": 2.210065418504215e-08, "loss": 0.1504, "step": 8740 }, { "epoch": 2.9190182000333946, "grad_norm": 0.5133652618900586, "learning_rate": 2.1918514815481572e-08, "loss": 0.1513, "step": 8741 }, { "epoch": 2.919352145600267, "grad_norm": 0.48003189542915814, "learning_rate": 2.17371274437439e-08, "loss": 0.139, "step": 8742 }, { "epoch": 2.9196860911671396, "grad_norm": 0.5691839116317102, "learning_rate": 2.155649209723054e-08, "loss": 0.1588, "step": 8743 }, { "epoch": 2.9200200367340123, "grad_norm": 0.5663322722634725, "learning_rate": 2.137660880322856e-08, "loss": 0.1551, "step": 8744 }, { "epoch": 2.920353982300885, "grad_norm": 0.522117785518442, "learning_rate": 2.1197477588910666e-08, "loss": 0.1534, "step": 8745 }, { "epoch": 2.9206879278677578, "grad_norm": 0.5412269304337229, "learning_rate": 2.101909848133743e-08, "loss": 0.1605, "step": 8746 }, { "epoch": 2.92102187343463, "grad_norm": 0.5590537588726184, "learning_rate": 2.0841471507455635e-08, "loss": 0.1613, "step": 8747 }, { "epoch": 2.9213558190015028, "grad_norm": 0.5697624900762938, "learning_rate": 2.0664596694096596e-08, "loss": 0.1606, "step": 8748 }, { "epoch": 2.9216897645683755, "grad_norm": 0.5509931109216466, "learning_rate": 2.0488474067980045e-08, "loss": 0.1608, "step": 8749 }, { "epoch": 2.9220237101352478, "grad_norm": 0.5328571613716071, "learning_rate": 2.0313103655711373e-08, "loss": 0.1549, "step": 8750 }, { "epoch": 2.9223576557021205, "grad_norm": 0.513781721288196, "learning_rate": 2.0138485483782723e-08, "loss": 0.1522, "step": 8751 }, { "epoch": 2.922691601268993, "grad_norm": 0.5293720468326389, "learning_rate": 1.996461957857132e-08, "loss": 0.1501, "step": 8752 }, { "epoch": 2.923025546835866, "grad_norm": 0.5633355764295512, "learning_rate": 1.9791505966342273e-08, "loss": 0.1624, "step": 8753 }, { "epoch": 2.9233594924027386, "grad_norm": 0.5170586045028958, "learning_rate": 1.9619144673246325e-08, "loss": 0.1503, "step": 8754 }, { "epoch": 2.923693437969611, "grad_norm": 0.5323221698384542, "learning_rate": 1.9447535725320987e-08, "loss": 0.1494, "step": 8755 }, { "epoch": 2.9240273835364836, "grad_norm": 0.5356685252034754, "learning_rate": 1.9276679148488854e-08, "loss": 0.1478, "step": 8756 }, { "epoch": 2.9243613291033563, "grad_norm": 0.5517955598586819, "learning_rate": 1.9106574968560943e-08, "loss": 0.1537, "step": 8757 }, { "epoch": 2.9246952746702286, "grad_norm": 0.5070202914078584, "learning_rate": 1.8937223211232257e-08, "loss": 0.1442, "step": 8758 }, { "epoch": 2.9250292202371013, "grad_norm": 0.5321126462836772, "learning_rate": 1.876862390208678e-08, "loss": 0.1546, "step": 8759 }, { "epoch": 2.925363165803974, "grad_norm": 0.5484493215810594, "learning_rate": 1.8600777066593023e-08, "loss": 0.1554, "step": 8760 }, { "epoch": 2.9256971113708463, "grad_norm": 0.5291482520966377, "learning_rate": 1.8433682730105706e-08, "loss": 0.1558, "step": 8761 }, { "epoch": 2.926031056937719, "grad_norm": 0.5176849375689132, "learning_rate": 1.8267340917866306e-08, "loss": 0.1506, "step": 8762 }, { "epoch": 2.9263650025045918, "grad_norm": 0.5205766280424573, "learning_rate": 1.8101751655003053e-08, "loss": 0.1494, "step": 8763 }, { "epoch": 2.9266989480714645, "grad_norm": 0.5050130872096809, "learning_rate": 1.793691496653094e-08, "loss": 0.1546, "step": 8764 }, { "epoch": 2.927032893638337, "grad_norm": 0.5509444172234437, "learning_rate": 1.7772830877348933e-08, "loss": 0.159, "step": 8765 }, { "epoch": 2.9273668392052095, "grad_norm": 0.5237455644993183, "learning_rate": 1.760949941224499e-08, "loss": 0.1498, "step": 8766 }, { "epoch": 2.927700784772082, "grad_norm": 0.5474546675198331, "learning_rate": 1.7446920595892147e-08, "loss": 0.1599, "step": 8767 }, { "epoch": 2.928034730338955, "grad_norm": 0.5292033875636737, "learning_rate": 1.7285094452849095e-08, "loss": 0.1528, "step": 8768 }, { "epoch": 2.928368675905827, "grad_norm": 0.5326258288778125, "learning_rate": 1.7124021007562385e-08, "loss": 0.1541, "step": 8769 }, { "epoch": 2.9287026214727, "grad_norm": 0.505259236701369, "learning_rate": 1.696370028436367e-08, "loss": 0.1504, "step": 8770 }, { "epoch": 2.9290365670395726, "grad_norm": 0.5295968720784965, "learning_rate": 1.6804132307471354e-08, "loss": 0.1554, "step": 8771 }, { "epoch": 2.929370512606445, "grad_norm": 0.5855530439478099, "learning_rate": 1.6645317100990044e-08, "loss": 0.1655, "step": 8772 }, { "epoch": 2.9297044581733176, "grad_norm": 0.5341122406061138, "learning_rate": 1.6487254688910546e-08, "loss": 0.1496, "step": 8773 }, { "epoch": 2.9300384037401903, "grad_norm": 0.4966688117422215, "learning_rate": 1.6329945095110435e-08, "loss": 0.1437, "step": 8774 }, { "epoch": 2.930372349307063, "grad_norm": 0.5169723744277109, "learning_rate": 1.6173388343352915e-08, "loss": 0.154, "step": 8775 }, { "epoch": 2.9307062948739357, "grad_norm": 0.5287153064523404, "learning_rate": 1.601758445728796e-08, "loss": 0.146, "step": 8776 }, { "epoch": 2.931040240440808, "grad_norm": 0.5645974728665406, "learning_rate": 1.586253346045119e-08, "loss": 0.1636, "step": 8777 }, { "epoch": 2.9313741860076807, "grad_norm": 0.5405173803625365, "learning_rate": 1.570823537626498e-08, "loss": 0.1619, "step": 8778 }, { "epoch": 2.9317081315745535, "grad_norm": 0.47878908052664143, "learning_rate": 1.5554690228037905e-08, "loss": 0.1496, "step": 8779 }, { "epoch": 2.9320420771414257, "grad_norm": 0.5136540347637661, "learning_rate": 1.5401898038964748e-08, "loss": 0.1505, "step": 8780 }, { "epoch": 2.9323760227082984, "grad_norm": 0.5066164231201058, "learning_rate": 1.5249858832126486e-08, "loss": 0.1515, "step": 8781 }, { "epoch": 2.932709968275171, "grad_norm": 0.5494650012983797, "learning_rate": 1.5098572630491414e-08, "loss": 0.1557, "step": 8782 }, { "epoch": 2.933043913842044, "grad_norm": 0.5356394443781805, "learning_rate": 1.4948039456911256e-08, "loss": 0.1598, "step": 8783 }, { "epoch": 2.9333778594089166, "grad_norm": 0.579231974043971, "learning_rate": 1.4798259334127263e-08, "loss": 0.1622, "step": 8784 }, { "epoch": 2.933711804975789, "grad_norm": 0.549857407176255, "learning_rate": 1.4649232284765225e-08, "loss": 0.1566, "step": 8785 }, { "epoch": 2.9340457505426616, "grad_norm": 0.4940294824980054, "learning_rate": 1.4500958331337134e-08, "loss": 0.1479, "step": 8786 }, { "epoch": 2.9343796961095343, "grad_norm": 0.5158826721565871, "learning_rate": 1.435343749624174e-08, "loss": 0.1515, "step": 8787 }, { "epoch": 2.9347136416764066, "grad_norm": 0.5551253298998934, "learning_rate": 1.420666980176344e-08, "loss": 0.1695, "step": 8788 }, { "epoch": 2.9350475872432793, "grad_norm": 0.5605107676992664, "learning_rate": 1.4060655270073387e-08, "loss": 0.1479, "step": 8789 }, { "epoch": 2.935381532810152, "grad_norm": 0.535586571303752, "learning_rate": 1.3915393923228936e-08, "loss": 0.1575, "step": 8790 }, { "epoch": 2.9357154783770243, "grad_norm": 0.4790608505081332, "learning_rate": 1.3770885783173649e-08, "loss": 0.1492, "step": 8791 }, { "epoch": 2.936049423943897, "grad_norm": 0.5189679586079701, "learning_rate": 1.3627130871737282e-08, "loss": 0.1468, "step": 8792 }, { "epoch": 2.9363833695107697, "grad_norm": 0.5060738164366134, "learning_rate": 1.3484129210635243e-08, "loss": 0.1486, "step": 8793 }, { "epoch": 2.9367173150776424, "grad_norm": 0.5236931826293424, "learning_rate": 1.3341880821469699e-08, "loss": 0.1517, "step": 8794 }, { "epoch": 2.937051260644515, "grad_norm": 0.5122034983251454, "learning_rate": 1.3200385725729014e-08, "loss": 0.146, "step": 8795 }, { "epoch": 2.9373852062113874, "grad_norm": 0.5644397660384538, "learning_rate": 1.3059643944787759e-08, "loss": 0.1627, "step": 8796 }, { "epoch": 2.93771915177826, "grad_norm": 0.5436746008349438, "learning_rate": 1.2919655499906703e-08, "loss": 0.1595, "step": 8797 }, { "epoch": 2.938053097345133, "grad_norm": 0.5430037231902027, "learning_rate": 1.2780420412232263e-08, "loss": 0.1577, "step": 8798 }, { "epoch": 2.938387042912005, "grad_norm": 0.5761697034083223, "learning_rate": 1.2641938702798174e-08, "loss": 0.1583, "step": 8799 }, { "epoch": 2.938720988478878, "grad_norm": 0.47887126004228137, "learning_rate": 1.2504210392523808e-08, "loss": 0.1374, "step": 8800 }, { "epoch": 2.9390549340457506, "grad_norm": 0.5759489993737833, "learning_rate": 1.2367235502214192e-08, "loss": 0.1603, "step": 8801 }, { "epoch": 2.9393888796126233, "grad_norm": 0.5153572086231668, "learning_rate": 1.2231014052560553e-08, "loss": 0.1525, "step": 8802 }, { "epoch": 2.939722825179496, "grad_norm": 0.518999090589087, "learning_rate": 1.2095546064141982e-08, "loss": 0.16, "step": 8803 }, { "epoch": 2.9400567707463683, "grad_norm": 0.54913576945233, "learning_rate": 1.196083155742156e-08, "loss": 0.159, "step": 8804 }, { "epoch": 2.940390716313241, "grad_norm": 0.5129445655764181, "learning_rate": 1.1826870552749669e-08, "loss": 0.1531, "step": 8805 }, { "epoch": 2.9407246618801137, "grad_norm": 0.5302553484489443, "learning_rate": 1.169366307036346e-08, "loss": 0.1511, "step": 8806 }, { "epoch": 2.941058607446986, "grad_norm": 0.5156122133349168, "learning_rate": 1.1561209130384055e-08, "loss": 0.145, "step": 8807 }, { "epoch": 2.9413925530138587, "grad_norm": 0.5127356121469071, "learning_rate": 1.1429508752821561e-08, "loss": 0.1478, "step": 8808 }, { "epoch": 2.9417264985807314, "grad_norm": 0.522416127983697, "learning_rate": 1.1298561957570065e-08, "loss": 0.156, "step": 8809 }, { "epoch": 2.9420604441476037, "grad_norm": 0.5832906496004197, "learning_rate": 1.1168368764410408e-08, "loss": 0.1676, "step": 8810 }, { "epoch": 2.9423943897144764, "grad_norm": 0.5065280786514194, "learning_rate": 1.103892919301075e-08, "loss": 0.1494, "step": 8811 }, { "epoch": 2.942728335281349, "grad_norm": 0.4869535167280794, "learning_rate": 1.0910243262923781e-08, "loss": 0.146, "step": 8812 }, { "epoch": 2.943062280848222, "grad_norm": 0.5054299642033462, "learning_rate": 1.0782310993589506e-08, "loss": 0.1485, "step": 8813 }, { "epoch": 2.9433962264150946, "grad_norm": 0.48002626148076355, "learning_rate": 1.0655132404333024e-08, "loss": 0.1442, "step": 8814 }, { "epoch": 2.943730171981967, "grad_norm": 0.5758905705157378, "learning_rate": 1.0528707514366743e-08, "loss": 0.168, "step": 8815 }, { "epoch": 2.9440641175488396, "grad_norm": 0.5102727690819755, "learning_rate": 1.0403036342787609e-08, "loss": 0.1505, "step": 8816 }, { "epoch": 2.9443980631157123, "grad_norm": 0.48725259548600885, "learning_rate": 1.0278118908580992e-08, "loss": 0.1423, "step": 8817 }, { "epoch": 2.9447320086825846, "grad_norm": 0.5339536559315551, "learning_rate": 1.0153955230616241e-08, "loss": 0.1593, "step": 8818 }, { "epoch": 2.9450659542494573, "grad_norm": 0.5337907365193634, "learning_rate": 1.0030545327650576e-08, "loss": 0.1554, "step": 8819 }, { "epoch": 2.94539989981633, "grad_norm": 0.5371286184405106, "learning_rate": 9.907889218325751e-09, "loss": 0.162, "step": 8820 }, { "epoch": 2.9457338453832023, "grad_norm": 0.5106944556580268, "learning_rate": 9.78598692117083e-09, "loss": 0.1527, "step": 8821 }, { "epoch": 2.946067790950075, "grad_norm": 0.4891694550107267, "learning_rate": 9.664838454599978e-09, "loss": 0.1417, "step": 8822 }, { "epoch": 2.9464017365169477, "grad_norm": 0.5067859025666626, "learning_rate": 9.544443836914664e-09, "loss": 0.1561, "step": 8823 }, { "epoch": 2.9467356820838204, "grad_norm": 0.6033702392881961, "learning_rate": 9.42480308630256e-09, "loss": 0.1666, "step": 8824 }, { "epoch": 2.947069627650693, "grad_norm": 0.5274076271656751, "learning_rate": 9.30591622083532e-09, "loss": 0.1581, "step": 8825 }, { "epoch": 2.9474035732175654, "grad_norm": 0.6016450274515811, "learning_rate": 9.187783258473027e-09, "loss": 0.1543, "step": 8826 }, { "epoch": 2.947737518784438, "grad_norm": 0.629833846802068, "learning_rate": 9.070404217061402e-09, "loss": 0.1559, "step": 8827 }, { "epoch": 2.948071464351311, "grad_norm": 0.49603332101775693, "learning_rate": 8.953779114331262e-09, "loss": 0.1417, "step": 8828 }, { "epoch": 2.948405409918183, "grad_norm": 0.5690481056431702, "learning_rate": 8.837907967900183e-09, "loss": 0.1615, "step": 8829 }, { "epoch": 2.948739355485056, "grad_norm": 0.5286074793185255, "learning_rate": 8.722790795272495e-09, "loss": 0.1498, "step": 8830 }, { "epoch": 2.9490733010519286, "grad_norm": 0.5394688875104848, "learning_rate": 8.608427613837622e-09, "loss": 0.1599, "step": 8831 }, { "epoch": 2.9494072466188013, "grad_norm": 0.5232591082172079, "learning_rate": 8.494818440871189e-09, "loss": 0.1518, "step": 8832 }, { "epoch": 2.949741192185674, "grad_norm": 0.5323106596009022, "learning_rate": 8.381963293535577e-09, "loss": 0.1489, "step": 8833 }, { "epoch": 2.9500751377525463, "grad_norm": 0.5059971519222234, "learning_rate": 8.269862188879374e-09, "loss": 0.1531, "step": 8834 }, { "epoch": 2.950409083319419, "grad_norm": 0.5337561349373425, "learning_rate": 8.158515143835698e-09, "loss": 0.1498, "step": 8835 }, { "epoch": 2.9507430288862917, "grad_norm": 0.5203134409775217, "learning_rate": 8.047922175225542e-09, "loss": 0.1616, "step": 8836 }, { "epoch": 2.951076974453164, "grad_norm": 0.5669691571198704, "learning_rate": 7.938083299754984e-09, "loss": 0.1622, "step": 8837 }, { "epoch": 2.9514109200200367, "grad_norm": 0.5413633005491444, "learning_rate": 7.828998534016308e-09, "loss": 0.1566, "step": 8838 }, { "epoch": 2.9517448655869094, "grad_norm": 0.5485356620391724, "learning_rate": 7.720667894488554e-09, "loss": 0.1603, "step": 8839 }, { "epoch": 2.9520788111537817, "grad_norm": 0.5412801017017537, "learning_rate": 7.613091397535855e-09, "loss": 0.1529, "step": 8840 }, { "epoch": 2.9524127567206544, "grad_norm": 0.5082343717572196, "learning_rate": 7.506269059409654e-09, "loss": 0.1489, "step": 8841 }, { "epoch": 2.952746702287527, "grad_norm": 0.5188298301684736, "learning_rate": 7.400200896245935e-09, "loss": 0.1548, "step": 8842 }, { "epoch": 2.9530806478544, "grad_norm": 0.5401849776800605, "learning_rate": 7.29488692406799e-09, "loss": 0.1637, "step": 8843 }, { "epoch": 2.9534145934212725, "grad_norm": 0.6017819588969197, "learning_rate": 7.190327158784205e-09, "loss": 0.17, "step": 8844 }, { "epoch": 2.953748538988145, "grad_norm": 0.5050764086292866, "learning_rate": 7.0865216161902785e-09, "loss": 0.1495, "step": 8845 }, { "epoch": 2.9540824845550175, "grad_norm": 0.5397065334704347, "learning_rate": 6.983470311967e-09, "loss": 0.1542, "step": 8846 }, { "epoch": 2.9544164301218903, "grad_norm": 0.49879861658122404, "learning_rate": 6.881173261680807e-09, "loss": 0.1403, "step": 8847 }, { "epoch": 2.9547503756887625, "grad_norm": 0.5275264970743618, "learning_rate": 6.779630480786004e-09, "loss": 0.1495, "step": 8848 }, { "epoch": 2.9550843212556352, "grad_norm": 0.5011084154145502, "learning_rate": 6.678841984621432e-09, "loss": 0.1522, "step": 8849 }, { "epoch": 2.955418266822508, "grad_norm": 0.5681686799273573, "learning_rate": 6.578807788411579e-09, "loss": 0.1636, "step": 8850 }, { "epoch": 2.9557522123893807, "grad_norm": 0.531060734134599, "learning_rate": 6.479527907268801e-09, "loss": 0.1514, "step": 8851 }, { "epoch": 2.9560861579562534, "grad_norm": 0.5219698663221229, "learning_rate": 6.381002356189991e-09, "loss": 0.1504, "step": 8852 }, { "epoch": 2.9564201035231257, "grad_norm": 0.5363223857696832, "learning_rate": 6.283231150058799e-09, "loss": 0.156, "step": 8853 }, { "epoch": 2.9567540490899984, "grad_norm": 0.5360700439648506, "learning_rate": 6.186214303645077e-09, "loss": 0.1574, "step": 8854 }, { "epoch": 2.957087994656871, "grad_norm": 0.5512505946034206, "learning_rate": 6.0899518316032135e-09, "loss": 0.1598, "step": 8855 }, { "epoch": 2.9574219402237434, "grad_norm": 0.5071274370861918, "learning_rate": 5.99444374847602e-09, "loss": 0.1502, "step": 8856 }, { "epoch": 2.957755885790616, "grad_norm": 0.5092307827434925, "learning_rate": 5.899690068690289e-09, "loss": 0.1435, "step": 8857 }, { "epoch": 2.958089831357489, "grad_norm": 0.49280916083565207, "learning_rate": 5.805690806560127e-09, "loss": 0.1444, "step": 8858 }, { "epoch": 2.958423776924361, "grad_norm": 0.5065398927260356, "learning_rate": 5.712445976285286e-09, "loss": 0.1511, "step": 8859 }, { "epoch": 2.958757722491234, "grad_norm": 0.5279817308600407, "learning_rate": 5.619955591951165e-09, "loss": 0.1616, "step": 8860 }, { "epoch": 2.9590916680581065, "grad_norm": 0.5624691691259425, "learning_rate": 5.528219667529921e-09, "loss": 0.1622, "step": 8861 }, { "epoch": 2.9594256136249792, "grad_norm": 0.5183889213776066, "learning_rate": 5.437238216878804e-09, "loss": 0.1538, "step": 8862 }, { "epoch": 2.959759559191852, "grad_norm": 0.5406835519360788, "learning_rate": 5.347011253741819e-09, "loss": 0.1587, "step": 8863 }, { "epoch": 2.9600935047587242, "grad_norm": 0.49802577951748117, "learning_rate": 5.257538791749173e-09, "loss": 0.1481, "step": 8864 }, { "epoch": 2.960427450325597, "grad_norm": 0.5261683688675758, "learning_rate": 5.168820844416167e-09, "loss": 0.1565, "step": 8865 }, { "epoch": 2.9607613958924697, "grad_norm": 0.5114224521968415, "learning_rate": 5.080857425145413e-09, "loss": 0.1521, "step": 8866 }, { "epoch": 2.961095341459342, "grad_norm": 0.5208384086955586, "learning_rate": 4.993648547224062e-09, "loss": 0.1551, "step": 8867 }, { "epoch": 2.9614292870262147, "grad_norm": 0.5482296679225996, "learning_rate": 4.907194223826572e-09, "loss": 0.1543, "step": 8868 }, { "epoch": 2.9617632325930874, "grad_norm": 0.5106259843517691, "learning_rate": 4.8214944680125e-09, "loss": 0.1499, "step": 8869 }, { "epoch": 2.9620971781599597, "grad_norm": 0.5261301025915729, "learning_rate": 4.736549292728154e-09, "loss": 0.1586, "step": 8870 }, { "epoch": 2.9624311237268324, "grad_norm": 0.5858625891292126, "learning_rate": 4.652358710805494e-09, "loss": 0.1639, "step": 8871 }, { "epoch": 2.962765069293705, "grad_norm": 0.5259863655766891, "learning_rate": 4.5689227349626775e-09, "loss": 0.1551, "step": 8872 }, { "epoch": 2.963099014860578, "grad_norm": 0.5277440908987092, "learning_rate": 4.486241377802958e-09, "loss": 0.1527, "step": 8873 }, { "epoch": 2.9634329604274505, "grad_norm": 0.5230736899290115, "learning_rate": 4.404314651816344e-09, "loss": 0.1591, "step": 8874 }, { "epoch": 2.963766905994323, "grad_norm": 0.5575061010473056, "learning_rate": 4.323142569379602e-09, "loss": 0.1604, "step": 8875 }, { "epoch": 2.9641008515611955, "grad_norm": 0.5401053105604766, "learning_rate": 4.242725142754589e-09, "loss": 0.1464, "step": 8876 }, { "epoch": 2.9644347971280682, "grad_norm": 0.5475423256849493, "learning_rate": 4.163062384088812e-09, "loss": 0.1624, "step": 8877 }, { "epoch": 2.9647687426949405, "grad_norm": 0.5765524254562182, "learning_rate": 4.0841543054165324e-09, "loss": 0.1742, "step": 8878 }, { "epoch": 2.9651026882618132, "grad_norm": 0.5045226252131164, "learning_rate": 4.006000918658215e-09, "loss": 0.1519, "step": 8879 }, { "epoch": 2.965436633828686, "grad_norm": 0.5294171058153377, "learning_rate": 3.928602235618861e-09, "loss": 0.146, "step": 8880 }, { "epoch": 2.9657705793955587, "grad_norm": 0.5557587243982196, "learning_rate": 3.851958267990785e-09, "loss": 0.16, "step": 8881 }, { "epoch": 2.9661045249624314, "grad_norm": 0.516710441466049, "learning_rate": 3.776069027352503e-09, "loss": 0.1487, "step": 8882 }, { "epoch": 2.9664384705293037, "grad_norm": 0.5615655633903573, "learning_rate": 3.700934525167621e-09, "loss": 0.1602, "step": 8883 }, { "epoch": 2.9667724160961764, "grad_norm": 0.5047773838898094, "learning_rate": 3.626554772786506e-09, "loss": 0.1444, "step": 8884 }, { "epoch": 2.967106361663049, "grad_norm": 0.515974634919327, "learning_rate": 3.5529297814440587e-09, "loss": 0.1498, "step": 8885 }, { "epoch": 2.9674403072299214, "grad_norm": 0.536967546601452, "learning_rate": 3.4800595622630497e-09, "loss": 0.1507, "step": 8886 }, { "epoch": 2.967774252796794, "grad_norm": 0.5415943444305699, "learning_rate": 3.407944126251339e-09, "loss": 0.1511, "step": 8887 }, { "epoch": 2.968108198363667, "grad_norm": 0.5237385427929732, "learning_rate": 3.336583484301881e-09, "loss": 0.1463, "step": 8888 }, { "epoch": 2.968442143930539, "grad_norm": 0.5448904790540473, "learning_rate": 3.2659776471960505e-09, "loss": 0.1659, "step": 8889 }, { "epoch": 2.968776089497412, "grad_norm": 0.47703613603288947, "learning_rate": 3.19612662559865e-09, "loss": 0.1421, "step": 8890 }, { "epoch": 2.9691100350642845, "grad_norm": 0.5685768307596133, "learning_rate": 3.1270304300617947e-09, "loss": 0.1746, "step": 8891 }, { "epoch": 2.969443980631157, "grad_norm": 0.5260057299774317, "learning_rate": 3.0586890710232465e-09, "loss": 0.1579, "step": 8892 }, { "epoch": 2.96977792619803, "grad_norm": 0.48821693615311545, "learning_rate": 2.9911025588069685e-09, "loss": 0.1446, "step": 8893 }, { "epoch": 2.970111871764902, "grad_norm": 0.4848027115717547, "learning_rate": 2.9242709036225723e-09, "loss": 0.1406, "step": 8894 }, { "epoch": 2.970445817331775, "grad_norm": 0.5188283228711802, "learning_rate": 2.858194115565871e-09, "loss": 0.151, "step": 8895 }, { "epoch": 2.9707797628986476, "grad_norm": 0.5553633231710993, "learning_rate": 2.7928722046177692e-09, "loss": 0.1645, "step": 8896 }, { "epoch": 2.97111370846552, "grad_norm": 0.510796810430822, "learning_rate": 2.7283051806470394e-09, "loss": 0.1477, "step": 8897 }, { "epoch": 2.9714476540323926, "grad_norm": 0.565538056069778, "learning_rate": 2.664493053406436e-09, "loss": 0.1614, "step": 8898 }, { "epoch": 2.9717815995992654, "grad_norm": 0.5381306694870884, "learning_rate": 2.6014358325360256e-09, "loss": 0.1585, "step": 8899 }, { "epoch": 2.972115545166138, "grad_norm": 0.5257043175764273, "learning_rate": 2.5391335275609665e-09, "loss": 0.1528, "step": 8900 }, { "epoch": 2.972449490733011, "grad_norm": 0.5353702583181583, "learning_rate": 2.4775861478937293e-09, "loss": 0.1523, "step": 8901 }, { "epoch": 2.972783436299883, "grad_norm": 0.508839273196237, "learning_rate": 2.416793702830211e-09, "loss": 0.1445, "step": 8902 }, { "epoch": 2.973117381866756, "grad_norm": 0.5087754840730636, "learning_rate": 2.3567562015547328e-09, "loss": 0.1427, "step": 8903 }, { "epoch": 2.9734513274336285, "grad_norm": 0.524446864735963, "learning_rate": 2.297473653136706e-09, "loss": 0.1457, "step": 8904 }, { "epoch": 2.9737852730005008, "grad_norm": 0.5317021238776584, "learning_rate": 2.2389460665317443e-09, "loss": 0.1547, "step": 8905 }, { "epoch": 2.9741192185673735, "grad_norm": 0.5407645623936888, "learning_rate": 2.1811734505799985e-09, "loss": 0.1641, "step": 8906 }, { "epoch": 2.974453164134246, "grad_norm": 0.5124507419550098, "learning_rate": 2.1241558140100426e-09, "loss": 0.1574, "step": 8907 }, { "epoch": 2.9747871097011185, "grad_norm": 0.5441593837282322, "learning_rate": 2.0678931654344314e-09, "loss": 0.158, "step": 8908 }, { "epoch": 2.975121055267991, "grad_norm": 0.5132818149239089, "learning_rate": 2.012385513351922e-09, "loss": 0.15, "step": 8909 }, { "epoch": 2.975455000834864, "grad_norm": 0.5508960448044162, "learning_rate": 1.9576328661480293e-09, "loss": 0.1608, "step": 8910 }, { "epoch": 2.9757889464017366, "grad_norm": 0.5478676465623783, "learning_rate": 1.9036352320939146e-09, "loss": 0.1563, "step": 8911 }, { "epoch": 2.9761228919686094, "grad_norm": 0.5743289834297103, "learning_rate": 1.850392619345831e-09, "loss": 0.169, "step": 8912 }, { "epoch": 2.9764568375354816, "grad_norm": 0.540526385238044, "learning_rate": 1.7979050359479e-09, "loss": 0.1572, "step": 8913 }, { "epoch": 2.9767907831023543, "grad_norm": 0.5118515001947324, "learning_rate": 1.746172489828224e-09, "loss": 0.1496, "step": 8914 }, { "epoch": 2.977124728669227, "grad_norm": 0.5295962054993424, "learning_rate": 1.6951949888016627e-09, "loss": 0.1488, "step": 8915 }, { "epoch": 2.9774586742360993, "grad_norm": 0.5556596310017836, "learning_rate": 1.6449725405687234e-09, "loss": 0.1555, "step": 8916 }, { "epoch": 2.977792619802972, "grad_norm": 0.568570152916496, "learning_rate": 1.59550515271667e-09, "loss": 0.1674, "step": 8917 }, { "epoch": 2.9781265653698448, "grad_norm": 0.5121218556638442, "learning_rate": 1.5467928327178582e-09, "loss": 0.1512, "step": 8918 }, { "epoch": 2.978460510936717, "grad_norm": 0.545092074861627, "learning_rate": 1.498835587930847e-09, "loss": 0.1579, "step": 8919 }, { "epoch": 2.9787944565035898, "grad_norm": 0.5238858042064578, "learning_rate": 1.4516334256003962e-09, "loss": 0.1558, "step": 8920 }, { "epoch": 2.9791284020704625, "grad_norm": 0.5058952627939988, "learning_rate": 1.4051863528563581e-09, "loss": 0.1447, "step": 8921 }, { "epoch": 2.979462347637335, "grad_norm": 0.48109655383730054, "learning_rate": 1.3594943767158974e-09, "loss": 0.1414, "step": 8922 }, { "epoch": 2.979796293204208, "grad_norm": 0.5352658964999676, "learning_rate": 1.3145575040801605e-09, "loss": 0.1466, "step": 8923 }, { "epoch": 2.98013023877108, "grad_norm": 0.5336342161222689, "learning_rate": 1.2703757417387164e-09, "loss": 0.1514, "step": 8924 }, { "epoch": 2.980464184337953, "grad_norm": 0.526133531148846, "learning_rate": 1.2269490963651154e-09, "loss": 0.154, "step": 8925 }, { "epoch": 2.9807981299048256, "grad_norm": 0.5280976403546764, "learning_rate": 1.1842775745196655e-09, "loss": 0.1564, "step": 8926 }, { "epoch": 2.981132075471698, "grad_norm": 0.49864255780159394, "learning_rate": 1.1423611826477665e-09, "loss": 0.153, "step": 8927 }, { "epoch": 2.9814660210385706, "grad_norm": 0.4990574803432966, "learning_rate": 1.1011999270821305e-09, "loss": 0.1519, "step": 8928 }, { "epoch": 2.9817999666054433, "grad_norm": 0.5206535132804454, "learning_rate": 1.0607938140400064e-09, "loss": 0.1481, "step": 8929 }, { "epoch": 2.982133912172316, "grad_norm": 0.5203593281225992, "learning_rate": 1.0211428496259557e-09, "loss": 0.1585, "step": 8930 }, { "epoch": 2.9824678577391888, "grad_norm": 0.511652837070169, "learning_rate": 9.822470398296312e-10, "loss": 0.1445, "step": 8931 }, { "epoch": 2.982801803306061, "grad_norm": 0.5299622217401528, "learning_rate": 9.441063905257785e-10, "loss": 0.1523, "step": 8932 }, { "epoch": 2.9831357488729338, "grad_norm": 0.5267298343073171, "learning_rate": 9.067209074770101e-10, "loss": 0.1556, "step": 8933 }, { "epoch": 2.9834696944398065, "grad_norm": 0.5149660634700207, "learning_rate": 8.700905963304751e-10, "loss": 0.1552, "step": 8934 }, { "epoch": 2.9838036400066787, "grad_norm": 0.6143707298607329, "learning_rate": 8.342154626195254e-10, "loss": 0.1573, "step": 8935 }, { "epoch": 2.9841375855735515, "grad_norm": 0.5435314943549732, "learning_rate": 7.990955117631594e-10, "loss": 0.1528, "step": 8936 }, { "epoch": 2.984471531140424, "grad_norm": 0.5484366140014967, "learning_rate": 7.647307490676881e-10, "loss": 0.1598, "step": 8937 }, { "epoch": 2.9848054767072965, "grad_norm": 0.55852719855818, "learning_rate": 7.311211797234041e-10, "loss": 0.1551, "step": 8938 }, { "epoch": 2.985139422274169, "grad_norm": 0.557832342106755, "learning_rate": 6.982668088079126e-10, "loss": 0.1673, "step": 8939 }, { "epoch": 2.985473367841042, "grad_norm": 0.5086856936416371, "learning_rate": 6.661676412844653e-10, "loss": 0.1518, "step": 8940 }, { "epoch": 2.9858073134079146, "grad_norm": 0.5437075245689668, "learning_rate": 6.348236820008513e-10, "loss": 0.1728, "step": 8941 }, { "epoch": 2.9861412589747873, "grad_norm": 0.5162048454653639, "learning_rate": 6.042349356932819e-10, "loss": 0.1616, "step": 8942 }, { "epoch": 2.9864752045416596, "grad_norm": 0.539657063914861, "learning_rate": 5.744014069819503e-10, "loss": 0.155, "step": 8943 }, { "epoch": 2.9868091501085323, "grad_norm": 0.5276528948415139, "learning_rate": 5.453231003732518e-10, "loss": 0.1614, "step": 8944 }, { "epoch": 2.987143095675405, "grad_norm": 0.5228415015085567, "learning_rate": 5.170000202608938e-10, "loss": 0.1488, "step": 8945 }, { "epoch": 2.9874770412422773, "grad_norm": 0.5328035030755964, "learning_rate": 4.894321709220106e-10, "loss": 0.1496, "step": 8946 }, { "epoch": 2.98781098680915, "grad_norm": 0.5211736551909211, "learning_rate": 4.626195565221592e-10, "loss": 0.156, "step": 8947 }, { "epoch": 2.9881449323760227, "grad_norm": 0.5664070819139843, "learning_rate": 4.365621811108778e-10, "loss": 0.162, "step": 8948 }, { "epoch": 2.9884788779428955, "grad_norm": 0.5453801039366839, "learning_rate": 4.112600486250173e-10, "loss": 0.1543, "step": 8949 }, { "epoch": 2.988812823509768, "grad_norm": 0.5163009694570976, "learning_rate": 3.867131628865206e-10, "loss": 0.15, "step": 8950 }, { "epoch": 2.9891467690766405, "grad_norm": 0.5144527921379415, "learning_rate": 3.629215276035325e-10, "loss": 0.1479, "step": 8951 }, { "epoch": 2.989480714643513, "grad_norm": 0.4984509452806607, "learning_rate": 3.3988514637040003e-10, "loss": 0.1503, "step": 8952 }, { "epoch": 2.989814660210386, "grad_norm": 0.530959054033469, "learning_rate": 3.176040226660071e-10, "loss": 0.1607, "step": 8953 }, { "epoch": 2.990148605777258, "grad_norm": 0.5323475806559018, "learning_rate": 2.960781598576601e-10, "loss": 0.1558, "step": 8954 }, { "epoch": 2.990482551344131, "grad_norm": 0.525452993656753, "learning_rate": 2.7530756119609204e-10, "loss": 0.1534, "step": 8955 }, { "epoch": 2.9908164969110036, "grad_norm": 0.5141292907473098, "learning_rate": 2.5529222981879323e-10, "loss": 0.1516, "step": 8956 }, { "epoch": 2.991150442477876, "grad_norm": 0.5160401121615004, "learning_rate": 2.360321687500111e-10, "loss": 0.1578, "step": 8957 }, { "epoch": 2.9914843880447486, "grad_norm": 0.5226041005544119, "learning_rate": 2.175273808985301e-10, "loss": 0.1587, "step": 8958 }, { "epoch": 2.9918183336116213, "grad_norm": 0.5481442045964605, "learning_rate": 1.9977786906044683e-10, "loss": 0.1589, "step": 8959 }, { "epoch": 2.992152279178494, "grad_norm": 0.5130316124877334, "learning_rate": 1.827836359163948e-10, "loss": 0.1543, "step": 8960 }, { "epoch": 2.9924862247453667, "grad_norm": 0.49218211497168046, "learning_rate": 1.665446840343199e-10, "loss": 0.1487, "step": 8961 }, { "epoch": 2.992820170312239, "grad_norm": 0.5173019376331516, "learning_rate": 1.5106101586614963e-10, "loss": 0.1496, "step": 8962 }, { "epoch": 2.9931541158791117, "grad_norm": 0.5519389841712395, "learning_rate": 1.3633263375223414e-10, "loss": 0.1646, "step": 8963 }, { "epoch": 2.9934880614459844, "grad_norm": 0.49634411519978117, "learning_rate": 1.223595399163502e-10, "loss": 0.1522, "step": 8964 }, { "epoch": 2.9938220070128567, "grad_norm": 0.5190963445321033, "learning_rate": 1.091417364695868e-10, "loss": 0.1518, "step": 8965 }, { "epoch": 2.9941559525797294, "grad_norm": 0.5046318641135656, "learning_rate": 9.667922540868013e-11, "loss": 0.1506, "step": 8966 }, { "epoch": 2.994489898146602, "grad_norm": 0.5811234054996668, "learning_rate": 8.49720086165684e-11, "loss": 0.1624, "step": 8967 }, { "epoch": 2.9948238437134744, "grad_norm": 0.5187060796598542, "learning_rate": 7.40200878618369e-11, "loss": 0.1494, "step": 8968 }, { "epoch": 2.995157789280347, "grad_norm": 0.5210939621866848, "learning_rate": 6.382346479816282e-11, "loss": 0.1562, "step": 8969 }, { "epoch": 2.99549173484722, "grad_norm": 0.5147922038095906, "learning_rate": 5.438214096653571e-11, "loss": 0.1591, "step": 8970 }, { "epoch": 2.9958256804140926, "grad_norm": 0.5091227792055766, "learning_rate": 4.569611779248195e-11, "loss": 0.1474, "step": 8971 }, { "epoch": 2.9961596259809653, "grad_norm": 0.52462660718752, "learning_rate": 3.776539658939538e-11, "loss": 0.1587, "step": 8972 }, { "epoch": 2.9964935715478376, "grad_norm": 0.5151458507703925, "learning_rate": 3.0589978553541286e-11, "loss": 0.1616, "step": 8973 }, { "epoch": 2.9968275171147103, "grad_norm": 0.5064544199820976, "learning_rate": 2.416986477071781e-11, "loss": 0.1483, "step": 8974 }, { "epoch": 2.997161462681583, "grad_norm": 0.5835195691906906, "learning_rate": 1.850505620903942e-11, "loss": 0.1629, "step": 8975 }, { "epoch": 2.9974954082484553, "grad_norm": 0.5602503239016194, "learning_rate": 1.3595553725598287e-11, "loss": 0.1666, "step": 8976 }, { "epoch": 2.997829353815328, "grad_norm": 0.5032549204664997, "learning_rate": 9.441358061468286e-12, "loss": 0.156, "step": 8977 }, { "epoch": 2.9981632993822007, "grad_norm": 0.5091638685400584, "learning_rate": 6.042469843925425e-12, "loss": 0.1564, "step": 8978 }, { "epoch": 2.9984972449490734, "grad_norm": 0.5291218098615191, "learning_rate": 3.398889586447851e-12, "loss": 0.1521, "step": 8979 }, { "epoch": 2.998831190515946, "grad_norm": 0.5183009155912531, "learning_rate": 1.5106176892709656e-12, "loss": 0.1503, "step": 8980 }, { "epoch": 2.9991651360828184, "grad_norm": 0.5279125117346501, "learning_rate": 3.7765443661186283e-13, "loss": 0.1586, "step": 8981 }, { "epoch": 2.999499081649691, "grad_norm": 0.4923848154444246, "learning_rate": 0.0, "loss": 0.1483, "step": 8982 }, { "epoch": 2.999499081649691, "eval_loss": 0.21598096191883087, "eval_runtime": 183.8798, "eval_samples_per_second": 109.708, "eval_steps_per_second": 1.719, "step": 8982 }, { "epoch": 2.999499081649691, "step": 8982, "total_flos": 2.8091411711025807e+18, "train_loss": 0.2020581066641609, "train_runtime": 42907.438, "train_samples_per_second": 26.797, "train_steps_per_second": 0.209 } ], "logging_steps": 1, "max_steps": 8982, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8091411711025807e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }