{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3737, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002675943270002676, "grad_norm": 12.188919067382812, "learning_rate": 1.0695187165775401e-08, "loss": 1.3815, "step": 1 }, { "epoch": 0.0005351886540005352, "grad_norm": 9.284855842590332, "learning_rate": 2.1390374331550803e-08, "loss": 1.4795, "step": 2 }, { "epoch": 0.0008027829810008028, "grad_norm": 10.941431999206543, "learning_rate": 3.2085561497326206e-08, "loss": 1.3657, "step": 3 }, { "epoch": 0.0010703773080010704, "grad_norm": 13.973172187805176, "learning_rate": 4.2780748663101606e-08, "loss": 1.4645, "step": 4 }, { "epoch": 0.001337971635001338, "grad_norm": 10.671640396118164, "learning_rate": 5.3475935828877005e-08, "loss": 1.3105, "step": 5 }, { "epoch": 0.0016055659620016055, "grad_norm": 12.536456108093262, "learning_rate": 6.417112299465241e-08, "loss": 1.4371, "step": 6 }, { "epoch": 0.0018731602890018732, "grad_norm": 12.72536849975586, "learning_rate": 7.48663101604278e-08, "loss": 1.3752, "step": 7 }, { "epoch": 0.0021407546160021407, "grad_norm": 11.476215362548828, "learning_rate": 8.556149732620321e-08, "loss": 1.3656, "step": 8 }, { "epoch": 0.002408348943002408, "grad_norm": 11.222175598144531, "learning_rate": 9.625668449197862e-08, "loss": 1.2515, "step": 9 }, { "epoch": 0.002675943270002676, "grad_norm": 10.473676681518555, "learning_rate": 1.0695187165775401e-07, "loss": 1.3108, "step": 10 }, { "epoch": 0.0029435375970029436, "grad_norm": 13.289176940917969, "learning_rate": 1.1764705882352942e-07, "loss": 1.4861, "step": 11 }, { "epoch": 0.003211131924003211, "grad_norm": 12.152190208435059, "learning_rate": 1.2834224598930482e-07, "loss": 1.4196, "step": 12 }, { "epoch": 0.0034787262510034785, "grad_norm": 12.78124713897705, "learning_rate": 1.3903743315508023e-07, "loss": 1.3621, "step": 13 }, { "epoch": 0.0037463205780037465, "grad_norm": 13.670173645019531, "learning_rate": 1.497326203208556e-07, "loss": 1.3979, "step": 14 }, { "epoch": 0.004013914905004014, "grad_norm": 11.728761672973633, "learning_rate": 1.6042780748663104e-07, "loss": 1.4335, "step": 15 }, { "epoch": 0.004281509232004281, "grad_norm": 12.166805267333984, "learning_rate": 1.7112299465240642e-07, "loss": 1.2879, "step": 16 }, { "epoch": 0.004549103559004549, "grad_norm": 11.862377166748047, "learning_rate": 1.8181818181818183e-07, "loss": 1.5192, "step": 17 }, { "epoch": 0.004816697886004816, "grad_norm": 14.839558601379395, "learning_rate": 1.9251336898395724e-07, "loss": 1.4282, "step": 18 }, { "epoch": 0.005084292213005084, "grad_norm": 9.195609092712402, "learning_rate": 2.0320855614973264e-07, "loss": 1.3435, "step": 19 }, { "epoch": 0.005351886540005352, "grad_norm": 12.21860408782959, "learning_rate": 2.1390374331550802e-07, "loss": 1.438, "step": 20 }, { "epoch": 0.00561948086700562, "grad_norm": 14.291121482849121, "learning_rate": 2.2459893048128345e-07, "loss": 1.478, "step": 21 }, { "epoch": 0.005887075194005887, "grad_norm": 11.789957046508789, "learning_rate": 2.3529411764705883e-07, "loss": 1.3549, "step": 22 }, { "epoch": 0.006154669521006155, "grad_norm": 8.266191482543945, "learning_rate": 2.459893048128342e-07, "loss": 1.3308, "step": 23 }, { "epoch": 0.006422263848006422, "grad_norm": 14.99113941192627, "learning_rate": 2.5668449197860965e-07, "loss": 1.4145, "step": 24 }, { "epoch": 0.00668985817500669, "grad_norm": 12.698420524597168, "learning_rate": 2.6737967914438503e-07, "loss": 1.3392, "step": 25 }, { "epoch": 0.006957452502006957, "grad_norm": 9.676013946533203, "learning_rate": 2.7807486631016046e-07, "loss": 1.2585, "step": 26 }, { "epoch": 0.0072250468290072254, "grad_norm": 10.009092330932617, "learning_rate": 2.8877005347593584e-07, "loss": 1.4387, "step": 27 }, { "epoch": 0.007492641156007493, "grad_norm": 10.498503684997559, "learning_rate": 2.994652406417112e-07, "loss": 1.2998, "step": 28 }, { "epoch": 0.00776023548300776, "grad_norm": 8.508102416992188, "learning_rate": 3.1016042780748665e-07, "loss": 1.4256, "step": 29 }, { "epoch": 0.008027829810008028, "grad_norm": 13.676020622253418, "learning_rate": 3.208556149732621e-07, "loss": 1.4097, "step": 30 }, { "epoch": 0.008295424137008296, "grad_norm": 8.145886421203613, "learning_rate": 3.3155080213903747e-07, "loss": 1.4282, "step": 31 }, { "epoch": 0.008563018464008563, "grad_norm": 9.673611640930176, "learning_rate": 3.4224598930481285e-07, "loss": 1.4322, "step": 32 }, { "epoch": 0.008830612791008831, "grad_norm": 8.00688362121582, "learning_rate": 3.529411764705883e-07, "loss": 1.2729, "step": 33 }, { "epoch": 0.009098207118009098, "grad_norm": 8.261327743530273, "learning_rate": 3.6363636363636366e-07, "loss": 1.3191, "step": 34 }, { "epoch": 0.009365801445009366, "grad_norm": 7.580234527587891, "learning_rate": 3.7433155080213904e-07, "loss": 1.3552, "step": 35 }, { "epoch": 0.009633395772009633, "grad_norm": 6.747824668884277, "learning_rate": 3.8502673796791447e-07, "loss": 1.3651, "step": 36 }, { "epoch": 0.009900990099009901, "grad_norm": 7.468786239624023, "learning_rate": 3.957219251336899e-07, "loss": 1.4516, "step": 37 }, { "epoch": 0.010168584426010168, "grad_norm": 7.431623935699463, "learning_rate": 4.064171122994653e-07, "loss": 1.3, "step": 38 }, { "epoch": 0.010436178753010436, "grad_norm": 7.183818817138672, "learning_rate": 4.1711229946524066e-07, "loss": 1.279, "step": 39 }, { "epoch": 0.010703773080010704, "grad_norm": 7.624693393707275, "learning_rate": 4.2780748663101604e-07, "loss": 1.2432, "step": 40 }, { "epoch": 0.010971367407010971, "grad_norm": 6.89495325088501, "learning_rate": 4.3850267379679153e-07, "loss": 1.3082, "step": 41 }, { "epoch": 0.01123896173401124, "grad_norm": 6.955787181854248, "learning_rate": 4.491978609625669e-07, "loss": 1.3646, "step": 42 }, { "epoch": 0.011506556061011506, "grad_norm": 6.60789155960083, "learning_rate": 4.598930481283423e-07, "loss": 1.2452, "step": 43 }, { "epoch": 0.011774150388011774, "grad_norm": 5.894134998321533, "learning_rate": 4.7058823529411767e-07, "loss": 1.2042, "step": 44 }, { "epoch": 0.012041744715012041, "grad_norm": 6.411407470703125, "learning_rate": 4.812834224598931e-07, "loss": 1.3002, "step": 45 }, { "epoch": 0.01230933904201231, "grad_norm": 6.972503185272217, "learning_rate": 4.919786096256684e-07, "loss": 1.3314, "step": 46 }, { "epoch": 0.012576933369012578, "grad_norm": 6.686539649963379, "learning_rate": 5.02673796791444e-07, "loss": 1.3888, "step": 47 }, { "epoch": 0.012844527696012844, "grad_norm": 5.989624500274658, "learning_rate": 5.133689839572193e-07, "loss": 1.2706, "step": 48 }, { "epoch": 0.013112122023013113, "grad_norm": 5.950018405914307, "learning_rate": 5.240641711229947e-07, "loss": 1.2127, "step": 49 }, { "epoch": 0.01337971635001338, "grad_norm": 6.040707588195801, "learning_rate": 5.347593582887701e-07, "loss": 1.3227, "step": 50 }, { "epoch": 0.013647310677013648, "grad_norm": 5.798983573913574, "learning_rate": 5.454545454545455e-07, "loss": 1.274, "step": 51 }, { "epoch": 0.013914905004013914, "grad_norm": 6.321473598480225, "learning_rate": 5.561497326203209e-07, "loss": 1.3229, "step": 52 }, { "epoch": 0.014182499331014183, "grad_norm": 6.0828447341918945, "learning_rate": 5.668449197860964e-07, "loss": 1.2053, "step": 53 }, { "epoch": 0.014450093658014451, "grad_norm": 6.756501197814941, "learning_rate": 5.775401069518717e-07, "loss": 1.2211, "step": 54 }, { "epoch": 0.014717687985014717, "grad_norm": 6.133362770080566, "learning_rate": 5.882352941176471e-07, "loss": 1.0539, "step": 55 }, { "epoch": 0.014985282312014986, "grad_norm": 6.778918743133545, "learning_rate": 5.989304812834224e-07, "loss": 1.2877, "step": 56 }, { "epoch": 0.015252876639015252, "grad_norm": 6.3190836906433105, "learning_rate": 6.096256684491979e-07, "loss": 1.2435, "step": 57 }, { "epoch": 0.01552047096601552, "grad_norm": 6.347466468811035, "learning_rate": 6.203208556149733e-07, "loss": 1.2126, "step": 58 }, { "epoch": 0.01578806529301579, "grad_norm": 6.51020622253418, "learning_rate": 6.310160427807486e-07, "loss": 1.2392, "step": 59 }, { "epoch": 0.016055659620016056, "grad_norm": 5.295354843139648, "learning_rate": 6.417112299465242e-07, "loss": 1.2241, "step": 60 }, { "epoch": 0.016323253947016322, "grad_norm": 5.243033409118652, "learning_rate": 6.524064171122996e-07, "loss": 1.1209, "step": 61 }, { "epoch": 0.016590848274016592, "grad_norm": 6.233066558837891, "learning_rate": 6.631016042780749e-07, "loss": 1.3712, "step": 62 }, { "epoch": 0.01685844260101686, "grad_norm": 6.268922328948975, "learning_rate": 6.737967914438504e-07, "loss": 1.2861, "step": 63 }, { "epoch": 0.017126036928017126, "grad_norm": 6.287389755249023, "learning_rate": 6.844919786096257e-07, "loss": 1.3305, "step": 64 }, { "epoch": 0.017393631255017392, "grad_norm": 5.882622718811035, "learning_rate": 6.951871657754011e-07, "loss": 1.2595, "step": 65 }, { "epoch": 0.017661225582017662, "grad_norm": 6.735020637512207, "learning_rate": 7.058823529411766e-07, "loss": 1.2909, "step": 66 }, { "epoch": 0.01792881990901793, "grad_norm": 6.339001178741455, "learning_rate": 7.165775401069519e-07, "loss": 1.4648, "step": 67 }, { "epoch": 0.018196414236018196, "grad_norm": 5.405106544494629, "learning_rate": 7.272727272727273e-07, "loss": 1.1235, "step": 68 }, { "epoch": 0.018464008563018466, "grad_norm": 5.867051124572754, "learning_rate": 7.379679144385026e-07, "loss": 1.3021, "step": 69 }, { "epoch": 0.018731602890018732, "grad_norm": 6.250916481018066, "learning_rate": 7.486631016042781e-07, "loss": 1.2687, "step": 70 }, { "epoch": 0.018999197217019, "grad_norm": 5.591010093688965, "learning_rate": 7.593582887700536e-07, "loss": 1.1995, "step": 71 }, { "epoch": 0.019266791544019266, "grad_norm": 5.43657112121582, "learning_rate": 7.700534759358289e-07, "loss": 1.2271, "step": 72 }, { "epoch": 0.019534385871019536, "grad_norm": 5.751687526702881, "learning_rate": 7.807486631016044e-07, "loss": 1.3095, "step": 73 }, { "epoch": 0.019801980198019802, "grad_norm": 5.247724533081055, "learning_rate": 7.914438502673798e-07, "loss": 1.257, "step": 74 }, { "epoch": 0.02006957452502007, "grad_norm": 5.353349208831787, "learning_rate": 8.021390374331551e-07, "loss": 1.1566, "step": 75 }, { "epoch": 0.020337168852020335, "grad_norm": 5.401248455047607, "learning_rate": 8.128342245989306e-07, "loss": 1.1666, "step": 76 }, { "epoch": 0.020604763179020606, "grad_norm": 5.6218953132629395, "learning_rate": 8.235294117647059e-07, "loss": 1.2186, "step": 77 }, { "epoch": 0.020872357506020872, "grad_norm": 4.6730875968933105, "learning_rate": 8.342245989304813e-07, "loss": 1.0226, "step": 78 }, { "epoch": 0.02113995183302114, "grad_norm": 5.531125545501709, "learning_rate": 8.449197860962568e-07, "loss": 1.1753, "step": 79 }, { "epoch": 0.02140754616002141, "grad_norm": 5.861851215362549, "learning_rate": 8.556149732620321e-07, "loss": 1.21, "step": 80 }, { "epoch": 0.021675140487021675, "grad_norm": 5.872004508972168, "learning_rate": 8.663101604278075e-07, "loss": 1.1952, "step": 81 }, { "epoch": 0.021942734814021942, "grad_norm": 5.292346000671387, "learning_rate": 8.770053475935831e-07, "loss": 1.1094, "step": 82 }, { "epoch": 0.02221032914102221, "grad_norm": 5.308231353759766, "learning_rate": 8.877005347593584e-07, "loss": 1.1175, "step": 83 }, { "epoch": 0.02247792346802248, "grad_norm": 5.5853071212768555, "learning_rate": 8.983957219251338e-07, "loss": 1.2289, "step": 84 }, { "epoch": 0.022745517795022745, "grad_norm": 5.129279613494873, "learning_rate": 9.090909090909091e-07, "loss": 1.1456, "step": 85 }, { "epoch": 0.023013112122023012, "grad_norm": 5.8378753662109375, "learning_rate": 9.197860962566846e-07, "loss": 1.3367, "step": 86 }, { "epoch": 0.023280706449023282, "grad_norm": 5.8775715827941895, "learning_rate": 9.3048128342246e-07, "loss": 1.2341, "step": 87 }, { "epoch": 0.02354830077602355, "grad_norm": 5.519059658050537, "learning_rate": 9.411764705882353e-07, "loss": 1.2504, "step": 88 }, { "epoch": 0.023815895103023815, "grad_norm": 5.483979225158691, "learning_rate": 9.518716577540108e-07, "loss": 1.1266, "step": 89 }, { "epoch": 0.024083489430024082, "grad_norm": 6.002791881561279, "learning_rate": 9.625668449197862e-07, "loss": 1.279, "step": 90 }, { "epoch": 0.024351083757024352, "grad_norm": 5.5504021644592285, "learning_rate": 9.732620320855615e-07, "loss": 1.1381, "step": 91 }, { "epoch": 0.02461867808402462, "grad_norm": 5.171264171600342, "learning_rate": 9.839572192513369e-07, "loss": 1.217, "step": 92 }, { "epoch": 0.024886272411024885, "grad_norm": 5.0262370109558105, "learning_rate": 9.946524064171124e-07, "loss": 1.2421, "step": 93 }, { "epoch": 0.025153866738025155, "grad_norm": 6.277072429656982, "learning_rate": 1.005347593582888e-06, "loss": 1.3543, "step": 94 }, { "epoch": 0.025421461065025422, "grad_norm": 5.420050144195557, "learning_rate": 1.0160427807486633e-06, "loss": 1.2239, "step": 95 }, { "epoch": 0.02568905539202569, "grad_norm": 5.405261516571045, "learning_rate": 1.0267379679144386e-06, "loss": 1.1811, "step": 96 }, { "epoch": 0.025956649719025955, "grad_norm": 5.828834533691406, "learning_rate": 1.037433155080214e-06, "loss": 1.1913, "step": 97 }, { "epoch": 0.026224244046026225, "grad_norm": 5.571322441101074, "learning_rate": 1.0481283422459895e-06, "loss": 1.1917, "step": 98 }, { "epoch": 0.026491838373026492, "grad_norm": 5.72471284866333, "learning_rate": 1.0588235294117648e-06, "loss": 1.3015, "step": 99 }, { "epoch": 0.02675943270002676, "grad_norm": 5.436988353729248, "learning_rate": 1.0695187165775401e-06, "loss": 1.3139, "step": 100 }, { "epoch": 0.02702702702702703, "grad_norm": 6.0810394287109375, "learning_rate": 1.0802139037433156e-06, "loss": 1.1722, "step": 101 }, { "epoch": 0.027294621354027295, "grad_norm": 5.316585540771484, "learning_rate": 1.090909090909091e-06, "loss": 1.1669, "step": 102 }, { "epoch": 0.027562215681027562, "grad_norm": 4.8456950187683105, "learning_rate": 1.1016042780748663e-06, "loss": 1.063, "step": 103 }, { "epoch": 0.02782981000802783, "grad_norm": 5.4848952293396, "learning_rate": 1.1122994652406418e-06, "loss": 1.1014, "step": 104 }, { "epoch": 0.0280974043350281, "grad_norm": 5.489200592041016, "learning_rate": 1.1229946524064172e-06, "loss": 1.1174, "step": 105 }, { "epoch": 0.028364998662028365, "grad_norm": 5.760312080383301, "learning_rate": 1.1336898395721927e-06, "loss": 1.3387, "step": 106 }, { "epoch": 0.02863259298902863, "grad_norm": 5.3693413734436035, "learning_rate": 1.144385026737968e-06, "loss": 1.2021, "step": 107 }, { "epoch": 0.028900187316028902, "grad_norm": 5.530979633331299, "learning_rate": 1.1550802139037434e-06, "loss": 1.3164, "step": 108 }, { "epoch": 0.02916778164302917, "grad_norm": 6.090900897979736, "learning_rate": 1.165775401069519e-06, "loss": 1.3232, "step": 109 }, { "epoch": 0.029435375970029435, "grad_norm": 5.393311023712158, "learning_rate": 1.1764705882352942e-06, "loss": 1.2509, "step": 110 }, { "epoch": 0.0297029702970297, "grad_norm": 5.429086685180664, "learning_rate": 1.1871657754010696e-06, "loss": 1.115, "step": 111 }, { "epoch": 0.02997056462402997, "grad_norm": 5.629342079162598, "learning_rate": 1.1978609625668449e-06, "loss": 1.0911, "step": 112 }, { "epoch": 0.03023815895103024, "grad_norm": 5.1444621086120605, "learning_rate": 1.2085561497326204e-06, "loss": 1.2814, "step": 113 }, { "epoch": 0.030505753278030505, "grad_norm": 5.754062652587891, "learning_rate": 1.2192513368983957e-06, "loss": 1.2656, "step": 114 }, { "epoch": 0.030773347605030775, "grad_norm": 5.319810390472412, "learning_rate": 1.2299465240641713e-06, "loss": 1.1083, "step": 115 }, { "epoch": 0.03104094193203104, "grad_norm": 5.084403991699219, "learning_rate": 1.2406417112299466e-06, "loss": 1.0987, "step": 116 }, { "epoch": 0.03130853625903131, "grad_norm": 5.552883625030518, "learning_rate": 1.251336898395722e-06, "loss": 1.1867, "step": 117 }, { "epoch": 0.03157613058603158, "grad_norm": 5.5615410804748535, "learning_rate": 1.2620320855614973e-06, "loss": 1.3064, "step": 118 }, { "epoch": 0.03184372491303184, "grad_norm": 5.348892688751221, "learning_rate": 1.2727272727272728e-06, "loss": 1.2016, "step": 119 }, { "epoch": 0.03211131924003211, "grad_norm": 5.782661437988281, "learning_rate": 1.2834224598930483e-06, "loss": 1.1626, "step": 120 }, { "epoch": 0.03237891356703238, "grad_norm": 4.8546977043151855, "learning_rate": 1.2941176470588237e-06, "loss": 1.0428, "step": 121 }, { "epoch": 0.032646507894032645, "grad_norm": 5.160636901855469, "learning_rate": 1.3048128342245992e-06, "loss": 1.1558, "step": 122 }, { "epoch": 0.032914102221032915, "grad_norm": 5.260807514190674, "learning_rate": 1.3155080213903745e-06, "loss": 1.1594, "step": 123 }, { "epoch": 0.033181696548033185, "grad_norm": 5.723674774169922, "learning_rate": 1.3262032085561499e-06, "loss": 1.0968, "step": 124 }, { "epoch": 0.03344929087503345, "grad_norm": 5.457815170288086, "learning_rate": 1.3368983957219254e-06, "loss": 1.2186, "step": 125 }, { "epoch": 0.03371688520203372, "grad_norm": 5.3614501953125, "learning_rate": 1.3475935828877007e-06, "loss": 1.2612, "step": 126 }, { "epoch": 0.03398447952903398, "grad_norm": 5.161847114562988, "learning_rate": 1.358288770053476e-06, "loss": 1.1159, "step": 127 }, { "epoch": 0.03425207385603425, "grad_norm": 5.856586456298828, "learning_rate": 1.3689839572192514e-06, "loss": 1.2016, "step": 128 }, { "epoch": 0.03451966818303452, "grad_norm": 5.2745490074157715, "learning_rate": 1.379679144385027e-06, "loss": 1.241, "step": 129 }, { "epoch": 0.034787262510034785, "grad_norm": 5.403688907623291, "learning_rate": 1.3903743315508022e-06, "loss": 1.1044, "step": 130 }, { "epoch": 0.035054856837035055, "grad_norm": 5.162592887878418, "learning_rate": 1.4010695187165776e-06, "loss": 1.0964, "step": 131 }, { "epoch": 0.035322451164035325, "grad_norm": 5.914812088012695, "learning_rate": 1.4117647058823531e-06, "loss": 1.2008, "step": 132 }, { "epoch": 0.03559004549103559, "grad_norm": 5.173002243041992, "learning_rate": 1.4224598930481284e-06, "loss": 1.2276, "step": 133 }, { "epoch": 0.03585763981803586, "grad_norm": 5.35471773147583, "learning_rate": 1.4331550802139038e-06, "loss": 1.2126, "step": 134 }, { "epoch": 0.03612523414503613, "grad_norm": 5.432989120483398, "learning_rate": 1.4438502673796793e-06, "loss": 1.2237, "step": 135 }, { "epoch": 0.03639282847203639, "grad_norm": 5.986301898956299, "learning_rate": 1.4545454545454546e-06, "loss": 1.3529, "step": 136 }, { "epoch": 0.03666042279903666, "grad_norm": 4.566310882568359, "learning_rate": 1.46524064171123e-06, "loss": 1.1274, "step": 137 }, { "epoch": 0.03692801712603693, "grad_norm": 4.906930923461914, "learning_rate": 1.4759358288770053e-06, "loss": 1.077, "step": 138 }, { "epoch": 0.037195611453037195, "grad_norm": 5.325079917907715, "learning_rate": 1.4866310160427808e-06, "loss": 1.1759, "step": 139 }, { "epoch": 0.037463205780037465, "grad_norm": 5.840808868408203, "learning_rate": 1.4973262032085562e-06, "loss": 1.2583, "step": 140 }, { "epoch": 0.03773080010703773, "grad_norm": 5.32767915725708, "learning_rate": 1.5080213903743315e-06, "loss": 1.1345, "step": 141 }, { "epoch": 0.037998394434038, "grad_norm": 5.714527130126953, "learning_rate": 1.5187165775401072e-06, "loss": 1.1113, "step": 142 }, { "epoch": 0.03826598876103827, "grad_norm": 5.20102596282959, "learning_rate": 1.5294117647058826e-06, "loss": 1.1802, "step": 143 }, { "epoch": 0.03853358308803853, "grad_norm": 5.638003826141357, "learning_rate": 1.5401069518716579e-06, "loss": 1.1773, "step": 144 }, { "epoch": 0.0388011774150388, "grad_norm": 6.044027328491211, "learning_rate": 1.5508021390374334e-06, "loss": 1.2413, "step": 145 }, { "epoch": 0.03906877174203907, "grad_norm": 5.571484565734863, "learning_rate": 1.5614973262032088e-06, "loss": 1.2316, "step": 146 }, { "epoch": 0.039336366069039334, "grad_norm": 4.9136457443237305, "learning_rate": 1.572192513368984e-06, "loss": 1.2048, "step": 147 }, { "epoch": 0.039603960396039604, "grad_norm": 5.684943675994873, "learning_rate": 1.5828877005347596e-06, "loss": 1.1958, "step": 148 }, { "epoch": 0.039871554723039875, "grad_norm": 5.061483860015869, "learning_rate": 1.593582887700535e-06, "loss": 1.0256, "step": 149 }, { "epoch": 0.04013914905004014, "grad_norm": 5.615631580352783, "learning_rate": 1.6042780748663103e-06, "loss": 1.1739, "step": 150 }, { "epoch": 0.04040674337704041, "grad_norm": 5.1106791496276855, "learning_rate": 1.6149732620320858e-06, "loss": 1.1252, "step": 151 }, { "epoch": 0.04067433770404067, "grad_norm": 5.248224258422852, "learning_rate": 1.6256684491978611e-06, "loss": 1.1636, "step": 152 }, { "epoch": 0.04094193203104094, "grad_norm": 5.454551696777344, "learning_rate": 1.6363636363636365e-06, "loss": 1.2073, "step": 153 }, { "epoch": 0.04120952635804121, "grad_norm": 4.683351039886475, "learning_rate": 1.6470588235294118e-06, "loss": 1.0651, "step": 154 }, { "epoch": 0.041477120685041474, "grad_norm": 5.561789512634277, "learning_rate": 1.6577540106951873e-06, "loss": 1.1654, "step": 155 }, { "epoch": 0.041744715012041744, "grad_norm": 4.899862766265869, "learning_rate": 1.6684491978609627e-06, "loss": 1.116, "step": 156 }, { "epoch": 0.042012309339042014, "grad_norm": 5.253740310668945, "learning_rate": 1.679144385026738e-06, "loss": 1.1066, "step": 157 }, { "epoch": 0.04227990366604228, "grad_norm": 5.0830183029174805, "learning_rate": 1.6898395721925135e-06, "loss": 1.0951, "step": 158 }, { "epoch": 0.04254749799304255, "grad_norm": 5.221453666687012, "learning_rate": 1.7005347593582888e-06, "loss": 1.1063, "step": 159 }, { "epoch": 0.04281509232004282, "grad_norm": 5.58682918548584, "learning_rate": 1.7112299465240642e-06, "loss": 1.0932, "step": 160 }, { "epoch": 0.04308268664704308, "grad_norm": 5.066821098327637, "learning_rate": 1.7219251336898395e-06, "loss": 1.2249, "step": 161 }, { "epoch": 0.04335028097404335, "grad_norm": 4.870170593261719, "learning_rate": 1.732620320855615e-06, "loss": 1.0948, "step": 162 }, { "epoch": 0.04361787530104362, "grad_norm": 5.372590065002441, "learning_rate": 1.7433155080213904e-06, "loss": 1.1966, "step": 163 }, { "epoch": 0.043885469628043884, "grad_norm": 5.09646463394165, "learning_rate": 1.7540106951871661e-06, "loss": 1.0542, "step": 164 }, { "epoch": 0.044153063955044154, "grad_norm": 5.390144348144531, "learning_rate": 1.7647058823529414e-06, "loss": 1.197, "step": 165 }, { "epoch": 0.04442065828204442, "grad_norm": 5.653879642486572, "learning_rate": 1.7754010695187168e-06, "loss": 1.2669, "step": 166 }, { "epoch": 0.04468825260904469, "grad_norm": 5.457263469696045, "learning_rate": 1.7860962566844923e-06, "loss": 1.1263, "step": 167 }, { "epoch": 0.04495584693604496, "grad_norm": 5.214939594268799, "learning_rate": 1.7967914438502676e-06, "loss": 1.272, "step": 168 }, { "epoch": 0.04522344126304522, "grad_norm": 5.01685905456543, "learning_rate": 1.807486631016043e-06, "loss": 1.1515, "step": 169 }, { "epoch": 0.04549103559004549, "grad_norm": 5.313577175140381, "learning_rate": 1.8181818181818183e-06, "loss": 1.0548, "step": 170 }, { "epoch": 0.04575862991704576, "grad_norm": 5.595825672149658, "learning_rate": 1.8288770053475938e-06, "loss": 1.2787, "step": 171 }, { "epoch": 0.046026224244046024, "grad_norm": 5.331969738006592, "learning_rate": 1.8395721925133692e-06, "loss": 1.2543, "step": 172 }, { "epoch": 0.046293818571046294, "grad_norm": 5.564863204956055, "learning_rate": 1.8502673796791445e-06, "loss": 1.1323, "step": 173 }, { "epoch": 0.046561412898046564, "grad_norm": 4.885172367095947, "learning_rate": 1.86096256684492e-06, "loss": 1.136, "step": 174 }, { "epoch": 0.04682900722504683, "grad_norm": 5.40251350402832, "learning_rate": 1.8716577540106954e-06, "loss": 1.1442, "step": 175 }, { "epoch": 0.0470966015520471, "grad_norm": 5.090615749359131, "learning_rate": 1.8823529411764707e-06, "loss": 1.1505, "step": 176 }, { "epoch": 0.04736419587904737, "grad_norm": 5.03092622756958, "learning_rate": 1.893048128342246e-06, "loss": 1.2347, "step": 177 }, { "epoch": 0.04763179020604763, "grad_norm": 5.308589935302734, "learning_rate": 1.9037433155080215e-06, "loss": 1.1856, "step": 178 }, { "epoch": 0.0478993845330479, "grad_norm": 5.609830379486084, "learning_rate": 1.914438502673797e-06, "loss": 1.208, "step": 179 }, { "epoch": 0.048166978860048164, "grad_norm": 4.926831245422363, "learning_rate": 1.9251336898395724e-06, "loss": 1.1978, "step": 180 }, { "epoch": 0.048434573187048434, "grad_norm": 5.084292888641357, "learning_rate": 1.9358288770053475e-06, "loss": 1.2949, "step": 181 }, { "epoch": 0.048702167514048704, "grad_norm": 4.909692764282227, "learning_rate": 1.946524064171123e-06, "loss": 1.1134, "step": 182 }, { "epoch": 0.04896976184104897, "grad_norm": 5.650701522827148, "learning_rate": 1.9572192513368986e-06, "loss": 1.2427, "step": 183 }, { "epoch": 0.04923735616804924, "grad_norm": 5.057121753692627, "learning_rate": 1.9679144385026737e-06, "loss": 1.1445, "step": 184 }, { "epoch": 0.04950495049504951, "grad_norm": 5.226599216461182, "learning_rate": 1.9786096256684497e-06, "loss": 1.1969, "step": 185 }, { "epoch": 0.04977254482204977, "grad_norm": 5.469078540802002, "learning_rate": 1.989304812834225e-06, "loss": 1.2631, "step": 186 }, { "epoch": 0.05004013914905004, "grad_norm": 5.3292460441589355, "learning_rate": 2.0000000000000003e-06, "loss": 1.2036, "step": 187 }, { "epoch": 0.05030773347605031, "grad_norm": 5.132472991943359, "learning_rate": 2.010695187165776e-06, "loss": 1.1629, "step": 188 }, { "epoch": 0.050575327803050574, "grad_norm": 5.4047369956970215, "learning_rate": 2.021390374331551e-06, "loss": 1.1455, "step": 189 }, { "epoch": 0.050842922130050844, "grad_norm": 5.656977653503418, "learning_rate": 2.0320855614973265e-06, "loss": 1.205, "step": 190 }, { "epoch": 0.051110516457051114, "grad_norm": 5.2581963539123535, "learning_rate": 2.0427807486631016e-06, "loss": 1.1071, "step": 191 }, { "epoch": 0.05137811078405138, "grad_norm": 6.479303359985352, "learning_rate": 2.053475935828877e-06, "loss": 1.1909, "step": 192 }, { "epoch": 0.05164570511105165, "grad_norm": 5.340463638305664, "learning_rate": 2.0641711229946527e-06, "loss": 1.139, "step": 193 }, { "epoch": 0.05191329943805191, "grad_norm": 5.29105281829834, "learning_rate": 2.074866310160428e-06, "loss": 1.1934, "step": 194 }, { "epoch": 0.05218089376505218, "grad_norm": 5.378291606903076, "learning_rate": 2.0855614973262034e-06, "loss": 1.2956, "step": 195 }, { "epoch": 0.05244848809205245, "grad_norm": 5.540526390075684, "learning_rate": 2.096256684491979e-06, "loss": 1.2126, "step": 196 }, { "epoch": 0.052716082419052714, "grad_norm": 4.78275728225708, "learning_rate": 2.106951871657754e-06, "loss": 1.126, "step": 197 }, { "epoch": 0.052983676746052984, "grad_norm": 5.544436931610107, "learning_rate": 2.1176470588235296e-06, "loss": 1.1113, "step": 198 }, { "epoch": 0.053251271073053254, "grad_norm": 5.038266658782959, "learning_rate": 2.128342245989305e-06, "loss": 1.1551, "step": 199 }, { "epoch": 0.05351886540005352, "grad_norm": 5.480011463165283, "learning_rate": 2.1390374331550802e-06, "loss": 1.1223, "step": 200 }, { "epoch": 0.05378645972705379, "grad_norm": 5.360974311828613, "learning_rate": 2.1497326203208558e-06, "loss": 1.1213, "step": 201 }, { "epoch": 0.05405405405405406, "grad_norm": 4.914999008178711, "learning_rate": 2.1604278074866313e-06, "loss": 1.1067, "step": 202 }, { "epoch": 0.05432164838105432, "grad_norm": 5.201199531555176, "learning_rate": 2.1711229946524064e-06, "loss": 1.0909, "step": 203 }, { "epoch": 0.05458924270805459, "grad_norm": 5.064680576324463, "learning_rate": 2.181818181818182e-06, "loss": 1.1301, "step": 204 }, { "epoch": 0.05485683703505485, "grad_norm": 5.714580535888672, "learning_rate": 2.1925133689839575e-06, "loss": 1.3127, "step": 205 }, { "epoch": 0.055124431362055124, "grad_norm": 5.075433731079102, "learning_rate": 2.2032085561497326e-06, "loss": 1.2102, "step": 206 }, { "epoch": 0.055392025689055394, "grad_norm": 5.047552108764648, "learning_rate": 2.213903743315508e-06, "loss": 1.0519, "step": 207 }, { "epoch": 0.05565962001605566, "grad_norm": 5.115383148193359, "learning_rate": 2.2245989304812837e-06, "loss": 1.0859, "step": 208 }, { "epoch": 0.05592721434305593, "grad_norm": 4.877355575561523, "learning_rate": 2.2352941176470592e-06, "loss": 0.9786, "step": 209 }, { "epoch": 0.0561948086700562, "grad_norm": 4.9516921043396, "learning_rate": 2.2459893048128343e-06, "loss": 1.1192, "step": 210 }, { "epoch": 0.05646240299705646, "grad_norm": 4.995131969451904, "learning_rate": 2.25668449197861e-06, "loss": 1.1766, "step": 211 }, { "epoch": 0.05672999732405673, "grad_norm": 5.119645595550537, "learning_rate": 2.2673796791443854e-06, "loss": 1.0791, "step": 212 }, { "epoch": 0.056997591651057, "grad_norm": 5.06790828704834, "learning_rate": 2.2780748663101605e-06, "loss": 1.2245, "step": 213 }, { "epoch": 0.05726518597805726, "grad_norm": 5.313665390014648, "learning_rate": 2.288770053475936e-06, "loss": 1.1242, "step": 214 }, { "epoch": 0.05753278030505753, "grad_norm": 5.126317977905273, "learning_rate": 2.2994652406417116e-06, "loss": 1.234, "step": 215 }, { "epoch": 0.057800374632057804, "grad_norm": 5.17064094543457, "learning_rate": 2.3101604278074867e-06, "loss": 1.1468, "step": 216 }, { "epoch": 0.05806796895905807, "grad_norm": 5.41604471206665, "learning_rate": 2.3208556149732623e-06, "loss": 1.1766, "step": 217 }, { "epoch": 0.05833556328605834, "grad_norm": 5.253145217895508, "learning_rate": 2.331550802139038e-06, "loss": 1.2682, "step": 218 }, { "epoch": 0.0586031576130586, "grad_norm": 5.144425868988037, "learning_rate": 2.342245989304813e-06, "loss": 1.1138, "step": 219 }, { "epoch": 0.05887075194005887, "grad_norm": 5.531948089599609, "learning_rate": 2.3529411764705885e-06, "loss": 1.2583, "step": 220 }, { "epoch": 0.05913834626705914, "grad_norm": 5.511721134185791, "learning_rate": 2.363636363636364e-06, "loss": 1.2773, "step": 221 }, { "epoch": 0.0594059405940594, "grad_norm": 5.116267204284668, "learning_rate": 2.374331550802139e-06, "loss": 1.2009, "step": 222 }, { "epoch": 0.05967353492105967, "grad_norm": 5.6089630126953125, "learning_rate": 2.3850267379679146e-06, "loss": 1.2062, "step": 223 }, { "epoch": 0.05994112924805994, "grad_norm": 4.710421085357666, "learning_rate": 2.3957219251336898e-06, "loss": 1.1464, "step": 224 }, { "epoch": 0.060208723575060207, "grad_norm": 5.434922218322754, "learning_rate": 2.4064171122994653e-06, "loss": 1.3069, "step": 225 }, { "epoch": 0.06047631790206048, "grad_norm": 4.87479305267334, "learning_rate": 2.417112299465241e-06, "loss": 1.1407, "step": 226 }, { "epoch": 0.06074391222906075, "grad_norm": 4.942396640777588, "learning_rate": 2.427807486631016e-06, "loss": 1.0771, "step": 227 }, { "epoch": 0.06101150655606101, "grad_norm": 4.659417629241943, "learning_rate": 2.4385026737967915e-06, "loss": 1.0804, "step": 228 }, { "epoch": 0.06127910088306128, "grad_norm": 4.819082260131836, "learning_rate": 2.449197860962567e-06, "loss": 1.1493, "step": 229 }, { "epoch": 0.06154669521006155, "grad_norm": 5.2966437339782715, "learning_rate": 2.4598930481283426e-06, "loss": 1.0462, "step": 230 }, { "epoch": 0.06181428953706181, "grad_norm": 4.857460021972656, "learning_rate": 2.470588235294118e-06, "loss": 1.1203, "step": 231 }, { "epoch": 0.06208188386406208, "grad_norm": 5.175891399383545, "learning_rate": 2.4812834224598932e-06, "loss": 1.2988, "step": 232 }, { "epoch": 0.062349478191062346, "grad_norm": 5.051662445068359, "learning_rate": 2.4919786096256688e-06, "loss": 1.1729, "step": 233 }, { "epoch": 0.06261707251806262, "grad_norm": 5.011804580688477, "learning_rate": 2.502673796791444e-06, "loss": 1.1163, "step": 234 }, { "epoch": 0.06288466684506289, "grad_norm": 5.881048202514648, "learning_rate": 2.5133689839572194e-06, "loss": 1.1809, "step": 235 }, { "epoch": 0.06315226117206316, "grad_norm": 5.304912567138672, "learning_rate": 2.5240641711229945e-06, "loss": 1.1589, "step": 236 }, { "epoch": 0.06341985549906343, "grad_norm": 4.8746256828308105, "learning_rate": 2.5347593582887705e-06, "loss": 1.1181, "step": 237 }, { "epoch": 0.06368744982606368, "grad_norm": 5.147508144378662, "learning_rate": 2.5454545454545456e-06, "loss": 1.2074, "step": 238 }, { "epoch": 0.06395504415306395, "grad_norm": 5.325894355773926, "learning_rate": 2.556149732620321e-06, "loss": 1.0711, "step": 239 }, { "epoch": 0.06422263848006422, "grad_norm": 5.343993186950684, "learning_rate": 2.5668449197860967e-06, "loss": 1.1532, "step": 240 }, { "epoch": 0.0644902328070645, "grad_norm": 5.7134270668029785, "learning_rate": 2.577540106951872e-06, "loss": 1.2013, "step": 241 }, { "epoch": 0.06475782713406476, "grad_norm": 4.556005001068115, "learning_rate": 2.5882352941176473e-06, "loss": 0.9915, "step": 242 }, { "epoch": 0.06502542146106502, "grad_norm": 5.159268856048584, "learning_rate": 2.5989304812834225e-06, "loss": 1.1166, "step": 243 }, { "epoch": 0.06529301578806529, "grad_norm": 5.182318210601807, "learning_rate": 2.6096256684491984e-06, "loss": 1.1421, "step": 244 }, { "epoch": 0.06556061011506556, "grad_norm": 5.662712574005127, "learning_rate": 2.6203208556149735e-06, "loss": 1.2302, "step": 245 }, { "epoch": 0.06582820444206583, "grad_norm": 5.712917804718018, "learning_rate": 2.631016042780749e-06, "loss": 1.3106, "step": 246 }, { "epoch": 0.0660957987690661, "grad_norm": 5.176303863525391, "learning_rate": 2.641711229946524e-06, "loss": 1.2056, "step": 247 }, { "epoch": 0.06636339309606637, "grad_norm": 5.5327982902526855, "learning_rate": 2.6524064171122997e-06, "loss": 1.2833, "step": 248 }, { "epoch": 0.06663098742306663, "grad_norm": 5.480837345123291, "learning_rate": 2.663101604278075e-06, "loss": 1.1822, "step": 249 }, { "epoch": 0.0668985817500669, "grad_norm": 5.221635818481445, "learning_rate": 2.673796791443851e-06, "loss": 1.1049, "step": 250 }, { "epoch": 0.06716617607706717, "grad_norm": 5.3436689376831055, "learning_rate": 2.684491978609626e-06, "loss": 1.1557, "step": 251 }, { "epoch": 0.06743377040406744, "grad_norm": 5.277728080749512, "learning_rate": 2.6951871657754015e-06, "loss": 1.1756, "step": 252 }, { "epoch": 0.0677013647310677, "grad_norm": 4.869785308837891, "learning_rate": 2.7058823529411766e-06, "loss": 1.0731, "step": 253 }, { "epoch": 0.06796895905806796, "grad_norm": 4.855271339416504, "learning_rate": 2.716577540106952e-06, "loss": 1.1427, "step": 254 }, { "epoch": 0.06823655338506823, "grad_norm": 5.062397003173828, "learning_rate": 2.7272727272727272e-06, "loss": 1.0324, "step": 255 }, { "epoch": 0.0685041477120685, "grad_norm": 5.3740620613098145, "learning_rate": 2.7379679144385028e-06, "loss": 1.2113, "step": 256 }, { "epoch": 0.06877174203906877, "grad_norm": 5.443506717681885, "learning_rate": 2.748663101604278e-06, "loss": 1.2146, "step": 257 }, { "epoch": 0.06903933636606904, "grad_norm": 5.629642009735107, "learning_rate": 2.759358288770054e-06, "loss": 1.1804, "step": 258 }, { "epoch": 0.06930693069306931, "grad_norm": 4.957646369934082, "learning_rate": 2.770053475935829e-06, "loss": 1.0639, "step": 259 }, { "epoch": 0.06957452502006957, "grad_norm": 5.224496841430664, "learning_rate": 2.7807486631016045e-06, "loss": 1.0334, "step": 260 }, { "epoch": 0.06984211934706984, "grad_norm": 4.813484191894531, "learning_rate": 2.79144385026738e-06, "loss": 1.0519, "step": 261 }, { "epoch": 0.07010971367407011, "grad_norm": 5.483233451843262, "learning_rate": 2.802139037433155e-06, "loss": 1.1352, "step": 262 }, { "epoch": 0.07037730800107038, "grad_norm": 5.673671245574951, "learning_rate": 2.812834224598931e-06, "loss": 1.1306, "step": 263 }, { "epoch": 0.07064490232807065, "grad_norm": 5.3372955322265625, "learning_rate": 2.8235294117647062e-06, "loss": 1.3173, "step": 264 }, { "epoch": 0.0709124966550709, "grad_norm": 5.20352840423584, "learning_rate": 2.8342245989304818e-06, "loss": 1.2713, "step": 265 }, { "epoch": 0.07118009098207118, "grad_norm": 5.352963924407959, "learning_rate": 2.844919786096257e-06, "loss": 1.1604, "step": 266 }, { "epoch": 0.07144768530907145, "grad_norm": 5.504600524902344, "learning_rate": 2.8556149732620324e-06, "loss": 1.1683, "step": 267 }, { "epoch": 0.07171527963607172, "grad_norm": 4.989468097686768, "learning_rate": 2.8663101604278075e-06, "loss": 1.1422, "step": 268 }, { "epoch": 0.07198287396307199, "grad_norm": 5.034707069396973, "learning_rate": 2.8770053475935835e-06, "loss": 1.1341, "step": 269 }, { "epoch": 0.07225046829007226, "grad_norm": 5.23364782333374, "learning_rate": 2.8877005347593586e-06, "loss": 1.2117, "step": 270 }, { "epoch": 0.07251806261707251, "grad_norm": 4.977064609527588, "learning_rate": 2.898395721925134e-06, "loss": 1.1179, "step": 271 }, { "epoch": 0.07278565694407278, "grad_norm": 5.462220668792725, "learning_rate": 2.9090909090909093e-06, "loss": 1.1499, "step": 272 }, { "epoch": 0.07305325127107305, "grad_norm": 4.852994918823242, "learning_rate": 2.919786096256685e-06, "loss": 1.0156, "step": 273 }, { "epoch": 0.07332084559807332, "grad_norm": 5.298532485961914, "learning_rate": 2.93048128342246e-06, "loss": 1.2225, "step": 274 }, { "epoch": 0.07358843992507359, "grad_norm": 4.783885955810547, "learning_rate": 2.9411764705882355e-06, "loss": 1.1382, "step": 275 }, { "epoch": 0.07385603425207386, "grad_norm": 5.855717182159424, "learning_rate": 2.9518716577540106e-06, "loss": 1.2779, "step": 276 }, { "epoch": 0.07412362857907412, "grad_norm": 4.655195713043213, "learning_rate": 2.9625668449197865e-06, "loss": 1.0411, "step": 277 }, { "epoch": 0.07439122290607439, "grad_norm": 5.513675689697266, "learning_rate": 2.9732620320855617e-06, "loss": 1.2022, "step": 278 }, { "epoch": 0.07465881723307466, "grad_norm": 4.99501371383667, "learning_rate": 2.983957219251337e-06, "loss": 1.2175, "step": 279 }, { "epoch": 0.07492641156007493, "grad_norm": 5.31617546081543, "learning_rate": 2.9946524064171123e-06, "loss": 1.2313, "step": 280 }, { "epoch": 0.0751940058870752, "grad_norm": 4.424401760101318, "learning_rate": 3.005347593582888e-06, "loss": 1.0215, "step": 281 }, { "epoch": 0.07546160021407546, "grad_norm": 6.041825771331787, "learning_rate": 3.016042780748663e-06, "loss": 1.1797, "step": 282 }, { "epoch": 0.07572919454107573, "grad_norm": 5.0878424644470215, "learning_rate": 3.026737967914439e-06, "loss": 1.117, "step": 283 }, { "epoch": 0.075996788868076, "grad_norm": 5.10042667388916, "learning_rate": 3.0374331550802145e-06, "loss": 1.1119, "step": 284 }, { "epoch": 0.07626438319507627, "grad_norm": 4.863668918609619, "learning_rate": 3.0481283422459896e-06, "loss": 1.1497, "step": 285 }, { "epoch": 0.07653197752207654, "grad_norm": 5.0864739418029785, "learning_rate": 3.058823529411765e-06, "loss": 1.1288, "step": 286 }, { "epoch": 0.0767995718490768, "grad_norm": 5.120104789733887, "learning_rate": 3.0695187165775402e-06, "loss": 1.111, "step": 287 }, { "epoch": 0.07706716617607706, "grad_norm": 4.764978885650635, "learning_rate": 3.0802139037433158e-06, "loss": 1.1646, "step": 288 }, { "epoch": 0.07733476050307733, "grad_norm": 4.789579391479492, "learning_rate": 3.090909090909091e-06, "loss": 1.1771, "step": 289 }, { "epoch": 0.0776023548300776, "grad_norm": 5.163838863372803, "learning_rate": 3.101604278074867e-06, "loss": 1.1569, "step": 290 }, { "epoch": 0.07786994915707787, "grad_norm": 5.361174583435059, "learning_rate": 3.112299465240642e-06, "loss": 1.0451, "step": 291 }, { "epoch": 0.07813754348407814, "grad_norm": 5.318236827850342, "learning_rate": 3.1229946524064175e-06, "loss": 1.1614, "step": 292 }, { "epoch": 0.0784051378110784, "grad_norm": 5.045052528381348, "learning_rate": 3.1336898395721926e-06, "loss": 1.2178, "step": 293 }, { "epoch": 0.07867273213807867, "grad_norm": 5.2366414070129395, "learning_rate": 3.144385026737968e-06, "loss": 1.1697, "step": 294 }, { "epoch": 0.07894032646507894, "grad_norm": 4.973846912384033, "learning_rate": 3.1550802139037433e-06, "loss": 1.1503, "step": 295 }, { "epoch": 0.07920792079207921, "grad_norm": 5.384598731994629, "learning_rate": 3.1657754010695192e-06, "loss": 1.0959, "step": 296 }, { "epoch": 0.07947551511907948, "grad_norm": 5.26463508605957, "learning_rate": 3.1764705882352943e-06, "loss": 1.122, "step": 297 }, { "epoch": 0.07974310944607975, "grad_norm": 5.0575737953186035, "learning_rate": 3.18716577540107e-06, "loss": 1.226, "step": 298 }, { "epoch": 0.08001070377308, "grad_norm": 5.238304615020752, "learning_rate": 3.197860962566845e-06, "loss": 1.2016, "step": 299 }, { "epoch": 0.08027829810008028, "grad_norm": 5.158024787902832, "learning_rate": 3.2085561497326205e-06, "loss": 1.174, "step": 300 }, { "epoch": 0.08054589242708055, "grad_norm": 5.37693452835083, "learning_rate": 3.2192513368983957e-06, "loss": 1.2009, "step": 301 }, { "epoch": 0.08081348675408082, "grad_norm": 5.042771816253662, "learning_rate": 3.2299465240641716e-06, "loss": 1.0364, "step": 302 }, { "epoch": 0.08108108108108109, "grad_norm": 5.316686153411865, "learning_rate": 3.2406417112299467e-06, "loss": 1.1912, "step": 303 }, { "epoch": 0.08134867540808134, "grad_norm": 4.9131011962890625, "learning_rate": 3.2513368983957223e-06, "loss": 1.1555, "step": 304 }, { "epoch": 0.08161626973508161, "grad_norm": 5.539770126342773, "learning_rate": 3.262032085561498e-06, "loss": 1.3409, "step": 305 }, { "epoch": 0.08188386406208188, "grad_norm": 4.658355712890625, "learning_rate": 3.272727272727273e-06, "loss": 1.2177, "step": 306 }, { "epoch": 0.08215145838908215, "grad_norm": 4.792846202850342, "learning_rate": 3.2834224598930485e-06, "loss": 0.9993, "step": 307 }, { "epoch": 0.08241905271608242, "grad_norm": 4.886536598205566, "learning_rate": 3.2941176470588236e-06, "loss": 1.0884, "step": 308 }, { "epoch": 0.08268664704308269, "grad_norm": 4.976652145385742, "learning_rate": 3.3048128342245995e-06, "loss": 1.1197, "step": 309 }, { "epoch": 0.08295424137008295, "grad_norm": 4.819093704223633, "learning_rate": 3.3155080213903747e-06, "loss": 1.1089, "step": 310 }, { "epoch": 0.08322183569708322, "grad_norm": 5.2282843589782715, "learning_rate": 3.32620320855615e-06, "loss": 1.1593, "step": 311 }, { "epoch": 0.08348943002408349, "grad_norm": 5.575779438018799, "learning_rate": 3.3368983957219253e-06, "loss": 1.1697, "step": 312 }, { "epoch": 0.08375702435108376, "grad_norm": 5.0405354499816895, "learning_rate": 3.347593582887701e-06, "loss": 1.2207, "step": 313 }, { "epoch": 0.08402461867808403, "grad_norm": 4.537448406219482, "learning_rate": 3.358288770053476e-06, "loss": 0.9928, "step": 314 }, { "epoch": 0.0842922130050843, "grad_norm": 4.672475337982178, "learning_rate": 3.368983957219252e-06, "loss": 1.0073, "step": 315 }, { "epoch": 0.08455980733208456, "grad_norm": 5.076086044311523, "learning_rate": 3.379679144385027e-06, "loss": 1.1783, "step": 316 }, { "epoch": 0.08482740165908483, "grad_norm": 4.888884544372559, "learning_rate": 3.3903743315508026e-06, "loss": 1.1204, "step": 317 }, { "epoch": 0.0850949959860851, "grad_norm": 5.4211554527282715, "learning_rate": 3.4010695187165777e-06, "loss": 1.2754, "step": 318 }, { "epoch": 0.08536259031308537, "grad_norm": 5.268496513366699, "learning_rate": 3.4117647058823532e-06, "loss": 1.1334, "step": 319 }, { "epoch": 0.08563018464008564, "grad_norm": 4.835329532623291, "learning_rate": 3.4224598930481284e-06, "loss": 1.1577, "step": 320 }, { "epoch": 0.08589777896708589, "grad_norm": 4.869121074676514, "learning_rate": 3.433155080213904e-06, "loss": 1.173, "step": 321 }, { "epoch": 0.08616537329408616, "grad_norm": 5.070735931396484, "learning_rate": 3.443850267379679e-06, "loss": 1.1476, "step": 322 }, { "epoch": 0.08643296762108643, "grad_norm": 5.45928430557251, "learning_rate": 3.454545454545455e-06, "loss": 1.1205, "step": 323 }, { "epoch": 0.0867005619480867, "grad_norm": 5.189511299133301, "learning_rate": 3.46524064171123e-06, "loss": 1.3158, "step": 324 }, { "epoch": 0.08696815627508697, "grad_norm": 4.831698417663574, "learning_rate": 3.4759358288770056e-06, "loss": 1.0329, "step": 325 }, { "epoch": 0.08723575060208724, "grad_norm": 4.864784240722656, "learning_rate": 3.4866310160427807e-06, "loss": 1.0805, "step": 326 }, { "epoch": 0.0875033449290875, "grad_norm": 4.9103240966796875, "learning_rate": 3.4973262032085563e-06, "loss": 1.1277, "step": 327 }, { "epoch": 0.08777093925608777, "grad_norm": 4.773064136505127, "learning_rate": 3.5080213903743322e-06, "loss": 1.0463, "step": 328 }, { "epoch": 0.08803853358308804, "grad_norm": 5.021261692047119, "learning_rate": 3.5187165775401074e-06, "loss": 1.0795, "step": 329 }, { "epoch": 0.08830612791008831, "grad_norm": 5.683427810668945, "learning_rate": 3.529411764705883e-06, "loss": 1.094, "step": 330 }, { "epoch": 0.08857372223708858, "grad_norm": 4.894428253173828, "learning_rate": 3.540106951871658e-06, "loss": 1.2105, "step": 331 }, { "epoch": 0.08884131656408883, "grad_norm": 5.363523006439209, "learning_rate": 3.5508021390374335e-06, "loss": 1.3021, "step": 332 }, { "epoch": 0.0891089108910891, "grad_norm": 4.995115756988525, "learning_rate": 3.5614973262032087e-06, "loss": 1.2159, "step": 333 }, { "epoch": 0.08937650521808937, "grad_norm": 4.57165002822876, "learning_rate": 3.5721925133689846e-06, "loss": 1.1042, "step": 334 }, { "epoch": 0.08964409954508964, "grad_norm": 4.659427165985107, "learning_rate": 3.5828877005347597e-06, "loss": 1.0984, "step": 335 }, { "epoch": 0.08991169387208992, "grad_norm": 4.8430986404418945, "learning_rate": 3.5935828877005353e-06, "loss": 1.0503, "step": 336 }, { "epoch": 0.09017928819909019, "grad_norm": 5.450077056884766, "learning_rate": 3.6042780748663104e-06, "loss": 1.2578, "step": 337 }, { "epoch": 0.09044688252609044, "grad_norm": 5.203562259674072, "learning_rate": 3.614973262032086e-06, "loss": 1.1925, "step": 338 }, { "epoch": 0.09071447685309071, "grad_norm": 5.250705718994141, "learning_rate": 3.625668449197861e-06, "loss": 1.1434, "step": 339 }, { "epoch": 0.09098207118009098, "grad_norm": 5.062129020690918, "learning_rate": 3.6363636363636366e-06, "loss": 1.1685, "step": 340 }, { "epoch": 0.09124966550709125, "grad_norm": 5.255050182342529, "learning_rate": 3.6470588235294117e-06, "loss": 1.1763, "step": 341 }, { "epoch": 0.09151725983409152, "grad_norm": 5.397471904754639, "learning_rate": 3.6577540106951877e-06, "loss": 1.1878, "step": 342 }, { "epoch": 0.09178485416109179, "grad_norm": 4.998739242553711, "learning_rate": 3.6684491978609628e-06, "loss": 1.2435, "step": 343 }, { "epoch": 0.09205244848809205, "grad_norm": 5.0231475830078125, "learning_rate": 3.6791443850267383e-06, "loss": 1.201, "step": 344 }, { "epoch": 0.09232004281509232, "grad_norm": 4.59348201751709, "learning_rate": 3.6898395721925134e-06, "loss": 1.1119, "step": 345 }, { "epoch": 0.09258763714209259, "grad_norm": 5.16015100479126, "learning_rate": 3.700534759358289e-06, "loss": 1.3093, "step": 346 }, { "epoch": 0.09285523146909286, "grad_norm": 4.382453441619873, "learning_rate": 3.711229946524064e-06, "loss": 1.0883, "step": 347 }, { "epoch": 0.09312282579609313, "grad_norm": 4.668209075927734, "learning_rate": 3.72192513368984e-06, "loss": 1.0241, "step": 348 }, { "epoch": 0.09339042012309338, "grad_norm": 4.655612945556641, "learning_rate": 3.7326203208556156e-06, "loss": 1.0818, "step": 349 }, { "epoch": 0.09365801445009365, "grad_norm": 4.565972805023193, "learning_rate": 3.7433155080213907e-06, "loss": 1.1419, "step": 350 }, { "epoch": 0.09392560877709392, "grad_norm": 5.171647548675537, "learning_rate": 3.7540106951871662e-06, "loss": 1.2268, "step": 351 }, { "epoch": 0.0941932031040942, "grad_norm": 4.986495018005371, "learning_rate": 3.7647058823529414e-06, "loss": 1.1435, "step": 352 }, { "epoch": 0.09446079743109446, "grad_norm": 5.132668495178223, "learning_rate": 3.775401069518717e-06, "loss": 1.1817, "step": 353 }, { "epoch": 0.09472839175809473, "grad_norm": 4.863659858703613, "learning_rate": 3.786096256684492e-06, "loss": 1.1636, "step": 354 }, { "epoch": 0.09499598608509499, "grad_norm": 4.890793323516846, "learning_rate": 3.796791443850268e-06, "loss": 1.099, "step": 355 }, { "epoch": 0.09526358041209526, "grad_norm": 4.9269208908081055, "learning_rate": 3.807486631016043e-06, "loss": 0.9963, "step": 356 }, { "epoch": 0.09553117473909553, "grad_norm": 5.276472091674805, "learning_rate": 3.818181818181819e-06, "loss": 1.1995, "step": 357 }, { "epoch": 0.0957987690660958, "grad_norm": 5.187767505645752, "learning_rate": 3.828877005347594e-06, "loss": 0.9875, "step": 358 }, { "epoch": 0.09606636339309607, "grad_norm": 4.884994983673096, "learning_rate": 3.839572192513369e-06, "loss": 1.1166, "step": 359 }, { "epoch": 0.09633395772009633, "grad_norm": 4.8466715812683105, "learning_rate": 3.850267379679145e-06, "loss": 1.1187, "step": 360 }, { "epoch": 0.0966015520470966, "grad_norm": 4.6448655128479, "learning_rate": 3.86096256684492e-06, "loss": 1.2536, "step": 361 }, { "epoch": 0.09686914637409687, "grad_norm": 4.447425365447998, "learning_rate": 3.871657754010695e-06, "loss": 1.0654, "step": 362 }, { "epoch": 0.09713674070109714, "grad_norm": 4.957208633422852, "learning_rate": 3.882352941176471e-06, "loss": 1.2859, "step": 363 }, { "epoch": 0.09740433502809741, "grad_norm": 4.9123735427856445, "learning_rate": 3.893048128342246e-06, "loss": 1.1159, "step": 364 }, { "epoch": 0.09767192935509768, "grad_norm": 5.830307960510254, "learning_rate": 3.903743315508022e-06, "loss": 1.05, "step": 365 }, { "epoch": 0.09793952368209793, "grad_norm": 4.788443088531494, "learning_rate": 3.914438502673797e-06, "loss": 1.1427, "step": 366 }, { "epoch": 0.0982071180090982, "grad_norm": 4.874475479125977, "learning_rate": 3.925133689839573e-06, "loss": 1.1247, "step": 367 }, { "epoch": 0.09847471233609847, "grad_norm": 5.284448623657227, "learning_rate": 3.9358288770053474e-06, "loss": 1.1494, "step": 368 }, { "epoch": 0.09874230666309874, "grad_norm": 5.056131839752197, "learning_rate": 3.946524064171123e-06, "loss": 1.1843, "step": 369 }, { "epoch": 0.09900990099009901, "grad_norm": 4.933049201965332, "learning_rate": 3.957219251336899e-06, "loss": 1.0876, "step": 370 }, { "epoch": 0.09927749531709927, "grad_norm": 5.440591335296631, "learning_rate": 3.967914438502674e-06, "loss": 1.0903, "step": 371 }, { "epoch": 0.09954508964409954, "grad_norm": 5.242448806762695, "learning_rate": 3.97860962566845e-06, "loss": 1.1539, "step": 372 }, { "epoch": 0.09981268397109981, "grad_norm": 5.424898147583008, "learning_rate": 3.989304812834225e-06, "loss": 1.1798, "step": 373 }, { "epoch": 0.10008027829810008, "grad_norm": 5.486216068267822, "learning_rate": 4.000000000000001e-06, "loss": 1.195, "step": 374 }, { "epoch": 0.10034787262510035, "grad_norm": 4.9611029624938965, "learning_rate": 4.010695187165775e-06, "loss": 1.1332, "step": 375 }, { "epoch": 0.10061546695210062, "grad_norm": 5.002806663513184, "learning_rate": 4.021390374331552e-06, "loss": 1.0903, "step": 376 }, { "epoch": 0.10088306127910088, "grad_norm": 5.656718730926514, "learning_rate": 4.0320855614973264e-06, "loss": 1.2747, "step": 377 }, { "epoch": 0.10115065560610115, "grad_norm": 5.98917293548584, "learning_rate": 4.042780748663102e-06, "loss": 1.1138, "step": 378 }, { "epoch": 0.10141824993310142, "grad_norm": 4.683370590209961, "learning_rate": 4.0534759358288775e-06, "loss": 1.1109, "step": 379 }, { "epoch": 0.10168584426010169, "grad_norm": 5.65017557144165, "learning_rate": 4.064171122994653e-06, "loss": 1.3569, "step": 380 }, { "epoch": 0.10195343858710196, "grad_norm": 5.555070400238037, "learning_rate": 4.074866310160428e-06, "loss": 1.4389, "step": 381 }, { "epoch": 0.10222103291410223, "grad_norm": 4.874694347381592, "learning_rate": 4.085561497326203e-06, "loss": 1.0894, "step": 382 }, { "epoch": 0.10248862724110248, "grad_norm": 4.907220840454102, "learning_rate": 4.096256684491979e-06, "loss": 1.1923, "step": 383 }, { "epoch": 0.10275622156810275, "grad_norm": 4.953684329986572, "learning_rate": 4.106951871657754e-06, "loss": 1.3313, "step": 384 }, { "epoch": 0.10302381589510302, "grad_norm": 4.817149639129639, "learning_rate": 4.11764705882353e-06, "loss": 1.139, "step": 385 }, { "epoch": 0.1032914102221033, "grad_norm": 5.111240386962891, "learning_rate": 4.1283422459893054e-06, "loss": 1.1787, "step": 386 }, { "epoch": 0.10355900454910356, "grad_norm": 4.8276519775390625, "learning_rate": 4.13903743315508e-06, "loss": 1.1326, "step": 387 }, { "epoch": 0.10382659887610382, "grad_norm": 4.992558479309082, "learning_rate": 4.149732620320856e-06, "loss": 1.2047, "step": 388 }, { "epoch": 0.10409419320310409, "grad_norm": 4.973186016082764, "learning_rate": 4.160427807486631e-06, "loss": 1.2083, "step": 389 }, { "epoch": 0.10436178753010436, "grad_norm": 5.174978733062744, "learning_rate": 4.171122994652407e-06, "loss": 1.1953, "step": 390 }, { "epoch": 0.10462938185710463, "grad_norm": 5.181015968322754, "learning_rate": 4.181818181818182e-06, "loss": 1.1337, "step": 391 }, { "epoch": 0.1048969761841049, "grad_norm": 5.914229869842529, "learning_rate": 4.192513368983958e-06, "loss": 1.3395, "step": 392 }, { "epoch": 0.10516457051110517, "grad_norm": 5.254291534423828, "learning_rate": 4.203208556149733e-06, "loss": 1.2202, "step": 393 }, { "epoch": 0.10543216483810543, "grad_norm": 5.055797100067139, "learning_rate": 4.213903743315508e-06, "loss": 1.1474, "step": 394 }, { "epoch": 0.1056997591651057, "grad_norm": 4.354243755340576, "learning_rate": 4.224598930481284e-06, "loss": 0.958, "step": 395 }, { "epoch": 0.10596735349210597, "grad_norm": 4.838346004486084, "learning_rate": 4.235294117647059e-06, "loss": 1.1825, "step": 396 }, { "epoch": 0.10623494781910624, "grad_norm": 4.711790561676025, "learning_rate": 4.245989304812835e-06, "loss": 1.196, "step": 397 }, { "epoch": 0.10650254214610651, "grad_norm": 4.71934175491333, "learning_rate": 4.25668449197861e-06, "loss": 1.0371, "step": 398 }, { "epoch": 0.10677013647310676, "grad_norm": 4.441000938415527, "learning_rate": 4.267379679144386e-06, "loss": 1.1646, "step": 399 }, { "epoch": 0.10703773080010703, "grad_norm": 5.28547477722168, "learning_rate": 4.2780748663101604e-06, "loss": 1.2089, "step": 400 }, { "epoch": 0.1073053251271073, "grad_norm": 4.684313774108887, "learning_rate": 4.288770053475936e-06, "loss": 1.1529, "step": 401 }, { "epoch": 0.10757291945410757, "grad_norm": 4.92221212387085, "learning_rate": 4.2994652406417115e-06, "loss": 1.1109, "step": 402 }, { "epoch": 0.10784051378110784, "grad_norm": 4.630762100219727, "learning_rate": 4.310160427807487e-06, "loss": 1.2035, "step": 403 }, { "epoch": 0.10810810810810811, "grad_norm": 5.12864875793457, "learning_rate": 4.320855614973263e-06, "loss": 1.2126, "step": 404 }, { "epoch": 0.10837570243510837, "grad_norm": 5.339291095733643, "learning_rate": 4.331550802139038e-06, "loss": 1.2983, "step": 405 }, { "epoch": 0.10864329676210864, "grad_norm": 5.000173568725586, "learning_rate": 4.342245989304813e-06, "loss": 1.2949, "step": 406 }, { "epoch": 0.10891089108910891, "grad_norm": 5.139687538146973, "learning_rate": 4.352941176470588e-06, "loss": 1.1753, "step": 407 }, { "epoch": 0.10917848541610918, "grad_norm": 5.229654788970947, "learning_rate": 4.363636363636364e-06, "loss": 1.1499, "step": 408 }, { "epoch": 0.10944607974310945, "grad_norm": 4.853805065155029, "learning_rate": 4.3743315508021394e-06, "loss": 1.2261, "step": 409 }, { "epoch": 0.1097136740701097, "grad_norm": 5.141970157623291, "learning_rate": 4.385026737967915e-06, "loss": 1.2721, "step": 410 }, { "epoch": 0.10998126839710998, "grad_norm": 5.1554436683654785, "learning_rate": 4.3957219251336905e-06, "loss": 1.2238, "step": 411 }, { "epoch": 0.11024886272411025, "grad_norm": 5.058832168579102, "learning_rate": 4.406417112299465e-06, "loss": 1.2816, "step": 412 }, { "epoch": 0.11051645705111052, "grad_norm": 4.609223365783691, "learning_rate": 4.417112299465241e-06, "loss": 1.1348, "step": 413 }, { "epoch": 0.11078405137811079, "grad_norm": 5.325019359588623, "learning_rate": 4.427807486631016e-06, "loss": 1.1162, "step": 414 }, { "epoch": 0.11105164570511106, "grad_norm": 4.600208759307861, "learning_rate": 4.438502673796792e-06, "loss": 1.0443, "step": 415 }, { "epoch": 0.11131924003211131, "grad_norm": 5.451298236846924, "learning_rate": 4.449197860962567e-06, "loss": 1.197, "step": 416 }, { "epoch": 0.11158683435911158, "grad_norm": 5.0797505378723145, "learning_rate": 4.459893048128343e-06, "loss": 1.2068, "step": 417 }, { "epoch": 0.11185442868611185, "grad_norm": 5.17997932434082, "learning_rate": 4.4705882352941184e-06, "loss": 1.137, "step": 418 }, { "epoch": 0.11212202301311212, "grad_norm": 5.312300682067871, "learning_rate": 4.481283422459893e-06, "loss": 1.176, "step": 419 }, { "epoch": 0.1123896173401124, "grad_norm": 4.956272602081299, "learning_rate": 4.491978609625669e-06, "loss": 1.1349, "step": 420 }, { "epoch": 0.11265721166711266, "grad_norm": 4.7235517501831055, "learning_rate": 4.502673796791444e-06, "loss": 1.1691, "step": 421 }, { "epoch": 0.11292480599411292, "grad_norm": 4.887537956237793, "learning_rate": 4.51336898395722e-06, "loss": 1.1562, "step": 422 }, { "epoch": 0.11319240032111319, "grad_norm": 4.688408851623535, "learning_rate": 4.524064171122995e-06, "loss": 1.1013, "step": 423 }, { "epoch": 0.11345999464811346, "grad_norm": 5.215854644775391, "learning_rate": 4.534759358288771e-06, "loss": 1.1452, "step": 424 }, { "epoch": 0.11372758897511373, "grad_norm": 5.092518329620361, "learning_rate": 4.5454545454545455e-06, "loss": 1.274, "step": 425 }, { "epoch": 0.113995183302114, "grad_norm": 4.888270854949951, "learning_rate": 4.556149732620321e-06, "loss": 1.1974, "step": 426 }, { "epoch": 0.11426277762911426, "grad_norm": 5.114696979522705, "learning_rate": 4.566844919786097e-06, "loss": 1.1434, "step": 427 }, { "epoch": 0.11453037195611453, "grad_norm": 5.443094730377197, "learning_rate": 4.577540106951872e-06, "loss": 1.3022, "step": 428 }, { "epoch": 0.1147979662831148, "grad_norm": 4.617439270019531, "learning_rate": 4.588235294117647e-06, "loss": 1.1046, "step": 429 }, { "epoch": 0.11506556061011507, "grad_norm": 5.151831150054932, "learning_rate": 4.598930481283423e-06, "loss": 1.1525, "step": 430 }, { "epoch": 0.11533315493711534, "grad_norm": 4.646505355834961, "learning_rate": 4.609625668449198e-06, "loss": 1.0613, "step": 431 }, { "epoch": 0.11560074926411561, "grad_norm": 4.780506610870361, "learning_rate": 4.6203208556149734e-06, "loss": 1.0874, "step": 432 }, { "epoch": 0.11586834359111586, "grad_norm": 4.499149322509766, "learning_rate": 4.631016042780749e-06, "loss": 1.0649, "step": 433 }, { "epoch": 0.11613593791811613, "grad_norm": 5.027551651000977, "learning_rate": 4.6417112299465245e-06, "loss": 1.1688, "step": 434 }, { "epoch": 0.1164035322451164, "grad_norm": 4.565614223480225, "learning_rate": 4.6524064171123e-06, "loss": 1.1468, "step": 435 }, { "epoch": 0.11667112657211667, "grad_norm": 4.508991241455078, "learning_rate": 4.663101604278076e-06, "loss": 1.1169, "step": 436 }, { "epoch": 0.11693872089911694, "grad_norm": 4.733094692230225, "learning_rate": 4.673796791443851e-06, "loss": 1.145, "step": 437 }, { "epoch": 0.1172063152261172, "grad_norm": 4.995217323303223, "learning_rate": 4.684491978609626e-06, "loss": 1.1946, "step": 438 }, { "epoch": 0.11747390955311747, "grad_norm": 4.931241512298584, "learning_rate": 4.695187165775401e-06, "loss": 1.0798, "step": 439 }, { "epoch": 0.11774150388011774, "grad_norm": 4.939948558807373, "learning_rate": 4.705882352941177e-06, "loss": 1.3138, "step": 440 }, { "epoch": 0.11800909820711801, "grad_norm": 5.553315162658691, "learning_rate": 4.7165775401069524e-06, "loss": 1.2837, "step": 441 }, { "epoch": 0.11827669253411828, "grad_norm": 5.024171829223633, "learning_rate": 4.727272727272728e-06, "loss": 1.0769, "step": 442 }, { "epoch": 0.11854428686111855, "grad_norm": 4.998294353485107, "learning_rate": 4.7379679144385035e-06, "loss": 1.2191, "step": 443 }, { "epoch": 0.1188118811881188, "grad_norm": 5.217951774597168, "learning_rate": 4.748663101604278e-06, "loss": 1.3233, "step": 444 }, { "epoch": 0.11907947551511908, "grad_norm": 4.932075500488281, "learning_rate": 4.759358288770054e-06, "loss": 1.2598, "step": 445 }, { "epoch": 0.11934706984211935, "grad_norm": 4.477123260498047, "learning_rate": 4.770053475935829e-06, "loss": 1.0329, "step": 446 }, { "epoch": 0.11961466416911962, "grad_norm": 4.998135566711426, "learning_rate": 4.780748663101605e-06, "loss": 1.1807, "step": 447 }, { "epoch": 0.11988225849611989, "grad_norm": 5.117345333099365, "learning_rate": 4.7914438502673795e-06, "loss": 1.1254, "step": 448 }, { "epoch": 0.12014985282312014, "grad_norm": 4.747807025909424, "learning_rate": 4.802139037433156e-06, "loss": 1.0701, "step": 449 }, { "epoch": 0.12041744715012041, "grad_norm": 4.674474716186523, "learning_rate": 4.812834224598931e-06, "loss": 1.2146, "step": 450 }, { "epoch": 0.12068504147712068, "grad_norm": 5.200889587402344, "learning_rate": 4.823529411764706e-06, "loss": 1.1634, "step": 451 }, { "epoch": 0.12095263580412095, "grad_norm": 4.857826232910156, "learning_rate": 4.834224598930482e-06, "loss": 1.13, "step": 452 }, { "epoch": 0.12122023013112122, "grad_norm": 4.851617336273193, "learning_rate": 4.844919786096257e-06, "loss": 1.1596, "step": 453 }, { "epoch": 0.1214878244581215, "grad_norm": 4.770223617553711, "learning_rate": 4.855614973262032e-06, "loss": 1.1326, "step": 454 }, { "epoch": 0.12175541878512175, "grad_norm": 5.090690612792969, "learning_rate": 4.866310160427808e-06, "loss": 1.1485, "step": 455 }, { "epoch": 0.12202301311212202, "grad_norm": 4.741364002227783, "learning_rate": 4.877005347593583e-06, "loss": 1.2239, "step": 456 }, { "epoch": 0.12229060743912229, "grad_norm": 4.698870658874512, "learning_rate": 4.8877005347593585e-06, "loss": 1.1703, "step": 457 }, { "epoch": 0.12255820176612256, "grad_norm": 5.272980213165283, "learning_rate": 4.898395721925134e-06, "loss": 1.1262, "step": 458 }, { "epoch": 0.12282579609312283, "grad_norm": 4.762371063232422, "learning_rate": 4.90909090909091e-06, "loss": 1.1365, "step": 459 }, { "epoch": 0.1230933904201231, "grad_norm": 4.594496726989746, "learning_rate": 4.919786096256685e-06, "loss": 1.1921, "step": 460 }, { "epoch": 0.12336098474712336, "grad_norm": 4.638429164886475, "learning_rate": 4.93048128342246e-06, "loss": 1.0768, "step": 461 }, { "epoch": 0.12362857907412363, "grad_norm": 5.253578186035156, "learning_rate": 4.941176470588236e-06, "loss": 1.2366, "step": 462 }, { "epoch": 0.1238961734011239, "grad_norm": 5.03195858001709, "learning_rate": 4.951871657754011e-06, "loss": 1.1402, "step": 463 }, { "epoch": 0.12416376772812417, "grad_norm": 4.741814136505127, "learning_rate": 4.9625668449197864e-06, "loss": 1.038, "step": 464 }, { "epoch": 0.12443136205512444, "grad_norm": 5.368718147277832, "learning_rate": 4.973262032085562e-06, "loss": 1.2952, "step": 465 }, { "epoch": 0.12469895638212469, "grad_norm": 4.668884754180908, "learning_rate": 4.9839572192513375e-06, "loss": 1.1318, "step": 466 }, { "epoch": 0.12496655070912496, "grad_norm": 5.185303688049316, "learning_rate": 4.994652406417112e-06, "loss": 1.1945, "step": 467 }, { "epoch": 0.12523414503612523, "grad_norm": 4.929427623748779, "learning_rate": 5.005347593582888e-06, "loss": 1.2231, "step": 468 }, { "epoch": 0.1255017393631255, "grad_norm": 4.767603397369385, "learning_rate": 5.016042780748663e-06, "loss": 1.2551, "step": 469 }, { "epoch": 0.12576933369012577, "grad_norm": 5.308717250823975, "learning_rate": 5.026737967914439e-06, "loss": 1.1529, "step": 470 }, { "epoch": 0.12603692801712604, "grad_norm": 4.83845329284668, "learning_rate": 5.037433155080214e-06, "loss": 1.2712, "step": 471 }, { "epoch": 0.1263045223441263, "grad_norm": 4.922050476074219, "learning_rate": 5.048128342245989e-06, "loss": 1.0939, "step": 472 }, { "epoch": 0.12657211667112658, "grad_norm": 4.888375282287598, "learning_rate": 5.058823529411765e-06, "loss": 1.1761, "step": 473 }, { "epoch": 0.12683971099812685, "grad_norm": 4.710062026977539, "learning_rate": 5.069518716577541e-06, "loss": 1.236, "step": 474 }, { "epoch": 0.1271073053251271, "grad_norm": 5.260262966156006, "learning_rate": 5.0802139037433165e-06, "loss": 1.2279, "step": 475 }, { "epoch": 0.12737489965212737, "grad_norm": 4.409514904022217, "learning_rate": 5.090909090909091e-06, "loss": 1.2382, "step": 476 }, { "epoch": 0.12764249397912764, "grad_norm": 4.516629695892334, "learning_rate": 5.101604278074867e-06, "loss": 0.9502, "step": 477 }, { "epoch": 0.1279100883061279, "grad_norm": 5.594369888305664, "learning_rate": 5.112299465240642e-06, "loss": 1.1869, "step": 478 }, { "epoch": 0.12817768263312818, "grad_norm": 4.493462562561035, "learning_rate": 5.122994652406418e-06, "loss": 1.0508, "step": 479 }, { "epoch": 0.12844527696012845, "grad_norm": 4.775510787963867, "learning_rate": 5.133689839572193e-06, "loss": 1.2422, "step": 480 }, { "epoch": 0.12871287128712872, "grad_norm": 4.813394546508789, "learning_rate": 5.144385026737968e-06, "loss": 1.1887, "step": 481 }, { "epoch": 0.128980465614129, "grad_norm": 4.693298816680908, "learning_rate": 5.155080213903744e-06, "loss": 1.0501, "step": 482 }, { "epoch": 0.12924805994112926, "grad_norm": 4.395559787750244, "learning_rate": 5.165775401069519e-06, "loss": 1.1917, "step": 483 }, { "epoch": 0.12951565426812953, "grad_norm": 5.032355785369873, "learning_rate": 5.176470588235295e-06, "loss": 1.2152, "step": 484 }, { "epoch": 0.1297832485951298, "grad_norm": 4.638949871063232, "learning_rate": 5.187165775401069e-06, "loss": 1.0553, "step": 485 }, { "epoch": 0.13005084292213004, "grad_norm": 4.831664562225342, "learning_rate": 5.197860962566845e-06, "loss": 1.0504, "step": 486 }, { "epoch": 0.1303184372491303, "grad_norm": 5.181875705718994, "learning_rate": 5.208556149732621e-06, "loss": 1.2088, "step": 487 }, { "epoch": 0.13058603157613058, "grad_norm": 5.028466701507568, "learning_rate": 5.219251336898397e-06, "loss": 1.0759, "step": 488 }, { "epoch": 0.13085362590313085, "grad_norm": 4.613313674926758, "learning_rate": 5.2299465240641715e-06, "loss": 1.2305, "step": 489 }, { "epoch": 0.13112122023013112, "grad_norm": 4.535508155822754, "learning_rate": 5.240641711229947e-06, "loss": 1.1403, "step": 490 }, { "epoch": 0.1313888145571314, "grad_norm": 4.558447360992432, "learning_rate": 5.251336898395723e-06, "loss": 1.1301, "step": 491 }, { "epoch": 0.13165640888413166, "grad_norm": 4.6473588943481445, "learning_rate": 5.262032085561498e-06, "loss": 1.0308, "step": 492 }, { "epoch": 0.13192400321113193, "grad_norm": 4.9026198387146, "learning_rate": 5.272727272727273e-06, "loss": 1.3224, "step": 493 }, { "epoch": 0.1321915975381322, "grad_norm": 4.340352535247803, "learning_rate": 5.283422459893048e-06, "loss": 1.1216, "step": 494 }, { "epoch": 0.13245919186513247, "grad_norm": 4.947085857391357, "learning_rate": 5.294117647058824e-06, "loss": 1.3081, "step": 495 }, { "epoch": 0.13272678619213274, "grad_norm": 5.271705627441406, "learning_rate": 5.3048128342245995e-06, "loss": 1.2907, "step": 496 }, { "epoch": 0.13299438051913298, "grad_norm": 4.9826507568359375, "learning_rate": 5.315508021390374e-06, "loss": 1.1883, "step": 497 }, { "epoch": 0.13326197484613325, "grad_norm": 4.606426239013672, "learning_rate": 5.32620320855615e-06, "loss": 1.129, "step": 498 }, { "epoch": 0.13352956917313352, "grad_norm": 4.6019392013549805, "learning_rate": 5.336898395721925e-06, "loss": 1.1887, "step": 499 }, { "epoch": 0.1337971635001338, "grad_norm": 5.553493976593018, "learning_rate": 5.347593582887702e-06, "loss": 1.2985, "step": 500 }, { "epoch": 0.1337971635001338, "eval_loss": 1.1709299087524414, "eval_runtime": 11.4546, "eval_samples_per_second": 34.92, "eval_steps_per_second": 4.365, "step": 500 }, { "epoch": 0.13406475782713406, "grad_norm": 4.656076431274414, "learning_rate": 5.358288770053477e-06, "loss": 1.1667, "step": 501 }, { "epoch": 0.13433235215413433, "grad_norm": 4.8764543533325195, "learning_rate": 5.368983957219252e-06, "loss": 1.1945, "step": 502 }, { "epoch": 0.1345999464811346, "grad_norm": 4.712137699127197, "learning_rate": 5.379679144385027e-06, "loss": 1.1168, "step": 503 }, { "epoch": 0.13486754080813487, "grad_norm": 4.951474666595459, "learning_rate": 5.390374331550803e-06, "loss": 1.1441, "step": 504 }, { "epoch": 0.13513513513513514, "grad_norm": 5.019460678100586, "learning_rate": 5.4010695187165785e-06, "loss": 1.2449, "step": 505 }, { "epoch": 0.1354027294621354, "grad_norm": 4.8025689125061035, "learning_rate": 5.411764705882353e-06, "loss": 1.062, "step": 506 }, { "epoch": 0.13567032378913568, "grad_norm": 4.835244655609131, "learning_rate": 5.422459893048129e-06, "loss": 1.0729, "step": 507 }, { "epoch": 0.13593791811613593, "grad_norm": 5.318262577056885, "learning_rate": 5.433155080213904e-06, "loss": 1.1688, "step": 508 }, { "epoch": 0.1362055124431362, "grad_norm": 4.434688568115234, "learning_rate": 5.44385026737968e-06, "loss": 1.0925, "step": 509 }, { "epoch": 0.13647310677013647, "grad_norm": 4.781643867492676, "learning_rate": 5.4545454545454545e-06, "loss": 1.192, "step": 510 }, { "epoch": 0.13674070109713674, "grad_norm": 4.806861877441406, "learning_rate": 5.46524064171123e-06, "loss": 1.2121, "step": 511 }, { "epoch": 0.137008295424137, "grad_norm": 4.502013206481934, "learning_rate": 5.4759358288770055e-06, "loss": 1.075, "step": 512 }, { "epoch": 0.13727588975113728, "grad_norm": 5.604802131652832, "learning_rate": 5.486631016042782e-06, "loss": 1.2734, "step": 513 }, { "epoch": 0.13754348407813755, "grad_norm": 5.166036128997803, "learning_rate": 5.497326203208556e-06, "loss": 1.1035, "step": 514 }, { "epoch": 0.13781107840513782, "grad_norm": 5.001628875732422, "learning_rate": 5.508021390374332e-06, "loss": 1.2467, "step": 515 }, { "epoch": 0.13807867273213809, "grad_norm": 4.5005693435668945, "learning_rate": 5.518716577540108e-06, "loss": 1.1308, "step": 516 }, { "epoch": 0.13834626705913836, "grad_norm": 5.138829231262207, "learning_rate": 5.529411764705883e-06, "loss": 1.174, "step": 517 }, { "epoch": 0.13861386138613863, "grad_norm": 4.762211322784424, "learning_rate": 5.540106951871658e-06, "loss": 1.1242, "step": 518 }, { "epoch": 0.13888145571313887, "grad_norm": 5.18784761428833, "learning_rate": 5.5508021390374335e-06, "loss": 1.2918, "step": 519 }, { "epoch": 0.13914905004013914, "grad_norm": 4.61662483215332, "learning_rate": 5.561497326203209e-06, "loss": 1.1913, "step": 520 }, { "epoch": 0.1394166443671394, "grad_norm": 4.903599262237549, "learning_rate": 5.5721925133689845e-06, "loss": 1.3014, "step": 521 }, { "epoch": 0.13968423869413968, "grad_norm": 4.205623149871826, "learning_rate": 5.58288770053476e-06, "loss": 1.0465, "step": 522 }, { "epoch": 0.13995183302113995, "grad_norm": 3.9266059398651123, "learning_rate": 5.593582887700535e-06, "loss": 1.0034, "step": 523 }, { "epoch": 0.14021942734814022, "grad_norm": 5.096248626708984, "learning_rate": 5.60427807486631e-06, "loss": 1.2393, "step": 524 }, { "epoch": 0.1404870216751405, "grad_norm": 4.701903820037842, "learning_rate": 5.614973262032086e-06, "loss": 1.1296, "step": 525 }, { "epoch": 0.14075461600214076, "grad_norm": 4.736352443695068, "learning_rate": 5.625668449197862e-06, "loss": 1.1166, "step": 526 }, { "epoch": 0.14102221032914103, "grad_norm": 4.55366325378418, "learning_rate": 5.636363636363636e-06, "loss": 1.2381, "step": 527 }, { "epoch": 0.1412898046561413, "grad_norm": 4.388349533081055, "learning_rate": 5.6470588235294125e-06, "loss": 1.0277, "step": 528 }, { "epoch": 0.14155739898314157, "grad_norm": 4.596952438354492, "learning_rate": 5.657754010695188e-06, "loss": 0.9623, "step": 529 }, { "epoch": 0.1418249933101418, "grad_norm": 4.9525251388549805, "learning_rate": 5.6684491978609635e-06, "loss": 1.1406, "step": 530 }, { "epoch": 0.14209258763714208, "grad_norm": 4.623518466949463, "learning_rate": 5.679144385026738e-06, "loss": 1.0717, "step": 531 }, { "epoch": 0.14236018196414235, "grad_norm": 4.766755104064941, "learning_rate": 5.689839572192514e-06, "loss": 1.2016, "step": 532 }, { "epoch": 0.14262777629114262, "grad_norm": 4.868133068084717, "learning_rate": 5.700534759358289e-06, "loss": 1.1728, "step": 533 }, { "epoch": 0.1428953706181429, "grad_norm": 4.722245216369629, "learning_rate": 5.711229946524065e-06, "loss": 1.2795, "step": 534 }, { "epoch": 0.14316296494514316, "grad_norm": 4.916394233703613, "learning_rate": 5.7219251336898395e-06, "loss": 1.141, "step": 535 }, { "epoch": 0.14343055927214343, "grad_norm": 4.942296028137207, "learning_rate": 5.732620320855615e-06, "loss": 1.2017, "step": 536 }, { "epoch": 0.1436981535991437, "grad_norm": 4.585607051849365, "learning_rate": 5.743315508021391e-06, "loss": 1.2109, "step": 537 }, { "epoch": 0.14396574792614397, "grad_norm": 4.965005874633789, "learning_rate": 5.754010695187167e-06, "loss": 1.1582, "step": 538 }, { "epoch": 0.14423334225314424, "grad_norm": 5.032000541687012, "learning_rate": 5.764705882352941e-06, "loss": 1.0772, "step": 539 }, { "epoch": 0.1445009365801445, "grad_norm": 4.349190711975098, "learning_rate": 5.775401069518717e-06, "loss": 1.2469, "step": 540 }, { "epoch": 0.14476853090714478, "grad_norm": 4.369176387786865, "learning_rate": 5.786096256684493e-06, "loss": 1.131, "step": 541 }, { "epoch": 0.14503612523414502, "grad_norm": 4.241110324859619, "learning_rate": 5.796791443850268e-06, "loss": 0.9937, "step": 542 }, { "epoch": 0.1453037195611453, "grad_norm": 4.846850395202637, "learning_rate": 5.807486631016043e-06, "loss": 1.2059, "step": 543 }, { "epoch": 0.14557131388814556, "grad_norm": 5.102479457855225, "learning_rate": 5.8181818181818185e-06, "loss": 1.1612, "step": 544 }, { "epoch": 0.14583890821514583, "grad_norm": 4.706130027770996, "learning_rate": 5.828877005347594e-06, "loss": 1.1608, "step": 545 }, { "epoch": 0.1461065025421461, "grad_norm": 5.125561237335205, "learning_rate": 5.83957219251337e-06, "loss": 1.143, "step": 546 }, { "epoch": 0.14637409686914638, "grad_norm": 4.503932952880859, "learning_rate": 5.850267379679145e-06, "loss": 1.127, "step": 547 }, { "epoch": 0.14664169119614665, "grad_norm": 4.410585880279541, "learning_rate": 5.86096256684492e-06, "loss": 1.0207, "step": 548 }, { "epoch": 0.14690928552314692, "grad_norm": 4.253677845001221, "learning_rate": 5.871657754010695e-06, "loss": 1.0741, "step": 549 }, { "epoch": 0.14717687985014719, "grad_norm": 4.8487868309021, "learning_rate": 5.882352941176471e-06, "loss": 1.1049, "step": 550 }, { "epoch": 0.14744447417714746, "grad_norm": 5.069744110107422, "learning_rate": 5.893048128342247e-06, "loss": 1.2483, "step": 551 }, { "epoch": 0.14771206850414773, "grad_norm": 4.3907470703125, "learning_rate": 5.903743315508021e-06, "loss": 1.0139, "step": 552 }, { "epoch": 0.14797966283114797, "grad_norm": 5.0639142990112305, "learning_rate": 5.9144385026737975e-06, "loss": 1.2476, "step": 553 }, { "epoch": 0.14824725715814824, "grad_norm": 4.5384016036987305, "learning_rate": 5.925133689839573e-06, "loss": 1.1994, "step": 554 }, { "epoch": 0.1485148514851485, "grad_norm": 4.997219085693359, "learning_rate": 5.935828877005349e-06, "loss": 1.133, "step": 555 }, { "epoch": 0.14878244581214878, "grad_norm": 5.0610551834106445, "learning_rate": 5.946524064171123e-06, "loss": 1.3546, "step": 556 }, { "epoch": 0.14905004013914905, "grad_norm": 4.485021114349365, "learning_rate": 5.957219251336899e-06, "loss": 1.1425, "step": 557 }, { "epoch": 0.14931763446614932, "grad_norm": 4.8803229331970215, "learning_rate": 5.967914438502674e-06, "loss": 1.2082, "step": 558 }, { "epoch": 0.1495852287931496, "grad_norm": 4.79873514175415, "learning_rate": 5.97860962566845e-06, "loss": 1.225, "step": 559 }, { "epoch": 0.14985282312014986, "grad_norm": 4.734536170959473, "learning_rate": 5.989304812834225e-06, "loss": 1.1506, "step": 560 }, { "epoch": 0.15012041744715013, "grad_norm": 4.322850227355957, "learning_rate": 6e-06, "loss": 1.0387, "step": 561 }, { "epoch": 0.1503880117741504, "grad_norm": 4.721519947052002, "learning_rate": 6.010695187165776e-06, "loss": 1.0448, "step": 562 }, { "epoch": 0.15065560610115067, "grad_norm": 4.884403228759766, "learning_rate": 6.021390374331551e-06, "loss": 1.1416, "step": 563 }, { "epoch": 0.1509232004281509, "grad_norm": 5.24191427230835, "learning_rate": 6.032085561497326e-06, "loss": 1.2222, "step": 564 }, { "epoch": 0.15119079475515118, "grad_norm": 4.954929351806641, "learning_rate": 6.0427807486631015e-06, "loss": 1.2712, "step": 565 }, { "epoch": 0.15145838908215145, "grad_norm": 4.613723278045654, "learning_rate": 6.053475935828878e-06, "loss": 1.1032, "step": 566 }, { "epoch": 0.15172598340915172, "grad_norm": 4.400996685028076, "learning_rate": 6.064171122994653e-06, "loss": 1.091, "step": 567 }, { "epoch": 0.151993577736152, "grad_norm": 4.841631889343262, "learning_rate": 6.074866310160429e-06, "loss": 1.2592, "step": 568 }, { "epoch": 0.15226117206315226, "grad_norm": 5.009564399719238, "learning_rate": 6.085561497326204e-06, "loss": 1.1364, "step": 569 }, { "epoch": 0.15252876639015253, "grad_norm": 4.932076930999756, "learning_rate": 6.096256684491979e-06, "loss": 1.1759, "step": 570 }, { "epoch": 0.1527963607171528, "grad_norm": 5.142986297607422, "learning_rate": 6.106951871657755e-06, "loss": 1.2236, "step": 571 }, { "epoch": 0.15306395504415307, "grad_norm": 5.11539363861084, "learning_rate": 6.11764705882353e-06, "loss": 1.1623, "step": 572 }, { "epoch": 0.15333154937115334, "grad_norm": 4.659823417663574, "learning_rate": 6.128342245989305e-06, "loss": 1.2424, "step": 573 }, { "epoch": 0.1535991436981536, "grad_norm": 5.004172325134277, "learning_rate": 6.1390374331550805e-06, "loss": 1.2536, "step": 574 }, { "epoch": 0.15386673802515385, "grad_norm": 4.277651309967041, "learning_rate": 6.149732620320856e-06, "loss": 1.1239, "step": 575 }, { "epoch": 0.15413433235215412, "grad_norm": 4.292529582977295, "learning_rate": 6.1604278074866315e-06, "loss": 1.1876, "step": 576 }, { "epoch": 0.1544019266791544, "grad_norm": 4.799615859985352, "learning_rate": 6.171122994652406e-06, "loss": 1.1205, "step": 577 }, { "epoch": 0.15466952100615466, "grad_norm": 4.678570747375488, "learning_rate": 6.181818181818182e-06, "loss": 1.1356, "step": 578 }, { "epoch": 0.15493711533315493, "grad_norm": 4.6860246658325195, "learning_rate": 6.192513368983958e-06, "loss": 1.1228, "step": 579 }, { "epoch": 0.1552047096601552, "grad_norm": 4.289163112640381, "learning_rate": 6.203208556149734e-06, "loss": 1.0872, "step": 580 }, { "epoch": 0.15547230398715547, "grad_norm": 4.854632377624512, "learning_rate": 6.213903743315508e-06, "loss": 1.3285, "step": 581 }, { "epoch": 0.15573989831415574, "grad_norm": 5.2167253494262695, "learning_rate": 6.224598930481284e-06, "loss": 1.076, "step": 582 }, { "epoch": 0.15600749264115601, "grad_norm": 4.5353264808654785, "learning_rate": 6.2352941176470595e-06, "loss": 1.1824, "step": 583 }, { "epoch": 0.15627508696815628, "grad_norm": 4.770082950592041, "learning_rate": 6.245989304812835e-06, "loss": 1.0642, "step": 584 }, { "epoch": 0.15654268129515655, "grad_norm": 5.027703762054443, "learning_rate": 6.25668449197861e-06, "loss": 1.2616, "step": 585 }, { "epoch": 0.1568102756221568, "grad_norm": 4.812859058380127, "learning_rate": 6.267379679144385e-06, "loss": 1.2037, "step": 586 }, { "epoch": 0.15707786994915707, "grad_norm": 5.672885894775391, "learning_rate": 6.278074866310161e-06, "loss": 1.3296, "step": 587 }, { "epoch": 0.15734546427615734, "grad_norm": 4.318905830383301, "learning_rate": 6.288770053475936e-06, "loss": 1.0487, "step": 588 }, { "epoch": 0.1576130586031576, "grad_norm": 4.390570163726807, "learning_rate": 6.299465240641713e-06, "loss": 1.1174, "step": 589 }, { "epoch": 0.15788065293015788, "grad_norm": 5.302069664001465, "learning_rate": 6.3101604278074865e-06, "loss": 1.0765, "step": 590 }, { "epoch": 0.15814824725715815, "grad_norm": 5.114290237426758, "learning_rate": 6.320855614973262e-06, "loss": 1.2838, "step": 591 }, { "epoch": 0.15841584158415842, "grad_norm": 4.3737335205078125, "learning_rate": 6.3315508021390385e-06, "loss": 1.072, "step": 592 }, { "epoch": 0.1586834359111587, "grad_norm": 4.571005344390869, "learning_rate": 6.342245989304814e-06, "loss": 1.1507, "step": 593 }, { "epoch": 0.15895103023815896, "grad_norm": 4.546551704406738, "learning_rate": 6.352941176470589e-06, "loss": 1.1058, "step": 594 }, { "epoch": 0.15921862456515923, "grad_norm": 4.901880741119385, "learning_rate": 6.363636363636364e-06, "loss": 1.1524, "step": 595 }, { "epoch": 0.1594862188921595, "grad_norm": 4.456069469451904, "learning_rate": 6.37433155080214e-06, "loss": 1.1826, "step": 596 }, { "epoch": 0.15975381321915974, "grad_norm": 4.513467788696289, "learning_rate": 6.385026737967915e-06, "loss": 1.1069, "step": 597 }, { "epoch": 0.16002140754616, "grad_norm": 4.525417804718018, "learning_rate": 6.39572192513369e-06, "loss": 1.1583, "step": 598 }, { "epoch": 0.16028900187316028, "grad_norm": 4.3607177734375, "learning_rate": 6.4064171122994655e-06, "loss": 1.1446, "step": 599 }, { "epoch": 0.16055659620016055, "grad_norm": 4.644144058227539, "learning_rate": 6.417112299465241e-06, "loss": 1.1466, "step": 600 }, { "epoch": 0.16082419052716082, "grad_norm": 4.352504730224609, "learning_rate": 6.427807486631017e-06, "loss": 1.1164, "step": 601 }, { "epoch": 0.1610917848541611, "grad_norm": 5.058422088623047, "learning_rate": 6.438502673796791e-06, "loss": 1.1716, "step": 602 }, { "epoch": 0.16135937918116136, "grad_norm": 4.505871772766113, "learning_rate": 6.449197860962567e-06, "loss": 1.1306, "step": 603 }, { "epoch": 0.16162697350816163, "grad_norm": 4.627199649810791, "learning_rate": 6.459893048128343e-06, "loss": 1.2105, "step": 604 }, { "epoch": 0.1618945678351619, "grad_norm": 5.190435409545898, "learning_rate": 6.470588235294119e-06, "loss": 1.2797, "step": 605 }, { "epoch": 0.16216216216216217, "grad_norm": 4.629772186279297, "learning_rate": 6.4812834224598935e-06, "loss": 1.0904, "step": 606 }, { "epoch": 0.16242975648916244, "grad_norm": 4.735287189483643, "learning_rate": 6.491978609625669e-06, "loss": 1.1999, "step": 607 }, { "epoch": 0.16269735081616268, "grad_norm": 5.2313008308410645, "learning_rate": 6.5026737967914445e-06, "loss": 1.3026, "step": 608 }, { "epoch": 0.16296494514316295, "grad_norm": 4.605459213256836, "learning_rate": 6.51336898395722e-06, "loss": 1.1164, "step": 609 }, { "epoch": 0.16323253947016322, "grad_norm": 4.5824480056762695, "learning_rate": 6.524064171122996e-06, "loss": 1.0526, "step": 610 }, { "epoch": 0.1635001337971635, "grad_norm": 4.864238739013672, "learning_rate": 6.53475935828877e-06, "loss": 1.1137, "step": 611 }, { "epoch": 0.16376772812416376, "grad_norm": 4.430417537689209, "learning_rate": 6.545454545454546e-06, "loss": 1.1407, "step": 612 }, { "epoch": 0.16403532245116403, "grad_norm": 4.643566131591797, "learning_rate": 6.556149732620321e-06, "loss": 1.115, "step": 613 }, { "epoch": 0.1643029167781643, "grad_norm": 5.602782249450684, "learning_rate": 6.566844919786097e-06, "loss": 1.4065, "step": 614 }, { "epoch": 0.16457051110516457, "grad_norm": 4.812868118286133, "learning_rate": 6.577540106951872e-06, "loss": 1.1505, "step": 615 }, { "epoch": 0.16483810543216484, "grad_norm": 4.6687235832214355, "learning_rate": 6.588235294117647e-06, "loss": 1.1733, "step": 616 }, { "epoch": 0.16510569975916511, "grad_norm": 4.8625264167785645, "learning_rate": 6.5989304812834235e-06, "loss": 1.14, "step": 617 }, { "epoch": 0.16537329408616538, "grad_norm": 5.044530868530273, "learning_rate": 6.609625668449199e-06, "loss": 1.2254, "step": 618 }, { "epoch": 0.16564088841316565, "grad_norm": 4.458752632141113, "learning_rate": 6.620320855614974e-06, "loss": 1.1927, "step": 619 }, { "epoch": 0.1659084827401659, "grad_norm": 4.7606377601623535, "learning_rate": 6.631016042780749e-06, "loss": 1.1916, "step": 620 }, { "epoch": 0.16617607706716617, "grad_norm": 5.007805824279785, "learning_rate": 6.641711229946525e-06, "loss": 1.2655, "step": 621 }, { "epoch": 0.16644367139416644, "grad_norm": 4.409674167633057, "learning_rate": 6.6524064171123e-06, "loss": 1.0725, "step": 622 }, { "epoch": 0.1667112657211667, "grad_norm": 4.561901569366455, "learning_rate": 6.663101604278075e-06, "loss": 1.1336, "step": 623 }, { "epoch": 0.16697886004816698, "grad_norm": 5.645256996154785, "learning_rate": 6.673796791443851e-06, "loss": 1.2585, "step": 624 }, { "epoch": 0.16724645437516725, "grad_norm": 5.0422139167785645, "learning_rate": 6.684491978609626e-06, "loss": 1.2117, "step": 625 }, { "epoch": 0.16751404870216752, "grad_norm": 5.541776180267334, "learning_rate": 6.695187165775402e-06, "loss": 1.2715, "step": 626 }, { "epoch": 0.1677816430291678, "grad_norm": 4.81757116317749, "learning_rate": 6.705882352941176e-06, "loss": 1.1431, "step": 627 }, { "epoch": 0.16804923735616806, "grad_norm": 5.481652736663818, "learning_rate": 6.716577540106952e-06, "loss": 1.174, "step": 628 }, { "epoch": 0.16831683168316833, "grad_norm": 4.777329444885254, "learning_rate": 6.7272727272727275e-06, "loss": 1.1886, "step": 629 }, { "epoch": 0.1685844260101686, "grad_norm": 4.763789176940918, "learning_rate": 6.737967914438504e-06, "loss": 1.1154, "step": 630 }, { "epoch": 0.16885202033716884, "grad_norm": 4.949760437011719, "learning_rate": 6.748663101604279e-06, "loss": 1.1888, "step": 631 }, { "epoch": 0.1691196146641691, "grad_norm": 4.344736099243164, "learning_rate": 6.759358288770054e-06, "loss": 1.2278, "step": 632 }, { "epoch": 0.16938720899116938, "grad_norm": 4.495877265930176, "learning_rate": 6.77005347593583e-06, "loss": 1.1668, "step": 633 }, { "epoch": 0.16965480331816965, "grad_norm": 4.895537853240967, "learning_rate": 6.780748663101605e-06, "loss": 1.3816, "step": 634 }, { "epoch": 0.16992239764516992, "grad_norm": 4.664587497711182, "learning_rate": 6.791443850267381e-06, "loss": 1.2055, "step": 635 }, { "epoch": 0.1701899919721702, "grad_norm": 4.564089775085449, "learning_rate": 6.802139037433155e-06, "loss": 1.1425, "step": 636 }, { "epoch": 0.17045758629917046, "grad_norm": 4.690885066986084, "learning_rate": 6.812834224598931e-06, "loss": 1.0908, "step": 637 }, { "epoch": 0.17072518062617073, "grad_norm": 4.54403018951416, "learning_rate": 6.8235294117647065e-06, "loss": 1.0681, "step": 638 }, { "epoch": 0.170992774953171, "grad_norm": 4.301973342895508, "learning_rate": 6.834224598930482e-06, "loss": 1.1184, "step": 639 }, { "epoch": 0.17126036928017127, "grad_norm": 4.822204113006592, "learning_rate": 6.844919786096257e-06, "loss": 1.1718, "step": 640 }, { "epoch": 0.17152796360717154, "grad_norm": 4.2204413414001465, "learning_rate": 6.855614973262032e-06, "loss": 1.141, "step": 641 }, { "epoch": 0.17179555793417178, "grad_norm": 4.727780818939209, "learning_rate": 6.866310160427808e-06, "loss": 1.2378, "step": 642 }, { "epoch": 0.17206315226117205, "grad_norm": 4.156445503234863, "learning_rate": 6.877005347593584e-06, "loss": 1.1066, "step": 643 }, { "epoch": 0.17233074658817232, "grad_norm": 4.479008197784424, "learning_rate": 6.887700534759358e-06, "loss": 1.171, "step": 644 }, { "epoch": 0.1725983409151726, "grad_norm": 4.782415866851807, "learning_rate": 6.898395721925134e-06, "loss": 1.1557, "step": 645 }, { "epoch": 0.17286593524217286, "grad_norm": 4.781481742858887, "learning_rate": 6.90909090909091e-06, "loss": 1.3044, "step": 646 }, { "epoch": 0.17313352956917313, "grad_norm": 4.513900279998779, "learning_rate": 6.9197860962566855e-06, "loss": 1.189, "step": 647 }, { "epoch": 0.1734011238961734, "grad_norm": 5.123539924621582, "learning_rate": 6.93048128342246e-06, "loss": 1.2388, "step": 648 }, { "epoch": 0.17366871822317367, "grad_norm": 5.24996280670166, "learning_rate": 6.941176470588236e-06, "loss": 1.2528, "step": 649 }, { "epoch": 0.17393631255017394, "grad_norm": 4.407766819000244, "learning_rate": 6.951871657754011e-06, "loss": 1.0828, "step": 650 }, { "epoch": 0.1742039068771742, "grad_norm": 4.964326858520508, "learning_rate": 6.962566844919787e-06, "loss": 1.1248, "step": 651 }, { "epoch": 0.17447150120417448, "grad_norm": 4.530794620513916, "learning_rate": 6.9732620320855615e-06, "loss": 1.1584, "step": 652 }, { "epoch": 0.17473909553117473, "grad_norm": 4.297457218170166, "learning_rate": 6.983957219251337e-06, "loss": 1.1548, "step": 653 }, { "epoch": 0.175006689858175, "grad_norm": 4.825823783874512, "learning_rate": 6.9946524064171125e-06, "loss": 1.2084, "step": 654 }, { "epoch": 0.17527428418517527, "grad_norm": 4.5333709716796875, "learning_rate": 7.005347593582889e-06, "loss": 1.1385, "step": 655 }, { "epoch": 0.17554187851217554, "grad_norm": 4.513311386108398, "learning_rate": 7.0160427807486645e-06, "loss": 1.1604, "step": 656 }, { "epoch": 0.1758094728391758, "grad_norm": 4.645889759063721, "learning_rate": 7.026737967914438e-06, "loss": 1.3132, "step": 657 }, { "epoch": 0.17607706716617608, "grad_norm": 4.844141006469727, "learning_rate": 7.037433155080215e-06, "loss": 1.1617, "step": 658 }, { "epoch": 0.17634466149317635, "grad_norm": 4.618659973144531, "learning_rate": 7.04812834224599e-06, "loss": 1.1599, "step": 659 }, { "epoch": 0.17661225582017662, "grad_norm": 4.780247688293457, "learning_rate": 7.058823529411766e-06, "loss": 1.2249, "step": 660 }, { "epoch": 0.1768798501471769, "grad_norm": 4.695610046386719, "learning_rate": 7.0695187165775405e-06, "loss": 1.2523, "step": 661 }, { "epoch": 0.17714744447417716, "grad_norm": 4.643034934997559, "learning_rate": 7.080213903743316e-06, "loss": 1.2802, "step": 662 }, { "epoch": 0.17741503880117743, "grad_norm": 4.363466739654541, "learning_rate": 7.0909090909090916e-06, "loss": 1.0768, "step": 663 }, { "epoch": 0.17768263312817767, "grad_norm": 4.794258117675781, "learning_rate": 7.101604278074867e-06, "loss": 1.2522, "step": 664 }, { "epoch": 0.17795022745517794, "grad_norm": 4.560819149017334, "learning_rate": 7.112299465240642e-06, "loss": 1.1901, "step": 665 }, { "epoch": 0.1782178217821782, "grad_norm": 4.56439733505249, "learning_rate": 7.122994652406417e-06, "loss": 1.1813, "step": 666 }, { "epoch": 0.17848541610917848, "grad_norm": 4.605260848999023, "learning_rate": 7.133689839572193e-06, "loss": 1.1981, "step": 667 }, { "epoch": 0.17875301043617875, "grad_norm": 4.7326483726501465, "learning_rate": 7.144385026737969e-06, "loss": 1.1832, "step": 668 }, { "epoch": 0.17902060476317902, "grad_norm": 4.547402858734131, "learning_rate": 7.155080213903743e-06, "loss": 1.0722, "step": 669 }, { "epoch": 0.1792881990901793, "grad_norm": 4.594086170196533, "learning_rate": 7.1657754010695195e-06, "loss": 1.1557, "step": 670 }, { "epoch": 0.17955579341717956, "grad_norm": 4.440776824951172, "learning_rate": 7.176470588235295e-06, "loss": 1.1161, "step": 671 }, { "epoch": 0.17982338774417983, "grad_norm": 5.013535976409912, "learning_rate": 7.1871657754010706e-06, "loss": 1.1546, "step": 672 }, { "epoch": 0.1800909820711801, "grad_norm": 5.5731000900268555, "learning_rate": 7.197860962566845e-06, "loss": 1.315, "step": 673 }, { "epoch": 0.18035857639818037, "grad_norm": 4.811005592346191, "learning_rate": 7.208556149732621e-06, "loss": 1.0571, "step": 674 }, { "epoch": 0.1806261707251806, "grad_norm": 4.496854782104492, "learning_rate": 7.219251336898396e-06, "loss": 1.0867, "step": 675 }, { "epoch": 0.18089376505218088, "grad_norm": 4.781049728393555, "learning_rate": 7.229946524064172e-06, "loss": 1.0135, "step": 676 }, { "epoch": 0.18116135937918115, "grad_norm": 4.150574684143066, "learning_rate": 7.240641711229947e-06, "loss": 1.1745, "step": 677 }, { "epoch": 0.18142895370618142, "grad_norm": 4.843429088592529, "learning_rate": 7.251336898395722e-06, "loss": 1.2394, "step": 678 }, { "epoch": 0.1816965480331817, "grad_norm": 4.525768280029297, "learning_rate": 7.262032085561498e-06, "loss": 1.2715, "step": 679 }, { "epoch": 0.18196414236018196, "grad_norm": 4.916580677032471, "learning_rate": 7.272727272727273e-06, "loss": 1.2347, "step": 680 }, { "epoch": 0.18223173668718223, "grad_norm": 4.803800106048584, "learning_rate": 7.2834224598930496e-06, "loss": 1.1586, "step": 681 }, { "epoch": 0.1824993310141825, "grad_norm": 4.679764747619629, "learning_rate": 7.294117647058823e-06, "loss": 1.268, "step": 682 }, { "epoch": 0.18276692534118277, "grad_norm": 4.965787410736084, "learning_rate": 7.3048128342246e-06, "loss": 1.1855, "step": 683 }, { "epoch": 0.18303451966818304, "grad_norm": 4.892383575439453, "learning_rate": 7.315508021390375e-06, "loss": 1.1683, "step": 684 }, { "epoch": 0.1833021139951833, "grad_norm": 4.476233005523682, "learning_rate": 7.326203208556151e-06, "loss": 1.1109, "step": 685 }, { "epoch": 0.18356970832218358, "grad_norm": 4.431989669799805, "learning_rate": 7.3368983957219256e-06, "loss": 1.1575, "step": 686 }, { "epoch": 0.18383730264918383, "grad_norm": 4.837761878967285, "learning_rate": 7.347593582887701e-06, "loss": 1.2535, "step": 687 }, { "epoch": 0.1841048969761841, "grad_norm": 4.285210132598877, "learning_rate": 7.358288770053477e-06, "loss": 1.0863, "step": 688 }, { "epoch": 0.18437249130318437, "grad_norm": 4.910134315490723, "learning_rate": 7.368983957219252e-06, "loss": 1.1904, "step": 689 }, { "epoch": 0.18464008563018464, "grad_norm": 4.774014472961426, "learning_rate": 7.379679144385027e-06, "loss": 1.1826, "step": 690 }, { "epoch": 0.1849076799571849, "grad_norm": 5.281838893890381, "learning_rate": 7.390374331550802e-06, "loss": 1.3556, "step": 691 }, { "epoch": 0.18517527428418518, "grad_norm": 4.740875244140625, "learning_rate": 7.401069518716578e-06, "loss": 1.3182, "step": 692 }, { "epoch": 0.18544286861118545, "grad_norm": 4.560650825500488, "learning_rate": 7.4117647058823535e-06, "loss": 1.2666, "step": 693 }, { "epoch": 0.18571046293818572, "grad_norm": 4.770612716674805, "learning_rate": 7.422459893048128e-06, "loss": 1.0551, "step": 694 }, { "epoch": 0.185978057265186, "grad_norm": 4.479051113128662, "learning_rate": 7.433155080213904e-06, "loss": 1.1718, "step": 695 }, { "epoch": 0.18624565159218626, "grad_norm": 4.537865161895752, "learning_rate": 7.44385026737968e-06, "loss": 1.0874, "step": 696 }, { "epoch": 0.18651324591918653, "grad_norm": 4.282291412353516, "learning_rate": 7.454545454545456e-06, "loss": 1.2062, "step": 697 }, { "epoch": 0.18678084024618677, "grad_norm": 4.386539459228516, "learning_rate": 7.465240641711231e-06, "loss": 1.313, "step": 698 }, { "epoch": 0.18704843457318704, "grad_norm": 4.664721488952637, "learning_rate": 7.475935828877006e-06, "loss": 1.2399, "step": 699 }, { "epoch": 0.1873160289001873, "grad_norm": 5.261703014373779, "learning_rate": 7.486631016042781e-06, "loss": 1.3199, "step": 700 }, { "epoch": 0.18758362322718758, "grad_norm": 4.195591449737549, "learning_rate": 7.497326203208557e-06, "loss": 1.035, "step": 701 }, { "epoch": 0.18785121755418785, "grad_norm": 4.815860271453857, "learning_rate": 7.5080213903743325e-06, "loss": 1.2643, "step": 702 }, { "epoch": 0.18811881188118812, "grad_norm": 5.00251579284668, "learning_rate": 7.518716577540107e-06, "loss": 1.1611, "step": 703 }, { "epoch": 0.1883864062081884, "grad_norm": 4.371436595916748, "learning_rate": 7.529411764705883e-06, "loss": 1.1734, "step": 704 }, { "epoch": 0.18865400053518866, "grad_norm": 4.646690368652344, "learning_rate": 7.540106951871658e-06, "loss": 1.1298, "step": 705 }, { "epoch": 0.18892159486218893, "grad_norm": 4.49533748626709, "learning_rate": 7.550802139037434e-06, "loss": 1.0886, "step": 706 }, { "epoch": 0.1891891891891892, "grad_norm": 4.740173816680908, "learning_rate": 7.5614973262032085e-06, "loss": 1.1291, "step": 707 }, { "epoch": 0.18945678351618947, "grad_norm": 4.919492721557617, "learning_rate": 7.572192513368984e-06, "loss": 1.1714, "step": 708 }, { "epoch": 0.1897243778431897, "grad_norm": 4.677563190460205, "learning_rate": 7.58288770053476e-06, "loss": 1.1417, "step": 709 }, { "epoch": 0.18999197217018998, "grad_norm": 4.335318088531494, "learning_rate": 7.593582887700536e-06, "loss": 1.0924, "step": 710 }, { "epoch": 0.19025956649719025, "grad_norm": 4.638528347015381, "learning_rate": 7.604278074866311e-06, "loss": 1.2186, "step": 711 }, { "epoch": 0.19052716082419052, "grad_norm": 4.537407398223877, "learning_rate": 7.614973262032086e-06, "loss": 1.1797, "step": 712 }, { "epoch": 0.1907947551511908, "grad_norm": 4.735195159912109, "learning_rate": 7.625668449197862e-06, "loss": 1.2728, "step": 713 }, { "epoch": 0.19106234947819106, "grad_norm": 4.434914588928223, "learning_rate": 7.636363636363638e-06, "loss": 1.1357, "step": 714 }, { "epoch": 0.19132994380519133, "grad_norm": 4.431911945343018, "learning_rate": 7.647058823529411e-06, "loss": 1.2785, "step": 715 }, { "epoch": 0.1915975381321916, "grad_norm": 4.211305618286133, "learning_rate": 7.657754010695187e-06, "loss": 1.1322, "step": 716 }, { "epoch": 0.19186513245919187, "grad_norm": 4.698652267456055, "learning_rate": 7.668449197860964e-06, "loss": 1.2302, "step": 717 }, { "epoch": 0.19213272678619214, "grad_norm": 4.491962909698486, "learning_rate": 7.679144385026739e-06, "loss": 1.1325, "step": 718 }, { "epoch": 0.1924003211131924, "grad_norm": 4.714018821716309, "learning_rate": 7.689839572192515e-06, "loss": 1.152, "step": 719 }, { "epoch": 0.19266791544019266, "grad_norm": 4.598504066467285, "learning_rate": 7.70053475935829e-06, "loss": 1.0786, "step": 720 }, { "epoch": 0.19293550976719293, "grad_norm": 4.4915008544921875, "learning_rate": 7.711229946524064e-06, "loss": 1.1606, "step": 721 }, { "epoch": 0.1932031040941932, "grad_norm": 4.305722236633301, "learning_rate": 7.72192513368984e-06, "loss": 1.1375, "step": 722 }, { "epoch": 0.19347069842119347, "grad_norm": 4.845047473907471, "learning_rate": 7.732620320855615e-06, "loss": 1.0849, "step": 723 }, { "epoch": 0.19373829274819374, "grad_norm": 4.809256553649902, "learning_rate": 7.74331550802139e-06, "loss": 1.0855, "step": 724 }, { "epoch": 0.194005887075194, "grad_norm": 5.05698823928833, "learning_rate": 7.754010695187166e-06, "loss": 1.215, "step": 725 }, { "epoch": 0.19427348140219428, "grad_norm": 4.64973258972168, "learning_rate": 7.764705882352941e-06, "loss": 1.1856, "step": 726 }, { "epoch": 0.19454107572919455, "grad_norm": 4.284728050231934, "learning_rate": 7.775401069518718e-06, "loss": 1.013, "step": 727 }, { "epoch": 0.19480867005619482, "grad_norm": 4.597956657409668, "learning_rate": 7.786096256684492e-06, "loss": 1.1597, "step": 728 }, { "epoch": 0.19507626438319509, "grad_norm": 4.795129299163818, "learning_rate": 7.796791443850269e-06, "loss": 1.26, "step": 729 }, { "epoch": 0.19534385871019536, "grad_norm": 4.353721618652344, "learning_rate": 7.807486631016043e-06, "loss": 1.1202, "step": 730 }, { "epoch": 0.1956114530371956, "grad_norm": 4.6432108879089355, "learning_rate": 7.81818181818182e-06, "loss": 1.1496, "step": 731 }, { "epoch": 0.19587904736419587, "grad_norm": 4.320937156677246, "learning_rate": 7.828877005347594e-06, "loss": 1.0743, "step": 732 }, { "epoch": 0.19614664169119614, "grad_norm": 4.268731594085693, "learning_rate": 7.839572192513369e-06, "loss": 1.0991, "step": 733 }, { "epoch": 0.1964142360181964, "grad_norm": 4.839014530181885, "learning_rate": 7.850267379679145e-06, "loss": 1.142, "step": 734 }, { "epoch": 0.19668183034519668, "grad_norm": 4.309354305267334, "learning_rate": 7.86096256684492e-06, "loss": 1.055, "step": 735 }, { "epoch": 0.19694942467219695, "grad_norm": 4.399764060974121, "learning_rate": 7.871657754010695e-06, "loss": 1.2, "step": 736 }, { "epoch": 0.19721701899919722, "grad_norm": 4.814887523651123, "learning_rate": 7.882352941176471e-06, "loss": 1.1129, "step": 737 }, { "epoch": 0.1974846133261975, "grad_norm": 4.662134647369385, "learning_rate": 7.893048128342246e-06, "loss": 1.3224, "step": 738 }, { "epoch": 0.19775220765319776, "grad_norm": 4.743928909301758, "learning_rate": 7.903743315508022e-06, "loss": 1.2364, "step": 739 }, { "epoch": 0.19801980198019803, "grad_norm": 4.6992716789245605, "learning_rate": 7.914438502673799e-06, "loss": 1.0913, "step": 740 }, { "epoch": 0.1982873963071983, "grad_norm": 4.529000759124756, "learning_rate": 7.925133689839572e-06, "loss": 1.1083, "step": 741 }, { "epoch": 0.19855499063419854, "grad_norm": 4.22991418838501, "learning_rate": 7.935828877005348e-06, "loss": 1.1632, "step": 742 }, { "epoch": 0.1988225849611988, "grad_norm": 4.685365676879883, "learning_rate": 7.946524064171124e-06, "loss": 1.1653, "step": 743 }, { "epoch": 0.19909017928819908, "grad_norm": 5.151124954223633, "learning_rate": 7.9572192513369e-06, "loss": 1.1468, "step": 744 }, { "epoch": 0.19935777361519935, "grad_norm": 4.344570636749268, "learning_rate": 7.967914438502674e-06, "loss": 1.084, "step": 745 }, { "epoch": 0.19962536794219962, "grad_norm": 4.775820255279541, "learning_rate": 7.97860962566845e-06, "loss": 1.0849, "step": 746 }, { "epoch": 0.1998929622691999, "grad_norm": 4.6123433113098145, "learning_rate": 7.989304812834225e-06, "loss": 1.1837, "step": 747 }, { "epoch": 0.20016055659620016, "grad_norm": 4.325228691101074, "learning_rate": 8.000000000000001e-06, "loss": 1.1404, "step": 748 }, { "epoch": 0.20042815092320043, "grad_norm": 4.531330108642578, "learning_rate": 8.010695187165776e-06, "loss": 1.4233, "step": 749 }, { "epoch": 0.2006957452502007, "grad_norm": 4.567444801330566, "learning_rate": 8.02139037433155e-06, "loss": 1.1898, "step": 750 }, { "epoch": 0.20096333957720097, "grad_norm": 4.629062175750732, "learning_rate": 8.032085561497327e-06, "loss": 1.2206, "step": 751 }, { "epoch": 0.20123093390420124, "grad_norm": 4.17169713973999, "learning_rate": 8.042780748663103e-06, "loss": 1.0784, "step": 752 }, { "epoch": 0.20149852823120148, "grad_norm": 4.538808345794678, "learning_rate": 8.053475935828876e-06, "loss": 1.2097, "step": 753 }, { "epoch": 0.20176612255820175, "grad_norm": 4.794569492340088, "learning_rate": 8.064171122994653e-06, "loss": 1.2594, "step": 754 }, { "epoch": 0.20203371688520202, "grad_norm": 4.9203972816467285, "learning_rate": 8.07486631016043e-06, "loss": 1.4261, "step": 755 }, { "epoch": 0.2023013112122023, "grad_norm": 4.924014091491699, "learning_rate": 8.085561497326204e-06, "loss": 1.1343, "step": 756 }, { "epoch": 0.20256890553920257, "grad_norm": 4.558595657348633, "learning_rate": 8.096256684491979e-06, "loss": 1.1778, "step": 757 }, { "epoch": 0.20283649986620284, "grad_norm": 4.965837478637695, "learning_rate": 8.106951871657755e-06, "loss": 1.406, "step": 758 }, { "epoch": 0.2031040941932031, "grad_norm": 4.6557207107543945, "learning_rate": 8.11764705882353e-06, "loss": 1.2242, "step": 759 }, { "epoch": 0.20337168852020338, "grad_norm": 4.143162250518799, "learning_rate": 8.128342245989306e-06, "loss": 1.0762, "step": 760 }, { "epoch": 0.20363928284720365, "grad_norm": 4.899580001831055, "learning_rate": 8.13903743315508e-06, "loss": 1.2824, "step": 761 }, { "epoch": 0.20390687717420392, "grad_norm": 4.938472270965576, "learning_rate": 8.149732620320855e-06, "loss": 1.3493, "step": 762 }, { "epoch": 0.20417447150120419, "grad_norm": 4.2447943687438965, "learning_rate": 8.160427807486632e-06, "loss": 1.1482, "step": 763 }, { "epoch": 0.20444206582820446, "grad_norm": 4.182919025421143, "learning_rate": 8.171122994652407e-06, "loss": 1.0402, "step": 764 }, { "epoch": 0.2047096601552047, "grad_norm": 4.690080165863037, "learning_rate": 8.181818181818183e-06, "loss": 1.3051, "step": 765 }, { "epoch": 0.20497725448220497, "grad_norm": 4.1133832931518555, "learning_rate": 8.192513368983958e-06, "loss": 1.0852, "step": 766 }, { "epoch": 0.20524484880920524, "grad_norm": 4.17720365524292, "learning_rate": 8.203208556149734e-06, "loss": 1.0509, "step": 767 }, { "epoch": 0.2055124431362055, "grad_norm": 4.893587589263916, "learning_rate": 8.213903743315509e-06, "loss": 1.2572, "step": 768 }, { "epoch": 0.20578003746320578, "grad_norm": 4.39441442489624, "learning_rate": 8.224598930481285e-06, "loss": 1.0804, "step": 769 }, { "epoch": 0.20604763179020605, "grad_norm": 4.479729652404785, "learning_rate": 8.23529411764706e-06, "loss": 1.129, "step": 770 }, { "epoch": 0.20631522611720632, "grad_norm": 4.792821407318115, "learning_rate": 8.245989304812834e-06, "loss": 1.3325, "step": 771 }, { "epoch": 0.2065828204442066, "grad_norm": 4.284221649169922, "learning_rate": 8.256684491978611e-06, "loss": 1.2608, "step": 772 }, { "epoch": 0.20685041477120686, "grad_norm": 4.049210071563721, "learning_rate": 8.267379679144386e-06, "loss": 1.1589, "step": 773 }, { "epoch": 0.20711800909820713, "grad_norm": 4.67439079284668, "learning_rate": 8.27807486631016e-06, "loss": 1.2496, "step": 774 }, { "epoch": 0.2073856034252074, "grad_norm": 4.5758843421936035, "learning_rate": 8.288770053475937e-06, "loss": 1.1285, "step": 775 }, { "epoch": 0.20765319775220764, "grad_norm": 4.632938861846924, "learning_rate": 8.299465240641711e-06, "loss": 1.3281, "step": 776 }, { "epoch": 0.2079207920792079, "grad_norm": 4.83327054977417, "learning_rate": 8.310160427807488e-06, "loss": 1.2126, "step": 777 }, { "epoch": 0.20818838640620818, "grad_norm": 4.5972137451171875, "learning_rate": 8.320855614973262e-06, "loss": 1.1829, "step": 778 }, { "epoch": 0.20845598073320845, "grad_norm": 4.194045543670654, "learning_rate": 8.331550802139037e-06, "loss": 1.1207, "step": 779 }, { "epoch": 0.20872357506020872, "grad_norm": 4.589977264404297, "learning_rate": 8.342245989304813e-06, "loss": 1.0869, "step": 780 }, { "epoch": 0.208991169387209, "grad_norm": 4.733802795410156, "learning_rate": 8.35294117647059e-06, "loss": 1.2378, "step": 781 }, { "epoch": 0.20925876371420926, "grad_norm": 4.47822380065918, "learning_rate": 8.363636363636365e-06, "loss": 1.1433, "step": 782 }, { "epoch": 0.20952635804120953, "grad_norm": 3.875276803970337, "learning_rate": 8.37433155080214e-06, "loss": 1.0709, "step": 783 }, { "epoch": 0.2097939523682098, "grad_norm": 4.252140522003174, "learning_rate": 8.385026737967916e-06, "loss": 1.1106, "step": 784 }, { "epoch": 0.21006154669521007, "grad_norm": 4.29549503326416, "learning_rate": 8.39572192513369e-06, "loss": 1.1556, "step": 785 }, { "epoch": 0.21032914102221034, "grad_norm": 4.358144760131836, "learning_rate": 8.406417112299467e-06, "loss": 1.053, "step": 786 }, { "epoch": 0.21059673534921058, "grad_norm": 4.602996826171875, "learning_rate": 8.417112299465241e-06, "loss": 1.2131, "step": 787 }, { "epoch": 0.21086432967621085, "grad_norm": 4.466192722320557, "learning_rate": 8.427807486631016e-06, "loss": 1.3099, "step": 788 }, { "epoch": 0.21113192400321112, "grad_norm": 4.629776954650879, "learning_rate": 8.438502673796792e-06, "loss": 1.176, "step": 789 }, { "epoch": 0.2113995183302114, "grad_norm": 4.807766437530518, "learning_rate": 8.449197860962567e-06, "loss": 1.2082, "step": 790 }, { "epoch": 0.21166711265721166, "grad_norm": 4.741950035095215, "learning_rate": 8.459893048128342e-06, "loss": 1.228, "step": 791 }, { "epoch": 0.21193470698421193, "grad_norm": 4.33003044128418, "learning_rate": 8.470588235294118e-06, "loss": 1.1136, "step": 792 }, { "epoch": 0.2122023013112122, "grad_norm": 4.555398941040039, "learning_rate": 8.481283422459895e-06, "loss": 1.2896, "step": 793 }, { "epoch": 0.21246989563821247, "grad_norm": 4.31208610534668, "learning_rate": 8.49197860962567e-06, "loss": 1.1487, "step": 794 }, { "epoch": 0.21273748996521274, "grad_norm": 4.462785243988037, "learning_rate": 8.502673796791444e-06, "loss": 1.1853, "step": 795 }, { "epoch": 0.21300508429221301, "grad_norm": 4.457045078277588, "learning_rate": 8.51336898395722e-06, "loss": 1.1769, "step": 796 }, { "epoch": 0.21327267861921329, "grad_norm": 4.705628871917725, "learning_rate": 8.524064171122995e-06, "loss": 1.1599, "step": 797 }, { "epoch": 0.21354027294621353, "grad_norm": 4.765135288238525, "learning_rate": 8.534759358288771e-06, "loss": 1.3051, "step": 798 }, { "epoch": 0.2138078672732138, "grad_norm": 4.394601345062256, "learning_rate": 8.545454545454546e-06, "loss": 1.1349, "step": 799 }, { "epoch": 0.21407546160021407, "grad_norm": 4.035240173339844, "learning_rate": 8.556149732620321e-06, "loss": 1.1051, "step": 800 }, { "epoch": 0.21434305592721434, "grad_norm": 4.072005271911621, "learning_rate": 8.566844919786097e-06, "loss": 1.1045, "step": 801 }, { "epoch": 0.2146106502542146, "grad_norm": 4.543212413787842, "learning_rate": 8.577540106951872e-06, "loss": 1.144, "step": 802 }, { "epoch": 0.21487824458121488, "grad_norm": 4.204556941986084, "learning_rate": 8.588235294117647e-06, "loss": 1.2146, "step": 803 }, { "epoch": 0.21514583890821515, "grad_norm": 3.9721314907073975, "learning_rate": 8.598930481283423e-06, "loss": 1.1586, "step": 804 }, { "epoch": 0.21541343323521542, "grad_norm": 3.9580788612365723, "learning_rate": 8.609625668449198e-06, "loss": 1.0575, "step": 805 }, { "epoch": 0.2156810275622157, "grad_norm": 4.39721155166626, "learning_rate": 8.620320855614974e-06, "loss": 1.2462, "step": 806 }, { "epoch": 0.21594862188921596, "grad_norm": 4.285038471221924, "learning_rate": 8.63101604278075e-06, "loss": 1.1557, "step": 807 }, { "epoch": 0.21621621621621623, "grad_norm": 4.489853382110596, "learning_rate": 8.641711229946525e-06, "loss": 1.3112, "step": 808 }, { "epoch": 0.21648381054321647, "grad_norm": 4.501437664031982, "learning_rate": 8.6524064171123e-06, "loss": 1.1479, "step": 809 }, { "epoch": 0.21675140487021674, "grad_norm": 4.656176567077637, "learning_rate": 8.663101604278076e-06, "loss": 1.2141, "step": 810 }, { "epoch": 0.217018999197217, "grad_norm": 4.57153844833374, "learning_rate": 8.673796791443851e-06, "loss": 1.1898, "step": 811 }, { "epoch": 0.21728659352421728, "grad_norm": 4.217146873474121, "learning_rate": 8.684491978609626e-06, "loss": 1.1277, "step": 812 }, { "epoch": 0.21755418785121755, "grad_norm": 5.0501227378845215, "learning_rate": 8.695187165775402e-06, "loss": 1.2793, "step": 813 }, { "epoch": 0.21782178217821782, "grad_norm": 4.156916618347168, "learning_rate": 8.705882352941177e-06, "loss": 1.0608, "step": 814 }, { "epoch": 0.2180893765052181, "grad_norm": 4.722466468811035, "learning_rate": 8.716577540106953e-06, "loss": 1.2908, "step": 815 }, { "epoch": 0.21835697083221836, "grad_norm": 4.382132053375244, "learning_rate": 8.727272727272728e-06, "loss": 1.1733, "step": 816 }, { "epoch": 0.21862456515921863, "grad_norm": 4.8200225830078125, "learning_rate": 8.737967914438502e-06, "loss": 1.3151, "step": 817 }, { "epoch": 0.2188921594862189, "grad_norm": 4.401098728179932, "learning_rate": 8.748663101604279e-06, "loss": 1.1091, "step": 818 }, { "epoch": 0.21915975381321917, "grad_norm": 4.914200305938721, "learning_rate": 8.759358288770055e-06, "loss": 1.3693, "step": 819 }, { "epoch": 0.2194273481402194, "grad_norm": 4.085461616516113, "learning_rate": 8.77005347593583e-06, "loss": 1.1384, "step": 820 }, { "epoch": 0.21969494246721968, "grad_norm": 3.852440595626831, "learning_rate": 8.780748663101605e-06, "loss": 1.1228, "step": 821 }, { "epoch": 0.21996253679421995, "grad_norm": 4.607455253601074, "learning_rate": 8.791443850267381e-06, "loss": 1.2419, "step": 822 }, { "epoch": 0.22023013112122022, "grad_norm": 4.384522438049316, "learning_rate": 8.802139037433156e-06, "loss": 1.3108, "step": 823 }, { "epoch": 0.2204977254482205, "grad_norm": 4.342321872711182, "learning_rate": 8.81283422459893e-06, "loss": 1.1794, "step": 824 }, { "epoch": 0.22076531977522076, "grad_norm": 4.432126045227051, "learning_rate": 8.823529411764707e-06, "loss": 1.1947, "step": 825 }, { "epoch": 0.22103291410222103, "grad_norm": 4.4877777099609375, "learning_rate": 8.834224598930481e-06, "loss": 1.2243, "step": 826 }, { "epoch": 0.2213005084292213, "grad_norm": 4.3614325523376465, "learning_rate": 8.844919786096258e-06, "loss": 1.2309, "step": 827 }, { "epoch": 0.22156810275622157, "grad_norm": 4.3788580894470215, "learning_rate": 8.855614973262033e-06, "loss": 1.1391, "step": 828 }, { "epoch": 0.22183569708322184, "grad_norm": 4.09984016418457, "learning_rate": 8.866310160427807e-06, "loss": 1.1843, "step": 829 }, { "epoch": 0.22210329141022211, "grad_norm": 4.093768119812012, "learning_rate": 8.877005347593584e-06, "loss": 1.1382, "step": 830 }, { "epoch": 0.22237088573722238, "grad_norm": 4.717266082763672, "learning_rate": 8.88770053475936e-06, "loss": 1.3751, "step": 831 }, { "epoch": 0.22263848006422263, "grad_norm": 4.382028579711914, "learning_rate": 8.898395721925135e-06, "loss": 1.2114, "step": 832 }, { "epoch": 0.2229060743912229, "grad_norm": 4.509121894836426, "learning_rate": 8.90909090909091e-06, "loss": 1.2096, "step": 833 }, { "epoch": 0.22317366871822317, "grad_norm": 4.2888078689575195, "learning_rate": 8.919786096256686e-06, "loss": 1.2023, "step": 834 }, { "epoch": 0.22344126304522344, "grad_norm": 3.797525405883789, "learning_rate": 8.93048128342246e-06, "loss": 1.1453, "step": 835 }, { "epoch": 0.2237088573722237, "grad_norm": 3.918774127960205, "learning_rate": 8.941176470588237e-06, "loss": 1.0776, "step": 836 }, { "epoch": 0.22397645169922398, "grad_norm": 4.301737308502197, "learning_rate": 8.951871657754012e-06, "loss": 1.1801, "step": 837 }, { "epoch": 0.22424404602622425, "grad_norm": 4.121411323547363, "learning_rate": 8.962566844919786e-06, "loss": 1.0812, "step": 838 }, { "epoch": 0.22451164035322452, "grad_norm": 4.318382740020752, "learning_rate": 8.973262032085563e-06, "loss": 1.1899, "step": 839 }, { "epoch": 0.2247792346802248, "grad_norm": 4.362233638763428, "learning_rate": 8.983957219251337e-06, "loss": 1.137, "step": 840 }, { "epoch": 0.22504682900722506, "grad_norm": 4.285608291625977, "learning_rate": 8.994652406417112e-06, "loss": 1.1922, "step": 841 }, { "epoch": 0.22531442333422533, "grad_norm": 4.41885232925415, "learning_rate": 9.005347593582888e-06, "loss": 1.2026, "step": 842 }, { "epoch": 0.22558201766122557, "grad_norm": 4.712429046630859, "learning_rate": 9.016042780748663e-06, "loss": 1.2112, "step": 843 }, { "epoch": 0.22584961198822584, "grad_norm": 3.9474940299987793, "learning_rate": 9.02673796791444e-06, "loss": 1.0856, "step": 844 }, { "epoch": 0.2261172063152261, "grad_norm": 4.865321159362793, "learning_rate": 9.037433155080214e-06, "loss": 1.2806, "step": 845 }, { "epoch": 0.22638480064222638, "grad_norm": 4.013378620147705, "learning_rate": 9.04812834224599e-06, "loss": 1.112, "step": 846 }, { "epoch": 0.22665239496922665, "grad_norm": 4.2192702293396, "learning_rate": 9.058823529411765e-06, "loss": 1.2246, "step": 847 }, { "epoch": 0.22691998929622692, "grad_norm": 4.709174633026123, "learning_rate": 9.069518716577542e-06, "loss": 1.2746, "step": 848 }, { "epoch": 0.2271875836232272, "grad_norm": 4.175418376922607, "learning_rate": 9.080213903743316e-06, "loss": 1.1651, "step": 849 }, { "epoch": 0.22745517795022746, "grad_norm": 4.398164749145508, "learning_rate": 9.090909090909091e-06, "loss": 1.2002, "step": 850 }, { "epoch": 0.22772277227722773, "grad_norm": 4.27931022644043, "learning_rate": 9.101604278074867e-06, "loss": 1.1041, "step": 851 }, { "epoch": 0.227990366604228, "grad_norm": 4.752706050872803, "learning_rate": 9.112299465240642e-06, "loss": 1.3382, "step": 852 }, { "epoch": 0.22825796093122827, "grad_norm": 4.658750057220459, "learning_rate": 9.122994652406418e-06, "loss": 1.2518, "step": 853 }, { "epoch": 0.2285255552582285, "grad_norm": 4.37801456451416, "learning_rate": 9.133689839572193e-06, "loss": 1.2284, "step": 854 }, { "epoch": 0.22879314958522878, "grad_norm": 4.360160827636719, "learning_rate": 9.144385026737968e-06, "loss": 1.107, "step": 855 }, { "epoch": 0.22906074391222905, "grad_norm": 4.552803993225098, "learning_rate": 9.155080213903744e-06, "loss": 1.2773, "step": 856 }, { "epoch": 0.22932833823922932, "grad_norm": 4.319884300231934, "learning_rate": 9.16577540106952e-06, "loss": 1.3304, "step": 857 }, { "epoch": 0.2295959325662296, "grad_norm": 4.7817840576171875, "learning_rate": 9.176470588235294e-06, "loss": 1.3394, "step": 858 }, { "epoch": 0.22986352689322986, "grad_norm": 3.597621202468872, "learning_rate": 9.18716577540107e-06, "loss": 1.0479, "step": 859 }, { "epoch": 0.23013112122023013, "grad_norm": 4.924500465393066, "learning_rate": 9.197860962566846e-06, "loss": 1.2405, "step": 860 }, { "epoch": 0.2303987155472304, "grad_norm": 4.659447193145752, "learning_rate": 9.208556149732621e-06, "loss": 1.2231, "step": 861 }, { "epoch": 0.23066630987423067, "grad_norm": 4.4317145347595215, "learning_rate": 9.219251336898396e-06, "loss": 1.2094, "step": 862 }, { "epoch": 0.23093390420123094, "grad_norm": 3.976191520690918, "learning_rate": 9.229946524064172e-06, "loss": 1.0144, "step": 863 }, { "epoch": 0.23120149852823121, "grad_norm": 4.48732852935791, "learning_rate": 9.240641711229947e-06, "loss": 1.0518, "step": 864 }, { "epoch": 0.23146909285523146, "grad_norm": 4.009017467498779, "learning_rate": 9.251336898395723e-06, "loss": 1.1445, "step": 865 }, { "epoch": 0.23173668718223173, "grad_norm": 4.176751136779785, "learning_rate": 9.262032085561498e-06, "loss": 1.1074, "step": 866 }, { "epoch": 0.232004281509232, "grad_norm": 4.7490763664245605, "learning_rate": 9.272727272727273e-06, "loss": 1.351, "step": 867 }, { "epoch": 0.23227187583623227, "grad_norm": 4.492088794708252, "learning_rate": 9.283422459893049e-06, "loss": 1.2427, "step": 868 }, { "epoch": 0.23253947016323254, "grad_norm": 3.9468204975128174, "learning_rate": 9.294117647058824e-06, "loss": 1.0236, "step": 869 }, { "epoch": 0.2328070644902328, "grad_norm": 4.703409194946289, "learning_rate": 9.3048128342246e-06, "loss": 1.1154, "step": 870 }, { "epoch": 0.23307465881723308, "grad_norm": 4.1995110511779785, "learning_rate": 9.315508021390375e-06, "loss": 1.1273, "step": 871 }, { "epoch": 0.23334225314423335, "grad_norm": 4.209486484527588, "learning_rate": 9.326203208556151e-06, "loss": 1.1375, "step": 872 }, { "epoch": 0.23360984747123362, "grad_norm": 3.9918205738067627, "learning_rate": 9.336898395721926e-06, "loss": 1.1212, "step": 873 }, { "epoch": 0.2338774417982339, "grad_norm": 4.315709114074707, "learning_rate": 9.347593582887702e-06, "loss": 1.1351, "step": 874 }, { "epoch": 0.23414503612523416, "grad_norm": 4.223841190338135, "learning_rate": 9.358288770053477e-06, "loss": 1.224, "step": 875 }, { "epoch": 0.2344126304522344, "grad_norm": 4.296685218811035, "learning_rate": 9.368983957219252e-06, "loss": 1.1524, "step": 876 }, { "epoch": 0.23468022477923467, "grad_norm": 4.791153430938721, "learning_rate": 9.379679144385028e-06, "loss": 1.315, "step": 877 }, { "epoch": 0.23494781910623494, "grad_norm": 4.414406776428223, "learning_rate": 9.390374331550803e-06, "loss": 1.2116, "step": 878 }, { "epoch": 0.2352154134332352, "grad_norm": 4.986870288848877, "learning_rate": 9.401069518716577e-06, "loss": 1.2292, "step": 879 }, { "epoch": 0.23548300776023548, "grad_norm": 4.407514572143555, "learning_rate": 9.411764705882354e-06, "loss": 1.3158, "step": 880 }, { "epoch": 0.23575060208723575, "grad_norm": 4.413543701171875, "learning_rate": 9.422459893048129e-06, "loss": 0.968, "step": 881 }, { "epoch": 0.23601819641423602, "grad_norm": 4.498653411865234, "learning_rate": 9.433155080213905e-06, "loss": 1.1329, "step": 882 }, { "epoch": 0.2362857907412363, "grad_norm": 4.2039313316345215, "learning_rate": 9.44385026737968e-06, "loss": 1.1976, "step": 883 }, { "epoch": 0.23655338506823656, "grad_norm": 4.075275421142578, "learning_rate": 9.454545454545456e-06, "loss": 1.1194, "step": 884 }, { "epoch": 0.23682097939523683, "grad_norm": 4.131809234619141, "learning_rate": 9.46524064171123e-06, "loss": 1.2058, "step": 885 }, { "epoch": 0.2370885737222371, "grad_norm": 4.1411824226379395, "learning_rate": 9.475935828877007e-06, "loss": 1.1203, "step": 886 }, { "epoch": 0.23735616804923734, "grad_norm": 5.270638942718506, "learning_rate": 9.486631016042782e-06, "loss": 1.2022, "step": 887 }, { "epoch": 0.2376237623762376, "grad_norm": 4.125979900360107, "learning_rate": 9.497326203208556e-06, "loss": 1.0407, "step": 888 }, { "epoch": 0.23789135670323788, "grad_norm": 4.254225730895996, "learning_rate": 9.508021390374333e-06, "loss": 1.1919, "step": 889 }, { "epoch": 0.23815895103023815, "grad_norm": 4.1460723876953125, "learning_rate": 9.518716577540108e-06, "loss": 1.1937, "step": 890 }, { "epoch": 0.23842654535723842, "grad_norm": 4.267801761627197, "learning_rate": 9.529411764705882e-06, "loss": 1.2268, "step": 891 }, { "epoch": 0.2386941396842387, "grad_norm": 4.095164775848389, "learning_rate": 9.540106951871659e-06, "loss": 1.2323, "step": 892 }, { "epoch": 0.23896173401123896, "grad_norm": 4.400330066680908, "learning_rate": 9.550802139037433e-06, "loss": 1.3224, "step": 893 }, { "epoch": 0.23922932833823923, "grad_norm": 4.906595706939697, "learning_rate": 9.56149732620321e-06, "loss": 1.3625, "step": 894 }, { "epoch": 0.2394969226652395, "grad_norm": 4.529881000518799, "learning_rate": 9.572192513368986e-06, "loss": 1.1608, "step": 895 }, { "epoch": 0.23976451699223977, "grad_norm": 4.229710102081299, "learning_rate": 9.582887700534759e-06, "loss": 1.1961, "step": 896 }, { "epoch": 0.24003211131924004, "grad_norm": 4.66829776763916, "learning_rate": 9.593582887700535e-06, "loss": 1.2154, "step": 897 }, { "epoch": 0.24029970564624029, "grad_norm": 4.366943836212158, "learning_rate": 9.604278074866312e-06, "loss": 1.1817, "step": 898 }, { "epoch": 0.24056729997324056, "grad_norm": 4.251003265380859, "learning_rate": 9.614973262032087e-06, "loss": 1.3212, "step": 899 }, { "epoch": 0.24083489430024083, "grad_norm": 5.345521450042725, "learning_rate": 9.625668449197861e-06, "loss": 1.1808, "step": 900 }, { "epoch": 0.2411024886272411, "grad_norm": 4.079299449920654, "learning_rate": 9.636363636363638e-06, "loss": 1.1816, "step": 901 }, { "epoch": 0.24137008295424137, "grad_norm": 4.181840896606445, "learning_rate": 9.647058823529412e-06, "loss": 1.141, "step": 902 }, { "epoch": 0.24163767728124164, "grad_norm": 4.736073017120361, "learning_rate": 9.657754010695189e-06, "loss": 1.0541, "step": 903 }, { "epoch": 0.2419052716082419, "grad_norm": 4.228132724761963, "learning_rate": 9.668449197860963e-06, "loss": 1.0897, "step": 904 }, { "epoch": 0.24217286593524218, "grad_norm": 4.429383277893066, "learning_rate": 9.679144385026738e-06, "loss": 1.1879, "step": 905 }, { "epoch": 0.24244046026224245, "grad_norm": 4.360840320587158, "learning_rate": 9.689839572192514e-06, "loss": 1.1828, "step": 906 }, { "epoch": 0.24270805458924272, "grad_norm": 4.852614879608154, "learning_rate": 9.700534759358289e-06, "loss": 1.2815, "step": 907 }, { "epoch": 0.242975648916243, "grad_norm": 4.6722846031188965, "learning_rate": 9.711229946524064e-06, "loss": 1.3035, "step": 908 }, { "epoch": 0.24324324324324326, "grad_norm": 4.601790904998779, "learning_rate": 9.72192513368984e-06, "loss": 1.2348, "step": 909 }, { "epoch": 0.2435108375702435, "grad_norm": 4.581474781036377, "learning_rate": 9.732620320855617e-06, "loss": 1.2717, "step": 910 }, { "epoch": 0.24377843189724377, "grad_norm": 4.073735237121582, "learning_rate": 9.743315508021391e-06, "loss": 1.2133, "step": 911 }, { "epoch": 0.24404602622424404, "grad_norm": 4.351081848144531, "learning_rate": 9.754010695187166e-06, "loss": 1.1797, "step": 912 }, { "epoch": 0.2443136205512443, "grad_norm": 3.7765159606933594, "learning_rate": 9.764705882352942e-06, "loss": 1.163, "step": 913 }, { "epoch": 0.24458121487824458, "grad_norm": 4.727344512939453, "learning_rate": 9.775401069518717e-06, "loss": 1.3226, "step": 914 }, { "epoch": 0.24484880920524485, "grad_norm": 4.661051273345947, "learning_rate": 9.786096256684493e-06, "loss": 1.1071, "step": 915 }, { "epoch": 0.24511640353224512, "grad_norm": 4.205208778381348, "learning_rate": 9.796791443850268e-06, "loss": 1.1642, "step": 916 }, { "epoch": 0.2453839978592454, "grad_norm": 4.339627265930176, "learning_rate": 9.807486631016043e-06, "loss": 1.0605, "step": 917 }, { "epoch": 0.24565159218624566, "grad_norm": 4.589977741241455, "learning_rate": 9.81818181818182e-06, "loss": 1.2584, "step": 918 }, { "epoch": 0.24591918651324593, "grad_norm": 4.377978801727295, "learning_rate": 9.828877005347594e-06, "loss": 1.303, "step": 919 }, { "epoch": 0.2461867808402462, "grad_norm": 4.110877513885498, "learning_rate": 9.83957219251337e-06, "loss": 1.1833, "step": 920 }, { "epoch": 0.24645437516724644, "grad_norm": 4.5038743019104, "learning_rate": 9.850267379679145e-06, "loss": 1.2471, "step": 921 }, { "epoch": 0.2467219694942467, "grad_norm": 4.485939025878906, "learning_rate": 9.86096256684492e-06, "loss": 1.1391, "step": 922 }, { "epoch": 0.24698956382124698, "grad_norm": 4.139279365539551, "learning_rate": 9.871657754010696e-06, "loss": 1.1146, "step": 923 }, { "epoch": 0.24725715814824725, "grad_norm": 3.7969651222229004, "learning_rate": 9.882352941176472e-06, "loss": 1.0658, "step": 924 }, { "epoch": 0.24752475247524752, "grad_norm": 3.978060483932495, "learning_rate": 9.893048128342247e-06, "loss": 1.1051, "step": 925 }, { "epoch": 0.2477923468022478, "grad_norm": 4.101005554199219, "learning_rate": 9.903743315508022e-06, "loss": 1.1725, "step": 926 }, { "epoch": 0.24805994112924806, "grad_norm": 3.89359188079834, "learning_rate": 9.914438502673798e-06, "loss": 1.1501, "step": 927 }, { "epoch": 0.24832753545624833, "grad_norm": 4.291905879974365, "learning_rate": 9.925133689839573e-06, "loss": 1.2188, "step": 928 }, { "epoch": 0.2485951297832486, "grad_norm": 4.537034034729004, "learning_rate": 9.935828877005348e-06, "loss": 1.2074, "step": 929 }, { "epoch": 0.24886272411024887, "grad_norm": 4.324453830718994, "learning_rate": 9.946524064171124e-06, "loss": 1.1561, "step": 930 }, { "epoch": 0.24913031843724914, "grad_norm": 4.010372638702393, "learning_rate": 9.957219251336899e-06, "loss": 1.156, "step": 931 }, { "epoch": 0.24939791276424939, "grad_norm": 4.636694431304932, "learning_rate": 9.967914438502675e-06, "loss": 1.3193, "step": 932 }, { "epoch": 0.24966550709124966, "grad_norm": 4.060527324676514, "learning_rate": 9.97860962566845e-06, "loss": 1.2258, "step": 933 }, { "epoch": 0.24993310141824993, "grad_norm": 4.463012218475342, "learning_rate": 9.989304812834224e-06, "loss": 1.1717, "step": 934 }, { "epoch": 0.2502006957452502, "grad_norm": 3.8592917919158936, "learning_rate": 1e-05, "loss": 1.1587, "step": 935 }, { "epoch": 0.25046829007225047, "grad_norm": 4.335379600524902, "learning_rate": 9.999999921685345e-06, "loss": 1.3002, "step": 936 }, { "epoch": 0.25073588439925076, "grad_norm": 4.3303680419921875, "learning_rate": 9.999999686741384e-06, "loss": 1.1696, "step": 937 }, { "epoch": 0.251003478726251, "grad_norm": 4.4580607414245605, "learning_rate": 9.999999295168122e-06, "loss": 1.2942, "step": 938 }, { "epoch": 0.25127107305325125, "grad_norm": 3.974984884262085, "learning_rate": 9.999998746965573e-06, "loss": 1.1459, "step": 939 }, { "epoch": 0.25153866738025155, "grad_norm": 4.089166164398193, "learning_rate": 9.999998042133754e-06, "loss": 1.2405, "step": 940 }, { "epoch": 0.2518062617072518, "grad_norm": 4.120057582855225, "learning_rate": 9.999997180672684e-06, "loss": 1.1968, "step": 941 }, { "epoch": 0.2520738560342521, "grad_norm": 3.840859889984131, "learning_rate": 9.999996162582396e-06, "loss": 1.144, "step": 942 }, { "epoch": 0.25234145036125233, "grad_norm": 4.502830505371094, "learning_rate": 9.999994987862916e-06, "loss": 1.2564, "step": 943 }, { "epoch": 0.2526090446882526, "grad_norm": 4.325287818908691, "learning_rate": 9.999993656514284e-06, "loss": 1.2646, "step": 944 }, { "epoch": 0.25287663901525287, "grad_norm": 4.583348751068115, "learning_rate": 9.999992168536542e-06, "loss": 1.1912, "step": 945 }, { "epoch": 0.25314423334225317, "grad_norm": 4.394077301025391, "learning_rate": 9.999990523929734e-06, "loss": 1.242, "step": 946 }, { "epoch": 0.2534118276692534, "grad_norm": 4.394894599914551, "learning_rate": 9.999988722693914e-06, "loss": 1.1904, "step": 947 }, { "epoch": 0.2536794219962537, "grad_norm": 4.380218982696533, "learning_rate": 9.999986764829137e-06, "loss": 1.3293, "step": 948 }, { "epoch": 0.25394701632325395, "grad_norm": 4.508794784545898, "learning_rate": 9.999984650335468e-06, "loss": 1.2141, "step": 949 }, { "epoch": 0.2542146106502542, "grad_norm": 4.407951831817627, "learning_rate": 9.999982379212967e-06, "loss": 1.1078, "step": 950 }, { "epoch": 0.2544822049772545, "grad_norm": 5.039391040802002, "learning_rate": 9.99997995146171e-06, "loss": 1.3326, "step": 951 }, { "epoch": 0.25474979930425473, "grad_norm": 4.499945640563965, "learning_rate": 9.99997736708177e-06, "loss": 1.3474, "step": 952 }, { "epoch": 0.25501739363125503, "grad_norm": 4.34948205947876, "learning_rate": 9.99997462607323e-06, "loss": 1.1468, "step": 953 }, { "epoch": 0.25528498795825527, "grad_norm": 4.453915596008301, "learning_rate": 9.999971728436174e-06, "loss": 1.4107, "step": 954 }, { "epoch": 0.25555258228525557, "grad_norm": 4.159339904785156, "learning_rate": 9.999968674170697e-06, "loss": 1.2531, "step": 955 }, { "epoch": 0.2558201766122558, "grad_norm": 4.395799160003662, "learning_rate": 9.999965463276888e-06, "loss": 1.372, "step": 956 }, { "epoch": 0.2560877709392561, "grad_norm": 4.233826637268066, "learning_rate": 9.999962095754854e-06, "loss": 1.1476, "step": 957 }, { "epoch": 0.25635536526625635, "grad_norm": 4.740029335021973, "learning_rate": 9.999958571604697e-06, "loss": 1.2241, "step": 958 }, { "epoch": 0.25662295959325665, "grad_norm": 4.2474846839904785, "learning_rate": 9.999954890826528e-06, "loss": 1.1625, "step": 959 }, { "epoch": 0.2568905539202569, "grad_norm": 4.043703556060791, "learning_rate": 9.99995105342046e-06, "loss": 1.1664, "step": 960 }, { "epoch": 0.25715814824725713, "grad_norm": 4.318393707275391, "learning_rate": 9.99994705938662e-06, "loss": 1.2221, "step": 961 }, { "epoch": 0.25742574257425743, "grad_norm": 4.372133731842041, "learning_rate": 9.999942908725127e-06, "loss": 1.1626, "step": 962 }, { "epoch": 0.2576933369012577, "grad_norm": 4.380350589752197, "learning_rate": 9.999938601436111e-06, "loss": 1.1594, "step": 963 }, { "epoch": 0.257960931228258, "grad_norm": 4.29257345199585, "learning_rate": 9.999934137519711e-06, "loss": 1.1906, "step": 964 }, { "epoch": 0.2582285255552582, "grad_norm": 3.729611873626709, "learning_rate": 9.999929516976063e-06, "loss": 0.9909, "step": 965 }, { "epoch": 0.2584961198822585, "grad_norm": 9.831878662109375, "learning_rate": 9.999924739805313e-06, "loss": 1.2233, "step": 966 }, { "epoch": 0.25876371420925875, "grad_norm": 4.3758544921875, "learning_rate": 9.999919806007612e-06, "loss": 1.3428, "step": 967 }, { "epoch": 0.25903130853625905, "grad_norm": 4.322572708129883, "learning_rate": 9.999914715583114e-06, "loss": 1.1024, "step": 968 }, { "epoch": 0.2592989028632593, "grad_norm": 4.245995998382568, "learning_rate": 9.999909468531977e-06, "loss": 1.1555, "step": 969 }, { "epoch": 0.2595664971902596, "grad_norm": 3.9678421020507812, "learning_rate": 9.999904064854367e-06, "loss": 1.0857, "step": 970 }, { "epoch": 0.25983409151725984, "grad_norm": 4.337567329406738, "learning_rate": 9.999898504550452e-06, "loss": 1.2654, "step": 971 }, { "epoch": 0.2601016858442601, "grad_norm": 4.172070503234863, "learning_rate": 9.999892787620407e-06, "loss": 1.1528, "step": 972 }, { "epoch": 0.2603692801712604, "grad_norm": 4.25397253036499, "learning_rate": 9.999886914064411e-06, "loss": 1.1656, "step": 973 }, { "epoch": 0.2606368744982606, "grad_norm": 4.268383979797363, "learning_rate": 9.999880883882647e-06, "loss": 1.1387, "step": 974 }, { "epoch": 0.2609044688252609, "grad_norm": 4.6072235107421875, "learning_rate": 9.999874697075304e-06, "loss": 1.1754, "step": 975 }, { "epoch": 0.26117206315226116, "grad_norm": 4.203128814697266, "learning_rate": 9.999868353642579e-06, "loss": 1.2258, "step": 976 }, { "epoch": 0.26143965747926146, "grad_norm": 3.7577922344207764, "learning_rate": 9.999861853584666e-06, "loss": 1.0342, "step": 977 }, { "epoch": 0.2617072518062617, "grad_norm": 4.237786769866943, "learning_rate": 9.999855196901773e-06, "loss": 1.2248, "step": 978 }, { "epoch": 0.261974846133262, "grad_norm": 4.332390785217285, "learning_rate": 9.999848383594107e-06, "loss": 1.2544, "step": 979 }, { "epoch": 0.26224244046026224, "grad_norm": 4.468963146209717, "learning_rate": 9.999841413661878e-06, "loss": 1.1949, "step": 980 }, { "epoch": 0.26251003478726254, "grad_norm": 4.479465484619141, "learning_rate": 9.999834287105307e-06, "loss": 1.1904, "step": 981 }, { "epoch": 0.2627776291142628, "grad_norm": 4.083310127258301, "learning_rate": 9.99982700392462e-06, "loss": 1.1588, "step": 982 }, { "epoch": 0.263045223441263, "grad_norm": 4.362917900085449, "learning_rate": 9.999819564120042e-06, "loss": 1.1189, "step": 983 }, { "epoch": 0.2633128177682633, "grad_norm": 4.328512191772461, "learning_rate": 9.999811967691805e-06, "loss": 1.045, "step": 984 }, { "epoch": 0.26358041209526356, "grad_norm": 4.410714149475098, "learning_rate": 9.999804214640151e-06, "loss": 1.157, "step": 985 }, { "epoch": 0.26384800642226386, "grad_norm": 3.88704776763916, "learning_rate": 9.999796304965318e-06, "loss": 1.2426, "step": 986 }, { "epoch": 0.2641156007492641, "grad_norm": 4.107239246368408, "learning_rate": 9.999788238667558e-06, "loss": 1.1627, "step": 987 }, { "epoch": 0.2643831950762644, "grad_norm": 4.336009979248047, "learning_rate": 9.999780015747122e-06, "loss": 1.2247, "step": 988 }, { "epoch": 0.26465078940326464, "grad_norm": 4.0795464515686035, "learning_rate": 9.999771636204267e-06, "loss": 1.1928, "step": 989 }, { "epoch": 0.26491838373026494, "grad_norm": 4.309201717376709, "learning_rate": 9.999763100039256e-06, "loss": 1.1767, "step": 990 }, { "epoch": 0.2651859780572652, "grad_norm": 4.34153938293457, "learning_rate": 9.999754407252356e-06, "loss": 1.3238, "step": 991 }, { "epoch": 0.2654535723842655, "grad_norm": 4.0108842849731445, "learning_rate": 9.99974555784384e-06, "loss": 1.0851, "step": 992 }, { "epoch": 0.2657211667112657, "grad_norm": 4.6918768882751465, "learning_rate": 9.999736551813986e-06, "loss": 1.24, "step": 993 }, { "epoch": 0.26598876103826596, "grad_norm": 4.068446636199951, "learning_rate": 9.999727389163074e-06, "loss": 1.1031, "step": 994 }, { "epoch": 0.26625635536526626, "grad_norm": 4.262712478637695, "learning_rate": 9.999718069891392e-06, "loss": 1.274, "step": 995 }, { "epoch": 0.2665239496922665, "grad_norm": 4.3243889808654785, "learning_rate": 9.999708593999234e-06, "loss": 1.2472, "step": 996 }, { "epoch": 0.2667915440192668, "grad_norm": 4.188782691955566, "learning_rate": 9.999698961486892e-06, "loss": 1.2658, "step": 997 }, { "epoch": 0.26705913834626704, "grad_norm": 3.9021859169006348, "learning_rate": 9.999689172354672e-06, "loss": 1.0972, "step": 998 }, { "epoch": 0.26732673267326734, "grad_norm": 4.386773586273193, "learning_rate": 9.999679226602878e-06, "loss": 1.1707, "step": 999 }, { "epoch": 0.2675943270002676, "grad_norm": 4.411870956420898, "learning_rate": 9.999669124231824e-06, "loss": 1.188, "step": 1000 }, { "epoch": 0.2675943270002676, "eval_loss": 1.216786503791809, "eval_runtime": 11.6813, "eval_samples_per_second": 34.243, "eval_steps_per_second": 4.28, "step": 1000 }, { "epoch": 0.2678619213272679, "grad_norm": 4.481796741485596, "learning_rate": 9.999658865241827e-06, "loss": 1.2667, "step": 1001 }, { "epoch": 0.2681295156542681, "grad_norm": 3.979875087738037, "learning_rate": 9.999648449633204e-06, "loss": 1.1305, "step": 1002 }, { "epoch": 0.2683971099812684, "grad_norm": 3.8728244304656982, "learning_rate": 9.999637877406284e-06, "loss": 1.1231, "step": 1003 }, { "epoch": 0.26866470430826866, "grad_norm": 4.459341526031494, "learning_rate": 9.999627148561399e-06, "loss": 1.1543, "step": 1004 }, { "epoch": 0.2689322986352689, "grad_norm": 4.173006534576416, "learning_rate": 9.999616263098886e-06, "loss": 1.2025, "step": 1005 }, { "epoch": 0.2691998929622692, "grad_norm": 4.177968978881836, "learning_rate": 9.999605221019082e-06, "loss": 1.194, "step": 1006 }, { "epoch": 0.26946748728926945, "grad_norm": 4.970066547393799, "learning_rate": 9.999594022322334e-06, "loss": 1.2869, "step": 1007 }, { "epoch": 0.26973508161626975, "grad_norm": 4.600182056427002, "learning_rate": 9.999582667008995e-06, "loss": 1.3119, "step": 1008 }, { "epoch": 0.27000267594327, "grad_norm": 4.465086460113525, "learning_rate": 9.999571155079422e-06, "loss": 1.1683, "step": 1009 }, { "epoch": 0.2702702702702703, "grad_norm": 4.228415012359619, "learning_rate": 9.999559486533971e-06, "loss": 1.1939, "step": 1010 }, { "epoch": 0.2705378645972705, "grad_norm": 4.573855400085449, "learning_rate": 9.99954766137301e-06, "loss": 1.325, "step": 1011 }, { "epoch": 0.2708054589242708, "grad_norm": 4.398594379425049, "learning_rate": 9.99953567959691e-06, "loss": 1.1402, "step": 1012 }, { "epoch": 0.27107305325127107, "grad_norm": 4.025271892547607, "learning_rate": 9.999523541206044e-06, "loss": 1.1139, "step": 1013 }, { "epoch": 0.27134064757827137, "grad_norm": 4.192676067352295, "learning_rate": 9.999511246200795e-06, "loss": 1.1699, "step": 1014 }, { "epoch": 0.2716082419052716, "grad_norm": 4.185833930969238, "learning_rate": 9.999498794581548e-06, "loss": 1.1549, "step": 1015 }, { "epoch": 0.27187583623227185, "grad_norm": 4.256872653961182, "learning_rate": 9.99948618634869e-06, "loss": 1.1454, "step": 1016 }, { "epoch": 0.27214343055927215, "grad_norm": 4.221078395843506, "learning_rate": 9.99947342150262e-06, "loss": 1.1097, "step": 1017 }, { "epoch": 0.2724110248862724, "grad_norm": 4.532137870788574, "learning_rate": 9.999460500043734e-06, "loss": 1.341, "step": 1018 }, { "epoch": 0.2726786192132727, "grad_norm": 3.931379795074463, "learning_rate": 9.999447421972439e-06, "loss": 1.1228, "step": 1019 }, { "epoch": 0.27294621354027293, "grad_norm": 4.363259792327881, "learning_rate": 9.999434187289145e-06, "loss": 1.1633, "step": 1020 }, { "epoch": 0.27321380786727323, "grad_norm": 3.8973734378814697, "learning_rate": 9.999420795994266e-06, "loss": 1.2312, "step": 1021 }, { "epoch": 0.27348140219427347, "grad_norm": 4.490160942077637, "learning_rate": 9.99940724808822e-06, "loss": 1.2081, "step": 1022 }, { "epoch": 0.27374899652127377, "grad_norm": 4.149991035461426, "learning_rate": 9.999393543571434e-06, "loss": 1.2413, "step": 1023 }, { "epoch": 0.274016590848274, "grad_norm": 4.234299659729004, "learning_rate": 9.999379682444338e-06, "loss": 1.2663, "step": 1024 }, { "epoch": 0.2742841851752743, "grad_norm": 4.101381301879883, "learning_rate": 9.999365664707361e-06, "loss": 1.1646, "step": 1025 }, { "epoch": 0.27455177950227455, "grad_norm": 4.511719226837158, "learning_rate": 9.999351490360947e-06, "loss": 1.3628, "step": 1026 }, { "epoch": 0.2748193738292748, "grad_norm": 4.125613212585449, "learning_rate": 9.999337159405538e-06, "loss": 1.1866, "step": 1027 }, { "epoch": 0.2750869681562751, "grad_norm": 4.274496555328369, "learning_rate": 9.999322671841583e-06, "loss": 1.1889, "step": 1028 }, { "epoch": 0.27535456248327533, "grad_norm": 4.035276889801025, "learning_rate": 9.999308027669537e-06, "loss": 1.096, "step": 1029 }, { "epoch": 0.27562215681027563, "grad_norm": 4.041557312011719, "learning_rate": 9.999293226889857e-06, "loss": 1.1437, "step": 1030 }, { "epoch": 0.2758897511372759, "grad_norm": 4.458560943603516, "learning_rate": 9.999278269503008e-06, "loss": 1.1608, "step": 1031 }, { "epoch": 0.27615734546427617, "grad_norm": 3.992985486984253, "learning_rate": 9.999263155509459e-06, "loss": 1.0251, "step": 1032 }, { "epoch": 0.2764249397912764, "grad_norm": 3.9736506938934326, "learning_rate": 9.999247884909682e-06, "loss": 1.2267, "step": 1033 }, { "epoch": 0.2766925341182767, "grad_norm": 4.004456996917725, "learning_rate": 9.999232457704155e-06, "loss": 1.0958, "step": 1034 }, { "epoch": 0.27696012844527695, "grad_norm": 4.022693157196045, "learning_rate": 9.999216873893364e-06, "loss": 1.1375, "step": 1035 }, { "epoch": 0.27722772277227725, "grad_norm": 3.8458046913146973, "learning_rate": 9.999201133477793e-06, "loss": 1.1408, "step": 1036 }, { "epoch": 0.2774953170992775, "grad_norm": 4.127901554107666, "learning_rate": 9.999185236457941e-06, "loss": 1.3119, "step": 1037 }, { "epoch": 0.27776291142627774, "grad_norm": 4.242637634277344, "learning_rate": 9.9991691828343e-06, "loss": 1.1941, "step": 1038 }, { "epoch": 0.27803050575327803, "grad_norm": 4.139479160308838, "learning_rate": 9.999152972607377e-06, "loss": 1.0765, "step": 1039 }, { "epoch": 0.2782981000802783, "grad_norm": 4.560730457305908, "learning_rate": 9.999136605777678e-06, "loss": 1.4193, "step": 1040 }, { "epoch": 0.2785656944072786, "grad_norm": 4.292839050292969, "learning_rate": 9.999120082345714e-06, "loss": 1.2548, "step": 1041 }, { "epoch": 0.2788332887342788, "grad_norm": 4.288617134094238, "learning_rate": 9.999103402312005e-06, "loss": 1.2433, "step": 1042 }, { "epoch": 0.2791008830612791, "grad_norm": 4.059001445770264, "learning_rate": 9.999086565677075e-06, "loss": 1.2556, "step": 1043 }, { "epoch": 0.27936847738827936, "grad_norm": 4.238238334655762, "learning_rate": 9.999069572441448e-06, "loss": 1.2258, "step": 1044 }, { "epoch": 0.27963607171527965, "grad_norm": 3.9122133255004883, "learning_rate": 9.999052422605657e-06, "loss": 1.1679, "step": 1045 }, { "epoch": 0.2799036660422799, "grad_norm": 4.169795513153076, "learning_rate": 9.999035116170241e-06, "loss": 1.19, "step": 1046 }, { "epoch": 0.2801712603692802, "grad_norm": 3.937116861343384, "learning_rate": 9.999017653135744e-06, "loss": 1.1576, "step": 1047 }, { "epoch": 0.28043885469628044, "grad_norm": 4.014969348907471, "learning_rate": 9.999000033502706e-06, "loss": 1.2584, "step": 1048 }, { "epoch": 0.2807064490232807, "grad_norm": 3.8742942810058594, "learning_rate": 9.998982257271685e-06, "loss": 1.0865, "step": 1049 }, { "epoch": 0.280974043350281, "grad_norm": 4.11707878112793, "learning_rate": 9.998964324443235e-06, "loss": 1.2188, "step": 1050 }, { "epoch": 0.2812416376772812, "grad_norm": 4.459323406219482, "learning_rate": 9.998946235017918e-06, "loss": 1.2243, "step": 1051 }, { "epoch": 0.2815092320042815, "grad_norm": 4.1629815101623535, "learning_rate": 9.998927988996303e-06, "loss": 1.3085, "step": 1052 }, { "epoch": 0.28177682633128176, "grad_norm": 4.150962829589844, "learning_rate": 9.998909586378959e-06, "loss": 1.1903, "step": 1053 }, { "epoch": 0.28204442065828206, "grad_norm": 3.7340071201324463, "learning_rate": 9.998891027166466e-06, "loss": 1.0209, "step": 1054 }, { "epoch": 0.2823120149852823, "grad_norm": 4.1432695388793945, "learning_rate": 9.9988723113594e-06, "loss": 1.1885, "step": 1055 }, { "epoch": 0.2825796093122826, "grad_norm": 4.373791217803955, "learning_rate": 9.998853438958352e-06, "loss": 1.2612, "step": 1056 }, { "epoch": 0.28284720363928284, "grad_norm": 4.626842021942139, "learning_rate": 9.99883440996391e-06, "loss": 1.3311, "step": 1057 }, { "epoch": 0.28311479796628314, "grad_norm": 4.128498077392578, "learning_rate": 9.998815224376672e-06, "loss": 1.2753, "step": 1058 }, { "epoch": 0.2833823922932834, "grad_norm": 4.729836463928223, "learning_rate": 9.998795882197238e-06, "loss": 1.3457, "step": 1059 }, { "epoch": 0.2836499866202836, "grad_norm": 4.123654365539551, "learning_rate": 9.998776383426217e-06, "loss": 1.2035, "step": 1060 }, { "epoch": 0.2839175809472839, "grad_norm": 3.9608731269836426, "learning_rate": 9.998756728064213e-06, "loss": 1.1559, "step": 1061 }, { "epoch": 0.28418517527428416, "grad_norm": 3.8772714138031006, "learning_rate": 9.998736916111848e-06, "loss": 1.1901, "step": 1062 }, { "epoch": 0.28445276960128446, "grad_norm": 4.203121185302734, "learning_rate": 9.998716947569741e-06, "loss": 1.1789, "step": 1063 }, { "epoch": 0.2847203639282847, "grad_norm": 4.262762069702148, "learning_rate": 9.998696822438516e-06, "loss": 1.3464, "step": 1064 }, { "epoch": 0.284987958255285, "grad_norm": 4.056782245635986, "learning_rate": 9.998676540718805e-06, "loss": 1.1512, "step": 1065 }, { "epoch": 0.28525555258228524, "grad_norm": 4.1949639320373535, "learning_rate": 9.998656102411245e-06, "loss": 1.2288, "step": 1066 }, { "epoch": 0.28552314690928554, "grad_norm": 4.408857345581055, "learning_rate": 9.99863550751647e-06, "loss": 1.2456, "step": 1067 }, { "epoch": 0.2857907412362858, "grad_norm": 4.202237606048584, "learning_rate": 9.998614756035132e-06, "loss": 1.2651, "step": 1068 }, { "epoch": 0.2860583355632861, "grad_norm": 4.139695167541504, "learning_rate": 9.998593847967877e-06, "loss": 1.1924, "step": 1069 }, { "epoch": 0.2863259298902863, "grad_norm": 4.123232841491699, "learning_rate": 9.998572783315361e-06, "loss": 1.2642, "step": 1070 }, { "epoch": 0.28659352421728657, "grad_norm": 4.614407539367676, "learning_rate": 9.998551562078245e-06, "loss": 1.1987, "step": 1071 }, { "epoch": 0.28686111854428686, "grad_norm": 4.054043769836426, "learning_rate": 9.998530184257194e-06, "loss": 1.1046, "step": 1072 }, { "epoch": 0.2871287128712871, "grad_norm": 4.0423760414123535, "learning_rate": 9.998508649852874e-06, "loss": 1.1435, "step": 1073 }, { "epoch": 0.2873963071982874, "grad_norm": 4.187506198883057, "learning_rate": 9.998486958865965e-06, "loss": 1.1171, "step": 1074 }, { "epoch": 0.28766390152528765, "grad_norm": 4.307306289672852, "learning_rate": 9.998465111297141e-06, "loss": 1.1656, "step": 1075 }, { "epoch": 0.28793149585228794, "grad_norm": 4.108502388000488, "learning_rate": 9.99844310714709e-06, "loss": 1.1522, "step": 1076 }, { "epoch": 0.2881990901792882, "grad_norm": 4.7379069328308105, "learning_rate": 9.9984209464165e-06, "loss": 1.3547, "step": 1077 }, { "epoch": 0.2884666845062885, "grad_norm": 4.607676029205322, "learning_rate": 9.998398629106068e-06, "loss": 1.2304, "step": 1078 }, { "epoch": 0.2887342788332887, "grad_norm": 4.571547031402588, "learning_rate": 9.998376155216487e-06, "loss": 1.2825, "step": 1079 }, { "epoch": 0.289001873160289, "grad_norm": 4.113447189331055, "learning_rate": 9.998353524748468e-06, "loss": 1.143, "step": 1080 }, { "epoch": 0.28926946748728927, "grad_norm": 4.24326753616333, "learning_rate": 9.998330737702714e-06, "loss": 1.2782, "step": 1081 }, { "epoch": 0.28953706181428956, "grad_norm": 3.839808464050293, "learning_rate": 9.998307794079942e-06, "loss": 1.1638, "step": 1082 }, { "epoch": 0.2898046561412898, "grad_norm": 4.0532355308532715, "learning_rate": 9.998284693880871e-06, "loss": 1.2157, "step": 1083 }, { "epoch": 0.29007225046829005, "grad_norm": 4.362560272216797, "learning_rate": 9.998261437106223e-06, "loss": 1.3047, "step": 1084 }, { "epoch": 0.29033984479529035, "grad_norm": 4.096391201019287, "learning_rate": 9.998238023756727e-06, "loss": 1.3003, "step": 1085 }, { "epoch": 0.2906074391222906, "grad_norm": 3.965895891189575, "learning_rate": 9.998214453833118e-06, "loss": 1.2066, "step": 1086 }, { "epoch": 0.2908750334492909, "grad_norm": 4.827084541320801, "learning_rate": 9.998190727336133e-06, "loss": 1.3066, "step": 1087 }, { "epoch": 0.29114262777629113, "grad_norm": 3.926433563232422, "learning_rate": 9.998166844266515e-06, "loss": 1.1789, "step": 1088 }, { "epoch": 0.2914102221032914, "grad_norm": 4.2660651206970215, "learning_rate": 9.998142804625011e-06, "loss": 1.259, "step": 1089 }, { "epoch": 0.29167781643029167, "grad_norm": 4.009738922119141, "learning_rate": 9.998118608412378e-06, "loss": 1.1624, "step": 1090 }, { "epoch": 0.29194541075729197, "grad_norm": 4.329594612121582, "learning_rate": 9.99809425562937e-06, "loss": 1.2244, "step": 1091 }, { "epoch": 0.2922130050842922, "grad_norm": 4.109816551208496, "learning_rate": 9.998069746276752e-06, "loss": 1.1706, "step": 1092 }, { "epoch": 0.2924805994112925, "grad_norm": 4.28621244430542, "learning_rate": 9.998045080355291e-06, "loss": 1.2071, "step": 1093 }, { "epoch": 0.29274819373829275, "grad_norm": 4.0972747802734375, "learning_rate": 9.99802025786576e-06, "loss": 1.1558, "step": 1094 }, { "epoch": 0.293015788065293, "grad_norm": 4.451328277587891, "learning_rate": 9.997995278808936e-06, "loss": 1.3491, "step": 1095 }, { "epoch": 0.2932833823922933, "grad_norm": 3.864147663116455, "learning_rate": 9.997970143185603e-06, "loss": 1.1395, "step": 1096 }, { "epoch": 0.29355097671929353, "grad_norm": 4.177571773529053, "learning_rate": 9.997944850996546e-06, "loss": 1.319, "step": 1097 }, { "epoch": 0.29381857104629383, "grad_norm": 3.75541615486145, "learning_rate": 9.99791940224256e-06, "loss": 1.1284, "step": 1098 }, { "epoch": 0.2940861653732941, "grad_norm": 3.947469711303711, "learning_rate": 9.99789379692444e-06, "loss": 1.0627, "step": 1099 }, { "epoch": 0.29435375970029437, "grad_norm": 4.161018371582031, "learning_rate": 9.99786803504299e-06, "loss": 1.2537, "step": 1100 }, { "epoch": 0.2946213540272946, "grad_norm": 4.357724189758301, "learning_rate": 9.997842116599014e-06, "loss": 1.3133, "step": 1101 }, { "epoch": 0.2948889483542949, "grad_norm": 4.223912715911865, "learning_rate": 9.997816041593327e-06, "loss": 1.2574, "step": 1102 }, { "epoch": 0.29515654268129515, "grad_norm": 3.6964030265808105, "learning_rate": 9.997789810026746e-06, "loss": 1.0187, "step": 1103 }, { "epoch": 0.29542413700829545, "grad_norm": 3.9817559719085693, "learning_rate": 9.99776342190009e-06, "loss": 1.1749, "step": 1104 }, { "epoch": 0.2956917313352957, "grad_norm": 4.123600959777832, "learning_rate": 9.997736877214187e-06, "loss": 1.192, "step": 1105 }, { "epoch": 0.29595932566229594, "grad_norm": 4.295464515686035, "learning_rate": 9.99771017596987e-06, "loss": 1.1886, "step": 1106 }, { "epoch": 0.29622691998929623, "grad_norm": 4.501376628875732, "learning_rate": 9.997683318167972e-06, "loss": 1.2161, "step": 1107 }, { "epoch": 0.2964945143162965, "grad_norm": 4.243162631988525, "learning_rate": 9.997656303809338e-06, "loss": 1.2048, "step": 1108 }, { "epoch": 0.2967621086432968, "grad_norm": 4.504419326782227, "learning_rate": 9.997629132894812e-06, "loss": 1.263, "step": 1109 }, { "epoch": 0.297029702970297, "grad_norm": 4.300513744354248, "learning_rate": 9.997601805425246e-06, "loss": 1.0954, "step": 1110 }, { "epoch": 0.2972972972972973, "grad_norm": 4.057127952575684, "learning_rate": 9.997574321401495e-06, "loss": 1.1716, "step": 1111 }, { "epoch": 0.29756489162429756, "grad_norm": 3.755995035171509, "learning_rate": 9.997546680824422e-06, "loss": 1.0806, "step": 1112 }, { "epoch": 0.29783248595129785, "grad_norm": 4.294164180755615, "learning_rate": 9.99751888369489e-06, "loss": 1.358, "step": 1113 }, { "epoch": 0.2981000802782981, "grad_norm": 3.7034502029418945, "learning_rate": 9.997490930013773e-06, "loss": 1.1258, "step": 1114 }, { "epoch": 0.2983676746052984, "grad_norm": 4.4115071296691895, "learning_rate": 9.997462819781944e-06, "loss": 1.3023, "step": 1115 }, { "epoch": 0.29863526893229864, "grad_norm": 4.0352678298950195, "learning_rate": 9.997434553000286e-06, "loss": 1.1215, "step": 1116 }, { "epoch": 0.2989028632592989, "grad_norm": 3.9848623275756836, "learning_rate": 9.997406129669682e-06, "loss": 1.1101, "step": 1117 }, { "epoch": 0.2991704575862992, "grad_norm": 4.004817962646484, "learning_rate": 9.997377549791025e-06, "loss": 1.2029, "step": 1118 }, { "epoch": 0.2994380519132994, "grad_norm": 4.244535446166992, "learning_rate": 9.997348813365207e-06, "loss": 1.1389, "step": 1119 }, { "epoch": 0.2997056462402997, "grad_norm": 4.067032337188721, "learning_rate": 9.997319920393131e-06, "loss": 1.0913, "step": 1120 }, { "epoch": 0.29997324056729996, "grad_norm": 3.9365594387054443, "learning_rate": 9.997290870875703e-06, "loss": 1.1128, "step": 1121 }, { "epoch": 0.30024083489430026, "grad_norm": 3.7220211029052734, "learning_rate": 9.997261664813827e-06, "loss": 1.0801, "step": 1122 }, { "epoch": 0.3005084292213005, "grad_norm": 4.3840131759643555, "learning_rate": 9.997232302208425e-06, "loss": 1.2494, "step": 1123 }, { "epoch": 0.3007760235483008, "grad_norm": 3.811455011367798, "learning_rate": 9.997202783060413e-06, "loss": 1.13, "step": 1124 }, { "epoch": 0.30104361787530104, "grad_norm": 4.011319637298584, "learning_rate": 9.997173107370717e-06, "loss": 1.216, "step": 1125 }, { "epoch": 0.30131121220230134, "grad_norm": 3.8559553623199463, "learning_rate": 9.997143275140266e-06, "loss": 1.1322, "step": 1126 }, { "epoch": 0.3015788065293016, "grad_norm": 3.9331884384155273, "learning_rate": 9.997113286369995e-06, "loss": 1.132, "step": 1127 }, { "epoch": 0.3018464008563018, "grad_norm": 3.7718307971954346, "learning_rate": 9.997083141060842e-06, "loss": 1.2221, "step": 1128 }, { "epoch": 0.3021139951833021, "grad_norm": 4.190203666687012, "learning_rate": 9.997052839213752e-06, "loss": 1.2322, "step": 1129 }, { "epoch": 0.30238158951030236, "grad_norm": 3.806379795074463, "learning_rate": 9.997022380829677e-06, "loss": 1.1844, "step": 1130 }, { "epoch": 0.30264918383730266, "grad_norm": 3.8173298835754395, "learning_rate": 9.996991765909568e-06, "loss": 1.1185, "step": 1131 }, { "epoch": 0.3029167781643029, "grad_norm": 4.177835464477539, "learning_rate": 9.996960994454383e-06, "loss": 1.2292, "step": 1132 }, { "epoch": 0.3031843724913032, "grad_norm": 4.42379903793335, "learning_rate": 9.996930066465091e-06, "loss": 1.2661, "step": 1133 }, { "epoch": 0.30345196681830344, "grad_norm": 4.7846455574035645, "learning_rate": 9.996898981942655e-06, "loss": 1.1785, "step": 1134 }, { "epoch": 0.30371956114530374, "grad_norm": 4.796987533569336, "learning_rate": 9.996867740888052e-06, "loss": 1.2928, "step": 1135 }, { "epoch": 0.303987155472304, "grad_norm": 4.014819145202637, "learning_rate": 9.996836343302261e-06, "loss": 1.1626, "step": 1136 }, { "epoch": 0.3042547497993043, "grad_norm": 4.26397180557251, "learning_rate": 9.996804789186263e-06, "loss": 1.0987, "step": 1137 }, { "epoch": 0.3045223441263045, "grad_norm": 4.477066516876221, "learning_rate": 9.99677307854105e-06, "loss": 1.28, "step": 1138 }, { "epoch": 0.30478993845330477, "grad_norm": 4.38161039352417, "learning_rate": 9.996741211367613e-06, "loss": 1.2668, "step": 1139 }, { "epoch": 0.30505753278030506, "grad_norm": 4.141867160797119, "learning_rate": 9.996709187666951e-06, "loss": 1.2651, "step": 1140 }, { "epoch": 0.3053251271073053, "grad_norm": 3.891883134841919, "learning_rate": 9.996677007440065e-06, "loss": 1.2046, "step": 1141 }, { "epoch": 0.3055927214343056, "grad_norm": 4.575502395629883, "learning_rate": 9.996644670687966e-06, "loss": 1.3873, "step": 1142 }, { "epoch": 0.30586031576130585, "grad_norm": 3.6624574661254883, "learning_rate": 9.996612177411667e-06, "loss": 1.1507, "step": 1143 }, { "epoch": 0.30612791008830614, "grad_norm": 4.047989845275879, "learning_rate": 9.996579527612182e-06, "loss": 1.1696, "step": 1144 }, { "epoch": 0.3063955044153064, "grad_norm": 4.16288948059082, "learning_rate": 9.99654672129054e-06, "loss": 1.191, "step": 1145 }, { "epoch": 0.3066630987423067, "grad_norm": 4.057130813598633, "learning_rate": 9.996513758447764e-06, "loss": 1.2684, "step": 1146 }, { "epoch": 0.3069306930693069, "grad_norm": 4.151482582092285, "learning_rate": 9.996480639084887e-06, "loss": 1.3478, "step": 1147 }, { "epoch": 0.3071982873963072, "grad_norm": 3.9425978660583496, "learning_rate": 9.996447363202947e-06, "loss": 1.1573, "step": 1148 }, { "epoch": 0.30746588172330747, "grad_norm": 4.016078948974609, "learning_rate": 9.996413930802988e-06, "loss": 1.175, "step": 1149 }, { "epoch": 0.3077334760503077, "grad_norm": 4.067404270172119, "learning_rate": 9.996380341886055e-06, "loss": 1.2151, "step": 1150 }, { "epoch": 0.308001070377308, "grad_norm": 4.010457992553711, "learning_rate": 9.996346596453202e-06, "loss": 1.2175, "step": 1151 }, { "epoch": 0.30826866470430825, "grad_norm": 4.103924751281738, "learning_rate": 9.996312694505486e-06, "loss": 1.1351, "step": 1152 }, { "epoch": 0.30853625903130855, "grad_norm": 4.0794243812561035, "learning_rate": 9.996278636043966e-06, "loss": 1.1801, "step": 1153 }, { "epoch": 0.3088038533583088, "grad_norm": 3.910602331161499, "learning_rate": 9.996244421069714e-06, "loss": 1.2453, "step": 1154 }, { "epoch": 0.3090714476853091, "grad_norm": 4.31195068359375, "learning_rate": 9.996210049583796e-06, "loss": 1.1257, "step": 1155 }, { "epoch": 0.30933904201230933, "grad_norm": 4.224134922027588, "learning_rate": 9.996175521587294e-06, "loss": 1.2855, "step": 1156 }, { "epoch": 0.3096066363393096, "grad_norm": 4.1098198890686035, "learning_rate": 9.996140837081288e-06, "loss": 1.2321, "step": 1157 }, { "epoch": 0.30987423066630987, "grad_norm": 4.49318265914917, "learning_rate": 9.996105996066862e-06, "loss": 1.2987, "step": 1158 }, { "epoch": 0.31014182499331017, "grad_norm": 4.257841110229492, "learning_rate": 9.99607099854511e-06, "loss": 1.2069, "step": 1159 }, { "epoch": 0.3104094193203104, "grad_norm": 4.5224385261535645, "learning_rate": 9.996035844517129e-06, "loss": 1.1976, "step": 1160 }, { "epoch": 0.31067701364731065, "grad_norm": 4.277895927429199, "learning_rate": 9.996000533984017e-06, "loss": 1.2005, "step": 1161 }, { "epoch": 0.31094460797431095, "grad_norm": 4.397223472595215, "learning_rate": 9.995965066946885e-06, "loss": 1.1852, "step": 1162 }, { "epoch": 0.3112122023013112, "grad_norm": 4.2652764320373535, "learning_rate": 9.995929443406838e-06, "loss": 1.2549, "step": 1163 }, { "epoch": 0.3114797966283115, "grad_norm": 4.212392807006836, "learning_rate": 9.995893663364997e-06, "loss": 1.1547, "step": 1164 }, { "epoch": 0.31174739095531173, "grad_norm": 3.8345203399658203, "learning_rate": 9.99585772682248e-06, "loss": 1.0897, "step": 1165 }, { "epoch": 0.31201498528231203, "grad_norm": 4.444775581359863, "learning_rate": 9.995821633780413e-06, "loss": 1.2715, "step": 1166 }, { "epoch": 0.31228257960931227, "grad_norm": 4.151453971862793, "learning_rate": 9.99578538423993e-06, "loss": 1.1795, "step": 1167 }, { "epoch": 0.31255017393631257, "grad_norm": 4.223361492156982, "learning_rate": 9.99574897820216e-06, "loss": 1.2359, "step": 1168 }, { "epoch": 0.3128177682633128, "grad_norm": 4.0662841796875, "learning_rate": 9.99571241566825e-06, "loss": 1.1072, "step": 1169 }, { "epoch": 0.3130853625903131, "grad_norm": 4.007144451141357, "learning_rate": 9.99567569663934e-06, "loss": 1.198, "step": 1170 }, { "epoch": 0.31335295691731335, "grad_norm": 3.9480855464935303, "learning_rate": 9.995638821116585e-06, "loss": 1.0293, "step": 1171 }, { "epoch": 0.3136205512443136, "grad_norm": 3.7751834392547607, "learning_rate": 9.995601789101138e-06, "loss": 1.0231, "step": 1172 }, { "epoch": 0.3138881455713139, "grad_norm": 4.609216690063477, "learning_rate": 9.995564600594159e-06, "loss": 1.1539, "step": 1173 }, { "epoch": 0.31415573989831413, "grad_norm": 4.05670166015625, "learning_rate": 9.995527255596812e-06, "loss": 1.1977, "step": 1174 }, { "epoch": 0.31442333422531443, "grad_norm": 3.651618242263794, "learning_rate": 9.995489754110268e-06, "loss": 1.0947, "step": 1175 }, { "epoch": 0.3146909285523147, "grad_norm": 4.308838844299316, "learning_rate": 9.995452096135703e-06, "loss": 1.1942, "step": 1176 }, { "epoch": 0.314958522879315, "grad_norm": 3.8746747970581055, "learning_rate": 9.995414281674294e-06, "loss": 1.1572, "step": 1177 }, { "epoch": 0.3152261172063152, "grad_norm": 4.089914321899414, "learning_rate": 9.995376310727227e-06, "loss": 1.2842, "step": 1178 }, { "epoch": 0.3154937115333155, "grad_norm": 4.342733860015869, "learning_rate": 9.995338183295693e-06, "loss": 1.285, "step": 1179 }, { "epoch": 0.31576130586031576, "grad_norm": 3.697603225708008, "learning_rate": 9.995299899380884e-06, "loss": 1.1125, "step": 1180 }, { "epoch": 0.31602890018731605, "grad_norm": 4.422861099243164, "learning_rate": 9.995261458983999e-06, "loss": 1.2552, "step": 1181 }, { "epoch": 0.3162964945143163, "grad_norm": 4.288775444030762, "learning_rate": 9.995222862106245e-06, "loss": 1.3295, "step": 1182 }, { "epoch": 0.31656408884131654, "grad_norm": 3.8024377822875977, "learning_rate": 9.995184108748827e-06, "loss": 1.1542, "step": 1183 }, { "epoch": 0.31683168316831684, "grad_norm": 4.05307674407959, "learning_rate": 9.995145198912962e-06, "loss": 1.1841, "step": 1184 }, { "epoch": 0.3170992774953171, "grad_norm": 4.2776265144348145, "learning_rate": 9.995106132599869e-06, "loss": 1.1981, "step": 1185 }, { "epoch": 0.3173668718223174, "grad_norm": 3.631357431411743, "learning_rate": 9.995066909810771e-06, "loss": 1.1741, "step": 1186 }, { "epoch": 0.3176344661493176, "grad_norm": 3.9295742511749268, "learning_rate": 9.995027530546895e-06, "loss": 1.0733, "step": 1187 }, { "epoch": 0.3179020604763179, "grad_norm": 3.839838743209839, "learning_rate": 9.994987994809478e-06, "loss": 1.2681, "step": 1188 }, { "epoch": 0.31816965480331816, "grad_norm": 3.7129993438720703, "learning_rate": 9.994948302599757e-06, "loss": 1.0686, "step": 1189 }, { "epoch": 0.31843724913031846, "grad_norm": 4.252348899841309, "learning_rate": 9.994908453918973e-06, "loss": 1.2652, "step": 1190 }, { "epoch": 0.3187048434573187, "grad_norm": 4.2434916496276855, "learning_rate": 9.994868448768378e-06, "loss": 1.227, "step": 1191 }, { "epoch": 0.318972437784319, "grad_norm": 3.613661766052246, "learning_rate": 9.994828287149224e-06, "loss": 1.0919, "step": 1192 }, { "epoch": 0.31924003211131924, "grad_norm": 4.0223469734191895, "learning_rate": 9.994787969062767e-06, "loss": 1.161, "step": 1193 }, { "epoch": 0.3195076264383195, "grad_norm": 3.802426815032959, "learning_rate": 9.994747494510274e-06, "loss": 1.2561, "step": 1194 }, { "epoch": 0.3197752207653198, "grad_norm": 3.8129377365112305, "learning_rate": 9.994706863493007e-06, "loss": 1.1638, "step": 1195 }, { "epoch": 0.32004281509232, "grad_norm": 4.0562872886657715, "learning_rate": 9.994666076012245e-06, "loss": 1.2713, "step": 1196 }, { "epoch": 0.3203104094193203, "grad_norm": 4.090336322784424, "learning_rate": 9.994625132069263e-06, "loss": 1.1567, "step": 1197 }, { "epoch": 0.32057800374632056, "grad_norm": 4.030067443847656, "learning_rate": 9.994584031665345e-06, "loss": 1.1686, "step": 1198 }, { "epoch": 0.32084559807332086, "grad_norm": 4.26224422454834, "learning_rate": 9.994542774801774e-06, "loss": 1.1967, "step": 1199 }, { "epoch": 0.3211131924003211, "grad_norm": 4.3625102043151855, "learning_rate": 9.994501361479847e-06, "loss": 1.2828, "step": 1200 }, { "epoch": 0.3213807867273214, "grad_norm": 4.201301574707031, "learning_rate": 9.99445979170086e-06, "loss": 1.2463, "step": 1201 }, { "epoch": 0.32164838105432164, "grad_norm": 3.624171733856201, "learning_rate": 9.994418065466116e-06, "loss": 1.1004, "step": 1202 }, { "epoch": 0.32191597538132194, "grad_norm": 3.7936317920684814, "learning_rate": 9.99437618277692e-06, "loss": 1.0925, "step": 1203 }, { "epoch": 0.3221835697083222, "grad_norm": 3.740135431289673, "learning_rate": 9.994334143634587e-06, "loss": 1.1698, "step": 1204 }, { "epoch": 0.3224511640353224, "grad_norm": 4.0127458572387695, "learning_rate": 9.994291948040429e-06, "loss": 1.2089, "step": 1205 }, { "epoch": 0.3227187583623227, "grad_norm": 4.131107807159424, "learning_rate": 9.994249595995774e-06, "loss": 1.1964, "step": 1206 }, { "epoch": 0.32298635268932296, "grad_norm": 3.945056200027466, "learning_rate": 9.994207087501945e-06, "loss": 1.2649, "step": 1207 }, { "epoch": 0.32325394701632326, "grad_norm": 4.262823581695557, "learning_rate": 9.994164422560273e-06, "loss": 1.2617, "step": 1208 }, { "epoch": 0.3235215413433235, "grad_norm": 4.310561180114746, "learning_rate": 9.994121601172097e-06, "loss": 1.2077, "step": 1209 }, { "epoch": 0.3237891356703238, "grad_norm": 4.025747299194336, "learning_rate": 9.994078623338757e-06, "loss": 1.1637, "step": 1210 }, { "epoch": 0.32405672999732404, "grad_norm": 3.766697883605957, "learning_rate": 9.9940354890616e-06, "loss": 1.0568, "step": 1211 }, { "epoch": 0.32432432432432434, "grad_norm": 4.299594402313232, "learning_rate": 9.993992198341976e-06, "loss": 1.2301, "step": 1212 }, { "epoch": 0.3245919186513246, "grad_norm": 3.945216417312622, "learning_rate": 9.993948751181243e-06, "loss": 1.2631, "step": 1213 }, { "epoch": 0.3248595129783249, "grad_norm": 4.33341121673584, "learning_rate": 9.99390514758076e-06, "loss": 1.2464, "step": 1214 }, { "epoch": 0.3251271073053251, "grad_norm": 4.582106590270996, "learning_rate": 9.993861387541894e-06, "loss": 1.2877, "step": 1215 }, { "epoch": 0.32539470163232537, "grad_norm": 4.363495349884033, "learning_rate": 9.993817471066016e-06, "loss": 1.1593, "step": 1216 }, { "epoch": 0.32566229595932566, "grad_norm": 4.1914873123168945, "learning_rate": 9.9937733981545e-06, "loss": 1.2427, "step": 1217 }, { "epoch": 0.3259298902863259, "grad_norm": 4.207976341247559, "learning_rate": 9.99372916880873e-06, "loss": 1.2129, "step": 1218 }, { "epoch": 0.3261974846133262, "grad_norm": 3.9144184589385986, "learning_rate": 9.99368478303009e-06, "loss": 1.1604, "step": 1219 }, { "epoch": 0.32646507894032645, "grad_norm": 3.9831881523132324, "learning_rate": 9.993640240819966e-06, "loss": 1.3163, "step": 1220 }, { "epoch": 0.32673267326732675, "grad_norm": 4.0437331199646, "learning_rate": 9.993595542179762e-06, "loss": 1.2957, "step": 1221 }, { "epoch": 0.327000267594327, "grad_norm": 3.9239695072174072, "learning_rate": 9.99355068711087e-06, "loss": 1.1359, "step": 1222 }, { "epoch": 0.3272678619213273, "grad_norm": 3.7763185501098633, "learning_rate": 9.993505675614699e-06, "loss": 1.1569, "step": 1223 }, { "epoch": 0.32753545624832753, "grad_norm": 3.6293134689331055, "learning_rate": 9.99346050769266e-06, "loss": 1.0692, "step": 1224 }, { "epoch": 0.3278030505753278, "grad_norm": 3.8709805011749268, "learning_rate": 9.993415183346168e-06, "loss": 1.0574, "step": 1225 }, { "epoch": 0.32807064490232807, "grad_norm": 4.066141605377197, "learning_rate": 9.993369702576638e-06, "loss": 1.2466, "step": 1226 }, { "epoch": 0.32833823922932837, "grad_norm": 4.21537446975708, "learning_rate": 9.993324065385499e-06, "loss": 1.2357, "step": 1227 }, { "epoch": 0.3286058335563286, "grad_norm": 3.732475757598877, "learning_rate": 9.99327827177418e-06, "loss": 1.0866, "step": 1228 }, { "epoch": 0.32887342788332885, "grad_norm": 3.632660150527954, "learning_rate": 9.993232321744117e-06, "loss": 1.0751, "step": 1229 }, { "epoch": 0.32914102221032915, "grad_norm": 4.376312732696533, "learning_rate": 9.993186215296747e-06, "loss": 1.1535, "step": 1230 }, { "epoch": 0.3294086165373294, "grad_norm": 4.29062557220459, "learning_rate": 9.993139952433513e-06, "loss": 1.298, "step": 1231 }, { "epoch": 0.3296762108643297, "grad_norm": 4.182230472564697, "learning_rate": 9.99309353315587e-06, "loss": 1.1764, "step": 1232 }, { "epoch": 0.32994380519132993, "grad_norm": 4.025058269500732, "learning_rate": 9.993046957465264e-06, "loss": 1.0655, "step": 1233 }, { "epoch": 0.33021139951833023, "grad_norm": 4.065793514251709, "learning_rate": 9.99300022536316e-06, "loss": 1.2838, "step": 1234 }, { "epoch": 0.33047899384533047, "grad_norm": 4.3913397789001465, "learning_rate": 9.99295333685102e-06, "loss": 1.3101, "step": 1235 }, { "epoch": 0.33074658817233077, "grad_norm": 4.253934860229492, "learning_rate": 9.992906291930315e-06, "loss": 1.2625, "step": 1236 }, { "epoch": 0.331014182499331, "grad_norm": 4.285333633422852, "learning_rate": 9.992859090602515e-06, "loss": 1.246, "step": 1237 }, { "epoch": 0.3312817768263313, "grad_norm": 4.150413513183594, "learning_rate": 9.992811732869102e-06, "loss": 1.1379, "step": 1238 }, { "epoch": 0.33154937115333155, "grad_norm": 4.417994976043701, "learning_rate": 9.992764218731556e-06, "loss": 1.2882, "step": 1239 }, { "epoch": 0.3318169654803318, "grad_norm": 4.12443733215332, "learning_rate": 9.992716548191369e-06, "loss": 1.1555, "step": 1240 }, { "epoch": 0.3320845598073321, "grad_norm": 3.823629140853882, "learning_rate": 9.992668721250031e-06, "loss": 1.145, "step": 1241 }, { "epoch": 0.33235215413433233, "grad_norm": 3.9831488132476807, "learning_rate": 9.992620737909045e-06, "loss": 1.1958, "step": 1242 }, { "epoch": 0.33261974846133263, "grad_norm": 4.018994331359863, "learning_rate": 9.99257259816991e-06, "loss": 1.1069, "step": 1243 }, { "epoch": 0.3328873427883329, "grad_norm": 3.987264394760132, "learning_rate": 9.992524302034133e-06, "loss": 1.0961, "step": 1244 }, { "epoch": 0.33315493711533317, "grad_norm": 4.08268404006958, "learning_rate": 9.992475849503232e-06, "loss": 1.2255, "step": 1245 }, { "epoch": 0.3334225314423334, "grad_norm": 4.237321853637695, "learning_rate": 9.992427240578719e-06, "loss": 1.2254, "step": 1246 }, { "epoch": 0.3336901257693337, "grad_norm": 4.234129905700684, "learning_rate": 9.99237847526212e-06, "loss": 1.31, "step": 1247 }, { "epoch": 0.33395772009633395, "grad_norm": 3.7745895385742188, "learning_rate": 9.992329553554964e-06, "loss": 1.2299, "step": 1248 }, { "epoch": 0.33422531442333425, "grad_norm": 3.8982863426208496, "learning_rate": 9.99228047545878e-06, "loss": 1.1502, "step": 1249 }, { "epoch": 0.3344929087503345, "grad_norm": 3.888578414916992, "learning_rate": 9.992231240975107e-06, "loss": 1.2675, "step": 1250 }, { "epoch": 0.33476050307733474, "grad_norm": 3.7839229106903076, "learning_rate": 9.992181850105488e-06, "loss": 1.1895, "step": 1251 }, { "epoch": 0.33502809740433503, "grad_norm": 3.796337366104126, "learning_rate": 9.992132302851471e-06, "loss": 1.1802, "step": 1252 }, { "epoch": 0.3352956917313353, "grad_norm": 3.6959662437438965, "learning_rate": 9.992082599214605e-06, "loss": 1.0366, "step": 1253 }, { "epoch": 0.3355632860583356, "grad_norm": 4.231655120849609, "learning_rate": 9.99203273919645e-06, "loss": 1.3236, "step": 1254 }, { "epoch": 0.3358308803853358, "grad_norm": 3.774073600769043, "learning_rate": 9.991982722798565e-06, "loss": 1.2142, "step": 1255 }, { "epoch": 0.3360984747123361, "grad_norm": 3.9392263889312744, "learning_rate": 9.99193255002252e-06, "loss": 1.1911, "step": 1256 }, { "epoch": 0.33636606903933636, "grad_norm": 3.8191981315612793, "learning_rate": 9.991882220869885e-06, "loss": 1.1639, "step": 1257 }, { "epoch": 0.33663366336633666, "grad_norm": 3.6881232261657715, "learning_rate": 9.991831735342235e-06, "loss": 1.179, "step": 1258 }, { "epoch": 0.3369012576933369, "grad_norm": 3.9517464637756348, "learning_rate": 9.991781093441156e-06, "loss": 1.3195, "step": 1259 }, { "epoch": 0.3371688520203372, "grad_norm": 4.152409076690674, "learning_rate": 9.991730295168229e-06, "loss": 1.3316, "step": 1260 }, { "epoch": 0.33743644634733744, "grad_norm": 4.356308937072754, "learning_rate": 9.991679340525048e-06, "loss": 1.3085, "step": 1261 }, { "epoch": 0.3377040406743377, "grad_norm": 3.8445913791656494, "learning_rate": 9.991628229513212e-06, "loss": 1.1096, "step": 1262 }, { "epoch": 0.337971635001338, "grad_norm": 4.005192279815674, "learning_rate": 9.991576962134317e-06, "loss": 1.0536, "step": 1263 }, { "epoch": 0.3382392293283382, "grad_norm": 4.3194355964660645, "learning_rate": 9.991525538389971e-06, "loss": 1.228, "step": 1264 }, { "epoch": 0.3385068236553385, "grad_norm": 4.254610538482666, "learning_rate": 9.991473958281787e-06, "loss": 1.2584, "step": 1265 }, { "epoch": 0.33877441798233876, "grad_norm": 4.035154819488525, "learning_rate": 9.991422221811377e-06, "loss": 1.2187, "step": 1266 }, { "epoch": 0.33904201230933906, "grad_norm": 4.172974109649658, "learning_rate": 9.991370328980365e-06, "loss": 1.229, "step": 1267 }, { "epoch": 0.3393096066363393, "grad_norm": 4.167996883392334, "learning_rate": 9.991318279790376e-06, "loss": 1.1717, "step": 1268 }, { "epoch": 0.3395772009633396, "grad_norm": 3.9925200939178467, "learning_rate": 9.991266074243038e-06, "loss": 1.0795, "step": 1269 }, { "epoch": 0.33984479529033984, "grad_norm": 4.081603050231934, "learning_rate": 9.99121371233999e-06, "loss": 1.2579, "step": 1270 }, { "epoch": 0.34011238961734014, "grad_norm": 3.936547040939331, "learning_rate": 9.991161194082868e-06, "loss": 1.1121, "step": 1271 }, { "epoch": 0.3403799839443404, "grad_norm": 3.9780871868133545, "learning_rate": 9.991108519473321e-06, "loss": 1.0613, "step": 1272 }, { "epoch": 0.3406475782713406, "grad_norm": 4.46980619430542, "learning_rate": 9.991055688512996e-06, "loss": 1.3501, "step": 1273 }, { "epoch": 0.3409151725983409, "grad_norm": 4.383254051208496, "learning_rate": 9.991002701203552e-06, "loss": 1.2561, "step": 1274 }, { "epoch": 0.34118276692534116, "grad_norm": 3.765019178390503, "learning_rate": 9.990949557546644e-06, "loss": 1.1322, "step": 1275 }, { "epoch": 0.34145036125234146, "grad_norm": 4.063820838928223, "learning_rate": 9.99089625754394e-06, "loss": 1.194, "step": 1276 }, { "epoch": 0.3417179555793417, "grad_norm": 3.9652814865112305, "learning_rate": 9.990842801197109e-06, "loss": 1.2013, "step": 1277 }, { "epoch": 0.341985549906342, "grad_norm": 3.9601919651031494, "learning_rate": 9.990789188507827e-06, "loss": 1.4019, "step": 1278 }, { "epoch": 0.34225314423334224, "grad_norm": 3.9643354415893555, "learning_rate": 9.990735419477771e-06, "loss": 1.1947, "step": 1279 }, { "epoch": 0.34252073856034254, "grad_norm": 4.578568458557129, "learning_rate": 9.990681494108625e-06, "loss": 1.2858, "step": 1280 }, { "epoch": 0.3427883328873428, "grad_norm": 3.9634618759155273, "learning_rate": 9.990627412402081e-06, "loss": 1.0718, "step": 1281 }, { "epoch": 0.3430559272143431, "grad_norm": 4.035841941833496, "learning_rate": 9.990573174359831e-06, "loss": 1.0878, "step": 1282 }, { "epoch": 0.3433235215413433, "grad_norm": 3.8855788707733154, "learning_rate": 9.990518779983575e-06, "loss": 1.08, "step": 1283 }, { "epoch": 0.34359111586834357, "grad_norm": 4.141454219818115, "learning_rate": 9.990464229275017e-06, "loss": 1.2422, "step": 1284 }, { "epoch": 0.34385871019534386, "grad_norm": 4.580347537994385, "learning_rate": 9.990409522235866e-06, "loss": 1.2064, "step": 1285 }, { "epoch": 0.3441263045223441, "grad_norm": 4.343654155731201, "learning_rate": 9.990354658867833e-06, "loss": 1.2125, "step": 1286 }, { "epoch": 0.3443938988493444, "grad_norm": 4.314458847045898, "learning_rate": 9.990299639172643e-06, "loss": 1.2558, "step": 1287 }, { "epoch": 0.34466149317634465, "grad_norm": 3.7052886486053467, "learning_rate": 9.990244463152012e-06, "loss": 1.0901, "step": 1288 }, { "epoch": 0.34492908750334494, "grad_norm": 4.012930393218994, "learning_rate": 9.990189130807672e-06, "loss": 1.2457, "step": 1289 }, { "epoch": 0.3451966818303452, "grad_norm": 4.098269462585449, "learning_rate": 9.990133642141359e-06, "loss": 1.1806, "step": 1290 }, { "epoch": 0.3454642761573455, "grad_norm": 3.833272695541382, "learning_rate": 9.990077997154807e-06, "loss": 1.1566, "step": 1291 }, { "epoch": 0.3457318704843457, "grad_norm": 4.549833297729492, "learning_rate": 9.99002219584976e-06, "loss": 1.3042, "step": 1292 }, { "epoch": 0.345999464811346, "grad_norm": 3.901430368423462, "learning_rate": 9.989966238227967e-06, "loss": 1.1204, "step": 1293 }, { "epoch": 0.34626705913834627, "grad_norm": 4.016772270202637, "learning_rate": 9.989910124291182e-06, "loss": 1.0479, "step": 1294 }, { "epoch": 0.3465346534653465, "grad_norm": 3.785675048828125, "learning_rate": 9.989853854041158e-06, "loss": 1.0056, "step": 1295 }, { "epoch": 0.3468022477923468, "grad_norm": 3.9900505542755127, "learning_rate": 9.989797427479663e-06, "loss": 1.1034, "step": 1296 }, { "epoch": 0.34706984211934705, "grad_norm": 3.6167192459106445, "learning_rate": 9.989740844608464e-06, "loss": 1.1602, "step": 1297 }, { "epoch": 0.34733743644634735, "grad_norm": 4.165998458862305, "learning_rate": 9.989684105429332e-06, "loss": 1.2538, "step": 1298 }, { "epoch": 0.3476050307733476, "grad_norm": 3.6239192485809326, "learning_rate": 9.989627209944044e-06, "loss": 1.1523, "step": 1299 }, { "epoch": 0.3478726251003479, "grad_norm": 3.9420888423919678, "learning_rate": 9.989570158154383e-06, "loss": 1.1796, "step": 1300 }, { "epoch": 0.34814021942734813, "grad_norm": 3.861833333969116, "learning_rate": 9.989512950062135e-06, "loss": 1.1694, "step": 1301 }, { "epoch": 0.3484078137543484, "grad_norm": 4.555881023406982, "learning_rate": 9.989455585669093e-06, "loss": 1.1641, "step": 1302 }, { "epoch": 0.34867540808134867, "grad_norm": 3.9727768898010254, "learning_rate": 9.989398064977057e-06, "loss": 1.2632, "step": 1303 }, { "epoch": 0.34894300240834897, "grad_norm": 4.150755882263184, "learning_rate": 9.989340387987823e-06, "loss": 1.288, "step": 1304 }, { "epoch": 0.3492105967353492, "grad_norm": 4.13301944732666, "learning_rate": 9.989282554703202e-06, "loss": 1.3014, "step": 1305 }, { "epoch": 0.34947819106234945, "grad_norm": 4.18637752532959, "learning_rate": 9.989224565125003e-06, "loss": 1.2925, "step": 1306 }, { "epoch": 0.34974578538934975, "grad_norm": 4.216982841491699, "learning_rate": 9.989166419255047e-06, "loss": 1.2506, "step": 1307 }, { "epoch": 0.35001337971635, "grad_norm": 4.059083938598633, "learning_rate": 9.989108117095152e-06, "loss": 1.3471, "step": 1308 }, { "epoch": 0.3502809740433503, "grad_norm": 4.72033166885376, "learning_rate": 9.989049658647146e-06, "loss": 1.273, "step": 1309 }, { "epoch": 0.35054856837035053, "grad_norm": 3.916358232498169, "learning_rate": 9.988991043912857e-06, "loss": 1.2104, "step": 1310 }, { "epoch": 0.35081616269735083, "grad_norm": 4.03465461730957, "learning_rate": 9.988932272894123e-06, "loss": 1.2057, "step": 1311 }, { "epoch": 0.3510837570243511, "grad_norm": 3.646699905395508, "learning_rate": 9.988873345592786e-06, "loss": 1.1352, "step": 1312 }, { "epoch": 0.35135135135135137, "grad_norm": 3.854741334915161, "learning_rate": 9.988814262010692e-06, "loss": 1.1613, "step": 1313 }, { "epoch": 0.3516189456783516, "grad_norm": 4.387909889221191, "learning_rate": 9.988755022149692e-06, "loss": 1.3018, "step": 1314 }, { "epoch": 0.3518865400053519, "grad_norm": 3.9308204650878906, "learning_rate": 9.988695626011639e-06, "loss": 1.045, "step": 1315 }, { "epoch": 0.35215413433235215, "grad_norm": 4.186442852020264, "learning_rate": 9.988636073598396e-06, "loss": 1.1992, "step": 1316 }, { "epoch": 0.3524217286593524, "grad_norm": 3.7117176055908203, "learning_rate": 9.98857636491183e-06, "loss": 1.1623, "step": 1317 }, { "epoch": 0.3526893229863527, "grad_norm": 3.9418506622314453, "learning_rate": 9.988516499953807e-06, "loss": 1.1666, "step": 1318 }, { "epoch": 0.35295691731335294, "grad_norm": 4.194133758544922, "learning_rate": 9.988456478726207e-06, "loss": 1.3279, "step": 1319 }, { "epoch": 0.35322451164035323, "grad_norm": 3.8580989837646484, "learning_rate": 9.988396301230908e-06, "loss": 1.2154, "step": 1320 }, { "epoch": 0.3534921059673535, "grad_norm": 4.107762813568115, "learning_rate": 9.988335967469794e-06, "loss": 1.3117, "step": 1321 }, { "epoch": 0.3537597002943538, "grad_norm": 3.7596476078033447, "learning_rate": 9.988275477444756e-06, "loss": 1.1496, "step": 1322 }, { "epoch": 0.354027294621354, "grad_norm": 4.2492289543151855, "learning_rate": 9.98821483115769e-06, "loss": 1.1693, "step": 1323 }, { "epoch": 0.3542948889483543, "grad_norm": 3.3397791385650635, "learning_rate": 9.988154028610496e-06, "loss": 0.9744, "step": 1324 }, { "epoch": 0.35456248327535456, "grad_norm": 3.7433207035064697, "learning_rate": 9.988093069805074e-06, "loss": 1.1848, "step": 1325 }, { "epoch": 0.35483007760235485, "grad_norm": 3.7859861850738525, "learning_rate": 9.98803195474334e-06, "loss": 1.1676, "step": 1326 }, { "epoch": 0.3550976719293551, "grad_norm": 4.002213954925537, "learning_rate": 9.987970683427205e-06, "loss": 1.1791, "step": 1327 }, { "epoch": 0.35536526625635534, "grad_norm": 4.300050258636475, "learning_rate": 9.987909255858588e-06, "loss": 1.1968, "step": 1328 }, { "epoch": 0.35563286058335564, "grad_norm": 3.8620917797088623, "learning_rate": 9.987847672039416e-06, "loss": 1.101, "step": 1329 }, { "epoch": 0.3559004549103559, "grad_norm": 4.3346757888793945, "learning_rate": 9.987785931971616e-06, "loss": 1.105, "step": 1330 }, { "epoch": 0.3561680492373562, "grad_norm": 3.949228048324585, "learning_rate": 9.987724035657122e-06, "loss": 1.225, "step": 1331 }, { "epoch": 0.3564356435643564, "grad_norm": 4.213968276977539, "learning_rate": 9.987661983097875e-06, "loss": 1.3405, "step": 1332 }, { "epoch": 0.3567032378913567, "grad_norm": 3.694033622741699, "learning_rate": 9.987599774295815e-06, "loss": 1.1712, "step": 1333 }, { "epoch": 0.35697083221835696, "grad_norm": 3.9766345024108887, "learning_rate": 9.987537409252895e-06, "loss": 1.1874, "step": 1334 }, { "epoch": 0.35723842654535726, "grad_norm": 3.906832456588745, "learning_rate": 9.987474887971067e-06, "loss": 1.1985, "step": 1335 }, { "epoch": 0.3575060208723575, "grad_norm": 3.593148708343506, "learning_rate": 9.987412210452288e-06, "loss": 1.0753, "step": 1336 }, { "epoch": 0.3577736151993578, "grad_norm": 4.227734088897705, "learning_rate": 9.987349376698522e-06, "loss": 1.2267, "step": 1337 }, { "epoch": 0.35804120952635804, "grad_norm": 3.786079168319702, "learning_rate": 9.98728638671174e-06, "loss": 1.113, "step": 1338 }, { "epoch": 0.3583088038533583, "grad_norm": 4.259524345397949, "learning_rate": 9.987223240493912e-06, "loss": 1.233, "step": 1339 }, { "epoch": 0.3585763981803586, "grad_norm": 4.269252777099609, "learning_rate": 9.987159938047018e-06, "loss": 1.328, "step": 1340 }, { "epoch": 0.3588439925073588, "grad_norm": 3.9735991954803467, "learning_rate": 9.98709647937304e-06, "loss": 1.2262, "step": 1341 }, { "epoch": 0.3591115868343591, "grad_norm": 3.9140255451202393, "learning_rate": 9.987032864473966e-06, "loss": 1.1495, "step": 1342 }, { "epoch": 0.35937918116135936, "grad_norm": 3.9749045372009277, "learning_rate": 9.986969093351789e-06, "loss": 1.071, "step": 1343 }, { "epoch": 0.35964677548835966, "grad_norm": 4.553966522216797, "learning_rate": 9.986905166008506e-06, "loss": 1.2779, "step": 1344 }, { "epoch": 0.3599143698153599, "grad_norm": 4.131070613861084, "learning_rate": 9.98684108244612e-06, "loss": 1.315, "step": 1345 }, { "epoch": 0.3601819641423602, "grad_norm": 3.922656297683716, "learning_rate": 9.986776842666641e-06, "loss": 1.1049, "step": 1346 }, { "epoch": 0.36044955846936044, "grad_norm": 4.215112209320068, "learning_rate": 9.98671244667208e-06, "loss": 1.1876, "step": 1347 }, { "epoch": 0.36071715279636074, "grad_norm": 3.91481614112854, "learning_rate": 9.986647894464452e-06, "loss": 1.1058, "step": 1348 }, { "epoch": 0.360984747123361, "grad_norm": 4.0664777755737305, "learning_rate": 9.98658318604578e-06, "loss": 1.205, "step": 1349 }, { "epoch": 0.3612523414503612, "grad_norm": 4.033042907714844, "learning_rate": 9.986518321418091e-06, "loss": 1.2229, "step": 1350 }, { "epoch": 0.3615199357773615, "grad_norm": 4.329224109649658, "learning_rate": 9.986453300583419e-06, "loss": 1.1991, "step": 1351 }, { "epoch": 0.36178753010436177, "grad_norm": 4.066847324371338, "learning_rate": 9.986388123543798e-06, "loss": 1.0257, "step": 1352 }, { "epoch": 0.36205512443136206, "grad_norm": 4.082132816314697, "learning_rate": 9.986322790301272e-06, "loss": 1.3073, "step": 1353 }, { "epoch": 0.3623227187583623, "grad_norm": 4.0690765380859375, "learning_rate": 9.986257300857885e-06, "loss": 1.3173, "step": 1354 }, { "epoch": 0.3625903130853626, "grad_norm": 3.81072735786438, "learning_rate": 9.986191655215692e-06, "loss": 1.1571, "step": 1355 }, { "epoch": 0.36285790741236285, "grad_norm": 4.073006629943848, "learning_rate": 9.986125853376747e-06, "loss": 1.2328, "step": 1356 }, { "epoch": 0.36312550173936314, "grad_norm": 4.015659332275391, "learning_rate": 9.986059895343113e-06, "loss": 1.1722, "step": 1357 }, { "epoch": 0.3633930960663634, "grad_norm": 4.220362663269043, "learning_rate": 9.985993781116853e-06, "loss": 1.2666, "step": 1358 }, { "epoch": 0.3636606903933637, "grad_norm": 4.876560688018799, "learning_rate": 9.985927510700043e-06, "loss": 1.3855, "step": 1359 }, { "epoch": 0.3639282847203639, "grad_norm": 3.7819228172302246, "learning_rate": 9.985861084094754e-06, "loss": 1.2191, "step": 1360 }, { "epoch": 0.36419587904736417, "grad_norm": 5.581944465637207, "learning_rate": 9.98579450130307e-06, "loss": 1.3441, "step": 1361 }, { "epoch": 0.36446347337436447, "grad_norm": 4.042576789855957, "learning_rate": 9.985727762327075e-06, "loss": 1.23, "step": 1362 }, { "epoch": 0.3647310677013647, "grad_norm": 3.75724720954895, "learning_rate": 9.985660867168862e-06, "loss": 1.1289, "step": 1363 }, { "epoch": 0.364998662028365, "grad_norm": 3.9243931770324707, "learning_rate": 9.985593815830524e-06, "loss": 1.1703, "step": 1364 }, { "epoch": 0.36526625635536525, "grad_norm": 3.6307761669158936, "learning_rate": 9.985526608314162e-06, "loss": 1.1219, "step": 1365 }, { "epoch": 0.36553385068236555, "grad_norm": 4.060052394866943, "learning_rate": 9.985459244621883e-06, "loss": 1.2764, "step": 1366 }, { "epoch": 0.3658014450093658, "grad_norm": 4.747690200805664, "learning_rate": 9.985391724755796e-06, "loss": 1.2811, "step": 1367 }, { "epoch": 0.3660690393363661, "grad_norm": 3.579979658126831, "learning_rate": 9.985324048718014e-06, "loss": 1.1203, "step": 1368 }, { "epoch": 0.36633663366336633, "grad_norm": 3.809176206588745, "learning_rate": 9.985256216510661e-06, "loss": 1.1502, "step": 1369 }, { "epoch": 0.3666042279903666, "grad_norm": 3.7964789867401123, "learning_rate": 9.98518822813586e-06, "loss": 1.0763, "step": 1370 }, { "epoch": 0.36687182231736687, "grad_norm": 3.559234619140625, "learning_rate": 9.985120083595742e-06, "loss": 1.2019, "step": 1371 }, { "epoch": 0.36713941664436717, "grad_norm": 4.064184665679932, "learning_rate": 9.985051782892439e-06, "loss": 1.3257, "step": 1372 }, { "epoch": 0.3674070109713674, "grad_norm": 3.9166617393493652, "learning_rate": 9.984983326028093e-06, "loss": 1.0911, "step": 1373 }, { "epoch": 0.36767460529836765, "grad_norm": 3.9536726474761963, "learning_rate": 9.984914713004847e-06, "loss": 1.1804, "step": 1374 }, { "epoch": 0.36794219962536795, "grad_norm": 4.240631103515625, "learning_rate": 9.98484594382485e-06, "loss": 1.2855, "step": 1375 }, { "epoch": 0.3682097939523682, "grad_norm": 3.7650909423828125, "learning_rate": 9.984777018490258e-06, "loss": 1.1098, "step": 1376 }, { "epoch": 0.3684773882793685, "grad_norm": 3.4372477531433105, "learning_rate": 9.98470793700323e-06, "loss": 1.1356, "step": 1377 }, { "epoch": 0.36874498260636873, "grad_norm": 3.8635547161102295, "learning_rate": 9.984638699365928e-06, "loss": 1.0865, "step": 1378 }, { "epoch": 0.36901257693336903, "grad_norm": 4.2199554443359375, "learning_rate": 9.984569305580523e-06, "loss": 1.1303, "step": 1379 }, { "epoch": 0.36928017126036927, "grad_norm": 4.431107044219971, "learning_rate": 9.984499755649188e-06, "loss": 1.3348, "step": 1380 }, { "epoch": 0.36954776558736957, "grad_norm": 3.5743043422698975, "learning_rate": 9.984430049574103e-06, "loss": 1.1717, "step": 1381 }, { "epoch": 0.3698153599143698, "grad_norm": 3.696826934814453, "learning_rate": 9.98436018735745e-06, "loss": 1.1214, "step": 1382 }, { "epoch": 0.3700829542413701, "grad_norm": 4.062804222106934, "learning_rate": 9.984290169001418e-06, "loss": 1.182, "step": 1383 }, { "epoch": 0.37035054856837035, "grad_norm": 3.5979297161102295, "learning_rate": 9.984219994508199e-06, "loss": 1.0661, "step": 1384 }, { "epoch": 0.3706181428953706, "grad_norm": 3.755028247833252, "learning_rate": 9.984149663879994e-06, "loss": 1.1072, "step": 1385 }, { "epoch": 0.3708857372223709, "grad_norm": 3.7246670722961426, "learning_rate": 9.984079177119003e-06, "loss": 1.1284, "step": 1386 }, { "epoch": 0.37115333154937113, "grad_norm": 3.9325149059295654, "learning_rate": 9.984008534227439e-06, "loss": 1.2296, "step": 1387 }, { "epoch": 0.37142092587637143, "grad_norm": 4.277532577514648, "learning_rate": 9.983937735207509e-06, "loss": 1.2242, "step": 1388 }, { "epoch": 0.3716885202033717, "grad_norm": 3.9127614498138428, "learning_rate": 9.983866780061435e-06, "loss": 1.1925, "step": 1389 }, { "epoch": 0.371956114530372, "grad_norm": 3.858065366744995, "learning_rate": 9.983795668791435e-06, "loss": 1.1462, "step": 1390 }, { "epoch": 0.3722237088573722, "grad_norm": 3.7166876792907715, "learning_rate": 9.983724401399745e-06, "loss": 1.1978, "step": 1391 }, { "epoch": 0.3724913031843725, "grad_norm": 4.299930572509766, "learning_rate": 9.98365297788859e-06, "loss": 1.1856, "step": 1392 }, { "epoch": 0.37275889751137276, "grad_norm": 3.803140640258789, "learning_rate": 9.983581398260211e-06, "loss": 1.1731, "step": 1393 }, { "epoch": 0.37302649183837305, "grad_norm": 3.9758286476135254, "learning_rate": 9.983509662516848e-06, "loss": 1.2402, "step": 1394 }, { "epoch": 0.3732940861653733, "grad_norm": 3.708829402923584, "learning_rate": 9.98343777066075e-06, "loss": 1.1367, "step": 1395 }, { "epoch": 0.37356168049237354, "grad_norm": 3.941568613052368, "learning_rate": 9.983365722694166e-06, "loss": 1.187, "step": 1396 }, { "epoch": 0.37382927481937384, "grad_norm": 3.5624454021453857, "learning_rate": 9.983293518619358e-06, "loss": 1.0969, "step": 1397 }, { "epoch": 0.3740968691463741, "grad_norm": 4.011288642883301, "learning_rate": 9.983221158438585e-06, "loss": 1.1643, "step": 1398 }, { "epoch": 0.3743644634733744, "grad_norm": 5.278192520141602, "learning_rate": 9.983148642154114e-06, "loss": 1.1881, "step": 1399 }, { "epoch": 0.3746320578003746, "grad_norm": 3.4236741065979004, "learning_rate": 9.983075969768217e-06, "loss": 1.1508, "step": 1400 }, { "epoch": 0.3748996521273749, "grad_norm": 3.687683582305908, "learning_rate": 9.98300314128317e-06, "loss": 1.1077, "step": 1401 }, { "epoch": 0.37516724645437516, "grad_norm": 3.9771618843078613, "learning_rate": 9.982930156701254e-06, "loss": 1.1796, "step": 1402 }, { "epoch": 0.37543484078137546, "grad_norm": 3.828674554824829, "learning_rate": 9.982857016024757e-06, "loss": 1.2694, "step": 1403 }, { "epoch": 0.3757024351083757, "grad_norm": 3.8206582069396973, "learning_rate": 9.982783719255968e-06, "loss": 1.1139, "step": 1404 }, { "epoch": 0.375970029435376, "grad_norm": 3.9748029708862305, "learning_rate": 9.982710266397184e-06, "loss": 1.1027, "step": 1405 }, { "epoch": 0.37623762376237624, "grad_norm": 3.5067262649536133, "learning_rate": 9.982636657450706e-06, "loss": 1.1222, "step": 1406 }, { "epoch": 0.3765052180893765, "grad_norm": 4.0315093994140625, "learning_rate": 9.98256289241884e-06, "loss": 1.3018, "step": 1407 }, { "epoch": 0.3767728124163768, "grad_norm": 3.5691301822662354, "learning_rate": 9.982488971303899e-06, "loss": 1.0404, "step": 1408 }, { "epoch": 0.377040406743377, "grad_norm": 4.119424819946289, "learning_rate": 9.982414894108194e-06, "loss": 1.234, "step": 1409 }, { "epoch": 0.3773080010703773, "grad_norm": 3.957841157913208, "learning_rate": 9.982340660834049e-06, "loss": 1.4368, "step": 1410 }, { "epoch": 0.37757559539737756, "grad_norm": 3.7822635173797607, "learning_rate": 9.982266271483787e-06, "loss": 1.1002, "step": 1411 }, { "epoch": 0.37784318972437786, "grad_norm": 3.610924005508423, "learning_rate": 9.982191726059742e-06, "loss": 1.1034, "step": 1412 }, { "epoch": 0.3781107840513781, "grad_norm": 3.8048479557037354, "learning_rate": 9.982117024564244e-06, "loss": 1.1641, "step": 1413 }, { "epoch": 0.3783783783783784, "grad_norm": 3.299978256225586, "learning_rate": 9.982042166999639e-06, "loss": 1.0626, "step": 1414 }, { "epoch": 0.37864597270537864, "grad_norm": 4.0182061195373535, "learning_rate": 9.981967153368266e-06, "loss": 1.2374, "step": 1415 }, { "epoch": 0.37891356703237894, "grad_norm": 3.6433804035186768, "learning_rate": 9.981891983672481e-06, "loss": 1.0366, "step": 1416 }, { "epoch": 0.3791811613593792, "grad_norm": 3.748567581176758, "learning_rate": 9.981816657914633e-06, "loss": 1.0423, "step": 1417 }, { "epoch": 0.3794487556863794, "grad_norm": 3.7218706607818604, "learning_rate": 9.981741176097084e-06, "loss": 1.0621, "step": 1418 }, { "epoch": 0.3797163500133797, "grad_norm": 4.026986122131348, "learning_rate": 9.981665538222201e-06, "loss": 1.1626, "step": 1419 }, { "epoch": 0.37998394434037996, "grad_norm": 4.299746513366699, "learning_rate": 9.98158974429235e-06, "loss": 1.2972, "step": 1420 }, { "epoch": 0.38025153866738026, "grad_norm": 3.5890026092529297, "learning_rate": 9.981513794309905e-06, "loss": 1.1985, "step": 1421 }, { "epoch": 0.3805191329943805, "grad_norm": 3.7244949340820312, "learning_rate": 9.981437688277248e-06, "loss": 1.0564, "step": 1422 }, { "epoch": 0.3807867273213808, "grad_norm": 4.018440246582031, "learning_rate": 9.981361426196763e-06, "loss": 1.1399, "step": 1423 }, { "epoch": 0.38105432164838104, "grad_norm": 4.051723003387451, "learning_rate": 9.981285008070836e-06, "loss": 1.3185, "step": 1424 }, { "epoch": 0.38132191597538134, "grad_norm": 3.7371041774749756, "learning_rate": 9.981208433901864e-06, "loss": 1.1645, "step": 1425 }, { "epoch": 0.3815895103023816, "grad_norm": 3.600698709487915, "learning_rate": 9.981131703692241e-06, "loss": 1.1944, "step": 1426 }, { "epoch": 0.3818571046293819, "grad_norm": 3.748783826828003, "learning_rate": 9.981054817444378e-06, "loss": 1.1702, "step": 1427 }, { "epoch": 0.3821246989563821, "grad_norm": 4.829683303833008, "learning_rate": 9.980977775160676e-06, "loss": 1.1126, "step": 1428 }, { "epoch": 0.38239229328338237, "grad_norm": 3.9870803356170654, "learning_rate": 9.980900576843555e-06, "loss": 1.1593, "step": 1429 }, { "epoch": 0.38265988761038267, "grad_norm": 4.103924751281738, "learning_rate": 9.980823222495429e-06, "loss": 1.3044, "step": 1430 }, { "epoch": 0.3829274819373829, "grad_norm": 3.9906015396118164, "learning_rate": 9.980745712118722e-06, "loss": 1.1956, "step": 1431 }, { "epoch": 0.3831950762643832, "grad_norm": 3.7663869857788086, "learning_rate": 9.980668045715864e-06, "loss": 1.1876, "step": 1432 }, { "epoch": 0.38346267059138345, "grad_norm": 3.752241373062134, "learning_rate": 9.980590223289284e-06, "loss": 1.2941, "step": 1433 }, { "epoch": 0.38373026491838375, "grad_norm": 3.90246844291687, "learning_rate": 9.980512244841424e-06, "loss": 1.1741, "step": 1434 }, { "epoch": 0.383997859245384, "grad_norm": 3.9306254386901855, "learning_rate": 9.980434110374725e-06, "loss": 1.2318, "step": 1435 }, { "epoch": 0.3842654535723843, "grad_norm": 3.9048655033111572, "learning_rate": 9.980355819891634e-06, "loss": 1.1667, "step": 1436 }, { "epoch": 0.38453304789938453, "grad_norm": 4.364803314208984, "learning_rate": 9.980277373394604e-06, "loss": 1.3952, "step": 1437 }, { "epoch": 0.3848006422263848, "grad_norm": 4.139352798461914, "learning_rate": 9.980198770886094e-06, "loss": 1.1513, "step": 1438 }, { "epoch": 0.38506823655338507, "grad_norm": 4.017728805541992, "learning_rate": 9.980120012368564e-06, "loss": 1.3192, "step": 1439 }, { "epoch": 0.3853358308803853, "grad_norm": 4.370884418487549, "learning_rate": 9.980041097844482e-06, "loss": 1.2861, "step": 1440 }, { "epoch": 0.3856034252073856, "grad_norm": 3.9100890159606934, "learning_rate": 9.979962027316322e-06, "loss": 1.1261, "step": 1441 }, { "epoch": 0.38587101953438585, "grad_norm": 4.041008949279785, "learning_rate": 9.979882800786556e-06, "loss": 1.223, "step": 1442 }, { "epoch": 0.38613861386138615, "grad_norm": 3.6667256355285645, "learning_rate": 9.97980341825767e-06, "loss": 1.217, "step": 1443 }, { "epoch": 0.3864062081883864, "grad_norm": 3.8240602016448975, "learning_rate": 9.979723879732151e-06, "loss": 1.098, "step": 1444 }, { "epoch": 0.3866738025153867, "grad_norm": 3.704866647720337, "learning_rate": 9.979644185212489e-06, "loss": 1.0308, "step": 1445 }, { "epoch": 0.38694139684238693, "grad_norm": 3.873335123062134, "learning_rate": 9.97956433470118e-06, "loss": 1.1319, "step": 1446 }, { "epoch": 0.38720899116938723, "grad_norm": 3.9278151988983154, "learning_rate": 9.979484328200726e-06, "loss": 1.1702, "step": 1447 }, { "epoch": 0.38747658549638747, "grad_norm": 4.0510077476501465, "learning_rate": 9.979404165713633e-06, "loss": 1.2193, "step": 1448 }, { "epoch": 0.38774417982338777, "grad_norm": 3.8461694717407227, "learning_rate": 9.979323847242414e-06, "loss": 1.2015, "step": 1449 }, { "epoch": 0.388011774150388, "grad_norm": 3.905766010284424, "learning_rate": 9.979243372789583e-06, "loss": 1.1764, "step": 1450 }, { "epoch": 0.38827936847738825, "grad_norm": 3.528315305709839, "learning_rate": 9.979162742357661e-06, "loss": 1.1254, "step": 1451 }, { "epoch": 0.38854696280438855, "grad_norm": 3.523634433746338, "learning_rate": 9.979081955949176e-06, "loss": 1.1075, "step": 1452 }, { "epoch": 0.3888145571313888, "grad_norm": 3.9340994358062744, "learning_rate": 9.979001013566656e-06, "loss": 1.1919, "step": 1453 }, { "epoch": 0.3890821514583891, "grad_norm": 3.8037092685699463, "learning_rate": 9.978919915212637e-06, "loss": 1.2163, "step": 1454 }, { "epoch": 0.38934974578538933, "grad_norm": 4.22614049911499, "learning_rate": 9.978838660889662e-06, "loss": 1.3132, "step": 1455 }, { "epoch": 0.38961734011238963, "grad_norm": 3.9353504180908203, "learning_rate": 9.978757250600273e-06, "loss": 1.0995, "step": 1456 }, { "epoch": 0.3898849344393899, "grad_norm": 3.8454160690307617, "learning_rate": 9.978675684347022e-06, "loss": 1.1743, "step": 1457 }, { "epoch": 0.39015252876639017, "grad_norm": 3.7511603832244873, "learning_rate": 9.978593962132464e-06, "loss": 1.0399, "step": 1458 }, { "epoch": 0.3904201230933904, "grad_norm": 3.736814022064209, "learning_rate": 9.97851208395916e-06, "loss": 1.1429, "step": 1459 }, { "epoch": 0.3906877174203907, "grad_norm": 4.143425464630127, "learning_rate": 9.978430049829672e-06, "loss": 1.2349, "step": 1460 }, { "epoch": 0.39095531174739095, "grad_norm": 3.73406720161438, "learning_rate": 9.978347859746572e-06, "loss": 1.1704, "step": 1461 }, { "epoch": 0.3912229060743912, "grad_norm": 3.936199903488159, "learning_rate": 9.978265513712435e-06, "loss": 1.0558, "step": 1462 }, { "epoch": 0.3914905004013915, "grad_norm": 3.9773452281951904, "learning_rate": 9.97818301172984e-06, "loss": 1.0348, "step": 1463 }, { "epoch": 0.39175809472839174, "grad_norm": 4.385336875915527, "learning_rate": 9.97810035380137e-06, "loss": 1.3109, "step": 1464 }, { "epoch": 0.39202568905539203, "grad_norm": 3.7281599044799805, "learning_rate": 9.978017539929617e-06, "loss": 1.0922, "step": 1465 }, { "epoch": 0.3922932833823923, "grad_norm": 3.8303089141845703, "learning_rate": 9.977934570117173e-06, "loss": 1.2507, "step": 1466 }, { "epoch": 0.3925608777093926, "grad_norm": 4.046004295349121, "learning_rate": 9.97785144436664e-06, "loss": 1.2282, "step": 1467 }, { "epoch": 0.3928284720363928, "grad_norm": 3.8748390674591064, "learning_rate": 9.977768162680616e-06, "loss": 1.2089, "step": 1468 }, { "epoch": 0.3930960663633931, "grad_norm": 3.917059898376465, "learning_rate": 9.977684725061716e-06, "loss": 1.3255, "step": 1469 }, { "epoch": 0.39336366069039336, "grad_norm": 3.7279913425445557, "learning_rate": 9.977601131512553e-06, "loss": 1.326, "step": 1470 }, { "epoch": 0.39363125501739366, "grad_norm": 3.7477800846099854, "learning_rate": 9.977517382035743e-06, "loss": 1.066, "step": 1471 }, { "epoch": 0.3938988493443939, "grad_norm": 3.699467420578003, "learning_rate": 9.97743347663391e-06, "loss": 1.1433, "step": 1472 }, { "epoch": 0.39416644367139414, "grad_norm": 4.19885778427124, "learning_rate": 9.977349415309682e-06, "loss": 1.2562, "step": 1473 }, { "epoch": 0.39443403799839444, "grad_norm": 4.173817157745361, "learning_rate": 9.977265198065696e-06, "loss": 1.3787, "step": 1474 }, { "epoch": 0.3947016323253947, "grad_norm": 3.8569087982177734, "learning_rate": 9.977180824904586e-06, "loss": 1.1537, "step": 1475 }, { "epoch": 0.394969226652395, "grad_norm": 3.5903382301330566, "learning_rate": 9.977096295828998e-06, "loss": 1.082, "step": 1476 }, { "epoch": 0.3952368209793952, "grad_norm": 4.043254375457764, "learning_rate": 9.977011610841579e-06, "loss": 1.2256, "step": 1477 }, { "epoch": 0.3955044153063955, "grad_norm": 3.817080020904541, "learning_rate": 9.97692676994498e-06, "loss": 1.162, "step": 1478 }, { "epoch": 0.39577200963339576, "grad_norm": 3.870049238204956, "learning_rate": 9.976841773141862e-06, "loss": 1.2824, "step": 1479 }, { "epoch": 0.39603960396039606, "grad_norm": 3.870774984359741, "learning_rate": 9.976756620434882e-06, "loss": 1.0247, "step": 1480 }, { "epoch": 0.3963071982873963, "grad_norm": 4.375112056732178, "learning_rate": 9.976671311826714e-06, "loss": 1.2235, "step": 1481 }, { "epoch": 0.3965747926143966, "grad_norm": 3.724886894226074, "learning_rate": 9.976585847320028e-06, "loss": 1.1431, "step": 1482 }, { "epoch": 0.39684238694139684, "grad_norm": 3.800994873046875, "learning_rate": 9.9765002269175e-06, "loss": 1.0945, "step": 1483 }, { "epoch": 0.3971099812683971, "grad_norm": 4.027763843536377, "learning_rate": 9.976414450621812e-06, "loss": 1.0553, "step": 1484 }, { "epoch": 0.3973775755953974, "grad_norm": 3.982628345489502, "learning_rate": 9.976328518435654e-06, "loss": 1.2203, "step": 1485 }, { "epoch": 0.3976451699223976, "grad_norm": 3.9969089031219482, "learning_rate": 9.976242430361714e-06, "loss": 1.2567, "step": 1486 }, { "epoch": 0.3979127642493979, "grad_norm": 3.8449573516845703, "learning_rate": 9.976156186402691e-06, "loss": 1.2397, "step": 1487 }, { "epoch": 0.39818035857639816, "grad_norm": 4.029581069946289, "learning_rate": 9.976069786561286e-06, "loss": 1.2279, "step": 1488 }, { "epoch": 0.39844795290339846, "grad_norm": 3.9746222496032715, "learning_rate": 9.975983230840208e-06, "loss": 1.3537, "step": 1489 }, { "epoch": 0.3987155472303987, "grad_norm": 3.8316519260406494, "learning_rate": 9.975896519242165e-06, "loss": 1.103, "step": 1490 }, { "epoch": 0.398983141557399, "grad_norm": 3.714109182357788, "learning_rate": 9.975809651769874e-06, "loss": 1.0821, "step": 1491 }, { "epoch": 0.39925073588439924, "grad_norm": 3.9708900451660156, "learning_rate": 9.97572262842606e-06, "loss": 1.1406, "step": 1492 }, { "epoch": 0.39951833021139954, "grad_norm": 3.9574031829833984, "learning_rate": 9.975635449213443e-06, "loss": 1.2042, "step": 1493 }, { "epoch": 0.3997859245383998, "grad_norm": 3.648296594619751, "learning_rate": 9.975548114134756e-06, "loss": 1.1087, "step": 1494 }, { "epoch": 0.4000535188654, "grad_norm": 3.9714877605438232, "learning_rate": 9.975460623192738e-06, "loss": 1.3016, "step": 1495 }, { "epoch": 0.4003211131924003, "grad_norm": 4.270079612731934, "learning_rate": 9.975372976390126e-06, "loss": 1.2158, "step": 1496 }, { "epoch": 0.40058870751940057, "grad_norm": 3.275508165359497, "learning_rate": 9.975285173729668e-06, "loss": 1.0631, "step": 1497 }, { "epoch": 0.40085630184640086, "grad_norm": 3.839301824569702, "learning_rate": 9.975197215214113e-06, "loss": 1.2388, "step": 1498 }, { "epoch": 0.4011238961734011, "grad_norm": 3.7420620918273926, "learning_rate": 9.975109100846216e-06, "loss": 1.1396, "step": 1499 }, { "epoch": 0.4013914905004014, "grad_norm": 3.785066604614258, "learning_rate": 9.975020830628741e-06, "loss": 1.2032, "step": 1500 }, { "epoch": 0.4013914905004014, "eval_loss": 1.2037502527236938, "eval_runtime": 11.6819, "eval_samples_per_second": 34.241, "eval_steps_per_second": 4.28, "step": 1500 }, { "epoch": 0.40165908482740165, "grad_norm": 3.9376888275146484, "learning_rate": 9.974932404564448e-06, "loss": 1.1378, "step": 1501 }, { "epoch": 0.40192667915440194, "grad_norm": 4.073405742645264, "learning_rate": 9.97484382265611e-06, "loss": 1.1687, "step": 1502 }, { "epoch": 0.4021942734814022, "grad_norm": 3.9197230339050293, "learning_rate": 9.974755084906503e-06, "loss": 1.164, "step": 1503 }, { "epoch": 0.4024618678084025, "grad_norm": 4.129556655883789, "learning_rate": 9.974666191318402e-06, "loss": 1.2084, "step": 1504 }, { "epoch": 0.4027294621354027, "grad_norm": 4.0111799240112305, "learning_rate": 9.974577141894597e-06, "loss": 1.2002, "step": 1505 }, { "epoch": 0.40299705646240297, "grad_norm": 4.588496685028076, "learning_rate": 9.974487936637873e-06, "loss": 1.115, "step": 1506 }, { "epoch": 0.40326465078940327, "grad_norm": 3.992095708847046, "learning_rate": 9.974398575551029e-06, "loss": 1.2977, "step": 1507 }, { "epoch": 0.4035322451164035, "grad_norm": 4.14756965637207, "learning_rate": 9.97430905863686e-06, "loss": 1.2135, "step": 1508 }, { "epoch": 0.4037998394434038, "grad_norm": 3.6382899284362793, "learning_rate": 9.974219385898174e-06, "loss": 1.1663, "step": 1509 }, { "epoch": 0.40406743377040405, "grad_norm": 3.954108953475952, "learning_rate": 9.974129557337777e-06, "loss": 1.2709, "step": 1510 }, { "epoch": 0.40433502809740435, "grad_norm": 3.5056028366088867, "learning_rate": 9.974039572958486e-06, "loss": 1.0011, "step": 1511 }, { "epoch": 0.4046026224244046, "grad_norm": 3.9158694744110107, "learning_rate": 9.973949432763117e-06, "loss": 1.2319, "step": 1512 }, { "epoch": 0.4048702167514049, "grad_norm": 3.6687309741973877, "learning_rate": 9.973859136754495e-06, "loss": 1.1885, "step": 1513 }, { "epoch": 0.40513781107840513, "grad_norm": 4.025513172149658, "learning_rate": 9.973768684935448e-06, "loss": 1.1389, "step": 1514 }, { "epoch": 0.40540540540540543, "grad_norm": 3.8120996952056885, "learning_rate": 9.973678077308811e-06, "loss": 1.1946, "step": 1515 }, { "epoch": 0.40567299973240567, "grad_norm": 3.88718318939209, "learning_rate": 9.97358731387742e-06, "loss": 1.2046, "step": 1516 }, { "epoch": 0.40594059405940597, "grad_norm": 4.027118682861328, "learning_rate": 9.97349639464412e-06, "loss": 1.2053, "step": 1517 }, { "epoch": 0.4062081883864062, "grad_norm": 3.675534963607788, "learning_rate": 9.973405319611757e-06, "loss": 1.1274, "step": 1518 }, { "epoch": 0.40647578271340645, "grad_norm": 3.914788007736206, "learning_rate": 9.973314088783188e-06, "loss": 1.2117, "step": 1519 }, { "epoch": 0.40674337704040675, "grad_norm": 3.8196732997894287, "learning_rate": 9.973222702161267e-06, "loss": 1.1037, "step": 1520 }, { "epoch": 0.407010971367407, "grad_norm": 3.492936611175537, "learning_rate": 9.97313115974886e-06, "loss": 1.1087, "step": 1521 }, { "epoch": 0.4072785656944073, "grad_norm": 4.102333068847656, "learning_rate": 9.97303946154883e-06, "loss": 1.2887, "step": 1522 }, { "epoch": 0.40754616002140753, "grad_norm": 3.951390027999878, "learning_rate": 9.972947607564056e-06, "loss": 1.2433, "step": 1523 }, { "epoch": 0.40781375434840783, "grad_norm": 3.7122180461883545, "learning_rate": 9.972855597797408e-06, "loss": 1.0165, "step": 1524 }, { "epoch": 0.4080813486754081, "grad_norm": 3.8031108379364014, "learning_rate": 9.972763432251775e-06, "loss": 1.1836, "step": 1525 }, { "epoch": 0.40834894300240837, "grad_norm": 3.916783571243286, "learning_rate": 9.972671110930041e-06, "loss": 1.2287, "step": 1526 }, { "epoch": 0.4086165373294086, "grad_norm": 4.449172019958496, "learning_rate": 9.972578633835096e-06, "loss": 1.212, "step": 1527 }, { "epoch": 0.4088841316564089, "grad_norm": 3.9276909828186035, "learning_rate": 9.972486000969842e-06, "loss": 1.2655, "step": 1528 }, { "epoch": 0.40915172598340915, "grad_norm": 4.05131196975708, "learning_rate": 9.972393212337178e-06, "loss": 1.2497, "step": 1529 }, { "epoch": 0.4094193203104094, "grad_norm": 3.8336915969848633, "learning_rate": 9.972300267940009e-06, "loss": 1.2201, "step": 1530 }, { "epoch": 0.4096869146374097, "grad_norm": 3.7255733013153076, "learning_rate": 9.97220716778125e-06, "loss": 1.2857, "step": 1531 }, { "epoch": 0.40995450896440994, "grad_norm": 3.4183714389801025, "learning_rate": 9.972113911863815e-06, "loss": 1.0868, "step": 1532 }, { "epoch": 0.41022210329141023, "grad_norm": 3.8064022064208984, "learning_rate": 9.972020500190626e-06, "loss": 1.2251, "step": 1533 }, { "epoch": 0.4104896976184105, "grad_norm": 4.1284565925598145, "learning_rate": 9.971926932764609e-06, "loss": 1.246, "step": 1534 }, { "epoch": 0.4107572919454108, "grad_norm": 4.064891338348389, "learning_rate": 9.971833209588696e-06, "loss": 1.1329, "step": 1535 }, { "epoch": 0.411024886272411, "grad_norm": 3.891404151916504, "learning_rate": 9.971739330665821e-06, "loss": 1.2359, "step": 1536 }, { "epoch": 0.4112924805994113, "grad_norm": 3.7822113037109375, "learning_rate": 9.971645295998929e-06, "loss": 1.243, "step": 1537 }, { "epoch": 0.41156007492641156, "grad_norm": 3.785557746887207, "learning_rate": 9.97155110559096e-06, "loss": 1.2446, "step": 1538 }, { "epoch": 0.41182766925341185, "grad_norm": 3.562366008758545, "learning_rate": 9.971456759444869e-06, "loss": 1.1905, "step": 1539 }, { "epoch": 0.4120952635804121, "grad_norm": 3.9148495197296143, "learning_rate": 9.971362257563609e-06, "loss": 1.2355, "step": 1540 }, { "epoch": 0.41236285790741234, "grad_norm": 4.079963684082031, "learning_rate": 9.971267599950142e-06, "loss": 1.2154, "step": 1541 }, { "epoch": 0.41263045223441264, "grad_norm": 4.090665340423584, "learning_rate": 9.971172786607433e-06, "loss": 1.0667, "step": 1542 }, { "epoch": 0.4128980465614129, "grad_norm": 3.9606683254241943, "learning_rate": 9.97107781753845e-06, "loss": 1.1426, "step": 1543 }, { "epoch": 0.4131656408884132, "grad_norm": 3.7528765201568604, "learning_rate": 9.970982692746171e-06, "loss": 1.2156, "step": 1544 }, { "epoch": 0.4134332352154134, "grad_norm": 3.5512781143188477, "learning_rate": 9.970887412233574e-06, "loss": 1.1394, "step": 1545 }, { "epoch": 0.4137008295424137, "grad_norm": 3.8421177864074707, "learning_rate": 9.970791976003644e-06, "loss": 1.11, "step": 1546 }, { "epoch": 0.41396842386941396, "grad_norm": 3.389683961868286, "learning_rate": 9.97069638405937e-06, "loss": 1.2226, "step": 1547 }, { "epoch": 0.41423601819641426, "grad_norm": 4.224984645843506, "learning_rate": 9.97060063640375e-06, "loss": 1.1678, "step": 1548 }, { "epoch": 0.4145036125234145, "grad_norm": 3.8544325828552246, "learning_rate": 9.970504733039778e-06, "loss": 1.1398, "step": 1549 }, { "epoch": 0.4147712068504148, "grad_norm": 3.8598458766937256, "learning_rate": 9.970408673970464e-06, "loss": 1.1928, "step": 1550 }, { "epoch": 0.41503880117741504, "grad_norm": 3.5871057510375977, "learning_rate": 9.970312459198812e-06, "loss": 1.0778, "step": 1551 }, { "epoch": 0.4153063955044153, "grad_norm": 3.948990821838379, "learning_rate": 9.970216088727838e-06, "loss": 1.2415, "step": 1552 }, { "epoch": 0.4155739898314156, "grad_norm": 3.909735679626465, "learning_rate": 9.970119562560562e-06, "loss": 1.1809, "step": 1553 }, { "epoch": 0.4158415841584158, "grad_norm": 3.529320240020752, "learning_rate": 9.970022880700006e-06, "loss": 1.1612, "step": 1554 }, { "epoch": 0.4161091784854161, "grad_norm": 3.7973170280456543, "learning_rate": 9.9699260431492e-06, "loss": 1.1705, "step": 1555 }, { "epoch": 0.41637677281241636, "grad_norm": 3.872828483581543, "learning_rate": 9.969829049911178e-06, "loss": 1.1882, "step": 1556 }, { "epoch": 0.41664436713941666, "grad_norm": 3.9552464485168457, "learning_rate": 9.969731900988975e-06, "loss": 1.1696, "step": 1557 }, { "epoch": 0.4169119614664169, "grad_norm": 3.7764220237731934, "learning_rate": 9.969634596385637e-06, "loss": 1.2108, "step": 1558 }, { "epoch": 0.4171795557934172, "grad_norm": 3.64782452583313, "learning_rate": 9.969537136104213e-06, "loss": 1.2066, "step": 1559 }, { "epoch": 0.41744715012041744, "grad_norm": 3.8925864696502686, "learning_rate": 9.969439520147754e-06, "loss": 1.1694, "step": 1560 }, { "epoch": 0.41771474444741774, "grad_norm": 3.523120880126953, "learning_rate": 9.969341748519319e-06, "loss": 1.1243, "step": 1561 }, { "epoch": 0.417982338774418, "grad_norm": 3.78109073638916, "learning_rate": 9.969243821221972e-06, "loss": 1.0945, "step": 1562 }, { "epoch": 0.4182499331014182, "grad_norm": 3.5155014991760254, "learning_rate": 9.969145738258776e-06, "loss": 1.0598, "step": 1563 }, { "epoch": 0.4185175274284185, "grad_norm": 4.167483806610107, "learning_rate": 9.969047499632808e-06, "loss": 1.2385, "step": 1564 }, { "epoch": 0.41878512175541877, "grad_norm": 3.761597156524658, "learning_rate": 9.968949105347146e-06, "loss": 1.2115, "step": 1565 }, { "epoch": 0.41905271608241906, "grad_norm": 3.6628715991973877, "learning_rate": 9.968850555404867e-06, "loss": 1.084, "step": 1566 }, { "epoch": 0.4193203104094193, "grad_norm": 3.8627333641052246, "learning_rate": 9.968751849809063e-06, "loss": 1.2907, "step": 1567 }, { "epoch": 0.4195879047364196, "grad_norm": 4.0308518409729, "learning_rate": 9.968652988562826e-06, "loss": 1.2336, "step": 1568 }, { "epoch": 0.41985549906341985, "grad_norm": 3.72426438331604, "learning_rate": 9.96855397166925e-06, "loss": 1.2231, "step": 1569 }, { "epoch": 0.42012309339042014, "grad_norm": 3.9212002754211426, "learning_rate": 9.968454799131439e-06, "loss": 1.1774, "step": 1570 }, { "epoch": 0.4203906877174204, "grad_norm": 3.7344274520874023, "learning_rate": 9.968355470952498e-06, "loss": 1.1205, "step": 1571 }, { "epoch": 0.4206582820444207, "grad_norm": 3.6700868606567383, "learning_rate": 9.96825598713554e-06, "loss": 1.201, "step": 1572 }, { "epoch": 0.4209258763714209, "grad_norm": 3.989650249481201, "learning_rate": 9.968156347683682e-06, "loss": 1.2025, "step": 1573 }, { "epoch": 0.42119347069842117, "grad_norm": 3.851297616958618, "learning_rate": 9.968056552600043e-06, "loss": 1.2169, "step": 1574 }, { "epoch": 0.42146106502542147, "grad_norm": 3.8957512378692627, "learning_rate": 9.967956601887751e-06, "loss": 1.3342, "step": 1575 }, { "epoch": 0.4217286593524217, "grad_norm": 3.9603443145751953, "learning_rate": 9.967856495549935e-06, "loss": 1.2206, "step": 1576 }, { "epoch": 0.421996253679422, "grad_norm": 3.6075241565704346, "learning_rate": 9.967756233589734e-06, "loss": 1.1752, "step": 1577 }, { "epoch": 0.42226384800642225, "grad_norm": 3.690418243408203, "learning_rate": 9.967655816010287e-06, "loss": 1.2098, "step": 1578 }, { "epoch": 0.42253144233342255, "grad_norm": 3.748853921890259, "learning_rate": 9.967555242814738e-06, "loss": 1.1701, "step": 1579 }, { "epoch": 0.4227990366604228, "grad_norm": 3.668382406234741, "learning_rate": 9.96745451400624e-06, "loss": 1.1536, "step": 1580 }, { "epoch": 0.4230666309874231, "grad_norm": 4.043965816497803, "learning_rate": 9.967353629587948e-06, "loss": 1.1062, "step": 1581 }, { "epoch": 0.42333422531442333, "grad_norm": 3.860582113265991, "learning_rate": 9.967252589563023e-06, "loss": 1.202, "step": 1582 }, { "epoch": 0.4236018196414236, "grad_norm": 3.919570207595825, "learning_rate": 9.967151393934628e-06, "loss": 1.0513, "step": 1583 }, { "epoch": 0.42386941396842387, "grad_norm": 4.271496772766113, "learning_rate": 9.967050042705934e-06, "loss": 1.198, "step": 1584 }, { "epoch": 0.4241370082954241, "grad_norm": 3.9681901931762695, "learning_rate": 9.966948535880118e-06, "loss": 1.0558, "step": 1585 }, { "epoch": 0.4244046026224244, "grad_norm": 3.7213032245635986, "learning_rate": 9.966846873460357e-06, "loss": 1.1218, "step": 1586 }, { "epoch": 0.42467219694942465, "grad_norm": 3.8692944049835205, "learning_rate": 9.966745055449835e-06, "loss": 1.1898, "step": 1587 }, { "epoch": 0.42493979127642495, "grad_norm": 3.475710868835449, "learning_rate": 9.966643081851746e-06, "loss": 1.0925, "step": 1588 }, { "epoch": 0.4252073856034252, "grad_norm": 3.985151767730713, "learning_rate": 9.966540952669279e-06, "loss": 1.1674, "step": 1589 }, { "epoch": 0.4254749799304255, "grad_norm": 3.913224935531616, "learning_rate": 9.966438667905637e-06, "loss": 1.2583, "step": 1590 }, { "epoch": 0.42574257425742573, "grad_norm": 4.151821613311768, "learning_rate": 9.966336227564022e-06, "loss": 1.1612, "step": 1591 }, { "epoch": 0.42601016858442603, "grad_norm": 4.1624603271484375, "learning_rate": 9.966233631647646e-06, "loss": 1.2323, "step": 1592 }, { "epoch": 0.4262777629114263, "grad_norm": 3.706627368927002, "learning_rate": 9.96613088015972e-06, "loss": 1.1456, "step": 1593 }, { "epoch": 0.42654535723842657, "grad_norm": 3.398106336593628, "learning_rate": 9.966027973103462e-06, "loss": 1.0205, "step": 1594 }, { "epoch": 0.4268129515654268, "grad_norm": 3.6161367893218994, "learning_rate": 9.9659249104821e-06, "loss": 1.1139, "step": 1595 }, { "epoch": 0.42708054589242705, "grad_norm": 3.886651039123535, "learning_rate": 9.965821692298858e-06, "loss": 1.1711, "step": 1596 }, { "epoch": 0.42734814021942735, "grad_norm": 4.018932342529297, "learning_rate": 9.965718318556971e-06, "loss": 1.3092, "step": 1597 }, { "epoch": 0.4276157345464276, "grad_norm": 3.6425957679748535, "learning_rate": 9.96561478925968e-06, "loss": 1.1201, "step": 1598 }, { "epoch": 0.4278833288734279, "grad_norm": 4.067368030548096, "learning_rate": 9.965511104410224e-06, "loss": 1.2909, "step": 1599 }, { "epoch": 0.42815092320042814, "grad_norm": 3.597480297088623, "learning_rate": 9.965407264011852e-06, "loss": 1.264, "step": 1600 }, { "epoch": 0.42841851752742843, "grad_norm": 3.685746669769287, "learning_rate": 9.965303268067819e-06, "loss": 1.2146, "step": 1601 }, { "epoch": 0.4286861118544287, "grad_norm": 3.8286211490631104, "learning_rate": 9.965199116581381e-06, "loss": 1.1627, "step": 1602 }, { "epoch": 0.428953706181429, "grad_norm": 3.950927495956421, "learning_rate": 9.9650948095558e-06, "loss": 1.1935, "step": 1603 }, { "epoch": 0.4292213005084292, "grad_norm": 3.7244269847869873, "learning_rate": 9.964990346994346e-06, "loss": 1.1994, "step": 1604 }, { "epoch": 0.4294888948354295, "grad_norm": 3.8138673305511475, "learning_rate": 9.96488572890029e-06, "loss": 1.0713, "step": 1605 }, { "epoch": 0.42975648916242976, "grad_norm": 3.689394474029541, "learning_rate": 9.964780955276909e-06, "loss": 1.1475, "step": 1606 }, { "epoch": 0.43002408348943, "grad_norm": 3.922783851623535, "learning_rate": 9.964676026127484e-06, "loss": 1.2439, "step": 1607 }, { "epoch": 0.4302916778164303, "grad_norm": 4.035757541656494, "learning_rate": 9.964570941455304e-06, "loss": 1.2235, "step": 1608 }, { "epoch": 0.43055927214343054, "grad_norm": 3.4453186988830566, "learning_rate": 9.96446570126366e-06, "loss": 1.0857, "step": 1609 }, { "epoch": 0.43082686647043084, "grad_norm": 3.796252727508545, "learning_rate": 9.96436030555585e-06, "loss": 1.3118, "step": 1610 }, { "epoch": 0.4310944607974311, "grad_norm": 3.9162800312042236, "learning_rate": 9.964254754335172e-06, "loss": 1.2191, "step": 1611 }, { "epoch": 0.4313620551244314, "grad_norm": 3.400801658630371, "learning_rate": 9.964149047604936e-06, "loss": 1.1256, "step": 1612 }, { "epoch": 0.4316296494514316, "grad_norm": 3.6633102893829346, "learning_rate": 9.964043185368453e-06, "loss": 1.0817, "step": 1613 }, { "epoch": 0.4318972437784319, "grad_norm": 3.536027193069458, "learning_rate": 9.963937167629039e-06, "loss": 1.0987, "step": 1614 }, { "epoch": 0.43216483810543216, "grad_norm": 3.694162368774414, "learning_rate": 9.963830994390014e-06, "loss": 1.2215, "step": 1615 }, { "epoch": 0.43243243243243246, "grad_norm": 3.681429862976074, "learning_rate": 9.963724665654704e-06, "loss": 1.2003, "step": 1616 }, { "epoch": 0.4327000267594327, "grad_norm": 3.9966611862182617, "learning_rate": 9.963618181426443e-06, "loss": 1.1236, "step": 1617 }, { "epoch": 0.43296762108643294, "grad_norm": 3.875614643096924, "learning_rate": 9.96351154170856e-06, "loss": 1.1395, "step": 1618 }, { "epoch": 0.43323521541343324, "grad_norm": 3.63798451423645, "learning_rate": 9.963404746504403e-06, "loss": 1.1578, "step": 1619 }, { "epoch": 0.4335028097404335, "grad_norm": 3.4750473499298096, "learning_rate": 9.963297795817312e-06, "loss": 1.1385, "step": 1620 }, { "epoch": 0.4337704040674338, "grad_norm": 3.4065208435058594, "learning_rate": 9.963190689650642e-06, "loss": 0.9694, "step": 1621 }, { "epoch": 0.434037998394434, "grad_norm": 3.5707483291625977, "learning_rate": 9.963083428007744e-06, "loss": 1.1541, "step": 1622 }, { "epoch": 0.4343055927214343, "grad_norm": 4.226485252380371, "learning_rate": 9.96297601089198e-06, "loss": 1.2521, "step": 1623 }, { "epoch": 0.43457318704843456, "grad_norm": 3.8039398193359375, "learning_rate": 9.962868438306714e-06, "loss": 1.1212, "step": 1624 }, { "epoch": 0.43484078137543486, "grad_norm": 3.8403160572052, "learning_rate": 9.962760710255317e-06, "loss": 1.1157, "step": 1625 }, { "epoch": 0.4351083757024351, "grad_norm": 3.634899139404297, "learning_rate": 9.962652826741164e-06, "loss": 1.1387, "step": 1626 }, { "epoch": 0.4353759700294354, "grad_norm": 3.621347188949585, "learning_rate": 9.962544787767634e-06, "loss": 1.1823, "step": 1627 }, { "epoch": 0.43564356435643564, "grad_norm": 4.029088973999023, "learning_rate": 9.962436593338109e-06, "loss": 1.0639, "step": 1628 }, { "epoch": 0.4359111586834359, "grad_norm": 3.6505720615386963, "learning_rate": 9.962328243455983e-06, "loss": 1.1589, "step": 1629 }, { "epoch": 0.4361787530104362, "grad_norm": 3.7965028285980225, "learning_rate": 9.962219738124645e-06, "loss": 1.1716, "step": 1630 }, { "epoch": 0.4364463473374364, "grad_norm": 3.631714105606079, "learning_rate": 9.962111077347499e-06, "loss": 1.1798, "step": 1631 }, { "epoch": 0.4367139416644367, "grad_norm": 3.4737563133239746, "learning_rate": 9.962002261127946e-06, "loss": 1.224, "step": 1632 }, { "epoch": 0.43698153599143696, "grad_norm": 4.074963092803955, "learning_rate": 9.961893289469394e-06, "loss": 1.2144, "step": 1633 }, { "epoch": 0.43724913031843726, "grad_norm": 3.729600429534912, "learning_rate": 9.961784162375258e-06, "loss": 1.2326, "step": 1634 }, { "epoch": 0.4375167246454375, "grad_norm": 3.4806137084960938, "learning_rate": 9.961674879848957e-06, "loss": 0.9848, "step": 1635 }, { "epoch": 0.4377843189724378, "grad_norm": 3.5938944816589355, "learning_rate": 9.961565441893914e-06, "loss": 1.0944, "step": 1636 }, { "epoch": 0.43805191329943804, "grad_norm": 3.7552433013916016, "learning_rate": 9.961455848513557e-06, "loss": 1.1817, "step": 1637 }, { "epoch": 0.43831950762643834, "grad_norm": 3.6959292888641357, "learning_rate": 9.961346099711319e-06, "loss": 1.1635, "step": 1638 }, { "epoch": 0.4385871019534386, "grad_norm": 4.031107425689697, "learning_rate": 9.961236195490638e-06, "loss": 1.3005, "step": 1639 }, { "epoch": 0.4388546962804388, "grad_norm": 3.8287787437438965, "learning_rate": 9.961126135854957e-06, "loss": 1.0702, "step": 1640 }, { "epoch": 0.4391222906074391, "grad_norm": 3.573241949081421, "learning_rate": 9.961015920807722e-06, "loss": 1.0902, "step": 1641 }, { "epoch": 0.43938988493443937, "grad_norm": 3.861870050430298, "learning_rate": 9.96090555035239e-06, "loss": 1.1281, "step": 1642 }, { "epoch": 0.43965747926143967, "grad_norm": 3.52722430229187, "learning_rate": 9.960795024492413e-06, "loss": 1.0831, "step": 1643 }, { "epoch": 0.4399250735884399, "grad_norm": 3.898618221282959, "learning_rate": 9.960684343231258e-06, "loss": 1.1533, "step": 1644 }, { "epoch": 0.4401926679154402, "grad_norm": 3.61409854888916, "learning_rate": 9.960573506572391e-06, "loss": 1.103, "step": 1645 }, { "epoch": 0.44046026224244045, "grad_norm": 3.5603694915771484, "learning_rate": 9.96046251451928e-06, "loss": 1.0903, "step": 1646 }, { "epoch": 0.44072785656944075, "grad_norm": 3.9450957775115967, "learning_rate": 9.960351367075407e-06, "loss": 1.316, "step": 1647 }, { "epoch": 0.440995450896441, "grad_norm": 3.022848606109619, "learning_rate": 9.960240064244253e-06, "loss": 0.9156, "step": 1648 }, { "epoch": 0.4412630452234413, "grad_norm": 3.8123509883880615, "learning_rate": 9.960128606029302e-06, "loss": 1.2591, "step": 1649 }, { "epoch": 0.44153063955044153, "grad_norm": 3.739405870437622, "learning_rate": 9.960016992434047e-06, "loss": 1.1892, "step": 1650 }, { "epoch": 0.44179823387744177, "grad_norm": 4.091071605682373, "learning_rate": 9.959905223461985e-06, "loss": 1.2138, "step": 1651 }, { "epoch": 0.44206582820444207, "grad_norm": 3.7963550090789795, "learning_rate": 9.959793299116617e-06, "loss": 1.1637, "step": 1652 }, { "epoch": 0.4423334225314423, "grad_norm": 3.7637977600097656, "learning_rate": 9.959681219401449e-06, "loss": 1.1168, "step": 1653 }, { "epoch": 0.4426010168584426, "grad_norm": 3.74827241897583, "learning_rate": 9.959568984319991e-06, "loss": 1.1661, "step": 1654 }, { "epoch": 0.44286861118544285, "grad_norm": 3.401951551437378, "learning_rate": 9.95945659387576e-06, "loss": 1.1605, "step": 1655 }, { "epoch": 0.44313620551244315, "grad_norm": 3.677436590194702, "learning_rate": 9.959344048072278e-06, "loss": 1.2423, "step": 1656 }, { "epoch": 0.4434037998394434, "grad_norm": 4.045171737670898, "learning_rate": 9.959231346913068e-06, "loss": 1.2646, "step": 1657 }, { "epoch": 0.4436713941664437, "grad_norm": 4.872179985046387, "learning_rate": 9.95911849040166e-06, "loss": 1.2235, "step": 1658 }, { "epoch": 0.44393898849344393, "grad_norm": 3.714308023452759, "learning_rate": 9.959005478541592e-06, "loss": 1.074, "step": 1659 }, { "epoch": 0.44420658282044423, "grad_norm": 3.844395160675049, "learning_rate": 9.958892311336404e-06, "loss": 1.1599, "step": 1660 }, { "epoch": 0.44447417714744447, "grad_norm": 3.9484751224517822, "learning_rate": 9.958778988789639e-06, "loss": 1.088, "step": 1661 }, { "epoch": 0.44474177147444477, "grad_norm": 4.029232501983643, "learning_rate": 9.958665510904849e-06, "loss": 1.2989, "step": 1662 }, { "epoch": 0.445009365801445, "grad_norm": 4.975223064422607, "learning_rate": 9.958551877685586e-06, "loss": 1.104, "step": 1663 }, { "epoch": 0.44527696012844525, "grad_norm": 3.774137496948242, "learning_rate": 9.958438089135413e-06, "loss": 1.0331, "step": 1664 }, { "epoch": 0.44554455445544555, "grad_norm": 3.653740644454956, "learning_rate": 9.958324145257893e-06, "loss": 1.1029, "step": 1665 }, { "epoch": 0.4458121487824458, "grad_norm": 4.142993450164795, "learning_rate": 9.958210046056596e-06, "loss": 1.2692, "step": 1666 }, { "epoch": 0.4460797431094461, "grad_norm": 4.0442609786987305, "learning_rate": 9.958095791535095e-06, "loss": 1.1862, "step": 1667 }, { "epoch": 0.44634733743644633, "grad_norm": 3.8392021656036377, "learning_rate": 9.957981381696971e-06, "loss": 1.2256, "step": 1668 }, { "epoch": 0.44661493176344663, "grad_norm": 3.4839348793029785, "learning_rate": 9.957866816545804e-06, "loss": 1.1202, "step": 1669 }, { "epoch": 0.4468825260904469, "grad_norm": 3.9099719524383545, "learning_rate": 9.957752096085187e-06, "loss": 1.1985, "step": 1670 }, { "epoch": 0.44715012041744717, "grad_norm": 3.7614877223968506, "learning_rate": 9.957637220318711e-06, "loss": 1.2736, "step": 1671 }, { "epoch": 0.4474177147444474, "grad_norm": 4.555272579193115, "learning_rate": 9.957522189249979e-06, "loss": 1.1661, "step": 1672 }, { "epoch": 0.4476853090714477, "grad_norm": 3.6910009384155273, "learning_rate": 9.95740700288259e-06, "loss": 1.3053, "step": 1673 }, { "epoch": 0.44795290339844795, "grad_norm": 3.7056405544281006, "learning_rate": 9.957291661220154e-06, "loss": 1.1668, "step": 1674 }, { "epoch": 0.4482204977254482, "grad_norm": 3.738818645477295, "learning_rate": 9.957176164266283e-06, "loss": 1.2925, "step": 1675 }, { "epoch": 0.4484880920524485, "grad_norm": 3.5437395572662354, "learning_rate": 9.957060512024595e-06, "loss": 1.1237, "step": 1676 }, { "epoch": 0.44875568637944874, "grad_norm": 3.508234977722168, "learning_rate": 9.956944704498715e-06, "loss": 1.1684, "step": 1677 }, { "epoch": 0.44902328070644904, "grad_norm": 3.910888433456421, "learning_rate": 9.95682874169227e-06, "loss": 1.1898, "step": 1678 }, { "epoch": 0.4492908750334493, "grad_norm": 3.8787474632263184, "learning_rate": 9.956712623608892e-06, "loss": 1.1573, "step": 1679 }, { "epoch": 0.4495584693604496, "grad_norm": 3.672773838043213, "learning_rate": 9.95659635025222e-06, "loss": 1.042, "step": 1680 }, { "epoch": 0.4498260636874498, "grad_norm": 3.811082601547241, "learning_rate": 9.956479921625892e-06, "loss": 1.2272, "step": 1681 }, { "epoch": 0.4500936580144501, "grad_norm": 4.042838096618652, "learning_rate": 9.95636333773356e-06, "loss": 1.2527, "step": 1682 }, { "epoch": 0.45036125234145036, "grad_norm": 3.418757915496826, "learning_rate": 9.956246598578874e-06, "loss": 1.1833, "step": 1683 }, { "epoch": 0.45062884666845066, "grad_norm": 3.8703603744506836, "learning_rate": 9.956129704165491e-06, "loss": 1.2848, "step": 1684 }, { "epoch": 0.4508964409954509, "grad_norm": 3.5439083576202393, "learning_rate": 9.956012654497073e-06, "loss": 1.0018, "step": 1685 }, { "epoch": 0.45116403532245114, "grad_norm": 3.858811616897583, "learning_rate": 9.955895449577289e-06, "loss": 1.204, "step": 1686 }, { "epoch": 0.45143162964945144, "grad_norm": 3.636284351348877, "learning_rate": 9.955778089409806e-06, "loss": 1.1952, "step": 1687 }, { "epoch": 0.4516992239764517, "grad_norm": 3.6916327476501465, "learning_rate": 9.955660573998305e-06, "loss": 1.1277, "step": 1688 }, { "epoch": 0.451966818303452, "grad_norm": 3.550443172454834, "learning_rate": 9.955542903346462e-06, "loss": 1.1669, "step": 1689 }, { "epoch": 0.4522344126304522, "grad_norm": 3.5514254570007324, "learning_rate": 9.95542507745797e-06, "loss": 1.2038, "step": 1690 }, { "epoch": 0.4525020069574525, "grad_norm": 3.6606192588806152, "learning_rate": 9.955307096336513e-06, "loss": 1.0902, "step": 1691 }, { "epoch": 0.45276960128445276, "grad_norm": 3.676407814025879, "learning_rate": 9.955188959985792e-06, "loss": 1.1543, "step": 1692 }, { "epoch": 0.45303719561145306, "grad_norm": 3.630408763885498, "learning_rate": 9.955070668409505e-06, "loss": 1.1552, "step": 1693 }, { "epoch": 0.4533047899384533, "grad_norm": 3.992326259613037, "learning_rate": 9.954952221611359e-06, "loss": 1.2438, "step": 1694 }, { "epoch": 0.4535723842654536, "grad_norm": 3.313997507095337, "learning_rate": 9.954833619595062e-06, "loss": 1.1001, "step": 1695 }, { "epoch": 0.45383997859245384, "grad_norm": 3.4902310371398926, "learning_rate": 9.954714862364331e-06, "loss": 1.0505, "step": 1696 }, { "epoch": 0.4541075729194541, "grad_norm": 3.9076476097106934, "learning_rate": 9.954595949922889e-06, "loss": 1.3215, "step": 1697 }, { "epoch": 0.4543751672464544, "grad_norm": 4.445606708526611, "learning_rate": 9.954476882274458e-06, "loss": 1.2867, "step": 1698 }, { "epoch": 0.4546427615734546, "grad_norm": 4.114322662353516, "learning_rate": 9.954357659422766e-06, "loss": 1.2867, "step": 1699 }, { "epoch": 0.4549103559004549, "grad_norm": 4.046489238739014, "learning_rate": 9.95423828137155e-06, "loss": 1.3018, "step": 1700 }, { "epoch": 0.45517795022745516, "grad_norm": 4.090691089630127, "learning_rate": 9.954118748124552e-06, "loss": 1.1618, "step": 1701 }, { "epoch": 0.45544554455445546, "grad_norm": 3.791952133178711, "learning_rate": 9.953999059685513e-06, "loss": 1.2585, "step": 1702 }, { "epoch": 0.4557131388814557, "grad_norm": 3.9405517578125, "learning_rate": 9.953879216058185e-06, "loss": 1.2347, "step": 1703 }, { "epoch": 0.455980733208456, "grad_norm": 3.291191816329956, "learning_rate": 9.953759217246318e-06, "loss": 1.1054, "step": 1704 }, { "epoch": 0.45624832753545624, "grad_norm": 3.942545175552368, "learning_rate": 9.953639063253675e-06, "loss": 1.1939, "step": 1705 }, { "epoch": 0.45651592186245654, "grad_norm": 3.7756292819976807, "learning_rate": 9.953518754084019e-06, "loss": 1.248, "step": 1706 }, { "epoch": 0.4567835161894568, "grad_norm": 3.8124051094055176, "learning_rate": 9.953398289741116e-06, "loss": 1.2876, "step": 1707 }, { "epoch": 0.457051110516457, "grad_norm": 3.8491921424865723, "learning_rate": 9.953277670228745e-06, "loss": 1.1995, "step": 1708 }, { "epoch": 0.4573187048434573, "grad_norm": 3.814667224884033, "learning_rate": 9.95315689555068e-06, "loss": 1.0789, "step": 1709 }, { "epoch": 0.45758629917045757, "grad_norm": 3.2791123390197754, "learning_rate": 9.953035965710707e-06, "loss": 1.0521, "step": 1710 }, { "epoch": 0.45785389349745786, "grad_norm": 3.559877395629883, "learning_rate": 9.952914880712611e-06, "loss": 1.0571, "step": 1711 }, { "epoch": 0.4581214878244581, "grad_norm": 3.3893074989318848, "learning_rate": 9.952793640560189e-06, "loss": 1.0054, "step": 1712 }, { "epoch": 0.4583890821514584, "grad_norm": 3.697608232498169, "learning_rate": 9.952672245257238e-06, "loss": 1.1619, "step": 1713 }, { "epoch": 0.45865667647845865, "grad_norm": 3.5724332332611084, "learning_rate": 9.95255069480756e-06, "loss": 1.1086, "step": 1714 }, { "epoch": 0.45892427080545894, "grad_norm": 4.316122055053711, "learning_rate": 9.952428989214962e-06, "loss": 1.3456, "step": 1715 }, { "epoch": 0.4591918651324592, "grad_norm": 3.5813887119293213, "learning_rate": 9.952307128483257e-06, "loss": 1.2169, "step": 1716 }, { "epoch": 0.4594594594594595, "grad_norm": 4.544564723968506, "learning_rate": 9.952185112616263e-06, "loss": 1.3719, "step": 1717 }, { "epoch": 0.4597270537864597, "grad_norm": 3.653928518295288, "learning_rate": 9.952062941617801e-06, "loss": 1.1425, "step": 1718 }, { "epoch": 0.45999464811345997, "grad_norm": 3.9661028385162354, "learning_rate": 9.9519406154917e-06, "loss": 1.2444, "step": 1719 }, { "epoch": 0.46026224244046027, "grad_norm": 3.9497625827789307, "learning_rate": 9.95181813424179e-06, "loss": 1.1364, "step": 1720 }, { "epoch": 0.4605298367674605, "grad_norm": 3.682626962661743, "learning_rate": 9.95169549787191e-06, "loss": 1.0874, "step": 1721 }, { "epoch": 0.4607974310944608, "grad_norm": 4.0337233543396, "learning_rate": 9.951572706385901e-06, "loss": 1.2206, "step": 1722 }, { "epoch": 0.46106502542146105, "grad_norm": 3.902106761932373, "learning_rate": 9.951449759787608e-06, "loss": 1.2841, "step": 1723 }, { "epoch": 0.46133261974846135, "grad_norm": 3.6979806423187256, "learning_rate": 9.951326658080881e-06, "loss": 1.1272, "step": 1724 }, { "epoch": 0.4616002140754616, "grad_norm": 3.5009329319000244, "learning_rate": 9.951203401269582e-06, "loss": 1.0615, "step": 1725 }, { "epoch": 0.4618678084024619, "grad_norm": 3.846033811569214, "learning_rate": 9.951079989357569e-06, "loss": 1.267, "step": 1726 }, { "epoch": 0.46213540272946213, "grad_norm": 3.7044687271118164, "learning_rate": 9.950956422348708e-06, "loss": 1.228, "step": 1727 }, { "epoch": 0.46240299705646243, "grad_norm": 3.8444931507110596, "learning_rate": 9.950832700246868e-06, "loss": 1.1271, "step": 1728 }, { "epoch": 0.46267059138346267, "grad_norm": 3.61540150642395, "learning_rate": 9.950708823055926e-06, "loss": 1.1431, "step": 1729 }, { "epoch": 0.4629381857104629, "grad_norm": 3.617910623550415, "learning_rate": 9.950584790779765e-06, "loss": 1.1046, "step": 1730 }, { "epoch": 0.4632057800374632, "grad_norm": 4.220783233642578, "learning_rate": 9.950460603422266e-06, "loss": 1.2734, "step": 1731 }, { "epoch": 0.46347337436446345, "grad_norm": 3.5768558979034424, "learning_rate": 9.950336260987323e-06, "loss": 1.1693, "step": 1732 }, { "epoch": 0.46374096869146375, "grad_norm": 3.8983094692230225, "learning_rate": 9.950211763478829e-06, "loss": 1.1813, "step": 1733 }, { "epoch": 0.464008563018464, "grad_norm": 3.659959316253662, "learning_rate": 9.950087110900686e-06, "loss": 1.204, "step": 1734 }, { "epoch": 0.4642761573454643, "grad_norm": 3.40910005569458, "learning_rate": 9.949962303256796e-06, "loss": 1.1507, "step": 1735 }, { "epoch": 0.46454375167246453, "grad_norm": 3.770167112350464, "learning_rate": 9.949837340551072e-06, "loss": 1.121, "step": 1736 }, { "epoch": 0.46481134599946483, "grad_norm": 3.7433769702911377, "learning_rate": 9.949712222787426e-06, "loss": 1.1993, "step": 1737 }, { "epoch": 0.4650789403264651, "grad_norm": 3.9215848445892334, "learning_rate": 9.94958694996978e-06, "loss": 1.211, "step": 1738 }, { "epoch": 0.46534653465346537, "grad_norm": 3.646552324295044, "learning_rate": 9.949461522102056e-06, "loss": 1.1609, "step": 1739 }, { "epoch": 0.4656141289804656, "grad_norm": 3.452594757080078, "learning_rate": 9.949335939188181e-06, "loss": 1.0887, "step": 1740 }, { "epoch": 0.46588172330746586, "grad_norm": 3.6262283325195312, "learning_rate": 9.949210201232094e-06, "loss": 1.2381, "step": 1741 }, { "epoch": 0.46614931763446615, "grad_norm": 3.7934439182281494, "learning_rate": 9.949084308237731e-06, "loss": 1.2578, "step": 1742 }, { "epoch": 0.4664169119614664, "grad_norm": 4.024771690368652, "learning_rate": 9.948958260209036e-06, "loss": 1.2287, "step": 1743 }, { "epoch": 0.4666845062884667, "grad_norm": 4.1637115478515625, "learning_rate": 9.948832057149958e-06, "loss": 1.4259, "step": 1744 }, { "epoch": 0.46695210061546694, "grad_norm": 3.3593788146972656, "learning_rate": 9.948705699064452e-06, "loss": 1.0906, "step": 1745 }, { "epoch": 0.46721969494246723, "grad_norm": 4.606101989746094, "learning_rate": 9.948579185956472e-06, "loss": 1.3554, "step": 1746 }, { "epoch": 0.4674872892694675, "grad_norm": 4.054565906524658, "learning_rate": 9.948452517829984e-06, "loss": 1.316, "step": 1747 }, { "epoch": 0.4677548835964678, "grad_norm": 3.526566982269287, "learning_rate": 9.948325694688957e-06, "loss": 1.1971, "step": 1748 }, { "epoch": 0.468022477923468, "grad_norm": 3.647592544555664, "learning_rate": 9.948198716537361e-06, "loss": 1.1751, "step": 1749 }, { "epoch": 0.4682900722504683, "grad_norm": 4.159237384796143, "learning_rate": 9.948071583379176e-06, "loss": 1.1648, "step": 1750 }, { "epoch": 0.46855766657746856, "grad_norm": 3.9114818572998047, "learning_rate": 9.947944295218384e-06, "loss": 1.2213, "step": 1751 }, { "epoch": 0.4688252609044688, "grad_norm": 3.539741039276123, "learning_rate": 9.947816852058972e-06, "loss": 1.1406, "step": 1752 }, { "epoch": 0.4690928552314691, "grad_norm": 3.950688600540161, "learning_rate": 9.947689253904932e-06, "loss": 1.1622, "step": 1753 }, { "epoch": 0.46936044955846934, "grad_norm": 3.5240933895111084, "learning_rate": 9.94756150076026e-06, "loss": 1.1184, "step": 1754 }, { "epoch": 0.46962804388546964, "grad_norm": 3.455580472946167, "learning_rate": 9.947433592628964e-06, "loss": 1.1482, "step": 1755 }, { "epoch": 0.4698956382124699, "grad_norm": 3.8551056385040283, "learning_rate": 9.947305529515041e-06, "loss": 1.2818, "step": 1756 }, { "epoch": 0.4701632325394702, "grad_norm": 3.4578497409820557, "learning_rate": 9.947177311422513e-06, "loss": 1.1184, "step": 1757 }, { "epoch": 0.4704308268664704, "grad_norm": 3.092772960662842, "learning_rate": 9.947048938355389e-06, "loss": 1.1181, "step": 1758 }, { "epoch": 0.4706984211934707, "grad_norm": 3.7363767623901367, "learning_rate": 9.946920410317694e-06, "loss": 1.1509, "step": 1759 }, { "epoch": 0.47096601552047096, "grad_norm": 3.5836987495422363, "learning_rate": 9.946791727313453e-06, "loss": 1.2198, "step": 1760 }, { "epoch": 0.47123360984747126, "grad_norm": 3.2804808616638184, "learning_rate": 9.946662889346693e-06, "loss": 1.2418, "step": 1761 }, { "epoch": 0.4715012041744715, "grad_norm": 3.3668692111968994, "learning_rate": 9.94653389642146e-06, "loss": 1.0194, "step": 1762 }, { "epoch": 0.47176879850147174, "grad_norm": 3.753690481185913, "learning_rate": 9.946404748541787e-06, "loss": 1.194, "step": 1763 }, { "epoch": 0.47203639282847204, "grad_norm": 3.6186954975128174, "learning_rate": 9.946275445711722e-06, "loss": 1.2537, "step": 1764 }, { "epoch": 0.4723039871554723, "grad_norm": 3.8216750621795654, "learning_rate": 9.946145987935315e-06, "loss": 1.0967, "step": 1765 }, { "epoch": 0.4725715814824726, "grad_norm": 3.866121530532837, "learning_rate": 9.946016375216624e-06, "loss": 1.1481, "step": 1766 }, { "epoch": 0.4728391758094728, "grad_norm": 3.7349894046783447, "learning_rate": 9.945886607559703e-06, "loss": 1.1937, "step": 1767 }, { "epoch": 0.4731067701364731, "grad_norm": 3.244333505630493, "learning_rate": 9.945756684968624e-06, "loss": 1.062, "step": 1768 }, { "epoch": 0.47337436446347336, "grad_norm": 3.341917037963867, "learning_rate": 9.945626607447452e-06, "loss": 1.1912, "step": 1769 }, { "epoch": 0.47364195879047366, "grad_norm": 3.748084545135498, "learning_rate": 9.945496375000265e-06, "loss": 1.1499, "step": 1770 }, { "epoch": 0.4739095531174739, "grad_norm": 4.144589900970459, "learning_rate": 9.94536598763114e-06, "loss": 1.3394, "step": 1771 }, { "epoch": 0.4741771474444742, "grad_norm": 3.9339520931243896, "learning_rate": 9.945235445344164e-06, "loss": 1.319, "step": 1772 }, { "epoch": 0.47444474177147444, "grad_norm": 3.8457014560699463, "learning_rate": 9.945104748143426e-06, "loss": 1.1954, "step": 1773 }, { "epoch": 0.4747123360984747, "grad_norm": 3.6537280082702637, "learning_rate": 9.944973896033017e-06, "loss": 1.1368, "step": 1774 }, { "epoch": 0.474979930425475, "grad_norm": 3.5252084732055664, "learning_rate": 9.944842889017042e-06, "loss": 1.1575, "step": 1775 }, { "epoch": 0.4752475247524752, "grad_norm": 3.692296266555786, "learning_rate": 9.944711727099597e-06, "loss": 1.1172, "step": 1776 }, { "epoch": 0.4755151190794755, "grad_norm": 3.7176449298858643, "learning_rate": 9.944580410284799e-06, "loss": 1.3006, "step": 1777 }, { "epoch": 0.47578271340647577, "grad_norm": 3.9314935207366943, "learning_rate": 9.944448938576755e-06, "loss": 1.1859, "step": 1778 }, { "epoch": 0.47605030773347606, "grad_norm": 3.7619473934173584, "learning_rate": 9.944317311979587e-06, "loss": 1.273, "step": 1779 }, { "epoch": 0.4763179020604763, "grad_norm": 3.6345512866973877, "learning_rate": 9.944185530497419e-06, "loss": 1.1356, "step": 1780 }, { "epoch": 0.4765854963874766, "grad_norm": 4.064966201782227, "learning_rate": 9.944053594134374e-06, "loss": 1.3057, "step": 1781 }, { "epoch": 0.47685309071447685, "grad_norm": 3.3477392196655273, "learning_rate": 9.943921502894593e-06, "loss": 1.1066, "step": 1782 }, { "epoch": 0.47712068504147714, "grad_norm": 3.5054497718811035, "learning_rate": 9.943789256782208e-06, "loss": 1.1497, "step": 1783 }, { "epoch": 0.4773882793684774, "grad_norm": 3.596972703933716, "learning_rate": 9.943656855801364e-06, "loss": 1.195, "step": 1784 }, { "epoch": 0.47765587369547763, "grad_norm": 3.870314598083496, "learning_rate": 9.943524299956206e-06, "loss": 1.1513, "step": 1785 }, { "epoch": 0.4779234680224779, "grad_norm": 3.6923234462738037, "learning_rate": 9.94339158925089e-06, "loss": 1.2705, "step": 1786 }, { "epoch": 0.47819106234947817, "grad_norm": 3.6829121112823486, "learning_rate": 9.94325872368957e-06, "loss": 1.1401, "step": 1787 }, { "epoch": 0.47845865667647847, "grad_norm": 3.6094305515289307, "learning_rate": 9.943125703276411e-06, "loss": 1.2009, "step": 1788 }, { "epoch": 0.4787262510034787, "grad_norm": 3.806605339050293, "learning_rate": 9.94299252801558e-06, "loss": 1.1255, "step": 1789 }, { "epoch": 0.478993845330479, "grad_norm": 3.4413986206054688, "learning_rate": 9.942859197911246e-06, "loss": 1.099, "step": 1790 }, { "epoch": 0.47926143965747925, "grad_norm": 3.7464005947113037, "learning_rate": 9.942725712967587e-06, "loss": 1.1829, "step": 1791 }, { "epoch": 0.47952903398447955, "grad_norm": 4.125034809112549, "learning_rate": 9.942592073188783e-06, "loss": 1.3325, "step": 1792 }, { "epoch": 0.4797966283114798, "grad_norm": 3.741257429122925, "learning_rate": 9.942458278579026e-06, "loss": 1.1842, "step": 1793 }, { "epoch": 0.4800642226384801, "grad_norm": 3.944084405899048, "learning_rate": 9.9423243291425e-06, "loss": 1.3479, "step": 1794 }, { "epoch": 0.48033181696548033, "grad_norm": 3.8254520893096924, "learning_rate": 9.942190224883406e-06, "loss": 1.204, "step": 1795 }, { "epoch": 0.48059941129248057, "grad_norm": 3.4253695011138916, "learning_rate": 9.942055965805943e-06, "loss": 1.0251, "step": 1796 }, { "epoch": 0.48086700561948087, "grad_norm": 3.6683967113494873, "learning_rate": 9.941921551914318e-06, "loss": 1.1936, "step": 1797 }, { "epoch": 0.4811345999464811, "grad_norm": 3.3697001934051514, "learning_rate": 9.94178698321274e-06, "loss": 1.0839, "step": 1798 }, { "epoch": 0.4814021942734814, "grad_norm": 3.724254846572876, "learning_rate": 9.941652259705425e-06, "loss": 1.2582, "step": 1799 }, { "epoch": 0.48166978860048165, "grad_norm": 3.8191325664520264, "learning_rate": 9.941517381396594e-06, "loss": 1.1972, "step": 1800 }, { "epoch": 0.48193738292748195, "grad_norm": 3.812429904937744, "learning_rate": 9.941382348290471e-06, "loss": 1.1348, "step": 1801 }, { "epoch": 0.4822049772544822, "grad_norm": 3.5466363430023193, "learning_rate": 9.941247160391288e-06, "loss": 1.2157, "step": 1802 }, { "epoch": 0.4824725715814825, "grad_norm": 3.9619693756103516, "learning_rate": 9.94111181770328e-06, "loss": 1.2637, "step": 1803 }, { "epoch": 0.48274016590848273, "grad_norm": 3.724550485610962, "learning_rate": 9.940976320230682e-06, "loss": 1.1937, "step": 1804 }, { "epoch": 0.48300776023548303, "grad_norm": 3.91395902633667, "learning_rate": 9.940840667977745e-06, "loss": 1.1857, "step": 1805 }, { "epoch": 0.4832753545624833, "grad_norm": 3.2114241123199463, "learning_rate": 9.940704860948713e-06, "loss": 1.0522, "step": 1806 }, { "epoch": 0.48354294888948357, "grad_norm": 3.380030870437622, "learning_rate": 9.940568899147844e-06, "loss": 1.1107, "step": 1807 }, { "epoch": 0.4838105432164838, "grad_norm": 3.728666067123413, "learning_rate": 9.940432782579395e-06, "loss": 1.2433, "step": 1808 }, { "epoch": 0.48407813754348405, "grad_norm": 3.6429684162139893, "learning_rate": 9.940296511247631e-06, "loss": 1.0235, "step": 1809 }, { "epoch": 0.48434573187048435, "grad_norm": 3.6570608615875244, "learning_rate": 9.94016008515682e-06, "loss": 1.1839, "step": 1810 }, { "epoch": 0.4846133261974846, "grad_norm": 3.6205060482025146, "learning_rate": 9.940023504311237e-06, "loss": 1.0398, "step": 1811 }, { "epoch": 0.4848809205244849, "grad_norm": 3.8173046112060547, "learning_rate": 9.93988676871516e-06, "loss": 1.1421, "step": 1812 }, { "epoch": 0.48514851485148514, "grad_norm": 3.792316198348999, "learning_rate": 9.939749878372873e-06, "loss": 1.2172, "step": 1813 }, { "epoch": 0.48541610917848543, "grad_norm": 3.5528106689453125, "learning_rate": 9.939612833288662e-06, "loss": 1.0964, "step": 1814 }, { "epoch": 0.4856837035054857, "grad_norm": 3.7796194553375244, "learning_rate": 9.939475633466822e-06, "loss": 1.1715, "step": 1815 }, { "epoch": 0.485951297832486, "grad_norm": 4.104870319366455, "learning_rate": 9.93933827891165e-06, "loss": 1.321, "step": 1816 }, { "epoch": 0.4862188921594862, "grad_norm": 3.6252858638763428, "learning_rate": 9.93920076962745e-06, "loss": 1.1834, "step": 1817 }, { "epoch": 0.4864864864864865, "grad_norm": 3.8419394493103027, "learning_rate": 9.939063105618525e-06, "loss": 1.0172, "step": 1818 }, { "epoch": 0.48675408081348676, "grad_norm": 3.7632715702056885, "learning_rate": 9.938925286889194e-06, "loss": 1.1501, "step": 1819 }, { "epoch": 0.487021675140487, "grad_norm": 3.928379535675049, "learning_rate": 9.938787313443771e-06, "loss": 1.2283, "step": 1820 }, { "epoch": 0.4872892694674873, "grad_norm": 3.341074228286743, "learning_rate": 9.93864918528658e-06, "loss": 1.038, "step": 1821 }, { "epoch": 0.48755686379448754, "grad_norm": 3.8273613452911377, "learning_rate": 9.938510902421945e-06, "loss": 1.2315, "step": 1822 }, { "epoch": 0.48782445812148784, "grad_norm": 3.6578738689422607, "learning_rate": 9.938372464854198e-06, "loss": 1.1331, "step": 1823 }, { "epoch": 0.4880920524484881, "grad_norm": 3.7590830326080322, "learning_rate": 9.93823387258768e-06, "loss": 1.0829, "step": 1824 }, { "epoch": 0.4883596467754884, "grad_norm": 3.6043503284454346, "learning_rate": 9.938095125626726e-06, "loss": 1.0529, "step": 1825 }, { "epoch": 0.4886272411024886, "grad_norm": 3.854071617126465, "learning_rate": 9.93795622397569e-06, "loss": 1.2383, "step": 1826 }, { "epoch": 0.4888948354294889, "grad_norm": 3.758488416671753, "learning_rate": 9.937817167638914e-06, "loss": 1.0957, "step": 1827 }, { "epoch": 0.48916242975648916, "grad_norm": 3.695533514022827, "learning_rate": 9.937677956620764e-06, "loss": 1.3151, "step": 1828 }, { "epoch": 0.48943002408348946, "grad_norm": 3.5443248748779297, "learning_rate": 9.937538590925593e-06, "loss": 1.0494, "step": 1829 }, { "epoch": 0.4896976184104897, "grad_norm": 3.6536788940429688, "learning_rate": 9.937399070557771e-06, "loss": 1.218, "step": 1830 }, { "epoch": 0.48996521273748994, "grad_norm": 3.929737091064453, "learning_rate": 9.937259395521667e-06, "loss": 1.1923, "step": 1831 }, { "epoch": 0.49023280706449024, "grad_norm": 3.3342623710632324, "learning_rate": 9.937119565821658e-06, "loss": 1.1186, "step": 1832 }, { "epoch": 0.4905004013914905, "grad_norm": 3.6802546977996826, "learning_rate": 9.936979581462122e-06, "loss": 1.2171, "step": 1833 }, { "epoch": 0.4907679957184908, "grad_norm": 3.4526920318603516, "learning_rate": 9.936839442447446e-06, "loss": 1.0922, "step": 1834 }, { "epoch": 0.491035590045491, "grad_norm": 3.7504050731658936, "learning_rate": 9.936699148782018e-06, "loss": 1.0743, "step": 1835 }, { "epoch": 0.4913031843724913, "grad_norm": 3.873074531555176, "learning_rate": 9.936558700470234e-06, "loss": 1.2213, "step": 1836 }, { "epoch": 0.49157077869949156, "grad_norm": 3.6571013927459717, "learning_rate": 9.936418097516495e-06, "loss": 1.123, "step": 1837 }, { "epoch": 0.49183837302649186, "grad_norm": 3.5951497554779053, "learning_rate": 9.936277339925205e-06, "loss": 1.1968, "step": 1838 }, { "epoch": 0.4921059673534921, "grad_norm": 3.9791698455810547, "learning_rate": 9.93613642770077e-06, "loss": 1.1248, "step": 1839 }, { "epoch": 0.4923735616804924, "grad_norm": 4.2198100090026855, "learning_rate": 9.935995360847608e-06, "loss": 1.1946, "step": 1840 }, { "epoch": 0.49264115600749264, "grad_norm": 3.915623188018799, "learning_rate": 9.935854139370139e-06, "loss": 1.1836, "step": 1841 }, { "epoch": 0.4929087503344929, "grad_norm": 3.8059470653533936, "learning_rate": 9.93571276327278e-06, "loss": 1.2146, "step": 1842 }, { "epoch": 0.4931763446614932, "grad_norm": 4.118159770965576, "learning_rate": 9.93557123255997e-06, "loss": 1.1451, "step": 1843 }, { "epoch": 0.4934439389884934, "grad_norm": 4.63586950302124, "learning_rate": 9.935429547236131e-06, "loss": 1.4108, "step": 1844 }, { "epoch": 0.4937115333154937, "grad_norm": 3.541332244873047, "learning_rate": 9.935287707305712e-06, "loss": 1.0874, "step": 1845 }, { "epoch": 0.49397912764249396, "grad_norm": 3.4757399559020996, "learning_rate": 9.93514571277315e-06, "loss": 1.1712, "step": 1846 }, { "epoch": 0.49424672196949426, "grad_norm": 3.835604190826416, "learning_rate": 9.935003563642895e-06, "loss": 1.1442, "step": 1847 }, { "epoch": 0.4945143162964945, "grad_norm": 3.701040029525757, "learning_rate": 9.934861259919399e-06, "loss": 1.0242, "step": 1848 }, { "epoch": 0.4947819106234948, "grad_norm": 3.7247939109802246, "learning_rate": 9.934718801607122e-06, "loss": 1.2422, "step": 1849 }, { "epoch": 0.49504950495049505, "grad_norm": 4.011390209197998, "learning_rate": 9.934576188710524e-06, "loss": 1.2711, "step": 1850 }, { "epoch": 0.49531709927749534, "grad_norm": 3.3684377670288086, "learning_rate": 9.934433421234073e-06, "loss": 1.1051, "step": 1851 }, { "epoch": 0.4955846936044956, "grad_norm": 3.550625801086426, "learning_rate": 9.934290499182244e-06, "loss": 1.2236, "step": 1852 }, { "epoch": 0.4958522879314958, "grad_norm": 4.006191253662109, "learning_rate": 9.93414742255951e-06, "loss": 1.2605, "step": 1853 }, { "epoch": 0.4961198822584961, "grad_norm": 3.7999353408813477, "learning_rate": 9.934004191370356e-06, "loss": 1.2019, "step": 1854 }, { "epoch": 0.49638747658549637, "grad_norm": 3.6491141319274902, "learning_rate": 9.933860805619269e-06, "loss": 1.1939, "step": 1855 }, { "epoch": 0.49665507091249667, "grad_norm": 3.60182785987854, "learning_rate": 9.933717265310739e-06, "loss": 1.185, "step": 1856 }, { "epoch": 0.4969226652394969, "grad_norm": 3.517396926879883, "learning_rate": 9.933573570449262e-06, "loss": 1.0801, "step": 1857 }, { "epoch": 0.4971902595664972, "grad_norm": 3.847062349319458, "learning_rate": 9.93342972103934e-06, "loss": 1.1699, "step": 1858 }, { "epoch": 0.49745785389349745, "grad_norm": 3.5466854572296143, "learning_rate": 9.933285717085482e-06, "loss": 1.088, "step": 1859 }, { "epoch": 0.49772544822049775, "grad_norm": 4.013504981994629, "learning_rate": 9.933141558592196e-06, "loss": 1.2217, "step": 1860 }, { "epoch": 0.497993042547498, "grad_norm": 4.0954155921936035, "learning_rate": 9.932997245563997e-06, "loss": 1.231, "step": 1861 }, { "epoch": 0.4982606368744983, "grad_norm": 3.723498821258545, "learning_rate": 9.93285277800541e-06, "loss": 1.1645, "step": 1862 }, { "epoch": 0.49852823120149853, "grad_norm": 3.436872720718384, "learning_rate": 9.932708155920957e-06, "loss": 1.1673, "step": 1863 }, { "epoch": 0.49879582552849877, "grad_norm": 3.8395087718963623, "learning_rate": 9.932563379315168e-06, "loss": 1.2485, "step": 1864 }, { "epoch": 0.49906341985549907, "grad_norm": 3.937257766723633, "learning_rate": 9.93241844819258e-06, "loss": 1.2447, "step": 1865 }, { "epoch": 0.4993310141824993, "grad_norm": 3.5979080200195312, "learning_rate": 9.932273362557734e-06, "loss": 1.153, "step": 1866 }, { "epoch": 0.4995986085094996, "grad_norm": 3.8511085510253906, "learning_rate": 9.932128122415173e-06, "loss": 1.1053, "step": 1867 }, { "epoch": 0.49986620283649985, "grad_norm": 4.010068893432617, "learning_rate": 9.931982727769448e-06, "loss": 1.155, "step": 1868 }, { "epoch": 0.5001337971635001, "grad_norm": 3.749917507171631, "learning_rate": 9.931837178625111e-06, "loss": 1.1328, "step": 1869 }, { "epoch": 0.5004013914905004, "grad_norm": 3.668951988220215, "learning_rate": 9.931691474986726e-06, "loss": 1.0613, "step": 1870 }, { "epoch": 0.5006689858175006, "grad_norm": 3.563898801803589, "learning_rate": 9.931545616858853e-06, "loss": 1.1231, "step": 1871 }, { "epoch": 0.5009365801445009, "grad_norm": 3.758409023284912, "learning_rate": 9.931399604246064e-06, "loss": 1.2123, "step": 1872 }, { "epoch": 0.5012041744715012, "grad_norm": 3.4294962882995605, "learning_rate": 9.93125343715293e-06, "loss": 1.0552, "step": 1873 }, { "epoch": 0.5014717687985015, "grad_norm": 3.464952230453491, "learning_rate": 9.931107115584034e-06, "loss": 1.1708, "step": 1874 }, { "epoch": 0.5017393631255017, "grad_norm": 3.9118897914886475, "learning_rate": 9.930960639543956e-06, "loss": 1.1202, "step": 1875 }, { "epoch": 0.502006957452502, "grad_norm": 3.2876811027526855, "learning_rate": 9.930814009037286e-06, "loss": 1.0269, "step": 1876 }, { "epoch": 0.5022745517795023, "grad_norm": 3.571906566619873, "learning_rate": 9.930667224068618e-06, "loss": 1.1515, "step": 1877 }, { "epoch": 0.5025421461065025, "grad_norm": 3.775341510772705, "learning_rate": 9.930520284642548e-06, "loss": 1.1708, "step": 1878 }, { "epoch": 0.5028097404335028, "grad_norm": 3.8571135997772217, "learning_rate": 9.93037319076368e-06, "loss": 1.1639, "step": 1879 }, { "epoch": 0.5030773347605031, "grad_norm": 3.8149497509002686, "learning_rate": 9.930225942436623e-06, "loss": 1.2267, "step": 1880 }, { "epoch": 0.5033449290875034, "grad_norm": 3.51364803314209, "learning_rate": 9.930078539665988e-06, "loss": 1.1698, "step": 1881 }, { "epoch": 0.5036125234145036, "grad_norm": 3.4830048084259033, "learning_rate": 9.929930982456395e-06, "loss": 1.1729, "step": 1882 }, { "epoch": 0.5038801177415039, "grad_norm": 3.5782647132873535, "learning_rate": 9.929783270812464e-06, "loss": 1.1596, "step": 1883 }, { "epoch": 0.5041477120685042, "grad_norm": 3.836897373199463, "learning_rate": 9.929635404738822e-06, "loss": 1.2473, "step": 1884 }, { "epoch": 0.5044153063955045, "grad_norm": 3.5359864234924316, "learning_rate": 9.929487384240103e-06, "loss": 1.052, "step": 1885 }, { "epoch": 0.5046829007225047, "grad_norm": 3.7053215503692627, "learning_rate": 9.929339209320944e-06, "loss": 1.1623, "step": 1886 }, { "epoch": 0.504950495049505, "grad_norm": 3.838304042816162, "learning_rate": 9.929190879985982e-06, "loss": 1.2307, "step": 1887 }, { "epoch": 0.5052180893765053, "grad_norm": 3.681903123855591, "learning_rate": 9.929042396239869e-06, "loss": 1.3262, "step": 1888 }, { "epoch": 0.5054856837035054, "grad_norm": 3.6203062534332275, "learning_rate": 9.928893758087254e-06, "loss": 1.1616, "step": 1889 }, { "epoch": 0.5057532780305057, "grad_norm": 3.843017339706421, "learning_rate": 9.928744965532795e-06, "loss": 1.184, "step": 1890 }, { "epoch": 0.506020872357506, "grad_norm": 4.003540992736816, "learning_rate": 9.928596018581151e-06, "loss": 1.0719, "step": 1891 }, { "epoch": 0.5062884666845063, "grad_norm": 3.225344657897949, "learning_rate": 9.928446917236988e-06, "loss": 0.9902, "step": 1892 }, { "epoch": 0.5065560610115065, "grad_norm": 4.046036720275879, "learning_rate": 9.928297661504978e-06, "loss": 1.1583, "step": 1893 }, { "epoch": 0.5068236553385068, "grad_norm": 3.522110939025879, "learning_rate": 9.928148251389796e-06, "loss": 1.0941, "step": 1894 }, { "epoch": 0.5070912496655071, "grad_norm": 3.5445072650909424, "learning_rate": 9.92799868689612e-06, "loss": 1.1043, "step": 1895 }, { "epoch": 0.5073588439925074, "grad_norm": 3.7460379600524902, "learning_rate": 9.927848968028642e-06, "loss": 1.1259, "step": 1896 }, { "epoch": 0.5076264383195076, "grad_norm": 3.518141508102417, "learning_rate": 9.927699094792045e-06, "loss": 1.0938, "step": 1897 }, { "epoch": 0.5078940326465079, "grad_norm": 4.169661521911621, "learning_rate": 9.927549067191026e-06, "loss": 1.3043, "step": 1898 }, { "epoch": 0.5081616269735082, "grad_norm": 3.7639896869659424, "learning_rate": 9.927398885230286e-06, "loss": 1.254, "step": 1899 }, { "epoch": 0.5084292213005084, "grad_norm": 3.5845093727111816, "learning_rate": 9.927248548914528e-06, "loss": 1.0115, "step": 1900 }, { "epoch": 0.5086968156275087, "grad_norm": 3.618220806121826, "learning_rate": 9.927098058248463e-06, "loss": 1.1713, "step": 1901 }, { "epoch": 0.508964409954509, "grad_norm": 3.6645729541778564, "learning_rate": 9.926947413236806e-06, "loss": 1.1468, "step": 1902 }, { "epoch": 0.5092320042815093, "grad_norm": 3.4273576736450195, "learning_rate": 9.926796613884271e-06, "loss": 1.0282, "step": 1903 }, { "epoch": 0.5094995986085095, "grad_norm": 4.018494606018066, "learning_rate": 9.926645660195588e-06, "loss": 1.2789, "step": 1904 }, { "epoch": 0.5097671929355098, "grad_norm": 3.431507110595703, "learning_rate": 9.926494552175484e-06, "loss": 1.1095, "step": 1905 }, { "epoch": 0.5100347872625101, "grad_norm": 3.723026752471924, "learning_rate": 9.926343289828689e-06, "loss": 1.1774, "step": 1906 }, { "epoch": 0.5103023815895104, "grad_norm": 4.003593921661377, "learning_rate": 9.926191873159945e-06, "loss": 1.2947, "step": 1907 }, { "epoch": 0.5105699759165105, "grad_norm": 3.923344373703003, "learning_rate": 9.926040302173995e-06, "loss": 1.3416, "step": 1908 }, { "epoch": 0.5108375702435108, "grad_norm": 4.057835578918457, "learning_rate": 9.925888576875588e-06, "loss": 1.1635, "step": 1909 }, { "epoch": 0.5111051645705111, "grad_norm": 3.939828395843506, "learning_rate": 9.925736697269474e-06, "loss": 1.3077, "step": 1910 }, { "epoch": 0.5113727588975113, "grad_norm": 4.334293365478516, "learning_rate": 9.925584663360412e-06, "loss": 1.2711, "step": 1911 }, { "epoch": 0.5116403532245116, "grad_norm": 3.6700150966644287, "learning_rate": 9.925432475153166e-06, "loss": 1.2447, "step": 1912 }, { "epoch": 0.5119079475515119, "grad_norm": 3.7518320083618164, "learning_rate": 9.925280132652503e-06, "loss": 1.1256, "step": 1913 }, { "epoch": 0.5121755418785122, "grad_norm": 3.581819534301758, "learning_rate": 9.925127635863195e-06, "loss": 1.0175, "step": 1914 }, { "epoch": 0.5124431362055124, "grad_norm": 3.7574949264526367, "learning_rate": 9.924974984790016e-06, "loss": 1.2528, "step": 1915 }, { "epoch": 0.5127107305325127, "grad_norm": 3.8194570541381836, "learning_rate": 9.924822179437752e-06, "loss": 1.2685, "step": 1916 }, { "epoch": 0.512978324859513, "grad_norm": 3.690627336502075, "learning_rate": 9.924669219811188e-06, "loss": 1.2214, "step": 1917 }, { "epoch": 0.5132459191865133, "grad_norm": 3.204648494720459, "learning_rate": 9.924516105915116e-06, "loss": 0.9857, "step": 1918 }, { "epoch": 0.5135135135135135, "grad_norm": 3.702674627304077, "learning_rate": 9.924362837754334e-06, "loss": 1.0301, "step": 1919 }, { "epoch": 0.5137811078405138, "grad_norm": 3.366229295730591, "learning_rate": 9.92420941533364e-06, "loss": 1.0714, "step": 1920 }, { "epoch": 0.5140487021675141, "grad_norm": 3.501063823699951, "learning_rate": 9.92405583865784e-06, "loss": 1.1025, "step": 1921 }, { "epoch": 0.5143162964945143, "grad_norm": 3.3142244815826416, "learning_rate": 9.92390210773175e-06, "loss": 1.0532, "step": 1922 }, { "epoch": 0.5145838908215146, "grad_norm": 3.998425006866455, "learning_rate": 9.923748222560181e-06, "loss": 1.1796, "step": 1923 }, { "epoch": 0.5148514851485149, "grad_norm": 3.6948330402374268, "learning_rate": 9.923594183147954e-06, "loss": 1.0869, "step": 1924 }, { "epoch": 0.5151190794755152, "grad_norm": 3.7560575008392334, "learning_rate": 9.923439989499897e-06, "loss": 1.1566, "step": 1925 }, { "epoch": 0.5153866738025153, "grad_norm": 3.8775906562805176, "learning_rate": 9.923285641620838e-06, "loss": 1.1781, "step": 1926 }, { "epoch": 0.5156542681295156, "grad_norm": 3.8323404788970947, "learning_rate": 9.923131139515613e-06, "loss": 1.1228, "step": 1927 }, { "epoch": 0.515921862456516, "grad_norm": 3.4766688346862793, "learning_rate": 9.922976483189061e-06, "loss": 1.0528, "step": 1928 }, { "epoch": 0.5161894567835162, "grad_norm": 3.5990777015686035, "learning_rate": 9.922821672646028e-06, "loss": 1.0601, "step": 1929 }, { "epoch": 0.5164570511105164, "grad_norm": 3.702481269836426, "learning_rate": 9.922666707891361e-06, "loss": 1.1455, "step": 1930 }, { "epoch": 0.5167246454375167, "grad_norm": 3.4668917655944824, "learning_rate": 9.92251158892992e-06, "loss": 1.0795, "step": 1931 }, { "epoch": 0.516992239764517, "grad_norm": 3.769757032394409, "learning_rate": 9.922356315766557e-06, "loss": 1.1749, "step": 1932 }, { "epoch": 0.5172598340915172, "grad_norm": 3.681917428970337, "learning_rate": 9.922200888406142e-06, "loss": 1.1752, "step": 1933 }, { "epoch": 0.5175274284185175, "grad_norm": 3.566633701324463, "learning_rate": 9.922045306853542e-06, "loss": 1.0806, "step": 1934 }, { "epoch": 0.5177950227455178, "grad_norm": 3.5221433639526367, "learning_rate": 9.921889571113629e-06, "loss": 1.1242, "step": 1935 }, { "epoch": 0.5180626170725181, "grad_norm": 3.574681043624878, "learning_rate": 9.921733681191283e-06, "loss": 1.117, "step": 1936 }, { "epoch": 0.5183302113995183, "grad_norm": 4.224633693695068, "learning_rate": 9.921577637091388e-06, "loss": 1.2129, "step": 1937 }, { "epoch": 0.5185978057265186, "grad_norm": 3.799368381500244, "learning_rate": 9.92142143881883e-06, "loss": 1.0609, "step": 1938 }, { "epoch": 0.5188654000535189, "grad_norm": 3.3646318912506104, "learning_rate": 9.921265086378504e-06, "loss": 1.1139, "step": 1939 }, { "epoch": 0.5191329943805192, "grad_norm": 4.179952621459961, "learning_rate": 9.921108579775307e-06, "loss": 1.2536, "step": 1940 }, { "epoch": 0.5194005887075194, "grad_norm": 3.8131916522979736, "learning_rate": 9.920951919014144e-06, "loss": 1.1239, "step": 1941 }, { "epoch": 0.5196681830345197, "grad_norm": 3.50144624710083, "learning_rate": 9.920795104099919e-06, "loss": 1.0744, "step": 1942 }, { "epoch": 0.51993577736152, "grad_norm": 3.8942971229553223, "learning_rate": 9.920638135037545e-06, "loss": 1.1104, "step": 1943 }, { "epoch": 0.5202033716885202, "grad_norm": 3.6234724521636963, "learning_rate": 9.920481011831941e-06, "loss": 1.1766, "step": 1944 }, { "epoch": 0.5204709660155205, "grad_norm": 4.271646022796631, "learning_rate": 9.92032373448803e-06, "loss": 1.3459, "step": 1945 }, { "epoch": 0.5207385603425208, "grad_norm": 3.910745143890381, "learning_rate": 9.920166303010737e-06, "loss": 1.1466, "step": 1946 }, { "epoch": 0.521006154669521, "grad_norm": 3.472041606903076, "learning_rate": 9.92000871740499e-06, "loss": 1.2786, "step": 1947 }, { "epoch": 0.5212737489965212, "grad_norm": 3.5486903190612793, "learning_rate": 9.919850977675732e-06, "loss": 1.1269, "step": 1948 }, { "epoch": 0.5215413433235215, "grad_norm": 3.486093044281006, "learning_rate": 9.919693083827902e-06, "loss": 1.0447, "step": 1949 }, { "epoch": 0.5218089376505218, "grad_norm": 3.836215019226074, "learning_rate": 9.919535035866444e-06, "loss": 1.179, "step": 1950 }, { "epoch": 0.5220765319775221, "grad_norm": 3.5467727184295654, "learning_rate": 9.919376833796312e-06, "loss": 1.0668, "step": 1951 }, { "epoch": 0.5223441263045223, "grad_norm": 3.5442044734954834, "learning_rate": 9.91921847762246e-06, "loss": 1.0542, "step": 1952 }, { "epoch": 0.5226117206315226, "grad_norm": 3.7540347576141357, "learning_rate": 9.919059967349848e-06, "loss": 1.0402, "step": 1953 }, { "epoch": 0.5228793149585229, "grad_norm": 4.026261329650879, "learning_rate": 9.918901302983445e-06, "loss": 1.2437, "step": 1954 }, { "epoch": 0.5231469092855231, "grad_norm": 3.6572134494781494, "learning_rate": 9.918742484528218e-06, "loss": 1.1397, "step": 1955 }, { "epoch": 0.5234145036125234, "grad_norm": 3.5838277339935303, "learning_rate": 9.918583511989142e-06, "loss": 1.0844, "step": 1956 }, { "epoch": 0.5236820979395237, "grad_norm": 3.8754079341888428, "learning_rate": 9.918424385371199e-06, "loss": 1.2264, "step": 1957 }, { "epoch": 0.523949692266524, "grad_norm": 3.196148633956909, "learning_rate": 9.918265104679371e-06, "loss": 1.0584, "step": 1958 }, { "epoch": 0.5242172865935242, "grad_norm": 4.228190898895264, "learning_rate": 9.918105669918652e-06, "loss": 1.2559, "step": 1959 }, { "epoch": 0.5244848809205245, "grad_norm": 3.834376573562622, "learning_rate": 9.917946081094033e-06, "loss": 1.0941, "step": 1960 }, { "epoch": 0.5247524752475248, "grad_norm": 3.5881540775299072, "learning_rate": 9.917786338210513e-06, "loss": 1.1777, "step": 1961 }, { "epoch": 0.5250200695745251, "grad_norm": 3.671957492828369, "learning_rate": 9.917626441273099e-06, "loss": 1.2193, "step": 1962 }, { "epoch": 0.5252876639015253, "grad_norm": 3.508430242538452, "learning_rate": 9.917466390286797e-06, "loss": 1.1494, "step": 1963 }, { "epoch": 0.5255552582285256, "grad_norm": 4.060336112976074, "learning_rate": 9.917306185256621e-06, "loss": 1.2024, "step": 1964 }, { "epoch": 0.5258228525555259, "grad_norm": 3.5298852920532227, "learning_rate": 9.91714582618759e-06, "loss": 1.1166, "step": 1965 }, { "epoch": 0.526090446882526, "grad_norm": 3.5156521797180176, "learning_rate": 9.91698531308473e-06, "loss": 1.1366, "step": 1966 }, { "epoch": 0.5263580412095263, "grad_norm": 3.63799786567688, "learning_rate": 9.916824645953065e-06, "loss": 1.2219, "step": 1967 }, { "epoch": 0.5266256355365266, "grad_norm": 3.7056069374084473, "learning_rate": 9.916663824797633e-06, "loss": 1.16, "step": 1968 }, { "epoch": 0.5268932298635269, "grad_norm": 3.2435388565063477, "learning_rate": 9.916502849623467e-06, "loss": 1.0117, "step": 1969 }, { "epoch": 0.5271608241905271, "grad_norm": 3.5529932975769043, "learning_rate": 9.916341720435609e-06, "loss": 1.0804, "step": 1970 }, { "epoch": 0.5274284185175274, "grad_norm": 3.3724541664123535, "learning_rate": 9.91618043723911e-06, "loss": 1.0444, "step": 1971 }, { "epoch": 0.5276960128445277, "grad_norm": 3.614671230316162, "learning_rate": 9.916019000039024e-06, "loss": 1.0751, "step": 1972 }, { "epoch": 0.527963607171528, "grad_norm": 3.8645894527435303, "learning_rate": 9.915857408840405e-06, "loss": 1.25, "step": 1973 }, { "epoch": 0.5282312014985282, "grad_norm": 3.3444855213165283, "learning_rate": 9.915695663648315e-06, "loss": 1.0344, "step": 1974 }, { "epoch": 0.5284987958255285, "grad_norm": 3.8077821731567383, "learning_rate": 9.91553376446782e-06, "loss": 1.1244, "step": 1975 }, { "epoch": 0.5287663901525288, "grad_norm": 3.517341375350952, "learning_rate": 9.915371711303994e-06, "loss": 1.1202, "step": 1976 }, { "epoch": 0.529033984479529, "grad_norm": 4.2117767333984375, "learning_rate": 9.915209504161914e-06, "loss": 1.2998, "step": 1977 }, { "epoch": 0.5293015788065293, "grad_norm": 3.684497117996216, "learning_rate": 9.915047143046656e-06, "loss": 1.1494, "step": 1978 }, { "epoch": 0.5295691731335296, "grad_norm": 4.2827630043029785, "learning_rate": 9.914884627963312e-06, "loss": 1.3014, "step": 1979 }, { "epoch": 0.5298367674605299, "grad_norm": 3.7791380882263184, "learning_rate": 9.914721958916971e-06, "loss": 1.2749, "step": 1980 }, { "epoch": 0.5301043617875301, "grad_norm": 3.7178707122802734, "learning_rate": 9.91455913591273e-06, "loss": 1.0304, "step": 1981 }, { "epoch": 0.5303719561145304, "grad_norm": 3.6490297317504883, "learning_rate": 9.914396158955685e-06, "loss": 1.0867, "step": 1982 }, { "epoch": 0.5306395504415307, "grad_norm": 4.041894912719727, "learning_rate": 9.914233028050945e-06, "loss": 1.1857, "step": 1983 }, { "epoch": 0.530907144768531, "grad_norm": 3.4716479778289795, "learning_rate": 9.91406974320362e-06, "loss": 1.0954, "step": 1984 }, { "epoch": 0.5311747390955311, "grad_norm": 3.4045979976654053, "learning_rate": 9.913906304418825e-06, "loss": 1.2435, "step": 1985 }, { "epoch": 0.5314423334225314, "grad_norm": 3.823096752166748, "learning_rate": 9.91374271170168e-06, "loss": 1.1779, "step": 1986 }, { "epoch": 0.5317099277495317, "grad_norm": 3.7356925010681152, "learning_rate": 9.91357896505731e-06, "loss": 1.2326, "step": 1987 }, { "epoch": 0.5319775220765319, "grad_norm": 3.4389915466308594, "learning_rate": 9.91341506449084e-06, "loss": 1.0635, "step": 1988 }, { "epoch": 0.5322451164035322, "grad_norm": 3.3921926021575928, "learning_rate": 9.913251010007413e-06, "loss": 1.0636, "step": 1989 }, { "epoch": 0.5325127107305325, "grad_norm": 3.5323266983032227, "learning_rate": 9.913086801612159e-06, "loss": 1.0485, "step": 1990 }, { "epoch": 0.5327803050575328, "grad_norm": 3.2040328979492188, "learning_rate": 9.91292243931023e-06, "loss": 1.0664, "step": 1991 }, { "epoch": 0.533047899384533, "grad_norm": 3.604896068572998, "learning_rate": 9.912757923106769e-06, "loss": 1.1632, "step": 1992 }, { "epoch": 0.5333154937115333, "grad_norm": 3.466099262237549, "learning_rate": 9.91259325300693e-06, "loss": 1.118, "step": 1993 }, { "epoch": 0.5335830880385336, "grad_norm": 3.788372039794922, "learning_rate": 9.912428429015874e-06, "loss": 1.2205, "step": 1994 }, { "epoch": 0.5338506823655339, "grad_norm": 3.699796199798584, "learning_rate": 9.912263451138764e-06, "loss": 1.0773, "step": 1995 }, { "epoch": 0.5341182766925341, "grad_norm": 3.928880453109741, "learning_rate": 9.912098319380767e-06, "loss": 1.24, "step": 1996 }, { "epoch": 0.5343858710195344, "grad_norm": 3.5852925777435303, "learning_rate": 9.911933033747056e-06, "loss": 1.0727, "step": 1997 }, { "epoch": 0.5346534653465347, "grad_norm": 4.054876327514648, "learning_rate": 9.91176759424281e-06, "loss": 1.1812, "step": 1998 }, { "epoch": 0.5349210596735349, "grad_norm": 3.9897444248199463, "learning_rate": 9.91160200087321e-06, "loss": 1.2996, "step": 1999 }, { "epoch": 0.5351886540005352, "grad_norm": 3.893026113510132, "learning_rate": 9.911436253643445e-06, "loss": 1.1287, "step": 2000 }, { "epoch": 0.5351886540005352, "eval_loss": 1.1917240619659424, "eval_runtime": 11.6396, "eval_samples_per_second": 34.365, "eval_steps_per_second": 4.296, "step": 2000 }, { "epoch": 0.5354562483275355, "grad_norm": 4.189493656158447, "learning_rate": 9.911270352558703e-06, "loss": 1.2612, "step": 2001 }, { "epoch": 0.5357238426545358, "grad_norm": 3.7188894748687744, "learning_rate": 9.911104297624186e-06, "loss": 1.1238, "step": 2002 }, { "epoch": 0.535991436981536, "grad_norm": 3.495906352996826, "learning_rate": 9.910938088845095e-06, "loss": 1.0895, "step": 2003 }, { "epoch": 0.5362590313085362, "grad_norm": 3.8715004920959473, "learning_rate": 9.910771726226634e-06, "loss": 1.1578, "step": 2004 }, { "epoch": 0.5365266256355365, "grad_norm": 5.872176170349121, "learning_rate": 9.910605209774016e-06, "loss": 1.2899, "step": 2005 }, { "epoch": 0.5367942199625368, "grad_norm": 3.8072023391723633, "learning_rate": 9.910438539492457e-06, "loss": 1.0038, "step": 2006 }, { "epoch": 0.537061814289537, "grad_norm": 3.388889789581299, "learning_rate": 9.91027171538718e-06, "loss": 1.0829, "step": 2007 }, { "epoch": 0.5373294086165373, "grad_norm": 3.782205104827881, "learning_rate": 9.910104737463406e-06, "loss": 1.1912, "step": 2008 }, { "epoch": 0.5375970029435376, "grad_norm": 3.77671217918396, "learning_rate": 9.90993760572637e-06, "loss": 1.2044, "step": 2009 }, { "epoch": 0.5378645972705378, "grad_norm": 3.633802652359009, "learning_rate": 9.909770320181306e-06, "loss": 1.3179, "step": 2010 }, { "epoch": 0.5381321915975381, "grad_norm": 3.744126558303833, "learning_rate": 9.909602880833458e-06, "loss": 1.1907, "step": 2011 }, { "epoch": 0.5383997859245384, "grad_norm": 3.903366804122925, "learning_rate": 9.909435287688065e-06, "loss": 1.1737, "step": 2012 }, { "epoch": 0.5386673802515387, "grad_norm": 3.7042882442474365, "learning_rate": 9.90926754075038e-06, "loss": 1.2119, "step": 2013 }, { "epoch": 0.5389349745785389, "grad_norm": 3.662655830383301, "learning_rate": 9.90909964002566e-06, "loss": 1.1722, "step": 2014 }, { "epoch": 0.5392025689055392, "grad_norm": 3.9184234142303467, "learning_rate": 9.90893158551916e-06, "loss": 1.176, "step": 2015 }, { "epoch": 0.5394701632325395, "grad_norm": 3.6793618202209473, "learning_rate": 9.90876337723615e-06, "loss": 1.1942, "step": 2016 }, { "epoch": 0.5397377575595398, "grad_norm": 3.438577175140381, "learning_rate": 9.908595015181893e-06, "loss": 1.0737, "step": 2017 }, { "epoch": 0.54000535188654, "grad_norm": 3.8159797191619873, "learning_rate": 9.908426499361668e-06, "loss": 1.2024, "step": 2018 }, { "epoch": 0.5402729462135403, "grad_norm": 3.6021339893341064, "learning_rate": 9.908257829780752e-06, "loss": 1.0793, "step": 2019 }, { "epoch": 0.5405405405405406, "grad_norm": 3.960874319076538, "learning_rate": 9.908089006444427e-06, "loss": 1.2732, "step": 2020 }, { "epoch": 0.5408081348675408, "grad_norm": 3.724120616912842, "learning_rate": 9.907920029357986e-06, "loss": 1.1938, "step": 2021 }, { "epoch": 0.541075729194541, "grad_norm": 3.7229902744293213, "learning_rate": 9.90775089852672e-06, "loss": 1.124, "step": 2022 }, { "epoch": 0.5413433235215414, "grad_norm": 3.4035604000091553, "learning_rate": 9.907581613955924e-06, "loss": 1.0212, "step": 2023 }, { "epoch": 0.5416109178485417, "grad_norm": 3.6770973205566406, "learning_rate": 9.907412175650905e-06, "loss": 1.0409, "step": 2024 }, { "epoch": 0.5418785121755418, "grad_norm": 3.5748701095581055, "learning_rate": 9.907242583616972e-06, "loss": 1.0902, "step": 2025 }, { "epoch": 0.5421461065025421, "grad_norm": 4.297303199768066, "learning_rate": 9.907072837859434e-06, "loss": 1.1205, "step": 2026 }, { "epoch": 0.5424137008295424, "grad_norm": 3.765982151031494, "learning_rate": 9.90690293838361e-06, "loss": 1.2402, "step": 2027 }, { "epoch": 0.5426812951565427, "grad_norm": 3.948046922683716, "learning_rate": 9.906732885194821e-06, "loss": 1.2607, "step": 2028 }, { "epoch": 0.5429488894835429, "grad_norm": 3.599590539932251, "learning_rate": 9.906562678298394e-06, "loss": 1.113, "step": 2029 }, { "epoch": 0.5432164838105432, "grad_norm": 3.43281626701355, "learning_rate": 9.906392317699665e-06, "loss": 1.0782, "step": 2030 }, { "epoch": 0.5434840781375435, "grad_norm": 3.7561564445495605, "learning_rate": 9.906221803403967e-06, "loss": 1.2796, "step": 2031 }, { "epoch": 0.5437516724645437, "grad_norm": 3.3608622550964355, "learning_rate": 9.90605113541664e-06, "loss": 1.1263, "step": 2032 }, { "epoch": 0.544019266791544, "grad_norm": 3.4457077980041504, "learning_rate": 9.905880313743035e-06, "loss": 1.1016, "step": 2033 }, { "epoch": 0.5442868611185443, "grad_norm": 3.601628065109253, "learning_rate": 9.905709338388499e-06, "loss": 1.1218, "step": 2034 }, { "epoch": 0.5445544554455446, "grad_norm": 3.8327248096466064, "learning_rate": 9.90553820935839e-06, "loss": 1.0964, "step": 2035 }, { "epoch": 0.5448220497725448, "grad_norm": 3.6931264400482178, "learning_rate": 9.905366926658068e-06, "loss": 1.2855, "step": 2036 }, { "epoch": 0.5450896440995451, "grad_norm": 3.9936089515686035, "learning_rate": 9.9051954902929e-06, "loss": 1.2546, "step": 2037 }, { "epoch": 0.5453572384265454, "grad_norm": 4.2173991203308105, "learning_rate": 9.905023900268255e-06, "loss": 1.2468, "step": 2038 }, { "epoch": 0.5456248327535457, "grad_norm": 3.5092899799346924, "learning_rate": 9.904852156589508e-06, "loss": 1.0156, "step": 2039 }, { "epoch": 0.5458924270805459, "grad_norm": 3.5375232696533203, "learning_rate": 9.90468025926204e-06, "loss": 1.1003, "step": 2040 }, { "epoch": 0.5461600214075462, "grad_norm": 3.232635974884033, "learning_rate": 9.904508208291236e-06, "loss": 1.1159, "step": 2041 }, { "epoch": 0.5464276157345465, "grad_norm": 3.6317005157470703, "learning_rate": 9.904336003682484e-06, "loss": 1.2561, "step": 2042 }, { "epoch": 0.5466952100615466, "grad_norm": 3.4912993907928467, "learning_rate": 9.90416364544118e-06, "loss": 1.2616, "step": 2043 }, { "epoch": 0.5469628043885469, "grad_norm": 3.744119882583618, "learning_rate": 9.903991133572722e-06, "loss": 1.1231, "step": 2044 }, { "epoch": 0.5472303987155472, "grad_norm": 3.8304286003112793, "learning_rate": 9.903818468082515e-06, "loss": 1.1488, "step": 2045 }, { "epoch": 0.5474979930425475, "grad_norm": 4.009277820587158, "learning_rate": 9.903645648975967e-06, "loss": 1.1767, "step": 2046 }, { "epoch": 0.5477655873695477, "grad_norm": 3.655991315841675, "learning_rate": 9.903472676258494e-06, "loss": 1.1274, "step": 2047 }, { "epoch": 0.548033181696548, "grad_norm": 3.522969961166382, "learning_rate": 9.903299549935514e-06, "loss": 1.0944, "step": 2048 }, { "epoch": 0.5483007760235483, "grad_norm": 3.9753992557525635, "learning_rate": 9.903126270012446e-06, "loss": 1.1597, "step": 2049 }, { "epoch": 0.5485683703505486, "grad_norm": 3.4897477626800537, "learning_rate": 9.902952836494724e-06, "loss": 1.213, "step": 2050 }, { "epoch": 0.5488359646775488, "grad_norm": 3.901291847229004, "learning_rate": 9.902779249387777e-06, "loss": 1.1803, "step": 2051 }, { "epoch": 0.5491035590045491, "grad_norm": 3.8959672451019287, "learning_rate": 9.902605508697045e-06, "loss": 1.1598, "step": 2052 }, { "epoch": 0.5493711533315494, "grad_norm": 3.5497238636016846, "learning_rate": 9.90243161442797e-06, "loss": 1.1193, "step": 2053 }, { "epoch": 0.5496387476585496, "grad_norm": 3.6735222339630127, "learning_rate": 9.902257566585997e-06, "loss": 1.2209, "step": 2054 }, { "epoch": 0.5499063419855499, "grad_norm": 3.902233123779297, "learning_rate": 9.902083365176583e-06, "loss": 1.2137, "step": 2055 }, { "epoch": 0.5501739363125502, "grad_norm": 3.133127212524414, "learning_rate": 9.90190901020518e-06, "loss": 1.0771, "step": 2056 }, { "epoch": 0.5504415306395505, "grad_norm": 3.489025115966797, "learning_rate": 9.901734501677254e-06, "loss": 1.0428, "step": 2057 }, { "epoch": 0.5507091249665507, "grad_norm": 3.8350815773010254, "learning_rate": 9.90155983959827e-06, "loss": 1.1333, "step": 2058 }, { "epoch": 0.550976719293551, "grad_norm": 3.393089771270752, "learning_rate": 9.901385023973698e-06, "loss": 1.1228, "step": 2059 }, { "epoch": 0.5512443136205513, "grad_norm": 3.4112391471862793, "learning_rate": 9.901210054809015e-06, "loss": 1.0732, "step": 2060 }, { "epoch": 0.5515119079475516, "grad_norm": 3.4298675060272217, "learning_rate": 9.901034932109702e-06, "loss": 1.1072, "step": 2061 }, { "epoch": 0.5517795022745517, "grad_norm": 3.8485376834869385, "learning_rate": 9.900859655881248e-06, "loss": 1.2126, "step": 2062 }, { "epoch": 0.552047096601552, "grad_norm": 3.713818073272705, "learning_rate": 9.90068422612914e-06, "loss": 1.0999, "step": 2063 }, { "epoch": 0.5523146909285523, "grad_norm": 3.7916266918182373, "learning_rate": 9.900508642858874e-06, "loss": 1.2815, "step": 2064 }, { "epoch": 0.5525822852555525, "grad_norm": 3.449904203414917, "learning_rate": 9.900332906075951e-06, "loss": 1.056, "step": 2065 }, { "epoch": 0.5528498795825528, "grad_norm": 3.417433500289917, "learning_rate": 9.900157015785876e-06, "loss": 0.9831, "step": 2066 }, { "epoch": 0.5531174739095531, "grad_norm": 4.198076248168945, "learning_rate": 9.899980971994158e-06, "loss": 1.2668, "step": 2067 }, { "epoch": 0.5533850682365534, "grad_norm": 3.3924946784973145, "learning_rate": 9.899804774706314e-06, "loss": 1.1, "step": 2068 }, { "epoch": 0.5536526625635536, "grad_norm": 3.6874961853027344, "learning_rate": 9.899628423927861e-06, "loss": 1.2336, "step": 2069 }, { "epoch": 0.5539202568905539, "grad_norm": 3.614410161972046, "learning_rate": 9.899451919664325e-06, "loss": 1.1163, "step": 2070 }, { "epoch": 0.5541878512175542, "grad_norm": 3.503385066986084, "learning_rate": 9.899275261921236e-06, "loss": 1.1361, "step": 2071 }, { "epoch": 0.5544554455445545, "grad_norm": 3.857766628265381, "learning_rate": 9.899098450704125e-06, "loss": 1.2757, "step": 2072 }, { "epoch": 0.5547230398715547, "grad_norm": 3.7878856658935547, "learning_rate": 9.898921486018532e-06, "loss": 1.1438, "step": 2073 }, { "epoch": 0.554990634198555, "grad_norm": 3.2437705993652344, "learning_rate": 9.898744367870001e-06, "loss": 1.0622, "step": 2074 }, { "epoch": 0.5552582285255553, "grad_norm": 3.193298816680908, "learning_rate": 9.898567096264082e-06, "loss": 1.0384, "step": 2075 }, { "epoch": 0.5555258228525555, "grad_norm": 3.3267760276794434, "learning_rate": 9.898389671206324e-06, "loss": 1.0635, "step": 2076 }, { "epoch": 0.5557934171795558, "grad_norm": 3.255155086517334, "learning_rate": 9.898212092702288e-06, "loss": 1.0574, "step": 2077 }, { "epoch": 0.5560610115065561, "grad_norm": 3.881344795227051, "learning_rate": 9.898034360757538e-06, "loss": 1.2048, "step": 2078 }, { "epoch": 0.5563286058335564, "grad_norm": 3.6974213123321533, "learning_rate": 9.897856475377638e-06, "loss": 1.2133, "step": 2079 }, { "epoch": 0.5565962001605566, "grad_norm": 3.4741365909576416, "learning_rate": 9.897678436568164e-06, "loss": 1.1787, "step": 2080 }, { "epoch": 0.5568637944875569, "grad_norm": 3.6926300525665283, "learning_rate": 9.89750024433469e-06, "loss": 1.1299, "step": 2081 }, { "epoch": 0.5571313888145571, "grad_norm": 3.9215118885040283, "learning_rate": 9.8973218986828e-06, "loss": 1.2031, "step": 2082 }, { "epoch": 0.5573989831415574, "grad_norm": 3.4052512645721436, "learning_rate": 9.897143399618081e-06, "loss": 1.1094, "step": 2083 }, { "epoch": 0.5576665774685576, "grad_norm": 3.8671302795410156, "learning_rate": 9.896964747146125e-06, "loss": 1.2339, "step": 2084 }, { "epoch": 0.5579341717955579, "grad_norm": 3.723543643951416, "learning_rate": 9.896785941272524e-06, "loss": 1.2115, "step": 2085 }, { "epoch": 0.5582017661225582, "grad_norm": 3.7372453212738037, "learning_rate": 9.896606982002886e-06, "loss": 1.1701, "step": 2086 }, { "epoch": 0.5584693604495584, "grad_norm": 3.7154757976531982, "learning_rate": 9.896427869342812e-06, "loss": 1.0744, "step": 2087 }, { "epoch": 0.5587369547765587, "grad_norm": 3.733175039291382, "learning_rate": 9.896248603297915e-06, "loss": 1.0824, "step": 2088 }, { "epoch": 0.559004549103559, "grad_norm": 3.589911460876465, "learning_rate": 9.896069183873809e-06, "loss": 1.1208, "step": 2089 }, { "epoch": 0.5592721434305593, "grad_norm": 3.778308868408203, "learning_rate": 9.895889611076119e-06, "loss": 1.2553, "step": 2090 }, { "epoch": 0.5595397377575595, "grad_norm": 3.737415313720703, "learning_rate": 9.895709884910464e-06, "loss": 1.1706, "step": 2091 }, { "epoch": 0.5598073320845598, "grad_norm": 3.6272811889648438, "learning_rate": 9.895530005382478e-06, "loss": 1.099, "step": 2092 }, { "epoch": 0.5600749264115601, "grad_norm": 3.2790331840515137, "learning_rate": 9.895349972497796e-06, "loss": 0.9707, "step": 2093 }, { "epoch": 0.5603425207385604, "grad_norm": 3.618961811065674, "learning_rate": 9.895169786262055e-06, "loss": 1.1975, "step": 2094 }, { "epoch": 0.5606101150655606, "grad_norm": 3.8534488677978516, "learning_rate": 9.894989446680901e-06, "loss": 1.2889, "step": 2095 }, { "epoch": 0.5608777093925609, "grad_norm": 3.748040199279785, "learning_rate": 9.894808953759984e-06, "loss": 1.1573, "step": 2096 }, { "epoch": 0.5611453037195612, "grad_norm": 3.8036909103393555, "learning_rate": 9.894628307504959e-06, "loss": 1.0905, "step": 2097 }, { "epoch": 0.5614128980465614, "grad_norm": 3.3763818740844727, "learning_rate": 9.894447507921482e-06, "loss": 0.9967, "step": 2098 }, { "epoch": 0.5616804923735617, "grad_norm": 3.4757957458496094, "learning_rate": 9.894266555015218e-06, "loss": 1.199, "step": 2099 }, { "epoch": 0.561948086700562, "grad_norm": 3.526400089263916, "learning_rate": 9.894085448791836e-06, "loss": 1.3028, "step": 2100 }, { "epoch": 0.5622156810275623, "grad_norm": 3.7849979400634766, "learning_rate": 9.89390418925701e-06, "loss": 1.166, "step": 2101 }, { "epoch": 0.5624832753545624, "grad_norm": 3.8639450073242188, "learning_rate": 9.893722776416415e-06, "loss": 1.1507, "step": 2102 }, { "epoch": 0.5627508696815627, "grad_norm": 3.6054041385650635, "learning_rate": 9.893541210275736e-06, "loss": 1.216, "step": 2103 }, { "epoch": 0.563018464008563, "grad_norm": 3.552934408187866, "learning_rate": 9.893359490840662e-06, "loss": 1.2079, "step": 2104 }, { "epoch": 0.5632860583355633, "grad_norm": 3.804652690887451, "learning_rate": 9.893177618116885e-06, "loss": 1.2398, "step": 2105 }, { "epoch": 0.5635536526625635, "grad_norm": 3.506537675857544, "learning_rate": 9.892995592110099e-06, "loss": 1.1581, "step": 2106 }, { "epoch": 0.5638212469895638, "grad_norm": 3.4469141960144043, "learning_rate": 9.89281341282601e-06, "loss": 1.1475, "step": 2107 }, { "epoch": 0.5640888413165641, "grad_norm": 3.478013753890991, "learning_rate": 9.892631080270325e-06, "loss": 1.2376, "step": 2108 }, { "epoch": 0.5643564356435643, "grad_norm": 3.774752378463745, "learning_rate": 9.89244859444875e-06, "loss": 1.1787, "step": 2109 }, { "epoch": 0.5646240299705646, "grad_norm": 3.786384344100952, "learning_rate": 9.89226595536701e-06, "loss": 1.2119, "step": 2110 }, { "epoch": 0.5648916242975649, "grad_norm": 3.7795796394348145, "learning_rate": 9.892083163030822e-06, "loss": 1.1884, "step": 2111 }, { "epoch": 0.5651592186245652, "grad_norm": 3.500213146209717, "learning_rate": 9.89190021744591e-06, "loss": 1.01, "step": 2112 }, { "epoch": 0.5654268129515654, "grad_norm": 3.490860939025879, "learning_rate": 9.891717118618008e-06, "loss": 1.1551, "step": 2113 }, { "epoch": 0.5656944072785657, "grad_norm": 3.658153772354126, "learning_rate": 9.891533866552852e-06, "loss": 1.2155, "step": 2114 }, { "epoch": 0.565962001605566, "grad_norm": 3.7145233154296875, "learning_rate": 9.891350461256179e-06, "loss": 1.2243, "step": 2115 }, { "epoch": 0.5662295959325663, "grad_norm": 3.5172886848449707, "learning_rate": 9.89116690273374e-06, "loss": 1.206, "step": 2116 }, { "epoch": 0.5664971902595665, "grad_norm": 3.58321475982666, "learning_rate": 9.890983190991278e-06, "loss": 1.2536, "step": 2117 }, { "epoch": 0.5667647845865668, "grad_norm": 3.534895420074463, "learning_rate": 9.890799326034556e-06, "loss": 1.1384, "step": 2118 }, { "epoch": 0.5670323789135671, "grad_norm": 3.564685583114624, "learning_rate": 9.890615307869326e-06, "loss": 1.1677, "step": 2119 }, { "epoch": 0.5672999732405672, "grad_norm": 4.110241413116455, "learning_rate": 9.89043113650136e-06, "loss": 1.1706, "step": 2120 }, { "epoch": 0.5675675675675675, "grad_norm": 3.5671589374542236, "learning_rate": 9.890246811936421e-06, "loss": 1.1117, "step": 2121 }, { "epoch": 0.5678351618945678, "grad_norm": 3.429584264755249, "learning_rate": 9.890062334180286e-06, "loss": 1.1273, "step": 2122 }, { "epoch": 0.5681027562215681, "grad_norm": 3.8296971321105957, "learning_rate": 9.889877703238732e-06, "loss": 1.2361, "step": 2123 }, { "epoch": 0.5683703505485683, "grad_norm": 3.43332839012146, "learning_rate": 9.889692919117546e-06, "loss": 1.0847, "step": 2124 }, { "epoch": 0.5686379448755686, "grad_norm": 3.417013168334961, "learning_rate": 9.889507981822515e-06, "loss": 1.1709, "step": 2125 }, { "epoch": 0.5689055392025689, "grad_norm": 3.507187843322754, "learning_rate": 9.88932289135943e-06, "loss": 1.1102, "step": 2126 }, { "epoch": 0.5691731335295692, "grad_norm": 3.821469783782959, "learning_rate": 9.889137647734094e-06, "loss": 1.1736, "step": 2127 }, { "epoch": 0.5694407278565694, "grad_norm": 3.63112735748291, "learning_rate": 9.888952250952305e-06, "loss": 1.1239, "step": 2128 }, { "epoch": 0.5697083221835697, "grad_norm": 4.068948745727539, "learning_rate": 9.888766701019873e-06, "loss": 1.2714, "step": 2129 }, { "epoch": 0.56997591651057, "grad_norm": 3.552907943725586, "learning_rate": 9.88858099794261e-06, "loss": 1.0754, "step": 2130 }, { "epoch": 0.5702435108375702, "grad_norm": 4.019528388977051, "learning_rate": 9.888395141726335e-06, "loss": 1.2183, "step": 2131 }, { "epoch": 0.5705111051645705, "grad_norm": 3.55165696144104, "learning_rate": 9.888209132376866e-06, "loss": 1.0137, "step": 2132 }, { "epoch": 0.5707786994915708, "grad_norm": 3.8330440521240234, "learning_rate": 9.888022969900036e-06, "loss": 1.2188, "step": 2133 }, { "epoch": 0.5710462938185711, "grad_norm": 3.5315418243408203, "learning_rate": 9.887836654301671e-06, "loss": 1.1769, "step": 2134 }, { "epoch": 0.5713138881455713, "grad_norm": 3.613337755203247, "learning_rate": 9.887650185587612e-06, "loss": 1.1539, "step": 2135 }, { "epoch": 0.5715814824725716, "grad_norm": 3.3528521060943604, "learning_rate": 9.887463563763695e-06, "loss": 1.1673, "step": 2136 }, { "epoch": 0.5718490767995719, "grad_norm": 3.672227382659912, "learning_rate": 9.887276788835772e-06, "loss": 1.3125, "step": 2137 }, { "epoch": 0.5721166711265722, "grad_norm": 3.4449851512908936, "learning_rate": 9.88708986080969e-06, "loss": 1.1545, "step": 2138 }, { "epoch": 0.5723842654535723, "grad_norm": 3.5263442993164062, "learning_rate": 9.886902779691306e-06, "loss": 1.1188, "step": 2139 }, { "epoch": 0.5726518597805726, "grad_norm": 3.499302864074707, "learning_rate": 9.88671554548648e-06, "loss": 1.2045, "step": 2140 }, { "epoch": 0.572919454107573, "grad_norm": 3.5615437030792236, "learning_rate": 9.886528158201076e-06, "loss": 1.1357, "step": 2141 }, { "epoch": 0.5731870484345731, "grad_norm": 3.0443129539489746, "learning_rate": 9.886340617840968e-06, "loss": 0.9957, "step": 2142 }, { "epoch": 0.5734546427615734, "grad_norm": 3.494044542312622, "learning_rate": 9.886152924412027e-06, "loss": 1.1044, "step": 2143 }, { "epoch": 0.5737222370885737, "grad_norm": 3.9444684982299805, "learning_rate": 9.885965077920135e-06, "loss": 1.2436, "step": 2144 }, { "epoch": 0.573989831415574, "grad_norm": 3.808692455291748, "learning_rate": 9.885777078371174e-06, "loss": 1.2591, "step": 2145 }, { "epoch": 0.5742574257425742, "grad_norm": 3.586069107055664, "learning_rate": 9.885588925771037e-06, "loss": 1.1695, "step": 2146 }, { "epoch": 0.5745250200695745, "grad_norm": 3.6232335567474365, "learning_rate": 9.885400620125616e-06, "loss": 1.2411, "step": 2147 }, { "epoch": 0.5747926143965748, "grad_norm": 4.283682346343994, "learning_rate": 9.885212161440808e-06, "loss": 1.2519, "step": 2148 }, { "epoch": 0.5750602087235751, "grad_norm": 3.391270160675049, "learning_rate": 9.885023549722518e-06, "loss": 1.1671, "step": 2149 }, { "epoch": 0.5753278030505753, "grad_norm": 3.8860385417938232, "learning_rate": 9.884834784976658e-06, "loss": 1.1987, "step": 2150 }, { "epoch": 0.5755953973775756, "grad_norm": 3.611828565597534, "learning_rate": 9.884645867209133e-06, "loss": 1.2138, "step": 2151 }, { "epoch": 0.5758629917045759, "grad_norm": 3.7692012786865234, "learning_rate": 9.884456796425869e-06, "loss": 1.2613, "step": 2152 }, { "epoch": 0.5761305860315761, "grad_norm": 3.578130006790161, "learning_rate": 9.884267572632786e-06, "loss": 1.2619, "step": 2153 }, { "epoch": 0.5763981803585764, "grad_norm": 3.362647771835327, "learning_rate": 9.884078195835812e-06, "loss": 1.138, "step": 2154 }, { "epoch": 0.5766657746855767, "grad_norm": 3.4358744621276855, "learning_rate": 9.883888666040876e-06, "loss": 1.1468, "step": 2155 }, { "epoch": 0.576933369012577, "grad_norm": 3.8814890384674072, "learning_rate": 9.88369898325392e-06, "loss": 1.2645, "step": 2156 }, { "epoch": 0.5772009633395772, "grad_norm": 3.75591778755188, "learning_rate": 9.883509147480883e-06, "loss": 1.2342, "step": 2157 }, { "epoch": 0.5774685576665775, "grad_norm": 3.7901089191436768, "learning_rate": 9.883319158727714e-06, "loss": 1.2423, "step": 2158 }, { "epoch": 0.5777361519935778, "grad_norm": 3.8552255630493164, "learning_rate": 9.88312901700036e-06, "loss": 1.2367, "step": 2159 }, { "epoch": 0.578003746320578, "grad_norm": 3.6209921836853027, "learning_rate": 9.882938722304785e-06, "loss": 1.0368, "step": 2160 }, { "epoch": 0.5782713406475782, "grad_norm": 3.403076171875, "learning_rate": 9.882748274646942e-06, "loss": 1.122, "step": 2161 }, { "epoch": 0.5785389349745785, "grad_norm": 3.6946861743927, "learning_rate": 9.882557674032804e-06, "loss": 1.2632, "step": 2162 }, { "epoch": 0.5788065293015788, "grad_norm": 3.478731393814087, "learning_rate": 9.882366920468336e-06, "loss": 1.1385, "step": 2163 }, { "epoch": 0.5790741236285791, "grad_norm": 3.849747896194458, "learning_rate": 9.882176013959517e-06, "loss": 1.1953, "step": 2164 }, { "epoch": 0.5793417179555793, "grad_norm": 3.2899606227874756, "learning_rate": 9.881984954512325e-06, "loss": 1.1515, "step": 2165 }, { "epoch": 0.5796093122825796, "grad_norm": 3.6500260829925537, "learning_rate": 9.881793742132748e-06, "loss": 1.0992, "step": 2166 }, { "epoch": 0.5798769066095799, "grad_norm": 3.4262735843658447, "learning_rate": 9.881602376826773e-06, "loss": 1.202, "step": 2167 }, { "epoch": 0.5801445009365801, "grad_norm": 3.7987382411956787, "learning_rate": 9.881410858600397e-06, "loss": 1.1983, "step": 2168 }, { "epoch": 0.5804120952635804, "grad_norm": 3.716843605041504, "learning_rate": 9.88121918745962e-06, "loss": 1.2688, "step": 2169 }, { "epoch": 0.5806796895905807, "grad_norm": 3.5449235439300537, "learning_rate": 9.881027363410441e-06, "loss": 1.1251, "step": 2170 }, { "epoch": 0.580947283917581, "grad_norm": 3.4094340801239014, "learning_rate": 9.880835386458873e-06, "loss": 1.1097, "step": 2171 }, { "epoch": 0.5812148782445812, "grad_norm": 3.626004934310913, "learning_rate": 9.880643256610931e-06, "loss": 1.2376, "step": 2172 }, { "epoch": 0.5814824725715815, "grad_norm": 3.6833388805389404, "learning_rate": 9.880450973872632e-06, "loss": 1.2113, "step": 2173 }, { "epoch": 0.5817500668985818, "grad_norm": 3.619957685470581, "learning_rate": 9.880258538250001e-06, "loss": 1.1476, "step": 2174 }, { "epoch": 0.5820176612255821, "grad_norm": 3.6567726135253906, "learning_rate": 9.880065949749063e-06, "loss": 1.1462, "step": 2175 }, { "epoch": 0.5822852555525823, "grad_norm": 3.6730329990386963, "learning_rate": 9.879873208375854e-06, "loss": 1.1644, "step": 2176 }, { "epoch": 0.5825528498795826, "grad_norm": 3.4919209480285645, "learning_rate": 9.879680314136409e-06, "loss": 1.1, "step": 2177 }, { "epoch": 0.5828204442065829, "grad_norm": 3.7565135955810547, "learning_rate": 9.879487267036774e-06, "loss": 1.176, "step": 2178 }, { "epoch": 0.583088038533583, "grad_norm": 3.6856677532196045, "learning_rate": 9.879294067082994e-06, "loss": 1.0928, "step": 2179 }, { "epoch": 0.5833556328605833, "grad_norm": 3.7307024002075195, "learning_rate": 9.87910071428112e-06, "loss": 1.2073, "step": 2180 }, { "epoch": 0.5836232271875836, "grad_norm": 3.657536745071411, "learning_rate": 9.878907208637214e-06, "loss": 1.1352, "step": 2181 }, { "epoch": 0.5838908215145839, "grad_norm": 3.54951810836792, "learning_rate": 9.878713550157331e-06, "loss": 1.1183, "step": 2182 }, { "epoch": 0.5841584158415841, "grad_norm": 3.6624770164489746, "learning_rate": 9.878519738847543e-06, "loss": 1.1002, "step": 2183 }, { "epoch": 0.5844260101685844, "grad_norm": 3.5097527503967285, "learning_rate": 9.87832577471392e-06, "loss": 1.2197, "step": 2184 }, { "epoch": 0.5846936044955847, "grad_norm": 3.292865037918091, "learning_rate": 9.878131657762535e-06, "loss": 1.0721, "step": 2185 }, { "epoch": 0.584961198822585, "grad_norm": 3.937479019165039, "learning_rate": 9.877937387999473e-06, "loss": 1.3144, "step": 2186 }, { "epoch": 0.5852287931495852, "grad_norm": 3.6731297969818115, "learning_rate": 9.877742965430816e-06, "loss": 1.0068, "step": 2187 }, { "epoch": 0.5854963874765855, "grad_norm": 3.6115329265594482, "learning_rate": 9.877548390062656e-06, "loss": 1.1998, "step": 2188 }, { "epoch": 0.5857639818035858, "grad_norm": 3.6412646770477295, "learning_rate": 9.87735366190109e-06, "loss": 1.1148, "step": 2189 }, { "epoch": 0.586031576130586, "grad_norm": 3.576279401779175, "learning_rate": 9.877158780952218e-06, "loss": 1.1437, "step": 2190 }, { "epoch": 0.5862991704575863, "grad_norm": 3.5560824871063232, "learning_rate": 9.876963747222142e-06, "loss": 1.1313, "step": 2191 }, { "epoch": 0.5865667647845866, "grad_norm": 3.5082075595855713, "learning_rate": 9.876768560716972e-06, "loss": 1.1694, "step": 2192 }, { "epoch": 0.5868343591115869, "grad_norm": 3.724195718765259, "learning_rate": 9.876573221442824e-06, "loss": 1.212, "step": 2193 }, { "epoch": 0.5871019534385871, "grad_norm": 3.5083227157592773, "learning_rate": 9.876377729405817e-06, "loss": 1.1469, "step": 2194 }, { "epoch": 0.5873695477655874, "grad_norm": 3.8225934505462646, "learning_rate": 9.876182084612076e-06, "loss": 1.2833, "step": 2195 }, { "epoch": 0.5876371420925877, "grad_norm": 4.439055919647217, "learning_rate": 9.875986287067726e-06, "loss": 1.2021, "step": 2196 }, { "epoch": 0.587904736419588, "grad_norm": 3.612614393234253, "learning_rate": 9.875790336778903e-06, "loss": 1.1595, "step": 2197 }, { "epoch": 0.5881723307465881, "grad_norm": 3.598160982131958, "learning_rate": 9.875594233751746e-06, "loss": 1.1245, "step": 2198 }, { "epoch": 0.5884399250735884, "grad_norm": 3.281412124633789, "learning_rate": 9.875397977992397e-06, "loss": 1.075, "step": 2199 }, { "epoch": 0.5887075194005887, "grad_norm": 3.0353622436523438, "learning_rate": 9.875201569507004e-06, "loss": 1.0529, "step": 2200 }, { "epoch": 0.5889751137275889, "grad_norm": 3.3993475437164307, "learning_rate": 9.875005008301719e-06, "loss": 1.2462, "step": 2201 }, { "epoch": 0.5892427080545892, "grad_norm": 3.3722541332244873, "learning_rate": 9.8748082943827e-06, "loss": 1.1508, "step": 2202 }, { "epoch": 0.5895103023815895, "grad_norm": 3.270134687423706, "learning_rate": 9.874611427756111e-06, "loss": 1.2163, "step": 2203 }, { "epoch": 0.5897778967085898, "grad_norm": 3.6814143657684326, "learning_rate": 9.874414408428116e-06, "loss": 1.2098, "step": 2204 }, { "epoch": 0.59004549103559, "grad_norm": 3.3593337535858154, "learning_rate": 9.874217236404889e-06, "loss": 1.1041, "step": 2205 }, { "epoch": 0.5903130853625903, "grad_norm": 3.556748628616333, "learning_rate": 9.874019911692606e-06, "loss": 1.1655, "step": 2206 }, { "epoch": 0.5905806796895906, "grad_norm": 3.696110486984253, "learning_rate": 9.873822434297448e-06, "loss": 1.0674, "step": 2207 }, { "epoch": 0.5908482740165909, "grad_norm": 3.481388807296753, "learning_rate": 9.873624804225602e-06, "loss": 1.1462, "step": 2208 }, { "epoch": 0.5911158683435911, "grad_norm": 3.828707695007324, "learning_rate": 9.873427021483256e-06, "loss": 1.1755, "step": 2209 }, { "epoch": 0.5913834626705914, "grad_norm": 3.4342329502105713, "learning_rate": 9.87322908607661e-06, "loss": 1.1296, "step": 2210 }, { "epoch": 0.5916510569975917, "grad_norm": 4.072646141052246, "learning_rate": 9.873030998011861e-06, "loss": 1.4232, "step": 2211 }, { "epoch": 0.5919186513245919, "grad_norm": 3.8405468463897705, "learning_rate": 9.872832757295216e-06, "loss": 1.2178, "step": 2212 }, { "epoch": 0.5921862456515922, "grad_norm": 3.6950206756591797, "learning_rate": 9.872634363932887e-06, "loss": 1.1332, "step": 2213 }, { "epoch": 0.5924538399785925, "grad_norm": 4.053956985473633, "learning_rate": 9.872435817931085e-06, "loss": 1.3148, "step": 2214 }, { "epoch": 0.5927214343055928, "grad_norm": 3.2921195030212402, "learning_rate": 9.87223711929603e-06, "loss": 1.0644, "step": 2215 }, { "epoch": 0.592989028632593, "grad_norm": 3.3384501934051514, "learning_rate": 9.87203826803395e-06, "loss": 1.1453, "step": 2216 }, { "epoch": 0.5932566229595932, "grad_norm": 3.292581558227539, "learning_rate": 9.871839264151071e-06, "loss": 1.0399, "step": 2217 }, { "epoch": 0.5935242172865935, "grad_norm": 3.5483226776123047, "learning_rate": 9.871640107653629e-06, "loss": 1.2065, "step": 2218 }, { "epoch": 0.5937918116135938, "grad_norm": 3.443068742752075, "learning_rate": 9.87144079854786e-06, "loss": 1.1305, "step": 2219 }, { "epoch": 0.594059405940594, "grad_norm": 3.495704412460327, "learning_rate": 9.871241336840009e-06, "loss": 1.1877, "step": 2220 }, { "epoch": 0.5943270002675943, "grad_norm": 3.619189739227295, "learning_rate": 9.871041722536326e-06, "loss": 1.1417, "step": 2221 }, { "epoch": 0.5945945945945946, "grad_norm": 3.7714147567749023, "learning_rate": 9.87084195564306e-06, "loss": 1.2656, "step": 2222 }, { "epoch": 0.5948621889215948, "grad_norm": 3.2320003509521484, "learning_rate": 9.870642036166474e-06, "loss": 0.9794, "step": 2223 }, { "epoch": 0.5951297832485951, "grad_norm": 3.6784067153930664, "learning_rate": 9.870441964112826e-06, "loss": 1.149, "step": 2224 }, { "epoch": 0.5953973775755954, "grad_norm": 3.8272829055786133, "learning_rate": 9.870241739488387e-06, "loss": 1.2293, "step": 2225 }, { "epoch": 0.5956649719025957, "grad_norm": 3.3917317390441895, "learning_rate": 9.870041362299428e-06, "loss": 1.0405, "step": 2226 }, { "epoch": 0.5959325662295959, "grad_norm": 3.6060194969177246, "learning_rate": 9.869840832552224e-06, "loss": 1.2424, "step": 2227 }, { "epoch": 0.5962001605565962, "grad_norm": 3.5458180904388428, "learning_rate": 9.86964015025306e-06, "loss": 1.2202, "step": 2228 }, { "epoch": 0.5964677548835965, "grad_norm": 3.5996251106262207, "learning_rate": 9.86943931540822e-06, "loss": 1.2383, "step": 2229 }, { "epoch": 0.5967353492105968, "grad_norm": 3.6928818225860596, "learning_rate": 9.869238328023996e-06, "loss": 1.0798, "step": 2230 }, { "epoch": 0.597002943537597, "grad_norm": 3.3863589763641357, "learning_rate": 9.869037188106684e-06, "loss": 1.0548, "step": 2231 }, { "epoch": 0.5972705378645973, "grad_norm": 3.744899272918701, "learning_rate": 9.868835895662588e-06, "loss": 1.1532, "step": 2232 }, { "epoch": 0.5975381321915976, "grad_norm": 4.080715656280518, "learning_rate": 9.868634450698009e-06, "loss": 1.2823, "step": 2233 }, { "epoch": 0.5978057265185978, "grad_norm": 4.020185947418213, "learning_rate": 9.868432853219259e-06, "loss": 1.3154, "step": 2234 }, { "epoch": 0.598073320845598, "grad_norm": 3.684755325317383, "learning_rate": 9.868231103232655e-06, "loss": 1.1825, "step": 2235 }, { "epoch": 0.5983409151725984, "grad_norm": 3.9021434783935547, "learning_rate": 9.868029200744515e-06, "loss": 1.3453, "step": 2236 }, { "epoch": 0.5986085094995987, "grad_norm": 3.224306344985962, "learning_rate": 9.867827145761164e-06, "loss": 1.1202, "step": 2237 }, { "epoch": 0.5988761038265988, "grad_norm": 3.318912982940674, "learning_rate": 9.86762493828893e-06, "loss": 1.0726, "step": 2238 }, { "epoch": 0.5991436981535991, "grad_norm": 3.5440762042999268, "learning_rate": 9.867422578334154e-06, "loss": 1.1485, "step": 2239 }, { "epoch": 0.5994112924805994, "grad_norm": 3.5095126628875732, "learning_rate": 9.867220065903167e-06, "loss": 1.1142, "step": 2240 }, { "epoch": 0.5996788868075997, "grad_norm": 3.494436502456665, "learning_rate": 9.867017401002316e-06, "loss": 1.0809, "step": 2241 }, { "epoch": 0.5999464811345999, "grad_norm": 3.985200881958008, "learning_rate": 9.86681458363795e-06, "loss": 1.1823, "step": 2242 }, { "epoch": 0.6002140754616002, "grad_norm": 3.566523313522339, "learning_rate": 9.866611613816425e-06, "loss": 1.2669, "step": 2243 }, { "epoch": 0.6004816697886005, "grad_norm": 3.53113055229187, "learning_rate": 9.866408491544095e-06, "loss": 1.0821, "step": 2244 }, { "epoch": 0.6007492641156007, "grad_norm": 3.8554863929748535, "learning_rate": 9.866205216827323e-06, "loss": 1.3485, "step": 2245 }, { "epoch": 0.601016858442601, "grad_norm": 3.4552130699157715, "learning_rate": 9.866001789672479e-06, "loss": 1.1277, "step": 2246 }, { "epoch": 0.6012844527696013, "grad_norm": 3.2801413536071777, "learning_rate": 9.865798210085935e-06, "loss": 1.0526, "step": 2247 }, { "epoch": 0.6015520470966016, "grad_norm": 3.6641762256622314, "learning_rate": 9.865594478074068e-06, "loss": 1.1389, "step": 2248 }, { "epoch": 0.6018196414236018, "grad_norm": 3.1713666915893555, "learning_rate": 9.865390593643261e-06, "loss": 0.9773, "step": 2249 }, { "epoch": 0.6020872357506021, "grad_norm": 3.2642340660095215, "learning_rate": 9.8651865567999e-06, "loss": 1.1162, "step": 2250 }, { "epoch": 0.6023548300776024, "grad_norm": 3.8581626415252686, "learning_rate": 9.864982367550375e-06, "loss": 1.2288, "step": 2251 }, { "epoch": 0.6026224244046027, "grad_norm": 3.619734525680542, "learning_rate": 9.864778025901086e-06, "loss": 1.1009, "step": 2252 }, { "epoch": 0.6028900187316029, "grad_norm": 3.6816861629486084, "learning_rate": 9.86457353185843e-06, "loss": 1.2656, "step": 2253 }, { "epoch": 0.6031576130586032, "grad_norm": 3.9430642127990723, "learning_rate": 9.864368885428816e-06, "loss": 1.2013, "step": 2254 }, { "epoch": 0.6034252073856035, "grad_norm": 3.3938138484954834, "learning_rate": 9.864164086618656e-06, "loss": 1.0831, "step": 2255 }, { "epoch": 0.6036928017126036, "grad_norm": 3.3266994953155518, "learning_rate": 9.863959135434361e-06, "loss": 1.1322, "step": 2256 }, { "epoch": 0.6039603960396039, "grad_norm": 3.3137824535369873, "learning_rate": 9.863754031882355e-06, "loss": 1.1232, "step": 2257 }, { "epoch": 0.6042279903666042, "grad_norm": 3.363191604614258, "learning_rate": 9.863548775969061e-06, "loss": 1.0118, "step": 2258 }, { "epoch": 0.6044955846936045, "grad_norm": 3.245950222015381, "learning_rate": 9.863343367700909e-06, "loss": 1.0168, "step": 2259 }, { "epoch": 0.6047631790206047, "grad_norm": 3.398611545562744, "learning_rate": 9.863137807084336e-06, "loss": 1.1561, "step": 2260 }, { "epoch": 0.605030773347605, "grad_norm": 3.793672800064087, "learning_rate": 9.862932094125778e-06, "loss": 1.0614, "step": 2261 }, { "epoch": 0.6052983676746053, "grad_norm": 3.716275691986084, "learning_rate": 9.86272622883168e-06, "loss": 1.2033, "step": 2262 }, { "epoch": 0.6055659620016056, "grad_norm": 3.418994903564453, "learning_rate": 9.862520211208493e-06, "loss": 1.1246, "step": 2263 }, { "epoch": 0.6058335563286058, "grad_norm": 3.4987545013427734, "learning_rate": 9.862314041262668e-06, "loss": 1.1269, "step": 2264 }, { "epoch": 0.6061011506556061, "grad_norm": 3.595693826675415, "learning_rate": 9.862107719000667e-06, "loss": 1.1729, "step": 2265 }, { "epoch": 0.6063687449826064, "grad_norm": 3.5446066856384277, "learning_rate": 9.861901244428949e-06, "loss": 1.1141, "step": 2266 }, { "epoch": 0.6066363393096066, "grad_norm": 3.108658790588379, "learning_rate": 9.861694617553983e-06, "loss": 1.0365, "step": 2267 }, { "epoch": 0.6069039336366069, "grad_norm": 3.6176912784576416, "learning_rate": 9.861487838382244e-06, "loss": 1.182, "step": 2268 }, { "epoch": 0.6071715279636072, "grad_norm": 3.7221384048461914, "learning_rate": 9.861280906920208e-06, "loss": 1.0479, "step": 2269 }, { "epoch": 0.6074391222906075, "grad_norm": 3.526144504547119, "learning_rate": 9.861073823174357e-06, "loss": 1.0778, "step": 2270 }, { "epoch": 0.6077067166176077, "grad_norm": 3.49381160736084, "learning_rate": 9.86086658715118e-06, "loss": 1.0654, "step": 2271 }, { "epoch": 0.607974310944608, "grad_norm": 3.485805034637451, "learning_rate": 9.860659198857166e-06, "loss": 1.138, "step": 2272 }, { "epoch": 0.6082419052716083, "grad_norm": 3.3944783210754395, "learning_rate": 9.860451658298813e-06, "loss": 1.1153, "step": 2273 }, { "epoch": 0.6085094995986086, "grad_norm": 3.5149385929107666, "learning_rate": 9.860243965482623e-06, "loss": 1.1654, "step": 2274 }, { "epoch": 0.6087770939256087, "grad_norm": 3.7925617694854736, "learning_rate": 9.860036120415102e-06, "loss": 1.2223, "step": 2275 }, { "epoch": 0.609044688252609, "grad_norm": 3.524855852127075, "learning_rate": 9.859828123102759e-06, "loss": 1.0867, "step": 2276 }, { "epoch": 0.6093122825796093, "grad_norm": 3.534085750579834, "learning_rate": 9.859619973552112e-06, "loss": 1.0719, "step": 2277 }, { "epoch": 0.6095798769066095, "grad_norm": 3.5767481327056885, "learning_rate": 9.859411671769682e-06, "loss": 1.2826, "step": 2278 }, { "epoch": 0.6098474712336098, "grad_norm": 3.26108980178833, "learning_rate": 9.859203217761993e-06, "loss": 1.0839, "step": 2279 }, { "epoch": 0.6101150655606101, "grad_norm": 3.6995849609375, "learning_rate": 9.858994611535572e-06, "loss": 1.2193, "step": 2280 }, { "epoch": 0.6103826598876104, "grad_norm": 3.7640321254730225, "learning_rate": 9.858785853096958e-06, "loss": 1.2932, "step": 2281 }, { "epoch": 0.6106502542146106, "grad_norm": 3.795732021331787, "learning_rate": 9.85857694245269e-06, "loss": 1.3135, "step": 2282 }, { "epoch": 0.6109178485416109, "grad_norm": 3.552950620651245, "learning_rate": 9.858367879609311e-06, "loss": 1.106, "step": 2283 }, { "epoch": 0.6111854428686112, "grad_norm": 3.506056547164917, "learning_rate": 9.85815866457337e-06, "loss": 1.0596, "step": 2284 }, { "epoch": 0.6114530371956115, "grad_norm": 3.822715997695923, "learning_rate": 9.857949297351423e-06, "loss": 1.1044, "step": 2285 }, { "epoch": 0.6117206315226117, "grad_norm": 3.263763427734375, "learning_rate": 9.857739777950026e-06, "loss": 1.0387, "step": 2286 }, { "epoch": 0.611988225849612, "grad_norm": 3.378865957260132, "learning_rate": 9.857530106375743e-06, "loss": 1.0867, "step": 2287 }, { "epoch": 0.6122558201766123, "grad_norm": 3.8504269123077393, "learning_rate": 9.857320282635143e-06, "loss": 1.2017, "step": 2288 }, { "epoch": 0.6125234145036125, "grad_norm": 3.375674247741699, "learning_rate": 9.857110306734798e-06, "loss": 1.0055, "step": 2289 }, { "epoch": 0.6127910088306128, "grad_norm": 3.5643208026885986, "learning_rate": 9.856900178681287e-06, "loss": 1.2542, "step": 2290 }, { "epoch": 0.6130586031576131, "grad_norm": 3.1768534183502197, "learning_rate": 9.856689898481191e-06, "loss": 0.9672, "step": 2291 }, { "epoch": 0.6133261974846134, "grad_norm": 3.2512409687042236, "learning_rate": 9.856479466141098e-06, "loss": 1.065, "step": 2292 }, { "epoch": 0.6135937918116136, "grad_norm": 3.486975908279419, "learning_rate": 9.8562688816676e-06, "loss": 1.2013, "step": 2293 }, { "epoch": 0.6138613861386139, "grad_norm": 3.7750918865203857, "learning_rate": 9.856058145067293e-06, "loss": 1.2465, "step": 2294 }, { "epoch": 0.6141289804656141, "grad_norm": 4.267007827758789, "learning_rate": 9.85584725634678e-06, "loss": 1.2918, "step": 2295 }, { "epoch": 0.6143965747926144, "grad_norm": 3.3109710216522217, "learning_rate": 9.855636215512666e-06, "loss": 1.0852, "step": 2296 }, { "epoch": 0.6146641691196146, "grad_norm": 3.4727590084075928, "learning_rate": 9.85542502257156e-06, "loss": 1.1332, "step": 2297 }, { "epoch": 0.6149317634466149, "grad_norm": 3.525007724761963, "learning_rate": 9.855213677530083e-06, "loss": 1.1852, "step": 2298 }, { "epoch": 0.6151993577736152, "grad_norm": 4.421526908874512, "learning_rate": 9.85500218039485e-06, "loss": 1.3733, "step": 2299 }, { "epoch": 0.6154669521006154, "grad_norm": 3.6387100219726562, "learning_rate": 9.854790531172491e-06, "loss": 1.1027, "step": 2300 }, { "epoch": 0.6157345464276157, "grad_norm": 3.6601171493530273, "learning_rate": 9.854578729869634e-06, "loss": 1.1533, "step": 2301 }, { "epoch": 0.616002140754616, "grad_norm": 3.661722183227539, "learning_rate": 9.854366776492915e-06, "loss": 1.0665, "step": 2302 }, { "epoch": 0.6162697350816163, "grad_norm": 3.5786993503570557, "learning_rate": 9.85415467104897e-06, "loss": 1.2227, "step": 2303 }, { "epoch": 0.6165373294086165, "grad_norm": 3.527582883834839, "learning_rate": 9.853942413544448e-06, "loss": 1.2771, "step": 2304 }, { "epoch": 0.6168049237356168, "grad_norm": 3.9386675357818604, "learning_rate": 9.853730003985995e-06, "loss": 1.2679, "step": 2305 }, { "epoch": 0.6170725180626171, "grad_norm": 3.3264570236206055, "learning_rate": 9.853517442380266e-06, "loss": 1.093, "step": 2306 }, { "epoch": 0.6173401123896174, "grad_norm": 3.631671905517578, "learning_rate": 9.85330472873392e-06, "loss": 1.1611, "step": 2307 }, { "epoch": 0.6176077067166176, "grad_norm": 3.9412624835968018, "learning_rate": 9.853091863053621e-06, "loss": 1.198, "step": 2308 }, { "epoch": 0.6178753010436179, "grad_norm": 3.4055187702178955, "learning_rate": 9.852878845346035e-06, "loss": 1.0783, "step": 2309 }, { "epoch": 0.6181428953706182, "grad_norm": 3.639285087585449, "learning_rate": 9.852665675617837e-06, "loss": 1.2475, "step": 2310 }, { "epoch": 0.6184104896976184, "grad_norm": 3.5802559852600098, "learning_rate": 9.852452353875705e-06, "loss": 1.1369, "step": 2311 }, { "epoch": 0.6186780840246187, "grad_norm": 3.259661912918091, "learning_rate": 9.852238880126319e-06, "loss": 1.0025, "step": 2312 }, { "epoch": 0.618945678351619, "grad_norm": 3.9171831607818604, "learning_rate": 9.852025254376367e-06, "loss": 1.2405, "step": 2313 }, { "epoch": 0.6192132726786193, "grad_norm": 3.7371790409088135, "learning_rate": 9.851811476632544e-06, "loss": 1.2399, "step": 2314 }, { "epoch": 0.6194808670056194, "grad_norm": 3.9764063358306885, "learning_rate": 9.851597546901543e-06, "loss": 1.3006, "step": 2315 }, { "epoch": 0.6197484613326197, "grad_norm": 3.6764659881591797, "learning_rate": 9.851383465190068e-06, "loss": 1.1916, "step": 2316 }, { "epoch": 0.62001605565962, "grad_norm": 3.4635825157165527, "learning_rate": 9.851169231504825e-06, "loss": 1.0243, "step": 2317 }, { "epoch": 0.6202836499866203, "grad_norm": 3.5511868000030518, "learning_rate": 9.850954845852522e-06, "loss": 1.1825, "step": 2318 }, { "epoch": 0.6205512443136205, "grad_norm": 3.948732376098633, "learning_rate": 9.85074030823988e-06, "loss": 1.3428, "step": 2319 }, { "epoch": 0.6208188386406208, "grad_norm": 3.748976469039917, "learning_rate": 9.850525618673615e-06, "loss": 1.2, "step": 2320 }, { "epoch": 0.6210864329676211, "grad_norm": 3.6761586666107178, "learning_rate": 9.850310777160454e-06, "loss": 1.2541, "step": 2321 }, { "epoch": 0.6213540272946213, "grad_norm": 3.328855514526367, "learning_rate": 9.85009578370713e-06, "loss": 1.0451, "step": 2322 }, { "epoch": 0.6216216216216216, "grad_norm": 3.2399799823760986, "learning_rate": 9.849880638320372e-06, "loss": 1.0936, "step": 2323 }, { "epoch": 0.6218892159486219, "grad_norm": 3.481745481491089, "learning_rate": 9.849665341006924e-06, "loss": 1.2136, "step": 2324 }, { "epoch": 0.6221568102756222, "grad_norm": 3.219832181930542, "learning_rate": 9.849449891773529e-06, "loss": 1.059, "step": 2325 }, { "epoch": 0.6224244046026224, "grad_norm": 3.4119327068328857, "learning_rate": 9.849234290626937e-06, "loss": 1.0072, "step": 2326 }, { "epoch": 0.6226919989296227, "grad_norm": 3.2931737899780273, "learning_rate": 9.8490185375739e-06, "loss": 1.169, "step": 2327 }, { "epoch": 0.622959593256623, "grad_norm": 4.0458760261535645, "learning_rate": 9.848802632621177e-06, "loss": 1.2028, "step": 2328 }, { "epoch": 0.6232271875836233, "grad_norm": 3.2181153297424316, "learning_rate": 9.848586575775534e-06, "loss": 0.9779, "step": 2329 }, { "epoch": 0.6234947819106235, "grad_norm": 3.359768867492676, "learning_rate": 9.848370367043737e-06, "loss": 1.0074, "step": 2330 }, { "epoch": 0.6237623762376238, "grad_norm": 3.5515081882476807, "learning_rate": 9.848154006432559e-06, "loss": 1.0557, "step": 2331 }, { "epoch": 0.6240299705646241, "grad_norm": 4.038802623748779, "learning_rate": 9.847937493948778e-06, "loss": 1.1691, "step": 2332 }, { "epoch": 0.6242975648916242, "grad_norm": 3.4252140522003174, "learning_rate": 9.847720829599177e-06, "loss": 1.0728, "step": 2333 }, { "epoch": 0.6245651592186245, "grad_norm": 3.5178418159484863, "learning_rate": 9.847504013390542e-06, "loss": 1.0433, "step": 2334 }, { "epoch": 0.6248327535456248, "grad_norm": 4.008810043334961, "learning_rate": 9.847287045329665e-06, "loss": 1.2534, "step": 2335 }, { "epoch": 0.6251003478726251, "grad_norm": 3.4519779682159424, "learning_rate": 9.847069925423342e-06, "loss": 1.2137, "step": 2336 }, { "epoch": 0.6253679421996253, "grad_norm": 3.9247629642486572, "learning_rate": 9.846852653678377e-06, "loss": 1.0946, "step": 2337 }, { "epoch": 0.6256355365266256, "grad_norm": 3.3218302726745605, "learning_rate": 9.846635230101578e-06, "loss": 0.992, "step": 2338 }, { "epoch": 0.6259031308536259, "grad_norm": 3.259517192840576, "learning_rate": 9.846417654699748e-06, "loss": 1.025, "step": 2339 }, { "epoch": 0.6261707251806262, "grad_norm": 3.9205453395843506, "learning_rate": 9.846199927479711e-06, "loss": 1.2215, "step": 2340 }, { "epoch": 0.6264383195076264, "grad_norm": 3.4169704914093018, "learning_rate": 9.845982048448283e-06, "loss": 1.0521, "step": 2341 }, { "epoch": 0.6267059138346267, "grad_norm": 3.2617716789245605, "learning_rate": 9.845764017612291e-06, "loss": 1.0927, "step": 2342 }, { "epoch": 0.626973508161627, "grad_norm": 3.432112455368042, "learning_rate": 9.845545834978565e-06, "loss": 1.0838, "step": 2343 }, { "epoch": 0.6272411024886272, "grad_norm": 3.6730408668518066, "learning_rate": 9.845327500553938e-06, "loss": 1.1048, "step": 2344 }, { "epoch": 0.6275086968156275, "grad_norm": 3.4062979221343994, "learning_rate": 9.845109014345251e-06, "loss": 1.1069, "step": 2345 }, { "epoch": 0.6277762911426278, "grad_norm": 3.237093687057495, "learning_rate": 9.844890376359348e-06, "loss": 1.1357, "step": 2346 }, { "epoch": 0.6280438854696281, "grad_norm": 3.722663640975952, "learning_rate": 9.844671586603079e-06, "loss": 1.2362, "step": 2347 }, { "epoch": 0.6283114797966283, "grad_norm": 3.7158944606781006, "learning_rate": 9.844452645083295e-06, "loss": 1.2066, "step": 2348 }, { "epoch": 0.6285790741236286, "grad_norm": 3.6207492351531982, "learning_rate": 9.844233551806857e-06, "loss": 1.1971, "step": 2349 }, { "epoch": 0.6288466684506289, "grad_norm": 3.799163579940796, "learning_rate": 9.844014306780627e-06, "loss": 1.1569, "step": 2350 }, { "epoch": 0.6291142627776292, "grad_norm": 3.326672077178955, "learning_rate": 9.843794910011476e-06, "loss": 1.0336, "step": 2351 }, { "epoch": 0.6293818571046293, "grad_norm": 3.5804383754730225, "learning_rate": 9.84357536150627e-06, "loss": 1.2486, "step": 2352 }, { "epoch": 0.6296494514316296, "grad_norm": 3.147380828857422, "learning_rate": 9.843355661271895e-06, "loss": 1.0599, "step": 2353 }, { "epoch": 0.62991704575863, "grad_norm": 3.6518685817718506, "learning_rate": 9.843135809315227e-06, "loss": 1.195, "step": 2354 }, { "epoch": 0.6301846400856301, "grad_norm": 3.393224000930786, "learning_rate": 9.842915805643156e-06, "loss": 1.1262, "step": 2355 }, { "epoch": 0.6304522344126304, "grad_norm": 3.6997387409210205, "learning_rate": 9.842695650262573e-06, "loss": 1.1872, "step": 2356 }, { "epoch": 0.6307198287396307, "grad_norm": 3.419063091278076, "learning_rate": 9.842475343180375e-06, "loss": 1.2947, "step": 2357 }, { "epoch": 0.630987423066631, "grad_norm": 3.517101764678955, "learning_rate": 9.842254884403463e-06, "loss": 1.2461, "step": 2358 }, { "epoch": 0.6312550173936312, "grad_norm": 3.4831290245056152, "learning_rate": 9.842034273938744e-06, "loss": 1.153, "step": 2359 }, { "epoch": 0.6315226117206315, "grad_norm": 3.965106248855591, "learning_rate": 9.841813511793126e-06, "loss": 1.2851, "step": 2360 }, { "epoch": 0.6317902060476318, "grad_norm": 2.9913620948791504, "learning_rate": 9.841592597973528e-06, "loss": 1.1356, "step": 2361 }, { "epoch": 0.6320578003746321, "grad_norm": 3.26570463180542, "learning_rate": 9.841371532486867e-06, "loss": 1.1497, "step": 2362 }, { "epoch": 0.6323253947016323, "grad_norm": 3.5169339179992676, "learning_rate": 9.841150315340071e-06, "loss": 1.1598, "step": 2363 }, { "epoch": 0.6325929890286326, "grad_norm": 3.4498212337493896, "learning_rate": 9.84092894654007e-06, "loss": 1.1632, "step": 2364 }, { "epoch": 0.6328605833556329, "grad_norm": 4.316896915435791, "learning_rate": 9.840707426093795e-06, "loss": 1.2331, "step": 2365 }, { "epoch": 0.6331281776826331, "grad_norm": 3.5566680431365967, "learning_rate": 9.840485754008188e-06, "loss": 1.1958, "step": 2366 }, { "epoch": 0.6333957720096334, "grad_norm": 3.55718994140625, "learning_rate": 9.840263930290192e-06, "loss": 1.1707, "step": 2367 }, { "epoch": 0.6336633663366337, "grad_norm": 3.3516623973846436, "learning_rate": 9.840041954946757e-06, "loss": 1.1279, "step": 2368 }, { "epoch": 0.633930960663634, "grad_norm": 3.7608842849731445, "learning_rate": 9.839819827984835e-06, "loss": 1.0901, "step": 2369 }, { "epoch": 0.6341985549906342, "grad_norm": 3.342604637145996, "learning_rate": 9.839597549411389e-06, "loss": 1.1313, "step": 2370 }, { "epoch": 0.6344661493176345, "grad_norm": 3.692324161529541, "learning_rate": 9.839375119233375e-06, "loss": 1.1783, "step": 2371 }, { "epoch": 0.6347337436446348, "grad_norm": 3.4589786529541016, "learning_rate": 9.839152537457764e-06, "loss": 1.0795, "step": 2372 }, { "epoch": 0.635001337971635, "grad_norm": 3.76045560836792, "learning_rate": 9.83892980409153e-06, "loss": 1.2872, "step": 2373 }, { "epoch": 0.6352689322986352, "grad_norm": 3.486509323120117, "learning_rate": 9.838706919141649e-06, "loss": 0.9929, "step": 2374 }, { "epoch": 0.6355365266256355, "grad_norm": 3.1999824047088623, "learning_rate": 9.838483882615101e-06, "loss": 1.1086, "step": 2375 }, { "epoch": 0.6358041209526358, "grad_norm": 3.3866939544677734, "learning_rate": 9.838260694518877e-06, "loss": 1.1782, "step": 2376 }, { "epoch": 0.636071715279636, "grad_norm": 3.8350670337677, "learning_rate": 9.838037354859967e-06, "loss": 1.2023, "step": 2377 }, { "epoch": 0.6363393096066363, "grad_norm": 3.401334762573242, "learning_rate": 9.837813863645367e-06, "loss": 1.2159, "step": 2378 }, { "epoch": 0.6366069039336366, "grad_norm": 3.399458646774292, "learning_rate": 9.837590220882076e-06, "loss": 1.03, "step": 2379 }, { "epoch": 0.6368744982606369, "grad_norm": 3.1889894008636475, "learning_rate": 9.837366426577102e-06, "loss": 1.0268, "step": 2380 }, { "epoch": 0.6371420925876371, "grad_norm": 3.411510467529297, "learning_rate": 9.837142480737457e-06, "loss": 1.1182, "step": 2381 }, { "epoch": 0.6374096869146374, "grad_norm": 3.746042251586914, "learning_rate": 9.836918383370153e-06, "loss": 1.1736, "step": 2382 }, { "epoch": 0.6376772812416377, "grad_norm": 3.678807020187378, "learning_rate": 9.836694134482212e-06, "loss": 1.1744, "step": 2383 }, { "epoch": 0.637944875568638, "grad_norm": 3.2534291744232178, "learning_rate": 9.836469734080658e-06, "loss": 0.9784, "step": 2384 }, { "epoch": 0.6382124698956382, "grad_norm": 3.305079221725464, "learning_rate": 9.83624518217252e-06, "loss": 1.1002, "step": 2385 }, { "epoch": 0.6384800642226385, "grad_norm": 3.6762077808380127, "learning_rate": 9.836020478764835e-06, "loss": 1.0991, "step": 2386 }, { "epoch": 0.6387476585496388, "grad_norm": 3.3923799991607666, "learning_rate": 9.83579562386464e-06, "loss": 1.123, "step": 2387 }, { "epoch": 0.639015252876639, "grad_norm": 3.8256936073303223, "learning_rate": 9.835570617478976e-06, "loss": 1.1498, "step": 2388 }, { "epoch": 0.6392828472036393, "grad_norm": 3.488901138305664, "learning_rate": 9.835345459614897e-06, "loss": 1.19, "step": 2389 }, { "epoch": 0.6395504415306396, "grad_norm": 3.408535957336426, "learning_rate": 9.835120150279454e-06, "loss": 1.1097, "step": 2390 }, { "epoch": 0.6398180358576399, "grad_norm": 3.648115634918213, "learning_rate": 9.834894689479703e-06, "loss": 1.0789, "step": 2391 }, { "epoch": 0.64008563018464, "grad_norm": 3.6117544174194336, "learning_rate": 9.83466907722271e-06, "loss": 1.256, "step": 2392 }, { "epoch": 0.6403532245116403, "grad_norm": 3.7180707454681396, "learning_rate": 9.834443313515542e-06, "loss": 1.1885, "step": 2393 }, { "epoch": 0.6406208188386406, "grad_norm": 5.158202648162842, "learning_rate": 9.834217398365268e-06, "loss": 1.1757, "step": 2394 }, { "epoch": 0.6408884131656409, "grad_norm": 3.770582914352417, "learning_rate": 9.83399133177897e-06, "loss": 1.2923, "step": 2395 }, { "epoch": 0.6411560074926411, "grad_norm": 3.824382781982422, "learning_rate": 9.833765113763723e-06, "loss": 1.2032, "step": 2396 }, { "epoch": 0.6414236018196414, "grad_norm": 3.29740309715271, "learning_rate": 9.83353874432662e-06, "loss": 1.2306, "step": 2397 }, { "epoch": 0.6416911961466417, "grad_norm": 3.3349862098693848, "learning_rate": 9.83331222347475e-06, "loss": 1.0847, "step": 2398 }, { "epoch": 0.6419587904736419, "grad_norm": 3.7271625995635986, "learning_rate": 9.833085551215206e-06, "loss": 1.252, "step": 2399 }, { "epoch": 0.6422263848006422, "grad_norm": 3.7548937797546387, "learning_rate": 9.832858727555095e-06, "loss": 1.1225, "step": 2400 }, { "epoch": 0.6424939791276425, "grad_norm": 3.6367075443267822, "learning_rate": 9.832631752501515e-06, "loss": 1.1896, "step": 2401 }, { "epoch": 0.6427615734546428, "grad_norm": 3.585908889770508, "learning_rate": 9.832404626061582e-06, "loss": 1.24, "step": 2402 }, { "epoch": 0.643029167781643, "grad_norm": 3.4509429931640625, "learning_rate": 9.832177348242408e-06, "loss": 1.1011, "step": 2403 }, { "epoch": 0.6432967621086433, "grad_norm": 3.6890709400177, "learning_rate": 9.831949919051116e-06, "loss": 1.1894, "step": 2404 }, { "epoch": 0.6435643564356436, "grad_norm": 3.348698139190674, "learning_rate": 9.831722338494826e-06, "loss": 1.2294, "step": 2405 }, { "epoch": 0.6438319507626439, "grad_norm": 3.424172878265381, "learning_rate": 9.831494606580669e-06, "loss": 1.0647, "step": 2406 }, { "epoch": 0.6440995450896441, "grad_norm": 3.4821624755859375, "learning_rate": 9.83126672331578e-06, "loss": 1.124, "step": 2407 }, { "epoch": 0.6443671394166444, "grad_norm": 3.271749973297119, "learning_rate": 9.831038688707296e-06, "loss": 1.0989, "step": 2408 }, { "epoch": 0.6446347337436447, "grad_norm": 3.6748054027557373, "learning_rate": 9.83081050276236e-06, "loss": 1.1704, "step": 2409 }, { "epoch": 0.6449023280706448, "grad_norm": 3.3000192642211914, "learning_rate": 9.830582165488123e-06, "loss": 1.1656, "step": 2410 }, { "epoch": 0.6451699223976451, "grad_norm": 4.096604824066162, "learning_rate": 9.830353676891736e-06, "loss": 1.1799, "step": 2411 }, { "epoch": 0.6454375167246454, "grad_norm": 3.337603807449341, "learning_rate": 9.830125036980353e-06, "loss": 1.1693, "step": 2412 }, { "epoch": 0.6457051110516457, "grad_norm": 3.6105048656463623, "learning_rate": 9.829896245761144e-06, "loss": 1.2169, "step": 2413 }, { "epoch": 0.6459727053786459, "grad_norm": 3.29010272026062, "learning_rate": 9.829667303241271e-06, "loss": 1.0089, "step": 2414 }, { "epoch": 0.6462402997056462, "grad_norm": 3.5054385662078857, "learning_rate": 9.829438209427907e-06, "loss": 1.105, "step": 2415 }, { "epoch": 0.6465078940326465, "grad_norm": 3.4805397987365723, "learning_rate": 9.829208964328228e-06, "loss": 1.0914, "step": 2416 }, { "epoch": 0.6467754883596468, "grad_norm": 3.1424105167388916, "learning_rate": 9.828979567949416e-06, "loss": 1.0573, "step": 2417 }, { "epoch": 0.647043082686647, "grad_norm": 4.121860980987549, "learning_rate": 9.828750020298656e-06, "loss": 1.1732, "step": 2418 }, { "epoch": 0.6473106770136473, "grad_norm": 3.2964742183685303, "learning_rate": 9.828520321383142e-06, "loss": 1.1536, "step": 2419 }, { "epoch": 0.6475782713406476, "grad_norm": 3.4967031478881836, "learning_rate": 9.828290471210064e-06, "loss": 1.1049, "step": 2420 }, { "epoch": 0.6478458656676478, "grad_norm": 3.3950541019439697, "learning_rate": 9.828060469786626e-06, "loss": 1.151, "step": 2421 }, { "epoch": 0.6481134599946481, "grad_norm": 3.585238218307495, "learning_rate": 9.827830317120033e-06, "loss": 1.1172, "step": 2422 }, { "epoch": 0.6483810543216484, "grad_norm": 2.9747002124786377, "learning_rate": 9.827600013217496e-06, "loss": 0.9499, "step": 2423 }, { "epoch": 0.6486486486486487, "grad_norm": 3.2427027225494385, "learning_rate": 9.827369558086225e-06, "loss": 1.0767, "step": 2424 }, { "epoch": 0.6489162429756489, "grad_norm": 3.419710874557495, "learning_rate": 9.827138951733441e-06, "loss": 1.1198, "step": 2425 }, { "epoch": 0.6491838373026492, "grad_norm": 3.5654327869415283, "learning_rate": 9.82690819416637e-06, "loss": 1.1684, "step": 2426 }, { "epoch": 0.6494514316296495, "grad_norm": 3.414553642272949, "learning_rate": 9.826677285392238e-06, "loss": 1.1018, "step": 2427 }, { "epoch": 0.6497190259566498, "grad_norm": 3.366098642349243, "learning_rate": 9.826446225418282e-06, "loss": 1.0191, "step": 2428 }, { "epoch": 0.64998662028365, "grad_norm": 3.913783311843872, "learning_rate": 9.826215014251738e-06, "loss": 1.235, "step": 2429 }, { "epoch": 0.6502542146106502, "grad_norm": 4.037808418273926, "learning_rate": 9.825983651899847e-06, "loss": 1.2542, "step": 2430 }, { "epoch": 0.6505218089376505, "grad_norm": 2.9834325313568115, "learning_rate": 9.82575213836986e-06, "loss": 0.967, "step": 2431 }, { "epoch": 0.6507894032646507, "grad_norm": 3.3896093368530273, "learning_rate": 9.825520473669026e-06, "loss": 1.1163, "step": 2432 }, { "epoch": 0.651056997591651, "grad_norm": 3.965498685836792, "learning_rate": 9.825288657804606e-06, "loss": 1.2024, "step": 2433 }, { "epoch": 0.6513245919186513, "grad_norm": 3.836982011795044, "learning_rate": 9.825056690783859e-06, "loss": 1.2839, "step": 2434 }, { "epoch": 0.6515921862456516, "grad_norm": 4.303612232208252, "learning_rate": 9.82482457261405e-06, "loss": 1.1528, "step": 2435 }, { "epoch": 0.6518597805726518, "grad_norm": 3.613075017929077, "learning_rate": 9.824592303302455e-06, "loss": 1.1773, "step": 2436 }, { "epoch": 0.6521273748996521, "grad_norm": 3.2512998580932617, "learning_rate": 9.824359882856347e-06, "loss": 1.0795, "step": 2437 }, { "epoch": 0.6523949692266524, "grad_norm": 3.6601617336273193, "learning_rate": 9.824127311283007e-06, "loss": 1.1032, "step": 2438 }, { "epoch": 0.6526625635536527, "grad_norm": 3.5576727390289307, "learning_rate": 9.823894588589722e-06, "loss": 1.1383, "step": 2439 }, { "epoch": 0.6529301578806529, "grad_norm": 3.50748610496521, "learning_rate": 9.823661714783781e-06, "loss": 1.2066, "step": 2440 }, { "epoch": 0.6531977522076532, "grad_norm": 3.7736473083496094, "learning_rate": 9.823428689872479e-06, "loss": 1.2547, "step": 2441 }, { "epoch": 0.6534653465346535, "grad_norm": 3.476040840148926, "learning_rate": 9.823195513863114e-06, "loss": 1.1075, "step": 2442 }, { "epoch": 0.6537329408616537, "grad_norm": 3.444315195083618, "learning_rate": 9.822962186762994e-06, "loss": 1.1135, "step": 2443 }, { "epoch": 0.654000535188654, "grad_norm": 3.9835290908813477, "learning_rate": 9.822728708579425e-06, "loss": 1.1706, "step": 2444 }, { "epoch": 0.6542681295156543, "grad_norm": 3.669281482696533, "learning_rate": 9.822495079319725e-06, "loss": 1.1828, "step": 2445 }, { "epoch": 0.6545357238426546, "grad_norm": 3.555455446243286, "learning_rate": 9.822261298991208e-06, "loss": 1.1348, "step": 2446 }, { "epoch": 0.6548033181696548, "grad_norm": 3.5849578380584717, "learning_rate": 9.822027367601199e-06, "loss": 1.1241, "step": 2447 }, { "epoch": 0.6550709124966551, "grad_norm": 3.71714186668396, "learning_rate": 9.821793285157027e-06, "loss": 1.255, "step": 2448 }, { "epoch": 0.6553385068236554, "grad_norm": 3.6075050830841064, "learning_rate": 9.821559051666025e-06, "loss": 1.1514, "step": 2449 }, { "epoch": 0.6556061011506557, "grad_norm": 3.3877387046813965, "learning_rate": 9.82132466713553e-06, "loss": 1.1232, "step": 2450 }, { "epoch": 0.6558736954776558, "grad_norm": 3.499657154083252, "learning_rate": 9.821090131572883e-06, "loss": 1.1694, "step": 2451 }, { "epoch": 0.6561412898046561, "grad_norm": 3.8426098823547363, "learning_rate": 9.820855444985433e-06, "loss": 1.2109, "step": 2452 }, { "epoch": 0.6564088841316564, "grad_norm": 3.5373287200927734, "learning_rate": 9.82062060738053e-06, "loss": 1.0852, "step": 2453 }, { "epoch": 0.6566764784586567, "grad_norm": 3.0332095623016357, "learning_rate": 9.820385618765532e-06, "loss": 1.0035, "step": 2454 }, { "epoch": 0.6569440727856569, "grad_norm": 3.5709455013275146, "learning_rate": 9.8201504791478e-06, "loss": 1.1012, "step": 2455 }, { "epoch": 0.6572116671126572, "grad_norm": 3.4466726779937744, "learning_rate": 9.819915188534699e-06, "loss": 1.1192, "step": 2456 }, { "epoch": 0.6574792614396575, "grad_norm": 3.5553793907165527, "learning_rate": 9.8196797469336e-06, "loss": 1.2776, "step": 2457 }, { "epoch": 0.6577468557666577, "grad_norm": 3.3105359077453613, "learning_rate": 9.81944415435188e-06, "loss": 1.1082, "step": 2458 }, { "epoch": 0.658014450093658, "grad_norm": 3.7504870891571045, "learning_rate": 9.819208410796916e-06, "loss": 1.0523, "step": 2459 }, { "epoch": 0.6582820444206583, "grad_norm": 3.4031195640563965, "learning_rate": 9.818972516276096e-06, "loss": 1.1827, "step": 2460 }, { "epoch": 0.6585496387476586, "grad_norm": 3.87593412399292, "learning_rate": 9.818736470796807e-06, "loss": 1.1583, "step": 2461 }, { "epoch": 0.6588172330746588, "grad_norm": 3.425092935562134, "learning_rate": 9.818500274366448e-06, "loss": 1.0955, "step": 2462 }, { "epoch": 0.6590848274016591, "grad_norm": 3.820794105529785, "learning_rate": 9.818263926992411e-06, "loss": 1.2023, "step": 2463 }, { "epoch": 0.6593524217286594, "grad_norm": 3.5655276775360107, "learning_rate": 9.818027428682104e-06, "loss": 1.1085, "step": 2464 }, { "epoch": 0.6596200160556597, "grad_norm": 3.5070512294769287, "learning_rate": 9.817790779442937e-06, "loss": 1.3138, "step": 2465 }, { "epoch": 0.6598876103826599, "grad_norm": 4.04046106338501, "learning_rate": 9.81755397928232e-06, "loss": 1.1665, "step": 2466 }, { "epoch": 0.6601552047096602, "grad_norm": 3.2360928058624268, "learning_rate": 9.81731702820767e-06, "loss": 1.0778, "step": 2467 }, { "epoch": 0.6604227990366605, "grad_norm": 3.552029848098755, "learning_rate": 9.817079926226417e-06, "loss": 1.181, "step": 2468 }, { "epoch": 0.6606903933636606, "grad_norm": 3.4324593544006348, "learning_rate": 9.816842673345979e-06, "loss": 1.153, "step": 2469 }, { "epoch": 0.6609579876906609, "grad_norm": 3.6090657711029053, "learning_rate": 9.816605269573794e-06, "loss": 1.0663, "step": 2470 }, { "epoch": 0.6612255820176612, "grad_norm": 4.007713794708252, "learning_rate": 9.816367714917296e-06, "loss": 1.2343, "step": 2471 }, { "epoch": 0.6614931763446615, "grad_norm": 3.3371682167053223, "learning_rate": 9.81613000938393e-06, "loss": 1.1184, "step": 2472 }, { "epoch": 0.6617607706716617, "grad_norm": 3.256664752960205, "learning_rate": 9.815892152981138e-06, "loss": 0.9794, "step": 2473 }, { "epoch": 0.662028364998662, "grad_norm": 3.7881510257720947, "learning_rate": 9.815654145716376e-06, "loss": 1.1446, "step": 2474 }, { "epoch": 0.6622959593256623, "grad_norm": 3.406993865966797, "learning_rate": 9.815415987597096e-06, "loss": 1.2445, "step": 2475 }, { "epoch": 0.6625635536526626, "grad_norm": 3.7865562438964844, "learning_rate": 9.81517767863076e-06, "loss": 1.3335, "step": 2476 }, { "epoch": 0.6628311479796628, "grad_norm": 3.755580425262451, "learning_rate": 9.814939218824831e-06, "loss": 1.1506, "step": 2477 }, { "epoch": 0.6630987423066631, "grad_norm": 3.753258466720581, "learning_rate": 9.814700608186783e-06, "loss": 1.1372, "step": 2478 }, { "epoch": 0.6633663366336634, "grad_norm": 3.5832202434539795, "learning_rate": 9.814461846724087e-06, "loss": 1.158, "step": 2479 }, { "epoch": 0.6636339309606636, "grad_norm": 3.4956367015838623, "learning_rate": 9.814222934444223e-06, "loss": 1.1532, "step": 2480 }, { "epoch": 0.6639015252876639, "grad_norm": 3.7934727668762207, "learning_rate": 9.81398387135468e-06, "loss": 1.1813, "step": 2481 }, { "epoch": 0.6641691196146642, "grad_norm": 3.5121653079986572, "learning_rate": 9.813744657462941e-06, "loss": 1.2199, "step": 2482 }, { "epoch": 0.6644367139416645, "grad_norm": 3.3426973819732666, "learning_rate": 9.8135052927765e-06, "loss": 1.18, "step": 2483 }, { "epoch": 0.6647043082686647, "grad_norm": 3.5111615657806396, "learning_rate": 9.813265777302858e-06, "loss": 1.1257, "step": 2484 }, { "epoch": 0.664971902595665, "grad_norm": 3.1432745456695557, "learning_rate": 9.813026111049514e-06, "loss": 1.0037, "step": 2485 }, { "epoch": 0.6652394969226653, "grad_norm": 3.3801767826080322, "learning_rate": 9.812786294023983e-06, "loss": 1.1871, "step": 2486 }, { "epoch": 0.6655070912496656, "grad_norm": 3.4595744609832764, "learning_rate": 9.812546326233771e-06, "loss": 1.1732, "step": 2487 }, { "epoch": 0.6657746855766657, "grad_norm": 3.835479736328125, "learning_rate": 9.812306207686398e-06, "loss": 1.1428, "step": 2488 }, { "epoch": 0.666042279903666, "grad_norm": 3.608619213104248, "learning_rate": 9.812065938389384e-06, "loss": 1.2981, "step": 2489 }, { "epoch": 0.6663098742306663, "grad_norm": 3.5188703536987305, "learning_rate": 9.811825518350257e-06, "loss": 1.2452, "step": 2490 }, { "epoch": 0.6665774685576665, "grad_norm": 3.481654644012451, "learning_rate": 9.81158494757655e-06, "loss": 1.2473, "step": 2491 }, { "epoch": 0.6668450628846668, "grad_norm": 3.2645812034606934, "learning_rate": 9.811344226075795e-06, "loss": 1.0821, "step": 2492 }, { "epoch": 0.6671126572116671, "grad_norm": 3.3354525566101074, "learning_rate": 9.811103353855535e-06, "loss": 1.08, "step": 2493 }, { "epoch": 0.6673802515386674, "grad_norm": 3.778996467590332, "learning_rate": 9.810862330923317e-06, "loss": 1.3063, "step": 2494 }, { "epoch": 0.6676478458656676, "grad_norm": 3.2988641262054443, "learning_rate": 9.810621157286688e-06, "loss": 1.0607, "step": 2495 }, { "epoch": 0.6679154401926679, "grad_norm": 3.771205425262451, "learning_rate": 9.810379832953207e-06, "loss": 1.1692, "step": 2496 }, { "epoch": 0.6681830345196682, "grad_norm": 3.594296455383301, "learning_rate": 9.81013835793043e-06, "loss": 1.2804, "step": 2497 }, { "epoch": 0.6684506288466685, "grad_norm": 3.33087420463562, "learning_rate": 9.809896732225923e-06, "loss": 1.0088, "step": 2498 }, { "epoch": 0.6687182231736687, "grad_norm": 3.6876564025878906, "learning_rate": 9.809654955847256e-06, "loss": 1.1182, "step": 2499 }, { "epoch": 0.668985817500669, "grad_norm": 3.4345877170562744, "learning_rate": 9.809413028802002e-06, "loss": 1.1175, "step": 2500 }, { "epoch": 0.668985817500669, "eval_loss": 1.1746242046356201, "eval_runtime": 11.5946, "eval_samples_per_second": 34.499, "eval_steps_per_second": 4.312, "step": 2500 }, { "epoch": 0.6692534118276693, "grad_norm": 3.555928945541382, "learning_rate": 9.809170951097739e-06, "loss": 1.1236, "step": 2501 }, { "epoch": 0.6695210061546695, "grad_norm": 3.356553316116333, "learning_rate": 9.80892872274205e-06, "loss": 1.0821, "step": 2502 }, { "epoch": 0.6697886004816698, "grad_norm": 3.5524895191192627, "learning_rate": 9.808686343742524e-06, "loss": 1.3042, "step": 2503 }, { "epoch": 0.6700561948086701, "grad_norm": 3.2740256786346436, "learning_rate": 9.808443814106754e-06, "loss": 1.091, "step": 2504 }, { "epoch": 0.6703237891356704, "grad_norm": 3.2746195793151855, "learning_rate": 9.808201133842337e-06, "loss": 1.0107, "step": 2505 }, { "epoch": 0.6705913834626706, "grad_norm": 3.7611098289489746, "learning_rate": 9.807958302956875e-06, "loss": 1.1164, "step": 2506 }, { "epoch": 0.6708589777896709, "grad_norm": 3.5693981647491455, "learning_rate": 9.807715321457976e-06, "loss": 1.1661, "step": 2507 }, { "epoch": 0.6711265721166711, "grad_norm": 3.7224698066711426, "learning_rate": 9.807472189353249e-06, "loss": 1.3212, "step": 2508 }, { "epoch": 0.6713941664436714, "grad_norm": 3.767155408859253, "learning_rate": 9.807228906650312e-06, "loss": 1.294, "step": 2509 }, { "epoch": 0.6716617607706716, "grad_norm": 4.016858100891113, "learning_rate": 9.806985473356787e-06, "loss": 1.1964, "step": 2510 }, { "epoch": 0.6719293550976719, "grad_norm": 3.684230089187622, "learning_rate": 9.806741889480298e-06, "loss": 1.1301, "step": 2511 }, { "epoch": 0.6721969494246722, "grad_norm": 3.254202365875244, "learning_rate": 9.806498155028477e-06, "loss": 1.0444, "step": 2512 }, { "epoch": 0.6724645437516724, "grad_norm": 3.6285407543182373, "learning_rate": 9.806254270008959e-06, "loss": 1.1154, "step": 2513 }, { "epoch": 0.6727321380786727, "grad_norm": 3.4263675212860107, "learning_rate": 9.806010234429382e-06, "loss": 1.1158, "step": 2514 }, { "epoch": 0.672999732405673, "grad_norm": 3.619586229324341, "learning_rate": 9.805766048297392e-06, "loss": 1.0731, "step": 2515 }, { "epoch": 0.6732673267326733, "grad_norm": 3.5182855129241943, "learning_rate": 9.80552171162064e-06, "loss": 1.1724, "step": 2516 }, { "epoch": 0.6735349210596735, "grad_norm": 3.505631446838379, "learning_rate": 9.805277224406776e-06, "loss": 1.1398, "step": 2517 }, { "epoch": 0.6738025153866738, "grad_norm": 3.540221929550171, "learning_rate": 9.805032586663462e-06, "loss": 1.1665, "step": 2518 }, { "epoch": 0.6740701097136741, "grad_norm": 3.7385308742523193, "learning_rate": 9.804787798398361e-06, "loss": 1.1794, "step": 2519 }, { "epoch": 0.6743377040406744, "grad_norm": 3.9296083450317383, "learning_rate": 9.80454285961914e-06, "loss": 1.2484, "step": 2520 }, { "epoch": 0.6746052983676746, "grad_norm": 3.585625171661377, "learning_rate": 9.804297770333472e-06, "loss": 1.2348, "step": 2521 }, { "epoch": 0.6748728926946749, "grad_norm": 3.768056869506836, "learning_rate": 9.804052530549038e-06, "loss": 1.0857, "step": 2522 }, { "epoch": 0.6751404870216752, "grad_norm": 3.6394028663635254, "learning_rate": 9.803807140273516e-06, "loss": 1.1641, "step": 2523 }, { "epoch": 0.6754080813486754, "grad_norm": 3.505856990814209, "learning_rate": 9.803561599514594e-06, "loss": 1.0889, "step": 2524 }, { "epoch": 0.6756756756756757, "grad_norm": 3.4862112998962402, "learning_rate": 9.803315908279966e-06, "loss": 1.1436, "step": 2525 }, { "epoch": 0.675943270002676, "grad_norm": 3.91096568107605, "learning_rate": 9.803070066577327e-06, "loss": 1.1813, "step": 2526 }, { "epoch": 0.6762108643296763, "grad_norm": 3.642303228378296, "learning_rate": 9.802824074414378e-06, "loss": 1.1385, "step": 2527 }, { "epoch": 0.6764784586566764, "grad_norm": 3.8517065048217773, "learning_rate": 9.802577931798826e-06, "loss": 1.1738, "step": 2528 }, { "epoch": 0.6767460529836767, "grad_norm": 3.9151949882507324, "learning_rate": 9.80233163873838e-06, "loss": 1.3214, "step": 2529 }, { "epoch": 0.677013647310677, "grad_norm": 3.326645612716675, "learning_rate": 9.802085195240755e-06, "loss": 1.1715, "step": 2530 }, { "epoch": 0.6772812416376773, "grad_norm": 3.2645928859710693, "learning_rate": 9.801838601313674e-06, "loss": 1.0983, "step": 2531 }, { "epoch": 0.6775488359646775, "grad_norm": 3.471367835998535, "learning_rate": 9.801591856964859e-06, "loss": 1.0831, "step": 2532 }, { "epoch": 0.6778164302916778, "grad_norm": 3.4886016845703125, "learning_rate": 9.80134496220204e-06, "loss": 1.1807, "step": 2533 }, { "epoch": 0.6780840246186781, "grad_norm": 3.3703372478485107, "learning_rate": 9.801097917032951e-06, "loss": 1.1011, "step": 2534 }, { "epoch": 0.6783516189456783, "grad_norm": 3.6529722213745117, "learning_rate": 9.800850721465334e-06, "loss": 1.2387, "step": 2535 }, { "epoch": 0.6786192132726786, "grad_norm": 3.183479070663452, "learning_rate": 9.800603375506928e-06, "loss": 1.0238, "step": 2536 }, { "epoch": 0.6788868075996789, "grad_norm": 3.3142735958099365, "learning_rate": 9.800355879165485e-06, "loss": 1.0489, "step": 2537 }, { "epoch": 0.6791544019266792, "grad_norm": 3.7475812435150146, "learning_rate": 9.800108232448754e-06, "loss": 1.2292, "step": 2538 }, { "epoch": 0.6794219962536794, "grad_norm": 3.2578468322753906, "learning_rate": 9.7998604353645e-06, "loss": 1.1332, "step": 2539 }, { "epoch": 0.6796895905806797, "grad_norm": 3.501826286315918, "learning_rate": 9.799612487920476e-06, "loss": 1.1691, "step": 2540 }, { "epoch": 0.67995718490768, "grad_norm": 3.8417768478393555, "learning_rate": 9.799364390124456e-06, "loss": 1.1488, "step": 2541 }, { "epoch": 0.6802247792346803, "grad_norm": 4.020801544189453, "learning_rate": 9.799116141984209e-06, "loss": 1.2232, "step": 2542 }, { "epoch": 0.6804923735616805, "grad_norm": 3.748538017272949, "learning_rate": 9.798867743507512e-06, "loss": 1.22, "step": 2543 }, { "epoch": 0.6807599678886808, "grad_norm": 3.597007989883423, "learning_rate": 9.798619194702148e-06, "loss": 1.1873, "step": 2544 }, { "epoch": 0.6810275622156811, "grad_norm": 3.8766472339630127, "learning_rate": 9.798370495575901e-06, "loss": 1.3015, "step": 2545 }, { "epoch": 0.6812951565426812, "grad_norm": 3.568079948425293, "learning_rate": 9.798121646136562e-06, "loss": 1.1225, "step": 2546 }, { "epoch": 0.6815627508696815, "grad_norm": 3.2755115032196045, "learning_rate": 9.797872646391926e-06, "loss": 1.0251, "step": 2547 }, { "epoch": 0.6818303451966818, "grad_norm": 3.6031720638275146, "learning_rate": 9.797623496349795e-06, "loss": 1.0804, "step": 2548 }, { "epoch": 0.6820979395236821, "grad_norm": 3.285602331161499, "learning_rate": 9.797374196017974e-06, "loss": 1.0666, "step": 2549 }, { "epoch": 0.6823655338506823, "grad_norm": 3.9866554737091064, "learning_rate": 9.79712474540427e-06, "loss": 1.2911, "step": 2550 }, { "epoch": 0.6826331281776826, "grad_norm": 3.1174442768096924, "learning_rate": 9.796875144516498e-06, "loss": 1.0572, "step": 2551 }, { "epoch": 0.6829007225046829, "grad_norm": 3.3973238468170166, "learning_rate": 9.796625393362477e-06, "loss": 1.0371, "step": 2552 }, { "epoch": 0.6831683168316832, "grad_norm": 3.1947076320648193, "learning_rate": 9.796375491950034e-06, "loss": 1.1874, "step": 2553 }, { "epoch": 0.6834359111586834, "grad_norm": 3.0682270526885986, "learning_rate": 9.796125440286992e-06, "loss": 1.1072, "step": 2554 }, { "epoch": 0.6837035054856837, "grad_norm": 3.7304959297180176, "learning_rate": 9.795875238381188e-06, "loss": 1.1563, "step": 2555 }, { "epoch": 0.683971099812684, "grad_norm": 3.118598461151123, "learning_rate": 9.795624886240458e-06, "loss": 1.133, "step": 2556 }, { "epoch": 0.6842386941396842, "grad_norm": 4.15332555770874, "learning_rate": 9.795374383872645e-06, "loss": 1.2752, "step": 2557 }, { "epoch": 0.6845062884666845, "grad_norm": 3.629516124725342, "learning_rate": 9.795123731285595e-06, "loss": 1.2345, "step": 2558 }, { "epoch": 0.6847738827936848, "grad_norm": 3.6850171089172363, "learning_rate": 9.794872928487163e-06, "loss": 1.2808, "step": 2559 }, { "epoch": 0.6850414771206851, "grad_norm": 3.699629306793213, "learning_rate": 9.7946219754852e-06, "loss": 1.3603, "step": 2560 }, { "epoch": 0.6853090714476853, "grad_norm": 3.9391591548919678, "learning_rate": 9.794370872287575e-06, "loss": 1.2984, "step": 2561 }, { "epoch": 0.6855766657746856, "grad_norm": 3.434231758117676, "learning_rate": 9.79411961890215e-06, "loss": 1.2203, "step": 2562 }, { "epoch": 0.6858442601016859, "grad_norm": 3.2698097229003906, "learning_rate": 9.793868215336792e-06, "loss": 1.1053, "step": 2563 }, { "epoch": 0.6861118544286862, "grad_norm": 3.335155725479126, "learning_rate": 9.793616661599384e-06, "loss": 1.2078, "step": 2564 }, { "epoch": 0.6863794487556863, "grad_norm": 3.870070695877075, "learning_rate": 9.7933649576978e-06, "loss": 1.2166, "step": 2565 }, { "epoch": 0.6866470430826866, "grad_norm": 3.4541990756988525, "learning_rate": 9.79311310363993e-06, "loss": 1.1562, "step": 2566 }, { "epoch": 0.686914637409687, "grad_norm": 3.1093223094940186, "learning_rate": 9.792861099433657e-06, "loss": 1.0556, "step": 2567 }, { "epoch": 0.6871822317366871, "grad_norm": 3.6408331394195557, "learning_rate": 9.79260894508688e-06, "loss": 1.1649, "step": 2568 }, { "epoch": 0.6874498260636874, "grad_norm": 3.420346260070801, "learning_rate": 9.792356640607497e-06, "loss": 1.0884, "step": 2569 }, { "epoch": 0.6877174203906877, "grad_norm": 3.3369221687316895, "learning_rate": 9.792104186003412e-06, "loss": 1.1023, "step": 2570 }, { "epoch": 0.687985014717688, "grad_norm": 3.251084089279175, "learning_rate": 9.791851581282533e-06, "loss": 1.0486, "step": 2571 }, { "epoch": 0.6882526090446882, "grad_norm": 3.6394076347351074, "learning_rate": 9.791598826452773e-06, "loss": 1.0097, "step": 2572 }, { "epoch": 0.6885202033716885, "grad_norm": 3.8325955867767334, "learning_rate": 9.79134592152205e-06, "loss": 1.2028, "step": 2573 }, { "epoch": 0.6887877976986888, "grad_norm": 3.359297037124634, "learning_rate": 9.791092866498286e-06, "loss": 1.0754, "step": 2574 }, { "epoch": 0.6890553920256891, "grad_norm": 3.287555694580078, "learning_rate": 9.790839661389408e-06, "loss": 1.0958, "step": 2575 }, { "epoch": 0.6893229863526893, "grad_norm": 3.197094202041626, "learning_rate": 9.790586306203348e-06, "loss": 1.0084, "step": 2576 }, { "epoch": 0.6895905806796896, "grad_norm": 3.1888086795806885, "learning_rate": 9.790332800948044e-06, "loss": 1.1168, "step": 2577 }, { "epoch": 0.6898581750066899, "grad_norm": 3.7352941036224365, "learning_rate": 9.790079145631434e-06, "loss": 1.1924, "step": 2578 }, { "epoch": 0.6901257693336901, "grad_norm": 3.5531890392303467, "learning_rate": 9.789825340261467e-06, "loss": 1.0547, "step": 2579 }, { "epoch": 0.6903933636606904, "grad_norm": 3.3544304370880127, "learning_rate": 9.789571384846093e-06, "loss": 1.0319, "step": 2580 }, { "epoch": 0.6906609579876907, "grad_norm": 3.5817840099334717, "learning_rate": 9.789317279393267e-06, "loss": 1.2264, "step": 2581 }, { "epoch": 0.690928552314691, "grad_norm": 3.4651858806610107, "learning_rate": 9.78906302391095e-06, "loss": 1.0902, "step": 2582 }, { "epoch": 0.6911961466416912, "grad_norm": 3.6912760734558105, "learning_rate": 9.788808618407103e-06, "loss": 1.2353, "step": 2583 }, { "epoch": 0.6914637409686915, "grad_norm": 3.6648828983306885, "learning_rate": 9.788554062889702e-06, "loss": 1.2044, "step": 2584 }, { "epoch": 0.6917313352956918, "grad_norm": 3.2582671642303467, "learning_rate": 9.788299357366717e-06, "loss": 1.0388, "step": 2585 }, { "epoch": 0.691998929622692, "grad_norm": 3.759870767593384, "learning_rate": 9.788044501846125e-06, "loss": 1.2292, "step": 2586 }, { "epoch": 0.6922665239496922, "grad_norm": 3.5340397357940674, "learning_rate": 9.787789496335913e-06, "loss": 1.2696, "step": 2587 }, { "epoch": 0.6925341182766925, "grad_norm": 3.402407169342041, "learning_rate": 9.78753434084407e-06, "loss": 1.2269, "step": 2588 }, { "epoch": 0.6928017126036928, "grad_norm": 3.7191381454467773, "learning_rate": 9.787279035378585e-06, "loss": 1.1591, "step": 2589 }, { "epoch": 0.693069306930693, "grad_norm": 3.3745412826538086, "learning_rate": 9.78702357994746e-06, "loss": 1.1188, "step": 2590 }, { "epoch": 0.6933369012576933, "grad_norm": 3.5345706939697266, "learning_rate": 9.786767974558693e-06, "loss": 1.2377, "step": 2591 }, { "epoch": 0.6936044955846936, "grad_norm": 3.54662823677063, "learning_rate": 9.786512219220294e-06, "loss": 1.2069, "step": 2592 }, { "epoch": 0.6938720899116939, "grad_norm": 3.509596109390259, "learning_rate": 9.786256313940276e-06, "loss": 1.2492, "step": 2593 }, { "epoch": 0.6941396842386941, "grad_norm": 3.594794273376465, "learning_rate": 9.786000258726652e-06, "loss": 1.1751, "step": 2594 }, { "epoch": 0.6944072785656944, "grad_norm": 3.4790191650390625, "learning_rate": 9.785744053587445e-06, "loss": 1.2485, "step": 2595 }, { "epoch": 0.6946748728926947, "grad_norm": 3.57783842086792, "learning_rate": 9.78548769853068e-06, "loss": 1.1048, "step": 2596 }, { "epoch": 0.694942467219695, "grad_norm": 3.960777521133423, "learning_rate": 9.785231193564388e-06, "loss": 1.2018, "step": 2597 }, { "epoch": 0.6952100615466952, "grad_norm": 3.92084002494812, "learning_rate": 9.784974538696606e-06, "loss": 1.2637, "step": 2598 }, { "epoch": 0.6954776558736955, "grad_norm": 3.7744603157043457, "learning_rate": 9.78471773393537e-06, "loss": 1.2868, "step": 2599 }, { "epoch": 0.6957452502006958, "grad_norm": 3.3519065380096436, "learning_rate": 9.784460779288727e-06, "loss": 1.0901, "step": 2600 }, { "epoch": 0.696012844527696, "grad_norm": 3.282240390777588, "learning_rate": 9.784203674764727e-06, "loss": 1.1259, "step": 2601 }, { "epoch": 0.6962804388546963, "grad_norm": 3.7202768325805664, "learning_rate": 9.783946420371424e-06, "loss": 1.1036, "step": 2602 }, { "epoch": 0.6965480331816966, "grad_norm": 3.3979485034942627, "learning_rate": 9.783689016116874e-06, "loss": 1.1188, "step": 2603 }, { "epoch": 0.6968156275086969, "grad_norm": 3.3660459518432617, "learning_rate": 9.783431462009146e-06, "loss": 1.0795, "step": 2604 }, { "epoch": 0.697083221835697, "grad_norm": 3.240844964981079, "learning_rate": 9.7831737580563e-06, "loss": 1.0436, "step": 2605 }, { "epoch": 0.6973508161626973, "grad_norm": 3.5097098350524902, "learning_rate": 9.782915904266416e-06, "loss": 1.1287, "step": 2606 }, { "epoch": 0.6976184104896976, "grad_norm": 3.2211825847625732, "learning_rate": 9.782657900647567e-06, "loss": 1.0162, "step": 2607 }, { "epoch": 0.6978860048166979, "grad_norm": 3.5163321495056152, "learning_rate": 9.782399747207838e-06, "loss": 1.258, "step": 2608 }, { "epoch": 0.6981535991436981, "grad_norm": 3.4427928924560547, "learning_rate": 9.782141443955316e-06, "loss": 1.1632, "step": 2609 }, { "epoch": 0.6984211934706984, "grad_norm": 3.6478707790374756, "learning_rate": 9.78188299089809e-06, "loss": 1.0287, "step": 2610 }, { "epoch": 0.6986887877976987, "grad_norm": 3.5365660190582275, "learning_rate": 9.781624388044257e-06, "loss": 1.1929, "step": 2611 }, { "epoch": 0.6989563821246989, "grad_norm": 3.965444803237915, "learning_rate": 9.78136563540192e-06, "loss": 1.3651, "step": 2612 }, { "epoch": 0.6992239764516992, "grad_norm": 3.7215042114257812, "learning_rate": 9.781106732979182e-06, "loss": 1.1677, "step": 2613 }, { "epoch": 0.6994915707786995, "grad_norm": 3.5624494552612305, "learning_rate": 9.780847680784156e-06, "loss": 1.0269, "step": 2614 }, { "epoch": 0.6997591651056998, "grad_norm": 3.623762845993042, "learning_rate": 9.780588478824953e-06, "loss": 1.1772, "step": 2615 }, { "epoch": 0.7000267594327, "grad_norm": 3.544771194458008, "learning_rate": 9.780329127109697e-06, "loss": 1.1919, "step": 2616 }, { "epoch": 0.7002943537597003, "grad_norm": 3.900216817855835, "learning_rate": 9.780069625646512e-06, "loss": 1.2399, "step": 2617 }, { "epoch": 0.7005619480867006, "grad_norm": 3.4038405418395996, "learning_rate": 9.779809974443525e-06, "loss": 1.143, "step": 2618 }, { "epoch": 0.7008295424137009, "grad_norm": 3.4808125495910645, "learning_rate": 9.77955017350887e-06, "loss": 1.1755, "step": 2619 }, { "epoch": 0.7010971367407011, "grad_norm": 3.106503963470459, "learning_rate": 9.779290222850686e-06, "loss": 1.0324, "step": 2620 }, { "epoch": 0.7013647310677014, "grad_norm": 3.0059962272644043, "learning_rate": 9.779030122477118e-06, "loss": 1.0385, "step": 2621 }, { "epoch": 0.7016323253947017, "grad_norm": 3.8533339500427246, "learning_rate": 9.778769872396311e-06, "loss": 1.2545, "step": 2622 }, { "epoch": 0.7018999197217018, "grad_norm": 3.361427068710327, "learning_rate": 9.77850947261642e-06, "loss": 1.1001, "step": 2623 }, { "epoch": 0.7021675140487021, "grad_norm": 3.411195755004883, "learning_rate": 9.778248923145599e-06, "loss": 0.9854, "step": 2624 }, { "epoch": 0.7024351083757024, "grad_norm": 3.7567944526672363, "learning_rate": 9.777988223992014e-06, "loss": 1.2254, "step": 2625 }, { "epoch": 0.7027027027027027, "grad_norm": 3.7029223442077637, "learning_rate": 9.777727375163828e-06, "loss": 1.2784, "step": 2626 }, { "epoch": 0.7029702970297029, "grad_norm": 3.0535287857055664, "learning_rate": 9.777466376669214e-06, "loss": 0.9761, "step": 2627 }, { "epoch": 0.7032378913567032, "grad_norm": 3.1361937522888184, "learning_rate": 9.777205228516349e-06, "loss": 1.0701, "step": 2628 }, { "epoch": 0.7035054856837035, "grad_norm": 3.5227043628692627, "learning_rate": 9.776943930713411e-06, "loss": 1.2202, "step": 2629 }, { "epoch": 0.7037730800107038, "grad_norm": 3.2027533054351807, "learning_rate": 9.776682483268588e-06, "loss": 1.1063, "step": 2630 }, { "epoch": 0.704040674337704, "grad_norm": 3.622596025466919, "learning_rate": 9.776420886190069e-06, "loss": 1.1919, "step": 2631 }, { "epoch": 0.7043082686647043, "grad_norm": 3.527977466583252, "learning_rate": 9.776159139486048e-06, "loss": 1.1076, "step": 2632 }, { "epoch": 0.7045758629917046, "grad_norm": 2.9947965145111084, "learning_rate": 9.775897243164727e-06, "loss": 1.1083, "step": 2633 }, { "epoch": 0.7048434573187048, "grad_norm": 3.344877004623413, "learning_rate": 9.775635197234306e-06, "loss": 1.1652, "step": 2634 }, { "epoch": 0.7051110516457051, "grad_norm": 3.506344795227051, "learning_rate": 9.775373001702998e-06, "loss": 1.1122, "step": 2635 }, { "epoch": 0.7053786459727054, "grad_norm": 3.5416011810302734, "learning_rate": 9.775110656579015e-06, "loss": 1.0505, "step": 2636 }, { "epoch": 0.7056462402997057, "grad_norm": 3.234518527984619, "learning_rate": 9.774848161870574e-06, "loss": 0.999, "step": 2637 }, { "epoch": 0.7059138346267059, "grad_norm": 4.471454620361328, "learning_rate": 9.774585517585898e-06, "loss": 1.146, "step": 2638 }, { "epoch": 0.7061814289537062, "grad_norm": 3.475337505340576, "learning_rate": 9.774322723733216e-06, "loss": 0.9791, "step": 2639 }, { "epoch": 0.7064490232807065, "grad_norm": 3.684784173965454, "learning_rate": 9.774059780320759e-06, "loss": 1.1905, "step": 2640 }, { "epoch": 0.7067166176077068, "grad_norm": 3.747850179672241, "learning_rate": 9.773796687356764e-06, "loss": 1.2367, "step": 2641 }, { "epoch": 0.706984211934707, "grad_norm": 3.962178945541382, "learning_rate": 9.773533444849475e-06, "loss": 1.1333, "step": 2642 }, { "epoch": 0.7072518062617072, "grad_norm": 3.6332039833068848, "learning_rate": 9.773270052807135e-06, "loss": 1.1832, "step": 2643 }, { "epoch": 0.7075194005887075, "grad_norm": 3.565274715423584, "learning_rate": 9.773006511237997e-06, "loss": 1.2, "step": 2644 }, { "epoch": 0.7077869949157077, "grad_norm": 3.718888998031616, "learning_rate": 9.772742820150316e-06, "loss": 1.18, "step": 2645 }, { "epoch": 0.708054589242708, "grad_norm": 3.387706756591797, "learning_rate": 9.772478979552353e-06, "loss": 1.0484, "step": 2646 }, { "epoch": 0.7083221835697083, "grad_norm": 3.5784285068511963, "learning_rate": 9.772214989452372e-06, "loss": 1.1859, "step": 2647 }, { "epoch": 0.7085897778967086, "grad_norm": 3.052260160446167, "learning_rate": 9.771950849858641e-06, "loss": 1.0571, "step": 2648 }, { "epoch": 0.7088573722237088, "grad_norm": 3.3145363330841064, "learning_rate": 9.771686560779438e-06, "loss": 1.1643, "step": 2649 }, { "epoch": 0.7091249665507091, "grad_norm": 3.7414932250976562, "learning_rate": 9.771422122223042e-06, "loss": 1.2321, "step": 2650 }, { "epoch": 0.7093925608777094, "grad_norm": 3.2115883827209473, "learning_rate": 9.771157534197733e-06, "loss": 1.1001, "step": 2651 }, { "epoch": 0.7096601552047097, "grad_norm": 3.6394455432891846, "learning_rate": 9.770892796711804e-06, "loss": 1.2063, "step": 2652 }, { "epoch": 0.7099277495317099, "grad_norm": 3.6082866191864014, "learning_rate": 9.770627909773545e-06, "loss": 1.1383, "step": 2653 }, { "epoch": 0.7101953438587102, "grad_norm": 4.1197919845581055, "learning_rate": 9.770362873391256e-06, "loss": 1.2361, "step": 2654 }, { "epoch": 0.7104629381857105, "grad_norm": 3.101154327392578, "learning_rate": 9.770097687573235e-06, "loss": 1.0858, "step": 2655 }, { "epoch": 0.7107305325127107, "grad_norm": 3.6352450847625732, "learning_rate": 9.769832352327795e-06, "loss": 1.0094, "step": 2656 }, { "epoch": 0.710998126839711, "grad_norm": 3.195739507675171, "learning_rate": 9.769566867663245e-06, "loss": 1.0132, "step": 2657 }, { "epoch": 0.7112657211667113, "grad_norm": 3.4742939472198486, "learning_rate": 9.7693012335879e-06, "loss": 1.235, "step": 2658 }, { "epoch": 0.7115333154937116, "grad_norm": 3.451916217803955, "learning_rate": 9.769035450110084e-06, "loss": 1.1433, "step": 2659 }, { "epoch": 0.7118009098207118, "grad_norm": 3.5261240005493164, "learning_rate": 9.768769517238124e-06, "loss": 1.1758, "step": 2660 }, { "epoch": 0.7120685041477121, "grad_norm": 3.142664670944214, "learning_rate": 9.768503434980348e-06, "loss": 1.0356, "step": 2661 }, { "epoch": 0.7123360984747124, "grad_norm": 3.1803033351898193, "learning_rate": 9.76823720334509e-06, "loss": 1.0501, "step": 2662 }, { "epoch": 0.7126036928017127, "grad_norm": 3.5911192893981934, "learning_rate": 9.767970822340692e-06, "loss": 1.0931, "step": 2663 }, { "epoch": 0.7128712871287128, "grad_norm": 3.599949598312378, "learning_rate": 9.7677042919755e-06, "loss": 1.1297, "step": 2664 }, { "epoch": 0.7131388814557131, "grad_norm": 3.7325220108032227, "learning_rate": 9.76743761225786e-06, "loss": 1.1475, "step": 2665 }, { "epoch": 0.7134064757827134, "grad_norm": 3.2687487602233887, "learning_rate": 9.767170783196128e-06, "loss": 1.108, "step": 2666 }, { "epoch": 0.7136740701097136, "grad_norm": 3.567669630050659, "learning_rate": 9.766903804798663e-06, "loss": 1.1965, "step": 2667 }, { "epoch": 0.7139416644367139, "grad_norm": 2.9746851921081543, "learning_rate": 9.766636677073825e-06, "loss": 0.9885, "step": 2668 }, { "epoch": 0.7142092587637142, "grad_norm": 3.6333086490631104, "learning_rate": 9.766369400029987e-06, "loss": 1.1906, "step": 2669 }, { "epoch": 0.7144768530907145, "grad_norm": 3.6311559677124023, "learning_rate": 9.766101973675519e-06, "loss": 1.1869, "step": 2670 }, { "epoch": 0.7147444474177147, "grad_norm": 3.632929801940918, "learning_rate": 9.765834398018797e-06, "loss": 1.1423, "step": 2671 }, { "epoch": 0.715012041744715, "grad_norm": 3.4885165691375732, "learning_rate": 9.765566673068206e-06, "loss": 1.1226, "step": 2672 }, { "epoch": 0.7152796360717153, "grad_norm": 3.2653591632843018, "learning_rate": 9.765298798832132e-06, "loss": 1.0427, "step": 2673 }, { "epoch": 0.7155472303987156, "grad_norm": 3.1086037158966064, "learning_rate": 9.765030775318965e-06, "loss": 1.0032, "step": 2674 }, { "epoch": 0.7158148247257158, "grad_norm": 3.084402322769165, "learning_rate": 9.764762602537102e-06, "loss": 1.0019, "step": 2675 }, { "epoch": 0.7160824190527161, "grad_norm": 3.50754714012146, "learning_rate": 9.764494280494943e-06, "loss": 1.0982, "step": 2676 }, { "epoch": 0.7163500133797164, "grad_norm": 3.355750799179077, "learning_rate": 9.764225809200894e-06, "loss": 1.1512, "step": 2677 }, { "epoch": 0.7166176077067166, "grad_norm": 3.4063217639923096, "learning_rate": 9.763957188663366e-06, "loss": 1.2209, "step": 2678 }, { "epoch": 0.7168852020337169, "grad_norm": 3.5086374282836914, "learning_rate": 9.76368841889077e-06, "loss": 1.2162, "step": 2679 }, { "epoch": 0.7171527963607172, "grad_norm": 3.286731719970703, "learning_rate": 9.763419499891533e-06, "loss": 1.176, "step": 2680 }, { "epoch": 0.7174203906877175, "grad_norm": 3.622854232788086, "learning_rate": 9.763150431674072e-06, "loss": 1.1612, "step": 2681 }, { "epoch": 0.7176879850147176, "grad_norm": 3.5535635948181152, "learning_rate": 9.762881214246817e-06, "loss": 1.1771, "step": 2682 }, { "epoch": 0.7179555793417179, "grad_norm": 4.360621452331543, "learning_rate": 9.762611847618203e-06, "loss": 1.0991, "step": 2683 }, { "epoch": 0.7182231736687182, "grad_norm": 3.4346253871917725, "learning_rate": 9.762342331796671e-06, "loss": 1.1323, "step": 2684 }, { "epoch": 0.7184907679957185, "grad_norm": 3.3075368404388428, "learning_rate": 9.762072666790658e-06, "loss": 1.0226, "step": 2685 }, { "epoch": 0.7187583623227187, "grad_norm": 3.4032669067382812, "learning_rate": 9.761802852608614e-06, "loss": 1.0554, "step": 2686 }, { "epoch": 0.719025956649719, "grad_norm": 3.6175873279571533, "learning_rate": 9.76153288925899e-06, "loss": 1.1181, "step": 2687 }, { "epoch": 0.7192935509767193, "grad_norm": 3.678610324859619, "learning_rate": 9.761262776750248e-06, "loss": 1.2954, "step": 2688 }, { "epoch": 0.7195611453037195, "grad_norm": 3.5059852600097656, "learning_rate": 9.760992515090844e-06, "loss": 1.1544, "step": 2689 }, { "epoch": 0.7198287396307198, "grad_norm": 3.412489414215088, "learning_rate": 9.760722104289244e-06, "loss": 1.2178, "step": 2690 }, { "epoch": 0.7200963339577201, "grad_norm": 3.746623992919922, "learning_rate": 9.760451544353923e-06, "loss": 1.2707, "step": 2691 }, { "epoch": 0.7203639282847204, "grad_norm": 3.2864015102386475, "learning_rate": 9.760180835293352e-06, "loss": 1.0242, "step": 2692 }, { "epoch": 0.7206315226117206, "grad_norm": 3.267595052719116, "learning_rate": 9.759909977116016e-06, "loss": 1.0804, "step": 2693 }, { "epoch": 0.7208991169387209, "grad_norm": 3.310580015182495, "learning_rate": 9.759638969830395e-06, "loss": 1.079, "step": 2694 }, { "epoch": 0.7211667112657212, "grad_norm": 3.208405017852783, "learning_rate": 9.759367813444982e-06, "loss": 1.0354, "step": 2695 }, { "epoch": 0.7214343055927215, "grad_norm": 3.2207465171813965, "learning_rate": 9.75909650796827e-06, "loss": 1.0239, "step": 2696 }, { "epoch": 0.7217018999197217, "grad_norm": 3.3544509410858154, "learning_rate": 9.758825053408755e-06, "loss": 1.1832, "step": 2697 }, { "epoch": 0.721969494246722, "grad_norm": 3.2339577674865723, "learning_rate": 9.758553449774947e-06, "loss": 1.0818, "step": 2698 }, { "epoch": 0.7222370885737223, "grad_norm": 3.8596277236938477, "learning_rate": 9.75828169707535e-06, "loss": 1.1333, "step": 2699 }, { "epoch": 0.7225046829007225, "grad_norm": 3.5846714973449707, "learning_rate": 9.758009795318477e-06, "loss": 1.0485, "step": 2700 }, { "epoch": 0.7227722772277227, "grad_norm": 3.747907876968384, "learning_rate": 9.757737744512846e-06, "loss": 1.0817, "step": 2701 }, { "epoch": 0.723039871554723, "grad_norm": 3.468989849090576, "learning_rate": 9.75746554466698e-06, "loss": 1.2511, "step": 2702 }, { "epoch": 0.7233074658817233, "grad_norm": 3.4616754055023193, "learning_rate": 9.757193195789404e-06, "loss": 1.1138, "step": 2703 }, { "epoch": 0.7235750602087235, "grad_norm": 3.6678431034088135, "learning_rate": 9.75692069788865e-06, "loss": 1.0741, "step": 2704 }, { "epoch": 0.7238426545357238, "grad_norm": 3.6420814990997314, "learning_rate": 9.756648050973257e-06, "loss": 1.1498, "step": 2705 }, { "epoch": 0.7241102488627241, "grad_norm": 3.3295350074768066, "learning_rate": 9.756375255051765e-06, "loss": 1.2033, "step": 2706 }, { "epoch": 0.7243778431897244, "grad_norm": 3.3087949752807617, "learning_rate": 9.756102310132716e-06, "loss": 1.144, "step": 2707 }, { "epoch": 0.7246454375167246, "grad_norm": 3.582380771636963, "learning_rate": 9.755829216224662e-06, "loss": 1.0771, "step": 2708 }, { "epoch": 0.7249130318437249, "grad_norm": 3.513324737548828, "learning_rate": 9.75555597333616e-06, "loss": 1.1307, "step": 2709 }, { "epoch": 0.7251806261707252, "grad_norm": 3.110485315322876, "learning_rate": 9.755282581475769e-06, "loss": 1.0273, "step": 2710 }, { "epoch": 0.7254482204977254, "grad_norm": 3.4464118480682373, "learning_rate": 9.75500904065205e-06, "loss": 1.0884, "step": 2711 }, { "epoch": 0.7257158148247257, "grad_norm": 3.5363407135009766, "learning_rate": 9.754735350873577e-06, "loss": 1.1758, "step": 2712 }, { "epoch": 0.725983409151726, "grad_norm": 3.9216394424438477, "learning_rate": 9.75446151214892e-06, "loss": 1.3667, "step": 2713 }, { "epoch": 0.7262510034787263, "grad_norm": 4.166318893432617, "learning_rate": 9.754187524486658e-06, "loss": 1.3686, "step": 2714 }, { "epoch": 0.7265185978057265, "grad_norm": 3.6010658740997314, "learning_rate": 9.753913387895373e-06, "loss": 1.2221, "step": 2715 }, { "epoch": 0.7267861921327268, "grad_norm": 3.6263794898986816, "learning_rate": 9.753639102383653e-06, "loss": 1.092, "step": 2716 }, { "epoch": 0.7270537864597271, "grad_norm": 3.779825210571289, "learning_rate": 9.753364667960093e-06, "loss": 1.1366, "step": 2717 }, { "epoch": 0.7273213807867274, "grad_norm": 3.460033416748047, "learning_rate": 9.753090084633288e-06, "loss": 1.2492, "step": 2718 }, { "epoch": 0.7275889751137276, "grad_norm": 3.507516384124756, "learning_rate": 9.752815352411837e-06, "loss": 1.2037, "step": 2719 }, { "epoch": 0.7278565694407279, "grad_norm": 3.2808637619018555, "learning_rate": 9.752540471304351e-06, "loss": 1.1144, "step": 2720 }, { "epoch": 0.7281241637677281, "grad_norm": 3.5148873329162598, "learning_rate": 9.752265441319437e-06, "loss": 1.1453, "step": 2721 }, { "epoch": 0.7283917580947283, "grad_norm": 3.546168327331543, "learning_rate": 9.751990262465712e-06, "loss": 1.1504, "step": 2722 }, { "epoch": 0.7286593524217286, "grad_norm": 3.2594752311706543, "learning_rate": 9.751714934751795e-06, "loss": 1.1144, "step": 2723 }, { "epoch": 0.7289269467487289, "grad_norm": 3.6359105110168457, "learning_rate": 9.751439458186314e-06, "loss": 1.0574, "step": 2724 }, { "epoch": 0.7291945410757292, "grad_norm": 3.5395443439483643, "learning_rate": 9.751163832777894e-06, "loss": 1.1311, "step": 2725 }, { "epoch": 0.7294621354027294, "grad_norm": 3.6369519233703613, "learning_rate": 9.750888058535175e-06, "loss": 1.228, "step": 2726 }, { "epoch": 0.7297297297297297, "grad_norm": 3.7972755432128906, "learning_rate": 9.75061213546679e-06, "loss": 1.2122, "step": 2727 }, { "epoch": 0.72999732405673, "grad_norm": 3.208137273788452, "learning_rate": 9.750336063581385e-06, "loss": 1.1357, "step": 2728 }, { "epoch": 0.7302649183837303, "grad_norm": 3.6804027557373047, "learning_rate": 9.75005984288761e-06, "loss": 1.0588, "step": 2729 }, { "epoch": 0.7305325127107305, "grad_norm": 3.5338356494903564, "learning_rate": 9.749783473394115e-06, "loss": 1.1344, "step": 2730 }, { "epoch": 0.7308001070377308, "grad_norm": 3.542436122894287, "learning_rate": 9.74950695510956e-06, "loss": 1.0266, "step": 2731 }, { "epoch": 0.7310677013647311, "grad_norm": 3.2684695720672607, "learning_rate": 9.749230288042605e-06, "loss": 1.1069, "step": 2732 }, { "epoch": 0.7313352956917314, "grad_norm": 3.5910449028015137, "learning_rate": 9.748953472201919e-06, "loss": 1.1241, "step": 2733 }, { "epoch": 0.7316028900187316, "grad_norm": 3.693363904953003, "learning_rate": 9.74867650759617e-06, "loss": 1.029, "step": 2734 }, { "epoch": 0.7318704843457319, "grad_norm": 3.376753091812134, "learning_rate": 9.748399394234038e-06, "loss": 1.1953, "step": 2735 }, { "epoch": 0.7321380786727322, "grad_norm": 3.5596413612365723, "learning_rate": 9.7481221321242e-06, "loss": 1.1171, "step": 2736 }, { "epoch": 0.7324056729997324, "grad_norm": 3.588493585586548, "learning_rate": 9.747844721275345e-06, "loss": 1.3143, "step": 2737 }, { "epoch": 0.7326732673267327, "grad_norm": 3.4020540714263916, "learning_rate": 9.747567161696163e-06, "loss": 1.1001, "step": 2738 }, { "epoch": 0.732940861653733, "grad_norm": 3.346292018890381, "learning_rate": 9.747289453395348e-06, "loss": 1.0981, "step": 2739 }, { "epoch": 0.7332084559807333, "grad_norm": 3.401524543762207, "learning_rate": 9.747011596381597e-06, "loss": 1.0512, "step": 2740 }, { "epoch": 0.7334760503077334, "grad_norm": 3.25940203666687, "learning_rate": 9.746733590663616e-06, "loss": 1.1377, "step": 2741 }, { "epoch": 0.7337436446347337, "grad_norm": 3.116464376449585, "learning_rate": 9.746455436250116e-06, "loss": 1.0579, "step": 2742 }, { "epoch": 0.734011238961734, "grad_norm": 3.291623592376709, "learning_rate": 9.746177133149805e-06, "loss": 1.0135, "step": 2743 }, { "epoch": 0.7342788332887343, "grad_norm": 4.061689376831055, "learning_rate": 9.745898681371408e-06, "loss": 1.3905, "step": 2744 }, { "epoch": 0.7345464276157345, "grad_norm": 3.388113021850586, "learning_rate": 9.74562008092364e-06, "loss": 1.1734, "step": 2745 }, { "epoch": 0.7348140219427348, "grad_norm": 3.3899624347686768, "learning_rate": 9.745341331815237e-06, "loss": 1.1051, "step": 2746 }, { "epoch": 0.7350816162697351, "grad_norm": 3.5263469219207764, "learning_rate": 9.745062434054924e-06, "loss": 1.2386, "step": 2747 }, { "epoch": 0.7353492105967353, "grad_norm": 3.7269399166107178, "learning_rate": 9.744783387651442e-06, "loss": 1.1823, "step": 2748 }, { "epoch": 0.7356168049237356, "grad_norm": 3.4878084659576416, "learning_rate": 9.74450419261353e-06, "loss": 1.0985, "step": 2749 }, { "epoch": 0.7358843992507359, "grad_norm": 3.7728066444396973, "learning_rate": 9.744224848949935e-06, "loss": 1.1252, "step": 2750 }, { "epoch": 0.7361519935777362, "grad_norm": 3.4812254905700684, "learning_rate": 9.743945356669406e-06, "loss": 1.1439, "step": 2751 }, { "epoch": 0.7364195879047364, "grad_norm": 3.372687816619873, "learning_rate": 9.743665715780702e-06, "loss": 1.1005, "step": 2752 }, { "epoch": 0.7366871822317367, "grad_norm": 3.568819522857666, "learning_rate": 9.743385926292578e-06, "loss": 1.1, "step": 2753 }, { "epoch": 0.736954776558737, "grad_norm": 3.3924365043640137, "learning_rate": 9.743105988213802e-06, "loss": 1.2092, "step": 2754 }, { "epoch": 0.7372223708857373, "grad_norm": 3.7355546951293945, "learning_rate": 9.742825901553144e-06, "loss": 1.2169, "step": 2755 }, { "epoch": 0.7374899652127375, "grad_norm": 3.2989847660064697, "learning_rate": 9.742545666319376e-06, "loss": 1.1556, "step": 2756 }, { "epoch": 0.7377575595397378, "grad_norm": 3.268017530441284, "learning_rate": 9.742265282521278e-06, "loss": 1.0398, "step": 2757 }, { "epoch": 0.7380251538667381, "grad_norm": 3.880585193634033, "learning_rate": 9.741984750167632e-06, "loss": 1.3448, "step": 2758 }, { "epoch": 0.7382927481937382, "grad_norm": 3.6967694759368896, "learning_rate": 9.741704069267227e-06, "loss": 1.2818, "step": 2759 }, { "epoch": 0.7385603425207385, "grad_norm": 3.3277947902679443, "learning_rate": 9.741423239828854e-06, "loss": 1.1014, "step": 2760 }, { "epoch": 0.7388279368477388, "grad_norm": 3.7067902088165283, "learning_rate": 9.74114226186131e-06, "loss": 1.1701, "step": 2761 }, { "epoch": 0.7390955311747391, "grad_norm": 3.5703206062316895, "learning_rate": 9.740861135373399e-06, "loss": 1.2229, "step": 2762 }, { "epoch": 0.7393631255017393, "grad_norm": 3.534301519393921, "learning_rate": 9.740579860373928e-06, "loss": 1.162, "step": 2763 }, { "epoch": 0.7396307198287396, "grad_norm": 3.224804162979126, "learning_rate": 9.740298436871705e-06, "loss": 1.0507, "step": 2764 }, { "epoch": 0.7398983141557399, "grad_norm": 3.5627236366271973, "learning_rate": 9.74001686487555e-06, "loss": 1.1545, "step": 2765 }, { "epoch": 0.7401659084827402, "grad_norm": 3.293410301208496, "learning_rate": 9.73973514439428e-06, "loss": 1.0306, "step": 2766 }, { "epoch": 0.7404335028097404, "grad_norm": 3.6083991527557373, "learning_rate": 9.73945327543672e-06, "loss": 1.2526, "step": 2767 }, { "epoch": 0.7407010971367407, "grad_norm": 3.2375547885894775, "learning_rate": 9.739171258011703e-06, "loss": 1.0081, "step": 2768 }, { "epoch": 0.740968691463741, "grad_norm": 3.6871652603149414, "learning_rate": 9.73888909212806e-06, "loss": 1.1781, "step": 2769 }, { "epoch": 0.7412362857907412, "grad_norm": 3.2498250007629395, "learning_rate": 9.738606777794633e-06, "loss": 1.1097, "step": 2770 }, { "epoch": 0.7415038801177415, "grad_norm": 3.439887046813965, "learning_rate": 9.738324315020263e-06, "loss": 1.1992, "step": 2771 }, { "epoch": 0.7417714744447418, "grad_norm": 3.3322887420654297, "learning_rate": 9.7380417038138e-06, "loss": 1.2245, "step": 2772 }, { "epoch": 0.7420390687717421, "grad_norm": 3.5965330600738525, "learning_rate": 9.737758944184096e-06, "loss": 1.1906, "step": 2773 }, { "epoch": 0.7423066630987423, "grad_norm": 3.299678325653076, "learning_rate": 9.737476036140011e-06, "loss": 1.2128, "step": 2774 }, { "epoch": 0.7425742574257426, "grad_norm": 3.468172073364258, "learning_rate": 9.737192979690404e-06, "loss": 1.1996, "step": 2775 }, { "epoch": 0.7428418517527429, "grad_norm": 3.4514479637145996, "learning_rate": 9.736909774844145e-06, "loss": 1.1241, "step": 2776 }, { "epoch": 0.7431094460797432, "grad_norm": 3.221329927444458, "learning_rate": 9.736626421610104e-06, "loss": 1.1235, "step": 2777 }, { "epoch": 0.7433770404067434, "grad_norm": 3.6566755771636963, "learning_rate": 9.73634291999716e-06, "loss": 1.0872, "step": 2778 }, { "epoch": 0.7436446347337436, "grad_norm": 3.055006980895996, "learning_rate": 9.73605927001419e-06, "loss": 1.0536, "step": 2779 }, { "epoch": 0.743912229060744, "grad_norm": 3.0918097496032715, "learning_rate": 9.735775471670079e-06, "loss": 1.0914, "step": 2780 }, { "epoch": 0.7441798233877441, "grad_norm": 3.5324559211730957, "learning_rate": 9.735491524973723e-06, "loss": 1.041, "step": 2781 }, { "epoch": 0.7444474177147444, "grad_norm": 3.506650447845459, "learning_rate": 9.73520742993401e-06, "loss": 1.1942, "step": 2782 }, { "epoch": 0.7447150120417447, "grad_norm": 3.5160765647888184, "learning_rate": 9.734923186559845e-06, "loss": 1.1306, "step": 2783 }, { "epoch": 0.744982606368745, "grad_norm": 3.377394676208496, "learning_rate": 9.73463879486013e-06, "loss": 1.1159, "step": 2784 }, { "epoch": 0.7452502006957452, "grad_norm": 3.6153159141540527, "learning_rate": 9.734354254843773e-06, "loss": 1.0963, "step": 2785 }, { "epoch": 0.7455177950227455, "grad_norm": 3.4530587196350098, "learning_rate": 9.734069566519688e-06, "loss": 1.1871, "step": 2786 }, { "epoch": 0.7457853893497458, "grad_norm": 3.537059783935547, "learning_rate": 9.733784729896794e-06, "loss": 1.2016, "step": 2787 }, { "epoch": 0.7460529836767461, "grad_norm": 3.379148483276367, "learning_rate": 9.733499744984013e-06, "loss": 1.1321, "step": 2788 }, { "epoch": 0.7463205780037463, "grad_norm": 3.4380931854248047, "learning_rate": 9.733214611790273e-06, "loss": 1.1662, "step": 2789 }, { "epoch": 0.7465881723307466, "grad_norm": 3.5000431537628174, "learning_rate": 9.732929330324505e-06, "loss": 1.0895, "step": 2790 }, { "epoch": 0.7468557666577469, "grad_norm": 3.2787697315216064, "learning_rate": 9.732643900595646e-06, "loss": 1.094, "step": 2791 }, { "epoch": 0.7471233609847471, "grad_norm": 3.452360153198242, "learning_rate": 9.732358322612639e-06, "loss": 1.198, "step": 2792 }, { "epoch": 0.7473909553117474, "grad_norm": 3.0884242057800293, "learning_rate": 9.732072596384427e-06, "loss": 1.1294, "step": 2793 }, { "epoch": 0.7476585496387477, "grad_norm": 3.0468862056732178, "learning_rate": 9.731786721919963e-06, "loss": 1.0767, "step": 2794 }, { "epoch": 0.747926143965748, "grad_norm": 3.783818483352661, "learning_rate": 9.7315006992282e-06, "loss": 1.1782, "step": 2795 }, { "epoch": 0.7481937382927482, "grad_norm": 3.290731430053711, "learning_rate": 9.731214528318101e-06, "loss": 1.1673, "step": 2796 }, { "epoch": 0.7484613326197485, "grad_norm": 3.4749789237976074, "learning_rate": 9.730928209198629e-06, "loss": 1.0845, "step": 2797 }, { "epoch": 0.7487289269467488, "grad_norm": 3.39563250541687, "learning_rate": 9.730641741878752e-06, "loss": 1.1038, "step": 2798 }, { "epoch": 0.748996521273749, "grad_norm": 3.1787352561950684, "learning_rate": 9.730355126367446e-06, "loss": 0.9825, "step": 2799 }, { "epoch": 0.7492641156007492, "grad_norm": 3.4629030227661133, "learning_rate": 9.730068362673686e-06, "loss": 1.1339, "step": 2800 }, { "epoch": 0.7495317099277495, "grad_norm": 3.959449052810669, "learning_rate": 9.72978145080646e-06, "loss": 1.3313, "step": 2801 }, { "epoch": 0.7497993042547498, "grad_norm": 2.8361458778381348, "learning_rate": 9.729494390774753e-06, "loss": 1.0094, "step": 2802 }, { "epoch": 0.75006689858175, "grad_norm": 3.317673921585083, "learning_rate": 9.729207182587556e-06, "loss": 1.0883, "step": 2803 }, { "epoch": 0.7503344929087503, "grad_norm": 3.1433663368225098, "learning_rate": 9.728919826253872e-06, "loss": 1.0981, "step": 2804 }, { "epoch": 0.7506020872357506, "grad_norm": 3.8277859687805176, "learning_rate": 9.728632321782693e-06, "loss": 1.1252, "step": 2805 }, { "epoch": 0.7508696815627509, "grad_norm": 3.1304125785827637, "learning_rate": 9.728344669183033e-06, "loss": 1.0509, "step": 2806 }, { "epoch": 0.7511372758897511, "grad_norm": 3.266526937484741, "learning_rate": 9.728056868463903e-06, "loss": 1.1155, "step": 2807 }, { "epoch": 0.7514048702167514, "grad_norm": 3.4399197101593018, "learning_rate": 9.727768919634314e-06, "loss": 1.2062, "step": 2808 }, { "epoch": 0.7516724645437517, "grad_norm": 3.5575180053710938, "learning_rate": 9.72748082270329e-06, "loss": 1.211, "step": 2809 }, { "epoch": 0.751940058870752, "grad_norm": 3.2147161960601807, "learning_rate": 9.727192577679852e-06, "loss": 1.0478, "step": 2810 }, { "epoch": 0.7522076531977522, "grad_norm": 3.8405959606170654, "learning_rate": 9.726904184573034e-06, "loss": 1.2406, "step": 2811 }, { "epoch": 0.7524752475247525, "grad_norm": 3.6334824562072754, "learning_rate": 9.726615643391868e-06, "loss": 1.1493, "step": 2812 }, { "epoch": 0.7527428418517528, "grad_norm": 3.3839588165283203, "learning_rate": 9.726326954145391e-06, "loss": 1.2048, "step": 2813 }, { "epoch": 0.753010436178753, "grad_norm": 3.4611270427703857, "learning_rate": 9.72603811684265e-06, "loss": 1.1183, "step": 2814 }, { "epoch": 0.7532780305057533, "grad_norm": 3.1408448219299316, "learning_rate": 9.725749131492691e-06, "loss": 1.0962, "step": 2815 }, { "epoch": 0.7535456248327536, "grad_norm": 3.3118159770965576, "learning_rate": 9.725459998104568e-06, "loss": 1.1289, "step": 2816 }, { "epoch": 0.7538132191597539, "grad_norm": 3.467696189880371, "learning_rate": 9.725170716687337e-06, "loss": 1.1242, "step": 2817 }, { "epoch": 0.754080813486754, "grad_norm": 3.346605062484741, "learning_rate": 9.72488128725006e-06, "loss": 1.1256, "step": 2818 }, { "epoch": 0.7543484078137543, "grad_norm": 2.9358856678009033, "learning_rate": 9.724591709801804e-06, "loss": 1.0252, "step": 2819 }, { "epoch": 0.7546160021407546, "grad_norm": 3.8935790061950684, "learning_rate": 9.724301984351642e-06, "loss": 1.2343, "step": 2820 }, { "epoch": 0.7548835964677549, "grad_norm": 3.3613624572753906, "learning_rate": 9.724012110908647e-06, "loss": 1.0944, "step": 2821 }, { "epoch": 0.7551511907947551, "grad_norm": 3.857342004776001, "learning_rate": 9.723722089481902e-06, "loss": 1.1819, "step": 2822 }, { "epoch": 0.7554187851217554, "grad_norm": 3.4227402210235596, "learning_rate": 9.72343192008049e-06, "loss": 1.1457, "step": 2823 }, { "epoch": 0.7556863794487557, "grad_norm": 3.526207447052002, "learning_rate": 9.723141602713502e-06, "loss": 1.1525, "step": 2824 }, { "epoch": 0.7559539737757559, "grad_norm": 3.2722322940826416, "learning_rate": 9.722851137390032e-06, "loss": 0.999, "step": 2825 }, { "epoch": 0.7562215681027562, "grad_norm": 3.4464046955108643, "learning_rate": 9.72256052411918e-06, "loss": 1.1352, "step": 2826 }, { "epoch": 0.7564891624297565, "grad_norm": 3.817711353302002, "learning_rate": 9.72226976291005e-06, "loss": 1.2295, "step": 2827 }, { "epoch": 0.7567567567567568, "grad_norm": 3.3932790756225586, "learning_rate": 9.721978853771747e-06, "loss": 1.1339, "step": 2828 }, { "epoch": 0.757024351083757, "grad_norm": 3.173757553100586, "learning_rate": 9.721687796713388e-06, "loss": 1.0569, "step": 2829 }, { "epoch": 0.7572919454107573, "grad_norm": 3.3787193298339844, "learning_rate": 9.721396591744089e-06, "loss": 1.0959, "step": 2830 }, { "epoch": 0.7575595397377576, "grad_norm": 3.255352258682251, "learning_rate": 9.72110523887297e-06, "loss": 1.1342, "step": 2831 }, { "epoch": 0.7578271340647579, "grad_norm": 3.6418159008026123, "learning_rate": 9.720813738109163e-06, "loss": 1.158, "step": 2832 }, { "epoch": 0.7580947283917581, "grad_norm": 3.731308937072754, "learning_rate": 9.720522089461795e-06, "loss": 1.2893, "step": 2833 }, { "epoch": 0.7583623227187584, "grad_norm": 3.3955862522125244, "learning_rate": 9.720230292940005e-06, "loss": 1.2315, "step": 2834 }, { "epoch": 0.7586299170457587, "grad_norm": 3.6548402309417725, "learning_rate": 9.71993834855293e-06, "loss": 1.3398, "step": 2835 }, { "epoch": 0.7588975113727588, "grad_norm": 3.494920015335083, "learning_rate": 9.71964625630972e-06, "loss": 1.0822, "step": 2836 }, { "epoch": 0.7591651056997591, "grad_norm": 3.2807202339172363, "learning_rate": 9.719354016219524e-06, "loss": 1.0012, "step": 2837 }, { "epoch": 0.7594327000267594, "grad_norm": 3.419506549835205, "learning_rate": 9.719061628291495e-06, "loss": 1.1424, "step": 2838 }, { "epoch": 0.7597002943537597, "grad_norm": 3.452536106109619, "learning_rate": 9.718769092534791e-06, "loss": 1.2047, "step": 2839 }, { "epoch": 0.7599678886807599, "grad_norm": 3.1318492889404297, "learning_rate": 9.71847640895858e-06, "loss": 1.1482, "step": 2840 }, { "epoch": 0.7602354830077602, "grad_norm": 3.5314605236053467, "learning_rate": 9.718183577572027e-06, "loss": 1.1353, "step": 2841 }, { "epoch": 0.7605030773347605, "grad_norm": 3.302334785461426, "learning_rate": 9.717890598384308e-06, "loss": 1.0301, "step": 2842 }, { "epoch": 0.7607706716617608, "grad_norm": 3.7543177604675293, "learning_rate": 9.7175974714046e-06, "loss": 1.1595, "step": 2843 }, { "epoch": 0.761038265988761, "grad_norm": 2.979762554168701, "learning_rate": 9.717304196642084e-06, "loss": 1.0898, "step": 2844 }, { "epoch": 0.7613058603157613, "grad_norm": 3.3831746578216553, "learning_rate": 9.717010774105948e-06, "loss": 1.0798, "step": 2845 }, { "epoch": 0.7615734546427616, "grad_norm": 3.5637614727020264, "learning_rate": 9.716717203805383e-06, "loss": 1.2266, "step": 2846 }, { "epoch": 0.7618410489697618, "grad_norm": 3.3989365100860596, "learning_rate": 9.716423485749587e-06, "loss": 1.0865, "step": 2847 }, { "epoch": 0.7621086432967621, "grad_norm": 3.465242385864258, "learning_rate": 9.716129619947759e-06, "loss": 1.1392, "step": 2848 }, { "epoch": 0.7623762376237624, "grad_norm": 3.8061766624450684, "learning_rate": 9.715835606409107e-06, "loss": 1.1998, "step": 2849 }, { "epoch": 0.7626438319507627, "grad_norm": 3.2725119590759277, "learning_rate": 9.71554144514284e-06, "loss": 1.1301, "step": 2850 }, { "epoch": 0.7629114262777629, "grad_norm": 3.4146728515625, "learning_rate": 9.715247136158173e-06, "loss": 1.1111, "step": 2851 }, { "epoch": 0.7631790206047632, "grad_norm": 3.5250372886657715, "learning_rate": 9.714952679464324e-06, "loss": 1.2267, "step": 2852 }, { "epoch": 0.7634466149317635, "grad_norm": 3.194732427597046, "learning_rate": 9.714658075070518e-06, "loss": 1.0405, "step": 2853 }, { "epoch": 0.7637142092587638, "grad_norm": 3.0676612854003906, "learning_rate": 9.714363322985984e-06, "loss": 0.9979, "step": 2854 }, { "epoch": 0.763981803585764, "grad_norm": 3.3650014400482178, "learning_rate": 9.714068423219958e-06, "loss": 1.0496, "step": 2855 }, { "epoch": 0.7642493979127643, "grad_norm": 3.422921657562256, "learning_rate": 9.713773375781672e-06, "loss": 1.1274, "step": 2856 }, { "epoch": 0.7645169922397645, "grad_norm": 3.542006492614746, "learning_rate": 9.713478180680375e-06, "loss": 1.149, "step": 2857 }, { "epoch": 0.7647845865667647, "grad_norm": 3.3169963359832764, "learning_rate": 9.71318283792531e-06, "loss": 1.0904, "step": 2858 }, { "epoch": 0.765052180893765, "grad_norm": 3.1313977241516113, "learning_rate": 9.71288734752573e-06, "loss": 1.0458, "step": 2859 }, { "epoch": 0.7653197752207653, "grad_norm": 3.309631586074829, "learning_rate": 9.712591709490891e-06, "loss": 1.1167, "step": 2860 }, { "epoch": 0.7655873695477656, "grad_norm": 3.1519172191619873, "learning_rate": 9.712295923830057e-06, "loss": 1.0628, "step": 2861 }, { "epoch": 0.7658549638747658, "grad_norm": 3.4607601165771484, "learning_rate": 9.71199999055249e-06, "loss": 1.157, "step": 2862 }, { "epoch": 0.7661225582017661, "grad_norm": 3.342031478881836, "learning_rate": 9.711703909667461e-06, "loss": 1.1209, "step": 2863 }, { "epoch": 0.7663901525287664, "grad_norm": 3.9641542434692383, "learning_rate": 9.711407681184248e-06, "loss": 1.1324, "step": 2864 }, { "epoch": 0.7666577468557667, "grad_norm": 3.4209370613098145, "learning_rate": 9.711111305112126e-06, "loss": 1.0754, "step": 2865 }, { "epoch": 0.7669253411827669, "grad_norm": 3.292510509490967, "learning_rate": 9.710814781460383e-06, "loss": 1.0765, "step": 2866 }, { "epoch": 0.7671929355097672, "grad_norm": 4.118853569030762, "learning_rate": 9.710518110238308e-06, "loss": 1.2039, "step": 2867 }, { "epoch": 0.7674605298367675, "grad_norm": 3.280724287033081, "learning_rate": 9.71022129145519e-06, "loss": 0.9847, "step": 2868 }, { "epoch": 0.7677281241637677, "grad_norm": 3.1965861320495605, "learning_rate": 9.709924325120333e-06, "loss": 0.9819, "step": 2869 }, { "epoch": 0.767995718490768, "grad_norm": 3.7593677043914795, "learning_rate": 9.709627211243036e-06, "loss": 1.259, "step": 2870 }, { "epoch": 0.7682633128177683, "grad_norm": 3.556138277053833, "learning_rate": 9.709329949832606e-06, "loss": 1.0214, "step": 2871 }, { "epoch": 0.7685309071447686, "grad_norm": 3.3062312602996826, "learning_rate": 9.709032540898356e-06, "loss": 1.1449, "step": 2872 }, { "epoch": 0.7687985014717688, "grad_norm": 3.483119249343872, "learning_rate": 9.708734984449605e-06, "loss": 1.2922, "step": 2873 }, { "epoch": 0.7690660957987691, "grad_norm": 3.3715760707855225, "learning_rate": 9.70843728049567e-06, "loss": 1.0213, "step": 2874 }, { "epoch": 0.7693336901257694, "grad_norm": 3.3638691902160645, "learning_rate": 9.70813942904588e-06, "loss": 1.1002, "step": 2875 }, { "epoch": 0.7696012844527697, "grad_norm": 3.502279281616211, "learning_rate": 9.707841430109564e-06, "loss": 1.1725, "step": 2876 }, { "epoch": 0.7698688787797698, "grad_norm": 3.4064905643463135, "learning_rate": 9.707543283696056e-06, "loss": 1.1057, "step": 2877 }, { "epoch": 0.7701364731067701, "grad_norm": 3.243762969970703, "learning_rate": 9.707244989814699e-06, "loss": 1.039, "step": 2878 }, { "epoch": 0.7704040674337704, "grad_norm": 3.5731518268585205, "learning_rate": 9.706946548474836e-06, "loss": 1.0588, "step": 2879 }, { "epoch": 0.7706716617607706, "grad_norm": 3.416506052017212, "learning_rate": 9.706647959685813e-06, "loss": 1.0165, "step": 2880 }, { "epoch": 0.7709392560877709, "grad_norm": 3.192201852798462, "learning_rate": 9.706349223456988e-06, "loss": 1.0624, "step": 2881 }, { "epoch": 0.7712068504147712, "grad_norm": 3.571995735168457, "learning_rate": 9.706050339797714e-06, "loss": 1.1391, "step": 2882 }, { "epoch": 0.7714744447417715, "grad_norm": 3.073079824447632, "learning_rate": 9.70575130871736e-06, "loss": 1.0965, "step": 2883 }, { "epoch": 0.7717420390687717, "grad_norm": 3.43789005279541, "learning_rate": 9.705452130225287e-06, "loss": 1.0569, "step": 2884 }, { "epoch": 0.772009633395772, "grad_norm": 3.334461212158203, "learning_rate": 9.705152804330872e-06, "loss": 1.053, "step": 2885 }, { "epoch": 0.7722772277227723, "grad_norm": 3.743177890777588, "learning_rate": 9.70485333104349e-06, "loss": 1.2276, "step": 2886 }, { "epoch": 0.7725448220497726, "grad_norm": 3.443610191345215, "learning_rate": 9.704553710372524e-06, "loss": 1.0712, "step": 2887 }, { "epoch": 0.7728124163767728, "grad_norm": 3.481642246246338, "learning_rate": 9.704253942327357e-06, "loss": 1.1042, "step": 2888 }, { "epoch": 0.7730800107037731, "grad_norm": 3.3586671352386475, "learning_rate": 9.703954026917379e-06, "loss": 1.2262, "step": 2889 }, { "epoch": 0.7733476050307734, "grad_norm": 3.1895251274108887, "learning_rate": 9.703653964151986e-06, "loss": 0.9845, "step": 2890 }, { "epoch": 0.7736151993577736, "grad_norm": 3.624223232269287, "learning_rate": 9.70335375404058e-06, "loss": 1.3042, "step": 2891 }, { "epoch": 0.7738827936847739, "grad_norm": 3.3234941959381104, "learning_rate": 9.703053396592562e-06, "loss": 1.0994, "step": 2892 }, { "epoch": 0.7741503880117742, "grad_norm": 3.3439879417419434, "learning_rate": 9.702752891817346e-06, "loss": 1.2438, "step": 2893 }, { "epoch": 0.7744179823387745, "grad_norm": 3.2826695442199707, "learning_rate": 9.70245223972434e-06, "loss": 1.0742, "step": 2894 }, { "epoch": 0.7746855766657746, "grad_norm": 3.288297414779663, "learning_rate": 9.702151440322964e-06, "loss": 1.0301, "step": 2895 }, { "epoch": 0.7749531709927749, "grad_norm": 3.380511999130249, "learning_rate": 9.701850493622642e-06, "loss": 1.1317, "step": 2896 }, { "epoch": 0.7752207653197752, "grad_norm": 3.54054856300354, "learning_rate": 9.7015493996328e-06, "loss": 1.0911, "step": 2897 }, { "epoch": 0.7754883596467755, "grad_norm": 3.4729793071746826, "learning_rate": 9.701248158362871e-06, "loss": 1.1824, "step": 2898 }, { "epoch": 0.7757559539737757, "grad_norm": 3.3883352279663086, "learning_rate": 9.700946769822292e-06, "loss": 1.1696, "step": 2899 }, { "epoch": 0.776023548300776, "grad_norm": 3.5626111030578613, "learning_rate": 9.700645234020502e-06, "loss": 1.2284, "step": 2900 }, { "epoch": 0.7762911426277763, "grad_norm": 3.4596970081329346, "learning_rate": 9.70034355096695e-06, "loss": 1.0903, "step": 2901 }, { "epoch": 0.7765587369547765, "grad_norm": 3.7619760036468506, "learning_rate": 9.700041720671082e-06, "loss": 1.2232, "step": 2902 }, { "epoch": 0.7768263312817768, "grad_norm": 3.1871516704559326, "learning_rate": 9.69973974314236e-06, "loss": 1.0154, "step": 2903 }, { "epoch": 0.7770939256087771, "grad_norm": 3.5451059341430664, "learning_rate": 9.699437618390237e-06, "loss": 1.0359, "step": 2904 }, { "epoch": 0.7773615199357774, "grad_norm": 3.7833518981933594, "learning_rate": 9.69913534642418e-06, "loss": 1.1205, "step": 2905 }, { "epoch": 0.7776291142627776, "grad_norm": 3.3426740169525146, "learning_rate": 9.69883292725366e-06, "loss": 1.146, "step": 2906 }, { "epoch": 0.7778967085897779, "grad_norm": 3.564518690109253, "learning_rate": 9.698530360888146e-06, "loss": 1.1515, "step": 2907 }, { "epoch": 0.7781643029167782, "grad_norm": 3.3578410148620605, "learning_rate": 9.69822764733712e-06, "loss": 1.2046, "step": 2908 }, { "epoch": 0.7784318972437785, "grad_norm": 3.161803722381592, "learning_rate": 9.697924786610063e-06, "loss": 1.1811, "step": 2909 }, { "epoch": 0.7786994915707787, "grad_norm": 3.1119868755340576, "learning_rate": 9.697621778716465e-06, "loss": 1.0896, "step": 2910 }, { "epoch": 0.778967085897779, "grad_norm": 3.2111477851867676, "learning_rate": 9.697318623665813e-06, "loss": 1.0613, "step": 2911 }, { "epoch": 0.7792346802247793, "grad_norm": 3.4069631099700928, "learning_rate": 9.697015321467606e-06, "loss": 1.0905, "step": 2912 }, { "epoch": 0.7795022745517795, "grad_norm": 3.5640361309051514, "learning_rate": 9.696711872131347e-06, "loss": 1.2176, "step": 2913 }, { "epoch": 0.7797698688787797, "grad_norm": 3.4428586959838867, "learning_rate": 9.69640827566654e-06, "loss": 1.1433, "step": 2914 }, { "epoch": 0.78003746320578, "grad_norm": 3.6529276371002197, "learning_rate": 9.696104532082695e-06, "loss": 1.2443, "step": 2915 }, { "epoch": 0.7803050575327803, "grad_norm": 3.5144267082214355, "learning_rate": 9.695800641389327e-06, "loss": 1.1708, "step": 2916 }, { "epoch": 0.7805726518597805, "grad_norm": 3.2962844371795654, "learning_rate": 9.695496603595959e-06, "loss": 1.0612, "step": 2917 }, { "epoch": 0.7808402461867808, "grad_norm": 3.4255483150482178, "learning_rate": 9.695192418712111e-06, "loss": 1.1376, "step": 2918 }, { "epoch": 0.7811078405137811, "grad_norm": 3.3936362266540527, "learning_rate": 9.694888086747315e-06, "loss": 1.0977, "step": 2919 }, { "epoch": 0.7813754348407814, "grad_norm": 3.835702896118164, "learning_rate": 9.694583607711102e-06, "loss": 1.2083, "step": 2920 }, { "epoch": 0.7816430291677816, "grad_norm": 3.329684257507324, "learning_rate": 9.69427898161301e-06, "loss": 1.1792, "step": 2921 }, { "epoch": 0.7819106234947819, "grad_norm": 3.5898046493530273, "learning_rate": 9.693974208462585e-06, "loss": 1.1128, "step": 2922 }, { "epoch": 0.7821782178217822, "grad_norm": 3.2505688667297363, "learning_rate": 9.693669288269371e-06, "loss": 1.1022, "step": 2923 }, { "epoch": 0.7824458121487824, "grad_norm": 3.629041910171509, "learning_rate": 9.693364221042922e-06, "loss": 1.0837, "step": 2924 }, { "epoch": 0.7827134064757827, "grad_norm": 3.4234085083007812, "learning_rate": 9.69305900679279e-06, "loss": 1.2019, "step": 2925 }, { "epoch": 0.782981000802783, "grad_norm": 3.353302001953125, "learning_rate": 9.692753645528544e-06, "loss": 1.133, "step": 2926 }, { "epoch": 0.7832485951297833, "grad_norm": 3.490877866744995, "learning_rate": 9.692448137259743e-06, "loss": 1.2875, "step": 2927 }, { "epoch": 0.7835161894567835, "grad_norm": 3.5161383152008057, "learning_rate": 9.692142481995958e-06, "loss": 1.1722, "step": 2928 }, { "epoch": 0.7837837837837838, "grad_norm": 3.030968189239502, "learning_rate": 9.691836679746767e-06, "loss": 1.0362, "step": 2929 }, { "epoch": 0.7840513781107841, "grad_norm": 3.819481611251831, "learning_rate": 9.691530730521748e-06, "loss": 1.2682, "step": 2930 }, { "epoch": 0.7843189724377844, "grad_norm": 3.640918731689453, "learning_rate": 9.691224634330484e-06, "loss": 1.1868, "step": 2931 }, { "epoch": 0.7845865667647846, "grad_norm": 3.7881932258605957, "learning_rate": 9.690918391182568e-06, "loss": 1.2436, "step": 2932 }, { "epoch": 0.7848541610917849, "grad_norm": 3.8479201793670654, "learning_rate": 9.690612001087586e-06, "loss": 1.0979, "step": 2933 }, { "epoch": 0.7851217554187851, "grad_norm": 3.2943499088287354, "learning_rate": 9.690305464055143e-06, "loss": 1.1036, "step": 2934 }, { "epoch": 0.7853893497457853, "grad_norm": 3.42976713180542, "learning_rate": 9.689998780094839e-06, "loss": 1.1348, "step": 2935 }, { "epoch": 0.7856569440727856, "grad_norm": 3.6888561248779297, "learning_rate": 9.689691949216278e-06, "loss": 1.1974, "step": 2936 }, { "epoch": 0.7859245383997859, "grad_norm": 3.266007900238037, "learning_rate": 9.689384971429077e-06, "loss": 1.1575, "step": 2937 }, { "epoch": 0.7861921327267862, "grad_norm": 3.421496629714966, "learning_rate": 9.689077846742847e-06, "loss": 1.1723, "step": 2938 }, { "epoch": 0.7864597270537864, "grad_norm": 3.432095766067505, "learning_rate": 9.688770575167215e-06, "loss": 1.0722, "step": 2939 }, { "epoch": 0.7867273213807867, "grad_norm": 3.453275680541992, "learning_rate": 9.688463156711801e-06, "loss": 1.145, "step": 2940 }, { "epoch": 0.786994915707787, "grad_norm": 3.332948923110962, "learning_rate": 9.688155591386239e-06, "loss": 1.0426, "step": 2941 }, { "epoch": 0.7872625100347873, "grad_norm": 3.5865981578826904, "learning_rate": 9.687847879200161e-06, "loss": 1.2441, "step": 2942 }, { "epoch": 0.7875301043617875, "grad_norm": 3.633302688598633, "learning_rate": 9.687540020163209e-06, "loss": 1.2145, "step": 2943 }, { "epoch": 0.7877976986887878, "grad_norm": 3.5107271671295166, "learning_rate": 9.687232014285025e-06, "loss": 1.1149, "step": 2944 }, { "epoch": 0.7880652930157881, "grad_norm": 3.2951745986938477, "learning_rate": 9.686923861575258e-06, "loss": 1.0549, "step": 2945 }, { "epoch": 0.7883328873427883, "grad_norm": 3.1973979473114014, "learning_rate": 9.68661556204356e-06, "loss": 1.1486, "step": 2946 }, { "epoch": 0.7886004816697886, "grad_norm": 3.6182639598846436, "learning_rate": 9.68630711569959e-06, "loss": 1.1141, "step": 2947 }, { "epoch": 0.7888680759967889, "grad_norm": 3.3912758827209473, "learning_rate": 9.685998522553012e-06, "loss": 1.1695, "step": 2948 }, { "epoch": 0.7891356703237892, "grad_norm": 3.658475637435913, "learning_rate": 9.68568978261349e-06, "loss": 1.216, "step": 2949 }, { "epoch": 0.7894032646507894, "grad_norm": 3.5192058086395264, "learning_rate": 9.685380895890698e-06, "loss": 1.1805, "step": 2950 }, { "epoch": 0.7896708589777897, "grad_norm": 3.3814120292663574, "learning_rate": 9.68507186239431e-06, "loss": 1.1627, "step": 2951 }, { "epoch": 0.78993845330479, "grad_norm": 3.405315399169922, "learning_rate": 9.684762682134008e-06, "loss": 1.2152, "step": 2952 }, { "epoch": 0.7902060476317903, "grad_norm": 3.6160085201263428, "learning_rate": 9.684453355119476e-06, "loss": 1.2863, "step": 2953 }, { "epoch": 0.7904736419587904, "grad_norm": 3.364459991455078, "learning_rate": 9.684143881360406e-06, "loss": 0.9876, "step": 2954 }, { "epoch": 0.7907412362857907, "grad_norm": 3.066523551940918, "learning_rate": 9.683834260866492e-06, "loss": 0.9858, "step": 2955 }, { "epoch": 0.791008830612791, "grad_norm": 3.4516170024871826, "learning_rate": 9.68352449364743e-06, "loss": 1.3306, "step": 2956 }, { "epoch": 0.7912764249397912, "grad_norm": 3.5677990913391113, "learning_rate": 9.68321457971293e-06, "loss": 1.1928, "step": 2957 }, { "epoch": 0.7915440192667915, "grad_norm": 3.59769868850708, "learning_rate": 9.682904519072696e-06, "loss": 1.2216, "step": 2958 }, { "epoch": 0.7918116135937918, "grad_norm": 3.0195512771606445, "learning_rate": 9.682594311736439e-06, "loss": 0.9886, "step": 2959 }, { "epoch": 0.7920792079207921, "grad_norm": 3.224322557449341, "learning_rate": 9.68228395771388e-06, "loss": 1.0185, "step": 2960 }, { "epoch": 0.7923468022477923, "grad_norm": 3.414687395095825, "learning_rate": 9.681973457014742e-06, "loss": 1.2, "step": 2961 }, { "epoch": 0.7926143965747926, "grad_norm": 3.225135564804077, "learning_rate": 9.681662809648749e-06, "loss": 1.1227, "step": 2962 }, { "epoch": 0.7928819909017929, "grad_norm": 3.3739073276519775, "learning_rate": 9.681352015625634e-06, "loss": 1.1265, "step": 2963 }, { "epoch": 0.7931495852287932, "grad_norm": 3.418264389038086, "learning_rate": 9.681041074955131e-06, "loss": 1.2126, "step": 2964 }, { "epoch": 0.7934171795557934, "grad_norm": 3.712611198425293, "learning_rate": 9.68072998764698e-06, "loss": 1.2432, "step": 2965 }, { "epoch": 0.7936847738827937, "grad_norm": 3.4805774688720703, "learning_rate": 9.68041875371093e-06, "loss": 1.2368, "step": 2966 }, { "epoch": 0.793952368209794, "grad_norm": 3.31071400642395, "learning_rate": 9.68010737315673e-06, "loss": 1.1119, "step": 2967 }, { "epoch": 0.7942199625367942, "grad_norm": 3.2610623836517334, "learning_rate": 9.679795845994129e-06, "loss": 1.0559, "step": 2968 }, { "epoch": 0.7944875568637945, "grad_norm": 3.272242546081543, "learning_rate": 9.67948417223289e-06, "loss": 1.1692, "step": 2969 }, { "epoch": 0.7947551511907948, "grad_norm": 3.6862285137176514, "learning_rate": 9.679172351882778e-06, "loss": 1.2637, "step": 2970 }, { "epoch": 0.7950227455177951, "grad_norm": 3.0989432334899902, "learning_rate": 9.678860384953558e-06, "loss": 1.0365, "step": 2971 }, { "epoch": 0.7952903398447952, "grad_norm": 3.5036988258361816, "learning_rate": 9.678548271455002e-06, "loss": 1.1943, "step": 2972 }, { "epoch": 0.7955579341717955, "grad_norm": 3.549891233444214, "learning_rate": 9.67823601139689e-06, "loss": 1.2642, "step": 2973 }, { "epoch": 0.7958255284987958, "grad_norm": 2.958547353744507, "learning_rate": 9.677923604789002e-06, "loss": 1.0623, "step": 2974 }, { "epoch": 0.7960931228257961, "grad_norm": 3.7506515979766846, "learning_rate": 9.677611051641126e-06, "loss": 1.327, "step": 2975 }, { "epoch": 0.7963607171527963, "grad_norm": 3.485591173171997, "learning_rate": 9.677298351963051e-06, "loss": 1.1078, "step": 2976 }, { "epoch": 0.7966283114797966, "grad_norm": 3.605431079864502, "learning_rate": 9.676985505764575e-06, "loss": 1.1839, "step": 2977 }, { "epoch": 0.7968959058067969, "grad_norm": 3.253654956817627, "learning_rate": 9.676672513055496e-06, "loss": 1.0312, "step": 2978 }, { "epoch": 0.7971635001337971, "grad_norm": 3.57499361038208, "learning_rate": 9.67635937384562e-06, "loss": 1.114, "step": 2979 }, { "epoch": 0.7974310944607974, "grad_norm": 3.2494728565216064, "learning_rate": 9.676046088144755e-06, "loss": 1.0676, "step": 2980 }, { "epoch": 0.7976986887877977, "grad_norm": 3.5611202716827393, "learning_rate": 9.675732655962716e-06, "loss": 1.1961, "step": 2981 }, { "epoch": 0.797966283114798, "grad_norm": 3.6243984699249268, "learning_rate": 9.675419077309323e-06, "loss": 1.1931, "step": 2982 }, { "epoch": 0.7982338774417982, "grad_norm": 3.6850814819335938, "learning_rate": 9.675105352194396e-06, "loss": 1.0914, "step": 2983 }, { "epoch": 0.7985014717687985, "grad_norm": 3.424598455429077, "learning_rate": 9.674791480627763e-06, "loss": 1.2153, "step": 2984 }, { "epoch": 0.7987690660957988, "grad_norm": 3.0985870361328125, "learning_rate": 9.67447746261926e-06, "loss": 0.9851, "step": 2985 }, { "epoch": 0.7990366604227991, "grad_norm": 3.504242181777954, "learning_rate": 9.67416329817872e-06, "loss": 1.1605, "step": 2986 }, { "epoch": 0.7993042547497993, "grad_norm": 3.373812198638916, "learning_rate": 9.673848987315986e-06, "loss": 1.2192, "step": 2987 }, { "epoch": 0.7995718490767996, "grad_norm": 3.4131312370300293, "learning_rate": 9.673534530040905e-06, "loss": 1.0798, "step": 2988 }, { "epoch": 0.7998394434037999, "grad_norm": 3.4545650482177734, "learning_rate": 9.673219926363325e-06, "loss": 1.078, "step": 2989 }, { "epoch": 0.8001070377308, "grad_norm": 3.47906231880188, "learning_rate": 9.672905176293103e-06, "loss": 1.0452, "step": 2990 }, { "epoch": 0.8003746320578004, "grad_norm": 3.450021505355835, "learning_rate": 9.6725902798401e-06, "loss": 1.1115, "step": 2991 }, { "epoch": 0.8006422263848006, "grad_norm": 3.7371201515197754, "learning_rate": 9.672275237014178e-06, "loss": 1.2083, "step": 2992 }, { "epoch": 0.800909820711801, "grad_norm": 3.5337791442871094, "learning_rate": 9.671960047825207e-06, "loss": 1.1462, "step": 2993 }, { "epoch": 0.8011774150388011, "grad_norm": 3.226942539215088, "learning_rate": 9.671644712283061e-06, "loss": 1.1274, "step": 2994 }, { "epoch": 0.8014450093658014, "grad_norm": 3.127251625061035, "learning_rate": 9.671329230397616e-06, "loss": 1.0761, "step": 2995 }, { "epoch": 0.8017126036928017, "grad_norm": 3.322313070297241, "learning_rate": 9.67101360217876e-06, "loss": 1.0466, "step": 2996 }, { "epoch": 0.801980198019802, "grad_norm": 3.0869202613830566, "learning_rate": 9.670697827636374e-06, "loss": 1.0175, "step": 2997 }, { "epoch": 0.8022477923468022, "grad_norm": 3.7030136585235596, "learning_rate": 9.670381906780354e-06, "loss": 1.2653, "step": 2998 }, { "epoch": 0.8025153866738025, "grad_norm": 3.163114070892334, "learning_rate": 9.670065839620594e-06, "loss": 1.0952, "step": 2999 }, { "epoch": 0.8027829810008028, "grad_norm": 3.4521522521972656, "learning_rate": 9.669749626166998e-06, "loss": 1.1834, "step": 3000 }, { "epoch": 0.8027829810008028, "eval_loss": 1.1542552709579468, "eval_runtime": 11.69, "eval_samples_per_second": 34.217, "eval_steps_per_second": 4.277, "step": 3000 }, { "epoch": 0.803050575327803, "grad_norm": 3.3275747299194336, "learning_rate": 9.669433266429468e-06, "loss": 1.127, "step": 3001 }, { "epoch": 0.8033181696548033, "grad_norm": 3.7543275356292725, "learning_rate": 9.669116760417919e-06, "loss": 1.2466, "step": 3002 }, { "epoch": 0.8035857639818036, "grad_norm": 3.295300006866455, "learning_rate": 9.66880010814226e-06, "loss": 1.0984, "step": 3003 }, { "epoch": 0.8038533583088039, "grad_norm": 3.4165024757385254, "learning_rate": 9.668483309612415e-06, "loss": 1.0271, "step": 3004 }, { "epoch": 0.8041209526358041, "grad_norm": 3.307145357131958, "learning_rate": 9.668166364838306e-06, "loss": 1.1855, "step": 3005 }, { "epoch": 0.8043885469628044, "grad_norm": 3.409726858139038, "learning_rate": 9.667849273829861e-06, "loss": 1.0533, "step": 3006 }, { "epoch": 0.8046561412898047, "grad_norm": 3.490656852722168, "learning_rate": 9.667532036597017e-06, "loss": 1.14, "step": 3007 }, { "epoch": 0.804923735616805, "grad_norm": 3.395625352859497, "learning_rate": 9.667214653149706e-06, "loss": 1.2552, "step": 3008 }, { "epoch": 0.8051913299438052, "grad_norm": 3.3872432708740234, "learning_rate": 9.666897123497874e-06, "loss": 1.1062, "step": 3009 }, { "epoch": 0.8054589242708055, "grad_norm": 3.4740712642669678, "learning_rate": 9.666579447651467e-06, "loss": 1.1967, "step": 3010 }, { "epoch": 0.8057265185978058, "grad_norm": 3.5477073192596436, "learning_rate": 9.666261625620437e-06, "loss": 1.0399, "step": 3011 }, { "epoch": 0.8059941129248059, "grad_norm": 3.2551109790802, "learning_rate": 9.665943657414738e-06, "loss": 1.1364, "step": 3012 }, { "epoch": 0.8062617072518062, "grad_norm": 3.1551992893218994, "learning_rate": 9.665625543044335e-06, "loss": 1.1235, "step": 3013 }, { "epoch": 0.8065293015788065, "grad_norm": 3.2002670764923096, "learning_rate": 9.66530728251919e-06, "loss": 1.06, "step": 3014 }, { "epoch": 0.8067968959058068, "grad_norm": 3.1332433223724365, "learning_rate": 9.664988875849271e-06, "loss": 1.0827, "step": 3015 }, { "epoch": 0.807064490232807, "grad_norm": 3.4181861877441406, "learning_rate": 9.664670323044555e-06, "loss": 1.108, "step": 3016 }, { "epoch": 0.8073320845598073, "grad_norm": 3.91221284866333, "learning_rate": 9.66435162411502e-06, "loss": 1.0166, "step": 3017 }, { "epoch": 0.8075996788868076, "grad_norm": 3.2280433177948, "learning_rate": 9.664032779070652e-06, "loss": 1.1096, "step": 3018 }, { "epoch": 0.8078672732138079, "grad_norm": 3.229264259338379, "learning_rate": 9.663713787921436e-06, "loss": 1.0637, "step": 3019 }, { "epoch": 0.8081348675408081, "grad_norm": 3.3362436294555664, "learning_rate": 9.663394650677368e-06, "loss": 1.0432, "step": 3020 }, { "epoch": 0.8084024618678084, "grad_norm": 3.3346054553985596, "learning_rate": 9.66307536734844e-06, "loss": 1.164, "step": 3021 }, { "epoch": 0.8086700561948087, "grad_norm": 3.382387399673462, "learning_rate": 9.662755937944657e-06, "loss": 1.0578, "step": 3022 }, { "epoch": 0.808937650521809, "grad_norm": 3.3161141872406006, "learning_rate": 9.662436362476026e-06, "loss": 0.9829, "step": 3023 }, { "epoch": 0.8092052448488092, "grad_norm": 3.457970380783081, "learning_rate": 9.662116640952558e-06, "loss": 1.2865, "step": 3024 }, { "epoch": 0.8094728391758095, "grad_norm": 3.1441056728363037, "learning_rate": 9.661796773384266e-06, "loss": 1.0722, "step": 3025 }, { "epoch": 0.8097404335028098, "grad_norm": 3.2600796222686768, "learning_rate": 9.661476759781174e-06, "loss": 1.0949, "step": 3026 }, { "epoch": 0.81000802782981, "grad_norm": 3.8801653385162354, "learning_rate": 9.661156600153304e-06, "loss": 1.2197, "step": 3027 }, { "epoch": 0.8102756221568103, "grad_norm": 3.6208014488220215, "learning_rate": 9.660836294510685e-06, "loss": 1.2421, "step": 3028 }, { "epoch": 0.8105432164838106, "grad_norm": 3.5174331665039062, "learning_rate": 9.660515842863352e-06, "loss": 1.0628, "step": 3029 }, { "epoch": 0.8108108108108109, "grad_norm": 3.285752058029175, "learning_rate": 9.660195245221345e-06, "loss": 1.2271, "step": 3030 }, { "epoch": 0.811078405137811, "grad_norm": 3.3572685718536377, "learning_rate": 9.659874501594705e-06, "loss": 1.1451, "step": 3031 }, { "epoch": 0.8113459994648113, "grad_norm": 3.5392873287200928, "learning_rate": 9.659553611993478e-06, "loss": 1.2389, "step": 3032 }, { "epoch": 0.8116135937918116, "grad_norm": 3.6059014797210693, "learning_rate": 9.659232576427718e-06, "loss": 1.2854, "step": 3033 }, { "epoch": 0.8118811881188119, "grad_norm": 3.2821319103240967, "learning_rate": 9.65891139490748e-06, "loss": 1.0982, "step": 3034 }, { "epoch": 0.8121487824458121, "grad_norm": 3.2728023529052734, "learning_rate": 9.65859006744283e-06, "loss": 1.0598, "step": 3035 }, { "epoch": 0.8124163767728124, "grad_norm": 3.7586371898651123, "learning_rate": 9.65826859404383e-06, "loss": 1.2271, "step": 3036 }, { "epoch": 0.8126839710998127, "grad_norm": 3.513029098510742, "learning_rate": 9.65794697472055e-06, "loss": 1.0671, "step": 3037 }, { "epoch": 0.8129515654268129, "grad_norm": 3.1939735412597656, "learning_rate": 9.657625209483066e-06, "loss": 1.0949, "step": 3038 }, { "epoch": 0.8132191597538132, "grad_norm": 3.557431221008301, "learning_rate": 9.65730329834146e-06, "loss": 1.1211, "step": 3039 }, { "epoch": 0.8134867540808135, "grad_norm": 3.6598188877105713, "learning_rate": 9.656981241305811e-06, "loss": 1.1189, "step": 3040 }, { "epoch": 0.8137543484078138, "grad_norm": 3.781261444091797, "learning_rate": 9.656659038386213e-06, "loss": 1.1389, "step": 3041 }, { "epoch": 0.814021942734814, "grad_norm": 3.638216733932495, "learning_rate": 9.656336689592756e-06, "loss": 1.3045, "step": 3042 }, { "epoch": 0.8142895370618143, "grad_norm": 3.7151739597320557, "learning_rate": 9.65601419493554e-06, "loss": 1.3391, "step": 3043 }, { "epoch": 0.8145571313888146, "grad_norm": 3.482971429824829, "learning_rate": 9.655691554424664e-06, "loss": 1.0059, "step": 3044 }, { "epoch": 0.8148247257158149, "grad_norm": 3.6908507347106934, "learning_rate": 9.655368768070239e-06, "loss": 1.33, "step": 3045 }, { "epoch": 0.8150923200428151, "grad_norm": 3.2988486289978027, "learning_rate": 9.655045835882373e-06, "loss": 1.0606, "step": 3046 }, { "epoch": 0.8153599143698154, "grad_norm": 3.3644847869873047, "learning_rate": 9.654722757871184e-06, "loss": 1.1128, "step": 3047 }, { "epoch": 0.8156275086968157, "grad_norm": 3.3999931812286377, "learning_rate": 9.654399534046795e-06, "loss": 1.1453, "step": 3048 }, { "epoch": 0.8158951030238158, "grad_norm": 3.4176931381225586, "learning_rate": 9.654076164419326e-06, "loss": 1.1062, "step": 3049 }, { "epoch": 0.8161626973508161, "grad_norm": 3.199340581893921, "learning_rate": 9.65375264899891e-06, "loss": 1.1005, "step": 3050 }, { "epoch": 0.8164302916778164, "grad_norm": 3.3121516704559326, "learning_rate": 9.653428987795684e-06, "loss": 1.0958, "step": 3051 }, { "epoch": 0.8166978860048167, "grad_norm": 3.3458409309387207, "learning_rate": 9.65310518081978e-06, "loss": 1.1123, "step": 3052 }, { "epoch": 0.8169654803318169, "grad_norm": 3.438964366912842, "learning_rate": 9.652781228081348e-06, "loss": 1.2157, "step": 3053 }, { "epoch": 0.8172330746588172, "grad_norm": 3.2331788539886475, "learning_rate": 9.652457129590534e-06, "loss": 1.0771, "step": 3054 }, { "epoch": 0.8175006689858175, "grad_norm": 3.585362434387207, "learning_rate": 9.652132885357488e-06, "loss": 1.2097, "step": 3055 }, { "epoch": 0.8177682633128178, "grad_norm": 3.1754887104034424, "learning_rate": 9.65180849539237e-06, "loss": 1.0281, "step": 3056 }, { "epoch": 0.818035857639818, "grad_norm": 3.4600307941436768, "learning_rate": 9.651483959705344e-06, "loss": 1.1359, "step": 3057 }, { "epoch": 0.8183034519668183, "grad_norm": 3.3886213302612305, "learning_rate": 9.65115927830657e-06, "loss": 1.0735, "step": 3058 }, { "epoch": 0.8185710462938186, "grad_norm": 3.5005595684051514, "learning_rate": 9.650834451206225e-06, "loss": 1.0937, "step": 3059 }, { "epoch": 0.8188386406208188, "grad_norm": 3.592665672302246, "learning_rate": 9.650509478414483e-06, "loss": 1.1554, "step": 3060 }, { "epoch": 0.8191062349478191, "grad_norm": 3.504587173461914, "learning_rate": 9.650184359941522e-06, "loss": 1.0877, "step": 3061 }, { "epoch": 0.8193738292748194, "grad_norm": 3.803943634033203, "learning_rate": 9.649859095797526e-06, "loss": 1.2895, "step": 3062 }, { "epoch": 0.8196414236018197, "grad_norm": 3.929657459259033, "learning_rate": 9.649533685992687e-06, "loss": 1.2047, "step": 3063 }, { "epoch": 0.8199090179288199, "grad_norm": 3.074686288833618, "learning_rate": 9.649208130537199e-06, "loss": 1.0744, "step": 3064 }, { "epoch": 0.8201766122558202, "grad_norm": 3.3522446155548096, "learning_rate": 9.648882429441258e-06, "loss": 1.1105, "step": 3065 }, { "epoch": 0.8204442065828205, "grad_norm": 3.2287309169769287, "learning_rate": 9.648556582715067e-06, "loss": 1.1542, "step": 3066 }, { "epoch": 0.8207118009098208, "grad_norm": 3.072052478790283, "learning_rate": 9.648230590368836e-06, "loss": 1.0983, "step": 3067 }, { "epoch": 0.820979395236821, "grad_norm": 3.3558244705200195, "learning_rate": 9.647904452412774e-06, "loss": 1.1362, "step": 3068 }, { "epoch": 0.8212469895638213, "grad_norm": 3.917283296585083, "learning_rate": 9.647578168857101e-06, "loss": 1.1743, "step": 3069 }, { "epoch": 0.8215145838908215, "grad_norm": 3.6273481845855713, "learning_rate": 9.647251739712034e-06, "loss": 1.2516, "step": 3070 }, { "epoch": 0.8217821782178217, "grad_norm": 3.0678317546844482, "learning_rate": 9.646925164987802e-06, "loss": 1.0271, "step": 3071 }, { "epoch": 0.822049772544822, "grad_norm": 2.849170684814453, "learning_rate": 9.646598444694631e-06, "loss": 0.9816, "step": 3072 }, { "epoch": 0.8223173668718223, "grad_norm": 3.5290982723236084, "learning_rate": 9.64627157884276e-06, "loss": 1.1911, "step": 3073 }, { "epoch": 0.8225849611988226, "grad_norm": 3.403162717819214, "learning_rate": 9.645944567442429e-06, "loss": 1.1342, "step": 3074 }, { "epoch": 0.8228525555258228, "grad_norm": 3.4919958114624023, "learning_rate": 9.645617410503879e-06, "loss": 1.256, "step": 3075 }, { "epoch": 0.8231201498528231, "grad_norm": 3.1679487228393555, "learning_rate": 9.645290108037358e-06, "loss": 1.0346, "step": 3076 }, { "epoch": 0.8233877441798234, "grad_norm": 3.7232227325439453, "learning_rate": 9.644962660053122e-06, "loss": 1.2307, "step": 3077 }, { "epoch": 0.8236553385068237, "grad_norm": 3.162550449371338, "learning_rate": 9.644635066561426e-06, "loss": 1.0, "step": 3078 }, { "epoch": 0.8239229328338239, "grad_norm": 3.254295825958252, "learning_rate": 9.644307327572533e-06, "loss": 0.9921, "step": 3079 }, { "epoch": 0.8241905271608242, "grad_norm": 3.4591927528381348, "learning_rate": 9.643979443096711e-06, "loss": 1.149, "step": 3080 }, { "epoch": 0.8244581214878245, "grad_norm": 3.499791383743286, "learning_rate": 9.64365141314423e-06, "loss": 1.1439, "step": 3081 }, { "epoch": 0.8247257158148247, "grad_norm": 4.038766860961914, "learning_rate": 9.643323237725366e-06, "loss": 1.2011, "step": 3082 }, { "epoch": 0.824993310141825, "grad_norm": 3.279536247253418, "learning_rate": 9.6429949168504e-06, "loss": 1.1845, "step": 3083 }, { "epoch": 0.8252609044688253, "grad_norm": 3.441106081008911, "learning_rate": 9.642666450529613e-06, "loss": 1.1406, "step": 3084 }, { "epoch": 0.8255284987958256, "grad_norm": 3.512998104095459, "learning_rate": 9.6423378387733e-06, "loss": 1.1178, "step": 3085 }, { "epoch": 0.8257960931228258, "grad_norm": 3.23618483543396, "learning_rate": 9.642009081591753e-06, "loss": 1.0476, "step": 3086 }, { "epoch": 0.8260636874498261, "grad_norm": 3.6994218826293945, "learning_rate": 9.641680178995272e-06, "loss": 1.3332, "step": 3087 }, { "epoch": 0.8263312817768264, "grad_norm": 3.1423802375793457, "learning_rate": 9.641351130994155e-06, "loss": 1.0252, "step": 3088 }, { "epoch": 0.8265988761038267, "grad_norm": 3.7969133853912354, "learning_rate": 9.641021937598715e-06, "loss": 1.2813, "step": 3089 }, { "epoch": 0.8268664704308268, "grad_norm": 3.5946247577667236, "learning_rate": 9.640692598819263e-06, "loss": 1.2722, "step": 3090 }, { "epoch": 0.8271340647578271, "grad_norm": 3.4758689403533936, "learning_rate": 9.640363114666115e-06, "loss": 1.0492, "step": 3091 }, { "epoch": 0.8274016590848274, "grad_norm": 3.2242352962493896, "learning_rate": 9.640033485149594e-06, "loss": 1.0117, "step": 3092 }, { "epoch": 0.8276692534118276, "grad_norm": 3.510794162750244, "learning_rate": 9.639703710280022e-06, "loss": 1.1141, "step": 3093 }, { "epoch": 0.8279368477388279, "grad_norm": 3.322143077850342, "learning_rate": 9.639373790067734e-06, "loss": 1.1013, "step": 3094 }, { "epoch": 0.8282044420658282, "grad_norm": 3.6232595443725586, "learning_rate": 9.639043724523063e-06, "loss": 1.0961, "step": 3095 }, { "epoch": 0.8284720363928285, "grad_norm": 3.486630439758301, "learning_rate": 9.638713513656348e-06, "loss": 1.2134, "step": 3096 }, { "epoch": 0.8287396307198287, "grad_norm": 3.1779515743255615, "learning_rate": 9.638383157477935e-06, "loss": 1.0994, "step": 3097 }, { "epoch": 0.829007225046829, "grad_norm": 3.2651169300079346, "learning_rate": 9.638052655998172e-06, "loss": 1.0744, "step": 3098 }, { "epoch": 0.8292748193738293, "grad_norm": 4.6499152183532715, "learning_rate": 9.63772200922741e-06, "loss": 1.1698, "step": 3099 }, { "epoch": 0.8295424137008296, "grad_norm": 3.5739243030548096, "learning_rate": 9.63739121717601e-06, "loss": 1.1934, "step": 3100 }, { "epoch": 0.8298100080278298, "grad_norm": 3.5878965854644775, "learning_rate": 9.637060279854331e-06, "loss": 1.2052, "step": 3101 }, { "epoch": 0.8300776023548301, "grad_norm": 3.2450151443481445, "learning_rate": 9.636729197272745e-06, "loss": 1.0594, "step": 3102 }, { "epoch": 0.8303451966818304, "grad_norm": 3.224311351776123, "learning_rate": 9.636397969441617e-06, "loss": 1.1101, "step": 3103 }, { "epoch": 0.8306127910088306, "grad_norm": 3.1356136798858643, "learning_rate": 9.63606659637133e-06, "loss": 1.0276, "step": 3104 }, { "epoch": 0.8308803853358309, "grad_norm": 3.4842357635498047, "learning_rate": 9.635735078072259e-06, "loss": 1.2134, "step": 3105 }, { "epoch": 0.8311479796628312, "grad_norm": 3.485252857208252, "learning_rate": 9.635403414554791e-06, "loss": 1.0805, "step": 3106 }, { "epoch": 0.8314155739898315, "grad_norm": 3.630953311920166, "learning_rate": 9.635071605829315e-06, "loss": 1.1837, "step": 3107 }, { "epoch": 0.8316831683168316, "grad_norm": 3.5035400390625, "learning_rate": 9.634739651906227e-06, "loss": 1.142, "step": 3108 }, { "epoch": 0.8319507626438319, "grad_norm": 3.4590489864349365, "learning_rate": 9.634407552795924e-06, "loss": 1.1785, "step": 3109 }, { "epoch": 0.8322183569708322, "grad_norm": 3.730466842651367, "learning_rate": 9.63407530850881e-06, "loss": 1.1447, "step": 3110 }, { "epoch": 0.8324859512978325, "grad_norm": 3.295057535171509, "learning_rate": 9.633742919055294e-06, "loss": 1.0757, "step": 3111 }, { "epoch": 0.8327535456248327, "grad_norm": 3.471201181411743, "learning_rate": 9.633410384445785e-06, "loss": 1.1772, "step": 3112 }, { "epoch": 0.833021139951833, "grad_norm": 3.329434633255005, "learning_rate": 9.633077704690702e-06, "loss": 1.2623, "step": 3113 }, { "epoch": 0.8332887342788333, "grad_norm": 3.593980073928833, "learning_rate": 9.632744879800468e-06, "loss": 1.1858, "step": 3114 }, { "epoch": 0.8335563286058335, "grad_norm": 3.156765937805176, "learning_rate": 9.632411909785506e-06, "loss": 1.1067, "step": 3115 }, { "epoch": 0.8338239229328338, "grad_norm": 3.3649260997772217, "learning_rate": 9.632078794656249e-06, "loss": 1.1243, "step": 3116 }, { "epoch": 0.8340915172598341, "grad_norm": 3.270552635192871, "learning_rate": 9.631745534423132e-06, "loss": 1.1653, "step": 3117 }, { "epoch": 0.8343591115868344, "grad_norm": 2.9742650985717773, "learning_rate": 9.631412129096591e-06, "loss": 1.0039, "step": 3118 }, { "epoch": 0.8346267059138346, "grad_norm": 3.710505247116089, "learning_rate": 9.631078578687077e-06, "loss": 1.1613, "step": 3119 }, { "epoch": 0.8348943002408349, "grad_norm": 3.2119741439819336, "learning_rate": 9.630744883205031e-06, "loss": 1.1568, "step": 3120 }, { "epoch": 0.8351618945678352, "grad_norm": 3.492464065551758, "learning_rate": 9.630411042660913e-06, "loss": 1.3087, "step": 3121 }, { "epoch": 0.8354294888948355, "grad_norm": 4.012518405914307, "learning_rate": 9.630077057065177e-06, "loss": 1.1665, "step": 3122 }, { "epoch": 0.8356970832218357, "grad_norm": 3.5707449913024902, "learning_rate": 9.629742926428287e-06, "loss": 1.1712, "step": 3123 }, { "epoch": 0.835964677548836, "grad_norm": 3.179173469543457, "learning_rate": 9.629408650760707e-06, "loss": 1.0721, "step": 3124 }, { "epoch": 0.8362322718758363, "grad_norm": 3.2190163135528564, "learning_rate": 9.629074230072913e-06, "loss": 1.1279, "step": 3125 }, { "epoch": 0.8364998662028365, "grad_norm": 3.036876916885376, "learning_rate": 9.62873966437538e-06, "loss": 1.0463, "step": 3126 }, { "epoch": 0.8367674605298367, "grad_norm": 3.333547592163086, "learning_rate": 9.628404953678585e-06, "loss": 1.1396, "step": 3127 }, { "epoch": 0.837035054856837, "grad_norm": 3.266360282897949, "learning_rate": 9.628070097993016e-06, "loss": 1.1264, "step": 3128 }, { "epoch": 0.8373026491838373, "grad_norm": 3.3879363536834717, "learning_rate": 9.627735097329161e-06, "loss": 1.0972, "step": 3129 }, { "epoch": 0.8375702435108375, "grad_norm": 3.3518929481506348, "learning_rate": 9.627399951697516e-06, "loss": 1.1234, "step": 3130 }, { "epoch": 0.8378378378378378, "grad_norm": 3.3172409534454346, "learning_rate": 9.627064661108581e-06, "loss": 1.0768, "step": 3131 }, { "epoch": 0.8381054321648381, "grad_norm": 3.6157588958740234, "learning_rate": 9.626729225572854e-06, "loss": 1.2114, "step": 3132 }, { "epoch": 0.8383730264918384, "grad_norm": 3.2437682151794434, "learning_rate": 9.626393645100849e-06, "loss": 1.0175, "step": 3133 }, { "epoch": 0.8386406208188386, "grad_norm": 3.443774461746216, "learning_rate": 9.626057919703073e-06, "loss": 1.1866, "step": 3134 }, { "epoch": 0.8389082151458389, "grad_norm": 3.1143884658813477, "learning_rate": 9.625722049390048e-06, "loss": 0.9715, "step": 3135 }, { "epoch": 0.8391758094728392, "grad_norm": 3.3151462078094482, "learning_rate": 9.62538603417229e-06, "loss": 1.0459, "step": 3136 }, { "epoch": 0.8394434037998394, "grad_norm": 3.691002368927002, "learning_rate": 9.625049874060331e-06, "loss": 1.1284, "step": 3137 }, { "epoch": 0.8397109981268397, "grad_norm": 3.0173420906066895, "learning_rate": 9.624713569064695e-06, "loss": 0.9815, "step": 3138 }, { "epoch": 0.83997859245384, "grad_norm": 3.3124630451202393, "learning_rate": 9.624377119195922e-06, "loss": 1.1042, "step": 3139 }, { "epoch": 0.8402461867808403, "grad_norm": 3.262075424194336, "learning_rate": 9.624040524464548e-06, "loss": 1.1501, "step": 3140 }, { "epoch": 0.8405137811078405, "grad_norm": 3.391528367996216, "learning_rate": 9.623703784881121e-06, "loss": 1.086, "step": 3141 }, { "epoch": 0.8407813754348408, "grad_norm": 3.690544843673706, "learning_rate": 9.623366900456186e-06, "loss": 1.1857, "step": 3142 }, { "epoch": 0.8410489697618411, "grad_norm": 3.2583820819854736, "learning_rate": 9.6230298712003e-06, "loss": 1.063, "step": 3143 }, { "epoch": 0.8413165640888414, "grad_norm": 3.278346300125122, "learning_rate": 9.622692697124016e-06, "loss": 1.1059, "step": 3144 }, { "epoch": 0.8415841584158416, "grad_norm": 3.320652484893799, "learning_rate": 9.6223553782379e-06, "loss": 1.165, "step": 3145 }, { "epoch": 0.8418517527428419, "grad_norm": 3.6142923831939697, "learning_rate": 9.622017914552519e-06, "loss": 1.1734, "step": 3146 }, { "epoch": 0.8421193470698422, "grad_norm": 3.483147382736206, "learning_rate": 9.62168030607844e-06, "loss": 1.0495, "step": 3147 }, { "epoch": 0.8423869413968423, "grad_norm": 3.2388815879821777, "learning_rate": 9.621342552826245e-06, "loss": 1.0552, "step": 3148 }, { "epoch": 0.8426545357238426, "grad_norm": 3.1021432876586914, "learning_rate": 9.62100465480651e-06, "loss": 0.9876, "step": 3149 }, { "epoch": 0.8429221300508429, "grad_norm": 3.7463855743408203, "learning_rate": 9.62066661202982e-06, "loss": 1.0824, "step": 3150 }, { "epoch": 0.8431897243778432, "grad_norm": 3.345280170440674, "learning_rate": 9.620328424506767e-06, "loss": 1.0385, "step": 3151 }, { "epoch": 0.8434573187048434, "grad_norm": 3.582469940185547, "learning_rate": 9.619990092247943e-06, "loss": 1.219, "step": 3152 }, { "epoch": 0.8437249130318437, "grad_norm": 3.824211835861206, "learning_rate": 9.619651615263948e-06, "loss": 1.4056, "step": 3153 }, { "epoch": 0.843992507358844, "grad_norm": 3.295612335205078, "learning_rate": 9.619312993565382e-06, "loss": 1.0493, "step": 3154 }, { "epoch": 0.8442601016858443, "grad_norm": 3.390982151031494, "learning_rate": 9.618974227162857e-06, "loss": 1.136, "step": 3155 }, { "epoch": 0.8445276960128445, "grad_norm": 3.5141913890838623, "learning_rate": 9.618635316066984e-06, "loss": 1.1228, "step": 3156 }, { "epoch": 0.8447952903398448, "grad_norm": 3.6350278854370117, "learning_rate": 9.618296260288376e-06, "loss": 1.2088, "step": 3157 }, { "epoch": 0.8450628846668451, "grad_norm": 3.1930181980133057, "learning_rate": 9.617957059837659e-06, "loss": 1.1015, "step": 3158 }, { "epoch": 0.8453304789938453, "grad_norm": 3.7268929481506348, "learning_rate": 9.617617714725456e-06, "loss": 1.18, "step": 3159 }, { "epoch": 0.8455980733208456, "grad_norm": 3.712311267852783, "learning_rate": 9.617278224962398e-06, "loss": 1.1109, "step": 3160 }, { "epoch": 0.8458656676478459, "grad_norm": 3.326599597930908, "learning_rate": 9.616938590559121e-06, "loss": 1.0733, "step": 3161 }, { "epoch": 0.8461332619748462, "grad_norm": 3.1389646530151367, "learning_rate": 9.616598811526263e-06, "loss": 1.0736, "step": 3162 }, { "epoch": 0.8464008563018464, "grad_norm": 3.1288650035858154, "learning_rate": 9.616258887874467e-06, "loss": 1.0572, "step": 3163 }, { "epoch": 0.8466684506288467, "grad_norm": 3.364788055419922, "learning_rate": 9.615918819614382e-06, "loss": 1.0658, "step": 3164 }, { "epoch": 0.846936044955847, "grad_norm": 3.18229341506958, "learning_rate": 9.615578606756663e-06, "loss": 1.0498, "step": 3165 }, { "epoch": 0.8472036392828473, "grad_norm": 3.276883125305176, "learning_rate": 9.615238249311964e-06, "loss": 1.0673, "step": 3166 }, { "epoch": 0.8474712336098474, "grad_norm": 3.2905640602111816, "learning_rate": 9.61489774729095e-06, "loss": 1.0059, "step": 3167 }, { "epoch": 0.8477388279368477, "grad_norm": 3.756727933883667, "learning_rate": 9.614557100704286e-06, "loss": 1.2645, "step": 3168 }, { "epoch": 0.848006422263848, "grad_norm": 3.3283801078796387, "learning_rate": 9.614216309562643e-06, "loss": 1.0832, "step": 3169 }, { "epoch": 0.8482740165908482, "grad_norm": 3.5391414165496826, "learning_rate": 9.613875373876698e-06, "loss": 1.0946, "step": 3170 }, { "epoch": 0.8485416109178485, "grad_norm": 3.3885715007781982, "learning_rate": 9.61353429365713e-06, "loss": 1.1686, "step": 3171 }, { "epoch": 0.8488092052448488, "grad_norm": 3.24389386177063, "learning_rate": 9.613193068914623e-06, "loss": 1.1554, "step": 3172 }, { "epoch": 0.8490767995718491, "grad_norm": 3.2578012943267822, "learning_rate": 9.612851699659867e-06, "loss": 1.1124, "step": 3173 }, { "epoch": 0.8493443938988493, "grad_norm": 3.3463966846466064, "learning_rate": 9.612510185903554e-06, "loss": 1.0264, "step": 3174 }, { "epoch": 0.8496119882258496, "grad_norm": 3.320957899093628, "learning_rate": 9.612168527656386e-06, "loss": 1.1183, "step": 3175 }, { "epoch": 0.8498795825528499, "grad_norm": 3.1176092624664307, "learning_rate": 9.611826724929063e-06, "loss": 1.1182, "step": 3176 }, { "epoch": 0.8501471768798502, "grad_norm": 3.338179349899292, "learning_rate": 9.611484777732292e-06, "loss": 1.1178, "step": 3177 }, { "epoch": 0.8504147712068504, "grad_norm": 3.7851016521453857, "learning_rate": 9.611142686076787e-06, "loss": 1.2339, "step": 3178 }, { "epoch": 0.8506823655338507, "grad_norm": 3.0936129093170166, "learning_rate": 9.610800449973261e-06, "loss": 1.1433, "step": 3179 }, { "epoch": 0.850949959860851, "grad_norm": 3.48248291015625, "learning_rate": 9.610458069432438e-06, "loss": 1.1971, "step": 3180 }, { "epoch": 0.8512175541878512, "grad_norm": 3.774419069290161, "learning_rate": 9.610115544465042e-06, "loss": 1.1778, "step": 3181 }, { "epoch": 0.8514851485148515, "grad_norm": 3.461056709289551, "learning_rate": 9.609772875081802e-06, "loss": 1.1425, "step": 3182 }, { "epoch": 0.8517527428418518, "grad_norm": 3.332552671432495, "learning_rate": 9.609430061293454e-06, "loss": 1.0041, "step": 3183 }, { "epoch": 0.8520203371688521, "grad_norm": 3.5970587730407715, "learning_rate": 9.609087103110737e-06, "loss": 1.2363, "step": 3184 }, { "epoch": 0.8522879314958522, "grad_norm": 3.4365155696868896, "learning_rate": 9.608744000544392e-06, "loss": 1.0534, "step": 3185 }, { "epoch": 0.8525555258228525, "grad_norm": 3.2905330657958984, "learning_rate": 9.60840075360517e-06, "loss": 1.2355, "step": 3186 }, { "epoch": 0.8528231201498528, "grad_norm": 3.4680607318878174, "learning_rate": 9.608057362303823e-06, "loss": 1.0901, "step": 3187 }, { "epoch": 0.8530907144768531, "grad_norm": 3.351891279220581, "learning_rate": 9.607713826651107e-06, "loss": 1.1422, "step": 3188 }, { "epoch": 0.8533583088038533, "grad_norm": 3.7744686603546143, "learning_rate": 9.607370146657782e-06, "loss": 1.1692, "step": 3189 }, { "epoch": 0.8536259031308536, "grad_norm": 3.2692463397979736, "learning_rate": 9.607026322334618e-06, "loss": 1.0488, "step": 3190 }, { "epoch": 0.8538934974578539, "grad_norm": 3.201399564743042, "learning_rate": 9.606682353692383e-06, "loss": 0.9253, "step": 3191 }, { "epoch": 0.8541610917848541, "grad_norm": 3.5281589031219482, "learning_rate": 9.606338240741851e-06, "loss": 1.1785, "step": 3192 }, { "epoch": 0.8544286861118544, "grad_norm": 3.3514602184295654, "learning_rate": 9.605993983493804e-06, "loss": 1.1364, "step": 3193 }, { "epoch": 0.8546962804388547, "grad_norm": 3.6264495849609375, "learning_rate": 9.605649581959027e-06, "loss": 1.1206, "step": 3194 }, { "epoch": 0.854963874765855, "grad_norm": 3.1329174041748047, "learning_rate": 9.605305036148306e-06, "loss": 1.0666, "step": 3195 }, { "epoch": 0.8552314690928552, "grad_norm": 3.255485773086548, "learning_rate": 9.604960346072435e-06, "loss": 0.9613, "step": 3196 }, { "epoch": 0.8554990634198555, "grad_norm": 3.693399429321289, "learning_rate": 9.604615511742213e-06, "loss": 1.169, "step": 3197 }, { "epoch": 0.8557666577468558, "grad_norm": 3.0587754249572754, "learning_rate": 9.604270533168441e-06, "loss": 1.0926, "step": 3198 }, { "epoch": 0.8560342520738561, "grad_norm": 3.428370952606201, "learning_rate": 9.603925410361925e-06, "loss": 1.111, "step": 3199 }, { "epoch": 0.8563018464008563, "grad_norm": 3.2654330730438232, "learning_rate": 9.603580143333478e-06, "loss": 1.1336, "step": 3200 }, { "epoch": 0.8565694407278566, "grad_norm": 3.4805808067321777, "learning_rate": 9.603234732093913e-06, "loss": 1.1853, "step": 3201 }, { "epoch": 0.8568370350548569, "grad_norm": 3.3785743713378906, "learning_rate": 9.602889176654055e-06, "loss": 1.1114, "step": 3202 }, { "epoch": 0.857104629381857, "grad_norm": 3.4433510303497314, "learning_rate": 9.602543477024725e-06, "loss": 1.1787, "step": 3203 }, { "epoch": 0.8573722237088574, "grad_norm": 3.078172445297241, "learning_rate": 9.602197633216754e-06, "loss": 1.0089, "step": 3204 }, { "epoch": 0.8576398180358576, "grad_norm": 3.009098768234253, "learning_rate": 9.601851645240974e-06, "loss": 0.9585, "step": 3205 }, { "epoch": 0.857907412362858, "grad_norm": 3.31787109375, "learning_rate": 9.601505513108227e-06, "loss": 1.097, "step": 3206 }, { "epoch": 0.8581750066898581, "grad_norm": 3.802264451980591, "learning_rate": 9.601159236829353e-06, "loss": 1.2097, "step": 3207 }, { "epoch": 0.8584426010168584, "grad_norm": 3.393442392349243, "learning_rate": 9.600812816415199e-06, "loss": 1.1896, "step": 3208 }, { "epoch": 0.8587101953438587, "grad_norm": 3.611478090286255, "learning_rate": 9.600466251876618e-06, "loss": 1.306, "step": 3209 }, { "epoch": 0.858977789670859, "grad_norm": 3.6949093341827393, "learning_rate": 9.600119543224467e-06, "loss": 1.0832, "step": 3210 }, { "epoch": 0.8592453839978592, "grad_norm": 3.2041354179382324, "learning_rate": 9.599772690469606e-06, "loss": 1.0338, "step": 3211 }, { "epoch": 0.8595129783248595, "grad_norm": 3.4140734672546387, "learning_rate": 9.599425693622902e-06, "loss": 1.1597, "step": 3212 }, { "epoch": 0.8597805726518598, "grad_norm": 2.8042356967926025, "learning_rate": 9.599078552695223e-06, "loss": 0.9167, "step": 3213 }, { "epoch": 0.86004816697886, "grad_norm": 3.645156145095825, "learning_rate": 9.598731267697443e-06, "loss": 1.3054, "step": 3214 }, { "epoch": 0.8603157613058603, "grad_norm": 3.2911882400512695, "learning_rate": 9.598383838640443e-06, "loss": 1.0813, "step": 3215 }, { "epoch": 0.8605833556328606, "grad_norm": 3.168053388595581, "learning_rate": 9.598036265535104e-06, "loss": 1.0603, "step": 3216 }, { "epoch": 0.8608509499598609, "grad_norm": 3.6250712871551514, "learning_rate": 9.597688548392319e-06, "loss": 1.1671, "step": 3217 }, { "epoch": 0.8611185442868611, "grad_norm": 3.570465326309204, "learning_rate": 9.597340687222975e-06, "loss": 1.1248, "step": 3218 }, { "epoch": 0.8613861386138614, "grad_norm": 3.188462495803833, "learning_rate": 9.596992682037973e-06, "loss": 1.0927, "step": 3219 }, { "epoch": 0.8616537329408617, "grad_norm": 3.166240692138672, "learning_rate": 9.596644532848211e-06, "loss": 1.1719, "step": 3220 }, { "epoch": 0.861921327267862, "grad_norm": 3.369922399520874, "learning_rate": 9.5962962396646e-06, "loss": 1.0546, "step": 3221 }, { "epoch": 0.8621889215948622, "grad_norm": 3.611721992492676, "learning_rate": 9.595947802498046e-06, "loss": 1.1727, "step": 3222 }, { "epoch": 0.8624565159218625, "grad_norm": 3.5370113849639893, "learning_rate": 9.595599221359464e-06, "loss": 1.1045, "step": 3223 }, { "epoch": 0.8627241102488628, "grad_norm": 3.5464746952056885, "learning_rate": 9.595250496259778e-06, "loss": 1.1146, "step": 3224 }, { "epoch": 0.8629917045758629, "grad_norm": 3.1489906311035156, "learning_rate": 9.594901627209908e-06, "loss": 1.0356, "step": 3225 }, { "epoch": 0.8632592989028632, "grad_norm": 3.69189190864563, "learning_rate": 9.594552614220785e-06, "loss": 1.2625, "step": 3226 }, { "epoch": 0.8635268932298635, "grad_norm": 3.298753499984741, "learning_rate": 9.594203457303339e-06, "loss": 1.0553, "step": 3227 }, { "epoch": 0.8637944875568638, "grad_norm": 3.2291910648345947, "learning_rate": 9.593854156468512e-06, "loss": 1.133, "step": 3228 }, { "epoch": 0.864062081883864, "grad_norm": 3.1540310382843018, "learning_rate": 9.593504711727243e-06, "loss": 0.9898, "step": 3229 }, { "epoch": 0.8643296762108643, "grad_norm": 3.049051523208618, "learning_rate": 9.593155123090479e-06, "loss": 0.9765, "step": 3230 }, { "epoch": 0.8645972705378646, "grad_norm": 3.2508833408355713, "learning_rate": 9.592805390569173e-06, "loss": 1.0901, "step": 3231 }, { "epoch": 0.8648648648648649, "grad_norm": 3.9772286415100098, "learning_rate": 9.59245551417428e-06, "loss": 1.2102, "step": 3232 }, { "epoch": 0.8651324591918651, "grad_norm": 3.5919082164764404, "learning_rate": 9.592105493916758e-06, "loss": 1.1591, "step": 3233 }, { "epoch": 0.8654000535188654, "grad_norm": 3.9090003967285156, "learning_rate": 9.591755329807574e-06, "loss": 1.2031, "step": 3234 }, { "epoch": 0.8656676478458657, "grad_norm": 3.496093273162842, "learning_rate": 9.591405021857697e-06, "loss": 1.109, "step": 3235 }, { "epoch": 0.8659352421728659, "grad_norm": 3.8281493186950684, "learning_rate": 9.5910545700781e-06, "loss": 1.211, "step": 3236 }, { "epoch": 0.8662028364998662, "grad_norm": 3.2927358150482178, "learning_rate": 9.59070397447976e-06, "loss": 1.0816, "step": 3237 }, { "epoch": 0.8664704308268665, "grad_norm": 3.4841480255126953, "learning_rate": 9.590353235073663e-06, "loss": 1.1187, "step": 3238 }, { "epoch": 0.8667380251538668, "grad_norm": 3.2719860076904297, "learning_rate": 9.590002351870793e-06, "loss": 1.0203, "step": 3239 }, { "epoch": 0.867005619480867, "grad_norm": 3.0735063552856445, "learning_rate": 9.589651324882143e-06, "loss": 1.1405, "step": 3240 }, { "epoch": 0.8672732138078673, "grad_norm": 3.6215524673461914, "learning_rate": 9.58930015411871e-06, "loss": 1.2612, "step": 3241 }, { "epoch": 0.8675408081348676, "grad_norm": 3.0990355014801025, "learning_rate": 9.588948839591494e-06, "loss": 1.1183, "step": 3242 }, { "epoch": 0.8678084024618679, "grad_norm": 2.985930919647217, "learning_rate": 9.5885973813115e-06, "loss": 1.0287, "step": 3243 }, { "epoch": 0.868075996788868, "grad_norm": 3.4825994968414307, "learning_rate": 9.588245779289738e-06, "loss": 1.0794, "step": 3244 }, { "epoch": 0.8683435911158683, "grad_norm": 3.289504289627075, "learning_rate": 9.587894033537223e-06, "loss": 1.1203, "step": 3245 }, { "epoch": 0.8686111854428686, "grad_norm": 3.175842761993408, "learning_rate": 9.587542144064972e-06, "loss": 1.0932, "step": 3246 }, { "epoch": 0.8688787797698688, "grad_norm": 3.251260995864868, "learning_rate": 9.587190110884009e-06, "loss": 1.0971, "step": 3247 }, { "epoch": 0.8691463740968691, "grad_norm": 2.8951752185821533, "learning_rate": 9.586837934005363e-06, "loss": 0.9605, "step": 3248 }, { "epoch": 0.8694139684238694, "grad_norm": 3.264331102371216, "learning_rate": 9.586485613440064e-06, "loss": 1.1496, "step": 3249 }, { "epoch": 0.8696815627508697, "grad_norm": 3.1973655223846436, "learning_rate": 9.586133149199151e-06, "loss": 1.152, "step": 3250 }, { "epoch": 0.8699491570778699, "grad_norm": 2.9676499366760254, "learning_rate": 9.585780541293663e-06, "loss": 1.1106, "step": 3251 }, { "epoch": 0.8702167514048702, "grad_norm": 3.147977352142334, "learning_rate": 9.585427789734647e-06, "loss": 1.2677, "step": 3252 }, { "epoch": 0.8704843457318705, "grad_norm": 3.3018641471862793, "learning_rate": 9.585074894533154e-06, "loss": 1.0454, "step": 3253 }, { "epoch": 0.8707519400588708, "grad_norm": 3.2543413639068604, "learning_rate": 9.584721855700238e-06, "loss": 1.0568, "step": 3254 }, { "epoch": 0.871019534385871, "grad_norm": 3.091062545776367, "learning_rate": 9.584368673246957e-06, "loss": 1.0498, "step": 3255 }, { "epoch": 0.8712871287128713, "grad_norm": 3.4238486289978027, "learning_rate": 9.584015347184376e-06, "loss": 1.0649, "step": 3256 }, { "epoch": 0.8715547230398716, "grad_norm": 3.575374126434326, "learning_rate": 9.583661877523565e-06, "loss": 1.1816, "step": 3257 }, { "epoch": 0.8718223173668718, "grad_norm": 3.7600672245025635, "learning_rate": 9.583308264275593e-06, "loss": 1.2363, "step": 3258 }, { "epoch": 0.8720899116938721, "grad_norm": 3.4532859325408936, "learning_rate": 9.58295450745154e-06, "loss": 1.2357, "step": 3259 }, { "epoch": 0.8723575060208724, "grad_norm": 2.9904305934906006, "learning_rate": 9.582600607062486e-06, "loss": 0.9099, "step": 3260 }, { "epoch": 0.8726251003478727, "grad_norm": 3.143056631088257, "learning_rate": 9.58224656311952e-06, "loss": 1.1002, "step": 3261 }, { "epoch": 0.8728926946748728, "grad_norm": 3.371873140335083, "learning_rate": 9.581892375633729e-06, "loss": 1.1547, "step": 3262 }, { "epoch": 0.8731602890018731, "grad_norm": 3.277872085571289, "learning_rate": 9.58153804461621e-06, "loss": 1.0734, "step": 3263 }, { "epoch": 0.8734278833288734, "grad_norm": 3.2630152702331543, "learning_rate": 9.581183570078064e-06, "loss": 1.0204, "step": 3264 }, { "epoch": 0.8736954776558737, "grad_norm": 3.5363354682922363, "learning_rate": 9.580828952030392e-06, "loss": 1.2343, "step": 3265 }, { "epoch": 0.8739630719828739, "grad_norm": 3.0592634677886963, "learning_rate": 9.580474190484306e-06, "loss": 1.0132, "step": 3266 }, { "epoch": 0.8742306663098742, "grad_norm": 3.0633037090301514, "learning_rate": 9.580119285450917e-06, "loss": 1.081, "step": 3267 }, { "epoch": 0.8744982606368745, "grad_norm": 3.3034725189208984, "learning_rate": 9.579764236941345e-06, "loss": 1.1423, "step": 3268 }, { "epoch": 0.8747658549638747, "grad_norm": 3.3148138523101807, "learning_rate": 9.57940904496671e-06, "loss": 1.1116, "step": 3269 }, { "epoch": 0.875033449290875, "grad_norm": 3.3860421180725098, "learning_rate": 9.57905370953814e-06, "loss": 1.0291, "step": 3270 }, { "epoch": 0.8753010436178753, "grad_norm": 3.3635714054107666, "learning_rate": 9.578698230666767e-06, "loss": 1.117, "step": 3271 }, { "epoch": 0.8755686379448756, "grad_norm": 3.6900482177734375, "learning_rate": 9.578342608363723e-06, "loss": 1.1655, "step": 3272 }, { "epoch": 0.8758362322718758, "grad_norm": 3.450373411178589, "learning_rate": 9.577986842640152e-06, "loss": 1.1539, "step": 3273 }, { "epoch": 0.8761038265988761, "grad_norm": 3.40311598777771, "learning_rate": 9.577630933507196e-06, "loss": 1.1297, "step": 3274 }, { "epoch": 0.8763714209258764, "grad_norm": 3.1564464569091797, "learning_rate": 9.577274880976007e-06, "loss": 1.0134, "step": 3275 }, { "epoch": 0.8766390152528767, "grad_norm": 4.080739498138428, "learning_rate": 9.576918685057736e-06, "loss": 1.0344, "step": 3276 }, { "epoch": 0.8769066095798769, "grad_norm": 3.6657655239105225, "learning_rate": 9.576562345763542e-06, "loss": 1.1657, "step": 3277 }, { "epoch": 0.8771742039068772, "grad_norm": 3.4710497856140137, "learning_rate": 9.576205863104588e-06, "loss": 1.2043, "step": 3278 }, { "epoch": 0.8774417982338775, "grad_norm": 3.297534704208374, "learning_rate": 9.575849237092042e-06, "loss": 1.0513, "step": 3279 }, { "epoch": 0.8777093925608777, "grad_norm": 3.476609230041504, "learning_rate": 9.575492467737074e-06, "loss": 1.1804, "step": 3280 }, { "epoch": 0.877976986887878, "grad_norm": 3.289842367172241, "learning_rate": 9.575135555050861e-06, "loss": 1.1133, "step": 3281 }, { "epoch": 0.8782445812148783, "grad_norm": 3.235844135284424, "learning_rate": 9.574778499044582e-06, "loss": 1.0974, "step": 3282 }, { "epoch": 0.8785121755418785, "grad_norm": 3.5740182399749756, "learning_rate": 9.574421299729424e-06, "loss": 1.2028, "step": 3283 }, { "epoch": 0.8787797698688787, "grad_norm": 3.4164657592773438, "learning_rate": 9.574063957116575e-06, "loss": 1.1063, "step": 3284 }, { "epoch": 0.879047364195879, "grad_norm": 3.4936609268188477, "learning_rate": 9.573706471217232e-06, "loss": 1.1992, "step": 3285 }, { "epoch": 0.8793149585228793, "grad_norm": 3.449378252029419, "learning_rate": 9.573348842042592e-06, "loss": 1.1717, "step": 3286 }, { "epoch": 0.8795825528498796, "grad_norm": 3.8629961013793945, "learning_rate": 9.572991069603853e-06, "loss": 1.2591, "step": 3287 }, { "epoch": 0.8798501471768798, "grad_norm": 3.312222957611084, "learning_rate": 9.572633153912232e-06, "loss": 1.0761, "step": 3288 }, { "epoch": 0.8801177415038801, "grad_norm": 3.432467460632324, "learning_rate": 9.572275094978934e-06, "loss": 1.211, "step": 3289 }, { "epoch": 0.8803853358308804, "grad_norm": 3.3937036991119385, "learning_rate": 9.571916892815179e-06, "loss": 1.2013, "step": 3290 }, { "epoch": 0.8806529301578806, "grad_norm": 3.1374263763427734, "learning_rate": 9.571558547432185e-06, "loss": 1.1642, "step": 3291 }, { "epoch": 0.8809205244848809, "grad_norm": 3.3817193508148193, "learning_rate": 9.57120005884118e-06, "loss": 1.0251, "step": 3292 }, { "epoch": 0.8811881188118812, "grad_norm": 3.5183558464050293, "learning_rate": 9.570841427053394e-06, "loss": 1.103, "step": 3293 }, { "epoch": 0.8814557131388815, "grad_norm": 3.377310037612915, "learning_rate": 9.57048265208006e-06, "loss": 1.0861, "step": 3294 }, { "epoch": 0.8817233074658817, "grad_norm": 3.5976264476776123, "learning_rate": 9.570123733932415e-06, "loss": 1.2058, "step": 3295 }, { "epoch": 0.881990901792882, "grad_norm": 3.367487668991089, "learning_rate": 9.569764672621707e-06, "loss": 1.042, "step": 3296 }, { "epoch": 0.8822584961198823, "grad_norm": 3.392971992492676, "learning_rate": 9.569405468159183e-06, "loss": 1.1187, "step": 3297 }, { "epoch": 0.8825260904468826, "grad_norm": 3.862159013748169, "learning_rate": 9.569046120556092e-06, "loss": 1.2342, "step": 3298 }, { "epoch": 0.8827936847738828, "grad_norm": 2.9790005683898926, "learning_rate": 9.568686629823693e-06, "loss": 1.0197, "step": 3299 }, { "epoch": 0.8830612791008831, "grad_norm": 3.4555444717407227, "learning_rate": 9.56832699597325e-06, "loss": 1.1651, "step": 3300 }, { "epoch": 0.8833288734278834, "grad_norm": 4.1415557861328125, "learning_rate": 9.567967219016024e-06, "loss": 1.2982, "step": 3301 }, { "epoch": 0.8835964677548835, "grad_norm": 3.344966173171997, "learning_rate": 9.567607298963288e-06, "loss": 1.1131, "step": 3302 }, { "epoch": 0.8838640620818838, "grad_norm": 3.7621214389801025, "learning_rate": 9.567247235826316e-06, "loss": 1.1731, "step": 3303 }, { "epoch": 0.8841316564088841, "grad_norm": 3.282111167907715, "learning_rate": 9.56688702961639e-06, "loss": 1.1508, "step": 3304 }, { "epoch": 0.8843992507358844, "grad_norm": 3.501091957092285, "learning_rate": 9.566526680344788e-06, "loss": 1.1034, "step": 3305 }, { "epoch": 0.8846668450628846, "grad_norm": 3.3922479152679443, "learning_rate": 9.566166188022804e-06, "loss": 1.054, "step": 3306 }, { "epoch": 0.8849344393898849, "grad_norm": 3.4459426403045654, "learning_rate": 9.565805552661728e-06, "loss": 1.2095, "step": 3307 }, { "epoch": 0.8852020337168852, "grad_norm": 3.219888210296631, "learning_rate": 9.565444774272858e-06, "loss": 1.106, "step": 3308 }, { "epoch": 0.8854696280438855, "grad_norm": 3.378981113433838, "learning_rate": 9.565083852867494e-06, "loss": 1.1935, "step": 3309 }, { "epoch": 0.8857372223708857, "grad_norm": 3.991638660430908, "learning_rate": 9.564722788456943e-06, "loss": 1.4006, "step": 3310 }, { "epoch": 0.886004816697886, "grad_norm": 3.2675182819366455, "learning_rate": 9.564361581052519e-06, "loss": 1.109, "step": 3311 }, { "epoch": 0.8862724110248863, "grad_norm": 3.029271125793457, "learning_rate": 9.564000230665534e-06, "loss": 1.0853, "step": 3312 }, { "epoch": 0.8865400053518866, "grad_norm": 3.339115619659424, "learning_rate": 9.563638737307307e-06, "loss": 1.1887, "step": 3313 }, { "epoch": 0.8868075996788868, "grad_norm": 3.1957242488861084, "learning_rate": 9.56327710098916e-06, "loss": 1.1597, "step": 3314 }, { "epoch": 0.8870751940058871, "grad_norm": 3.588334798812866, "learning_rate": 9.562915321722428e-06, "loss": 1.1607, "step": 3315 }, { "epoch": 0.8873427883328874, "grad_norm": 3.3101933002471924, "learning_rate": 9.56255339951844e-06, "loss": 1.1174, "step": 3316 }, { "epoch": 0.8876103826598876, "grad_norm": 3.237942934036255, "learning_rate": 9.562191334388535e-06, "loss": 1.1496, "step": 3317 }, { "epoch": 0.8878779769868879, "grad_norm": 3.141970634460449, "learning_rate": 9.561829126344053e-06, "loss": 1.0682, "step": 3318 }, { "epoch": 0.8881455713138882, "grad_norm": 3.4344382286071777, "learning_rate": 9.561466775396342e-06, "loss": 1.0629, "step": 3319 }, { "epoch": 0.8884131656408885, "grad_norm": 3.219492197036743, "learning_rate": 9.561104281556752e-06, "loss": 1.167, "step": 3320 }, { "epoch": 0.8886807599678886, "grad_norm": 3.2902910709381104, "learning_rate": 9.56074164483664e-06, "loss": 1.0776, "step": 3321 }, { "epoch": 0.8889483542948889, "grad_norm": 3.4856672286987305, "learning_rate": 9.560378865247363e-06, "loss": 1.2053, "step": 3322 }, { "epoch": 0.8892159486218892, "grad_norm": 3.6184751987457275, "learning_rate": 9.560015942800289e-06, "loss": 1.1847, "step": 3323 }, { "epoch": 0.8894835429488895, "grad_norm": 3.2977712154388428, "learning_rate": 9.559652877506785e-06, "loss": 1.1069, "step": 3324 }, { "epoch": 0.8897511372758897, "grad_norm": 3.0380518436431885, "learning_rate": 9.559289669378224e-06, "loss": 1.0513, "step": 3325 }, { "epoch": 0.89001873160289, "grad_norm": 3.521289348602295, "learning_rate": 9.558926318425986e-06, "loss": 1.0462, "step": 3326 }, { "epoch": 0.8902863259298903, "grad_norm": 2.921584129333496, "learning_rate": 9.558562824661448e-06, "loss": 0.9953, "step": 3327 }, { "epoch": 0.8905539202568905, "grad_norm": 3.5694329738616943, "learning_rate": 9.558199188096004e-06, "loss": 1.231, "step": 3328 }, { "epoch": 0.8908215145838908, "grad_norm": 3.6749255657196045, "learning_rate": 9.557835408741039e-06, "loss": 1.1523, "step": 3329 }, { "epoch": 0.8910891089108911, "grad_norm": 3.4388997554779053, "learning_rate": 9.557471486607952e-06, "loss": 1.1669, "step": 3330 }, { "epoch": 0.8913567032378914, "grad_norm": 3.3783130645751953, "learning_rate": 9.557107421708142e-06, "loss": 1.0796, "step": 3331 }, { "epoch": 0.8916242975648916, "grad_norm": 3.5907704830169678, "learning_rate": 9.556743214053017e-06, "loss": 1.1456, "step": 3332 }, { "epoch": 0.8918918918918919, "grad_norm": 3.543071746826172, "learning_rate": 9.55637886365398e-06, "loss": 1.2526, "step": 3333 }, { "epoch": 0.8921594862188922, "grad_norm": 3.632092237472534, "learning_rate": 9.55601437052245e-06, "loss": 1.2962, "step": 3334 }, { "epoch": 0.8924270805458925, "grad_norm": 3.31510066986084, "learning_rate": 9.55564973466984e-06, "loss": 1.138, "step": 3335 }, { "epoch": 0.8926946748728927, "grad_norm": 3.437994956970215, "learning_rate": 9.555284956107578e-06, "loss": 1.0778, "step": 3336 }, { "epoch": 0.892962269199893, "grad_norm": 3.2904369831085205, "learning_rate": 9.554920034847088e-06, "loss": 1.1556, "step": 3337 }, { "epoch": 0.8932298635268933, "grad_norm": 3.6388745307922363, "learning_rate": 9.5545549708998e-06, "loss": 1.1318, "step": 3338 }, { "epoch": 0.8934974578538935, "grad_norm": 3.85868239402771, "learning_rate": 9.554189764277155e-06, "loss": 1.26, "step": 3339 }, { "epoch": 0.8937650521808937, "grad_norm": 3.250420570373535, "learning_rate": 9.553824414990588e-06, "loss": 1.0647, "step": 3340 }, { "epoch": 0.894032646507894, "grad_norm": 3.053664445877075, "learning_rate": 9.553458923051546e-06, "loss": 1.0382, "step": 3341 }, { "epoch": 0.8943002408348943, "grad_norm": 3.8642590045928955, "learning_rate": 9.553093288471479e-06, "loss": 1.1887, "step": 3342 }, { "epoch": 0.8945678351618945, "grad_norm": 3.3516335487365723, "learning_rate": 9.552727511261841e-06, "loss": 1.1131, "step": 3343 }, { "epoch": 0.8948354294888948, "grad_norm": 2.911613702774048, "learning_rate": 9.55236159143409e-06, "loss": 1.0118, "step": 3344 }, { "epoch": 0.8951030238158951, "grad_norm": 3.028801441192627, "learning_rate": 9.551995528999686e-06, "loss": 1.0264, "step": 3345 }, { "epoch": 0.8953706181428954, "grad_norm": 3.528012752532959, "learning_rate": 9.5516293239701e-06, "loss": 1.1946, "step": 3346 }, { "epoch": 0.8956382124698956, "grad_norm": 2.96004581451416, "learning_rate": 9.551262976356801e-06, "loss": 0.9409, "step": 3347 }, { "epoch": 0.8959058067968959, "grad_norm": 3.013521194458008, "learning_rate": 9.550896486171268e-06, "loss": 1.0383, "step": 3348 }, { "epoch": 0.8961734011238962, "grad_norm": 3.353602409362793, "learning_rate": 9.550529853424979e-06, "loss": 1.0802, "step": 3349 }, { "epoch": 0.8964409954508964, "grad_norm": 3.60223126411438, "learning_rate": 9.55016307812942e-06, "loss": 1.1218, "step": 3350 }, { "epoch": 0.8967085897778967, "grad_norm": 3.689014434814453, "learning_rate": 9.549796160296081e-06, "loss": 1.2695, "step": 3351 }, { "epoch": 0.896976184104897, "grad_norm": 2.9822311401367188, "learning_rate": 9.549429099936455e-06, "loss": 0.9881, "step": 3352 }, { "epoch": 0.8972437784318973, "grad_norm": 3.03279447555542, "learning_rate": 9.549061897062043e-06, "loss": 1.0138, "step": 3353 }, { "epoch": 0.8975113727588975, "grad_norm": 3.1987500190734863, "learning_rate": 9.548694551684345e-06, "loss": 1.0596, "step": 3354 }, { "epoch": 0.8977789670858978, "grad_norm": 3.4811275005340576, "learning_rate": 9.548327063814871e-06, "loss": 1.1956, "step": 3355 }, { "epoch": 0.8980465614128981, "grad_norm": 3.577713966369629, "learning_rate": 9.547959433465128e-06, "loss": 1.1442, "step": 3356 }, { "epoch": 0.8983141557398984, "grad_norm": 3.459491014480591, "learning_rate": 9.547591660646637e-06, "loss": 1.3063, "step": 3357 }, { "epoch": 0.8985817500668986, "grad_norm": 3.2407350540161133, "learning_rate": 9.54722374537092e-06, "loss": 1.0682, "step": 3358 }, { "epoch": 0.8988493443938989, "grad_norm": 3.129257917404175, "learning_rate": 9.546855687649497e-06, "loss": 1.0516, "step": 3359 }, { "epoch": 0.8991169387208992, "grad_norm": 3.4182918071746826, "learning_rate": 9.5464874874939e-06, "loss": 1.1032, "step": 3360 }, { "epoch": 0.8993845330478993, "grad_norm": 3.995587110519409, "learning_rate": 9.546119144915667e-06, "loss": 1.1301, "step": 3361 }, { "epoch": 0.8996521273748996, "grad_norm": 3.1512610912323, "learning_rate": 9.545750659926331e-06, "loss": 1.0217, "step": 3362 }, { "epoch": 0.8999197217018999, "grad_norm": 3.4359290599823, "learning_rate": 9.545382032537438e-06, "loss": 1.2411, "step": 3363 }, { "epoch": 0.9001873160289002, "grad_norm": 3.2754461765289307, "learning_rate": 9.545013262760535e-06, "loss": 1.0496, "step": 3364 }, { "epoch": 0.9004549103559004, "grad_norm": 3.3657703399658203, "learning_rate": 9.544644350607173e-06, "loss": 1.0734, "step": 3365 }, { "epoch": 0.9007225046829007, "grad_norm": 3.1346230506896973, "learning_rate": 9.54427529608891e-06, "loss": 1.1506, "step": 3366 }, { "epoch": 0.900990099009901, "grad_norm": 3.4281997680664062, "learning_rate": 9.543906099217308e-06, "loss": 1.0849, "step": 3367 }, { "epoch": 0.9012576933369013, "grad_norm": 3.335317850112915, "learning_rate": 9.543536760003928e-06, "loss": 1.1822, "step": 3368 }, { "epoch": 0.9015252876639015, "grad_norm": 3.0482382774353027, "learning_rate": 9.543167278460345e-06, "loss": 1.1431, "step": 3369 }, { "epoch": 0.9017928819909018, "grad_norm": 3.4252700805664062, "learning_rate": 9.54279765459813e-06, "loss": 1.0517, "step": 3370 }, { "epoch": 0.9020604763179021, "grad_norm": 3.5490097999572754, "learning_rate": 9.542427888428864e-06, "loss": 1.1277, "step": 3371 }, { "epoch": 0.9023280706449023, "grad_norm": 3.5822768211364746, "learning_rate": 9.54205797996413e-06, "loss": 1.0832, "step": 3372 }, { "epoch": 0.9025956649719026, "grad_norm": 3.2864580154418945, "learning_rate": 9.541687929215512e-06, "loss": 1.0394, "step": 3373 }, { "epoch": 0.9028632592989029, "grad_norm": 3.281869888305664, "learning_rate": 9.541317736194608e-06, "loss": 1.1343, "step": 3374 }, { "epoch": 0.9031308536259032, "grad_norm": 3.3536150455474854, "learning_rate": 9.54094740091301e-06, "loss": 1.1296, "step": 3375 }, { "epoch": 0.9033984479529034, "grad_norm": 3.730140447616577, "learning_rate": 9.54057692338232e-06, "loss": 1.1234, "step": 3376 }, { "epoch": 0.9036660422799037, "grad_norm": 3.273059129714966, "learning_rate": 9.540206303614146e-06, "loss": 1.0658, "step": 3377 }, { "epoch": 0.903933636606904, "grad_norm": 3.179582357406616, "learning_rate": 9.539835541620096e-06, "loss": 1.0502, "step": 3378 }, { "epoch": 0.9042012309339043, "grad_norm": 3.5026938915252686, "learning_rate": 9.539464637411782e-06, "loss": 1.1883, "step": 3379 }, { "epoch": 0.9044688252609044, "grad_norm": 3.0967392921447754, "learning_rate": 9.539093591000828e-06, "loss": 1.169, "step": 3380 }, { "epoch": 0.9047364195879047, "grad_norm": 3.457193613052368, "learning_rate": 9.538722402398854e-06, "loss": 1.1136, "step": 3381 }, { "epoch": 0.905004013914905, "grad_norm": 3.4893715381622314, "learning_rate": 9.538351071617489e-06, "loss": 1.1458, "step": 3382 }, { "epoch": 0.9052716082419052, "grad_norm": 3.3818037509918213, "learning_rate": 9.537979598668364e-06, "loss": 1.1278, "step": 3383 }, { "epoch": 0.9055392025689055, "grad_norm": 3.309565782546997, "learning_rate": 9.537607983563117e-06, "loss": 1.1216, "step": 3384 }, { "epoch": 0.9058067968959058, "grad_norm": 3.428215980529785, "learning_rate": 9.53723622631339e-06, "loss": 1.1297, "step": 3385 }, { "epoch": 0.9060743912229061, "grad_norm": 3.088609457015991, "learning_rate": 9.536864326930826e-06, "loss": 1.0726, "step": 3386 }, { "epoch": 0.9063419855499063, "grad_norm": 3.1660032272338867, "learning_rate": 9.536492285427077e-06, "loss": 1.1357, "step": 3387 }, { "epoch": 0.9066095798769066, "grad_norm": 3.1965155601501465, "learning_rate": 9.536120101813797e-06, "loss": 1.0836, "step": 3388 }, { "epoch": 0.9068771742039069, "grad_norm": 3.284010410308838, "learning_rate": 9.535747776102645e-06, "loss": 1.1047, "step": 3389 }, { "epoch": 0.9071447685309072, "grad_norm": 3.5011777877807617, "learning_rate": 9.535375308305283e-06, "loss": 1.2027, "step": 3390 }, { "epoch": 0.9074123628579074, "grad_norm": 3.8040499687194824, "learning_rate": 9.535002698433383e-06, "loss": 1.1663, "step": 3391 }, { "epoch": 0.9076799571849077, "grad_norm": 3.3377859592437744, "learning_rate": 9.534629946498613e-06, "loss": 1.2642, "step": 3392 }, { "epoch": 0.907947551511908, "grad_norm": 3.4258129596710205, "learning_rate": 9.534257052512651e-06, "loss": 1.1166, "step": 3393 }, { "epoch": 0.9082151458389082, "grad_norm": 3.600273847579956, "learning_rate": 9.533884016487181e-06, "loss": 1.0948, "step": 3394 }, { "epoch": 0.9084827401659085, "grad_norm": 3.1784534454345703, "learning_rate": 9.533510838433884e-06, "loss": 1.0578, "step": 3395 }, { "epoch": 0.9087503344929088, "grad_norm": 3.3856201171875, "learning_rate": 9.533137518364453e-06, "loss": 1.1899, "step": 3396 }, { "epoch": 0.9090179288199091, "grad_norm": 3.489384412765503, "learning_rate": 9.532764056290582e-06, "loss": 1.0646, "step": 3397 }, { "epoch": 0.9092855231469092, "grad_norm": 3.3582942485809326, "learning_rate": 9.53239045222397e-06, "loss": 1.1614, "step": 3398 }, { "epoch": 0.9095531174739095, "grad_norm": 3.891071319580078, "learning_rate": 9.53201670617632e-06, "loss": 1.2788, "step": 3399 }, { "epoch": 0.9098207118009098, "grad_norm": 4.0248026847839355, "learning_rate": 9.531642818159341e-06, "loss": 1.2048, "step": 3400 }, { "epoch": 0.9100883061279101, "grad_norm": 3.6524062156677246, "learning_rate": 9.531268788184744e-06, "loss": 1.3362, "step": 3401 }, { "epoch": 0.9103559004549103, "grad_norm": 3.208773374557495, "learning_rate": 9.530894616264248e-06, "loss": 1.0624, "step": 3402 }, { "epoch": 0.9106234947819106, "grad_norm": 3.280280351638794, "learning_rate": 9.530520302409572e-06, "loss": 1.138, "step": 3403 }, { "epoch": 0.9108910891089109, "grad_norm": 3.034221887588501, "learning_rate": 9.530145846632441e-06, "loss": 1.1124, "step": 3404 }, { "epoch": 0.9111586834359111, "grad_norm": 3.4571194648742676, "learning_rate": 9.52977124894459e-06, "loss": 1.1556, "step": 3405 }, { "epoch": 0.9114262777629114, "grad_norm": 3.2355082035064697, "learning_rate": 9.529396509357748e-06, "loss": 1.1784, "step": 3406 }, { "epoch": 0.9116938720899117, "grad_norm": 3.521646499633789, "learning_rate": 9.529021627883657e-06, "loss": 1.0774, "step": 3407 }, { "epoch": 0.911961466416912, "grad_norm": 3.313163995742798, "learning_rate": 9.528646604534058e-06, "loss": 1.035, "step": 3408 }, { "epoch": 0.9122290607439122, "grad_norm": 3.7362465858459473, "learning_rate": 9.528271439320703e-06, "loss": 1.2586, "step": 3409 }, { "epoch": 0.9124966550709125, "grad_norm": 3.2787117958068848, "learning_rate": 9.527896132255341e-06, "loss": 1.1981, "step": 3410 }, { "epoch": 0.9127642493979128, "grad_norm": 3.5333304405212402, "learning_rate": 9.52752068334973e-06, "loss": 1.1986, "step": 3411 }, { "epoch": 0.9130318437249131, "grad_norm": 3.5400190353393555, "learning_rate": 9.527145092615631e-06, "loss": 1.1002, "step": 3412 }, { "epoch": 0.9132994380519133, "grad_norm": 3.786625862121582, "learning_rate": 9.526769360064812e-06, "loss": 1.1783, "step": 3413 }, { "epoch": 0.9135670323789136, "grad_norm": 3.3080086708068848, "learning_rate": 9.526393485709038e-06, "loss": 1.0919, "step": 3414 }, { "epoch": 0.9138346267059139, "grad_norm": 3.204632043838501, "learning_rate": 9.526017469560088e-06, "loss": 1.0737, "step": 3415 }, { "epoch": 0.914102221032914, "grad_norm": 3.2012712955474854, "learning_rate": 9.52564131162974e-06, "loss": 1.1405, "step": 3416 }, { "epoch": 0.9143698153599144, "grad_norm": 3.119194746017456, "learning_rate": 9.525265011929776e-06, "loss": 1.0067, "step": 3417 }, { "epoch": 0.9146374096869146, "grad_norm": 3.2325518131256104, "learning_rate": 9.524888570471987e-06, "loss": 1.2162, "step": 3418 }, { "epoch": 0.914905004013915, "grad_norm": 3.490710973739624, "learning_rate": 9.524511987268161e-06, "loss": 1.0244, "step": 3419 }, { "epoch": 0.9151725983409151, "grad_norm": 3.0969996452331543, "learning_rate": 9.524135262330098e-06, "loss": 1.1009, "step": 3420 }, { "epoch": 0.9154401926679154, "grad_norm": 3.454273223876953, "learning_rate": 9.523758395669598e-06, "loss": 1.1822, "step": 3421 }, { "epoch": 0.9157077869949157, "grad_norm": 3.2115368843078613, "learning_rate": 9.523381387298469e-06, "loss": 1.0328, "step": 3422 }, { "epoch": 0.915975381321916, "grad_norm": 3.3276708126068115, "learning_rate": 9.523004237228517e-06, "loss": 1.1133, "step": 3423 }, { "epoch": 0.9162429756489162, "grad_norm": 3.601778507232666, "learning_rate": 9.522626945471561e-06, "loss": 1.2206, "step": 3424 }, { "epoch": 0.9165105699759165, "grad_norm": 3.281283378601074, "learning_rate": 9.522249512039417e-06, "loss": 1.09, "step": 3425 }, { "epoch": 0.9167781643029168, "grad_norm": 3.4089417457580566, "learning_rate": 9.521871936943907e-06, "loss": 1.2478, "step": 3426 }, { "epoch": 0.917045758629917, "grad_norm": 3.5590267181396484, "learning_rate": 9.521494220196862e-06, "loss": 1.1056, "step": 3427 }, { "epoch": 0.9173133529569173, "grad_norm": 3.1895391941070557, "learning_rate": 9.521116361810115e-06, "loss": 1.1022, "step": 3428 }, { "epoch": 0.9175809472839176, "grad_norm": 3.6638033390045166, "learning_rate": 9.5207383617955e-06, "loss": 1.3269, "step": 3429 }, { "epoch": 0.9178485416109179, "grad_norm": 3.7325849533081055, "learning_rate": 9.52036022016486e-06, "loss": 1.2461, "step": 3430 }, { "epoch": 0.9181161359379181, "grad_norm": 3.56632137298584, "learning_rate": 9.519981936930038e-06, "loss": 1.2025, "step": 3431 }, { "epoch": 0.9183837302649184, "grad_norm": 3.4402332305908203, "learning_rate": 9.519603512102887e-06, "loss": 1.0808, "step": 3432 }, { "epoch": 0.9186513245919187, "grad_norm": 3.298569917678833, "learning_rate": 9.51922494569526e-06, "loss": 1.0316, "step": 3433 }, { "epoch": 0.918918918918919, "grad_norm": 3.712266683578491, "learning_rate": 9.518846237719018e-06, "loss": 1.2548, "step": 3434 }, { "epoch": 0.9191865132459192, "grad_norm": 3.270195722579956, "learning_rate": 9.51846738818602e-06, "loss": 1.1098, "step": 3435 }, { "epoch": 0.9194541075729195, "grad_norm": 3.471754312515259, "learning_rate": 9.518088397108138e-06, "loss": 1.0728, "step": 3436 }, { "epoch": 0.9197217018999198, "grad_norm": 3.256743907928467, "learning_rate": 9.517709264497242e-06, "loss": 1.1637, "step": 3437 }, { "epoch": 0.9199892962269199, "grad_norm": 3.3077757358551025, "learning_rate": 9.51732999036521e-06, "loss": 1.1242, "step": 3438 }, { "epoch": 0.9202568905539202, "grad_norm": 3.492668867111206, "learning_rate": 9.516950574723922e-06, "loss": 1.0478, "step": 3439 }, { "epoch": 0.9205244848809205, "grad_norm": 3.167327880859375, "learning_rate": 9.516571017585265e-06, "loss": 1.1084, "step": 3440 }, { "epoch": 0.9207920792079208, "grad_norm": 2.9469475746154785, "learning_rate": 9.516191318961126e-06, "loss": 1.0578, "step": 3441 }, { "epoch": 0.921059673534921, "grad_norm": 3.6381478309631348, "learning_rate": 9.515811478863402e-06, "loss": 1.2693, "step": 3442 }, { "epoch": 0.9213272678619213, "grad_norm": 3.4339804649353027, "learning_rate": 9.515431497303992e-06, "loss": 1.1058, "step": 3443 }, { "epoch": 0.9215948621889216, "grad_norm": 3.2788732051849365, "learning_rate": 9.515051374294797e-06, "loss": 0.9658, "step": 3444 }, { "epoch": 0.9218624565159219, "grad_norm": 3.393667697906494, "learning_rate": 9.514671109847727e-06, "loss": 1.0991, "step": 3445 }, { "epoch": 0.9221300508429221, "grad_norm": 3.7156498432159424, "learning_rate": 9.514290703974694e-06, "loss": 1.0418, "step": 3446 }, { "epoch": 0.9223976451699224, "grad_norm": 3.149561882019043, "learning_rate": 9.513910156687612e-06, "loss": 1.1174, "step": 3447 }, { "epoch": 0.9226652394969227, "grad_norm": 3.51082181930542, "learning_rate": 9.513529467998404e-06, "loss": 1.1957, "step": 3448 }, { "epoch": 0.9229328338239229, "grad_norm": 3.3628318309783936, "learning_rate": 9.513148637918995e-06, "loss": 1.0921, "step": 3449 }, { "epoch": 0.9232004281509232, "grad_norm": 3.5448429584503174, "learning_rate": 9.512767666461316e-06, "loss": 1.1887, "step": 3450 }, { "epoch": 0.9234680224779235, "grad_norm": 3.335571050643921, "learning_rate": 9.512386553637298e-06, "loss": 1.0374, "step": 3451 }, { "epoch": 0.9237356168049238, "grad_norm": 3.2040200233459473, "learning_rate": 9.512005299458885e-06, "loss": 1.1254, "step": 3452 }, { "epoch": 0.924003211131924, "grad_norm": 3.4492249488830566, "learning_rate": 9.511623903938015e-06, "loss": 1.1536, "step": 3453 }, { "epoch": 0.9242708054589243, "grad_norm": 3.422445774078369, "learning_rate": 9.511242367086637e-06, "loss": 1.2253, "step": 3454 }, { "epoch": 0.9245383997859246, "grad_norm": 3.5041046142578125, "learning_rate": 9.510860688916704e-06, "loss": 1.1549, "step": 3455 }, { "epoch": 0.9248059941129249, "grad_norm": 3.517303705215454, "learning_rate": 9.510478869440172e-06, "loss": 1.1445, "step": 3456 }, { "epoch": 0.925073588439925, "grad_norm": 3.393651247024536, "learning_rate": 9.510096908669e-06, "loss": 1.2026, "step": 3457 }, { "epoch": 0.9253411827669253, "grad_norm": 3.499711751937866, "learning_rate": 9.509714806615157e-06, "loss": 1.2276, "step": 3458 }, { "epoch": 0.9256087770939256, "grad_norm": 3.527127742767334, "learning_rate": 9.509332563290611e-06, "loss": 1.1068, "step": 3459 }, { "epoch": 0.9258763714209258, "grad_norm": 2.9405460357666016, "learning_rate": 9.508950178707335e-06, "loss": 0.984, "step": 3460 }, { "epoch": 0.9261439657479261, "grad_norm": 3.161170482635498, "learning_rate": 9.508567652877307e-06, "loss": 1.0606, "step": 3461 }, { "epoch": 0.9264115600749264, "grad_norm": 2.99855375289917, "learning_rate": 9.508184985812514e-06, "loss": 0.9767, "step": 3462 }, { "epoch": 0.9266791544019267, "grad_norm": 3.1051480770111084, "learning_rate": 9.507802177524937e-06, "loss": 1.0899, "step": 3463 }, { "epoch": 0.9269467487289269, "grad_norm": 3.3303263187408447, "learning_rate": 9.507419228026574e-06, "loss": 1.2223, "step": 3464 }, { "epoch": 0.9272143430559272, "grad_norm": 3.6255226135253906, "learning_rate": 9.507036137329417e-06, "loss": 1.2277, "step": 3465 }, { "epoch": 0.9274819373829275, "grad_norm": 3.4016709327697754, "learning_rate": 9.50665290544547e-06, "loss": 1.0828, "step": 3466 }, { "epoch": 0.9277495317099278, "grad_norm": 3.1820127964019775, "learning_rate": 9.506269532386736e-06, "loss": 1.0512, "step": 3467 }, { "epoch": 0.928017126036928, "grad_norm": 3.8635528087615967, "learning_rate": 9.505886018165223e-06, "loss": 1.137, "step": 3468 }, { "epoch": 0.9282847203639283, "grad_norm": 3.615046977996826, "learning_rate": 9.505502362792947e-06, "loss": 1.1372, "step": 3469 }, { "epoch": 0.9285523146909286, "grad_norm": 3.5528488159179688, "learning_rate": 9.505118566281928e-06, "loss": 1.145, "step": 3470 }, { "epoch": 0.9288199090179288, "grad_norm": 3.367374897003174, "learning_rate": 9.504734628644186e-06, "loss": 1.1556, "step": 3471 }, { "epoch": 0.9290875033449291, "grad_norm": 3.4801955223083496, "learning_rate": 9.504350549891748e-06, "loss": 1.1011, "step": 3472 }, { "epoch": 0.9293550976719294, "grad_norm": 3.310755729675293, "learning_rate": 9.503966330036646e-06, "loss": 1.239, "step": 3473 }, { "epoch": 0.9296226919989297, "grad_norm": 3.1718642711639404, "learning_rate": 9.50358196909092e-06, "loss": 1.1341, "step": 3474 }, { "epoch": 0.9298902863259298, "grad_norm": 3.5848066806793213, "learning_rate": 9.503197467066604e-06, "loss": 1.2817, "step": 3475 }, { "epoch": 0.9301578806529301, "grad_norm": 3.2660577297210693, "learning_rate": 9.502812823975746e-06, "loss": 1.1082, "step": 3476 }, { "epoch": 0.9304254749799304, "grad_norm": 3.4839932918548584, "learning_rate": 9.502428039830395e-06, "loss": 1.0307, "step": 3477 }, { "epoch": 0.9306930693069307, "grad_norm": 3.186553478240967, "learning_rate": 9.502043114642607e-06, "loss": 0.9545, "step": 3478 }, { "epoch": 0.9309606636339309, "grad_norm": 3.0523486137390137, "learning_rate": 9.501658048424437e-06, "loss": 0.99, "step": 3479 }, { "epoch": 0.9312282579609312, "grad_norm": 3.2745773792266846, "learning_rate": 9.501272841187949e-06, "loss": 1.105, "step": 3480 }, { "epoch": 0.9314958522879315, "grad_norm": 3.30385422706604, "learning_rate": 9.500887492945208e-06, "loss": 1.03, "step": 3481 }, { "epoch": 0.9317634466149317, "grad_norm": 3.7593932151794434, "learning_rate": 9.500502003708287e-06, "loss": 1.3537, "step": 3482 }, { "epoch": 0.932031040941932, "grad_norm": 3.3091962337493896, "learning_rate": 9.500116373489264e-06, "loss": 1.1339, "step": 3483 }, { "epoch": 0.9322986352689323, "grad_norm": 3.1026055812835693, "learning_rate": 9.499730602300213e-06, "loss": 1.095, "step": 3484 }, { "epoch": 0.9325662295959326, "grad_norm": 3.178584337234497, "learning_rate": 9.499344690153226e-06, "loss": 0.9671, "step": 3485 }, { "epoch": 0.9328338239229328, "grad_norm": 3.1470065116882324, "learning_rate": 9.498958637060385e-06, "loss": 1.1067, "step": 3486 }, { "epoch": 0.9331014182499331, "grad_norm": 3.4016363620758057, "learning_rate": 9.498572443033789e-06, "loss": 1.1828, "step": 3487 }, { "epoch": 0.9333690125769334, "grad_norm": 3.14091157913208, "learning_rate": 9.498186108085534e-06, "loss": 1.0975, "step": 3488 }, { "epoch": 0.9336366069039337, "grad_norm": 3.34956431388855, "learning_rate": 9.497799632227721e-06, "loss": 1.2063, "step": 3489 }, { "epoch": 0.9339042012309339, "grad_norm": 3.507667064666748, "learning_rate": 9.497413015472458e-06, "loss": 1.1349, "step": 3490 }, { "epoch": 0.9341717955579342, "grad_norm": 3.209160566329956, "learning_rate": 9.497026257831856e-06, "loss": 1.0282, "step": 3491 }, { "epoch": 0.9344393898849345, "grad_norm": 3.31990647315979, "learning_rate": 9.49663935931803e-06, "loss": 1.1131, "step": 3492 }, { "epoch": 0.9347069842119347, "grad_norm": 3.19960618019104, "learning_rate": 9.4962523199431e-06, "loss": 1.1472, "step": 3493 }, { "epoch": 0.934974578538935, "grad_norm": 3.4468166828155518, "learning_rate": 9.495865139719192e-06, "loss": 1.1146, "step": 3494 }, { "epoch": 0.9352421728659353, "grad_norm": 3.4722113609313965, "learning_rate": 9.495477818658432e-06, "loss": 1.0553, "step": 3495 }, { "epoch": 0.9355097671929355, "grad_norm": 3.26762318611145, "learning_rate": 9.495090356772955e-06, "loss": 1.0282, "step": 3496 }, { "epoch": 0.9357773615199357, "grad_norm": 3.4020135402679443, "learning_rate": 9.494702754074898e-06, "loss": 1.1634, "step": 3497 }, { "epoch": 0.936044955846936, "grad_norm": 3.114060401916504, "learning_rate": 9.494315010576405e-06, "loss": 1.0458, "step": 3498 }, { "epoch": 0.9363125501739363, "grad_norm": 2.988351345062256, "learning_rate": 9.493927126289619e-06, "loss": 1.0748, "step": 3499 }, { "epoch": 0.9365801445009366, "grad_norm": 3.3398990631103516, "learning_rate": 9.493539101226692e-06, "loss": 1.0633, "step": 3500 }, { "epoch": 0.9365801445009366, "eval_loss": 1.1470533609390259, "eval_runtime": 11.4325, "eval_samples_per_second": 34.988, "eval_steps_per_second": 4.373, "step": 3500 }, { "epoch": 0.9368477388279368, "grad_norm": 3.560861587524414, "learning_rate": 9.493150935399779e-06, "loss": 0.9958, "step": 3501 }, { "epoch": 0.9371153331549371, "grad_norm": 3.2529478073120117, "learning_rate": 9.49276262882104e-06, "loss": 1.1023, "step": 3502 }, { "epoch": 0.9373829274819374, "grad_norm": 3.466764211654663, "learning_rate": 9.49237418150264e-06, "loss": 1.1395, "step": 3503 }, { "epoch": 0.9376505218089376, "grad_norm": 3.3550920486450195, "learning_rate": 9.491985593456747e-06, "loss": 1.111, "step": 3504 }, { "epoch": 0.9379181161359379, "grad_norm": 3.469780683517456, "learning_rate": 9.491596864695534e-06, "loss": 1.1792, "step": 3505 }, { "epoch": 0.9381857104629382, "grad_norm": 3.7144579887390137, "learning_rate": 9.491207995231176e-06, "loss": 1.2884, "step": 3506 }, { "epoch": 0.9384533047899385, "grad_norm": 2.8839621543884277, "learning_rate": 9.490818985075856e-06, "loss": 1.0105, "step": 3507 }, { "epoch": 0.9387208991169387, "grad_norm": 3.0779335498809814, "learning_rate": 9.490429834241763e-06, "loss": 1.052, "step": 3508 }, { "epoch": 0.938988493443939, "grad_norm": 3.260777711868286, "learning_rate": 9.490040542741084e-06, "loss": 1.0654, "step": 3509 }, { "epoch": 0.9392560877709393, "grad_norm": 3.4385604858398438, "learning_rate": 9.489651110586014e-06, "loss": 1.2247, "step": 3510 }, { "epoch": 0.9395236820979396, "grad_norm": 3.460442066192627, "learning_rate": 9.489261537788754e-06, "loss": 1.0923, "step": 3511 }, { "epoch": 0.9397912764249398, "grad_norm": 3.365694284439087, "learning_rate": 9.488871824361508e-06, "loss": 1.0738, "step": 3512 }, { "epoch": 0.9400588707519401, "grad_norm": 3.1729164123535156, "learning_rate": 9.48848197031648e-06, "loss": 1.1758, "step": 3513 }, { "epoch": 0.9403264650789404, "grad_norm": 3.1198678016662598, "learning_rate": 9.488091975665887e-06, "loss": 1.0894, "step": 3514 }, { "epoch": 0.9405940594059405, "grad_norm": 3.1159262657165527, "learning_rate": 9.487701840421945e-06, "loss": 1.1764, "step": 3515 }, { "epoch": 0.9408616537329408, "grad_norm": 3.0507001876831055, "learning_rate": 9.487311564596875e-06, "loss": 1.0162, "step": 3516 }, { "epoch": 0.9411292480599411, "grad_norm": 3.2293381690979004, "learning_rate": 9.486921148202901e-06, "loss": 1.0442, "step": 3517 }, { "epoch": 0.9413968423869414, "grad_norm": 3.246738910675049, "learning_rate": 9.486530591252257e-06, "loss": 1.1221, "step": 3518 }, { "epoch": 0.9416644367139416, "grad_norm": 3.022080659866333, "learning_rate": 9.486139893757172e-06, "loss": 0.9833, "step": 3519 }, { "epoch": 0.9419320310409419, "grad_norm": 3.232952356338501, "learning_rate": 9.485749055729891e-06, "loss": 1.058, "step": 3520 }, { "epoch": 0.9421996253679422, "grad_norm": 3.4002277851104736, "learning_rate": 9.485358077182653e-06, "loss": 1.1898, "step": 3521 }, { "epoch": 0.9424672196949425, "grad_norm": 3.4779772758483887, "learning_rate": 9.484966958127707e-06, "loss": 1.0439, "step": 3522 }, { "epoch": 0.9427348140219427, "grad_norm": 3.6369545459747314, "learning_rate": 9.484575698577305e-06, "loss": 1.1877, "step": 3523 }, { "epoch": 0.943002408348943, "grad_norm": 3.197024345397949, "learning_rate": 9.484184298543706e-06, "loss": 1.0569, "step": 3524 }, { "epoch": 0.9432700026759433, "grad_norm": 3.642747640609741, "learning_rate": 9.483792758039165e-06, "loss": 1.2194, "step": 3525 }, { "epoch": 0.9435375970029435, "grad_norm": 3.4038000106811523, "learning_rate": 9.483401077075954e-06, "loss": 1.1591, "step": 3526 }, { "epoch": 0.9438051913299438, "grad_norm": 2.978867292404175, "learning_rate": 9.48300925566634e-06, "loss": 1.0912, "step": 3527 }, { "epoch": 0.9440727856569441, "grad_norm": 3.416811943054199, "learning_rate": 9.482617293822596e-06, "loss": 1.124, "step": 3528 }, { "epoch": 0.9443403799839444, "grad_norm": 3.1118931770324707, "learning_rate": 9.482225191557001e-06, "loss": 1.0341, "step": 3529 }, { "epoch": 0.9446079743109446, "grad_norm": 3.495542049407959, "learning_rate": 9.48183294888184e-06, "loss": 1.3161, "step": 3530 }, { "epoch": 0.9448755686379449, "grad_norm": 3.3605546951293945, "learning_rate": 9.481440565809398e-06, "loss": 1.0635, "step": 3531 }, { "epoch": 0.9451431629649452, "grad_norm": 3.383915901184082, "learning_rate": 9.481048042351967e-06, "loss": 1.0157, "step": 3532 }, { "epoch": 0.9454107572919455, "grad_norm": 2.984562873840332, "learning_rate": 9.480655378521845e-06, "loss": 1.0117, "step": 3533 }, { "epoch": 0.9456783516189456, "grad_norm": 3.205364942550659, "learning_rate": 9.480262574331331e-06, "loss": 1.0787, "step": 3534 }, { "epoch": 0.9459459459459459, "grad_norm": 3.5284080505371094, "learning_rate": 9.479869629792729e-06, "loss": 1.2231, "step": 3535 }, { "epoch": 0.9462135402729462, "grad_norm": 3.833712577819824, "learning_rate": 9.47947654491835e-06, "loss": 1.1585, "step": 3536 }, { "epoch": 0.9464811345999464, "grad_norm": 3.6991257667541504, "learning_rate": 9.479083319720508e-06, "loss": 1.1624, "step": 3537 }, { "epoch": 0.9467487289269467, "grad_norm": 3.437206983566284, "learning_rate": 9.47868995421152e-06, "loss": 1.1759, "step": 3538 }, { "epoch": 0.947016323253947, "grad_norm": 4.02100944519043, "learning_rate": 9.478296448403707e-06, "loss": 1.3648, "step": 3539 }, { "epoch": 0.9472839175809473, "grad_norm": 3.552727699279785, "learning_rate": 9.4779028023094e-06, "loss": 1.1806, "step": 3540 }, { "epoch": 0.9475515119079475, "grad_norm": 3.3923287391662598, "learning_rate": 9.477509015940928e-06, "loss": 1.1629, "step": 3541 }, { "epoch": 0.9478191062349478, "grad_norm": 3.3122611045837402, "learning_rate": 9.477115089310626e-06, "loss": 1.0938, "step": 3542 }, { "epoch": 0.9480867005619481, "grad_norm": 3.2239348888397217, "learning_rate": 9.476721022430834e-06, "loss": 1.2055, "step": 3543 }, { "epoch": 0.9483542948889484, "grad_norm": 3.426645278930664, "learning_rate": 9.4763268153139e-06, "loss": 1.1852, "step": 3544 }, { "epoch": 0.9486218892159486, "grad_norm": 3.3436081409454346, "learning_rate": 9.475932467972169e-06, "loss": 1.0941, "step": 3545 }, { "epoch": 0.9488894835429489, "grad_norm": 3.2754054069519043, "learning_rate": 9.475537980417994e-06, "loss": 1.0063, "step": 3546 }, { "epoch": 0.9491570778699492, "grad_norm": 3.0442628860473633, "learning_rate": 9.475143352663736e-06, "loss": 1.0222, "step": 3547 }, { "epoch": 0.9494246721969494, "grad_norm": 3.662236213684082, "learning_rate": 9.474748584721755e-06, "loss": 1.2637, "step": 3548 }, { "epoch": 0.9496922665239497, "grad_norm": 3.3807575702667236, "learning_rate": 9.474353676604416e-06, "loss": 1.1554, "step": 3549 }, { "epoch": 0.94995986085095, "grad_norm": 3.3155808448791504, "learning_rate": 9.473958628324093e-06, "loss": 1.1654, "step": 3550 }, { "epoch": 0.9502274551779503, "grad_norm": 3.54040789604187, "learning_rate": 9.47356343989316e-06, "loss": 1.3299, "step": 3551 }, { "epoch": 0.9504950495049505, "grad_norm": 3.640690565109253, "learning_rate": 9.473168111323995e-06, "loss": 1.2952, "step": 3552 }, { "epoch": 0.9507626438319507, "grad_norm": 3.141310214996338, "learning_rate": 9.472772642628984e-06, "loss": 1.0503, "step": 3553 }, { "epoch": 0.951030238158951, "grad_norm": 3.160971164703369, "learning_rate": 9.472377033820514e-06, "loss": 1.065, "step": 3554 }, { "epoch": 0.9512978324859513, "grad_norm": 3.1601452827453613, "learning_rate": 9.47198128491098e-06, "loss": 1.1415, "step": 3555 }, { "epoch": 0.9515654268129515, "grad_norm": 3.017057180404663, "learning_rate": 9.471585395912776e-06, "loss": 1.1711, "step": 3556 }, { "epoch": 0.9518330211399518, "grad_norm": 3.127492666244507, "learning_rate": 9.471189366838307e-06, "loss": 1.0834, "step": 3557 }, { "epoch": 0.9521006154669521, "grad_norm": 3.3001465797424316, "learning_rate": 9.470793197699977e-06, "loss": 1.1977, "step": 3558 }, { "epoch": 0.9523682097939523, "grad_norm": 3.1050307750701904, "learning_rate": 9.470396888510196e-06, "loss": 1.0702, "step": 3559 }, { "epoch": 0.9526358041209526, "grad_norm": 3.070390224456787, "learning_rate": 9.470000439281379e-06, "loss": 0.9667, "step": 3560 }, { "epoch": 0.9529033984479529, "grad_norm": 3.1426901817321777, "learning_rate": 9.469603850025946e-06, "loss": 0.9483, "step": 3561 }, { "epoch": 0.9531709927749532, "grad_norm": 3.142430543899536, "learning_rate": 9.46920712075632e-06, "loss": 0.9968, "step": 3562 }, { "epoch": 0.9534385871019534, "grad_norm": 3.0528955459594727, "learning_rate": 9.468810251484928e-06, "loss": 1.0463, "step": 3563 }, { "epoch": 0.9537061814289537, "grad_norm": 3.309941530227661, "learning_rate": 9.468413242224204e-06, "loss": 1.2411, "step": 3564 }, { "epoch": 0.953973775755954, "grad_norm": 3.3797719478607178, "learning_rate": 9.468016092986583e-06, "loss": 1.1239, "step": 3565 }, { "epoch": 0.9542413700829543, "grad_norm": 3.4834790229797363, "learning_rate": 9.467618803784507e-06, "loss": 1.1477, "step": 3566 }, { "epoch": 0.9545089644099545, "grad_norm": 3.8445494174957275, "learning_rate": 9.467221374630422e-06, "loss": 1.2043, "step": 3567 }, { "epoch": 0.9547765587369548, "grad_norm": 3.565916061401367, "learning_rate": 9.466823805536776e-06, "loss": 1.0226, "step": 3568 }, { "epoch": 0.9550441530639551, "grad_norm": 3.3251450061798096, "learning_rate": 9.466426096516024e-06, "loss": 1.1116, "step": 3569 }, { "epoch": 0.9553117473909553, "grad_norm": 3.7028703689575195, "learning_rate": 9.466028247580624e-06, "loss": 1.1634, "step": 3570 }, { "epoch": 0.9555793417179556, "grad_norm": 3.316803455352783, "learning_rate": 9.465630258743041e-06, "loss": 1.1836, "step": 3571 }, { "epoch": 0.9558469360449559, "grad_norm": 3.284135341644287, "learning_rate": 9.46523213001574e-06, "loss": 1.077, "step": 3572 }, { "epoch": 0.9561145303719562, "grad_norm": 3.4031052589416504, "learning_rate": 9.464833861411196e-06, "loss": 1.0654, "step": 3573 }, { "epoch": 0.9563821246989563, "grad_norm": 3.1017119884490967, "learning_rate": 9.464435452941881e-06, "loss": 1.055, "step": 3574 }, { "epoch": 0.9566497190259566, "grad_norm": 3.4276275634765625, "learning_rate": 9.464036904620278e-06, "loss": 1.1501, "step": 3575 }, { "epoch": 0.9569173133529569, "grad_norm": 3.362846851348877, "learning_rate": 9.46363821645887e-06, "loss": 1.1495, "step": 3576 }, { "epoch": 0.9571849076799572, "grad_norm": 3.4557695388793945, "learning_rate": 9.46323938847015e-06, "loss": 1.0333, "step": 3577 }, { "epoch": 0.9574525020069574, "grad_norm": 3.3175323009490967, "learning_rate": 9.462840420666607e-06, "loss": 1.0388, "step": 3578 }, { "epoch": 0.9577200963339577, "grad_norm": 3.987826347351074, "learning_rate": 9.462441313060741e-06, "loss": 1.3387, "step": 3579 }, { "epoch": 0.957987690660958, "grad_norm": 3.354966163635254, "learning_rate": 9.462042065665057e-06, "loss": 1.0009, "step": 3580 }, { "epoch": 0.9582552849879582, "grad_norm": 3.2124292850494385, "learning_rate": 9.461642678492059e-06, "loss": 1.0912, "step": 3581 }, { "epoch": 0.9585228793149585, "grad_norm": 3.4074666500091553, "learning_rate": 9.461243151554257e-06, "loss": 1.1189, "step": 3582 }, { "epoch": 0.9587904736419588, "grad_norm": 3.342137336730957, "learning_rate": 9.460843484864168e-06, "loss": 1.1774, "step": 3583 }, { "epoch": 0.9590580679689591, "grad_norm": 3.108454704284668, "learning_rate": 9.460443678434313e-06, "loss": 0.9848, "step": 3584 }, { "epoch": 0.9593256622959593, "grad_norm": 2.764338731765747, "learning_rate": 9.460043732277213e-06, "loss": 0.9759, "step": 3585 }, { "epoch": 0.9595932566229596, "grad_norm": 3.277522087097168, "learning_rate": 9.4596436464054e-06, "loss": 1.0655, "step": 3586 }, { "epoch": 0.9598608509499599, "grad_norm": 3.2596328258514404, "learning_rate": 9.459243420831406e-06, "loss": 1.0931, "step": 3587 }, { "epoch": 0.9601284452769602, "grad_norm": 3.3938968181610107, "learning_rate": 9.45884305556777e-06, "loss": 1.1095, "step": 3588 }, { "epoch": 0.9603960396039604, "grad_norm": 3.694939374923706, "learning_rate": 9.45844255062703e-06, "loss": 1.274, "step": 3589 }, { "epoch": 0.9606636339309607, "grad_norm": 3.2319419384002686, "learning_rate": 9.458041906021733e-06, "loss": 1.1096, "step": 3590 }, { "epoch": 0.960931228257961, "grad_norm": 3.1638870239257812, "learning_rate": 9.457641121764433e-06, "loss": 1.0875, "step": 3591 }, { "epoch": 0.9611988225849611, "grad_norm": 3.1526119709014893, "learning_rate": 9.457240197867682e-06, "loss": 0.9327, "step": 3592 }, { "epoch": 0.9614664169119614, "grad_norm": 3.1602957248687744, "learning_rate": 9.45683913434404e-06, "loss": 1.0397, "step": 3593 }, { "epoch": 0.9617340112389617, "grad_norm": 3.8300065994262695, "learning_rate": 9.45643793120607e-06, "loss": 1.2436, "step": 3594 }, { "epoch": 0.962001605565962, "grad_norm": 3.174600839614868, "learning_rate": 9.456036588466342e-06, "loss": 1.1446, "step": 3595 }, { "epoch": 0.9622691998929622, "grad_norm": 3.3465054035186768, "learning_rate": 9.455635106137427e-06, "loss": 1.1416, "step": 3596 }, { "epoch": 0.9625367942199625, "grad_norm": 2.8999526500701904, "learning_rate": 9.455233484231901e-06, "loss": 0.9453, "step": 3597 }, { "epoch": 0.9628043885469628, "grad_norm": 3.197765350341797, "learning_rate": 9.454831722762346e-06, "loss": 1.0374, "step": 3598 }, { "epoch": 0.9630719828739631, "grad_norm": 3.180861711502075, "learning_rate": 9.454429821741346e-06, "loss": 1.051, "step": 3599 }, { "epoch": 0.9633395772009633, "grad_norm": 3.3026978969573975, "learning_rate": 9.454027781181496e-06, "loss": 1.0753, "step": 3600 }, { "epoch": 0.9636071715279636, "grad_norm": 3.2190282344818115, "learning_rate": 9.453625601095385e-06, "loss": 1.0299, "step": 3601 }, { "epoch": 0.9638747658549639, "grad_norm": 3.302915334701538, "learning_rate": 9.453223281495612e-06, "loss": 1.1025, "step": 3602 }, { "epoch": 0.9641423601819642, "grad_norm": 3.600322961807251, "learning_rate": 9.452820822394783e-06, "loss": 1.1095, "step": 3603 }, { "epoch": 0.9644099545089644, "grad_norm": 3.5180773735046387, "learning_rate": 9.452418223805502e-06, "loss": 1.1045, "step": 3604 }, { "epoch": 0.9646775488359647, "grad_norm": 3.113248825073242, "learning_rate": 9.452015485740384e-06, "loss": 0.9298, "step": 3605 }, { "epoch": 0.964945143162965, "grad_norm": 3.2621712684631348, "learning_rate": 9.451612608212044e-06, "loss": 1.0531, "step": 3606 }, { "epoch": 0.9652127374899652, "grad_norm": 2.929264783859253, "learning_rate": 9.4512095912331e-06, "loss": 1.0108, "step": 3607 }, { "epoch": 0.9654803318169655, "grad_norm": 3.159482955932617, "learning_rate": 9.45080643481618e-06, "loss": 1.0752, "step": 3608 }, { "epoch": 0.9657479261439658, "grad_norm": 3.332207202911377, "learning_rate": 9.450403138973914e-06, "loss": 1.1162, "step": 3609 }, { "epoch": 0.9660155204709661, "grad_norm": 3.570805072784424, "learning_rate": 9.44999970371893e-06, "loss": 1.2211, "step": 3610 }, { "epoch": 0.9662831147979662, "grad_norm": 2.926478624343872, "learning_rate": 9.449596129063873e-06, "loss": 0.9727, "step": 3611 }, { "epoch": 0.9665507091249665, "grad_norm": 3.6152682304382324, "learning_rate": 9.44919241502138e-06, "loss": 1.0963, "step": 3612 }, { "epoch": 0.9668183034519668, "grad_norm": 2.954878807067871, "learning_rate": 9.4487885616041e-06, "loss": 1.0543, "step": 3613 }, { "epoch": 0.9670858977789671, "grad_norm": 3.6669461727142334, "learning_rate": 9.448384568824685e-06, "loss": 1.2643, "step": 3614 }, { "epoch": 0.9673534921059673, "grad_norm": 3.3067846298217773, "learning_rate": 9.447980436695787e-06, "loss": 1.2128, "step": 3615 }, { "epoch": 0.9676210864329676, "grad_norm": 3.863605499267578, "learning_rate": 9.44757616523007e-06, "loss": 1.2189, "step": 3616 }, { "epoch": 0.9678886807599679, "grad_norm": 3.104247570037842, "learning_rate": 9.447171754440195e-06, "loss": 1.0933, "step": 3617 }, { "epoch": 0.9681562750869681, "grad_norm": 4.070940017700195, "learning_rate": 9.446767204338832e-06, "loss": 1.2085, "step": 3618 }, { "epoch": 0.9684238694139684, "grad_norm": 3.5472705364227295, "learning_rate": 9.446362514938653e-06, "loss": 1.1472, "step": 3619 }, { "epoch": 0.9686914637409687, "grad_norm": 3.455854654312134, "learning_rate": 9.445957686252336e-06, "loss": 1.2072, "step": 3620 }, { "epoch": 0.968959058067969, "grad_norm": 3.4434685707092285, "learning_rate": 9.445552718292564e-06, "loss": 1.0107, "step": 3621 }, { "epoch": 0.9692266523949692, "grad_norm": 3.8100736141204834, "learning_rate": 9.445147611072019e-06, "loss": 1.4714, "step": 3622 }, { "epoch": 0.9694942467219695, "grad_norm": 3.2713818550109863, "learning_rate": 9.444742364603394e-06, "loss": 1.1, "step": 3623 }, { "epoch": 0.9697618410489698, "grad_norm": 3.6512348651885986, "learning_rate": 9.444336978899384e-06, "loss": 1.1204, "step": 3624 }, { "epoch": 0.9700294353759701, "grad_norm": 3.12085223197937, "learning_rate": 9.443931453972688e-06, "loss": 1.1352, "step": 3625 }, { "epoch": 0.9702970297029703, "grad_norm": 3.4997334480285645, "learning_rate": 9.443525789836008e-06, "loss": 1.184, "step": 3626 }, { "epoch": 0.9705646240299706, "grad_norm": 3.1696505546569824, "learning_rate": 9.443119986502053e-06, "loss": 1.0827, "step": 3627 }, { "epoch": 0.9708322183569709, "grad_norm": 2.911027431488037, "learning_rate": 9.442714043983534e-06, "loss": 0.9887, "step": 3628 }, { "epoch": 0.971099812683971, "grad_norm": 3.3565635681152344, "learning_rate": 9.44230796229317e-06, "loss": 1.0637, "step": 3629 }, { "epoch": 0.9713674070109714, "grad_norm": 3.567798614501953, "learning_rate": 9.441901741443678e-06, "loss": 1.0772, "step": 3630 }, { "epoch": 0.9716350013379716, "grad_norm": 3.278237819671631, "learning_rate": 9.441495381447787e-06, "loss": 1.1284, "step": 3631 }, { "epoch": 0.971902595664972, "grad_norm": 3.231175661087036, "learning_rate": 9.441088882318223e-06, "loss": 1.1555, "step": 3632 }, { "epoch": 0.9721701899919721, "grad_norm": 3.619246006011963, "learning_rate": 9.440682244067724e-06, "loss": 1.2106, "step": 3633 }, { "epoch": 0.9724377843189724, "grad_norm": 3.553589344024658, "learning_rate": 9.440275466709025e-06, "loss": 1.1818, "step": 3634 }, { "epoch": 0.9727053786459727, "grad_norm": 3.7136471271514893, "learning_rate": 9.43986855025487e-06, "loss": 1.2618, "step": 3635 }, { "epoch": 0.972972972972973, "grad_norm": 3.361229658126831, "learning_rate": 9.439461494718006e-06, "loss": 1.1693, "step": 3636 }, { "epoch": 0.9732405672999732, "grad_norm": 3.6238696575164795, "learning_rate": 9.439054300111183e-06, "loss": 1.2334, "step": 3637 }, { "epoch": 0.9735081616269735, "grad_norm": 3.4375593662261963, "learning_rate": 9.438646966447158e-06, "loss": 1.1148, "step": 3638 }, { "epoch": 0.9737757559539738, "grad_norm": 3.1718556880950928, "learning_rate": 9.438239493738692e-06, "loss": 1.13, "step": 3639 }, { "epoch": 0.974043350280974, "grad_norm": 3.3983285427093506, "learning_rate": 9.437831881998548e-06, "loss": 1.2176, "step": 3640 }, { "epoch": 0.9743109446079743, "grad_norm": 3.254225015640259, "learning_rate": 9.437424131239496e-06, "loss": 1.2788, "step": 3641 }, { "epoch": 0.9745785389349746, "grad_norm": 3.181648015975952, "learning_rate": 9.437016241474307e-06, "loss": 1.0597, "step": 3642 }, { "epoch": 0.9748461332619749, "grad_norm": 3.5744338035583496, "learning_rate": 9.43660821271576e-06, "loss": 1.1843, "step": 3643 }, { "epoch": 0.9751137275889751, "grad_norm": 3.0701401233673096, "learning_rate": 9.436200044976638e-06, "loss": 0.9703, "step": 3644 }, { "epoch": 0.9753813219159754, "grad_norm": 3.134881019592285, "learning_rate": 9.435791738269725e-06, "loss": 1.1275, "step": 3645 }, { "epoch": 0.9756489162429757, "grad_norm": 3.301896810531616, "learning_rate": 9.43538329260781e-06, "loss": 1.0886, "step": 3646 }, { "epoch": 0.975916510569976, "grad_norm": 3.211254358291626, "learning_rate": 9.434974708003694e-06, "loss": 1.018, "step": 3647 }, { "epoch": 0.9761841048969762, "grad_norm": 3.36275577545166, "learning_rate": 9.434565984470172e-06, "loss": 1.1677, "step": 3648 }, { "epoch": 0.9764516992239765, "grad_norm": 3.162914276123047, "learning_rate": 9.434157122020047e-06, "loss": 1.1324, "step": 3649 }, { "epoch": 0.9767192935509768, "grad_norm": 3.0449602603912354, "learning_rate": 9.433748120666129e-06, "loss": 1.0288, "step": 3650 }, { "epoch": 0.9769868878779769, "grad_norm": 3.1934330463409424, "learning_rate": 9.43333898042123e-06, "loss": 1.1246, "step": 3651 }, { "epoch": 0.9772544822049772, "grad_norm": 3.4677317142486572, "learning_rate": 9.432929701298166e-06, "loss": 1.0707, "step": 3652 }, { "epoch": 0.9775220765319775, "grad_norm": 3.6359336376190186, "learning_rate": 9.432520283309756e-06, "loss": 1.1178, "step": 3653 }, { "epoch": 0.9777896708589778, "grad_norm": 3.6173250675201416, "learning_rate": 9.43211072646883e-06, "loss": 1.2067, "step": 3654 }, { "epoch": 0.978057265185978, "grad_norm": 3.4153003692626953, "learning_rate": 9.431701030788215e-06, "loss": 1.1684, "step": 3655 }, { "epoch": 0.9783248595129783, "grad_norm": 3.5359673500061035, "learning_rate": 9.431291196280745e-06, "loss": 1.2438, "step": 3656 }, { "epoch": 0.9785924538399786, "grad_norm": 3.2912895679473877, "learning_rate": 9.430881222959258e-06, "loss": 1.1959, "step": 3657 }, { "epoch": 0.9788600481669789, "grad_norm": 3.617729902267456, "learning_rate": 9.430471110836599e-06, "loss": 1.2038, "step": 3658 }, { "epoch": 0.9791276424939791, "grad_norm": 2.9933791160583496, "learning_rate": 9.430060859925614e-06, "loss": 1.0116, "step": 3659 }, { "epoch": 0.9793952368209794, "grad_norm": 3.0839054584503174, "learning_rate": 9.429650470239152e-06, "loss": 1.0596, "step": 3660 }, { "epoch": 0.9796628311479797, "grad_norm": 2.8952386379241943, "learning_rate": 9.429239941790072e-06, "loss": 1.0405, "step": 3661 }, { "epoch": 0.9799304254749799, "grad_norm": 3.154313564300537, "learning_rate": 9.428829274591234e-06, "loss": 0.9956, "step": 3662 }, { "epoch": 0.9801980198019802, "grad_norm": 3.2512032985687256, "learning_rate": 9.4284184686555e-06, "loss": 1.0996, "step": 3663 }, { "epoch": 0.9804656141289805, "grad_norm": 3.4779818058013916, "learning_rate": 9.428007523995741e-06, "loss": 1.1982, "step": 3664 }, { "epoch": 0.9807332084559808, "grad_norm": 3.4594082832336426, "learning_rate": 9.427596440624832e-06, "loss": 1.2461, "step": 3665 }, { "epoch": 0.981000802782981, "grad_norm": 3.6881461143493652, "learning_rate": 9.427185218555645e-06, "loss": 1.1537, "step": 3666 }, { "epoch": 0.9812683971099813, "grad_norm": 3.308070182800293, "learning_rate": 9.426773857801067e-06, "loss": 1.1589, "step": 3667 }, { "epoch": 0.9815359914369816, "grad_norm": 3.4280967712402344, "learning_rate": 9.426362358373981e-06, "loss": 1.0666, "step": 3668 }, { "epoch": 0.9818035857639819, "grad_norm": 3.077514886856079, "learning_rate": 9.42595072028728e-06, "loss": 1.2246, "step": 3669 }, { "epoch": 0.982071180090982, "grad_norm": 2.923833131790161, "learning_rate": 9.425538943553858e-06, "loss": 0.9993, "step": 3670 }, { "epoch": 0.9823387744179823, "grad_norm": 3.5569820404052734, "learning_rate": 9.425127028186613e-06, "loss": 1.1705, "step": 3671 }, { "epoch": 0.9826063687449826, "grad_norm": 3.547506093978882, "learning_rate": 9.42471497419845e-06, "loss": 1.1549, "step": 3672 }, { "epoch": 0.9828739630719828, "grad_norm": 3.3490121364593506, "learning_rate": 9.424302781602277e-06, "loss": 1.1942, "step": 3673 }, { "epoch": 0.9831415573989831, "grad_norm": 3.038180112838745, "learning_rate": 9.423890450411005e-06, "loss": 0.9606, "step": 3674 }, { "epoch": 0.9834091517259834, "grad_norm": 3.5518031120300293, "learning_rate": 9.423477980637552e-06, "loss": 1.1898, "step": 3675 }, { "epoch": 0.9836767460529837, "grad_norm": 3.295964002609253, "learning_rate": 9.42306537229484e-06, "loss": 1.1399, "step": 3676 }, { "epoch": 0.9839443403799839, "grad_norm": 3.0206878185272217, "learning_rate": 9.422652625395791e-06, "loss": 1.0414, "step": 3677 }, { "epoch": 0.9842119347069842, "grad_norm": 3.4383180141448975, "learning_rate": 9.422239739953337e-06, "loss": 1.1399, "step": 3678 }, { "epoch": 0.9844795290339845, "grad_norm": 3.2433290481567383, "learning_rate": 9.42182671598041e-06, "loss": 1.1698, "step": 3679 }, { "epoch": 0.9847471233609848, "grad_norm": 3.151118755340576, "learning_rate": 9.421413553489952e-06, "loss": 1.1247, "step": 3680 }, { "epoch": 0.985014717687985, "grad_norm": 3.5655605792999268, "learning_rate": 9.421000252494902e-06, "loss": 1.1653, "step": 3681 }, { "epoch": 0.9852823120149853, "grad_norm": 2.9051101207733154, "learning_rate": 9.42058681300821e-06, "loss": 0.9898, "step": 3682 }, { "epoch": 0.9855499063419856, "grad_norm": 3.184781312942505, "learning_rate": 9.420173235042825e-06, "loss": 1.0923, "step": 3683 }, { "epoch": 0.9858175006689858, "grad_norm": 3.448216199874878, "learning_rate": 9.419759518611704e-06, "loss": 1.1623, "step": 3684 }, { "epoch": 0.9860850949959861, "grad_norm": 3.177147626876831, "learning_rate": 9.419345663727805e-06, "loss": 1.0076, "step": 3685 }, { "epoch": 0.9863526893229864, "grad_norm": 3.480036497116089, "learning_rate": 9.418931670404096e-06, "loss": 1.1148, "step": 3686 }, { "epoch": 0.9866202836499867, "grad_norm": 3.5168023109436035, "learning_rate": 9.418517538653541e-06, "loss": 1.1358, "step": 3687 }, { "epoch": 0.9868878779769868, "grad_norm": 3.518444061279297, "learning_rate": 9.41810326848912e-06, "loss": 1.1001, "step": 3688 }, { "epoch": 0.9871554723039871, "grad_norm": 3.6086690425872803, "learning_rate": 9.417688859923803e-06, "loss": 1.2808, "step": 3689 }, { "epoch": 0.9874230666309874, "grad_norm": 3.5940322875976562, "learning_rate": 9.417274312970574e-06, "loss": 1.1559, "step": 3690 }, { "epoch": 0.9876906609579877, "grad_norm": 3.6169145107269287, "learning_rate": 9.416859627642421e-06, "loss": 1.1886, "step": 3691 }, { "epoch": 0.9879582552849879, "grad_norm": 3.5202441215515137, "learning_rate": 9.416444803952334e-06, "loss": 1.2057, "step": 3692 }, { "epoch": 0.9882258496119882, "grad_norm": 3.2127108573913574, "learning_rate": 9.416029841913306e-06, "loss": 0.9903, "step": 3693 }, { "epoch": 0.9884934439389885, "grad_norm": 3.4693965911865234, "learning_rate": 9.415614741538334e-06, "loss": 1.1354, "step": 3694 }, { "epoch": 0.9887610382659887, "grad_norm": 3.5482194423675537, "learning_rate": 9.415199502840428e-06, "loss": 1.2641, "step": 3695 }, { "epoch": 0.989028632592989, "grad_norm": 2.904871702194214, "learning_rate": 9.414784125832592e-06, "loss": 1.0245, "step": 3696 }, { "epoch": 0.9892962269199893, "grad_norm": 3.295982599258423, "learning_rate": 9.414368610527835e-06, "loss": 1.1042, "step": 3697 }, { "epoch": 0.9895638212469896, "grad_norm": 3.2028210163116455, "learning_rate": 9.413952956939179e-06, "loss": 1.1382, "step": 3698 }, { "epoch": 0.9898314155739898, "grad_norm": 3.677734136581421, "learning_rate": 9.413537165079642e-06, "loss": 1.3448, "step": 3699 }, { "epoch": 0.9900990099009901, "grad_norm": 3.27496337890625, "learning_rate": 9.413121234962248e-06, "loss": 1.2098, "step": 3700 }, { "epoch": 0.9903666042279904, "grad_norm": 3.1553688049316406, "learning_rate": 9.412705166600026e-06, "loss": 1.1474, "step": 3701 }, { "epoch": 0.9906341985549907, "grad_norm": 3.2582755088806152, "learning_rate": 9.412288960006016e-06, "loss": 1.0499, "step": 3702 }, { "epoch": 0.9909017928819909, "grad_norm": 3.174191474914551, "learning_rate": 9.411872615193247e-06, "loss": 1.1505, "step": 3703 }, { "epoch": 0.9911693872089912, "grad_norm": 3.137990951538086, "learning_rate": 9.411456132174768e-06, "loss": 1.0766, "step": 3704 }, { "epoch": 0.9914369815359915, "grad_norm": 3.366492748260498, "learning_rate": 9.411039510963622e-06, "loss": 1.265, "step": 3705 }, { "epoch": 0.9917045758629917, "grad_norm": 3.27744460105896, "learning_rate": 9.41062275157286e-06, "loss": 1.0141, "step": 3706 }, { "epoch": 0.991972170189992, "grad_norm": 3.3313217163085938, "learning_rate": 9.410205854015542e-06, "loss": 1.1903, "step": 3707 }, { "epoch": 0.9922397645169923, "grad_norm": 3.281294584274292, "learning_rate": 9.409788818304722e-06, "loss": 1.164, "step": 3708 }, { "epoch": 0.9925073588439925, "grad_norm": 3.4108831882476807, "learning_rate": 9.409371644453467e-06, "loss": 1.0759, "step": 3709 }, { "epoch": 0.9927749531709927, "grad_norm": 2.919935703277588, "learning_rate": 9.408954332474845e-06, "loss": 1.0233, "step": 3710 }, { "epoch": 0.993042547497993, "grad_norm": 3.332352876663208, "learning_rate": 9.408536882381928e-06, "loss": 1.0486, "step": 3711 }, { "epoch": 0.9933101418249933, "grad_norm": 3.033339023590088, "learning_rate": 9.408119294187791e-06, "loss": 0.9737, "step": 3712 }, { "epoch": 0.9935777361519936, "grad_norm": 3.5248947143554688, "learning_rate": 9.40770156790552e-06, "loss": 1.2238, "step": 3713 }, { "epoch": 0.9938453304789938, "grad_norm": 3.687695026397705, "learning_rate": 9.407283703548198e-06, "loss": 1.1228, "step": 3714 }, { "epoch": 0.9941129248059941, "grad_norm": 3.2966973781585693, "learning_rate": 9.406865701128916e-06, "loss": 1.0991, "step": 3715 }, { "epoch": 0.9943805191329944, "grad_norm": 3.3037948608398438, "learning_rate": 9.406447560660764e-06, "loss": 1.1622, "step": 3716 }, { "epoch": 0.9946481134599946, "grad_norm": 3.2481751441955566, "learning_rate": 9.40602928215685e-06, "loss": 1.0621, "step": 3717 }, { "epoch": 0.9949157077869949, "grad_norm": 3.3406388759613037, "learning_rate": 9.405610865630266e-06, "loss": 1.1471, "step": 3718 }, { "epoch": 0.9951833021139952, "grad_norm": 3.2971763610839844, "learning_rate": 9.405192311094126e-06, "loss": 1.1473, "step": 3719 }, { "epoch": 0.9954508964409955, "grad_norm": 3.1722514629364014, "learning_rate": 9.40477361856154e-06, "loss": 1.0188, "step": 3720 }, { "epoch": 0.9957184907679957, "grad_norm": 3.5597033500671387, "learning_rate": 9.404354788045624e-06, "loss": 1.1735, "step": 3721 }, { "epoch": 0.995986085094996, "grad_norm": 3.2064480781555176, "learning_rate": 9.403935819559497e-06, "loss": 1.1026, "step": 3722 }, { "epoch": 0.9962536794219963, "grad_norm": 3.40864896774292, "learning_rate": 9.403516713116286e-06, "loss": 1.2236, "step": 3723 }, { "epoch": 0.9965212737489966, "grad_norm": 3.1237740516662598, "learning_rate": 9.403097468729117e-06, "loss": 1.0975, "step": 3724 }, { "epoch": 0.9967888680759968, "grad_norm": 3.1500935554504395, "learning_rate": 9.402678086411125e-06, "loss": 1.0771, "step": 3725 }, { "epoch": 0.9970564624029971, "grad_norm": 3.1867852210998535, "learning_rate": 9.402258566175448e-06, "loss": 0.9686, "step": 3726 }, { "epoch": 0.9973240567299974, "grad_norm": 3.5945894718170166, "learning_rate": 9.401838908035226e-06, "loss": 1.2021, "step": 3727 }, { "epoch": 0.9975916510569975, "grad_norm": 3.2258009910583496, "learning_rate": 9.401419112003607e-06, "loss": 1.1979, "step": 3728 }, { "epoch": 0.9978592453839978, "grad_norm": 3.2605550289154053, "learning_rate": 9.40099917809374e-06, "loss": 1.1081, "step": 3729 }, { "epoch": 0.9981268397109981, "grad_norm": 3.4050374031066895, "learning_rate": 9.400579106318781e-06, "loss": 1.1763, "step": 3730 }, { "epoch": 0.9983944340379984, "grad_norm": 3.2462353706359863, "learning_rate": 9.400158896691887e-06, "loss": 1.0444, "step": 3731 }, { "epoch": 0.9986620283649986, "grad_norm": 3.2882609367370605, "learning_rate": 9.399738549226226e-06, "loss": 1.0605, "step": 3732 }, { "epoch": 0.9989296226919989, "grad_norm": 3.3432724475860596, "learning_rate": 9.399318063934959e-06, "loss": 1.109, "step": 3733 }, { "epoch": 0.9991972170189992, "grad_norm": 3.5138442516326904, "learning_rate": 9.398897440831263e-06, "loss": 1.2697, "step": 3734 }, { "epoch": 0.9994648113459995, "grad_norm": 3.237004280090332, "learning_rate": 9.398476679928313e-06, "loss": 1.0746, "step": 3735 }, { "epoch": 0.9997324056729997, "grad_norm": 3.4802517890930176, "learning_rate": 9.39805578123929e-06, "loss": 1.1824, "step": 3736 }, { "epoch": 1.0, "grad_norm": 3.3513879776000977, "learning_rate": 9.39763474477738e-06, "loss": 1.1062, "step": 3737 } ], "logging_steps": 1.0, "max_steps": 18685, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2477154653844275e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }