{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 16.454624484391783, "learning_rate": 4.132231404958678e-08, "loss": 2.6798, "step": 1 }, { "epoch": 0.0, "grad_norm": 15.786174378520965, "learning_rate": 8.264462809917357e-08, "loss": 2.423, "step": 2 }, { "epoch": 0.0, "grad_norm": 15.481362581938647, "learning_rate": 1.2396694214876034e-07, "loss": 2.7942, "step": 3 }, { "epoch": 0.0, "grad_norm": 18.500586414289728, "learning_rate": 1.6528925619834713e-07, "loss": 2.532, "step": 4 }, { "epoch": 0.0, "grad_norm": 17.658919116301494, "learning_rate": 2.066115702479339e-07, "loss": 2.5275, "step": 5 }, { "epoch": 0.0, "grad_norm": 18.647280815498746, "learning_rate": 2.4793388429752067e-07, "loss": 2.5449, "step": 6 }, { "epoch": 0.0, "grad_norm": 15.537629434419038, "learning_rate": 2.892561983471075e-07, "loss": 2.5578, "step": 7 }, { "epoch": 0.0, "grad_norm": 18.32412091275387, "learning_rate": 3.3057851239669426e-07, "loss": 2.6483, "step": 8 }, { "epoch": 0.0, "grad_norm": 17.647885549222284, "learning_rate": 3.7190082644628103e-07, "loss": 2.8688, "step": 9 }, { "epoch": 0.0, "grad_norm": 15.159830314133835, "learning_rate": 4.132231404958678e-07, "loss": 2.565, "step": 10 }, { "epoch": 0.0, "grad_norm": 16.270458591824724, "learning_rate": 4.5454545454545457e-07, "loss": 2.6692, "step": 11 }, { "epoch": 0.0, "grad_norm": 16.8537247674438, "learning_rate": 4.958677685950413e-07, "loss": 2.5663, "step": 12 }, { "epoch": 0.0, "grad_norm": 14.2130512168258, "learning_rate": 5.371900826446281e-07, "loss": 2.4112, "step": 13 }, { "epoch": 0.0, "grad_norm": 14.933711652785474, "learning_rate": 5.78512396694215e-07, "loss": 2.3863, "step": 14 }, { "epoch": 0.0, "grad_norm": 14.632205198210762, "learning_rate": 6.198347107438018e-07, "loss": 2.5186, "step": 15 }, { "epoch": 0.0, "grad_norm": 15.363217338862972, "learning_rate": 6.611570247933885e-07, "loss": 2.5862, "step": 16 }, { "epoch": 0.0, "grad_norm": 16.834857340465213, "learning_rate": 7.024793388429753e-07, "loss": 2.5648, "step": 17 }, { "epoch": 0.0, "grad_norm": 16.363595622010816, "learning_rate": 7.438016528925621e-07, "loss": 2.4753, "step": 18 }, { "epoch": 0.0, "grad_norm": 17.267860655442604, "learning_rate": 7.851239669421488e-07, "loss": 2.5054, "step": 19 }, { "epoch": 0.0, "grad_norm": 12.604179585703752, "learning_rate": 8.264462809917356e-07, "loss": 2.5062, "step": 20 }, { "epoch": 0.0, "grad_norm": 16.351447932471427, "learning_rate": 8.677685950413224e-07, "loss": 2.6843, "step": 21 }, { "epoch": 0.0, "grad_norm": 16.8396777048417, "learning_rate": 9.090909090909091e-07, "loss": 2.6492, "step": 22 }, { "epoch": 0.0, "grad_norm": 16.328510841205038, "learning_rate": 9.50413223140496e-07, "loss": 2.5997, "step": 23 }, { "epoch": 0.0, "grad_norm": 14.234858891976334, "learning_rate": 9.917355371900827e-07, "loss": 2.5285, "step": 24 }, { "epoch": 0.0, "grad_norm": 15.202384972007433, "learning_rate": 1.0330578512396695e-06, "loss": 2.2785, "step": 25 }, { "epoch": 0.0, "grad_norm": 15.882883627713342, "learning_rate": 1.0743801652892562e-06, "loss": 2.4016, "step": 26 }, { "epoch": 0.0, "grad_norm": 13.70005110189558, "learning_rate": 1.115702479338843e-06, "loss": 2.4585, "step": 27 }, { "epoch": 0.0, "grad_norm": 12.262161528698979, "learning_rate": 1.15702479338843e-06, "loss": 2.1022, "step": 28 }, { "epoch": 0.0, "grad_norm": 12.73901544752808, "learning_rate": 1.1983471074380167e-06, "loss": 2.2474, "step": 29 }, { "epoch": 0.0, "grad_norm": 14.310583315638448, "learning_rate": 1.2396694214876035e-06, "loss": 2.176, "step": 30 }, { "epoch": 0.0, "grad_norm": 12.87064366240795, "learning_rate": 1.28099173553719e-06, "loss": 2.3066, "step": 31 }, { "epoch": 0.0, "grad_norm": 14.863278225676664, "learning_rate": 1.322314049586777e-06, "loss": 2.168, "step": 32 }, { "epoch": 0.0, "grad_norm": 12.191386393699187, "learning_rate": 1.3636363636363636e-06, "loss": 2.1098, "step": 33 }, { "epoch": 0.0, "grad_norm": 11.899584915617622, "learning_rate": 1.4049586776859506e-06, "loss": 1.9844, "step": 34 }, { "epoch": 0.0, "grad_norm": 12.515912781255654, "learning_rate": 1.4462809917355372e-06, "loss": 2.1423, "step": 35 }, { "epoch": 0.0, "grad_norm": 11.414189243709481, "learning_rate": 1.4876033057851241e-06, "loss": 1.9862, "step": 36 }, { "epoch": 0.0, "grad_norm": 11.813814719448414, "learning_rate": 1.5289256198347107e-06, "loss": 2.1121, "step": 37 }, { "epoch": 0.0, "grad_norm": 9.662052683301567, "learning_rate": 1.5702479338842977e-06, "loss": 2.0836, "step": 38 }, { "epoch": 0.0, "grad_norm": 13.371775934295181, "learning_rate": 1.6115702479338842e-06, "loss": 2.0762, "step": 39 }, { "epoch": 0.0, "grad_norm": 13.44445119629881, "learning_rate": 1.6528925619834712e-06, "loss": 2.1513, "step": 40 }, { "epoch": 0.01, "grad_norm": 11.371926643972962, "learning_rate": 1.694214876033058e-06, "loss": 2.3268, "step": 41 }, { "epoch": 0.01, "grad_norm": 13.077606716681487, "learning_rate": 1.7355371900826448e-06, "loss": 2.1095, "step": 42 }, { "epoch": 0.01, "grad_norm": 11.492530409327221, "learning_rate": 1.7768595041322315e-06, "loss": 2.1203, "step": 43 }, { "epoch": 0.01, "grad_norm": 14.224874418144086, "learning_rate": 1.8181818181818183e-06, "loss": 2.2027, "step": 44 }, { "epoch": 0.01, "grad_norm": 11.104895999759943, "learning_rate": 1.859504132231405e-06, "loss": 2.1667, "step": 45 }, { "epoch": 0.01, "grad_norm": 10.879211062152587, "learning_rate": 1.900826446280992e-06, "loss": 2.4429, "step": 46 }, { "epoch": 0.01, "grad_norm": 11.014417267235558, "learning_rate": 1.9421487603305786e-06, "loss": 2.0897, "step": 47 }, { "epoch": 0.01, "grad_norm": 20.413102153236586, "learning_rate": 1.9834710743801654e-06, "loss": 2.1584, "step": 48 }, { "epoch": 0.01, "grad_norm": 9.986385391009275, "learning_rate": 2.024793388429752e-06, "loss": 1.8575, "step": 49 }, { "epoch": 0.01, "grad_norm": 10.339592046237641, "learning_rate": 2.066115702479339e-06, "loss": 2.0581, "step": 50 }, { "epoch": 0.01, "grad_norm": 12.496953475748992, "learning_rate": 2.1074380165289257e-06, "loss": 2.1641, "step": 51 }, { "epoch": 0.01, "grad_norm": 10.501132598388304, "learning_rate": 2.1487603305785124e-06, "loss": 1.9484, "step": 52 }, { "epoch": 0.01, "grad_norm": 10.137152044981377, "learning_rate": 2.1900826446280992e-06, "loss": 1.9784, "step": 53 }, { "epoch": 0.01, "grad_norm": 9.335632763235502, "learning_rate": 2.231404958677686e-06, "loss": 2.0883, "step": 54 }, { "epoch": 0.01, "grad_norm": 9.788072781163752, "learning_rate": 2.2727272727272728e-06, "loss": 2.0923, "step": 55 }, { "epoch": 0.01, "grad_norm": 10.429725366559552, "learning_rate": 2.31404958677686e-06, "loss": 2.0792, "step": 56 }, { "epoch": 0.01, "grad_norm": 9.774931834381437, "learning_rate": 2.3553719008264463e-06, "loss": 2.3172, "step": 57 }, { "epoch": 0.01, "grad_norm": 8.905485254280343, "learning_rate": 2.3966942148760335e-06, "loss": 2.0839, "step": 58 }, { "epoch": 0.01, "grad_norm": 10.097102358328423, "learning_rate": 2.43801652892562e-06, "loss": 2.1117, "step": 59 }, { "epoch": 0.01, "grad_norm": 9.88536387021511, "learning_rate": 2.479338842975207e-06, "loss": 2.0174, "step": 60 }, { "epoch": 0.01, "grad_norm": 10.011562757672124, "learning_rate": 2.5206611570247934e-06, "loss": 1.9346, "step": 61 }, { "epoch": 0.01, "grad_norm": 11.14506995264367, "learning_rate": 2.56198347107438e-06, "loss": 1.9979, "step": 62 }, { "epoch": 0.01, "grad_norm": 13.515314511273361, "learning_rate": 2.6033057851239673e-06, "loss": 2.286, "step": 63 }, { "epoch": 0.01, "grad_norm": 11.132209403555175, "learning_rate": 2.644628099173554e-06, "loss": 2.0499, "step": 64 }, { "epoch": 0.01, "grad_norm": 8.974588117411214, "learning_rate": 2.6859504132231405e-06, "loss": 1.8848, "step": 65 }, { "epoch": 0.01, "grad_norm": 9.00357797509728, "learning_rate": 2.7272727272727272e-06, "loss": 2.1155, "step": 66 }, { "epoch": 0.01, "grad_norm": 9.245075287581058, "learning_rate": 2.7685950413223144e-06, "loss": 1.9979, "step": 67 }, { "epoch": 0.01, "grad_norm": 10.100004512179483, "learning_rate": 2.809917355371901e-06, "loss": 2.1364, "step": 68 }, { "epoch": 0.01, "grad_norm": 9.389531780751923, "learning_rate": 2.851239669421488e-06, "loss": 2.0023, "step": 69 }, { "epoch": 0.01, "grad_norm": 10.481080611827874, "learning_rate": 2.8925619834710743e-06, "loss": 1.9649, "step": 70 }, { "epoch": 0.01, "grad_norm": 10.142011053286225, "learning_rate": 2.9338842975206615e-06, "loss": 2.0329, "step": 71 }, { "epoch": 0.01, "grad_norm": 10.425395823225413, "learning_rate": 2.9752066115702483e-06, "loss": 1.9477, "step": 72 }, { "epoch": 0.01, "grad_norm": 10.926886208979898, "learning_rate": 3.016528925619835e-06, "loss": 2.0419, "step": 73 }, { "epoch": 0.01, "grad_norm": 10.438528794323524, "learning_rate": 3.0578512396694214e-06, "loss": 1.8438, "step": 74 }, { "epoch": 0.01, "grad_norm": 9.229835234165565, "learning_rate": 3.0991735537190086e-06, "loss": 2.0368, "step": 75 }, { "epoch": 0.01, "grad_norm": 12.162843703283269, "learning_rate": 3.1404958677685953e-06, "loss": 1.9366, "step": 76 }, { "epoch": 0.01, "grad_norm": 10.579765033094825, "learning_rate": 3.181818181818182e-06, "loss": 2.3397, "step": 77 }, { "epoch": 0.01, "grad_norm": 9.41405684057134, "learning_rate": 3.2231404958677685e-06, "loss": 2.1514, "step": 78 }, { "epoch": 0.01, "grad_norm": 11.093827090796587, "learning_rate": 3.264462809917356e-06, "loss": 2.0118, "step": 79 }, { "epoch": 0.01, "grad_norm": 17.205740835879105, "learning_rate": 3.3057851239669424e-06, "loss": 2.0838, "step": 80 }, { "epoch": 0.01, "grad_norm": 10.339602308190535, "learning_rate": 3.347107438016529e-06, "loss": 1.8997, "step": 81 }, { "epoch": 0.01, "grad_norm": 10.415060893531479, "learning_rate": 3.388429752066116e-06, "loss": 1.8997, "step": 82 }, { "epoch": 0.01, "grad_norm": 11.454381836080685, "learning_rate": 3.429752066115703e-06, "loss": 1.9474, "step": 83 }, { "epoch": 0.01, "grad_norm": 11.631557844240854, "learning_rate": 3.4710743801652895e-06, "loss": 2.0227, "step": 84 }, { "epoch": 0.01, "grad_norm": 8.904743270038985, "learning_rate": 3.5123966942148763e-06, "loss": 1.8716, "step": 85 }, { "epoch": 0.01, "grad_norm": 8.967741574183623, "learning_rate": 3.553719008264463e-06, "loss": 1.8722, "step": 86 }, { "epoch": 0.01, "grad_norm": 15.907260060661063, "learning_rate": 3.5950413223140502e-06, "loss": 1.9778, "step": 87 }, { "epoch": 0.01, "grad_norm": 10.13650504618456, "learning_rate": 3.6363636363636366e-06, "loss": 1.7392, "step": 88 }, { "epoch": 0.01, "grad_norm": 28.387071126416025, "learning_rate": 3.6776859504132234e-06, "loss": 1.7626, "step": 89 }, { "epoch": 0.01, "grad_norm": 9.784265322455552, "learning_rate": 3.71900826446281e-06, "loss": 1.8647, "step": 90 }, { "epoch": 0.01, "grad_norm": 9.07344673074511, "learning_rate": 3.7603305785123973e-06, "loss": 2.1762, "step": 91 }, { "epoch": 0.01, "grad_norm": 10.356721226573415, "learning_rate": 3.801652892561984e-06, "loss": 1.8888, "step": 92 }, { "epoch": 0.01, "grad_norm": 9.669076688587314, "learning_rate": 3.842975206611571e-06, "loss": 2.0368, "step": 93 }, { "epoch": 0.01, "grad_norm": 10.33806776716408, "learning_rate": 3.884297520661157e-06, "loss": 1.6224, "step": 94 }, { "epoch": 0.01, "grad_norm": 10.703869983697253, "learning_rate": 3.925619834710744e-06, "loss": 2.1649, "step": 95 }, { "epoch": 0.01, "grad_norm": 9.21676103995682, "learning_rate": 3.966942148760331e-06, "loss": 1.912, "step": 96 }, { "epoch": 0.01, "grad_norm": 9.304648984198877, "learning_rate": 4.008264462809918e-06, "loss": 2.0468, "step": 97 }, { "epoch": 0.01, "grad_norm": 9.757073973492082, "learning_rate": 4.049586776859504e-06, "loss": 1.8934, "step": 98 }, { "epoch": 0.01, "grad_norm": 10.967857139071667, "learning_rate": 4.0909090909090915e-06, "loss": 1.9981, "step": 99 }, { "epoch": 0.01, "grad_norm": 8.982602893767018, "learning_rate": 4.132231404958678e-06, "loss": 2.0203, "step": 100 }, { "epoch": 0.01, "grad_norm": 10.721810384423303, "learning_rate": 4.173553719008265e-06, "loss": 2.159, "step": 101 }, { "epoch": 0.01, "grad_norm": 11.001336095617303, "learning_rate": 4.214876033057851e-06, "loss": 2.0261, "step": 102 }, { "epoch": 0.01, "grad_norm": 9.762118718504238, "learning_rate": 4.2561983471074386e-06, "loss": 1.8778, "step": 103 }, { "epoch": 0.01, "grad_norm": 13.619122635624729, "learning_rate": 4.297520661157025e-06, "loss": 1.772, "step": 104 }, { "epoch": 0.01, "grad_norm": 10.681708861980265, "learning_rate": 4.338842975206612e-06, "loss": 1.8257, "step": 105 }, { "epoch": 0.01, "grad_norm": 9.79520526176123, "learning_rate": 4.3801652892561984e-06, "loss": 1.867, "step": 106 }, { "epoch": 0.01, "grad_norm": 9.98799625787614, "learning_rate": 4.421487603305786e-06, "loss": 1.8603, "step": 107 }, { "epoch": 0.01, "grad_norm": 11.187200271275401, "learning_rate": 4.462809917355372e-06, "loss": 1.8464, "step": 108 }, { "epoch": 0.01, "grad_norm": 11.290733860805291, "learning_rate": 4.504132231404959e-06, "loss": 2.2186, "step": 109 }, { "epoch": 0.01, "grad_norm": 9.569680751830427, "learning_rate": 4.5454545454545455e-06, "loss": 2.1155, "step": 110 }, { "epoch": 0.01, "grad_norm": 11.631141779596886, "learning_rate": 4.586776859504133e-06, "loss": 2.0806, "step": 111 }, { "epoch": 0.01, "grad_norm": 9.717278449640162, "learning_rate": 4.62809917355372e-06, "loss": 1.9115, "step": 112 }, { "epoch": 0.01, "grad_norm": 12.50017643399969, "learning_rate": 4.669421487603306e-06, "loss": 1.6472, "step": 113 }, { "epoch": 0.01, "grad_norm": 10.705469706783939, "learning_rate": 4.710743801652893e-06, "loss": 2.1605, "step": 114 }, { "epoch": 0.01, "grad_norm": 10.454693390980797, "learning_rate": 4.75206611570248e-06, "loss": 1.7306, "step": 115 }, { "epoch": 0.01, "grad_norm": 9.142208220120457, "learning_rate": 4.793388429752067e-06, "loss": 1.955, "step": 116 }, { "epoch": 0.01, "grad_norm": 9.07558010237617, "learning_rate": 4.834710743801653e-06, "loss": 1.7816, "step": 117 }, { "epoch": 0.01, "grad_norm": 9.494585425112305, "learning_rate": 4.87603305785124e-06, "loss": 2.0595, "step": 118 }, { "epoch": 0.01, "grad_norm": 9.883477612107527, "learning_rate": 4.917355371900827e-06, "loss": 1.8752, "step": 119 }, { "epoch": 0.01, "grad_norm": 10.566402311020358, "learning_rate": 4.958677685950414e-06, "loss": 1.8757, "step": 120 }, { "epoch": 0.02, "grad_norm": 9.24699322840026, "learning_rate": 5e-06, "loss": 2.0521, "step": 121 }, { "epoch": 0.02, "grad_norm": 11.584534399098613, "learning_rate": 5.041322314049587e-06, "loss": 1.8947, "step": 122 }, { "epoch": 0.02, "grad_norm": 10.03552074546801, "learning_rate": 5.082644628099174e-06, "loss": 2.1106, "step": 123 }, { "epoch": 0.02, "grad_norm": 9.436875350006817, "learning_rate": 5.12396694214876e-06, "loss": 1.8705, "step": 124 }, { "epoch": 0.02, "grad_norm": 9.000043929607253, "learning_rate": 5.165289256198347e-06, "loss": 1.8069, "step": 125 }, { "epoch": 0.02, "grad_norm": 10.233655865222776, "learning_rate": 5.206611570247935e-06, "loss": 2.0789, "step": 126 }, { "epoch": 0.02, "grad_norm": 10.085952812230992, "learning_rate": 5.247933884297521e-06, "loss": 1.9078, "step": 127 }, { "epoch": 0.02, "grad_norm": 20.099089162162926, "learning_rate": 5.289256198347108e-06, "loss": 1.881, "step": 128 }, { "epoch": 0.02, "grad_norm": 9.929070264504835, "learning_rate": 5.3305785123966946e-06, "loss": 1.9741, "step": 129 }, { "epoch": 0.02, "grad_norm": 10.373397879981596, "learning_rate": 5.371900826446281e-06, "loss": 1.915, "step": 130 }, { "epoch": 0.02, "grad_norm": 8.494652888289481, "learning_rate": 5.413223140495868e-06, "loss": 1.6946, "step": 131 }, { "epoch": 0.02, "grad_norm": 9.327541804908451, "learning_rate": 5.4545454545454545e-06, "loss": 1.8572, "step": 132 }, { "epoch": 0.02, "grad_norm": 10.734678288591631, "learning_rate": 5.495867768595042e-06, "loss": 2.0182, "step": 133 }, { "epoch": 0.02, "grad_norm": 13.067698348116803, "learning_rate": 5.537190082644629e-06, "loss": 1.9689, "step": 134 }, { "epoch": 0.02, "grad_norm": 11.987849889081295, "learning_rate": 5.578512396694216e-06, "loss": 2.0062, "step": 135 }, { "epoch": 0.02, "grad_norm": 9.251221696765837, "learning_rate": 5.619834710743802e-06, "loss": 1.8437, "step": 136 }, { "epoch": 0.02, "grad_norm": 11.041930900683525, "learning_rate": 5.661157024793389e-06, "loss": 2.0402, "step": 137 }, { "epoch": 0.02, "grad_norm": 9.407359767603225, "learning_rate": 5.702479338842976e-06, "loss": 1.6146, "step": 138 }, { "epoch": 0.02, "grad_norm": 9.813638761843453, "learning_rate": 5.743801652892562e-06, "loss": 1.9814, "step": 139 }, { "epoch": 0.02, "grad_norm": 9.97269567329318, "learning_rate": 5.785123966942149e-06, "loss": 2.0552, "step": 140 }, { "epoch": 0.02, "grad_norm": 9.036598832899655, "learning_rate": 5.826446280991736e-06, "loss": 1.9966, "step": 141 }, { "epoch": 0.02, "grad_norm": 10.088735503460502, "learning_rate": 5.867768595041323e-06, "loss": 2.0802, "step": 142 }, { "epoch": 0.02, "grad_norm": 9.400271279704414, "learning_rate": 5.90909090909091e-06, "loss": 1.7774, "step": 143 }, { "epoch": 0.02, "grad_norm": 9.787385766139383, "learning_rate": 5.9504132231404965e-06, "loss": 2.054, "step": 144 }, { "epoch": 0.02, "grad_norm": 10.671912769081132, "learning_rate": 5.991735537190083e-06, "loss": 2.0302, "step": 145 }, { "epoch": 0.02, "grad_norm": 8.479522486728339, "learning_rate": 6.03305785123967e-06, "loss": 1.7985, "step": 146 }, { "epoch": 0.02, "grad_norm": 9.316965714106725, "learning_rate": 6.074380165289256e-06, "loss": 1.9866, "step": 147 }, { "epoch": 0.02, "grad_norm": 8.946286631539332, "learning_rate": 6.115702479338843e-06, "loss": 2.1031, "step": 148 }, { "epoch": 0.02, "grad_norm": 9.764245419869107, "learning_rate": 6.15702479338843e-06, "loss": 2.0184, "step": 149 }, { "epoch": 0.02, "grad_norm": 8.181143521118045, "learning_rate": 6.198347107438017e-06, "loss": 1.7136, "step": 150 }, { "epoch": 0.02, "grad_norm": 9.182895479345817, "learning_rate": 6.239669421487604e-06, "loss": 1.7436, "step": 151 }, { "epoch": 0.02, "grad_norm": 9.986729673689625, "learning_rate": 6.280991735537191e-06, "loss": 1.9897, "step": 152 }, { "epoch": 0.02, "grad_norm": 8.465501196495273, "learning_rate": 6.322314049586777e-06, "loss": 2.0927, "step": 153 }, { "epoch": 0.02, "grad_norm": 9.436835701005968, "learning_rate": 6.363636363636364e-06, "loss": 1.9391, "step": 154 }, { "epoch": 0.02, "grad_norm": 15.936407965958205, "learning_rate": 6.404958677685951e-06, "loss": 2.1614, "step": 155 }, { "epoch": 0.02, "grad_norm": 9.034435862682608, "learning_rate": 6.446280991735537e-06, "loss": 1.9044, "step": 156 }, { "epoch": 0.02, "grad_norm": 8.937628424599243, "learning_rate": 6.487603305785124e-06, "loss": 1.9135, "step": 157 }, { "epoch": 0.02, "grad_norm": 10.18685440049138, "learning_rate": 6.528925619834712e-06, "loss": 2.1853, "step": 158 }, { "epoch": 0.02, "grad_norm": 10.714096555662632, "learning_rate": 6.5702479338842985e-06, "loss": 1.9491, "step": 159 }, { "epoch": 0.02, "grad_norm": 9.692423761466083, "learning_rate": 6.611570247933885e-06, "loss": 1.8784, "step": 160 }, { "epoch": 0.02, "grad_norm": 11.112340505351206, "learning_rate": 6.652892561983472e-06, "loss": 2.0274, "step": 161 }, { "epoch": 0.02, "grad_norm": 8.059527025115466, "learning_rate": 6.694214876033058e-06, "loss": 1.9376, "step": 162 }, { "epoch": 0.02, "grad_norm": 9.718026546550432, "learning_rate": 6.735537190082645e-06, "loss": 2.192, "step": 163 }, { "epoch": 0.02, "grad_norm": 8.331583089987838, "learning_rate": 6.776859504132232e-06, "loss": 1.7257, "step": 164 }, { "epoch": 0.02, "grad_norm": 9.164628609747433, "learning_rate": 6.818181818181818e-06, "loss": 1.6946, "step": 165 }, { "epoch": 0.02, "grad_norm": 9.61166181213478, "learning_rate": 6.859504132231406e-06, "loss": 1.8074, "step": 166 }, { "epoch": 0.02, "grad_norm": 10.384371046682721, "learning_rate": 6.900826446280993e-06, "loss": 2.0419, "step": 167 }, { "epoch": 0.02, "grad_norm": 8.42691646637082, "learning_rate": 6.942148760330579e-06, "loss": 1.9259, "step": 168 }, { "epoch": 0.02, "grad_norm": 9.562999824875565, "learning_rate": 6.983471074380166e-06, "loss": 1.7367, "step": 169 }, { "epoch": 0.02, "grad_norm": 11.078817229570127, "learning_rate": 7.0247933884297525e-06, "loss": 1.9838, "step": 170 }, { "epoch": 0.02, "grad_norm": 8.778586812883367, "learning_rate": 7.066115702479339e-06, "loss": 1.9553, "step": 171 }, { "epoch": 0.02, "grad_norm": 10.224164769616243, "learning_rate": 7.107438016528926e-06, "loss": 1.8952, "step": 172 }, { "epoch": 0.02, "grad_norm": 9.042706647090064, "learning_rate": 7.1487603305785124e-06, "loss": 1.9499, "step": 173 }, { "epoch": 0.02, "grad_norm": 8.577763985675142, "learning_rate": 7.1900826446281005e-06, "loss": 1.9329, "step": 174 }, { "epoch": 0.02, "grad_norm": 8.949561436702313, "learning_rate": 7.231404958677687e-06, "loss": 1.8513, "step": 175 }, { "epoch": 0.02, "grad_norm": 10.385796743381105, "learning_rate": 7.272727272727273e-06, "loss": 1.9724, "step": 176 }, { "epoch": 0.02, "grad_norm": 9.77173743414977, "learning_rate": 7.31404958677686e-06, "loss": 1.9701, "step": 177 }, { "epoch": 0.02, "grad_norm": 9.110305246457026, "learning_rate": 7.355371900826447e-06, "loss": 1.8387, "step": 178 }, { "epoch": 0.02, "grad_norm": 20.712198746733563, "learning_rate": 7.396694214876033e-06, "loss": 1.9116, "step": 179 }, { "epoch": 0.02, "grad_norm": 8.212240343278607, "learning_rate": 7.43801652892562e-06, "loss": 1.9868, "step": 180 }, { "epoch": 0.02, "grad_norm": 9.113793210406447, "learning_rate": 7.479338842975207e-06, "loss": 1.9642, "step": 181 }, { "epoch": 0.02, "grad_norm": 10.051962022891791, "learning_rate": 7.520661157024795e-06, "loss": 1.7574, "step": 182 }, { "epoch": 0.02, "grad_norm": 8.859552247080856, "learning_rate": 7.561983471074381e-06, "loss": 2.2245, "step": 183 }, { "epoch": 0.02, "grad_norm": 9.008635553665522, "learning_rate": 7.603305785123968e-06, "loss": 1.9634, "step": 184 }, { "epoch": 0.02, "grad_norm": 9.373360783934737, "learning_rate": 7.644628099173555e-06, "loss": 1.9576, "step": 185 }, { "epoch": 0.02, "grad_norm": 9.507224409603742, "learning_rate": 7.685950413223142e-06, "loss": 2.048, "step": 186 }, { "epoch": 0.02, "grad_norm": 8.539020166709617, "learning_rate": 7.727272727272727e-06, "loss": 1.7927, "step": 187 }, { "epoch": 0.02, "grad_norm": 9.789732777447142, "learning_rate": 7.768595041322314e-06, "loss": 2.0043, "step": 188 }, { "epoch": 0.02, "grad_norm": 9.15474463282987, "learning_rate": 7.809917355371902e-06, "loss": 1.9866, "step": 189 }, { "epoch": 0.02, "grad_norm": 8.65104826964854, "learning_rate": 7.851239669421489e-06, "loss": 1.9257, "step": 190 }, { "epoch": 0.02, "grad_norm": 9.546337331511713, "learning_rate": 7.892561983471076e-06, "loss": 1.8312, "step": 191 }, { "epoch": 0.02, "grad_norm": 9.21432842228729, "learning_rate": 7.933884297520661e-06, "loss": 1.8475, "step": 192 }, { "epoch": 0.02, "grad_norm": 9.234736074075107, "learning_rate": 7.975206611570249e-06, "loss": 1.8698, "step": 193 }, { "epoch": 0.02, "grad_norm": 8.681374925546985, "learning_rate": 8.016528925619836e-06, "loss": 1.9, "step": 194 }, { "epoch": 0.02, "grad_norm": 9.415307968608786, "learning_rate": 8.057851239669421e-06, "loss": 2.1109, "step": 195 }, { "epoch": 0.02, "grad_norm": 8.035137869253694, "learning_rate": 8.099173553719009e-06, "loss": 1.8642, "step": 196 }, { "epoch": 0.02, "grad_norm": 8.746480214309662, "learning_rate": 8.140495867768596e-06, "loss": 1.7914, "step": 197 }, { "epoch": 0.02, "grad_norm": 10.184285274183168, "learning_rate": 8.181818181818183e-06, "loss": 2.169, "step": 198 }, { "epoch": 0.02, "grad_norm": 8.861851372899286, "learning_rate": 8.22314049586777e-06, "loss": 1.7633, "step": 199 }, { "epoch": 0.02, "grad_norm": 8.815514947017528, "learning_rate": 8.264462809917356e-06, "loss": 2.0706, "step": 200 }, { "epoch": 0.03, "grad_norm": 11.063620734923807, "learning_rate": 8.305785123966943e-06, "loss": 1.9302, "step": 201 }, { "epoch": 0.03, "grad_norm": 10.689794216903778, "learning_rate": 8.34710743801653e-06, "loss": 1.9152, "step": 202 }, { "epoch": 0.03, "grad_norm": 10.767527073887285, "learning_rate": 8.388429752066116e-06, "loss": 1.9798, "step": 203 }, { "epoch": 0.03, "grad_norm": 10.940639283598138, "learning_rate": 8.429752066115703e-06, "loss": 2.1708, "step": 204 }, { "epoch": 0.03, "grad_norm": 9.727462788303134, "learning_rate": 8.47107438016529e-06, "loss": 2.0207, "step": 205 }, { "epoch": 0.03, "grad_norm": 8.228004165840023, "learning_rate": 8.512396694214877e-06, "loss": 1.921, "step": 206 }, { "epoch": 0.03, "grad_norm": 9.495665981202869, "learning_rate": 8.553719008264464e-06, "loss": 1.9397, "step": 207 }, { "epoch": 0.03, "grad_norm": 8.378646795481956, "learning_rate": 8.59504132231405e-06, "loss": 1.8191, "step": 208 }, { "epoch": 0.03, "grad_norm": 10.475841195590732, "learning_rate": 8.636363636363637e-06, "loss": 1.9756, "step": 209 }, { "epoch": 0.03, "grad_norm": 10.373371380629703, "learning_rate": 8.677685950413224e-06, "loss": 1.8886, "step": 210 }, { "epoch": 0.03, "grad_norm": 9.110432297738129, "learning_rate": 8.71900826446281e-06, "loss": 2.0162, "step": 211 }, { "epoch": 0.03, "grad_norm": 9.912590811646458, "learning_rate": 8.760330578512397e-06, "loss": 2.2097, "step": 212 }, { "epoch": 0.03, "grad_norm": 9.613005985353594, "learning_rate": 8.801652892561984e-06, "loss": 1.9196, "step": 213 }, { "epoch": 0.03, "grad_norm": 9.347466352071963, "learning_rate": 8.842975206611571e-06, "loss": 2.0565, "step": 214 }, { "epoch": 0.03, "grad_norm": 9.185940852440194, "learning_rate": 8.884297520661158e-06, "loss": 2.0218, "step": 215 }, { "epoch": 0.03, "grad_norm": 8.75015521406585, "learning_rate": 8.925619834710744e-06, "loss": 1.9208, "step": 216 }, { "epoch": 0.03, "grad_norm": 8.886863537874104, "learning_rate": 8.966942148760331e-06, "loss": 1.8278, "step": 217 }, { "epoch": 0.03, "grad_norm": 8.437359503548105, "learning_rate": 9.008264462809918e-06, "loss": 1.8828, "step": 218 }, { "epoch": 0.03, "grad_norm": 8.063342474328548, "learning_rate": 9.049586776859506e-06, "loss": 2.0073, "step": 219 }, { "epoch": 0.03, "grad_norm": 10.04497586701276, "learning_rate": 9.090909090909091e-06, "loss": 1.8687, "step": 220 }, { "epoch": 0.03, "grad_norm": 9.675601885661589, "learning_rate": 9.132231404958678e-06, "loss": 1.9724, "step": 221 }, { "epoch": 0.03, "grad_norm": 10.596860779495767, "learning_rate": 9.173553719008265e-06, "loss": 2.1465, "step": 222 }, { "epoch": 0.03, "grad_norm": 8.647018392148608, "learning_rate": 9.214876033057853e-06, "loss": 2.1107, "step": 223 }, { "epoch": 0.03, "grad_norm": 9.171017534256697, "learning_rate": 9.25619834710744e-06, "loss": 2.0252, "step": 224 }, { "epoch": 0.03, "grad_norm": 8.473203310222361, "learning_rate": 9.297520661157025e-06, "loss": 1.7761, "step": 225 }, { "epoch": 0.03, "grad_norm": 8.80685783234519, "learning_rate": 9.338842975206613e-06, "loss": 1.721, "step": 226 }, { "epoch": 0.03, "grad_norm": 9.187588603226734, "learning_rate": 9.3801652892562e-06, "loss": 2.0464, "step": 227 }, { "epoch": 0.03, "grad_norm": 8.528420694843955, "learning_rate": 9.421487603305785e-06, "loss": 1.5606, "step": 228 }, { "epoch": 0.03, "grad_norm": 9.572533289207882, "learning_rate": 9.462809917355372e-06, "loss": 1.6547, "step": 229 }, { "epoch": 0.03, "grad_norm": 9.168833075308061, "learning_rate": 9.50413223140496e-06, "loss": 1.96, "step": 230 }, { "epoch": 0.03, "grad_norm": 9.959027878858093, "learning_rate": 9.545454545454547e-06, "loss": 1.956, "step": 231 }, { "epoch": 0.03, "grad_norm": 9.298002793964347, "learning_rate": 9.586776859504134e-06, "loss": 2.0927, "step": 232 }, { "epoch": 0.03, "grad_norm": 9.102930950130755, "learning_rate": 9.62809917355372e-06, "loss": 1.908, "step": 233 }, { "epoch": 0.03, "grad_norm": 8.555532237767633, "learning_rate": 9.669421487603307e-06, "loss": 1.6747, "step": 234 }, { "epoch": 0.03, "grad_norm": 8.305910651940502, "learning_rate": 9.710743801652894e-06, "loss": 2.1246, "step": 235 }, { "epoch": 0.03, "grad_norm": 9.055935808999525, "learning_rate": 9.75206611570248e-06, "loss": 2.3411, "step": 236 }, { "epoch": 0.03, "grad_norm": 9.926934512934947, "learning_rate": 9.793388429752067e-06, "loss": 1.6767, "step": 237 }, { "epoch": 0.03, "grad_norm": 8.866240198178858, "learning_rate": 9.834710743801654e-06, "loss": 2.052, "step": 238 }, { "epoch": 0.03, "grad_norm": 9.382527663673262, "learning_rate": 9.876033057851241e-06, "loss": 1.8879, "step": 239 }, { "epoch": 0.03, "grad_norm": 9.206675605992933, "learning_rate": 9.917355371900828e-06, "loss": 1.9787, "step": 240 }, { "epoch": 0.03, "grad_norm": 7.732492415340666, "learning_rate": 9.958677685950414e-06, "loss": 1.8011, "step": 241 }, { "epoch": 0.03, "grad_norm": 11.949499651563832, "learning_rate": 1e-05, "loss": 1.9414, "step": 242 }, { "epoch": 0.03, "grad_norm": 8.506681894731017, "learning_rate": 9.999999594236213e-06, "loss": 1.9244, "step": 243 }, { "epoch": 0.03, "grad_norm": 8.426313705274087, "learning_rate": 9.999998376944914e-06, "loss": 1.7578, "step": 244 }, { "epoch": 0.03, "grad_norm": 8.335498953799902, "learning_rate": 9.999996348126303e-06, "loss": 1.8731, "step": 245 }, { "epoch": 0.03, "grad_norm": 8.776860597661779, "learning_rate": 9.999993507780707e-06, "loss": 1.9052, "step": 246 }, { "epoch": 0.03, "grad_norm": 10.947511363604733, "learning_rate": 9.999989855908588e-06, "loss": 1.9761, "step": 247 }, { "epoch": 0.03, "grad_norm": 8.48222686523565, "learning_rate": 9.99998539251054e-06, "loss": 2.1072, "step": 248 }, { "epoch": 0.03, "grad_norm": 7.375791118967941, "learning_rate": 9.999980117587285e-06, "loss": 1.9842, "step": 249 }, { "epoch": 0.03, "grad_norm": 9.310581534012224, "learning_rate": 9.999974031139682e-06, "loss": 1.9455, "step": 250 }, { "epoch": 0.03, "grad_norm": 8.397259443600687, "learning_rate": 9.999967133168718e-06, "loss": 1.8094, "step": 251 }, { "epoch": 0.03, "grad_norm": 8.657420376132825, "learning_rate": 9.99995942367551e-06, "loss": 1.9105, "step": 252 }, { "epoch": 0.03, "grad_norm": 8.69229805709189, "learning_rate": 9.999950902661313e-06, "loss": 1.8799, "step": 253 }, { "epoch": 0.03, "grad_norm": 9.857148174472428, "learning_rate": 9.999941570127507e-06, "loss": 2.2328, "step": 254 }, { "epoch": 0.03, "grad_norm": 9.35730809456506, "learning_rate": 9.999931426075608e-06, "loss": 1.9544, "step": 255 }, { "epoch": 0.03, "grad_norm": 8.583332005662738, "learning_rate": 9.999920470507263e-06, "loss": 1.9157, "step": 256 }, { "epoch": 0.03, "grad_norm": 9.938453268254264, "learning_rate": 9.99990870342425e-06, "loss": 2.1439, "step": 257 }, { "epoch": 0.03, "grad_norm": 10.312503780892191, "learning_rate": 9.999896124828478e-06, "loss": 2.2669, "step": 258 }, { "epoch": 0.03, "grad_norm": 8.58302046433907, "learning_rate": 9.999882734721989e-06, "loss": 2.0764, "step": 259 }, { "epoch": 0.03, "grad_norm": 9.021858743871201, "learning_rate": 9.999868533106958e-06, "loss": 1.9659, "step": 260 }, { "epoch": 0.03, "grad_norm": 9.196261122454134, "learning_rate": 9.999853519985687e-06, "loss": 1.869, "step": 261 }, { "epoch": 0.03, "grad_norm": 9.604989792052644, "learning_rate": 9.999837695360612e-06, "loss": 2.2725, "step": 262 }, { "epoch": 0.03, "grad_norm": 10.230037275036008, "learning_rate": 9.999821059234308e-06, "loss": 1.6834, "step": 263 }, { "epoch": 0.03, "grad_norm": 8.8253390088667, "learning_rate": 9.999803611609467e-06, "loss": 2.0477, "step": 264 }, { "epoch": 0.03, "grad_norm": 7.760834653024659, "learning_rate": 9.999785352488925e-06, "loss": 2.0228, "step": 265 }, { "epoch": 0.03, "grad_norm": 9.027398912087152, "learning_rate": 9.999766281875645e-06, "loss": 1.8825, "step": 266 }, { "epoch": 0.03, "grad_norm": 9.388465382449052, "learning_rate": 9.999746399772723e-06, "loss": 1.9656, "step": 267 }, { "epoch": 0.03, "grad_norm": 8.151395461460435, "learning_rate": 9.999725706183386e-06, "loss": 1.9056, "step": 268 }, { "epoch": 0.03, "grad_norm": 7.454631638238912, "learning_rate": 9.99970420111099e-06, "loss": 1.7885, "step": 269 }, { "epoch": 0.03, "grad_norm": 9.455180715944959, "learning_rate": 9.999681884559027e-06, "loss": 2.0159, "step": 270 }, { "epoch": 0.03, "grad_norm": 8.38009484393064, "learning_rate": 9.999658756531121e-06, "loss": 1.834, "step": 271 }, { "epoch": 0.03, "grad_norm": 8.136881648028126, "learning_rate": 9.999634817031025e-06, "loss": 1.9989, "step": 272 }, { "epoch": 0.03, "grad_norm": 8.34286734545337, "learning_rate": 9.999610066062621e-06, "loss": 1.9592, "step": 273 }, { "epoch": 0.03, "grad_norm": 7.6787047033049145, "learning_rate": 9.999584503629932e-06, "loss": 2.0772, "step": 274 }, { "epoch": 0.03, "grad_norm": 9.375297590442223, "learning_rate": 9.9995581297371e-06, "loss": 2.0024, "step": 275 }, { "epoch": 0.03, "grad_norm": 8.469309452692068, "learning_rate": 9.999530944388414e-06, "loss": 2.002, "step": 276 }, { "epoch": 0.03, "grad_norm": 8.686669955373493, "learning_rate": 9.999502947588279e-06, "loss": 1.8949, "step": 277 }, { "epoch": 0.03, "grad_norm": 8.011968373625535, "learning_rate": 9.999474139341243e-06, "loss": 1.7268, "step": 278 }, { "epoch": 0.03, "grad_norm": 8.464899028547737, "learning_rate": 9.999444519651982e-06, "loss": 2.0077, "step": 279 }, { "epoch": 0.03, "grad_norm": 8.098216937666137, "learning_rate": 9.999414088525298e-06, "loss": 1.8657, "step": 280 }, { "epoch": 0.03, "grad_norm": 7.911649873174135, "learning_rate": 9.999382845966138e-06, "loss": 2.1548, "step": 281 }, { "epoch": 0.04, "grad_norm": 7.751833605167496, "learning_rate": 9.999350791979568e-06, "loss": 1.8828, "step": 282 }, { "epoch": 0.04, "grad_norm": 7.970396951968716, "learning_rate": 9.99931792657079e-06, "loss": 1.9745, "step": 283 }, { "epoch": 0.04, "grad_norm": 8.213070955192718, "learning_rate": 9.999284249745143e-06, "loss": 1.8684, "step": 284 }, { "epoch": 0.04, "grad_norm": 8.136723810856074, "learning_rate": 9.999249761508086e-06, "loss": 1.9603, "step": 285 }, { "epoch": 0.04, "grad_norm": 8.355952500660166, "learning_rate": 9.999214461865224e-06, "loss": 1.9095, "step": 286 }, { "epoch": 0.04, "grad_norm": 9.707158905076357, "learning_rate": 9.999178350822279e-06, "loss": 1.9113, "step": 287 }, { "epoch": 0.04, "grad_norm": 8.683209671978815, "learning_rate": 9.999141428385117e-06, "loss": 1.9736, "step": 288 }, { "epoch": 0.04, "grad_norm": 8.766309058058232, "learning_rate": 9.999103694559731e-06, "loss": 2.2717, "step": 289 }, { "epoch": 0.04, "grad_norm": 9.947553522920769, "learning_rate": 9.999065149352242e-06, "loss": 2.0182, "step": 290 }, { "epoch": 0.04, "grad_norm": 8.675265443447255, "learning_rate": 9.99902579276891e-06, "loss": 2.0907, "step": 291 }, { "epoch": 0.04, "grad_norm": 8.64123132749705, "learning_rate": 9.99898562481612e-06, "loss": 1.9084, "step": 292 }, { "epoch": 0.04, "grad_norm": 9.234810863731482, "learning_rate": 9.99894464550039e-06, "loss": 1.9932, "step": 293 }, { "epoch": 0.04, "grad_norm": 8.156543640868673, "learning_rate": 9.998902854828377e-06, "loss": 1.7589, "step": 294 }, { "epoch": 0.04, "grad_norm": 8.022298591623386, "learning_rate": 9.998860252806857e-06, "loss": 1.943, "step": 295 }, { "epoch": 0.04, "grad_norm": 7.957157909827541, "learning_rate": 9.99881683944275e-06, "loss": 1.7267, "step": 296 }, { "epoch": 0.04, "grad_norm": 8.42660048919152, "learning_rate": 9.9987726147431e-06, "loss": 1.8963, "step": 297 }, { "epoch": 0.04, "grad_norm": 8.184168441065664, "learning_rate": 9.998727578715083e-06, "loss": 1.8657, "step": 298 }, { "epoch": 0.04, "grad_norm": 7.72725733214423, "learning_rate": 9.998681731366012e-06, "loss": 1.7694, "step": 299 }, { "epoch": 0.04, "grad_norm": 9.111134859729766, "learning_rate": 9.998635072703327e-06, "loss": 1.8721, "step": 300 }, { "epoch": 0.04, "grad_norm": 9.795159679883207, "learning_rate": 9.9985876027346e-06, "loss": 2.0429, "step": 301 }, { "epoch": 0.04, "grad_norm": 8.569550402953332, "learning_rate": 9.998539321467536e-06, "loss": 2.0961, "step": 302 }, { "epoch": 0.04, "grad_norm": 7.799276509733299, "learning_rate": 9.998490228909972e-06, "loss": 1.9206, "step": 303 }, { "epoch": 0.04, "grad_norm": 9.098078663378386, "learning_rate": 9.998440325069876e-06, "loss": 1.7681, "step": 304 }, { "epoch": 0.04, "grad_norm": 9.644163391603946, "learning_rate": 9.998389609955348e-06, "loss": 2.1452, "step": 305 }, { "epoch": 0.04, "grad_norm": 8.910887312491596, "learning_rate": 9.998338083574618e-06, "loss": 2.1131, "step": 306 }, { "epoch": 0.04, "grad_norm": 7.980815243970783, "learning_rate": 9.99828574593605e-06, "loss": 1.917, "step": 307 }, { "epoch": 0.04, "grad_norm": 9.96390008097356, "learning_rate": 9.998232597048138e-06, "loss": 1.9103, "step": 308 }, { "epoch": 0.04, "grad_norm": 9.174314985315329, "learning_rate": 9.99817863691951e-06, "loss": 1.9654, "step": 309 }, { "epoch": 0.04, "grad_norm": 8.118002623350819, "learning_rate": 9.998123865558921e-06, "loss": 2.025, "step": 310 }, { "epoch": 0.04, "grad_norm": 7.907721967690884, "learning_rate": 9.998068282975264e-06, "loss": 1.8251, "step": 311 }, { "epoch": 0.04, "grad_norm": 7.364116882844524, "learning_rate": 9.998011889177558e-06, "loss": 1.9423, "step": 312 }, { "epoch": 0.04, "grad_norm": 8.603722649603824, "learning_rate": 9.997954684174957e-06, "loss": 1.8626, "step": 313 }, { "epoch": 0.04, "grad_norm": 9.75562245453525, "learning_rate": 9.997896667976745e-06, "loss": 2.0277, "step": 314 }, { "epoch": 0.04, "grad_norm": 8.032435808020956, "learning_rate": 9.99783784059234e-06, "loss": 2.0628, "step": 315 }, { "epoch": 0.04, "grad_norm": 8.205254461300731, "learning_rate": 9.997778202031289e-06, "loss": 1.8688, "step": 316 }, { "epoch": 0.04, "grad_norm": 8.637420937936225, "learning_rate": 9.997717752303272e-06, "loss": 2.1438, "step": 317 }, { "epoch": 0.04, "grad_norm": 7.616449029933792, "learning_rate": 9.997656491418098e-06, "loss": 1.7899, "step": 318 }, { "epoch": 0.04, "grad_norm": 7.7253468341059115, "learning_rate": 9.997594419385712e-06, "loss": 1.8027, "step": 319 }, { "epoch": 0.04, "grad_norm": 7.44249041757811, "learning_rate": 9.99753153621619e-06, "loss": 1.7697, "step": 320 }, { "epoch": 0.04, "grad_norm": 8.020749410302344, "learning_rate": 9.997467841919736e-06, "loss": 2.0084, "step": 321 }, { "epoch": 0.04, "grad_norm": 7.493705583123682, "learning_rate": 9.99740333650669e-06, "loss": 1.824, "step": 322 }, { "epoch": 0.04, "grad_norm": 8.74639698043684, "learning_rate": 9.997338019987518e-06, "loss": 1.9468, "step": 323 }, { "epoch": 0.04, "grad_norm": 8.46057638218816, "learning_rate": 9.997271892372826e-06, "loss": 2.0079, "step": 324 }, { "epoch": 0.04, "grad_norm": 7.473482726261468, "learning_rate": 9.997204953673342e-06, "loss": 2.011, "step": 325 }, { "epoch": 0.04, "grad_norm": 7.837416493603346, "learning_rate": 9.997137203899935e-06, "loss": 1.8007, "step": 326 }, { "epoch": 0.04, "grad_norm": 7.8623905455581715, "learning_rate": 9.997068643063599e-06, "loss": 2.1961, "step": 327 }, { "epoch": 0.04, "grad_norm": 8.629398024771373, "learning_rate": 9.996999271175462e-06, "loss": 1.9317, "step": 328 }, { "epoch": 0.04, "grad_norm": 8.666090559781216, "learning_rate": 9.996929088246783e-06, "loss": 1.8351, "step": 329 }, { "epoch": 0.04, "grad_norm": 8.627580418144618, "learning_rate": 9.996858094288952e-06, "loss": 1.7898, "step": 330 }, { "epoch": 0.04, "grad_norm": 8.53687129454593, "learning_rate": 9.996786289313496e-06, "loss": 2.0142, "step": 331 }, { "epoch": 0.04, "grad_norm": 9.02488323526696, "learning_rate": 9.996713673332064e-06, "loss": 1.9633, "step": 332 }, { "epoch": 0.04, "grad_norm": 8.68240008507738, "learning_rate": 9.996640246356446e-06, "loss": 1.9814, "step": 333 }, { "epoch": 0.04, "grad_norm": 8.274422458942027, "learning_rate": 9.996566008398556e-06, "loss": 2.0918, "step": 334 }, { "epoch": 0.04, "grad_norm": 8.442453841717107, "learning_rate": 9.996490959470448e-06, "loss": 2.0544, "step": 335 }, { "epoch": 0.04, "grad_norm": 8.938743465824032, "learning_rate": 9.996415099584297e-06, "loss": 2.0952, "step": 336 }, { "epoch": 0.04, "grad_norm": 8.114557421387657, "learning_rate": 9.996338428752423e-06, "loss": 1.793, "step": 337 }, { "epoch": 0.04, "grad_norm": 8.022708066932996, "learning_rate": 9.996260946987262e-06, "loss": 1.9506, "step": 338 }, { "epoch": 0.04, "grad_norm": 8.352851743394169, "learning_rate": 9.996182654301395e-06, "loss": 2.0295, "step": 339 }, { "epoch": 0.04, "grad_norm": 7.097158760959699, "learning_rate": 9.996103550707528e-06, "loss": 1.7974, "step": 340 }, { "epoch": 0.04, "grad_norm": 8.242313770581363, "learning_rate": 9.9960236362185e-06, "loss": 1.911, "step": 341 }, { "epoch": 0.04, "grad_norm": 9.118224815449485, "learning_rate": 9.99594291084728e-06, "loss": 1.8736, "step": 342 }, { "epoch": 0.04, "grad_norm": 8.580603348104672, "learning_rate": 9.995861374606972e-06, "loss": 1.9594, "step": 343 }, { "epoch": 0.04, "grad_norm": 7.3991819009243445, "learning_rate": 9.99577902751081e-06, "loss": 2.0526, "step": 344 }, { "epoch": 0.04, "grad_norm": 8.565259482049667, "learning_rate": 9.995695869572158e-06, "loss": 1.7841, "step": 345 }, { "epoch": 0.04, "grad_norm": 8.626323333599354, "learning_rate": 9.995611900804513e-06, "loss": 1.9499, "step": 346 }, { "epoch": 0.04, "grad_norm": 8.675022885638139, "learning_rate": 9.995527121221504e-06, "loss": 1.7943, "step": 347 }, { "epoch": 0.04, "grad_norm": 8.903252968456103, "learning_rate": 9.995441530836893e-06, "loss": 1.9206, "step": 348 }, { "epoch": 0.04, "grad_norm": 7.786060806681775, "learning_rate": 9.99535512966457e-06, "loss": 2.0031, "step": 349 }, { "epoch": 0.04, "grad_norm": 8.45617641620377, "learning_rate": 9.995267917718559e-06, "loss": 1.9383, "step": 350 }, { "epoch": 0.04, "grad_norm": 7.462435057473947, "learning_rate": 9.995179895013011e-06, "loss": 1.6763, "step": 351 }, { "epoch": 0.04, "grad_norm": 7.856825964424402, "learning_rate": 9.995091061562221e-06, "loss": 1.9335, "step": 352 }, { "epoch": 0.04, "grad_norm": 9.203022250465578, "learning_rate": 9.9950014173806e-06, "loss": 1.9619, "step": 353 }, { "epoch": 0.04, "grad_norm": 9.059876128527097, "learning_rate": 9.9949109624827e-06, "loss": 1.8098, "step": 354 }, { "epoch": 0.04, "grad_norm": 9.003390860998293, "learning_rate": 9.994819696883203e-06, "loss": 1.6767, "step": 355 }, { "epoch": 0.04, "grad_norm": 9.004164510938905, "learning_rate": 9.99472762059692e-06, "loss": 1.7166, "step": 356 }, { "epoch": 0.04, "grad_norm": 8.206837942884722, "learning_rate": 9.994634733638799e-06, "loss": 1.9975, "step": 357 }, { "epoch": 0.04, "grad_norm": 9.04433728987272, "learning_rate": 9.994541036023913e-06, "loss": 2.1912, "step": 358 }, { "epoch": 0.04, "grad_norm": 8.510810665108357, "learning_rate": 9.994446527767469e-06, "loss": 1.9095, "step": 359 }, { "epoch": 0.04, "grad_norm": 8.639223411849347, "learning_rate": 9.99435120888481e-06, "loss": 1.7798, "step": 360 }, { "epoch": 0.04, "grad_norm": 7.788673269159248, "learning_rate": 9.994255079391402e-06, "loss": 1.8406, "step": 361 }, { "epoch": 0.05, "grad_norm": 8.931523118624126, "learning_rate": 9.994158139302852e-06, "loss": 2.0589, "step": 362 }, { "epoch": 0.05, "grad_norm": 9.015732593853178, "learning_rate": 9.99406038863489e-06, "loss": 1.9419, "step": 363 }, { "epoch": 0.05, "grad_norm": 9.005259586165177, "learning_rate": 9.993961827403385e-06, "loss": 2.0096, "step": 364 }, { "epoch": 0.05, "grad_norm": 7.626349347985496, "learning_rate": 9.99386245562433e-06, "loss": 2.0786, "step": 365 }, { "epoch": 0.05, "grad_norm": 7.724704705535141, "learning_rate": 9.993762273313858e-06, "loss": 2.1523, "step": 366 }, { "epoch": 0.05, "grad_norm": 8.86805510567949, "learning_rate": 9.993661280488225e-06, "loss": 1.7812, "step": 367 }, { "epoch": 0.05, "grad_norm": 8.433301247761307, "learning_rate": 9.993559477163827e-06, "loss": 2.0256, "step": 368 }, { "epoch": 0.05, "grad_norm": 7.497552861728034, "learning_rate": 9.993456863357183e-06, "loss": 1.8227, "step": 369 }, { "epoch": 0.05, "grad_norm": 9.197853272186277, "learning_rate": 9.99335343908495e-06, "loss": 1.9092, "step": 370 }, { "epoch": 0.05, "grad_norm": 7.228781796298233, "learning_rate": 9.993249204363915e-06, "loss": 2.0769, "step": 371 }, { "epoch": 0.05, "grad_norm": 7.652267752686182, "learning_rate": 9.993144159210994e-06, "loss": 1.9415, "step": 372 }, { "epoch": 0.05, "grad_norm": 7.765410614765676, "learning_rate": 9.993038303643239e-06, "loss": 1.8776, "step": 373 }, { "epoch": 0.05, "grad_norm": 7.253983336277575, "learning_rate": 9.992931637677828e-06, "loss": 1.8547, "step": 374 }, { "epoch": 0.05, "grad_norm": 8.265873353572378, "learning_rate": 9.992824161332073e-06, "loss": 1.8928, "step": 375 }, { "epoch": 0.05, "grad_norm": 7.931502980403187, "learning_rate": 9.992715874623422e-06, "loss": 1.72, "step": 376 }, { "epoch": 0.05, "grad_norm": 8.843591107746793, "learning_rate": 9.992606777569447e-06, "loss": 1.8753, "step": 377 }, { "epoch": 0.05, "grad_norm": 7.220696038745079, "learning_rate": 9.992496870187857e-06, "loss": 1.6919, "step": 378 }, { "epoch": 0.05, "grad_norm": 7.743089946955008, "learning_rate": 9.99238615249649e-06, "loss": 1.9582, "step": 379 }, { "epoch": 0.05, "grad_norm": 8.24541338294204, "learning_rate": 9.992274624513316e-06, "loss": 1.9566, "step": 380 }, { "epoch": 0.05, "grad_norm": 10.132015195480914, "learning_rate": 9.992162286256436e-06, "loss": 2.3435, "step": 381 }, { "epoch": 0.05, "grad_norm": 8.106921448286165, "learning_rate": 9.992049137744084e-06, "loss": 1.7995, "step": 382 }, { "epoch": 0.05, "grad_norm": 8.909639875845123, "learning_rate": 9.991935178994625e-06, "loss": 1.7577, "step": 383 }, { "epoch": 0.05, "grad_norm": 7.781789963552718, "learning_rate": 9.991820410026554e-06, "loss": 1.9582, "step": 384 }, { "epoch": 0.05, "grad_norm": 7.823424136838359, "learning_rate": 9.991704830858498e-06, "loss": 1.9137, "step": 385 }, { "epoch": 0.05, "grad_norm": 8.201123773418145, "learning_rate": 9.991588441509218e-06, "loss": 1.8383, "step": 386 }, { "epoch": 0.05, "grad_norm": 8.552729984609671, "learning_rate": 9.991471241997604e-06, "loss": 1.9506, "step": 387 }, { "epoch": 0.05, "grad_norm": 11.008341322090871, "learning_rate": 9.99135323234268e-06, "loss": 2.1774, "step": 388 }, { "epoch": 0.05, "grad_norm": 7.92779625568221, "learning_rate": 9.991234412563594e-06, "loss": 1.875, "step": 389 }, { "epoch": 0.05, "grad_norm": 7.286462818972296, "learning_rate": 9.991114782679636e-06, "loss": 2.0592, "step": 390 }, { "epoch": 0.05, "grad_norm": 8.96919966800026, "learning_rate": 9.990994342710223e-06, "loss": 1.9089, "step": 391 }, { "epoch": 0.05, "grad_norm": 8.319533149228327, "learning_rate": 9.9908730926749e-06, "loss": 2.1527, "step": 392 }, { "epoch": 0.05, "grad_norm": 8.495661566283962, "learning_rate": 9.990751032593347e-06, "loss": 1.7815, "step": 393 }, { "epoch": 0.05, "grad_norm": 8.666178150349666, "learning_rate": 9.990628162485377e-06, "loss": 1.8859, "step": 394 }, { "epoch": 0.05, "grad_norm": 7.06865134470587, "learning_rate": 9.990504482370931e-06, "loss": 1.9254, "step": 395 }, { "epoch": 0.05, "grad_norm": 8.041045169426955, "learning_rate": 9.990379992270084e-06, "loss": 1.973, "step": 396 }, { "epoch": 0.05, "grad_norm": 7.780073347506855, "learning_rate": 9.99025469220304e-06, "loss": 2.0489, "step": 397 }, { "epoch": 0.05, "grad_norm": 8.325010843669384, "learning_rate": 9.990128582190139e-06, "loss": 2.0199, "step": 398 }, { "epoch": 0.05, "grad_norm": 8.049256096884534, "learning_rate": 9.990001662251844e-06, "loss": 1.6476, "step": 399 }, { "epoch": 0.05, "grad_norm": 9.226144412543464, "learning_rate": 9.989873932408761e-06, "loss": 1.8589, "step": 400 }, { "epoch": 0.05, "grad_norm": 7.983565506975998, "learning_rate": 9.989745392681616e-06, "loss": 1.8481, "step": 401 }, { "epoch": 0.05, "grad_norm": 7.273272269141753, "learning_rate": 9.989616043091275e-06, "loss": 1.996, "step": 402 }, { "epoch": 0.05, "grad_norm": 8.080992579812964, "learning_rate": 9.989485883658729e-06, "loss": 1.9044, "step": 403 }, { "epoch": 0.05, "grad_norm": 7.621629495481999, "learning_rate": 9.989354914405106e-06, "loss": 1.9648, "step": 404 }, { "epoch": 0.05, "grad_norm": 8.643585038069588, "learning_rate": 9.989223135351664e-06, "loss": 1.8339, "step": 405 }, { "epoch": 0.05, "grad_norm": 8.911438902695732, "learning_rate": 9.989090546519791e-06, "loss": 1.9086, "step": 406 }, { "epoch": 0.05, "grad_norm": 8.637129509989698, "learning_rate": 9.988957147931004e-06, "loss": 1.879, "step": 407 }, { "epoch": 0.05, "grad_norm": 8.490916066959722, "learning_rate": 9.988822939606957e-06, "loss": 1.7337, "step": 408 }, { "epoch": 0.05, "grad_norm": 8.69368787115206, "learning_rate": 9.988687921569433e-06, "loss": 1.8882, "step": 409 }, { "epoch": 0.05, "grad_norm": 8.043593160035238, "learning_rate": 9.988552093840344e-06, "loss": 1.9654, "step": 410 }, { "epoch": 0.05, "grad_norm": 8.380921909235644, "learning_rate": 9.988415456441737e-06, "loss": 1.7366, "step": 411 }, { "epoch": 0.05, "grad_norm": 7.8787974278424135, "learning_rate": 9.988278009395788e-06, "loss": 1.9629, "step": 412 }, { "epoch": 0.05, "grad_norm": 7.975078925196779, "learning_rate": 9.988139752724807e-06, "loss": 1.7735, "step": 413 }, { "epoch": 0.05, "grad_norm": 8.514299247019638, "learning_rate": 9.988000686451235e-06, "loss": 1.9607, "step": 414 }, { "epoch": 0.05, "grad_norm": 7.433587951826769, "learning_rate": 9.987860810597639e-06, "loss": 1.9962, "step": 415 }, { "epoch": 0.05, "grad_norm": 8.05332511400459, "learning_rate": 9.987720125186725e-06, "loss": 1.8479, "step": 416 }, { "epoch": 0.05, "grad_norm": 8.158442381896505, "learning_rate": 9.987578630241326e-06, "loss": 1.9039, "step": 417 }, { "epoch": 0.05, "grad_norm": 7.731648064024509, "learning_rate": 9.987436325784407e-06, "loss": 1.9345, "step": 418 }, { "epoch": 0.05, "grad_norm": 7.886726994614096, "learning_rate": 9.987293211839065e-06, "loss": 2.1885, "step": 419 }, { "epoch": 0.05, "grad_norm": 8.963380273727308, "learning_rate": 9.987149288428529e-06, "loss": 1.9893, "step": 420 }, { "epoch": 0.05, "grad_norm": 7.109160631621873, "learning_rate": 9.987004555576156e-06, "loss": 1.7918, "step": 421 }, { "epoch": 0.05, "grad_norm": 7.555699717429362, "learning_rate": 9.986859013305441e-06, "loss": 1.8786, "step": 422 }, { "epoch": 0.05, "grad_norm": 8.337336900089857, "learning_rate": 9.986712661640003e-06, "loss": 2.027, "step": 423 }, { "epoch": 0.05, "grad_norm": 7.811677204903205, "learning_rate": 9.986565500603598e-06, "loss": 1.8794, "step": 424 }, { "epoch": 0.05, "grad_norm": 8.523225140762438, "learning_rate": 9.986417530220107e-06, "loss": 2.0361, "step": 425 }, { "epoch": 0.05, "grad_norm": 7.981821998470238, "learning_rate": 9.986268750513552e-06, "loss": 1.7912, "step": 426 }, { "epoch": 0.05, "grad_norm": 7.232404315819718, "learning_rate": 9.986119161508078e-06, "loss": 1.7302, "step": 427 }, { "epoch": 0.05, "grad_norm": 8.107824322186916, "learning_rate": 9.985968763227963e-06, "loss": 1.7449, "step": 428 }, { "epoch": 0.05, "grad_norm": 7.907619567649971, "learning_rate": 9.985817555697618e-06, "loss": 1.8811, "step": 429 }, { "epoch": 0.05, "grad_norm": 8.00970585838184, "learning_rate": 9.985665538941588e-06, "loss": 2.3317, "step": 430 }, { "epoch": 0.05, "grad_norm": 7.800524363018733, "learning_rate": 9.985512712984543e-06, "loss": 1.9658, "step": 431 }, { "epoch": 0.05, "grad_norm": 11.80614432631225, "learning_rate": 9.985359077851287e-06, "loss": 1.8602, "step": 432 }, { "epoch": 0.05, "grad_norm": 7.665583610488059, "learning_rate": 9.985204633566756e-06, "loss": 1.7712, "step": 433 }, { "epoch": 0.05, "grad_norm": 7.492244748850037, "learning_rate": 9.98504938015602e-06, "loss": 1.9854, "step": 434 }, { "epoch": 0.05, "grad_norm": 6.9375990078209915, "learning_rate": 9.984893317644275e-06, "loss": 1.6999, "step": 435 }, { "epoch": 0.05, "grad_norm": 8.738363657618471, "learning_rate": 9.984736446056852e-06, "loss": 1.988, "step": 436 }, { "epoch": 0.05, "grad_norm": 8.089353915156718, "learning_rate": 9.984578765419211e-06, "loss": 1.9128, "step": 437 }, { "epoch": 0.05, "grad_norm": 8.518454923523251, "learning_rate": 9.984420275756945e-06, "loss": 1.9998, "step": 438 }, { "epoch": 0.05, "grad_norm": 8.307052038671603, "learning_rate": 9.984260977095777e-06, "loss": 1.7624, "step": 439 }, { "epoch": 0.05, "grad_norm": 7.158215702722645, "learning_rate": 9.984100869461563e-06, "loss": 2.0818, "step": 440 }, { "epoch": 0.05, "grad_norm": 8.010186975921712, "learning_rate": 9.983939952880291e-06, "loss": 2.0884, "step": 441 }, { "epoch": 0.05, "grad_norm": 8.03677989038993, "learning_rate": 9.983778227378074e-06, "loss": 2.0571, "step": 442 }, { "epoch": 0.06, "grad_norm": 7.424368342350429, "learning_rate": 9.983615692981166e-06, "loss": 1.9161, "step": 443 }, { "epoch": 0.06, "grad_norm": 7.659379951357727, "learning_rate": 9.983452349715944e-06, "loss": 1.9553, "step": 444 }, { "epoch": 0.06, "grad_norm": 7.480524099688378, "learning_rate": 9.98328819760892e-06, "loss": 1.7952, "step": 445 }, { "epoch": 0.06, "grad_norm": 7.520614042463804, "learning_rate": 9.983123236686736e-06, "loss": 1.9738, "step": 446 }, { "epoch": 0.06, "grad_norm": 8.259018830521004, "learning_rate": 9.982957466976169e-06, "loss": 1.9226, "step": 447 }, { "epoch": 0.06, "grad_norm": 7.452124430603704, "learning_rate": 9.982790888504122e-06, "loss": 1.961, "step": 448 }, { "epoch": 0.06, "grad_norm": 8.319904772862278, "learning_rate": 9.982623501297633e-06, "loss": 1.9286, "step": 449 }, { "epoch": 0.06, "grad_norm": 7.708450235107373, "learning_rate": 9.98245530538387e-06, "loss": 1.9683, "step": 450 }, { "epoch": 0.06, "grad_norm": 8.709534968677206, "learning_rate": 9.98228630079013e-06, "loss": 1.8342, "step": 451 }, { "epoch": 0.06, "grad_norm": 7.222567777444563, "learning_rate": 9.982116487543844e-06, "loss": 1.8785, "step": 452 }, { "epoch": 0.06, "grad_norm": 7.494442885041719, "learning_rate": 9.981945865672575e-06, "loss": 1.9351, "step": 453 }, { "epoch": 0.06, "grad_norm": 8.698840883391616, "learning_rate": 9.981774435204014e-06, "loss": 2.0972, "step": 454 }, { "epoch": 0.06, "grad_norm": 7.884238498864951, "learning_rate": 9.981602196165986e-06, "loss": 1.6176, "step": 455 }, { "epoch": 0.06, "grad_norm": 7.754881782663961, "learning_rate": 9.98142914858645e-06, "loss": 1.9587, "step": 456 }, { "epoch": 0.06, "grad_norm": 7.753099121579416, "learning_rate": 9.981255292493486e-06, "loss": 1.8316, "step": 457 }, { "epoch": 0.06, "grad_norm": 8.741777568050525, "learning_rate": 9.981080627915316e-06, "loss": 1.7328, "step": 458 }, { "epoch": 0.06, "grad_norm": 7.164827838104347, "learning_rate": 9.980905154880288e-06, "loss": 1.7406, "step": 459 }, { "epoch": 0.06, "grad_norm": 7.964796660623513, "learning_rate": 9.980728873416883e-06, "loss": 2.0716, "step": 460 }, { "epoch": 0.06, "grad_norm": 9.03144637966211, "learning_rate": 9.98055178355371e-06, "loss": 1.8229, "step": 461 }, { "epoch": 0.06, "grad_norm": 8.30907432351598, "learning_rate": 9.980373885319514e-06, "loss": 1.5971, "step": 462 }, { "epoch": 0.06, "grad_norm": 8.630609933170678, "learning_rate": 9.980195178743169e-06, "loss": 2.0217, "step": 463 }, { "epoch": 0.06, "grad_norm": 9.129435634798291, "learning_rate": 9.980015663853677e-06, "loss": 2.0979, "step": 464 }, { "epoch": 0.06, "grad_norm": 9.053410050710855, "learning_rate": 9.979835340680178e-06, "loss": 2.0329, "step": 465 }, { "epoch": 0.06, "grad_norm": 15.863086399801057, "learning_rate": 9.979654209251939e-06, "loss": 2.0838, "step": 466 }, { "epoch": 0.06, "grad_norm": 8.524340271219673, "learning_rate": 9.979472269598356e-06, "loss": 1.9536, "step": 467 }, { "epoch": 0.06, "grad_norm": 7.913667815901161, "learning_rate": 9.97928952174896e-06, "loss": 1.7811, "step": 468 }, { "epoch": 0.06, "grad_norm": 8.79558639392487, "learning_rate": 9.979105965733413e-06, "loss": 1.9733, "step": 469 }, { "epoch": 0.06, "grad_norm": 7.858354902402524, "learning_rate": 9.978921601581508e-06, "loss": 1.8934, "step": 470 }, { "epoch": 0.06, "grad_norm": 7.641050183944704, "learning_rate": 9.978736429323166e-06, "loss": 1.8043, "step": 471 }, { "epoch": 0.06, "grad_norm": 8.587638274331642, "learning_rate": 9.978550448988443e-06, "loss": 1.9877, "step": 472 }, { "epoch": 0.06, "grad_norm": 7.5051794313057, "learning_rate": 9.978363660607522e-06, "loss": 1.9204, "step": 473 }, { "epoch": 0.06, "grad_norm": 8.317181741879912, "learning_rate": 9.978176064210723e-06, "loss": 2.0815, "step": 474 }, { "epoch": 0.06, "grad_norm": 8.636942633193053, "learning_rate": 9.977987659828492e-06, "loss": 2.0042, "step": 475 }, { "epoch": 0.06, "grad_norm": 7.357163801251532, "learning_rate": 9.97779844749141e-06, "loss": 1.8682, "step": 476 }, { "epoch": 0.06, "grad_norm": 7.758383260169424, "learning_rate": 9.977608427230184e-06, "loss": 1.7482, "step": 477 }, { "epoch": 0.06, "grad_norm": 8.40989251164511, "learning_rate": 9.977417599075658e-06, "loss": 1.7133, "step": 478 }, { "epoch": 0.06, "grad_norm": 9.199856407578597, "learning_rate": 9.977225963058804e-06, "loss": 2.1901, "step": 479 }, { "epoch": 0.06, "grad_norm": 8.040507373410101, "learning_rate": 9.977033519210725e-06, "loss": 2.0719, "step": 480 }, { "epoch": 0.06, "grad_norm": 8.14874842816703, "learning_rate": 9.976840267562658e-06, "loss": 1.9967, "step": 481 }, { "epoch": 0.06, "grad_norm": 8.181303993145912, "learning_rate": 9.976646208145964e-06, "loss": 1.7955, "step": 482 }, { "epoch": 0.06, "grad_norm": 9.52101083950791, "learning_rate": 9.976451340992143e-06, "loss": 1.8626, "step": 483 }, { "epoch": 0.06, "grad_norm": 8.116940271211408, "learning_rate": 9.976255666132825e-06, "loss": 1.8847, "step": 484 }, { "epoch": 0.06, "grad_norm": 8.072605267335227, "learning_rate": 9.976059183599765e-06, "loss": 2.1296, "step": 485 }, { "epoch": 0.06, "grad_norm": 7.384567709516375, "learning_rate": 9.975861893424856e-06, "loss": 1.8205, "step": 486 }, { "epoch": 0.06, "grad_norm": 8.560759877967428, "learning_rate": 9.975663795640118e-06, "loss": 1.774, "step": 487 }, { "epoch": 0.06, "grad_norm": 8.096189970662037, "learning_rate": 9.975464890277704e-06, "loss": 1.9356, "step": 488 }, { "epoch": 0.06, "grad_norm": 6.996770711461549, "learning_rate": 9.975265177369897e-06, "loss": 1.9358, "step": 489 }, { "epoch": 0.06, "grad_norm": 8.429164960593617, "learning_rate": 9.975064656949112e-06, "loss": 1.9707, "step": 490 }, { "epoch": 0.06, "grad_norm": 8.527123096734694, "learning_rate": 9.974863329047894e-06, "loss": 2.072, "step": 491 }, { "epoch": 0.06, "grad_norm": 7.571947389594298, "learning_rate": 9.97466119369892e-06, "loss": 1.8873, "step": 492 }, { "epoch": 0.06, "grad_norm": 7.936933287336415, "learning_rate": 9.974458250934997e-06, "loss": 1.5295, "step": 493 }, { "epoch": 0.06, "grad_norm": 8.336311570638177, "learning_rate": 9.974254500789065e-06, "loss": 1.7876, "step": 494 }, { "epoch": 0.06, "grad_norm": 7.285342721247761, "learning_rate": 9.974049943294193e-06, "loss": 1.7472, "step": 495 }, { "epoch": 0.06, "grad_norm": 8.21455191886948, "learning_rate": 9.97384457848358e-06, "loss": 1.6814, "step": 496 }, { "epoch": 0.06, "grad_norm": 7.654993439505626, "learning_rate": 9.973638406390563e-06, "loss": 1.8149, "step": 497 }, { "epoch": 0.06, "grad_norm": 6.868256040200617, "learning_rate": 9.9734314270486e-06, "loss": 1.8743, "step": 498 }, { "epoch": 0.06, "grad_norm": 8.86347960782948, "learning_rate": 9.973223640491286e-06, "loss": 2.0185, "step": 499 }, { "epoch": 0.06, "grad_norm": 7.235429543633969, "learning_rate": 9.973015046752346e-06, "loss": 2.1262, "step": 500 }, { "epoch": 0.06, "grad_norm": 6.923674023342423, "learning_rate": 9.972805645865637e-06, "loss": 1.9186, "step": 501 }, { "epoch": 0.06, "grad_norm": 7.476652252818588, "learning_rate": 9.972595437865145e-06, "loss": 1.6288, "step": 502 }, { "epoch": 0.06, "grad_norm": 7.465525431483426, "learning_rate": 9.972384422784989e-06, "loss": 1.7879, "step": 503 }, { "epoch": 0.06, "grad_norm": 6.961337084659824, "learning_rate": 9.972172600659416e-06, "loss": 1.881, "step": 504 }, { "epoch": 0.06, "grad_norm": 7.567320429223, "learning_rate": 9.971959971522806e-06, "loss": 1.8067, "step": 505 }, { "epoch": 0.06, "grad_norm": 8.388685338054643, "learning_rate": 9.971746535409672e-06, "loss": 1.9443, "step": 506 }, { "epoch": 0.06, "grad_norm": 8.196914725932412, "learning_rate": 9.971532292354655e-06, "loss": 1.7682, "step": 507 }, { "epoch": 0.06, "grad_norm": 7.996607420015542, "learning_rate": 9.971317242392527e-06, "loss": 1.812, "step": 508 }, { "epoch": 0.06, "grad_norm": 8.59167114397184, "learning_rate": 9.971101385558193e-06, "loss": 2.0818, "step": 509 }, { "epoch": 0.06, "grad_norm": 7.112602950732548, "learning_rate": 9.970884721886685e-06, "loss": 2.0289, "step": 510 }, { "epoch": 0.06, "grad_norm": 8.156083666985394, "learning_rate": 9.970667251413173e-06, "loss": 2.0993, "step": 511 }, { "epoch": 0.06, "grad_norm": 7.869387591228293, "learning_rate": 9.97044897417295e-06, "loss": 1.8786, "step": 512 }, { "epoch": 0.06, "grad_norm": 8.82204829763571, "learning_rate": 9.970229890201445e-06, "loss": 2.1554, "step": 513 }, { "epoch": 0.06, "grad_norm": 7.815167018140766, "learning_rate": 9.970009999534218e-06, "loss": 1.8809, "step": 514 }, { "epoch": 0.06, "grad_norm": 7.667287271405136, "learning_rate": 9.969789302206957e-06, "loss": 2.0119, "step": 515 }, { "epoch": 0.06, "grad_norm": 8.083627925418112, "learning_rate": 9.969567798255481e-06, "loss": 1.7832, "step": 516 }, { "epoch": 0.06, "grad_norm": 8.782535518492598, "learning_rate": 9.969345487715743e-06, "loss": 1.7341, "step": 517 }, { "epoch": 0.06, "grad_norm": 7.882058838247966, "learning_rate": 9.969122370623824e-06, "loss": 1.9803, "step": 518 }, { "epoch": 0.06, "grad_norm": 7.584042029490565, "learning_rate": 9.96889844701594e-06, "loss": 1.9628, "step": 519 }, { "epoch": 0.06, "grad_norm": 8.136621229271874, "learning_rate": 9.968673716928432e-06, "loss": 1.841, "step": 520 }, { "epoch": 0.06, "grad_norm": 8.398516560122875, "learning_rate": 9.968448180397778e-06, "loss": 2.0149, "step": 521 }, { "epoch": 0.06, "grad_norm": 8.075264103169584, "learning_rate": 9.968221837460578e-06, "loss": 2.0313, "step": 522 }, { "epoch": 0.07, "grad_norm": 9.272735959426457, "learning_rate": 9.967994688153577e-06, "loss": 1.9978, "step": 523 }, { "epoch": 0.07, "grad_norm": 9.562299714566487, "learning_rate": 9.967766732513635e-06, "loss": 1.8811, "step": 524 }, { "epoch": 0.07, "grad_norm": 10.582415424250344, "learning_rate": 9.967537970577755e-06, "loss": 1.8437, "step": 525 }, { "epoch": 0.07, "grad_norm": 8.670155436106993, "learning_rate": 9.967308402383065e-06, "loss": 1.8283, "step": 526 }, { "epoch": 0.07, "grad_norm": 8.06344369176699, "learning_rate": 9.967078027966825e-06, "loss": 1.7934, "step": 527 }, { "epoch": 0.07, "grad_norm": 6.791838853843959, "learning_rate": 9.966846847366424e-06, "loss": 1.68, "step": 528 }, { "epoch": 0.07, "grad_norm": 8.095156253882488, "learning_rate": 9.96661486061939e-06, "loss": 1.7864, "step": 529 }, { "epoch": 0.07, "grad_norm": 6.831536033177788, "learning_rate": 9.96638206776337e-06, "loss": 1.804, "step": 530 }, { "epoch": 0.07, "grad_norm": 9.370257110484397, "learning_rate": 9.966148468836147e-06, "loss": 2.1418, "step": 531 }, { "epoch": 0.07, "grad_norm": 7.9515088387898665, "learning_rate": 9.965914063875641e-06, "loss": 1.6919, "step": 532 }, { "epoch": 0.07, "grad_norm": 8.13391038932991, "learning_rate": 9.965678852919894e-06, "loss": 2.0014, "step": 533 }, { "epoch": 0.07, "grad_norm": 8.000520307637027, "learning_rate": 9.96544283600708e-06, "loss": 1.934, "step": 534 }, { "epoch": 0.07, "grad_norm": 8.037403638689309, "learning_rate": 9.965206013175508e-06, "loss": 2.0487, "step": 535 }, { "epoch": 0.07, "grad_norm": 8.457384375661851, "learning_rate": 9.964968384463616e-06, "loss": 2.0857, "step": 536 }, { "epoch": 0.07, "grad_norm": 9.860265817198679, "learning_rate": 9.964729949909973e-06, "loss": 2.1883, "step": 537 }, { "epoch": 0.07, "grad_norm": 8.014333361348966, "learning_rate": 9.964490709553276e-06, "loss": 1.8132, "step": 538 }, { "epoch": 0.07, "grad_norm": 8.4129740197087, "learning_rate": 9.964250663432356e-06, "loss": 1.7912, "step": 539 }, { "epoch": 0.07, "grad_norm": 8.933734005915564, "learning_rate": 9.964009811586175e-06, "loss": 1.9046, "step": 540 }, { "epoch": 0.07, "grad_norm": 9.549317746558765, "learning_rate": 9.963768154053823e-06, "loss": 1.8676, "step": 541 }, { "epoch": 0.07, "grad_norm": 8.116244338780824, "learning_rate": 9.963525690874523e-06, "loss": 1.8285, "step": 542 }, { "epoch": 0.07, "grad_norm": 9.875266898424577, "learning_rate": 9.963282422087628e-06, "loss": 1.6955, "step": 543 }, { "epoch": 0.07, "grad_norm": 8.203434472513207, "learning_rate": 9.963038347732624e-06, "loss": 1.6714, "step": 544 }, { "epoch": 0.07, "grad_norm": 8.781989330085533, "learning_rate": 9.96279346784912e-06, "loss": 1.8388, "step": 545 }, { "epoch": 0.07, "grad_norm": 7.981790458676795, "learning_rate": 9.962547782476865e-06, "loss": 2.0871, "step": 546 }, { "epoch": 0.07, "grad_norm": 7.017824531045713, "learning_rate": 9.962301291655738e-06, "loss": 2.05, "step": 547 }, { "epoch": 0.07, "grad_norm": 8.333120012197082, "learning_rate": 9.96205399542574e-06, "loss": 1.9874, "step": 548 }, { "epoch": 0.07, "grad_norm": 7.919466048686052, "learning_rate": 9.961805893827015e-06, "loss": 1.832, "step": 549 }, { "epoch": 0.07, "grad_norm": 7.84064800555041, "learning_rate": 9.961556986899824e-06, "loss": 1.741, "step": 550 }, { "epoch": 0.07, "grad_norm": 7.221266783637965, "learning_rate": 9.96130727468457e-06, "loss": 1.8124, "step": 551 }, { "epoch": 0.07, "grad_norm": 8.442517086415826, "learning_rate": 9.961056757221785e-06, "loss": 1.927, "step": 552 }, { "epoch": 0.07, "grad_norm": 8.765365044996123, "learning_rate": 9.960805434552124e-06, "loss": 1.851, "step": 553 }, { "epoch": 0.07, "grad_norm": 7.972268372033053, "learning_rate": 9.960553306716382e-06, "loss": 2.0732, "step": 554 }, { "epoch": 0.07, "grad_norm": 8.861698002570908, "learning_rate": 9.960300373755479e-06, "loss": 1.7481, "step": 555 }, { "epoch": 0.07, "grad_norm": 7.66939336377934, "learning_rate": 9.960046635710467e-06, "loss": 1.7972, "step": 556 }, { "epoch": 0.07, "grad_norm": 6.993713648684445, "learning_rate": 9.959792092622532e-06, "loss": 1.8147, "step": 557 }, { "epoch": 0.07, "grad_norm": 7.356860464188796, "learning_rate": 9.959536744532982e-06, "loss": 1.7704, "step": 558 }, { "epoch": 0.07, "grad_norm": 8.936347086135651, "learning_rate": 9.95928059148327e-06, "loss": 2.0885, "step": 559 }, { "epoch": 0.07, "grad_norm": 9.616047951859102, "learning_rate": 9.95902363351496e-06, "loss": 1.9782, "step": 560 }, { "epoch": 0.07, "grad_norm": 7.088066523130039, "learning_rate": 9.958765870669769e-06, "loss": 1.6198, "step": 561 }, { "epoch": 0.07, "grad_norm": 7.848287310933424, "learning_rate": 9.958507302989527e-06, "loss": 2.04, "step": 562 }, { "epoch": 0.07, "grad_norm": 7.25303287056634, "learning_rate": 9.9582479305162e-06, "loss": 1.9542, "step": 563 }, { "epoch": 0.07, "grad_norm": 8.612106496045387, "learning_rate": 9.95798775329189e-06, "loss": 1.9807, "step": 564 }, { "epoch": 0.07, "grad_norm": 8.19971434931731, "learning_rate": 9.957726771358823e-06, "loss": 2.0063, "step": 565 }, { "epoch": 0.07, "grad_norm": 7.326279741987214, "learning_rate": 9.957464984759357e-06, "loss": 1.7114, "step": 566 }, { "epoch": 0.07, "grad_norm": 8.027974313164918, "learning_rate": 9.95720239353598e-06, "loss": 1.7676, "step": 567 }, { "epoch": 0.07, "grad_norm": 7.196254404568526, "learning_rate": 9.956938997731317e-06, "loss": 2.138, "step": 568 }, { "epoch": 0.07, "grad_norm": 6.81790250816353, "learning_rate": 9.956674797388115e-06, "loss": 1.8636, "step": 569 }, { "epoch": 0.07, "grad_norm": 7.454920184933398, "learning_rate": 9.956409792549255e-06, "loss": 2.0166, "step": 570 }, { "epoch": 0.07, "grad_norm": 7.406664415278992, "learning_rate": 9.95614398325775e-06, "loss": 1.8573, "step": 571 }, { "epoch": 0.07, "grad_norm": 8.240859852216376, "learning_rate": 9.955877369556742e-06, "loss": 1.8703, "step": 572 }, { "epoch": 0.07, "grad_norm": 8.032071142204408, "learning_rate": 9.955609951489504e-06, "loss": 1.7318, "step": 573 }, { "epoch": 0.07, "grad_norm": 7.496168479357124, "learning_rate": 9.955341729099439e-06, "loss": 1.8094, "step": 574 }, { "epoch": 0.07, "grad_norm": 6.781143595738223, "learning_rate": 9.955072702430082e-06, "loss": 1.662, "step": 575 }, { "epoch": 0.07, "grad_norm": 7.851374223798546, "learning_rate": 9.954802871525095e-06, "loss": 1.8739, "step": 576 }, { "epoch": 0.07, "grad_norm": 8.081232759057928, "learning_rate": 9.954532236428277e-06, "loss": 1.7393, "step": 577 }, { "epoch": 0.07, "grad_norm": 7.988742041832586, "learning_rate": 9.95426079718355e-06, "loss": 1.7715, "step": 578 }, { "epoch": 0.07, "grad_norm": 7.846522324195767, "learning_rate": 9.95398855383497e-06, "loss": 1.9651, "step": 579 }, { "epoch": 0.07, "grad_norm": 7.165279211166078, "learning_rate": 9.953715506426727e-06, "loss": 2.0604, "step": 580 }, { "epoch": 0.07, "grad_norm": 9.583307613852963, "learning_rate": 9.953441655003135e-06, "loss": 1.5029, "step": 581 }, { "epoch": 0.07, "grad_norm": 8.378109947900741, "learning_rate": 9.953166999608645e-06, "loss": 1.9364, "step": 582 }, { "epoch": 0.07, "grad_norm": 7.15588409849187, "learning_rate": 9.95289154028783e-06, "loss": 1.7724, "step": 583 }, { "epoch": 0.07, "grad_norm": 7.941460153727374, "learning_rate": 9.952615277085402e-06, "loss": 1.9826, "step": 584 }, { "epoch": 0.07, "grad_norm": 8.49338386721728, "learning_rate": 9.952338210046202e-06, "loss": 2.1063, "step": 585 }, { "epoch": 0.07, "grad_norm": 7.8590660595669775, "learning_rate": 9.952060339215194e-06, "loss": 1.82, "step": 586 }, { "epoch": 0.07, "grad_norm": 6.931784472523082, "learning_rate": 9.951781664637482e-06, "loss": 2.0143, "step": 587 }, { "epoch": 0.07, "grad_norm": 8.027711497645042, "learning_rate": 9.951502186358295e-06, "loss": 1.8886, "step": 588 }, { "epoch": 0.07, "grad_norm": 6.922234412703324, "learning_rate": 9.951221904422993e-06, "loss": 1.7727, "step": 589 }, { "epoch": 0.07, "grad_norm": 6.988872740004942, "learning_rate": 9.95094081887707e-06, "loss": 1.9162, "step": 590 }, { "epoch": 0.07, "grad_norm": 31.42668362355611, "learning_rate": 9.950658929766146e-06, "loss": 2.0486, "step": 591 }, { "epoch": 0.07, "grad_norm": 9.866539704132844, "learning_rate": 9.950376237135974e-06, "loss": 2.0121, "step": 592 }, { "epoch": 0.07, "grad_norm": 7.92316240163665, "learning_rate": 9.950092741032435e-06, "loss": 1.9333, "step": 593 }, { "epoch": 0.07, "grad_norm": 8.134126371436894, "learning_rate": 9.949808441501542e-06, "loss": 2.2441, "step": 594 }, { "epoch": 0.07, "grad_norm": 7.467407380590905, "learning_rate": 9.94952333858944e-06, "loss": 2.1662, "step": 595 }, { "epoch": 0.07, "grad_norm": 7.302355263226073, "learning_rate": 9.949237432342404e-06, "loss": 1.8906, "step": 596 }, { "epoch": 0.07, "grad_norm": 8.421290526915364, "learning_rate": 9.948950722806833e-06, "loss": 1.8661, "step": 597 }, { "epoch": 0.07, "grad_norm": 7.6061165657864915, "learning_rate": 9.948663210029269e-06, "loss": 1.9031, "step": 598 }, { "epoch": 0.07, "grad_norm": 8.230104025368156, "learning_rate": 9.94837489405637e-06, "loss": 1.8105, "step": 599 }, { "epoch": 0.07, "grad_norm": 7.688665438041219, "learning_rate": 9.948085774934932e-06, "loss": 1.442, "step": 600 }, { "epoch": 0.07, "grad_norm": 8.483547732095696, "learning_rate": 9.947795852711886e-06, "loss": 2.017, "step": 601 }, { "epoch": 0.07, "grad_norm": 8.742022185616351, "learning_rate": 9.947505127434282e-06, "loss": 1.7758, "step": 602 }, { "epoch": 0.07, "grad_norm": 7.742785339268938, "learning_rate": 9.94721359914931e-06, "loss": 1.7082, "step": 603 }, { "epoch": 0.08, "grad_norm": 7.919268129663995, "learning_rate": 9.946921267904285e-06, "loss": 1.8278, "step": 604 }, { "epoch": 0.08, "grad_norm": 8.868643849139634, "learning_rate": 9.946628133746657e-06, "loss": 2.1134, "step": 605 }, { "epoch": 0.08, "grad_norm": 7.677572067619369, "learning_rate": 9.946334196724e-06, "loss": 1.9266, "step": 606 }, { "epoch": 0.08, "grad_norm": 7.556012021420873, "learning_rate": 9.94603945688402e-06, "loss": 1.6988, "step": 607 }, { "epoch": 0.08, "grad_norm": 7.255050855988665, "learning_rate": 9.945743914274557e-06, "loss": 1.6556, "step": 608 }, { "epoch": 0.08, "grad_norm": 8.595396799256125, "learning_rate": 9.945447568943582e-06, "loss": 1.5848, "step": 609 }, { "epoch": 0.08, "grad_norm": 7.892140560717609, "learning_rate": 9.94515042093919e-06, "loss": 1.761, "step": 610 }, { "epoch": 0.08, "grad_norm": 7.5897253808779315, "learning_rate": 9.944852470309612e-06, "loss": 1.7817, "step": 611 }, { "epoch": 0.08, "grad_norm": 7.7506823532960185, "learning_rate": 9.944553717103204e-06, "loss": 1.9466, "step": 612 }, { "epoch": 0.08, "grad_norm": 8.163179313616794, "learning_rate": 9.944254161368457e-06, "loss": 1.8926, "step": 613 }, { "epoch": 0.08, "grad_norm": 7.933831412877204, "learning_rate": 9.943953803153993e-06, "loss": 1.8219, "step": 614 }, { "epoch": 0.08, "grad_norm": 8.228901623949463, "learning_rate": 9.943652642508556e-06, "loss": 1.6554, "step": 615 }, { "epoch": 0.08, "grad_norm": 8.120402739841031, "learning_rate": 9.943350679481032e-06, "loss": 1.5776, "step": 616 }, { "epoch": 0.08, "grad_norm": 8.030596104599914, "learning_rate": 9.943047914120426e-06, "loss": 1.8257, "step": 617 }, { "epoch": 0.08, "grad_norm": 7.7012592239811655, "learning_rate": 9.942744346475884e-06, "loss": 1.7434, "step": 618 }, { "epoch": 0.08, "grad_norm": 7.624363852872998, "learning_rate": 9.942439976596672e-06, "loss": 1.92, "step": 619 }, { "epoch": 0.08, "grad_norm": 7.618497856849879, "learning_rate": 9.942134804532194e-06, "loss": 1.796, "step": 620 }, { "epoch": 0.08, "grad_norm": 8.422886130825956, "learning_rate": 9.941828830331978e-06, "loss": 1.8338, "step": 621 }, { "epoch": 0.08, "grad_norm": 8.36033241857807, "learning_rate": 9.941522054045689e-06, "loss": 1.8963, "step": 622 }, { "epoch": 0.08, "grad_norm": 9.255745011634112, "learning_rate": 9.941214475723116e-06, "loss": 1.7798, "step": 623 }, { "epoch": 0.08, "grad_norm": 7.137464759526632, "learning_rate": 9.940906095414182e-06, "loss": 1.8252, "step": 624 }, { "epoch": 0.08, "grad_norm": 7.583765700987652, "learning_rate": 9.940596913168938e-06, "loss": 1.7738, "step": 625 }, { "epoch": 0.08, "grad_norm": 7.334789973440467, "learning_rate": 9.940286929037567e-06, "loss": 1.8427, "step": 626 }, { "epoch": 0.08, "grad_norm": 6.674243206698386, "learning_rate": 9.939976143070378e-06, "loss": 1.6447, "step": 627 }, { "epoch": 0.08, "grad_norm": 7.989154443940641, "learning_rate": 9.939664555317818e-06, "loss": 1.8788, "step": 628 }, { "epoch": 0.08, "grad_norm": 7.142556172404867, "learning_rate": 9.939352165830454e-06, "loss": 1.8338, "step": 629 }, { "epoch": 0.08, "grad_norm": 7.504269494685725, "learning_rate": 9.939038974658994e-06, "loss": 1.8834, "step": 630 }, { "epoch": 0.08, "grad_norm": 7.26707488004708, "learning_rate": 9.938724981854268e-06, "loss": 1.8406, "step": 631 }, { "epoch": 0.08, "grad_norm": 7.814466844185806, "learning_rate": 9.938410187467238e-06, "loss": 1.8269, "step": 632 }, { "epoch": 0.08, "grad_norm": 7.449433181561858, "learning_rate": 9.938094591548999e-06, "loss": 1.7237, "step": 633 }, { "epoch": 0.08, "grad_norm": 7.46332512455563, "learning_rate": 9.937778194150771e-06, "loss": 1.9577, "step": 634 }, { "epoch": 0.08, "grad_norm": 7.820486264168295, "learning_rate": 9.93746099532391e-06, "loss": 1.8517, "step": 635 }, { "epoch": 0.08, "grad_norm": 9.01603438639033, "learning_rate": 9.937142995119897e-06, "loss": 1.9267, "step": 636 }, { "epoch": 0.08, "grad_norm": 7.830708260381816, "learning_rate": 9.936824193590346e-06, "loss": 2.0202, "step": 637 }, { "epoch": 0.08, "grad_norm": 22.361446186821112, "learning_rate": 9.936504590787002e-06, "loss": 1.8344, "step": 638 }, { "epoch": 0.08, "grad_norm": 7.936939084438591, "learning_rate": 9.936184186761737e-06, "loss": 1.7858, "step": 639 }, { "epoch": 0.08, "grad_norm": 6.8766767335850405, "learning_rate": 9.93586298156655e-06, "loss": 1.7821, "step": 640 }, { "epoch": 0.08, "grad_norm": 7.435169419024877, "learning_rate": 9.935540975253582e-06, "loss": 1.9741, "step": 641 }, { "epoch": 0.08, "grad_norm": 7.147872566180323, "learning_rate": 9.935218167875093e-06, "loss": 1.8577, "step": 642 }, { "epoch": 0.08, "grad_norm": 7.777160560974835, "learning_rate": 9.934894559483475e-06, "loss": 1.8773, "step": 643 }, { "epoch": 0.08, "grad_norm": 7.096228660263305, "learning_rate": 9.934570150131251e-06, "loss": 1.8342, "step": 644 }, { "epoch": 0.08, "grad_norm": 8.124881083881832, "learning_rate": 9.934244939871078e-06, "loss": 1.7322, "step": 645 }, { "epoch": 0.08, "grad_norm": 7.206135944059945, "learning_rate": 9.933918928755739e-06, "loss": 1.9424, "step": 646 }, { "epoch": 0.08, "grad_norm": 7.843045165171175, "learning_rate": 9.933592116838141e-06, "loss": 1.852, "step": 647 }, { "epoch": 0.08, "grad_norm": 8.437197420135524, "learning_rate": 9.933264504171337e-06, "loss": 1.9489, "step": 648 }, { "epoch": 0.08, "grad_norm": 7.050173677306107, "learning_rate": 9.932936090808494e-06, "loss": 1.7422, "step": 649 }, { "epoch": 0.08, "grad_norm": 6.611959016028743, "learning_rate": 9.932606876802916e-06, "loss": 1.693, "step": 650 }, { "epoch": 0.08, "grad_norm": 9.06272704562109, "learning_rate": 9.932276862208038e-06, "loss": 1.783, "step": 651 }, { "epoch": 0.08, "grad_norm": 8.46784380857866, "learning_rate": 9.931946047077422e-06, "loss": 1.7957, "step": 652 }, { "epoch": 0.08, "grad_norm": 7.4829791504333345, "learning_rate": 9.931614431464763e-06, "loss": 1.8121, "step": 653 }, { "epoch": 0.08, "grad_norm": 6.911302767350926, "learning_rate": 9.93128201542388e-06, "loss": 1.804, "step": 654 }, { "epoch": 0.08, "grad_norm": 7.204181425702822, "learning_rate": 9.930948799008728e-06, "loss": 1.9467, "step": 655 }, { "epoch": 0.08, "grad_norm": 8.5091312795221, "learning_rate": 9.930614782273392e-06, "loss": 1.9989, "step": 656 }, { "epoch": 0.08, "grad_norm": 6.64559342116815, "learning_rate": 9.930279965272082e-06, "loss": 1.8474, "step": 657 }, { "epoch": 0.08, "grad_norm": 6.47787351713488, "learning_rate": 9.929944348059143e-06, "loss": 1.6872, "step": 658 }, { "epoch": 0.08, "grad_norm": 6.405015668239739, "learning_rate": 9.929607930689046e-06, "loss": 1.6765, "step": 659 }, { "epoch": 0.08, "grad_norm": 7.643429010428409, "learning_rate": 9.929270713216393e-06, "loss": 1.793, "step": 660 }, { "epoch": 0.08, "grad_norm": 7.463849497553075, "learning_rate": 9.928932695695917e-06, "loss": 1.6619, "step": 661 }, { "epoch": 0.08, "grad_norm": 8.31872743150783, "learning_rate": 9.92859387818248e-06, "loss": 1.7943, "step": 662 }, { "epoch": 0.08, "grad_norm": 7.983990726844712, "learning_rate": 9.928254260731073e-06, "loss": 1.9603, "step": 663 }, { "epoch": 0.08, "grad_norm": 7.004499078695481, "learning_rate": 9.927913843396822e-06, "loss": 2.0045, "step": 664 }, { "epoch": 0.08, "grad_norm": 7.376919574255647, "learning_rate": 9.927572626234973e-06, "loss": 1.7337, "step": 665 }, { "epoch": 0.08, "grad_norm": 7.365781024467354, "learning_rate": 9.927230609300908e-06, "loss": 1.7455, "step": 666 }, { "epoch": 0.08, "grad_norm": 7.841233367122626, "learning_rate": 9.926887792650143e-06, "loss": 2.0133, "step": 667 }, { "epoch": 0.08, "grad_norm": 7.952807605142409, "learning_rate": 9.926544176338315e-06, "loss": 2.0434, "step": 668 }, { "epoch": 0.08, "grad_norm": 7.012883400554871, "learning_rate": 9.926199760421196e-06, "loss": 1.9161, "step": 669 }, { "epoch": 0.08, "grad_norm": 7.589730363167433, "learning_rate": 9.925854544954684e-06, "loss": 1.8519, "step": 670 }, { "epoch": 0.08, "grad_norm": 7.218094708145925, "learning_rate": 9.925508529994816e-06, "loss": 1.7063, "step": 671 }, { "epoch": 0.08, "grad_norm": 7.155872399834244, "learning_rate": 9.925161715597745e-06, "loss": 1.8323, "step": 672 }, { "epoch": 0.08, "grad_norm": 8.417688211388366, "learning_rate": 9.924814101819766e-06, "loss": 1.9481, "step": 673 }, { "epoch": 0.08, "grad_norm": 9.091196371660361, "learning_rate": 9.924465688717294e-06, "loss": 1.9866, "step": 674 }, { "epoch": 0.08, "grad_norm": 7.9182038338788026, "learning_rate": 9.924116476346882e-06, "loss": 1.7705, "step": 675 }, { "epoch": 0.08, "grad_norm": 8.35146748102017, "learning_rate": 9.923766464765208e-06, "loss": 1.7127, "step": 676 }, { "epoch": 0.08, "grad_norm": 6.972686120385891, "learning_rate": 9.92341565402908e-06, "loss": 1.6751, "step": 677 }, { "epoch": 0.08, "grad_norm": 6.780118970363044, "learning_rate": 9.923064044195438e-06, "loss": 1.5396, "step": 678 }, { "epoch": 0.08, "grad_norm": 8.417342806527268, "learning_rate": 9.92271163532135e-06, "loss": 1.9459, "step": 679 }, { "epoch": 0.08, "grad_norm": 8.19850059829785, "learning_rate": 9.922358427464013e-06, "loss": 1.886, "step": 680 }, { "epoch": 0.08, "grad_norm": 7.710978409684634, "learning_rate": 9.922004420680755e-06, "loss": 1.942, "step": 681 }, { "epoch": 0.08, "grad_norm": 7.521534277781298, "learning_rate": 9.921649615029035e-06, "loss": 1.8354, "step": 682 }, { "epoch": 0.08, "grad_norm": 6.784484234878621, "learning_rate": 9.921294010566434e-06, "loss": 1.8656, "step": 683 }, { "epoch": 0.09, "grad_norm": 7.685813519990028, "learning_rate": 9.920937607350677e-06, "loss": 1.7572, "step": 684 }, { "epoch": 0.09, "grad_norm": 7.200543692213148, "learning_rate": 9.920580405439603e-06, "loss": 1.671, "step": 685 }, { "epoch": 0.09, "grad_norm": 8.004625981946276, "learning_rate": 9.92022240489119e-06, "loss": 1.8915, "step": 686 }, { "epoch": 0.09, "grad_norm": 8.115694350724718, "learning_rate": 9.919863605763547e-06, "loss": 1.9619, "step": 687 }, { "epoch": 0.09, "grad_norm": 6.548507677700247, "learning_rate": 9.919504008114906e-06, "loss": 1.8059, "step": 688 }, { "epoch": 0.09, "grad_norm": 10.079487622265212, "learning_rate": 9.91914361200363e-06, "loss": 1.8281, "step": 689 }, { "epoch": 0.09, "grad_norm": 7.074863311008134, "learning_rate": 9.918782417488216e-06, "loss": 1.689, "step": 690 }, { "epoch": 0.09, "grad_norm": 9.273587557436947, "learning_rate": 9.918420424627289e-06, "loss": 1.9633, "step": 691 }, { "epoch": 0.09, "grad_norm": 7.278123936092388, "learning_rate": 9.918057633479598e-06, "loss": 1.7078, "step": 692 }, { "epoch": 0.09, "grad_norm": 8.360386634190833, "learning_rate": 9.91769404410403e-06, "loss": 1.9771, "step": 693 }, { "epoch": 0.09, "grad_norm": 7.172264848964162, "learning_rate": 9.917329656559595e-06, "loss": 1.7041, "step": 694 }, { "epoch": 0.09, "grad_norm": 8.004007625732703, "learning_rate": 9.916964470905438e-06, "loss": 2.0367, "step": 695 }, { "epoch": 0.09, "grad_norm": 6.936352651997473, "learning_rate": 9.916598487200827e-06, "loss": 1.8393, "step": 696 }, { "epoch": 0.09, "grad_norm": 7.170329837391496, "learning_rate": 9.916231705505166e-06, "loss": 1.7549, "step": 697 }, { "epoch": 0.09, "grad_norm": 7.404897782756866, "learning_rate": 9.915864125877984e-06, "loss": 1.909, "step": 698 }, { "epoch": 0.09, "grad_norm": 8.239530568201257, "learning_rate": 9.915495748378943e-06, "loss": 2.0506, "step": 699 }, { "epoch": 0.09, "grad_norm": 8.155256021085108, "learning_rate": 9.91512657306783e-06, "loss": 1.7724, "step": 700 }, { "epoch": 0.09, "grad_norm": 8.281402692002338, "learning_rate": 9.914756600004567e-06, "loss": 1.853, "step": 701 }, { "epoch": 0.09, "grad_norm": 8.053618186891875, "learning_rate": 9.914385829249203e-06, "loss": 1.727, "step": 702 }, { "epoch": 0.09, "grad_norm": 8.592662623419198, "learning_rate": 9.914014260861913e-06, "loss": 2.2241, "step": 703 }, { "epoch": 0.09, "grad_norm": 8.380929073000495, "learning_rate": 9.913641894903006e-06, "loss": 1.4893, "step": 704 }, { "epoch": 0.09, "grad_norm": 9.896329621357431, "learning_rate": 9.91326873143292e-06, "loss": 1.992, "step": 705 }, { "epoch": 0.09, "grad_norm": 8.77687716226877, "learning_rate": 9.91289477051222e-06, "loss": 1.8, "step": 706 }, { "epoch": 0.09, "grad_norm": 9.542124723427904, "learning_rate": 9.912520012201603e-06, "loss": 1.8672, "step": 707 }, { "epoch": 0.09, "grad_norm": 7.992677571427686, "learning_rate": 9.912144456561894e-06, "loss": 1.99, "step": 708 }, { "epoch": 0.09, "grad_norm": 8.517096882392416, "learning_rate": 9.911768103654047e-06, "loss": 2.0293, "step": 709 }, { "epoch": 0.09, "grad_norm": 7.608548926868875, "learning_rate": 9.911390953539149e-06, "loss": 1.9396, "step": 710 }, { "epoch": 0.09, "grad_norm": 8.07077027873993, "learning_rate": 9.91101300627841e-06, "loss": 1.6014, "step": 711 }, { "epoch": 0.09, "grad_norm": 6.977005605607578, "learning_rate": 9.910634261933175e-06, "loss": 1.8582, "step": 712 }, { "epoch": 0.09, "grad_norm": 9.07739253628511, "learning_rate": 9.910254720564914e-06, "loss": 1.6153, "step": 713 }, { "epoch": 0.09, "grad_norm": 7.30722594892479, "learning_rate": 9.909874382235232e-06, "loss": 1.696, "step": 714 }, { "epoch": 0.09, "grad_norm": 7.564303504703151, "learning_rate": 9.909493247005858e-06, "loss": 1.7105, "step": 715 }, { "epoch": 0.09, "grad_norm": 7.933602961430256, "learning_rate": 9.909111314938654e-06, "loss": 1.7253, "step": 716 }, { "epoch": 0.09, "grad_norm": 7.360634300877582, "learning_rate": 9.908728586095607e-06, "loss": 1.5283, "step": 717 }, { "epoch": 0.09, "grad_norm": 7.287250700485657, "learning_rate": 9.908345060538838e-06, "loss": 1.7892, "step": 718 }, { "epoch": 0.09, "grad_norm": 7.688045081870204, "learning_rate": 9.907960738330593e-06, "loss": 1.8818, "step": 719 }, { "epoch": 0.09, "grad_norm": 6.641012490896776, "learning_rate": 9.907575619533253e-06, "loss": 1.834, "step": 720 }, { "epoch": 0.09, "grad_norm": 7.828379378208179, "learning_rate": 9.907189704209324e-06, "loss": 1.8902, "step": 721 }, { "epoch": 0.09, "grad_norm": 7.7454405862026485, "learning_rate": 9.906802992421438e-06, "loss": 1.7681, "step": 722 }, { "epoch": 0.09, "grad_norm": 8.778983246598786, "learning_rate": 9.906415484232368e-06, "loss": 1.8255, "step": 723 }, { "epoch": 0.09, "grad_norm": 7.325525020336499, "learning_rate": 9.906027179705003e-06, "loss": 1.8343, "step": 724 }, { "epoch": 0.09, "grad_norm": 8.140924225053409, "learning_rate": 9.905638078902367e-06, "loss": 1.9752, "step": 725 }, { "epoch": 0.09, "grad_norm": 8.962936417204293, "learning_rate": 9.905248181887617e-06, "loss": 1.7868, "step": 726 }, { "epoch": 0.09, "grad_norm": 7.559215519894962, "learning_rate": 9.90485748872403e-06, "loss": 1.8597, "step": 727 }, { "epoch": 0.09, "grad_norm": 8.282629795422684, "learning_rate": 9.904465999475024e-06, "loss": 1.5891, "step": 728 }, { "epoch": 0.09, "grad_norm": 8.482278941886218, "learning_rate": 9.904073714204137e-06, "loss": 1.9475, "step": 729 }, { "epoch": 0.09, "grad_norm": 6.830656950918769, "learning_rate": 9.903680632975036e-06, "loss": 1.5649, "step": 730 }, { "epoch": 0.09, "grad_norm": 8.039576386563544, "learning_rate": 9.903286755851525e-06, "loss": 2.0328, "step": 731 }, { "epoch": 0.09, "grad_norm": 7.53825536108382, "learning_rate": 9.90289208289753e-06, "loss": 1.6605, "step": 732 }, { "epoch": 0.09, "grad_norm": 7.3194546545456864, "learning_rate": 9.90249661417711e-06, "loss": 1.7956, "step": 733 }, { "epoch": 0.09, "grad_norm": 7.399565852119248, "learning_rate": 9.90210034975445e-06, "loss": 1.8356, "step": 734 }, { "epoch": 0.09, "grad_norm": 7.388005197996601, "learning_rate": 9.901703289693867e-06, "loss": 1.7701, "step": 735 }, { "epoch": 0.09, "grad_norm": 7.016501297693813, "learning_rate": 9.901305434059804e-06, "loss": 1.6189, "step": 736 }, { "epoch": 0.09, "grad_norm": 8.902797516355466, "learning_rate": 9.90090678291684e-06, "loss": 1.5977, "step": 737 }, { "epoch": 0.09, "grad_norm": 13.802152336745285, "learning_rate": 9.900507336329672e-06, "loss": 1.9029, "step": 738 }, { "epoch": 0.09, "grad_norm": 8.068631638562342, "learning_rate": 9.900107094363139e-06, "loss": 1.8235, "step": 739 }, { "epoch": 0.09, "grad_norm": 7.608126399684696, "learning_rate": 9.899706057082197e-06, "loss": 1.7185, "step": 740 }, { "epoch": 0.09, "grad_norm": 8.049715337976656, "learning_rate": 9.899304224551937e-06, "loss": 1.6892, "step": 741 }, { "epoch": 0.09, "grad_norm": 8.711958317394519, "learning_rate": 9.898901596837582e-06, "loss": 1.8057, "step": 742 }, { "epoch": 0.09, "grad_norm": 9.207298402898415, "learning_rate": 9.89849817400448e-06, "loss": 1.9835, "step": 743 }, { "epoch": 0.09, "grad_norm": 7.912951755227244, "learning_rate": 9.898093956118106e-06, "loss": 1.687, "step": 744 }, { "epoch": 0.09, "grad_norm": 6.825960445889269, "learning_rate": 9.897688943244071e-06, "loss": 1.7599, "step": 745 }, { "epoch": 0.09, "grad_norm": 7.4333692766121775, "learning_rate": 9.897283135448106e-06, "loss": 1.5217, "step": 746 }, { "epoch": 0.09, "grad_norm": 8.236399894055404, "learning_rate": 9.896876532796078e-06, "loss": 1.588, "step": 747 }, { "epoch": 0.09, "grad_norm": 6.744873708966185, "learning_rate": 9.89646913535398e-06, "loss": 1.822, "step": 748 }, { "epoch": 0.09, "grad_norm": 7.277362001336439, "learning_rate": 9.89606094318794e-06, "loss": 1.982, "step": 749 }, { "epoch": 0.09, "grad_norm": 8.401131648854218, "learning_rate": 9.895651956364203e-06, "loss": 1.7802, "step": 750 }, { "epoch": 0.09, "grad_norm": 6.78890171732429, "learning_rate": 9.895242174949153e-06, "loss": 1.9999, "step": 751 }, { "epoch": 0.09, "grad_norm": 7.961685776051477, "learning_rate": 9.894831599009298e-06, "loss": 1.9766, "step": 752 }, { "epoch": 0.09, "grad_norm": 7.743811050804551, "learning_rate": 9.89442022861128e-06, "loss": 1.7278, "step": 753 }, { "epoch": 0.09, "grad_norm": 8.210259453824982, "learning_rate": 9.894008063821863e-06, "loss": 2.0188, "step": 754 }, { "epoch": 0.09, "grad_norm": 6.939775283743842, "learning_rate": 9.893595104707947e-06, "loss": 1.5905, "step": 755 }, { "epoch": 0.09, "grad_norm": 7.564925755978224, "learning_rate": 9.893181351336554e-06, "loss": 1.8423, "step": 756 }, { "epoch": 0.09, "grad_norm": 7.533438411971866, "learning_rate": 9.892766803774842e-06, "loss": 1.817, "step": 757 }, { "epoch": 0.09, "grad_norm": 8.391726404807697, "learning_rate": 9.892351462090093e-06, "loss": 1.6395, "step": 758 }, { "epoch": 0.09, "grad_norm": 6.6679391078697146, "learning_rate": 9.891935326349718e-06, "loss": 1.8491, "step": 759 }, { "epoch": 0.09, "grad_norm": 7.009658728974396, "learning_rate": 9.891518396621257e-06, "loss": 1.8423, "step": 760 }, { "epoch": 0.09, "grad_norm": 7.220180273638715, "learning_rate": 9.891100672972385e-06, "loss": 1.8116, "step": 761 }, { "epoch": 0.09, "grad_norm": 7.6265530145738305, "learning_rate": 9.8906821554709e-06, "loss": 1.7732, "step": 762 }, { "epoch": 0.09, "grad_norm": 7.296761929875281, "learning_rate": 9.890262844184723e-06, "loss": 1.5212, "step": 763 }, { "epoch": 0.1, "grad_norm": 7.495230444629777, "learning_rate": 9.889842739181915e-06, "loss": 1.8767, "step": 764 }, { "epoch": 0.1, "grad_norm": 6.380320840970532, "learning_rate": 9.889421840530665e-06, "loss": 1.8463, "step": 765 }, { "epoch": 0.1, "grad_norm": 6.4612408502577665, "learning_rate": 9.889000148299282e-06, "loss": 1.4066, "step": 766 }, { "epoch": 0.1, "grad_norm": 7.16717642398613, "learning_rate": 9.888577662556211e-06, "loss": 1.9568, "step": 767 }, { "epoch": 0.1, "grad_norm": 7.611933051296303, "learning_rate": 9.888154383370023e-06, "loss": 1.7591, "step": 768 }, { "epoch": 0.1, "grad_norm": 7.220486076569932, "learning_rate": 9.88773031080942e-06, "loss": 1.5654, "step": 769 }, { "epoch": 0.1, "grad_norm": 7.344632590803972, "learning_rate": 9.887305444943229e-06, "loss": 1.6683, "step": 770 }, { "epoch": 0.1, "grad_norm": 8.03471587537762, "learning_rate": 9.886879785840412e-06, "loss": 1.8343, "step": 771 }, { "epoch": 0.1, "grad_norm": 8.239616131677518, "learning_rate": 9.886453333570049e-06, "loss": 2.0192, "step": 772 }, { "epoch": 0.1, "grad_norm": 8.868664150681909, "learning_rate": 9.886026088201362e-06, "loss": 1.7092, "step": 773 }, { "epoch": 0.1, "grad_norm": 8.153381853595867, "learning_rate": 9.885598049803693e-06, "loss": 1.6842, "step": 774 }, { "epoch": 0.1, "grad_norm": 6.954115872350479, "learning_rate": 9.885169218446516e-06, "loss": 1.7296, "step": 775 }, { "epoch": 0.1, "grad_norm": 9.14246343801533, "learning_rate": 9.884739594199429e-06, "loss": 1.9249, "step": 776 }, { "epoch": 0.1, "grad_norm": 6.65568729830004, "learning_rate": 9.884309177132168e-06, "loss": 1.5506, "step": 777 }, { "epoch": 0.1, "grad_norm": 7.429887723415327, "learning_rate": 9.883877967314586e-06, "loss": 1.9914, "step": 778 }, { "epoch": 0.1, "grad_norm": 8.059904019345982, "learning_rate": 9.883445964816676e-06, "loss": 1.9089, "step": 779 }, { "epoch": 0.1, "grad_norm": 6.693322091320568, "learning_rate": 9.883013169708552e-06, "loss": 1.8039, "step": 780 }, { "epoch": 0.1, "grad_norm": 7.957015597505622, "learning_rate": 9.882579582060459e-06, "loss": 1.9328, "step": 781 }, { "epoch": 0.1, "grad_norm": 7.6441199101153945, "learning_rate": 9.882145201942773e-06, "loss": 1.9085, "step": 782 }, { "epoch": 0.1, "grad_norm": 7.1024253487518285, "learning_rate": 9.88171002942599e-06, "loss": 2.0352, "step": 783 }, { "epoch": 0.1, "grad_norm": 6.294099570462909, "learning_rate": 9.881274064580748e-06, "loss": 1.936, "step": 784 }, { "epoch": 0.1, "grad_norm": 8.131425161139253, "learning_rate": 9.880837307477805e-06, "loss": 1.7527, "step": 785 }, { "epoch": 0.1, "grad_norm": 7.669184868781733, "learning_rate": 9.880399758188044e-06, "loss": 1.8622, "step": 786 }, { "epoch": 0.1, "grad_norm": 7.006490574925973, "learning_rate": 9.879961416782489e-06, "loss": 1.6554, "step": 787 }, { "epoch": 0.1, "grad_norm": 7.55639503286709, "learning_rate": 9.87952228333228e-06, "loss": 1.8701, "step": 788 }, { "epoch": 0.1, "grad_norm": 7.577582464210116, "learning_rate": 9.879082357908692e-06, "loss": 1.9413, "step": 789 }, { "epoch": 0.1, "grad_norm": 7.400894615306607, "learning_rate": 9.878641640583128e-06, "loss": 1.9553, "step": 790 }, { "epoch": 0.1, "grad_norm": 7.149555333138408, "learning_rate": 9.878200131427118e-06, "loss": 2.0987, "step": 791 }, { "epoch": 0.1, "grad_norm": 7.465305147069488, "learning_rate": 9.877757830512323e-06, "loss": 1.8801, "step": 792 }, { "epoch": 0.1, "grad_norm": 7.322285567444295, "learning_rate": 9.87731473791053e-06, "loss": 1.6657, "step": 793 }, { "epoch": 0.1, "grad_norm": 7.28441421664014, "learning_rate": 9.876870853693655e-06, "loss": 1.799, "step": 794 }, { "epoch": 0.1, "grad_norm": 7.099280999667437, "learning_rate": 9.876426177933743e-06, "loss": 2.0773, "step": 795 }, { "epoch": 0.1, "grad_norm": 8.100809008491037, "learning_rate": 9.875980710702968e-06, "loss": 1.9588, "step": 796 }, { "epoch": 0.1, "grad_norm": 6.815821291988421, "learning_rate": 9.875534452073629e-06, "loss": 1.9137, "step": 797 }, { "epoch": 0.1, "grad_norm": 6.483295988111469, "learning_rate": 9.87508740211816e-06, "loss": 1.9391, "step": 798 }, { "epoch": 0.1, "grad_norm": 7.961333570863055, "learning_rate": 9.874639560909118e-06, "loss": 1.7139, "step": 799 }, { "epoch": 0.1, "grad_norm": 8.395821149336184, "learning_rate": 9.874190928519191e-06, "loss": 1.7457, "step": 800 }, { "epoch": 0.1, "grad_norm": 6.869379634849202, "learning_rate": 9.873741505021193e-06, "loss": 1.7928, "step": 801 }, { "epoch": 0.1, "grad_norm": 8.05494670548235, "learning_rate": 9.873291290488068e-06, "loss": 1.7194, "step": 802 }, { "epoch": 0.1, "grad_norm": 7.3956869371291765, "learning_rate": 9.872840284992891e-06, "loss": 1.8195, "step": 803 }, { "epoch": 0.1, "grad_norm": 7.88271679216923, "learning_rate": 9.872388488608858e-06, "loss": 1.9104, "step": 804 }, { "epoch": 0.1, "grad_norm": 6.817958906862914, "learning_rate": 9.871935901409302e-06, "loss": 1.6243, "step": 805 }, { "epoch": 0.1, "grad_norm": 8.342114657119335, "learning_rate": 9.871482523467679e-06, "loss": 1.8271, "step": 806 }, { "epoch": 0.1, "grad_norm": 7.817734039246238, "learning_rate": 9.871028354857574e-06, "loss": 1.8949, "step": 807 }, { "epoch": 0.1, "grad_norm": 6.8869342294091735, "learning_rate": 9.870573395652703e-06, "loss": 2.0126, "step": 808 }, { "epoch": 0.1, "grad_norm": 6.499708227755971, "learning_rate": 9.870117645926907e-06, "loss": 1.8201, "step": 809 }, { "epoch": 0.1, "grad_norm": 7.738442766481709, "learning_rate": 9.869661105754155e-06, "loss": 1.8312, "step": 810 }, { "epoch": 0.1, "grad_norm": 7.997501102898605, "learning_rate": 9.86920377520855e-06, "loss": 1.906, "step": 811 }, { "epoch": 0.1, "grad_norm": 6.831561987291755, "learning_rate": 9.868745654364318e-06, "loss": 1.8651, "step": 812 }, { "epoch": 0.1, "grad_norm": 6.3531704032789165, "learning_rate": 9.868286743295813e-06, "loss": 1.6929, "step": 813 }, { "epoch": 0.1, "grad_norm": 8.314646543918808, "learning_rate": 9.867827042077518e-06, "loss": 1.7653, "step": 814 }, { "epoch": 0.1, "grad_norm": 8.1165560526523, "learning_rate": 9.867366550784048e-06, "loss": 1.8323, "step": 815 }, { "epoch": 0.1, "grad_norm": 7.450042122020573, "learning_rate": 9.866905269490141e-06, "loss": 1.8212, "step": 816 }, { "epoch": 0.1, "grad_norm": 7.6182057817058375, "learning_rate": 9.866443198270665e-06, "loss": 1.831, "step": 817 }, { "epoch": 0.1, "grad_norm": 7.351089290850685, "learning_rate": 9.865980337200622e-06, "loss": 1.8069, "step": 818 }, { "epoch": 0.1, "grad_norm": 7.4898871468230155, "learning_rate": 9.86551668635513e-06, "loss": 1.6628, "step": 819 }, { "epoch": 0.1, "grad_norm": 7.4995974568850965, "learning_rate": 9.865052245809446e-06, "loss": 1.7321, "step": 820 }, { "epoch": 0.1, "grad_norm": 6.880267849581373, "learning_rate": 9.86458701563895e-06, "loss": 1.8092, "step": 821 }, { "epoch": 0.1, "grad_norm": 6.90236406390767, "learning_rate": 9.864120995919151e-06, "loss": 1.7086, "step": 822 }, { "epoch": 0.1, "grad_norm": 6.485606775229837, "learning_rate": 9.863654186725688e-06, "loss": 1.8314, "step": 823 }, { "epoch": 0.1, "grad_norm": 7.280422250136689, "learning_rate": 9.863186588134327e-06, "loss": 1.7791, "step": 824 }, { "epoch": 0.1, "grad_norm": 7.822808255444516, "learning_rate": 9.862718200220959e-06, "loss": 1.6529, "step": 825 }, { "epoch": 0.1, "grad_norm": 7.278082216497208, "learning_rate": 9.86224902306161e-06, "loss": 1.8411, "step": 826 }, { "epoch": 0.1, "grad_norm": 8.077504992777273, "learning_rate": 9.861779056732427e-06, "loss": 1.8274, "step": 827 }, { "epoch": 0.1, "grad_norm": 7.2849251610852255, "learning_rate": 9.861308301309689e-06, "loss": 1.9468, "step": 828 }, { "epoch": 0.1, "grad_norm": 6.988992748904001, "learning_rate": 9.8608367568698e-06, "loss": 1.8404, "step": 829 }, { "epoch": 0.1, "grad_norm": 7.552410717632815, "learning_rate": 9.860364423489299e-06, "loss": 1.8837, "step": 830 }, { "epoch": 0.1, "grad_norm": 7.667748747532554, "learning_rate": 9.859891301244844e-06, "loss": 1.6102, "step": 831 }, { "epoch": 0.1, "grad_norm": 7.397925893314788, "learning_rate": 9.859417390213228e-06, "loss": 1.806, "step": 832 }, { "epoch": 0.1, "grad_norm": 7.356527228623808, "learning_rate": 9.85894269047137e-06, "loss": 1.7988, "step": 833 }, { "epoch": 0.1, "grad_norm": 8.079618739512732, "learning_rate": 9.858467202096314e-06, "loss": 1.7826, "step": 834 }, { "epoch": 0.1, "grad_norm": 7.175372809146668, "learning_rate": 9.857990925165234e-06, "loss": 1.8618, "step": 835 }, { "epoch": 0.1, "grad_norm": 7.2168874270335905, "learning_rate": 9.857513859755434e-06, "loss": 1.6809, "step": 836 }, { "epoch": 0.1, "grad_norm": 9.74488538353162, "learning_rate": 9.857036005944344e-06, "loss": 1.7269, "step": 837 }, { "epoch": 0.1, "grad_norm": 7.58292880116753, "learning_rate": 9.856557363809523e-06, "loss": 2.0034, "step": 838 }, { "epoch": 0.1, "grad_norm": 7.793679701212753, "learning_rate": 9.856077933428655e-06, "loss": 1.8998, "step": 839 }, { "epoch": 0.1, "grad_norm": 8.179861106856494, "learning_rate": 9.855597714879557e-06, "loss": 2.0289, "step": 840 }, { "epoch": 0.1, "grad_norm": 8.576453854255236, "learning_rate": 9.855116708240168e-06, "loss": 1.6202, "step": 841 }, { "epoch": 0.1, "grad_norm": 6.746588472290053, "learning_rate": 9.854634913588563e-06, "loss": 1.7153, "step": 842 }, { "epoch": 0.1, "grad_norm": 8.894920248262858, "learning_rate": 9.854152331002934e-06, "loss": 1.8581, "step": 843 }, { "epoch": 0.1, "grad_norm": 7.952023660875249, "learning_rate": 9.853668960561611e-06, "loss": 1.7606, "step": 844 }, { "epoch": 0.11, "grad_norm": 7.784464705924824, "learning_rate": 9.853184802343045e-06, "loss": 1.7592, "step": 845 }, { "epoch": 0.11, "grad_norm": 7.091155462060067, "learning_rate": 9.85269985642582e-06, "loss": 1.8044, "step": 846 }, { "epoch": 0.11, "grad_norm": 6.853513415915793, "learning_rate": 9.852214122888645e-06, "loss": 1.7053, "step": 847 }, { "epoch": 0.11, "grad_norm": 7.991529052873785, "learning_rate": 9.851727601810354e-06, "loss": 1.6507, "step": 848 }, { "epoch": 0.11, "grad_norm": 7.04445142282865, "learning_rate": 9.851240293269917e-06, "loss": 1.4335, "step": 849 }, { "epoch": 0.11, "grad_norm": 7.616466980119407, "learning_rate": 9.850752197346425e-06, "loss": 1.6374, "step": 850 }, { "epoch": 0.11, "grad_norm": 7.036402154313197, "learning_rate": 9.850263314119095e-06, "loss": 1.7333, "step": 851 }, { "epoch": 0.11, "grad_norm": 9.331044180261781, "learning_rate": 9.84977364366728e-06, "loss": 1.8367, "step": 852 }, { "epoch": 0.11, "grad_norm": 7.52481499407342, "learning_rate": 9.849283186070456e-06, "loss": 1.612, "step": 853 }, { "epoch": 0.11, "grad_norm": 7.174396799558746, "learning_rate": 9.848791941408222e-06, "loss": 1.7796, "step": 854 }, { "epoch": 0.11, "grad_norm": 8.417246395411006, "learning_rate": 9.848299909760317e-06, "loss": 2.1233, "step": 855 }, { "epoch": 0.11, "grad_norm": 7.034854141278098, "learning_rate": 9.847807091206595e-06, "loss": 1.6401, "step": 856 }, { "epoch": 0.11, "grad_norm": 9.811491014117099, "learning_rate": 9.847313485827047e-06, "loss": 1.9974, "step": 857 }, { "epoch": 0.11, "grad_norm": 7.418814531693564, "learning_rate": 9.846819093701782e-06, "loss": 1.7804, "step": 858 }, { "epoch": 0.11, "grad_norm": 7.142648587637571, "learning_rate": 9.84632391491105e-06, "loss": 1.8832, "step": 859 }, { "epoch": 0.11, "grad_norm": 7.618965585713463, "learning_rate": 9.845827949535217e-06, "loss": 1.8542, "step": 860 }, { "epoch": 0.11, "grad_norm": 7.999323988818914, "learning_rate": 9.845331197654781e-06, "loss": 1.8806, "step": 861 }, { "epoch": 0.11, "grad_norm": 7.0494695736508834, "learning_rate": 9.844833659350368e-06, "loss": 1.795, "step": 862 }, { "epoch": 0.11, "grad_norm": 6.886076730184519, "learning_rate": 9.844335334702732e-06, "loss": 1.9933, "step": 863 }, { "epoch": 0.11, "grad_norm": 7.449660884030685, "learning_rate": 9.843836223792753e-06, "loss": 1.7091, "step": 864 }, { "epoch": 0.11, "grad_norm": 6.6904352106341936, "learning_rate": 9.84333632670144e-06, "loss": 1.8438, "step": 865 }, { "epoch": 0.11, "grad_norm": 7.584893237142008, "learning_rate": 9.84283564350993e-06, "loss": 1.8305, "step": 866 }, { "epoch": 0.11, "grad_norm": 7.315027095360408, "learning_rate": 9.842334174299484e-06, "loss": 1.7532, "step": 867 }, { "epoch": 0.11, "grad_norm": 7.377618069605411, "learning_rate": 9.841831919151495e-06, "loss": 1.714, "step": 868 }, { "epoch": 0.11, "grad_norm": 12.07108319259465, "learning_rate": 9.84132887814748e-06, "loss": 1.6632, "step": 869 }, { "epoch": 0.11, "grad_norm": 7.234257995304398, "learning_rate": 9.840825051369088e-06, "loss": 1.7681, "step": 870 }, { "epoch": 0.11, "grad_norm": 7.124268682622898, "learning_rate": 9.84032043889809e-06, "loss": 1.7277, "step": 871 }, { "epoch": 0.11, "grad_norm": 7.959469291892427, "learning_rate": 9.839815040816391e-06, "loss": 1.8048, "step": 872 }, { "epoch": 0.11, "grad_norm": 7.056871745821652, "learning_rate": 9.839308857206017e-06, "loss": 1.7335, "step": 873 }, { "epoch": 0.11, "grad_norm": 7.100515299024303, "learning_rate": 9.838801888149125e-06, "loss": 1.8276, "step": 874 }, { "epoch": 0.11, "grad_norm": 7.857170119746983, "learning_rate": 9.838294133728e-06, "loss": 1.6543, "step": 875 }, { "epoch": 0.11, "grad_norm": 8.791991341149531, "learning_rate": 9.837785594025053e-06, "loss": 1.7523, "step": 876 }, { "epoch": 0.11, "grad_norm": 7.52230188188387, "learning_rate": 9.83727626912282e-06, "loss": 1.6064, "step": 877 }, { "epoch": 0.11, "grad_norm": 7.421792925689372, "learning_rate": 9.836766159103973e-06, "loss": 1.6977, "step": 878 }, { "epoch": 0.11, "grad_norm": 20.858631055933618, "learning_rate": 9.8362552640513e-06, "loss": 1.9174, "step": 879 }, { "epoch": 0.11, "grad_norm": 7.032870187268035, "learning_rate": 9.835743584047724e-06, "loss": 1.896, "step": 880 }, { "epoch": 0.11, "grad_norm": 7.955353206072325, "learning_rate": 9.835231119176294e-06, "loss": 2.008, "step": 881 }, { "epoch": 0.11, "grad_norm": 7.191936782943567, "learning_rate": 9.834717869520185e-06, "loss": 1.5586, "step": 882 }, { "epoch": 0.11, "grad_norm": 7.672546874709677, "learning_rate": 9.834203835162702e-06, "loss": 1.7487, "step": 883 }, { "epoch": 0.11, "grad_norm": 8.23005943164236, "learning_rate": 9.833689016187273e-06, "loss": 1.6442, "step": 884 }, { "epoch": 0.11, "grad_norm": 8.60239983460352, "learning_rate": 9.83317341267746e-06, "loss": 1.7382, "step": 885 }, { "epoch": 0.11, "grad_norm": 7.623352239732228, "learning_rate": 9.832657024716944e-06, "loss": 1.9052, "step": 886 }, { "epoch": 0.11, "grad_norm": 6.79996639739033, "learning_rate": 9.83213985238954e-06, "loss": 1.9387, "step": 887 }, { "epoch": 0.11, "grad_norm": 7.259726414596907, "learning_rate": 9.831621895779187e-06, "loss": 1.8168, "step": 888 }, { "epoch": 0.11, "grad_norm": 7.730308369243172, "learning_rate": 9.831103154969953e-06, "loss": 1.9323, "step": 889 }, { "epoch": 0.11, "grad_norm": 8.247462658386887, "learning_rate": 9.83058363004603e-06, "loss": 1.9007, "step": 890 }, { "epoch": 0.11, "grad_norm": 7.8874523838423505, "learning_rate": 9.830063321091743e-06, "loss": 1.7693, "step": 891 }, { "epoch": 0.11, "grad_norm": 6.540229720235832, "learning_rate": 9.829542228191541e-06, "loss": 1.7944, "step": 892 }, { "epoch": 0.11, "grad_norm": 8.116356239811694, "learning_rate": 9.829020351429999e-06, "loss": 1.7768, "step": 893 }, { "epoch": 0.11, "grad_norm": 7.770980396219482, "learning_rate": 9.828497690891818e-06, "loss": 2.107, "step": 894 }, { "epoch": 0.11, "grad_norm": 7.7010180814401, "learning_rate": 9.827974246661834e-06, "loss": 1.6569, "step": 895 }, { "epoch": 0.11, "grad_norm": 7.364546026616483, "learning_rate": 9.827450018825e-06, "loss": 1.7637, "step": 896 }, { "epoch": 0.11, "grad_norm": 8.864397821882106, "learning_rate": 9.826925007466403e-06, "loss": 1.7274, "step": 897 }, { "epoch": 0.11, "grad_norm": 7.419132436253405, "learning_rate": 9.826399212671257e-06, "loss": 1.6992, "step": 898 }, { "epoch": 0.11, "grad_norm": 7.172763663664268, "learning_rate": 9.8258726345249e-06, "loss": 1.8366, "step": 899 }, { "epoch": 0.11, "grad_norm": 7.990265924490426, "learning_rate": 9.825345273112796e-06, "loss": 1.6259, "step": 900 }, { "epoch": 0.11, "grad_norm": 8.49945162510448, "learning_rate": 9.824817128520542e-06, "loss": 2.0134, "step": 901 }, { "epoch": 0.11, "grad_norm": 7.328479027400875, "learning_rate": 9.82428820083386e-06, "loss": 1.9119, "step": 902 }, { "epoch": 0.11, "grad_norm": 6.949307767161612, "learning_rate": 9.823758490138592e-06, "loss": 1.6601, "step": 903 }, { "epoch": 0.11, "grad_norm": 6.897167592395869, "learning_rate": 9.823227996520718e-06, "loss": 1.4281, "step": 904 }, { "epoch": 0.11, "grad_norm": 6.997007616209509, "learning_rate": 9.82269672006634e-06, "loss": 1.8117, "step": 905 }, { "epoch": 0.11, "grad_norm": 7.543929838870678, "learning_rate": 9.822164660861686e-06, "loss": 1.7831, "step": 906 }, { "epoch": 0.11, "grad_norm": 7.649819982856616, "learning_rate": 9.82163181899311e-06, "loss": 1.9338, "step": 907 }, { "epoch": 0.11, "grad_norm": 8.125607158956898, "learning_rate": 9.821098194547098e-06, "loss": 1.7358, "step": 908 }, { "epoch": 0.11, "grad_norm": 7.785993143957283, "learning_rate": 9.820563787610258e-06, "loss": 1.6364, "step": 909 }, { "epoch": 0.11, "grad_norm": 7.950515534419461, "learning_rate": 9.82002859826933e-06, "loss": 1.7589, "step": 910 }, { "epoch": 0.11, "grad_norm": 6.81126353992253, "learning_rate": 9.819492626611177e-06, "loss": 1.7142, "step": 911 }, { "epoch": 0.11, "grad_norm": 8.626578604227225, "learning_rate": 9.818955872722788e-06, "loss": 1.6011, "step": 912 }, { "epoch": 0.11, "grad_norm": 7.631063767921858, "learning_rate": 9.818418336691285e-06, "loss": 1.9177, "step": 913 }, { "epoch": 0.11, "grad_norm": 6.797037542520055, "learning_rate": 9.81788001860391e-06, "loss": 1.744, "step": 914 }, { "epoch": 0.11, "grad_norm": 7.490432819566713, "learning_rate": 9.817340918548035e-06, "loss": 1.6602, "step": 915 }, { "epoch": 0.11, "grad_norm": 7.320023061916937, "learning_rate": 9.81680103661116e-06, "loss": 1.8294, "step": 916 }, { "epoch": 0.11, "grad_norm": 8.688238947015845, "learning_rate": 9.816260372880912e-06, "loss": 1.5803, "step": 917 }, { "epoch": 0.11, "grad_norm": 7.636870659100165, "learning_rate": 9.815718927445042e-06, "loss": 1.916, "step": 918 }, { "epoch": 0.11, "grad_norm": 8.179540552820669, "learning_rate": 9.815176700391429e-06, "loss": 1.9573, "step": 919 }, { "epoch": 0.11, "grad_norm": 7.108138004886462, "learning_rate": 9.81463369180808e-06, "loss": 1.8218, "step": 920 }, { "epoch": 0.11, "grad_norm": 6.941010575046021, "learning_rate": 9.81408990178313e-06, "loss": 1.683, "step": 921 }, { "epoch": 0.11, "grad_norm": 8.002814276857036, "learning_rate": 9.813545330404839e-06, "loss": 1.6877, "step": 922 }, { "epoch": 0.11, "grad_norm": 6.752396703713316, "learning_rate": 9.81299997776159e-06, "loss": 1.7973, "step": 923 }, { "epoch": 0.11, "grad_norm": 8.495707443537148, "learning_rate": 9.8124538439419e-06, "loss": 1.9227, "step": 924 }, { "epoch": 0.12, "grad_norm": 7.228917313101763, "learning_rate": 9.811906929034412e-06, "loss": 1.9892, "step": 925 }, { "epoch": 0.12, "grad_norm": 7.8617891118576955, "learning_rate": 9.811359233127887e-06, "loss": 1.6027, "step": 926 }, { "epoch": 0.12, "grad_norm": 7.8075978811169655, "learning_rate": 9.810810756311224e-06, "loss": 2.0051, "step": 927 }, { "epoch": 0.12, "grad_norm": 8.420514343091714, "learning_rate": 9.810261498673441e-06, "loss": 1.8395, "step": 928 }, { "epoch": 0.12, "grad_norm": 8.331678426378517, "learning_rate": 9.809711460303687e-06, "loss": 1.625, "step": 929 }, { "epoch": 0.12, "grad_norm": 7.308633161663496, "learning_rate": 9.809160641291238e-06, "loss": 1.6004, "step": 930 }, { "epoch": 0.12, "grad_norm": 8.797665568007117, "learning_rate": 9.808609041725493e-06, "loss": 2.0602, "step": 931 }, { "epoch": 0.12, "grad_norm": 7.3473667047243305, "learning_rate": 9.808056661695977e-06, "loss": 1.7206, "step": 932 }, { "epoch": 0.12, "grad_norm": 6.904747083611906, "learning_rate": 9.807503501292349e-06, "loss": 1.8691, "step": 933 }, { "epoch": 0.12, "grad_norm": 10.124266010460044, "learning_rate": 9.806949560604387e-06, "loss": 1.7688, "step": 934 }, { "epoch": 0.12, "grad_norm": 6.705589636683171, "learning_rate": 9.806394839722e-06, "loss": 1.8781, "step": 935 }, { "epoch": 0.12, "grad_norm": 8.03606065132735, "learning_rate": 9.80583933873522e-06, "loss": 1.8994, "step": 936 }, { "epoch": 0.12, "grad_norm": 7.523568075766494, "learning_rate": 9.805283057734212e-06, "loss": 1.7915, "step": 937 }, { "epoch": 0.12, "grad_norm": 7.925798025670224, "learning_rate": 9.80472599680926e-06, "loss": 1.8551, "step": 938 }, { "epoch": 0.12, "grad_norm": 8.166189452140271, "learning_rate": 9.80416815605078e-06, "loss": 1.7628, "step": 939 }, { "epoch": 0.12, "grad_norm": 7.172053064096124, "learning_rate": 9.803609535549312e-06, "loss": 1.7732, "step": 940 }, { "epoch": 0.12, "grad_norm": 7.213155027635578, "learning_rate": 9.803050135395522e-06, "loss": 1.665, "step": 941 }, { "epoch": 0.12, "grad_norm": 7.710904622113002, "learning_rate": 9.802489955680206e-06, "loss": 1.5696, "step": 942 }, { "epoch": 0.12, "grad_norm": 7.855170423432917, "learning_rate": 9.80192899649428e-06, "loss": 1.7267, "step": 943 }, { "epoch": 0.12, "grad_norm": 6.85595167029396, "learning_rate": 9.801367257928797e-06, "loss": 1.6981, "step": 944 }, { "epoch": 0.12, "grad_norm": 8.610683343889598, "learning_rate": 9.800804740074928e-06, "loss": 1.746, "step": 945 }, { "epoch": 0.12, "grad_norm": 7.738598882899379, "learning_rate": 9.80024144302397e-06, "loss": 1.5973, "step": 946 }, { "epoch": 0.12, "grad_norm": 6.715073421196044, "learning_rate": 9.799677366867351e-06, "loss": 1.6286, "step": 947 }, { "epoch": 0.12, "grad_norm": 8.280598537577855, "learning_rate": 9.799112511696625e-06, "loss": 1.9371, "step": 948 }, { "epoch": 0.12, "grad_norm": 7.443101158225918, "learning_rate": 9.798546877603468e-06, "loss": 1.9815, "step": 949 }, { "epoch": 0.12, "grad_norm": 7.078272480579111, "learning_rate": 9.797980464679688e-06, "loss": 1.985, "step": 950 }, { "epoch": 0.12, "grad_norm": 7.9780424669912176, "learning_rate": 9.797413273017216e-06, "loss": 1.9735, "step": 951 }, { "epoch": 0.12, "grad_norm": 8.379189189820517, "learning_rate": 9.79684530270811e-06, "loss": 2.0843, "step": 952 }, { "epoch": 0.12, "grad_norm": 7.521083207350668, "learning_rate": 9.796276553844558e-06, "loss": 1.8167, "step": 953 }, { "epoch": 0.12, "grad_norm": 7.393657398065623, "learning_rate": 9.795707026518868e-06, "loss": 1.9421, "step": 954 }, { "epoch": 0.12, "grad_norm": 6.9779109458118525, "learning_rate": 9.795136720823477e-06, "loss": 1.684, "step": 955 }, { "epoch": 0.12, "grad_norm": 7.524361365341636, "learning_rate": 9.794565636850948e-06, "loss": 1.6687, "step": 956 }, { "epoch": 0.12, "grad_norm": 7.890844904862532, "learning_rate": 9.793993774693974e-06, "loss": 1.9175, "step": 957 }, { "epoch": 0.12, "grad_norm": 5.928573807943255, "learning_rate": 9.793421134445372e-06, "loss": 1.8141, "step": 958 }, { "epoch": 0.12, "grad_norm": 7.26163802725666, "learning_rate": 9.792847716198079e-06, "loss": 1.6101, "step": 959 }, { "epoch": 0.12, "grad_norm": 6.9222935957525005, "learning_rate": 9.79227352004517e-06, "loss": 1.425, "step": 960 }, { "epoch": 0.12, "grad_norm": 7.839392892335339, "learning_rate": 9.791698546079838e-06, "loss": 1.9153, "step": 961 }, { "epoch": 0.12, "grad_norm": 7.05986523516693, "learning_rate": 9.791122794395405e-06, "loss": 1.8529, "step": 962 }, { "epoch": 0.12, "grad_norm": 7.218840535552235, "learning_rate": 9.790546265085317e-06, "loss": 2.045, "step": 963 }, { "epoch": 0.12, "grad_norm": 7.193696330761681, "learning_rate": 9.789968958243147e-06, "loss": 1.6438, "step": 964 }, { "epoch": 0.12, "grad_norm": 7.666690180124635, "learning_rate": 9.7893908739626e-06, "loss": 1.6327, "step": 965 }, { "epoch": 0.12, "grad_norm": 7.477413554507335, "learning_rate": 9.788812012337499e-06, "loss": 1.9477, "step": 966 }, { "epoch": 0.12, "grad_norm": 7.407909132423143, "learning_rate": 9.788232373461795e-06, "loss": 1.5902, "step": 967 }, { "epoch": 0.12, "grad_norm": 7.500232154004057, "learning_rate": 9.787651957429569e-06, "loss": 1.6234, "step": 968 }, { "epoch": 0.12, "grad_norm": 8.723526511279792, "learning_rate": 9.787070764335025e-06, "loss": 1.6928, "step": 969 }, { "epoch": 0.12, "grad_norm": 7.017282376521394, "learning_rate": 9.786488794272494e-06, "loss": 1.5926, "step": 970 }, { "epoch": 0.12, "grad_norm": 6.33316664477475, "learning_rate": 9.785906047336433e-06, "loss": 1.8053, "step": 971 }, { "epoch": 0.12, "grad_norm": 7.503215356875477, "learning_rate": 9.785322523621424e-06, "loss": 1.8495, "step": 972 }, { "epoch": 0.12, "grad_norm": 6.474622257756537, "learning_rate": 9.784738223222178e-06, "loss": 1.8522, "step": 973 }, { "epoch": 0.12, "grad_norm": 6.93639721708675, "learning_rate": 9.784153146233528e-06, "loss": 1.5676, "step": 974 }, { "epoch": 0.12, "grad_norm": 7.544116949793797, "learning_rate": 9.783567292750438e-06, "loss": 1.8264, "step": 975 }, { "epoch": 0.12, "grad_norm": 7.890290521777045, "learning_rate": 9.782980662867992e-06, "loss": 1.8956, "step": 976 }, { "epoch": 0.12, "grad_norm": 7.040822498585256, "learning_rate": 9.782393256681406e-06, "loss": 1.5437, "step": 977 }, { "epoch": 0.12, "grad_norm": 6.74976467899771, "learning_rate": 9.781805074286016e-06, "loss": 1.6788, "step": 978 }, { "epoch": 0.12, "grad_norm": 6.674450709349387, "learning_rate": 9.781216115777292e-06, "loss": 1.6762, "step": 979 }, { "epoch": 0.12, "grad_norm": 8.257404638841365, "learning_rate": 9.780626381250822e-06, "loss": 1.5943, "step": 980 }, { "epoch": 0.12, "grad_norm": 10.7055440595712, "learning_rate": 9.780035870802323e-06, "loss": 2.2106, "step": 981 }, { "epoch": 0.12, "grad_norm": 6.440726493892265, "learning_rate": 9.77944458452764e-06, "loss": 1.6739, "step": 982 }, { "epoch": 0.12, "grad_norm": 7.4397748326996, "learning_rate": 9.778852522522739e-06, "loss": 1.9084, "step": 983 }, { "epoch": 0.12, "grad_norm": 8.192730187618306, "learning_rate": 9.77825968488372e-06, "loss": 1.5141, "step": 984 }, { "epoch": 0.12, "grad_norm": 9.99864299202583, "learning_rate": 9.777666071706797e-06, "loss": 1.5367, "step": 985 }, { "epoch": 0.12, "grad_norm": 7.387249058992274, "learning_rate": 9.777071683088323e-06, "loss": 1.7072, "step": 986 }, { "epoch": 0.12, "grad_norm": 7.4991669410287685, "learning_rate": 9.776476519124767e-06, "loss": 1.7657, "step": 987 }, { "epoch": 0.12, "grad_norm": 8.406076170185997, "learning_rate": 9.775880579912728e-06, "loss": 1.8171, "step": 988 }, { "epoch": 0.12, "grad_norm": 7.021972349752205, "learning_rate": 9.77528386554893e-06, "loss": 1.85, "step": 989 }, { "epoch": 0.12, "grad_norm": 6.961113796771055, "learning_rate": 9.774686376130225e-06, "loss": 1.5993, "step": 990 }, { "epoch": 0.12, "grad_norm": 8.294966026562701, "learning_rate": 9.774088111753586e-06, "loss": 1.7392, "step": 991 }, { "epoch": 0.12, "grad_norm": 7.793076322744755, "learning_rate": 9.773489072516116e-06, "loss": 1.7752, "step": 992 }, { "epoch": 0.12, "grad_norm": 8.271691106214828, "learning_rate": 9.772889258515044e-06, "loss": 1.7061, "step": 993 }, { "epoch": 0.12, "grad_norm": 9.548838875862192, "learning_rate": 9.772288669847719e-06, "loss": 1.8197, "step": 994 }, { "epoch": 0.12, "grad_norm": 7.72946509365149, "learning_rate": 9.771687306611623e-06, "loss": 2.0489, "step": 995 }, { "epoch": 0.12, "grad_norm": 7.308721258343051, "learning_rate": 9.771085168904363e-06, "loss": 1.5827, "step": 996 }, { "epoch": 0.12, "grad_norm": 8.77551484657375, "learning_rate": 9.770482256823662e-06, "loss": 1.9482, "step": 997 }, { "epoch": 0.12, "grad_norm": 7.518748838065766, "learning_rate": 9.769878570467382e-06, "loss": 1.5941, "step": 998 }, { "epoch": 0.12, "grad_norm": 7.290409059035936, "learning_rate": 9.769274109933501e-06, "loss": 1.6196, "step": 999 }, { "epoch": 0.12, "grad_norm": 7.554744483316342, "learning_rate": 9.76866887532013e-06, "loss": 1.8434, "step": 1000 }, { "epoch": 0.12, "grad_norm": 8.173749911959831, "learning_rate": 9.768062866725501e-06, "loss": 1.6037, "step": 1001 }, { "epoch": 0.12, "grad_norm": 7.125398687411534, "learning_rate": 9.76745608424797e-06, "loss": 1.6099, "step": 1002 }, { "epoch": 0.12, "grad_norm": 5.945536745380783, "learning_rate": 9.766848527986022e-06, "loss": 1.9722, "step": 1003 }, { "epoch": 0.12, "grad_norm": 7.413361667112737, "learning_rate": 9.766240198038267e-06, "loss": 1.5734, "step": 1004 }, { "epoch": 0.12, "grad_norm": 7.433156902484609, "learning_rate": 9.765631094503442e-06, "loss": 1.7678, "step": 1005 }, { "epoch": 0.13, "grad_norm": 7.890885098636238, "learning_rate": 9.765021217480406e-06, "loss": 1.8526, "step": 1006 }, { "epoch": 0.13, "grad_norm": 7.136741506761165, "learning_rate": 9.764410567068146e-06, "loss": 1.8475, "step": 1007 }, { "epoch": 0.13, "grad_norm": 7.47836094011814, "learning_rate": 9.763799143365773e-06, "loss": 1.7684, "step": 1008 }, { "epoch": 0.13, "grad_norm": 7.73464786943068, "learning_rate": 9.763186946472526e-06, "loss": 1.7842, "step": 1009 }, { "epoch": 0.13, "grad_norm": 8.555651676035334, "learning_rate": 9.762573976487767e-06, "loss": 1.6961, "step": 1010 }, { "epoch": 0.13, "grad_norm": 8.34219019151997, "learning_rate": 9.761960233510986e-06, "loss": 1.6483, "step": 1011 }, { "epoch": 0.13, "grad_norm": 6.745708851677415, "learning_rate": 9.761345717641794e-06, "loss": 1.6073, "step": 1012 }, { "epoch": 0.13, "grad_norm": 7.957387098101768, "learning_rate": 9.760730428979933e-06, "loss": 1.6463, "step": 1013 }, { "epoch": 0.13, "grad_norm": 8.147613095225571, "learning_rate": 9.760114367625264e-06, "loss": 1.5596, "step": 1014 }, { "epoch": 0.13, "grad_norm": 7.611118424631541, "learning_rate": 9.759497533677783e-06, "loss": 1.8869, "step": 1015 }, { "epoch": 0.13, "grad_norm": 7.7522131632665365, "learning_rate": 9.758879927237599e-06, "loss": 1.8972, "step": 1016 }, { "epoch": 0.13, "grad_norm": 7.220845587542113, "learning_rate": 9.758261548404959e-06, "loss": 1.7011, "step": 1017 }, { "epoch": 0.13, "grad_norm": 6.942362697831658, "learning_rate": 9.757642397280226e-06, "loss": 1.8107, "step": 1018 }, { "epoch": 0.13, "grad_norm": 7.902598158655567, "learning_rate": 9.757022473963891e-06, "loss": 1.8191, "step": 1019 }, { "epoch": 0.13, "grad_norm": 6.978761880010639, "learning_rate": 9.756401778556572e-06, "loss": 1.65, "step": 1020 }, { "epoch": 0.13, "grad_norm": 7.365378883495362, "learning_rate": 9.755780311159013e-06, "loss": 1.5718, "step": 1021 }, { "epoch": 0.13, "grad_norm": 6.743602912056911, "learning_rate": 9.755158071872077e-06, "loss": 1.8918, "step": 1022 }, { "epoch": 0.13, "grad_norm": 7.577698099934766, "learning_rate": 9.754535060796762e-06, "loss": 1.8007, "step": 1023 }, { "epoch": 0.13, "grad_norm": 6.992666541568811, "learning_rate": 9.753911278034184e-06, "loss": 1.6402, "step": 1024 }, { "epoch": 0.13, "grad_norm": 8.1612275208134, "learning_rate": 9.753286723685587e-06, "loss": 1.727, "step": 1025 }, { "epoch": 0.13, "grad_norm": 6.962975831214597, "learning_rate": 9.752661397852338e-06, "loss": 1.7146, "step": 1026 }, { "epoch": 0.13, "grad_norm": 6.767027530047307, "learning_rate": 9.752035300635932e-06, "loss": 1.5459, "step": 1027 }, { "epoch": 0.13, "grad_norm": 6.964670730886756, "learning_rate": 9.751408432137988e-06, "loss": 1.2693, "step": 1028 }, { "epoch": 0.13, "grad_norm": 7.560149949720377, "learning_rate": 9.750780792460248e-06, "loss": 1.8167, "step": 1029 }, { "epoch": 0.13, "grad_norm": 7.00129743138861, "learning_rate": 9.750152381704587e-06, "loss": 1.6596, "step": 1030 }, { "epoch": 0.13, "grad_norm": 7.1626910434725595, "learning_rate": 9.749523199972993e-06, "loss": 1.5113, "step": 1031 }, { "epoch": 0.13, "grad_norm": 7.4581100117501, "learning_rate": 9.74889324736759e-06, "loss": 1.7816, "step": 1032 }, { "epoch": 0.13, "grad_norm": 7.772289882228262, "learning_rate": 9.748262523990621e-06, "loss": 1.4616, "step": 1033 }, { "epoch": 0.13, "grad_norm": 6.795544845044775, "learning_rate": 9.747631029944457e-06, "loss": 1.852, "step": 1034 }, { "epoch": 0.13, "grad_norm": 7.53343057457573, "learning_rate": 9.746998765331593e-06, "loss": 1.7286, "step": 1035 }, { "epoch": 0.13, "grad_norm": 7.856555292636769, "learning_rate": 9.746365730254646e-06, "loss": 1.5234, "step": 1036 }, { "epoch": 0.13, "grad_norm": 8.031546883151218, "learning_rate": 9.745731924816364e-06, "loss": 1.8337, "step": 1037 }, { "epoch": 0.13, "grad_norm": 6.575370373070773, "learning_rate": 9.745097349119616e-06, "loss": 1.7, "step": 1038 }, { "epoch": 0.13, "grad_norm": 7.296405317769876, "learning_rate": 9.7444620032674e-06, "loss": 1.5703, "step": 1039 }, { "epoch": 0.13, "grad_norm": 8.243084812892883, "learning_rate": 9.743825887362832e-06, "loss": 2.1735, "step": 1040 }, { "epoch": 0.13, "grad_norm": 6.920818670007402, "learning_rate": 9.743189001509159e-06, "loss": 1.7249, "step": 1041 }, { "epoch": 0.13, "grad_norm": 7.881257264197376, "learning_rate": 9.742551345809752e-06, "loss": 1.7975, "step": 1042 }, { "epoch": 0.13, "grad_norm": 7.2492695447996764, "learning_rate": 9.741912920368105e-06, "loss": 1.7115, "step": 1043 }, { "epoch": 0.13, "grad_norm": 8.348080001861176, "learning_rate": 9.741273725287837e-06, "loss": 1.9352, "step": 1044 }, { "epoch": 0.13, "grad_norm": 7.715337230903479, "learning_rate": 9.740633760672693e-06, "loss": 1.9076, "step": 1045 }, { "epoch": 0.13, "grad_norm": 7.955279336417902, "learning_rate": 9.739993026626544e-06, "loss": 1.7899, "step": 1046 }, { "epoch": 0.13, "grad_norm": 7.897103570285756, "learning_rate": 9.739351523253386e-06, "loss": 1.8153, "step": 1047 }, { "epoch": 0.13, "grad_norm": 10.326966833142807, "learning_rate": 9.738709250657336e-06, "loss": 1.8705, "step": 1048 }, { "epoch": 0.13, "grad_norm": 7.3776060546783935, "learning_rate": 9.738066208942638e-06, "loss": 1.7011, "step": 1049 }, { "epoch": 0.13, "grad_norm": 6.744719101440599, "learning_rate": 9.737422398213664e-06, "loss": 1.8342, "step": 1050 }, { "epoch": 0.13, "grad_norm": 7.2865713747800775, "learning_rate": 9.736777818574907e-06, "loss": 1.745, "step": 1051 }, { "epoch": 0.13, "grad_norm": 7.362570169756956, "learning_rate": 9.736132470130983e-06, "loss": 1.7387, "step": 1052 }, { "epoch": 0.13, "grad_norm": 7.6914267244230325, "learning_rate": 9.73548635298664e-06, "loss": 1.5974, "step": 1053 }, { "epoch": 0.13, "grad_norm": 7.669063024346968, "learning_rate": 9.734839467246744e-06, "loss": 2.0789, "step": 1054 }, { "epoch": 0.13, "grad_norm": 7.937635250183002, "learning_rate": 9.734191813016288e-06, "loss": 1.6875, "step": 1055 }, { "epoch": 0.13, "grad_norm": 8.226705149480154, "learning_rate": 9.733543390400391e-06, "loss": 1.8492, "step": 1056 }, { "epoch": 0.13, "grad_norm": 6.751538692185898, "learning_rate": 9.732894199504294e-06, "loss": 1.6475, "step": 1057 }, { "epoch": 0.13, "grad_norm": 7.645509114946336, "learning_rate": 9.732244240433367e-06, "loss": 1.8731, "step": 1058 }, { "epoch": 0.13, "grad_norm": 7.1766753366934495, "learning_rate": 9.731593513293097e-06, "loss": 1.6558, "step": 1059 }, { "epoch": 0.13, "grad_norm": 6.713803762721546, "learning_rate": 9.730942018189104e-06, "loss": 1.745, "step": 1060 }, { "epoch": 0.13, "grad_norm": 7.331836266513929, "learning_rate": 9.730289755227131e-06, "loss": 1.9974, "step": 1061 }, { "epoch": 0.13, "grad_norm": 7.448798343136819, "learning_rate": 9.729636724513041e-06, "loss": 1.611, "step": 1062 }, { "epoch": 0.13, "grad_norm": 6.92344417034771, "learning_rate": 9.728982926152826e-06, "loss": 1.7371, "step": 1063 }, { "epoch": 0.13, "grad_norm": 6.755441271447967, "learning_rate": 9.728328360252599e-06, "loss": 1.5091, "step": 1064 }, { "epoch": 0.13, "grad_norm": 8.314733984584226, "learning_rate": 9.7276730269186e-06, "loss": 1.6687, "step": 1065 }, { "epoch": 0.13, "grad_norm": 6.785963497739163, "learning_rate": 9.727016926257196e-06, "loss": 2.0666, "step": 1066 }, { "epoch": 0.13, "grad_norm": 7.313013355485066, "learning_rate": 9.726360058374876e-06, "loss": 1.7219, "step": 1067 }, { "epoch": 0.13, "grad_norm": 6.965208956577256, "learning_rate": 9.725702423378248e-06, "loss": 1.7774, "step": 1068 }, { "epoch": 0.13, "grad_norm": 8.13242634393829, "learning_rate": 9.725044021374053e-06, "loss": 1.6135, "step": 1069 }, { "epoch": 0.13, "grad_norm": 7.112156405140304, "learning_rate": 9.724384852469155e-06, "loss": 1.7171, "step": 1070 }, { "epoch": 0.13, "grad_norm": 7.372745280702634, "learning_rate": 9.723724916770539e-06, "loss": 1.7337, "step": 1071 }, { "epoch": 0.13, "grad_norm": 7.911562695690859, "learning_rate": 9.723064214385315e-06, "loss": 1.8434, "step": 1072 }, { "epoch": 0.13, "grad_norm": 8.692539742283959, "learning_rate": 9.72240274542072e-06, "loss": 1.6974, "step": 1073 }, { "epoch": 0.13, "grad_norm": 7.279611397846465, "learning_rate": 9.721740509984114e-06, "loss": 2.0507, "step": 1074 }, { "epoch": 0.13, "grad_norm": 7.277819844293525, "learning_rate": 9.721077508182983e-06, "loss": 1.8085, "step": 1075 }, { "epoch": 0.13, "grad_norm": 8.058622575753267, "learning_rate": 9.720413740124933e-06, "loss": 1.6578, "step": 1076 }, { "epoch": 0.13, "grad_norm": 6.934991424202455, "learning_rate": 9.7197492059177e-06, "loss": 1.6835, "step": 1077 }, { "epoch": 0.13, "grad_norm": 6.9395918498186795, "learning_rate": 9.719083905669138e-06, "loss": 1.7349, "step": 1078 }, { "epoch": 0.13, "grad_norm": 7.33380760582548, "learning_rate": 9.718417839487232e-06, "loss": 1.7891, "step": 1079 }, { "epoch": 0.13, "grad_norm": 6.703630692364957, "learning_rate": 9.717751007480087e-06, "loss": 1.6849, "step": 1080 }, { "epoch": 0.13, "grad_norm": 6.6914420096629925, "learning_rate": 9.717083409755935e-06, "loss": 1.7542, "step": 1081 }, { "epoch": 0.13, "grad_norm": 6.173095203409622, "learning_rate": 9.716415046423126e-06, "loss": 1.6566, "step": 1082 }, { "epoch": 0.13, "grad_norm": 7.815488343373597, "learning_rate": 9.715745917590145e-06, "loss": 1.553, "step": 1083 }, { "epoch": 0.13, "grad_norm": 7.690847184702531, "learning_rate": 9.715076023365594e-06, "loss": 1.8841, "step": 1084 }, { "epoch": 0.13, "grad_norm": 7.274389440328826, "learning_rate": 9.714405363858198e-06, "loss": 1.9917, "step": 1085 }, { "epoch": 0.14, "grad_norm": 7.807272768979869, "learning_rate": 9.71373393917681e-06, "loss": 1.6749, "step": 1086 }, { "epoch": 0.14, "grad_norm": 6.612682814945906, "learning_rate": 9.713061749430407e-06, "loss": 1.6397, "step": 1087 }, { "epoch": 0.14, "grad_norm": 6.426650092837469, "learning_rate": 9.712388794728088e-06, "loss": 1.7678, "step": 1088 }, { "epoch": 0.14, "grad_norm": 7.258590868459369, "learning_rate": 9.711715075179075e-06, "loss": 1.8745, "step": 1089 }, { "epoch": 0.14, "grad_norm": 8.483267880106364, "learning_rate": 9.711040590892722e-06, "loss": 1.8661, "step": 1090 }, { "epoch": 0.14, "grad_norm": 8.471035219931913, "learning_rate": 9.710365341978496e-06, "loss": 1.4399, "step": 1091 }, { "epoch": 0.14, "grad_norm": 6.7582248695226514, "learning_rate": 9.709689328545997e-06, "loss": 1.7629, "step": 1092 }, { "epoch": 0.14, "grad_norm": 7.75979037419434, "learning_rate": 9.709012550704945e-06, "loss": 1.5706, "step": 1093 }, { "epoch": 0.14, "grad_norm": 7.279514180819644, "learning_rate": 9.708335008565184e-06, "loss": 1.7018, "step": 1094 }, { "epoch": 0.14, "grad_norm": 7.533073830485536, "learning_rate": 9.707656702236683e-06, "loss": 1.7078, "step": 1095 }, { "epoch": 0.14, "grad_norm": 7.1154208755599555, "learning_rate": 9.706977631829535e-06, "loss": 1.7801, "step": 1096 }, { "epoch": 0.14, "grad_norm": 7.156011732183849, "learning_rate": 9.706297797453958e-06, "loss": 1.5551, "step": 1097 }, { "epoch": 0.14, "grad_norm": 7.834601835461286, "learning_rate": 9.705617199220291e-06, "loss": 1.7281, "step": 1098 }, { "epoch": 0.14, "grad_norm": 7.4581527172196385, "learning_rate": 9.704935837239e-06, "loss": 1.6472, "step": 1099 }, { "epoch": 0.14, "grad_norm": 7.809959507322614, "learning_rate": 9.704253711620673e-06, "loss": 1.6875, "step": 1100 }, { "epoch": 0.14, "grad_norm": 7.526628368888849, "learning_rate": 9.703570822476023e-06, "loss": 1.7716, "step": 1101 }, { "epoch": 0.14, "grad_norm": 7.102957201935369, "learning_rate": 9.70288716991589e-06, "loss": 1.5678, "step": 1102 }, { "epoch": 0.14, "grad_norm": 8.103976739949118, "learning_rate": 9.702202754051227e-06, "loss": 1.6655, "step": 1103 }, { "epoch": 0.14, "grad_norm": 8.538620852017543, "learning_rate": 9.701517574993126e-06, "loss": 1.7063, "step": 1104 }, { "epoch": 0.14, "grad_norm": 6.7800617803924155, "learning_rate": 9.700831632852791e-06, "loss": 1.8752, "step": 1105 }, { "epoch": 0.14, "grad_norm": 7.569377649971356, "learning_rate": 9.700144927741556e-06, "loss": 1.9024, "step": 1106 }, { "epoch": 0.14, "grad_norm": 7.5412966742819965, "learning_rate": 9.699457459770876e-06, "loss": 1.8567, "step": 1107 }, { "epoch": 0.14, "grad_norm": 32.661049499009735, "learning_rate": 9.698769229052333e-06, "loss": 1.5085, "step": 1108 }, { "epoch": 0.14, "grad_norm": 7.479413236315371, "learning_rate": 9.698080235697627e-06, "loss": 1.3926, "step": 1109 }, { "epoch": 0.14, "grad_norm": 5.913740262778206, "learning_rate": 9.69739047981859e-06, "loss": 1.4642, "step": 1110 }, { "epoch": 0.14, "grad_norm": 9.213585405331882, "learning_rate": 9.696699961527167e-06, "loss": 1.9004, "step": 1111 }, { "epoch": 0.14, "grad_norm": 7.137645832911436, "learning_rate": 9.69600868093544e-06, "loss": 1.7179, "step": 1112 }, { "epoch": 0.14, "grad_norm": 7.0169360507206235, "learning_rate": 9.695316638155602e-06, "loss": 1.5568, "step": 1113 }, { "epoch": 0.14, "grad_norm": 7.442230333709271, "learning_rate": 9.694623833299978e-06, "loss": 1.8601, "step": 1114 }, { "epoch": 0.14, "grad_norm": 7.547379889993122, "learning_rate": 9.693930266481015e-06, "loss": 1.8238, "step": 1115 }, { "epoch": 0.14, "grad_norm": 7.319774867868007, "learning_rate": 9.69323593781128e-06, "loss": 1.7912, "step": 1116 }, { "epoch": 0.14, "grad_norm": 8.41159799522505, "learning_rate": 9.692540847403468e-06, "loss": 1.8975, "step": 1117 }, { "epoch": 0.14, "grad_norm": 6.911793327751444, "learning_rate": 9.691844995370396e-06, "loss": 1.4408, "step": 1118 }, { "epoch": 0.14, "grad_norm": 7.907472071152368, "learning_rate": 9.691148381825004e-06, "loss": 1.7352, "step": 1119 }, { "epoch": 0.14, "grad_norm": 7.255818659931437, "learning_rate": 9.690451006880356e-06, "loss": 1.6315, "step": 1120 }, { "epoch": 0.14, "grad_norm": 7.669778840143168, "learning_rate": 9.689752870649642e-06, "loss": 1.8553, "step": 1121 }, { "epoch": 0.14, "grad_norm": 7.479305426899, "learning_rate": 9.689053973246172e-06, "loss": 1.563, "step": 1122 }, { "epoch": 0.14, "grad_norm": 7.491448600004008, "learning_rate": 9.688354314783379e-06, "loss": 1.4478, "step": 1123 }, { "epoch": 0.14, "grad_norm": 6.715648039674751, "learning_rate": 9.687653895374824e-06, "loss": 1.6638, "step": 1124 }, { "epoch": 0.14, "grad_norm": 7.1236978827965975, "learning_rate": 9.686952715134187e-06, "loss": 1.6933, "step": 1125 }, { "epoch": 0.14, "grad_norm": 8.23538407183743, "learning_rate": 9.686250774175273e-06, "loss": 1.6262, "step": 1126 }, { "epoch": 0.14, "grad_norm": 7.104401608857524, "learning_rate": 9.685548072612017e-06, "loss": 1.708, "step": 1127 }, { "epoch": 0.14, "grad_norm": 7.360841656729998, "learning_rate": 9.684844610558463e-06, "loss": 1.6868, "step": 1128 }, { "epoch": 0.14, "grad_norm": 7.114425181038042, "learning_rate": 9.684140388128791e-06, "loss": 1.5046, "step": 1129 }, { "epoch": 0.14, "grad_norm": 7.304587316925204, "learning_rate": 9.683435405437301e-06, "loss": 1.609, "step": 1130 }, { "epoch": 0.14, "grad_norm": 7.416847067393182, "learning_rate": 9.682729662598412e-06, "loss": 1.8297, "step": 1131 }, { "epoch": 0.14, "grad_norm": 7.754492462356818, "learning_rate": 9.682023159726673e-06, "loss": 1.7683, "step": 1132 }, { "epoch": 0.14, "grad_norm": 7.422022344982069, "learning_rate": 9.681315896936753e-06, "loss": 1.514, "step": 1133 }, { "epoch": 0.14, "grad_norm": 7.958181691174668, "learning_rate": 9.680607874343444e-06, "loss": 1.7784, "step": 1134 }, { "epoch": 0.14, "grad_norm": 6.7504949894699635, "learning_rate": 9.679899092061662e-06, "loss": 1.6038, "step": 1135 }, { "epoch": 0.14, "grad_norm": 7.544471306194705, "learning_rate": 9.679189550206447e-06, "loss": 1.5739, "step": 1136 }, { "epoch": 0.14, "grad_norm": 7.008876123041634, "learning_rate": 9.678479248892959e-06, "loss": 2.0175, "step": 1137 }, { "epoch": 0.14, "grad_norm": 7.632560090482106, "learning_rate": 9.677768188236487e-06, "loss": 1.6547, "step": 1138 }, { "epoch": 0.14, "grad_norm": 8.216307774190948, "learning_rate": 9.677056368352437e-06, "loss": 1.6441, "step": 1139 }, { "epoch": 0.14, "grad_norm": 7.285193249320938, "learning_rate": 9.676343789356346e-06, "loss": 1.6919, "step": 1140 }, { "epoch": 0.14, "grad_norm": 8.420290584442231, "learning_rate": 9.675630451363866e-06, "loss": 1.6972, "step": 1141 }, { "epoch": 0.14, "grad_norm": 7.296311929217165, "learning_rate": 9.674916354490776e-06, "loss": 1.7951, "step": 1142 }, { "epoch": 0.14, "grad_norm": 7.589980455266562, "learning_rate": 9.674201498852977e-06, "loss": 1.5328, "step": 1143 }, { "epoch": 0.14, "grad_norm": 6.429549955335792, "learning_rate": 9.673485884566495e-06, "loss": 1.8265, "step": 1144 }, { "epoch": 0.14, "grad_norm": 6.891572779413669, "learning_rate": 9.67276951174748e-06, "loss": 1.329, "step": 1145 }, { "epoch": 0.14, "grad_norm": 6.066314115590057, "learning_rate": 9.6720523805122e-06, "loss": 1.3729, "step": 1146 }, { "epoch": 0.14, "grad_norm": 7.108349231428533, "learning_rate": 9.67133449097705e-06, "loss": 1.4064, "step": 1147 }, { "epoch": 0.14, "grad_norm": 7.054864138009047, "learning_rate": 9.670615843258551e-06, "loss": 1.9174, "step": 1148 }, { "epoch": 0.14, "grad_norm": 7.174026637564426, "learning_rate": 9.66989643747334e-06, "loss": 1.6532, "step": 1149 }, { "epoch": 0.14, "grad_norm": 7.673132712981283, "learning_rate": 9.669176273738182e-06, "loss": 1.9221, "step": 1150 }, { "epoch": 0.14, "grad_norm": 7.6243557056292515, "learning_rate": 9.668455352169961e-06, "loss": 1.7502, "step": 1151 }, { "epoch": 0.14, "grad_norm": 8.964944703074583, "learning_rate": 9.667733672885688e-06, "loss": 1.6812, "step": 1152 }, { "epoch": 0.14, "grad_norm": 6.8681945437870615, "learning_rate": 9.6670112360025e-06, "loss": 1.4703, "step": 1153 }, { "epoch": 0.14, "grad_norm": 7.566189723166054, "learning_rate": 9.666288041637644e-06, "loss": 1.8288, "step": 1154 }, { "epoch": 0.14, "grad_norm": 7.8117954776515015, "learning_rate": 9.665564089908506e-06, "loss": 1.9601, "step": 1155 }, { "epoch": 0.14, "grad_norm": 6.871591984981343, "learning_rate": 9.664839380932583e-06, "loss": 1.4921, "step": 1156 }, { "epoch": 0.14, "grad_norm": 9.936277487438126, "learning_rate": 9.6641139148275e-06, "loss": 1.7239, "step": 1157 }, { "epoch": 0.14, "grad_norm": 8.076000052984211, "learning_rate": 9.663387691711005e-06, "loss": 1.7116, "step": 1158 }, { "epoch": 0.14, "grad_norm": 7.7615642014188175, "learning_rate": 9.662660711700967e-06, "loss": 1.412, "step": 1159 }, { "epoch": 0.14, "grad_norm": 7.8808534207541046, "learning_rate": 9.66193297491538e-06, "loss": 1.5157, "step": 1160 }, { "epoch": 0.14, "grad_norm": 7.909013213815568, "learning_rate": 9.661204481472361e-06, "loss": 1.5097, "step": 1161 }, { "epoch": 0.14, "grad_norm": 6.6199069380876185, "learning_rate": 9.660475231490145e-06, "loss": 1.545, "step": 1162 }, { "epoch": 0.14, "grad_norm": 7.642242739914698, "learning_rate": 9.659745225087095e-06, "loss": 1.9222, "step": 1163 }, { "epoch": 0.14, "grad_norm": 6.404400618336818, "learning_rate": 9.659014462381695e-06, "loss": 1.6551, "step": 1164 }, { "epoch": 0.14, "grad_norm": 6.472511691307142, "learning_rate": 9.658282943492552e-06, "loss": 1.5976, "step": 1165 }, { "epoch": 0.15, "grad_norm": 8.241206183984005, "learning_rate": 9.657550668538396e-06, "loss": 1.8286, "step": 1166 }, { "epoch": 0.15, "grad_norm": 6.755927393930996, "learning_rate": 9.65681763763808e-06, "loss": 1.7141, "step": 1167 }, { "epoch": 0.15, "grad_norm": 7.503777177406373, "learning_rate": 9.656083850910575e-06, "loss": 1.7916, "step": 1168 }, { "epoch": 0.15, "grad_norm": 7.53601054062195, "learning_rate": 9.655349308474984e-06, "loss": 1.9007, "step": 1169 }, { "epoch": 0.15, "grad_norm": 7.1006436231374295, "learning_rate": 9.654614010450522e-06, "loss": 1.5899, "step": 1170 }, { "epoch": 0.15, "grad_norm": 8.034665931001916, "learning_rate": 9.653877956956537e-06, "loss": 1.7451, "step": 1171 }, { "epoch": 0.15, "grad_norm": 7.447735119566694, "learning_rate": 9.65314114811249e-06, "loss": 1.7981, "step": 1172 }, { "epoch": 0.15, "grad_norm": 6.637953093271459, "learning_rate": 9.652403584037973e-06, "loss": 1.805, "step": 1173 }, { "epoch": 0.15, "grad_norm": 7.39599376121859, "learning_rate": 9.651665264852695e-06, "loss": 1.6492, "step": 1174 }, { "epoch": 0.15, "grad_norm": 7.6283812868616385, "learning_rate": 9.650926190676487e-06, "loss": 1.7313, "step": 1175 }, { "epoch": 0.15, "grad_norm": 7.643992634340045, "learning_rate": 9.650186361629309e-06, "loss": 1.4144, "step": 1176 }, { "epoch": 0.15, "grad_norm": 7.051687602139108, "learning_rate": 9.649445777831236e-06, "loss": 1.7698, "step": 1177 }, { "epoch": 0.15, "grad_norm": 7.275192174399576, "learning_rate": 9.64870443940247e-06, "loss": 1.7774, "step": 1178 }, { "epoch": 0.15, "grad_norm": 8.158872622663946, "learning_rate": 9.647962346463336e-06, "loss": 1.5877, "step": 1179 }, { "epoch": 0.15, "grad_norm": 7.354246035049597, "learning_rate": 9.647219499134278e-06, "loss": 1.5156, "step": 1180 }, { "epoch": 0.15, "grad_norm": 7.3153286050100945, "learning_rate": 9.646475897535864e-06, "loss": 1.9675, "step": 1181 }, { "epoch": 0.15, "grad_norm": 8.842892188943916, "learning_rate": 9.645731541788785e-06, "loss": 1.7937, "step": 1182 }, { "epoch": 0.15, "grad_norm": 7.967770888687213, "learning_rate": 9.644986432013854e-06, "loss": 1.8691, "step": 1183 }, { "epoch": 0.15, "grad_norm": 7.556259210082163, "learning_rate": 9.644240568332009e-06, "loss": 1.7305, "step": 1184 }, { "epoch": 0.15, "grad_norm": 7.271594412906199, "learning_rate": 9.643493950864302e-06, "loss": 1.6246, "step": 1185 }, { "epoch": 0.15, "grad_norm": 7.287885693280897, "learning_rate": 9.642746579731918e-06, "loss": 1.6875, "step": 1186 }, { "epoch": 0.15, "grad_norm": 7.066842403788656, "learning_rate": 9.641998455056158e-06, "loss": 1.6473, "step": 1187 }, { "epoch": 0.15, "grad_norm": 7.732819604572488, "learning_rate": 9.641249576958448e-06, "loss": 1.5514, "step": 1188 }, { "epoch": 0.15, "grad_norm": 7.474517679503934, "learning_rate": 9.640499945560332e-06, "loss": 1.8198, "step": 1189 }, { "epoch": 0.15, "grad_norm": 6.99323681425531, "learning_rate": 9.639749560983483e-06, "loss": 1.6566, "step": 1190 }, { "epoch": 0.15, "grad_norm": 7.098786603189937, "learning_rate": 9.638998423349688e-06, "loss": 2.0744, "step": 1191 }, { "epoch": 0.15, "grad_norm": 7.335607487376945, "learning_rate": 9.638246532780865e-06, "loss": 1.6083, "step": 1192 }, { "epoch": 0.15, "grad_norm": 7.079080648643856, "learning_rate": 9.637493889399048e-06, "loss": 1.6446, "step": 1193 }, { "epoch": 0.15, "grad_norm": 6.487875863586713, "learning_rate": 9.636740493326398e-06, "loss": 1.5544, "step": 1194 }, { "epoch": 0.15, "grad_norm": 6.953623484752028, "learning_rate": 9.63598634468519e-06, "loss": 1.8837, "step": 1195 }, { "epoch": 0.15, "grad_norm": 6.966083984650428, "learning_rate": 9.63523144359783e-06, "loss": 1.6938, "step": 1196 }, { "epoch": 0.15, "grad_norm": 6.373139960377982, "learning_rate": 9.634475790186842e-06, "loss": 1.6866, "step": 1197 }, { "epoch": 0.15, "grad_norm": 6.96410765954911, "learning_rate": 9.633719384574873e-06, "loss": 1.6174, "step": 1198 }, { "epoch": 0.15, "grad_norm": 7.420688942718975, "learning_rate": 9.632962226884692e-06, "loss": 1.8714, "step": 1199 }, { "epoch": 0.15, "grad_norm": 8.435118233830886, "learning_rate": 9.632204317239189e-06, "loss": 1.6727, "step": 1200 }, { "epoch": 0.15, "grad_norm": 7.560072869988558, "learning_rate": 9.631445655761378e-06, "loss": 1.6385, "step": 1201 }, { "epoch": 0.15, "grad_norm": 6.794959557432711, "learning_rate": 9.630686242574392e-06, "loss": 1.5449, "step": 1202 }, { "epoch": 0.15, "grad_norm": 7.650161492895676, "learning_rate": 9.629926077801491e-06, "loss": 1.5526, "step": 1203 }, { "epoch": 0.15, "grad_norm": 7.345685330349043, "learning_rate": 9.629165161566051e-06, "loss": 1.7205, "step": 1204 }, { "epoch": 0.15, "grad_norm": 7.1801861210303, "learning_rate": 9.628403493991574e-06, "loss": 1.5975, "step": 1205 }, { "epoch": 0.15, "grad_norm": 7.103113571958293, "learning_rate": 9.627641075201684e-06, "loss": 1.5472, "step": 1206 }, { "epoch": 0.15, "grad_norm": 7.6856922363051785, "learning_rate": 9.626877905320123e-06, "loss": 1.7762, "step": 1207 }, { "epoch": 0.15, "grad_norm": 7.174798715462076, "learning_rate": 9.626113984470761e-06, "loss": 1.9114, "step": 1208 }, { "epoch": 0.15, "grad_norm": 8.966454377378955, "learning_rate": 9.625349312777583e-06, "loss": 2.2286, "step": 1209 }, { "epoch": 0.15, "grad_norm": 8.16360878981736, "learning_rate": 9.624583890364703e-06, "loss": 1.7446, "step": 1210 }, { "epoch": 0.15, "grad_norm": 7.834030015219526, "learning_rate": 9.623817717356353e-06, "loss": 1.664, "step": 1211 }, { "epoch": 0.15, "grad_norm": 7.395470356629455, "learning_rate": 9.623050793876882e-06, "loss": 1.8928, "step": 1212 }, { "epoch": 0.15, "grad_norm": 6.971997768670043, "learning_rate": 9.622283120050772e-06, "loss": 1.6191, "step": 1213 }, { "epoch": 0.15, "grad_norm": 8.309511774661889, "learning_rate": 9.621514696002618e-06, "loss": 1.3746, "step": 1214 }, { "epoch": 0.15, "grad_norm": 6.898730687150803, "learning_rate": 9.62074552185714e-06, "loss": 1.581, "step": 1215 }, { "epoch": 0.15, "grad_norm": 7.715624993272126, "learning_rate": 9.61997559773918e-06, "loss": 1.5447, "step": 1216 }, { "epoch": 0.15, "grad_norm": 7.324935437825437, "learning_rate": 9.619204923773697e-06, "loss": 1.5143, "step": 1217 }, { "epoch": 0.15, "grad_norm": 6.406602979287841, "learning_rate": 9.618433500085782e-06, "loss": 1.3937, "step": 1218 }, { "epoch": 0.15, "grad_norm": 6.676742671734978, "learning_rate": 9.617661326800635e-06, "loss": 1.5864, "step": 1219 }, { "epoch": 0.15, "grad_norm": 8.117695799112342, "learning_rate": 9.61688840404359e-06, "loss": 1.9054, "step": 1220 }, { "epoch": 0.15, "grad_norm": 9.708705353199365, "learning_rate": 9.61611473194009e-06, "loss": 1.7425, "step": 1221 }, { "epoch": 0.15, "grad_norm": 7.530943987054739, "learning_rate": 9.615340310615713e-06, "loss": 1.7423, "step": 1222 }, { "epoch": 0.15, "grad_norm": 7.21884705324915, "learning_rate": 9.614565140196145e-06, "loss": 1.8728, "step": 1223 }, { "epoch": 0.15, "grad_norm": 7.720691330698892, "learning_rate": 9.613789220807208e-06, "loss": 1.6473, "step": 1224 }, { "epoch": 0.15, "grad_norm": 8.46087466568209, "learning_rate": 9.613012552574832e-06, "loss": 1.8678, "step": 1225 }, { "epoch": 0.15, "grad_norm": 11.474091128729093, "learning_rate": 9.612235135625076e-06, "loss": 1.7523, "step": 1226 }, { "epoch": 0.15, "grad_norm": 7.055623046679987, "learning_rate": 9.611456970084123e-06, "loss": 1.4807, "step": 1227 }, { "epoch": 0.15, "grad_norm": 7.465211170376635, "learning_rate": 9.610678056078266e-06, "loss": 1.842, "step": 1228 }, { "epoch": 0.15, "grad_norm": 6.906959883400788, "learning_rate": 9.609898393733933e-06, "loss": 1.6083, "step": 1229 }, { "epoch": 0.15, "grad_norm": 7.342879294181833, "learning_rate": 9.609117983177667e-06, "loss": 1.8892, "step": 1230 }, { "epoch": 0.15, "grad_norm": 6.350476582780891, "learning_rate": 9.608336824536131e-06, "loss": 1.5361, "step": 1231 }, { "epoch": 0.15, "grad_norm": 11.12758497260685, "learning_rate": 9.60755491793611e-06, "loss": 1.7891, "step": 1232 }, { "epoch": 0.15, "grad_norm": 6.985270324257953, "learning_rate": 9.606772263504516e-06, "loss": 1.4748, "step": 1233 }, { "epoch": 0.15, "grad_norm": 8.157222695081403, "learning_rate": 9.605988861368376e-06, "loss": 1.7211, "step": 1234 }, { "epoch": 0.15, "grad_norm": 7.074278085484562, "learning_rate": 9.60520471165484e-06, "loss": 1.7762, "step": 1235 }, { "epoch": 0.15, "grad_norm": 7.134757539475158, "learning_rate": 9.604419814491179e-06, "loss": 1.7363, "step": 1236 }, { "epoch": 0.15, "grad_norm": 6.879189847451156, "learning_rate": 9.603634170004788e-06, "loss": 1.5637, "step": 1237 }, { "epoch": 0.15, "grad_norm": 7.760793338420738, "learning_rate": 9.60284777832318e-06, "loss": 1.8359, "step": 1238 }, { "epoch": 0.15, "grad_norm": 7.690160456636299, "learning_rate": 9.602060639573994e-06, "loss": 1.5203, "step": 1239 }, { "epoch": 0.15, "grad_norm": 8.09572962205631, "learning_rate": 9.601272753884982e-06, "loss": 1.7566, "step": 1240 }, { "epoch": 0.15, "grad_norm": 6.803482850294943, "learning_rate": 9.600484121384028e-06, "loss": 1.6096, "step": 1241 }, { "epoch": 0.15, "grad_norm": 7.223045487914883, "learning_rate": 9.599694742199126e-06, "loss": 1.7277, "step": 1242 }, { "epoch": 0.15, "grad_norm": 7.432710446004264, "learning_rate": 9.598904616458398e-06, "loss": 1.4595, "step": 1243 }, { "epoch": 0.15, "grad_norm": 7.243793076225991, "learning_rate": 9.598113744290088e-06, "loss": 1.7588, "step": 1244 }, { "epoch": 0.15, "grad_norm": 6.742251769885849, "learning_rate": 9.597322125822554e-06, "loss": 1.5074, "step": 1245 }, { "epoch": 0.15, "grad_norm": 7.466040566193641, "learning_rate": 9.596529761184287e-06, "loss": 1.8279, "step": 1246 }, { "epoch": 0.16, "grad_norm": 7.51865595819551, "learning_rate": 9.595736650503887e-06, "loss": 1.7791, "step": 1247 }, { "epoch": 0.16, "grad_norm": 7.696964154247908, "learning_rate": 9.594942793910082e-06, "loss": 1.7308, "step": 1248 }, { "epoch": 0.16, "grad_norm": 7.199038839286995, "learning_rate": 9.59414819153172e-06, "loss": 1.6593, "step": 1249 }, { "epoch": 0.16, "grad_norm": 7.000884322101906, "learning_rate": 9.593352843497768e-06, "loss": 1.5286, "step": 1250 }, { "epoch": 0.16, "grad_norm": 8.204943017391283, "learning_rate": 9.592556749937315e-06, "loss": 1.605, "step": 1251 }, { "epoch": 0.16, "grad_norm": 6.90485342916897, "learning_rate": 9.591759910979572e-06, "loss": 1.8336, "step": 1252 }, { "epoch": 0.16, "grad_norm": 8.094149929978942, "learning_rate": 9.590962326753872e-06, "loss": 1.7419, "step": 1253 }, { "epoch": 0.16, "grad_norm": 8.102636467015971, "learning_rate": 9.590163997389665e-06, "loss": 1.569, "step": 1254 }, { "epoch": 0.16, "grad_norm": 6.464056374574768, "learning_rate": 9.589364923016524e-06, "loss": 1.5288, "step": 1255 }, { "epoch": 0.16, "grad_norm": 7.352959264439021, "learning_rate": 9.588565103764148e-06, "loss": 1.7863, "step": 1256 }, { "epoch": 0.16, "grad_norm": 7.008262827817541, "learning_rate": 9.587764539762345e-06, "loss": 1.563, "step": 1257 }, { "epoch": 0.16, "grad_norm": 7.174419841223719, "learning_rate": 9.586963231141055e-06, "loss": 1.7649, "step": 1258 }, { "epoch": 0.16, "grad_norm": 7.315108845701145, "learning_rate": 9.586161178030333e-06, "loss": 1.7481, "step": 1259 }, { "epoch": 0.16, "grad_norm": 7.4194462534636605, "learning_rate": 9.58535838056036e-06, "loss": 1.7214, "step": 1260 }, { "epoch": 0.16, "grad_norm": 7.5737047472938785, "learning_rate": 9.584554838861431e-06, "loss": 1.7722, "step": 1261 }, { "epoch": 0.16, "grad_norm": 7.312563135931859, "learning_rate": 9.583750553063967e-06, "loss": 1.7576, "step": 1262 }, { "epoch": 0.16, "grad_norm": 7.9824157910903315, "learning_rate": 9.582945523298507e-06, "loss": 1.5874, "step": 1263 }, { "epoch": 0.16, "grad_norm": 7.5038952184026035, "learning_rate": 9.582139749695713e-06, "loss": 1.7884, "step": 1264 }, { "epoch": 0.16, "grad_norm": 8.061734513289409, "learning_rate": 9.581333232386365e-06, "loss": 1.819, "step": 1265 }, { "epoch": 0.16, "grad_norm": 7.480427014442601, "learning_rate": 9.580525971501367e-06, "loss": 1.7465, "step": 1266 }, { "epoch": 0.16, "grad_norm": 8.159862913192802, "learning_rate": 9.57971796717174e-06, "loss": 1.651, "step": 1267 }, { "epoch": 0.16, "grad_norm": 7.2553222202296, "learning_rate": 9.57890921952863e-06, "loss": 1.4411, "step": 1268 }, { "epoch": 0.16, "grad_norm": 7.020484923101717, "learning_rate": 9.578099728703298e-06, "loss": 1.7926, "step": 1269 }, { "epoch": 0.16, "grad_norm": 7.003817451487589, "learning_rate": 9.57728949482713e-06, "loss": 1.5813, "step": 1270 }, { "epoch": 0.16, "grad_norm": 6.776708988551682, "learning_rate": 9.576478518031634e-06, "loss": 1.7754, "step": 1271 }, { "epoch": 0.16, "grad_norm": 7.780731996003984, "learning_rate": 9.57566679844843e-06, "loss": 2.0844, "step": 1272 }, { "epoch": 0.16, "grad_norm": 6.416043297649683, "learning_rate": 9.574854336209272e-06, "loss": 1.5548, "step": 1273 }, { "epoch": 0.16, "grad_norm": 7.073245918716733, "learning_rate": 9.574041131446024e-06, "loss": 1.4728, "step": 1274 }, { "epoch": 0.16, "grad_norm": 7.337107296493559, "learning_rate": 9.573227184290672e-06, "loss": 1.8024, "step": 1275 }, { "epoch": 0.16, "grad_norm": 6.185915887546448, "learning_rate": 9.572412494875325e-06, "loss": 1.5208, "step": 1276 }, { "epoch": 0.16, "grad_norm": 7.623964161939942, "learning_rate": 9.571597063332211e-06, "loss": 1.6665, "step": 1277 }, { "epoch": 0.16, "grad_norm": 7.8254640167048635, "learning_rate": 9.570780889793681e-06, "loss": 1.8207, "step": 1278 }, { "epoch": 0.16, "grad_norm": 6.509568181748368, "learning_rate": 9.569963974392203e-06, "loss": 1.6781, "step": 1279 }, { "epoch": 0.16, "grad_norm": 7.527474895738566, "learning_rate": 9.569146317260367e-06, "loss": 1.9976, "step": 1280 }, { "epoch": 0.16, "grad_norm": 7.403004085059516, "learning_rate": 9.568327918530885e-06, "loss": 1.7814, "step": 1281 }, { "epoch": 0.16, "grad_norm": 7.0665976311312235, "learning_rate": 9.567508778336584e-06, "loss": 1.5834, "step": 1282 }, { "epoch": 0.16, "grad_norm": 6.843440636980945, "learning_rate": 9.56668889681042e-06, "loss": 1.6662, "step": 1283 }, { "epoch": 0.16, "grad_norm": 7.03580049537233, "learning_rate": 9.565868274085459e-06, "loss": 1.8403, "step": 1284 }, { "epoch": 0.16, "grad_norm": 7.546035088926341, "learning_rate": 9.565046910294895e-06, "loss": 1.8575, "step": 1285 }, { "epoch": 0.16, "grad_norm": 7.316192264490101, "learning_rate": 9.564224805572043e-06, "loss": 1.5239, "step": 1286 }, { "epoch": 0.16, "grad_norm": 7.634071643575928, "learning_rate": 9.563401960050329e-06, "loss": 1.7529, "step": 1287 }, { "epoch": 0.16, "grad_norm": 7.297951803081174, "learning_rate": 9.56257837386331e-06, "loss": 1.9619, "step": 1288 }, { "epoch": 0.16, "grad_norm": 7.798031252099183, "learning_rate": 9.561754047144656e-06, "loss": 1.5551, "step": 1289 }, { "epoch": 0.16, "grad_norm": 7.015326556904297, "learning_rate": 9.560928980028159e-06, "loss": 1.6431, "step": 1290 }, { "epoch": 0.16, "grad_norm": 6.589453076554638, "learning_rate": 9.560103172647737e-06, "loss": 1.7246, "step": 1291 }, { "epoch": 0.16, "grad_norm": 7.029464327980055, "learning_rate": 9.559276625137416e-06, "loss": 1.621, "step": 1292 }, { "epoch": 0.16, "grad_norm": 7.232144187635329, "learning_rate": 9.558449337631357e-06, "loss": 1.5608, "step": 1293 }, { "epoch": 0.16, "grad_norm": 6.527234936494026, "learning_rate": 9.557621310263827e-06, "loss": 1.6084, "step": 1294 }, { "epoch": 0.16, "grad_norm": 6.434543810096429, "learning_rate": 9.556792543169221e-06, "loss": 1.7955, "step": 1295 }, { "epoch": 0.16, "grad_norm": 6.5717223504400595, "learning_rate": 9.555963036482056e-06, "loss": 1.5297, "step": 1296 }, { "epoch": 0.16, "grad_norm": 7.245766096834106, "learning_rate": 9.555132790336962e-06, "loss": 1.4576, "step": 1297 }, { "epoch": 0.16, "grad_norm": 9.181510076297872, "learning_rate": 9.554301804868693e-06, "loss": 1.9287, "step": 1298 }, { "epoch": 0.16, "grad_norm": 7.36873455513587, "learning_rate": 9.553470080212122e-06, "loss": 1.8065, "step": 1299 }, { "epoch": 0.16, "grad_norm": 6.844947110466986, "learning_rate": 9.552637616502243e-06, "loss": 1.739, "step": 1300 }, { "epoch": 0.16, "grad_norm": 7.533457624464909, "learning_rate": 9.551804413874169e-06, "loss": 1.4595, "step": 1301 }, { "epoch": 0.16, "grad_norm": 6.626068348057797, "learning_rate": 9.550970472463137e-06, "loss": 1.3791, "step": 1302 }, { "epoch": 0.16, "grad_norm": 7.070001197094793, "learning_rate": 9.550135792404495e-06, "loss": 1.5765, "step": 1303 }, { "epoch": 0.16, "grad_norm": 7.780371113305877, "learning_rate": 9.549300373833719e-06, "loss": 1.8375, "step": 1304 }, { "epoch": 0.16, "grad_norm": 9.099526391984725, "learning_rate": 9.548464216886401e-06, "loss": 1.6573, "step": 1305 }, { "epoch": 0.16, "grad_norm": 8.344947014581235, "learning_rate": 9.547627321698257e-06, "loss": 1.6855, "step": 1306 }, { "epoch": 0.16, "grad_norm": 7.006900396552503, "learning_rate": 9.546789688405114e-06, "loss": 1.7465, "step": 1307 }, { "epoch": 0.16, "grad_norm": 7.695335595440753, "learning_rate": 9.54595131714293e-06, "loss": 1.4483, "step": 1308 }, { "epoch": 0.16, "grad_norm": 6.9717996983082315, "learning_rate": 9.545112208047772e-06, "loss": 1.5642, "step": 1309 }, { "epoch": 0.16, "grad_norm": 7.780856682349418, "learning_rate": 9.544272361255838e-06, "loss": 1.5014, "step": 1310 }, { "epoch": 0.16, "grad_norm": 8.814668530921184, "learning_rate": 9.543431776903436e-06, "loss": 1.6151, "step": 1311 }, { "epoch": 0.16, "grad_norm": 7.123435886653715, "learning_rate": 9.542590455127e-06, "loss": 1.6896, "step": 1312 }, { "epoch": 0.16, "grad_norm": 6.462874087090037, "learning_rate": 9.541748396063077e-06, "loss": 1.5333, "step": 1313 }, { "epoch": 0.16, "grad_norm": 6.8013840319332335, "learning_rate": 9.540905599848341e-06, "loss": 1.6822, "step": 1314 }, { "epoch": 0.16, "grad_norm": 8.940315925840046, "learning_rate": 9.540062066619581e-06, "loss": 1.7895, "step": 1315 }, { "epoch": 0.16, "grad_norm": 7.548038551185305, "learning_rate": 9.539217796513708e-06, "loss": 1.571, "step": 1316 }, { "epoch": 0.16, "grad_norm": 6.009342351829622, "learning_rate": 9.538372789667751e-06, "loss": 1.2509, "step": 1317 }, { "epoch": 0.16, "grad_norm": 7.350881649833198, "learning_rate": 9.537527046218861e-06, "loss": 1.617, "step": 1318 }, { "epoch": 0.16, "grad_norm": 6.150123493252902, "learning_rate": 9.536680566304306e-06, "loss": 1.7809, "step": 1319 }, { "epoch": 0.16, "grad_norm": 6.51566882891457, "learning_rate": 9.535833350061473e-06, "loss": 1.6801, "step": 1320 }, { "epoch": 0.16, "grad_norm": 7.17409148586635, "learning_rate": 9.534985397627872e-06, "loss": 1.5565, "step": 1321 }, { "epoch": 0.16, "grad_norm": 7.2954173048236814, "learning_rate": 9.534136709141128e-06, "loss": 1.6413, "step": 1322 }, { "epoch": 0.16, "grad_norm": 7.198845532609944, "learning_rate": 9.533287284738989e-06, "loss": 1.631, "step": 1323 }, { "epoch": 0.16, "grad_norm": 7.557293289682839, "learning_rate": 9.532437124559322e-06, "loss": 1.7966, "step": 1324 }, { "epoch": 0.16, "grad_norm": 9.002300414282974, "learning_rate": 9.531586228740111e-06, "loss": 1.7498, "step": 1325 }, { "epoch": 0.16, "grad_norm": 8.270352244501872, "learning_rate": 9.530734597419463e-06, "loss": 1.9455, "step": 1326 }, { "epoch": 0.17, "grad_norm": 6.80403008694154, "learning_rate": 9.5298822307356e-06, "loss": 1.4, "step": 1327 }, { "epoch": 0.17, "grad_norm": 7.513847321783936, "learning_rate": 9.529029128826868e-06, "loss": 1.7008, "step": 1328 }, { "epoch": 0.17, "grad_norm": 7.279690375712646, "learning_rate": 9.52817529183173e-06, "loss": 1.5838, "step": 1329 }, { "epoch": 0.17, "grad_norm": 8.16054700656364, "learning_rate": 9.527320719888768e-06, "loss": 2.0802, "step": 1330 }, { "epoch": 0.17, "grad_norm": 7.12439803280822, "learning_rate": 9.526465413136683e-06, "loss": 1.8201, "step": 1331 }, { "epoch": 0.17, "grad_norm": 7.139185094871699, "learning_rate": 9.525609371714297e-06, "loss": 1.9068, "step": 1332 }, { "epoch": 0.17, "grad_norm": 6.8522250448054285, "learning_rate": 9.524752595760552e-06, "loss": 1.5718, "step": 1333 }, { "epoch": 0.17, "grad_norm": 7.205704034245906, "learning_rate": 9.523895085414503e-06, "loss": 1.6875, "step": 1334 }, { "epoch": 0.17, "grad_norm": 7.046546501828301, "learning_rate": 9.523036840815331e-06, "loss": 1.6964, "step": 1335 }, { "epoch": 0.17, "grad_norm": 7.549343811399341, "learning_rate": 9.522177862102333e-06, "loss": 1.7661, "step": 1336 }, { "epoch": 0.17, "grad_norm": 6.906960645213272, "learning_rate": 9.521318149414929e-06, "loss": 1.5806, "step": 1337 }, { "epoch": 0.17, "grad_norm": 8.386851668557188, "learning_rate": 9.520457702892651e-06, "loss": 1.8105, "step": 1338 }, { "epoch": 0.17, "grad_norm": 6.911677247699874, "learning_rate": 9.519596522675158e-06, "loss": 1.6167, "step": 1339 }, { "epoch": 0.17, "grad_norm": 6.842987963915712, "learning_rate": 9.518734608902222e-06, "loss": 1.6997, "step": 1340 }, { "epoch": 0.17, "grad_norm": 7.051771060451891, "learning_rate": 9.517871961713736e-06, "loss": 1.7642, "step": 1341 }, { "epoch": 0.17, "grad_norm": 7.388763514655094, "learning_rate": 9.517008581249713e-06, "loss": 1.6206, "step": 1342 }, { "epoch": 0.17, "grad_norm": 6.741429766469613, "learning_rate": 9.516144467650286e-06, "loss": 1.4474, "step": 1343 }, { "epoch": 0.17, "grad_norm": 7.435482512123905, "learning_rate": 9.515279621055705e-06, "loss": 1.6198, "step": 1344 }, { "epoch": 0.17, "grad_norm": 7.078005534317267, "learning_rate": 9.514414041606334e-06, "loss": 1.7123, "step": 1345 }, { "epoch": 0.17, "grad_norm": 6.616380245391111, "learning_rate": 9.51354772944267e-06, "loss": 1.5223, "step": 1346 }, { "epoch": 0.17, "grad_norm": 6.802669615550675, "learning_rate": 9.512680684705314e-06, "loss": 1.8018, "step": 1347 }, { "epoch": 0.17, "grad_norm": 7.693817481485299, "learning_rate": 9.511812907534995e-06, "loss": 1.3283, "step": 1348 }, { "epoch": 0.17, "grad_norm": 9.832811975818174, "learning_rate": 9.510944398072557e-06, "loss": 1.7076, "step": 1349 }, { "epoch": 0.17, "grad_norm": 7.340024846120869, "learning_rate": 9.510075156458965e-06, "loss": 1.6832, "step": 1350 }, { "epoch": 0.17, "grad_norm": 6.672141574731841, "learning_rate": 9.509205182835298e-06, "loss": 1.7359, "step": 1351 }, { "epoch": 0.17, "grad_norm": 6.702489381514041, "learning_rate": 9.508334477342763e-06, "loss": 1.7371, "step": 1352 }, { "epoch": 0.17, "grad_norm": 7.12174181942787, "learning_rate": 9.507463040122676e-06, "loss": 1.6125, "step": 1353 }, { "epoch": 0.17, "grad_norm": 7.076297029991796, "learning_rate": 9.506590871316476e-06, "loss": 1.4451, "step": 1354 }, { "epoch": 0.17, "grad_norm": 8.24073683045988, "learning_rate": 9.505717971065724e-06, "loss": 1.6027, "step": 1355 }, { "epoch": 0.17, "grad_norm": 8.202898598206973, "learning_rate": 9.504844339512096e-06, "loss": 1.6774, "step": 1356 }, { "epoch": 0.17, "grad_norm": 7.140806708992558, "learning_rate": 9.503969976797386e-06, "loss": 1.5596, "step": 1357 }, { "epoch": 0.17, "grad_norm": 7.282127436117041, "learning_rate": 9.503094883063507e-06, "loss": 1.8328, "step": 1358 }, { "epoch": 0.17, "grad_norm": 7.00469952453832, "learning_rate": 9.502219058452492e-06, "loss": 1.8165, "step": 1359 }, { "epoch": 0.17, "grad_norm": 8.09110011270631, "learning_rate": 9.501342503106495e-06, "loss": 1.7772, "step": 1360 }, { "epoch": 0.17, "grad_norm": 7.205695468626552, "learning_rate": 9.500465217167783e-06, "loss": 1.7517, "step": 1361 }, { "epoch": 0.17, "grad_norm": 6.431491549960077, "learning_rate": 9.499587200778744e-06, "loss": 1.5405, "step": 1362 }, { "epoch": 0.17, "grad_norm": 6.816012195041328, "learning_rate": 9.498708454081886e-06, "loss": 1.6585, "step": 1363 }, { "epoch": 0.17, "grad_norm": 6.9156046226812276, "learning_rate": 9.497828977219833e-06, "loss": 1.8587, "step": 1364 }, { "epoch": 0.17, "grad_norm": 7.506027418672857, "learning_rate": 9.496948770335332e-06, "loss": 1.8688, "step": 1365 }, { "epoch": 0.17, "grad_norm": 7.712614982054217, "learning_rate": 9.496067833571243e-06, "loss": 1.8055, "step": 1366 }, { "epoch": 0.17, "grad_norm": 8.15712603286237, "learning_rate": 9.495186167070547e-06, "loss": 1.8943, "step": 1367 }, { "epoch": 0.17, "grad_norm": 7.333409118799239, "learning_rate": 9.494303770976344e-06, "loss": 1.373, "step": 1368 }, { "epoch": 0.17, "grad_norm": 7.108053452005245, "learning_rate": 9.493420645431853e-06, "loss": 1.5491, "step": 1369 }, { "epoch": 0.17, "grad_norm": 8.398669455536286, "learning_rate": 9.492536790580406e-06, "loss": 1.5493, "step": 1370 }, { "epoch": 0.17, "grad_norm": 6.634575388651538, "learning_rate": 9.491652206565462e-06, "loss": 1.7835, "step": 1371 }, { "epoch": 0.17, "grad_norm": 7.520764103594344, "learning_rate": 9.490766893530592e-06, "loss": 1.7701, "step": 1372 }, { "epoch": 0.17, "grad_norm": 7.57683899393943, "learning_rate": 9.489880851619486e-06, "loss": 1.8569, "step": 1373 }, { "epoch": 0.17, "grad_norm": 7.492805452335427, "learning_rate": 9.488994080975957e-06, "loss": 1.4446, "step": 1374 }, { "epoch": 0.17, "grad_norm": 6.6961872729619385, "learning_rate": 9.488106581743929e-06, "loss": 1.6506, "step": 1375 }, { "epoch": 0.17, "grad_norm": 7.001381604904056, "learning_rate": 9.48721835406745e-06, "loss": 1.586, "step": 1376 }, { "epoch": 0.17, "grad_norm": 7.90293199541345, "learning_rate": 9.486329398090683e-06, "loss": 1.5437, "step": 1377 }, { "epoch": 0.17, "grad_norm": 6.522079589067728, "learning_rate": 9.485439713957912e-06, "loss": 1.6908, "step": 1378 }, { "epoch": 0.17, "grad_norm": 8.066813595444593, "learning_rate": 9.484549301813537e-06, "loss": 1.5016, "step": 1379 }, { "epoch": 0.17, "grad_norm": 6.972355628928858, "learning_rate": 9.483658161802077e-06, "loss": 1.5942, "step": 1380 }, { "epoch": 0.17, "grad_norm": 6.52855850925791, "learning_rate": 9.48276629406817e-06, "loss": 1.5237, "step": 1381 }, { "epoch": 0.17, "grad_norm": 8.588536150112652, "learning_rate": 9.481873698756566e-06, "loss": 1.7631, "step": 1382 }, { "epoch": 0.17, "grad_norm": 7.621535409510003, "learning_rate": 9.480980376012145e-06, "loss": 1.7738, "step": 1383 }, { "epoch": 0.17, "grad_norm": 7.529503818671551, "learning_rate": 9.480086325979894e-06, "loss": 1.8138, "step": 1384 }, { "epoch": 0.17, "grad_norm": 7.682836931909514, "learning_rate": 9.479191548804923e-06, "loss": 1.9749, "step": 1385 }, { "epoch": 0.17, "grad_norm": 8.421850432638612, "learning_rate": 9.47829604463246e-06, "loss": 1.3898, "step": 1386 }, { "epoch": 0.17, "grad_norm": 6.319152185221257, "learning_rate": 9.477399813607849e-06, "loss": 1.4813, "step": 1387 }, { "epoch": 0.17, "grad_norm": 7.52068803507144, "learning_rate": 9.476502855876554e-06, "loss": 1.7047, "step": 1388 }, { "epoch": 0.17, "grad_norm": 6.795400123375438, "learning_rate": 9.475605171584157e-06, "loss": 1.6774, "step": 1389 }, { "epoch": 0.17, "grad_norm": 7.099980103875327, "learning_rate": 9.474706760876356e-06, "loss": 1.6683, "step": 1390 }, { "epoch": 0.17, "grad_norm": 6.080107901623833, "learning_rate": 9.47380762389897e-06, "loss": 1.6074, "step": 1391 }, { "epoch": 0.17, "grad_norm": 6.819052171776723, "learning_rate": 9.47290776079793e-06, "loss": 1.8916, "step": 1392 }, { "epoch": 0.17, "grad_norm": 7.087905344214223, "learning_rate": 9.472007171719292e-06, "loss": 1.6881, "step": 1393 }, { "epoch": 0.17, "grad_norm": 7.000068490848602, "learning_rate": 9.471105856809226e-06, "loss": 1.3956, "step": 1394 }, { "epoch": 0.17, "grad_norm": 8.28802812800705, "learning_rate": 9.470203816214019e-06, "loss": 1.7938, "step": 1395 }, { "epoch": 0.17, "grad_norm": 6.648145792387823, "learning_rate": 9.46930105008008e-06, "loss": 1.4753, "step": 1396 }, { "epoch": 0.17, "grad_norm": 6.3765619606285355, "learning_rate": 9.468397558553928e-06, "loss": 1.425, "step": 1397 }, { "epoch": 0.17, "grad_norm": 8.54107171224222, "learning_rate": 9.46749334178221e-06, "loss": 1.956, "step": 1398 }, { "epoch": 0.17, "grad_norm": 8.887443882559262, "learning_rate": 9.466588399911682e-06, "loss": 1.5993, "step": 1399 }, { "epoch": 0.17, "grad_norm": 7.139160959829565, "learning_rate": 9.465682733089224e-06, "loss": 1.7155, "step": 1400 }, { "epoch": 0.17, "grad_norm": 7.4851998754692115, "learning_rate": 9.464776341461828e-06, "loss": 1.7604, "step": 1401 }, { "epoch": 0.17, "grad_norm": 7.679638223470576, "learning_rate": 9.463869225176605e-06, "loss": 1.571, "step": 1402 }, { "epoch": 0.17, "grad_norm": 6.628713525628536, "learning_rate": 9.462961384380789e-06, "loss": 1.579, "step": 1403 }, { "epoch": 0.17, "grad_norm": 7.804785878944785, "learning_rate": 9.462052819221726e-06, "loss": 1.8029, "step": 1404 }, { "epoch": 0.17, "grad_norm": 7.9558313679203545, "learning_rate": 9.461143529846882e-06, "loss": 1.4651, "step": 1405 }, { "epoch": 0.17, "grad_norm": 6.510766052199631, "learning_rate": 9.460233516403836e-06, "loss": 1.7556, "step": 1406 }, { "epoch": 0.17, "grad_norm": 6.944358439655696, "learning_rate": 9.459322779040292e-06, "loss": 1.6275, "step": 1407 }, { "epoch": 0.18, "grad_norm": 7.270517982619676, "learning_rate": 9.458411317904066e-06, "loss": 1.4163, "step": 1408 }, { "epoch": 0.18, "grad_norm": 7.838093062999063, "learning_rate": 9.457499133143096e-06, "loss": 1.7439, "step": 1409 }, { "epoch": 0.18, "grad_norm": 8.147499114115169, "learning_rate": 9.45658622490543e-06, "loss": 1.6135, "step": 1410 }, { "epoch": 0.18, "grad_norm": 7.95693401074822, "learning_rate": 9.455672593339241e-06, "loss": 1.9618, "step": 1411 }, { "epoch": 0.18, "grad_norm": 7.029312943345053, "learning_rate": 9.454758238592816e-06, "loss": 1.5987, "step": 1412 }, { "epoch": 0.18, "grad_norm": 8.025219160026534, "learning_rate": 9.453843160814558e-06, "loss": 1.7606, "step": 1413 }, { "epoch": 0.18, "grad_norm": 6.930286844617501, "learning_rate": 9.452927360152993e-06, "loss": 1.5474, "step": 1414 }, { "epoch": 0.18, "grad_norm": 7.491978948206925, "learning_rate": 9.452010836756756e-06, "loss": 1.6141, "step": 1415 }, { "epoch": 0.18, "grad_norm": 8.49469936011807, "learning_rate": 9.451093590774609e-06, "loss": 1.5687, "step": 1416 }, { "epoch": 0.18, "grad_norm": 7.832491345834431, "learning_rate": 9.45017562235542e-06, "loss": 1.5414, "step": 1417 }, { "epoch": 0.18, "grad_norm": 6.636004864383721, "learning_rate": 9.449256931648185e-06, "loss": 1.6954, "step": 1418 }, { "epoch": 0.18, "grad_norm": 7.672951780221844, "learning_rate": 9.44833751880201e-06, "loss": 1.5951, "step": 1419 }, { "epoch": 0.18, "grad_norm": 7.435625617857992, "learning_rate": 9.447417383966124e-06, "loss": 1.463, "step": 1420 }, { "epoch": 0.18, "grad_norm": 7.868835013976767, "learning_rate": 9.446496527289866e-06, "loss": 1.5613, "step": 1421 }, { "epoch": 0.18, "grad_norm": 7.785194472483078, "learning_rate": 9.445574948922698e-06, "loss": 1.6718, "step": 1422 }, { "epoch": 0.18, "grad_norm": 7.367055504864576, "learning_rate": 9.444652649014198e-06, "loss": 1.5628, "step": 1423 }, { "epoch": 0.18, "grad_norm": 7.866998546824561, "learning_rate": 9.443729627714059e-06, "loss": 1.6225, "step": 1424 }, { "epoch": 0.18, "grad_norm": 6.879228069449073, "learning_rate": 9.442805885172092e-06, "loss": 1.1952, "step": 1425 }, { "epoch": 0.18, "grad_norm": 6.91549527038015, "learning_rate": 9.441881421538228e-06, "loss": 1.7562, "step": 1426 }, { "epoch": 0.18, "grad_norm": 7.550461999791994, "learning_rate": 9.440956236962511e-06, "loss": 1.4518, "step": 1427 }, { "epoch": 0.18, "grad_norm": 7.4878345384333205, "learning_rate": 9.440030331595103e-06, "loss": 1.7238, "step": 1428 }, { "epoch": 0.18, "grad_norm": 7.735336330158584, "learning_rate": 9.439103705586283e-06, "loss": 1.7656, "step": 1429 }, { "epoch": 0.18, "grad_norm": 6.887022577822192, "learning_rate": 9.43817635908645e-06, "loss": 1.6644, "step": 1430 }, { "epoch": 0.18, "grad_norm": 7.695586680019524, "learning_rate": 9.437248292246116e-06, "loss": 1.5208, "step": 1431 }, { "epoch": 0.18, "grad_norm": 8.511269867855866, "learning_rate": 9.43631950521591e-06, "loss": 1.8275, "step": 1432 }, { "epoch": 0.18, "grad_norm": 8.860113220529968, "learning_rate": 9.435389998146582e-06, "loss": 1.6182, "step": 1433 }, { "epoch": 0.18, "grad_norm": 7.476397988366851, "learning_rate": 9.434459771188996e-06, "loss": 1.6506, "step": 1434 }, { "epoch": 0.18, "grad_norm": 7.5552705167994, "learning_rate": 9.433528824494131e-06, "loss": 1.7041, "step": 1435 }, { "epoch": 0.18, "grad_norm": 7.270289580916755, "learning_rate": 9.432597158213083e-06, "loss": 1.5953, "step": 1436 }, { "epoch": 0.18, "grad_norm": 7.8790772504553885, "learning_rate": 9.431664772497071e-06, "loss": 1.6977, "step": 1437 }, { "epoch": 0.18, "grad_norm": 7.492431228258403, "learning_rate": 9.430731667497425e-06, "loss": 1.7924, "step": 1438 }, { "epoch": 0.18, "grad_norm": 6.318855055647529, "learning_rate": 9.429797843365594e-06, "loss": 1.2667, "step": 1439 }, { "epoch": 0.18, "grad_norm": 7.405916049586825, "learning_rate": 9.42886330025314e-06, "loss": 1.9503, "step": 1440 }, { "epoch": 0.18, "grad_norm": 7.364217426307138, "learning_rate": 9.427928038311744e-06, "loss": 1.5063, "step": 1441 }, { "epoch": 0.18, "grad_norm": 7.281670925048569, "learning_rate": 9.426992057693208e-06, "loss": 1.5917, "step": 1442 }, { "epoch": 0.18, "grad_norm": 8.176084738405038, "learning_rate": 9.426055358549444e-06, "loss": 1.6219, "step": 1443 }, { "epoch": 0.18, "grad_norm": 7.60467582400312, "learning_rate": 9.425117941032485e-06, "loss": 2.0641, "step": 1444 }, { "epoch": 0.18, "grad_norm": 6.902664689964815, "learning_rate": 9.424179805294478e-06, "loss": 1.5475, "step": 1445 }, { "epoch": 0.18, "grad_norm": 7.293209670901922, "learning_rate": 9.42324095148769e-06, "loss": 1.5142, "step": 1446 }, { "epoch": 0.18, "grad_norm": 7.4835548276339505, "learning_rate": 9.422301379764497e-06, "loss": 1.677, "step": 1447 }, { "epoch": 0.18, "grad_norm": 6.7837597435120545, "learning_rate": 9.421361090277401e-06, "loss": 1.4883, "step": 1448 }, { "epoch": 0.18, "grad_norm": 7.338368112240077, "learning_rate": 9.420420083179015e-06, "loss": 1.5545, "step": 1449 }, { "epoch": 0.18, "grad_norm": 7.3799468914669255, "learning_rate": 9.41947835862207e-06, "loss": 1.4848, "step": 1450 }, { "epoch": 0.18, "grad_norm": 7.190619824897909, "learning_rate": 9.41853591675941e-06, "loss": 1.3706, "step": 1451 }, { "epoch": 0.18, "grad_norm": 6.504722814000297, "learning_rate": 9.417592757744005e-06, "loss": 1.6048, "step": 1452 }, { "epoch": 0.18, "grad_norm": 6.693210907435623, "learning_rate": 9.41664888172893e-06, "loss": 1.5024, "step": 1453 }, { "epoch": 0.18, "grad_norm": 6.892199584496102, "learning_rate": 9.415704288867381e-06, "loss": 1.5056, "step": 1454 }, { "epoch": 0.18, "grad_norm": 7.2789602475724475, "learning_rate": 9.414758979312673e-06, "loss": 1.7126, "step": 1455 }, { "epoch": 0.18, "grad_norm": 7.720353083878373, "learning_rate": 9.413812953218233e-06, "loss": 1.5172, "step": 1456 }, { "epoch": 0.18, "grad_norm": 7.232473130041129, "learning_rate": 9.412866210737609e-06, "loss": 2.025, "step": 1457 }, { "epoch": 0.18, "grad_norm": 7.755044127048572, "learning_rate": 9.41191875202446e-06, "loss": 1.5264, "step": 1458 }, { "epoch": 0.18, "grad_norm": 7.798081564052816, "learning_rate": 9.410970577232564e-06, "loss": 1.4129, "step": 1459 }, { "epoch": 0.18, "grad_norm": 7.209537211158705, "learning_rate": 9.410021686515815e-06, "loss": 1.4998, "step": 1460 }, { "epoch": 0.18, "grad_norm": 7.377966867003925, "learning_rate": 9.409072080028225e-06, "loss": 1.7409, "step": 1461 }, { "epoch": 0.18, "grad_norm": 7.108375044245903, "learning_rate": 9.408121757923919e-06, "loss": 1.668, "step": 1462 }, { "epoch": 0.18, "grad_norm": 9.274159331537785, "learning_rate": 9.407170720357139e-06, "loss": 1.5152, "step": 1463 }, { "epoch": 0.18, "grad_norm": 7.176069690198273, "learning_rate": 9.406218967482246e-06, "loss": 1.4658, "step": 1464 }, { "epoch": 0.18, "grad_norm": 7.503418764808526, "learning_rate": 9.40526649945371e-06, "loss": 1.5184, "step": 1465 }, { "epoch": 0.18, "grad_norm": 7.986556957546882, "learning_rate": 9.404313316426127e-06, "loss": 1.6407, "step": 1466 }, { "epoch": 0.18, "grad_norm": 7.301618723747065, "learning_rate": 9.4033594185542e-06, "loss": 1.8016, "step": 1467 }, { "epoch": 0.18, "grad_norm": 6.912399298608378, "learning_rate": 9.402404805992755e-06, "loss": 1.3306, "step": 1468 }, { "epoch": 0.18, "grad_norm": 5.999240363476229, "learning_rate": 9.40144947889673e-06, "loss": 1.4441, "step": 1469 }, { "epoch": 0.18, "grad_norm": 6.800917034446088, "learning_rate": 9.400493437421177e-06, "loss": 1.4557, "step": 1470 }, { "epoch": 0.18, "grad_norm": 7.2854681170339095, "learning_rate": 9.399536681721271e-06, "loss": 1.7115, "step": 1471 }, { "epoch": 0.18, "grad_norm": 7.356163841708347, "learning_rate": 9.398579211952296e-06, "loss": 1.6273, "step": 1472 }, { "epoch": 0.18, "grad_norm": 6.590576918843098, "learning_rate": 9.397621028269654e-06, "loss": 1.7291, "step": 1473 }, { "epoch": 0.18, "grad_norm": 7.697901105578593, "learning_rate": 9.396662130828869e-06, "loss": 1.584, "step": 1474 }, { "epoch": 0.18, "grad_norm": 7.738197796320046, "learning_rate": 9.395702519785567e-06, "loss": 1.6712, "step": 1475 }, { "epoch": 0.18, "grad_norm": 7.981808718878038, "learning_rate": 9.394742195295506e-06, "loss": 1.5018, "step": 1476 }, { "epoch": 0.18, "grad_norm": 7.520416274072773, "learning_rate": 9.393781157514546e-06, "loss": 1.547, "step": 1477 }, { "epoch": 0.18, "grad_norm": 7.01363590837145, "learning_rate": 9.39281940659867e-06, "loss": 1.3011, "step": 1478 }, { "epoch": 0.18, "grad_norm": 7.97823926970821, "learning_rate": 9.391856942703979e-06, "loss": 1.555, "step": 1479 }, { "epoch": 0.18, "grad_norm": 8.091000732137097, "learning_rate": 9.390893765986684e-06, "loss": 1.6419, "step": 1480 }, { "epoch": 0.18, "grad_norm": 6.534998457698246, "learning_rate": 9.389929876603112e-06, "loss": 1.7018, "step": 1481 }, { "epoch": 0.18, "grad_norm": 8.031380689500523, "learning_rate": 9.388965274709712e-06, "loss": 1.5477, "step": 1482 }, { "epoch": 0.18, "grad_norm": 8.484520954614412, "learning_rate": 9.387999960463039e-06, "loss": 1.8034, "step": 1483 }, { "epoch": 0.18, "grad_norm": 7.341481811364014, "learning_rate": 9.387033934019772e-06, "loss": 1.4869, "step": 1484 }, { "epoch": 0.18, "grad_norm": 7.145875741658462, "learning_rate": 9.386067195536702e-06, "loss": 1.5431, "step": 1485 }, { "epoch": 0.18, "grad_norm": 7.248872789833361, "learning_rate": 9.385099745170735e-06, "loss": 1.5908, "step": 1486 }, { "epoch": 0.18, "grad_norm": 8.055919583538884, "learning_rate": 9.384131583078896e-06, "loss": 1.7145, "step": 1487 }, { "epoch": 0.19, "grad_norm": 7.673856254486052, "learning_rate": 9.38316270941832e-06, "loss": 1.3822, "step": 1488 }, { "epoch": 0.19, "grad_norm": 7.03037226005118, "learning_rate": 9.38219312434626e-06, "loss": 1.8536, "step": 1489 }, { "epoch": 0.19, "grad_norm": 6.931534408026704, "learning_rate": 9.38122282802009e-06, "loss": 1.602, "step": 1490 }, { "epoch": 0.19, "grad_norm": 7.256612421441661, "learning_rate": 9.38025182059729e-06, "loss": 1.6302, "step": 1491 }, { "epoch": 0.19, "grad_norm": 6.424344064748559, "learning_rate": 9.37928010223546e-06, "loss": 1.4211, "step": 1492 }, { "epoch": 0.19, "grad_norm": 6.819535984880767, "learning_rate": 9.378307673092318e-06, "loss": 1.4579, "step": 1493 }, { "epoch": 0.19, "grad_norm": 7.071936678217803, "learning_rate": 9.377334533325693e-06, "loss": 1.5201, "step": 1494 }, { "epoch": 0.19, "grad_norm": 6.9474113351458175, "learning_rate": 9.37636068309353e-06, "loss": 1.5853, "step": 1495 }, { "epoch": 0.19, "grad_norm": 6.3411932268437665, "learning_rate": 9.37538612255389e-06, "loss": 1.5557, "step": 1496 }, { "epoch": 0.19, "grad_norm": 6.8719556919225315, "learning_rate": 9.374410851864955e-06, "loss": 1.9422, "step": 1497 }, { "epoch": 0.19, "grad_norm": 6.778134570544442, "learning_rate": 9.37343487118501e-06, "loss": 1.2866, "step": 1498 }, { "epoch": 0.19, "grad_norm": 8.150297154972536, "learning_rate": 9.372458180672464e-06, "loss": 1.747, "step": 1499 }, { "epoch": 0.19, "grad_norm": 9.177893648266297, "learning_rate": 9.37148078048584e-06, "loss": 1.6512, "step": 1500 }, { "epoch": 0.19, "grad_norm": 6.731034877248412, "learning_rate": 9.370502670783777e-06, "loss": 1.3708, "step": 1501 }, { "epoch": 0.19, "grad_norm": 6.5659903110520705, "learning_rate": 9.369523851725024e-06, "loss": 1.8112, "step": 1502 }, { "epoch": 0.19, "grad_norm": 7.906943650077935, "learning_rate": 9.368544323468451e-06, "loss": 1.6418, "step": 1503 }, { "epoch": 0.19, "grad_norm": 7.641861170766049, "learning_rate": 9.367564086173042e-06, "loss": 1.8073, "step": 1504 }, { "epoch": 0.19, "grad_norm": 6.975635527091534, "learning_rate": 9.366583139997891e-06, "loss": 1.5489, "step": 1505 }, { "epoch": 0.19, "grad_norm": 6.870219052174094, "learning_rate": 9.365601485102216e-06, "loss": 1.8046, "step": 1506 }, { "epoch": 0.19, "grad_norm": 9.084479376412776, "learning_rate": 9.364619121645341e-06, "loss": 1.6197, "step": 1507 }, { "epoch": 0.19, "grad_norm": 7.718369921821822, "learning_rate": 9.363636049786711e-06, "loss": 1.4325, "step": 1508 }, { "epoch": 0.19, "grad_norm": 9.290956106263007, "learning_rate": 9.362652269685881e-06, "loss": 1.9607, "step": 1509 }, { "epoch": 0.19, "grad_norm": 7.314323562727413, "learning_rate": 9.36166778150253e-06, "loss": 1.74, "step": 1510 }, { "epoch": 0.19, "grad_norm": 7.177442519025871, "learning_rate": 9.36068258539644e-06, "loss": 1.846, "step": 1511 }, { "epoch": 0.19, "grad_norm": 7.656725132613688, "learning_rate": 9.359696681527517e-06, "loss": 1.5522, "step": 1512 }, { "epoch": 0.19, "grad_norm": 7.8444700302323636, "learning_rate": 9.358710070055777e-06, "loss": 1.7056, "step": 1513 }, { "epoch": 0.19, "grad_norm": 7.124634567580487, "learning_rate": 9.357722751141352e-06, "loss": 1.709, "step": 1514 }, { "epoch": 0.19, "grad_norm": 7.746503905052208, "learning_rate": 9.356734724944492e-06, "loss": 1.6932, "step": 1515 }, { "epoch": 0.19, "grad_norm": 7.169051680672566, "learning_rate": 9.355745991625556e-06, "loss": 1.1311, "step": 1516 }, { "epoch": 0.19, "grad_norm": 7.054478322200052, "learning_rate": 9.354756551345023e-06, "loss": 1.2851, "step": 1517 }, { "epoch": 0.19, "grad_norm": 7.441751814751938, "learning_rate": 9.353766404263485e-06, "loss": 1.5698, "step": 1518 }, { "epoch": 0.19, "grad_norm": 7.595111924492975, "learning_rate": 9.352775550541645e-06, "loss": 1.4708, "step": 1519 }, { "epoch": 0.19, "grad_norm": 7.540121235380206, "learning_rate": 9.35178399034033e-06, "loss": 1.5914, "step": 1520 }, { "epoch": 0.19, "grad_norm": 7.788901627076626, "learning_rate": 9.35079172382047e-06, "loss": 1.7324, "step": 1521 }, { "epoch": 0.19, "grad_norm": 8.483373281137911, "learning_rate": 9.349798751143116e-06, "loss": 1.5851, "step": 1522 }, { "epoch": 0.19, "grad_norm": 7.151444306386401, "learning_rate": 9.348805072469435e-06, "loss": 1.5302, "step": 1523 }, { "epoch": 0.19, "grad_norm": 7.152277909476958, "learning_rate": 9.347810687960706e-06, "loss": 1.7044, "step": 1524 }, { "epoch": 0.19, "grad_norm": 6.803072296609114, "learning_rate": 9.346815597778323e-06, "loss": 1.5131, "step": 1525 }, { "epoch": 0.19, "grad_norm": 7.424428433173724, "learning_rate": 9.345819802083795e-06, "loss": 1.4658, "step": 1526 }, { "epoch": 0.19, "grad_norm": 6.765927990093665, "learning_rate": 9.344823301038745e-06, "loss": 1.5773, "step": 1527 }, { "epoch": 0.19, "grad_norm": 7.389316851491306, "learning_rate": 9.34382609480491e-06, "loss": 1.7546, "step": 1528 }, { "epoch": 0.19, "grad_norm": 7.519095433271634, "learning_rate": 9.342828183544142e-06, "loss": 1.622, "step": 1529 }, { "epoch": 0.19, "grad_norm": 7.836634146243434, "learning_rate": 9.341829567418407e-06, "loss": 1.8409, "step": 1530 }, { "epoch": 0.19, "grad_norm": 7.308892926833554, "learning_rate": 9.340830246589788e-06, "loss": 1.8077, "step": 1531 }, { "epoch": 0.19, "grad_norm": 7.7147827000484845, "learning_rate": 9.339830221220478e-06, "loss": 1.6434, "step": 1532 }, { "epoch": 0.19, "grad_norm": 7.1061282114456015, "learning_rate": 9.338829491472787e-06, "loss": 1.6326, "step": 1533 }, { "epoch": 0.19, "grad_norm": 6.9495933818310425, "learning_rate": 9.337828057509142e-06, "loss": 1.7356, "step": 1534 }, { "epoch": 0.19, "grad_norm": 7.507863393253565, "learning_rate": 9.336825919492078e-06, "loss": 1.3314, "step": 1535 }, { "epoch": 0.19, "grad_norm": 8.151054789660753, "learning_rate": 9.335823077584248e-06, "loss": 1.5875, "step": 1536 }, { "epoch": 0.19, "grad_norm": 7.130528687733196, "learning_rate": 9.334819531948418e-06, "loss": 1.4996, "step": 1537 }, { "epoch": 0.19, "grad_norm": 6.586437980437008, "learning_rate": 9.333815282747471e-06, "loss": 1.6057, "step": 1538 }, { "epoch": 0.19, "grad_norm": 7.045437399583947, "learning_rate": 9.332810330144402e-06, "loss": 1.9414, "step": 1539 }, { "epoch": 0.19, "grad_norm": 7.6754691095247, "learning_rate": 9.331804674302321e-06, "loss": 1.9004, "step": 1540 }, { "epoch": 0.19, "grad_norm": 7.314932266660372, "learning_rate": 9.330798315384448e-06, "loss": 1.7176, "step": 1541 }, { "epoch": 0.19, "grad_norm": 7.395739023974509, "learning_rate": 9.329791253554123e-06, "loss": 1.4467, "step": 1542 }, { "epoch": 0.19, "grad_norm": 8.761109880723797, "learning_rate": 9.328783488974797e-06, "loss": 1.6661, "step": 1543 }, { "epoch": 0.19, "grad_norm": 7.207304879835301, "learning_rate": 9.327775021810037e-06, "loss": 1.6327, "step": 1544 }, { "epoch": 0.19, "grad_norm": 6.737554577010832, "learning_rate": 9.326765852223522e-06, "loss": 1.5374, "step": 1545 }, { "epoch": 0.19, "grad_norm": 6.708436034420777, "learning_rate": 9.325755980379047e-06, "loss": 1.8875, "step": 1546 }, { "epoch": 0.19, "grad_norm": 7.274692591406242, "learning_rate": 9.324745406440517e-06, "loss": 1.6539, "step": 1547 }, { "epoch": 0.19, "grad_norm": 6.837442568202163, "learning_rate": 9.323734130571955e-06, "loss": 1.5605, "step": 1548 }, { "epoch": 0.19, "grad_norm": 7.384353152336243, "learning_rate": 9.322722152937499e-06, "loss": 1.5369, "step": 1549 }, { "epoch": 0.19, "grad_norm": 8.632580328081083, "learning_rate": 9.321709473701394e-06, "loss": 1.7786, "step": 1550 }, { "epoch": 0.19, "grad_norm": 7.3907297378216095, "learning_rate": 9.320696093028009e-06, "loss": 1.7557, "step": 1551 }, { "epoch": 0.19, "grad_norm": 6.814724398923214, "learning_rate": 9.319682011081816e-06, "loss": 1.4447, "step": 1552 }, { "epoch": 0.19, "grad_norm": 12.494113153965722, "learning_rate": 9.318667228027408e-06, "loss": 1.5596, "step": 1553 }, { "epoch": 0.19, "grad_norm": 8.578826697234604, "learning_rate": 9.31765174402949e-06, "loss": 1.9346, "step": 1554 }, { "epoch": 0.19, "grad_norm": 6.570127211265641, "learning_rate": 9.316635559252882e-06, "loss": 1.5896, "step": 1555 }, { "epoch": 0.19, "grad_norm": 7.178633453494391, "learning_rate": 9.315618673862515e-06, "loss": 1.6658, "step": 1556 }, { "epoch": 0.19, "grad_norm": 8.300272691401718, "learning_rate": 9.314601088023435e-06, "loss": 1.612, "step": 1557 }, { "epoch": 0.19, "grad_norm": 7.331725229672301, "learning_rate": 9.313582801900802e-06, "loss": 1.7934, "step": 1558 }, { "epoch": 0.19, "grad_norm": 7.095387831411181, "learning_rate": 9.31256381565989e-06, "loss": 1.6048, "step": 1559 }, { "epoch": 0.19, "grad_norm": 7.476488265908153, "learning_rate": 9.311544129466085e-06, "loss": 1.7015, "step": 1560 }, { "epoch": 0.19, "grad_norm": 7.427224842872005, "learning_rate": 9.31052374348489e-06, "loss": 1.7394, "step": 1561 }, { "epoch": 0.19, "grad_norm": 7.513670947707097, "learning_rate": 9.309502657881916e-06, "loss": 1.6122, "step": 1562 }, { "epoch": 0.19, "grad_norm": 7.028306394075823, "learning_rate": 9.308480872822894e-06, "loss": 1.8069, "step": 1563 }, { "epoch": 0.19, "grad_norm": 7.668923937951687, "learning_rate": 9.307458388473662e-06, "loss": 1.5488, "step": 1564 }, { "epoch": 0.19, "grad_norm": 7.607862647795974, "learning_rate": 9.306435205000178e-06, "loss": 1.7628, "step": 1565 }, { "epoch": 0.19, "grad_norm": 6.3245020526500975, "learning_rate": 9.305411322568507e-06, "loss": 1.5244, "step": 1566 }, { "epoch": 0.19, "grad_norm": 6.519902036405583, "learning_rate": 9.304386741344835e-06, "loss": 1.4532, "step": 1567 }, { "epoch": 0.2, "grad_norm": 7.441512286320175, "learning_rate": 9.303361461495455e-06, "loss": 1.5925, "step": 1568 }, { "epoch": 0.2, "grad_norm": 7.405887502916289, "learning_rate": 9.302335483186773e-06, "loss": 1.7611, "step": 1569 }, { "epoch": 0.2, "grad_norm": 9.404874206800583, "learning_rate": 9.301308806585315e-06, "loss": 1.6161, "step": 1570 }, { "epoch": 0.2, "grad_norm": 8.196689175818957, "learning_rate": 9.300281431857715e-06, "loss": 1.6823, "step": 1571 }, { "epoch": 0.2, "grad_norm": 7.8029225975430485, "learning_rate": 9.299253359170722e-06, "loss": 1.4486, "step": 1572 }, { "epoch": 0.2, "grad_norm": 7.670062322917595, "learning_rate": 9.298224588691196e-06, "loss": 1.5606, "step": 1573 }, { "epoch": 0.2, "grad_norm": 7.495952757890844, "learning_rate": 9.297195120586114e-06, "loss": 1.4454, "step": 1574 }, { "epoch": 0.2, "grad_norm": 7.143786959583466, "learning_rate": 9.296164955022562e-06, "loss": 1.6378, "step": 1575 }, { "epoch": 0.2, "grad_norm": 8.604243227656132, "learning_rate": 9.295134092167744e-06, "loss": 1.5484, "step": 1576 }, { "epoch": 0.2, "grad_norm": 7.8981615744630265, "learning_rate": 9.294102532188975e-06, "loss": 1.7537, "step": 1577 }, { "epoch": 0.2, "grad_norm": 6.9468436311117365, "learning_rate": 9.293070275253679e-06, "loss": 1.5541, "step": 1578 }, { "epoch": 0.2, "grad_norm": 8.264780101948276, "learning_rate": 9.292037321529404e-06, "loss": 1.8035, "step": 1579 }, { "epoch": 0.2, "grad_norm": 8.599635961063644, "learning_rate": 9.291003671183796e-06, "loss": 1.6637, "step": 1580 }, { "epoch": 0.2, "grad_norm": 7.401463302952984, "learning_rate": 9.28996932438463e-06, "loss": 1.8015, "step": 1581 }, { "epoch": 0.2, "grad_norm": 8.117976149336387, "learning_rate": 9.288934281299777e-06, "loss": 1.6737, "step": 1582 }, { "epoch": 0.2, "grad_norm": 8.498461032487253, "learning_rate": 9.28789854209724e-06, "loss": 1.7376, "step": 1583 }, { "epoch": 0.2, "grad_norm": 8.200322545109087, "learning_rate": 9.286862106945118e-06, "loss": 1.6844, "step": 1584 }, { "epoch": 0.2, "grad_norm": 7.357176245740557, "learning_rate": 9.285824976011634e-06, "loss": 2.0597, "step": 1585 }, { "epoch": 0.2, "grad_norm": 7.683674799027486, "learning_rate": 9.284787149465119e-06, "loss": 1.6256, "step": 1586 }, { "epoch": 0.2, "grad_norm": 6.371878461268342, "learning_rate": 9.283748627474017e-06, "loss": 1.1811, "step": 1587 }, { "epoch": 0.2, "grad_norm": 8.858728291212497, "learning_rate": 9.282709410206887e-06, "loss": 1.263, "step": 1588 }, { "epoch": 0.2, "grad_norm": 7.19273217403482, "learning_rate": 9.2816694978324e-06, "loss": 1.4849, "step": 1589 }, { "epoch": 0.2, "grad_norm": 8.29788998094503, "learning_rate": 9.280628890519336e-06, "loss": 1.6602, "step": 1590 }, { "epoch": 0.2, "grad_norm": 7.881535249392813, "learning_rate": 9.279587588436597e-06, "loss": 1.4874, "step": 1591 }, { "epoch": 0.2, "grad_norm": 6.879581546302711, "learning_rate": 9.278545591753188e-06, "loss": 1.651, "step": 1592 }, { "epoch": 0.2, "grad_norm": 6.89129497551695, "learning_rate": 9.277502900638233e-06, "loss": 1.5055, "step": 1593 }, { "epoch": 0.2, "grad_norm": 7.007423322851434, "learning_rate": 9.276459515260965e-06, "loss": 1.4907, "step": 1594 }, { "epoch": 0.2, "grad_norm": 7.479269783277743, "learning_rate": 9.275415435790732e-06, "loss": 1.697, "step": 1595 }, { "epoch": 0.2, "grad_norm": 7.608042796155957, "learning_rate": 9.274370662396992e-06, "loss": 1.681, "step": 1596 }, { "epoch": 0.2, "grad_norm": 6.931310511417486, "learning_rate": 9.27332519524932e-06, "loss": 1.5781, "step": 1597 }, { "epoch": 0.2, "grad_norm": 7.571700476376448, "learning_rate": 9.272279034517403e-06, "loss": 1.6616, "step": 1598 }, { "epoch": 0.2, "grad_norm": 9.091943333838877, "learning_rate": 9.27123218037103e-06, "loss": 1.3705, "step": 1599 }, { "epoch": 0.2, "grad_norm": 7.706812217599559, "learning_rate": 9.270184632980121e-06, "loss": 1.3693, "step": 1600 }, { "epoch": 0.2, "grad_norm": 6.03163746382189, "learning_rate": 9.269136392514694e-06, "loss": 1.3916, "step": 1601 }, { "epoch": 0.2, "grad_norm": 6.245062315113583, "learning_rate": 9.268087459144885e-06, "loss": 1.5407, "step": 1602 }, { "epoch": 0.2, "grad_norm": 6.505947517853684, "learning_rate": 9.267037833040943e-06, "loss": 1.3674, "step": 1603 }, { "epoch": 0.2, "grad_norm": 7.261137159332112, "learning_rate": 9.265987514373224e-06, "loss": 1.6416, "step": 1604 }, { "epoch": 0.2, "grad_norm": 6.5296859292317215, "learning_rate": 9.264936503312207e-06, "loss": 1.371, "step": 1605 }, { "epoch": 0.2, "grad_norm": 7.692873301191647, "learning_rate": 9.263884800028469e-06, "loss": 1.3412, "step": 1606 }, { "epoch": 0.2, "grad_norm": 10.687845368684906, "learning_rate": 9.262832404692714e-06, "loss": 1.7205, "step": 1607 }, { "epoch": 0.2, "grad_norm": 8.923973878759734, "learning_rate": 9.261779317475747e-06, "loss": 1.4922, "step": 1608 }, { "epoch": 0.2, "grad_norm": 7.29582603796319, "learning_rate": 9.260725538548494e-06, "loss": 1.5351, "step": 1609 }, { "epoch": 0.2, "grad_norm": 7.265112029015842, "learning_rate": 9.259671068081985e-06, "loss": 1.7992, "step": 1610 }, { "epoch": 0.2, "grad_norm": 7.033727216922483, "learning_rate": 9.25861590624737e-06, "loss": 1.5729, "step": 1611 }, { "epoch": 0.2, "grad_norm": 7.201510781598203, "learning_rate": 9.257560053215905e-06, "loss": 1.5775, "step": 1612 }, { "epoch": 0.2, "grad_norm": 6.7439480296361385, "learning_rate": 9.256503509158962e-06, "loss": 1.4543, "step": 1613 }, { "epoch": 0.2, "grad_norm": 10.490554939990862, "learning_rate": 9.255446274248023e-06, "loss": 1.5026, "step": 1614 }, { "epoch": 0.2, "grad_norm": 7.246787500143098, "learning_rate": 9.254388348654683e-06, "loss": 1.7193, "step": 1615 }, { "epoch": 0.2, "grad_norm": 6.692632176186056, "learning_rate": 9.253329732550652e-06, "loss": 1.4181, "step": 1616 }, { "epoch": 0.2, "grad_norm": 6.488565949569122, "learning_rate": 9.252270426107746e-06, "loss": 1.712, "step": 1617 }, { "epoch": 0.2, "grad_norm": 7.322135219087612, "learning_rate": 9.251210429497898e-06, "loss": 1.7931, "step": 1618 }, { "epoch": 0.2, "grad_norm": 7.8213003709360605, "learning_rate": 9.250149742893149e-06, "loss": 1.4242, "step": 1619 }, { "epoch": 0.2, "grad_norm": 7.022919210313738, "learning_rate": 9.249088366465657e-06, "loss": 1.8013, "step": 1620 }, { "epoch": 0.2, "grad_norm": 6.809808111936879, "learning_rate": 9.248026300387688e-06, "loss": 1.5044, "step": 1621 }, { "epoch": 0.2, "grad_norm": 7.34354838695329, "learning_rate": 9.246963544831622e-06, "loss": 1.698, "step": 1622 }, { "epoch": 0.2, "grad_norm": 7.599541324938056, "learning_rate": 9.245900099969948e-06, "loss": 1.8402, "step": 1623 }, { "epoch": 0.2, "grad_norm": 7.215511040026796, "learning_rate": 9.244835965975271e-06, "loss": 1.4861, "step": 1624 }, { "epoch": 0.2, "grad_norm": 6.815822256278336, "learning_rate": 9.243771143020305e-06, "loss": 1.562, "step": 1625 }, { "epoch": 0.2, "grad_norm": 7.241300072455331, "learning_rate": 9.242705631277878e-06, "loss": 1.5487, "step": 1626 }, { "epoch": 0.2, "grad_norm": 6.436923496644046, "learning_rate": 9.241639430920925e-06, "loss": 1.3748, "step": 1627 }, { "epoch": 0.2, "grad_norm": 7.247526869895328, "learning_rate": 9.240572542122502e-06, "loss": 1.6037, "step": 1628 }, { "epoch": 0.2, "grad_norm": 6.8597649442505215, "learning_rate": 9.239504965055763e-06, "loss": 1.6451, "step": 1629 }, { "epoch": 0.2, "grad_norm": 6.953881627457901, "learning_rate": 9.23843669989399e-06, "loss": 1.4748, "step": 1630 }, { "epoch": 0.2, "grad_norm": 6.4246081616663835, "learning_rate": 9.237367746810561e-06, "loss": 1.512, "step": 1631 }, { "epoch": 0.2, "grad_norm": 6.739493098392381, "learning_rate": 9.23629810597898e-06, "loss": 1.5439, "step": 1632 }, { "epoch": 0.2, "grad_norm": 11.10150080372719, "learning_rate": 9.235227777572849e-06, "loss": 1.6102, "step": 1633 }, { "epoch": 0.2, "grad_norm": 7.902945619602033, "learning_rate": 9.234156761765893e-06, "loss": 1.6802, "step": 1634 }, { "epoch": 0.2, "grad_norm": 6.9542474484013574, "learning_rate": 9.23308505873194e-06, "loss": 1.4599, "step": 1635 }, { "epoch": 0.2, "grad_norm": 7.274210579119363, "learning_rate": 9.232012668644936e-06, "loss": 1.379, "step": 1636 }, { "epoch": 0.2, "grad_norm": 7.894949642635283, "learning_rate": 9.230939591678936e-06, "loss": 1.5254, "step": 1637 }, { "epoch": 0.2, "grad_norm": 6.5567530493900295, "learning_rate": 9.229865828008105e-06, "loss": 1.5238, "step": 1638 }, { "epoch": 0.2, "grad_norm": 7.79091140132792, "learning_rate": 9.228791377806721e-06, "loss": 1.6291, "step": 1639 }, { "epoch": 0.2, "grad_norm": 6.873293874583878, "learning_rate": 9.227716241249173e-06, "loss": 1.5777, "step": 1640 }, { "epoch": 0.2, "grad_norm": 6.363013205863204, "learning_rate": 9.226640418509962e-06, "loss": 1.6394, "step": 1641 }, { "epoch": 0.2, "grad_norm": 7.6218153774411, "learning_rate": 9.225563909763701e-06, "loss": 1.8897, "step": 1642 }, { "epoch": 0.2, "grad_norm": 7.50268772677441, "learning_rate": 9.224486715185113e-06, "loss": 1.651, "step": 1643 }, { "epoch": 0.2, "grad_norm": 7.309683997808337, "learning_rate": 9.223408834949029e-06, "loss": 1.7015, "step": 1644 }, { "epoch": 0.2, "grad_norm": 7.135085352759727, "learning_rate": 9.2223302692304e-06, "loss": 1.5919, "step": 1645 }, { "epoch": 0.2, "grad_norm": 7.297394703743154, "learning_rate": 9.22125101820428e-06, "loss": 1.7406, "step": 1646 }, { "epoch": 0.2, "grad_norm": 7.204178822319608, "learning_rate": 9.220171082045837e-06, "loss": 1.69, "step": 1647 }, { "epoch": 0.2, "grad_norm": 6.785202031129156, "learning_rate": 9.219090460930353e-06, "loss": 1.3142, "step": 1648 }, { "epoch": 0.21, "grad_norm": 6.257769333120327, "learning_rate": 9.218009155033218e-06, "loss": 1.4725, "step": 1649 }, { "epoch": 0.21, "grad_norm": 9.198674210150617, "learning_rate": 9.216927164529935e-06, "loss": 1.8521, "step": 1650 }, { "epoch": 0.21, "grad_norm": 7.30147932611004, "learning_rate": 9.215844489596114e-06, "loss": 1.5206, "step": 1651 }, { "epoch": 0.21, "grad_norm": 6.909173731106585, "learning_rate": 9.21476113040748e-06, "loss": 1.3268, "step": 1652 }, { "epoch": 0.21, "grad_norm": 7.261292462986967, "learning_rate": 9.21367708713987e-06, "loss": 1.3509, "step": 1653 }, { "epoch": 0.21, "grad_norm": 8.42678846978643, "learning_rate": 9.212592359969228e-06, "loss": 1.6072, "step": 1654 }, { "epoch": 0.21, "grad_norm": 5.962471414417366, "learning_rate": 9.211506949071613e-06, "loss": 1.2079, "step": 1655 }, { "epoch": 0.21, "grad_norm": 6.909551409995693, "learning_rate": 9.210420854623191e-06, "loss": 1.8494, "step": 1656 }, { "epoch": 0.21, "grad_norm": 7.13703791233821, "learning_rate": 9.209334076800247e-06, "loss": 1.7053, "step": 1657 }, { "epoch": 0.21, "grad_norm": 12.405627218271523, "learning_rate": 9.208246615779163e-06, "loss": 1.8554, "step": 1658 }, { "epoch": 0.21, "grad_norm": 7.5999072399223815, "learning_rate": 9.207158471736444e-06, "loss": 1.7556, "step": 1659 }, { "epoch": 0.21, "grad_norm": 7.191292439040953, "learning_rate": 9.206069644848702e-06, "loss": 1.8009, "step": 1660 }, { "epoch": 0.21, "grad_norm": 8.54752907675901, "learning_rate": 9.204980135292659e-06, "loss": 1.5236, "step": 1661 }, { "epoch": 0.21, "grad_norm": 7.382799373373029, "learning_rate": 9.203889943245148e-06, "loss": 1.6245, "step": 1662 }, { "epoch": 0.21, "grad_norm": 8.093789779344037, "learning_rate": 9.202799068883113e-06, "loss": 1.6721, "step": 1663 }, { "epoch": 0.21, "grad_norm": 6.675057384543078, "learning_rate": 9.201707512383609e-06, "loss": 1.7047, "step": 1664 }, { "epoch": 0.21, "grad_norm": 7.8454375323402115, "learning_rate": 9.200615273923803e-06, "loss": 1.6483, "step": 1665 }, { "epoch": 0.21, "grad_norm": 7.171151972283894, "learning_rate": 9.199522353680971e-06, "loss": 1.5572, "step": 1666 }, { "epoch": 0.21, "grad_norm": 7.631694700328419, "learning_rate": 9.198428751832498e-06, "loss": 1.4622, "step": 1667 }, { "epoch": 0.21, "grad_norm": 7.229797908572838, "learning_rate": 9.197334468555884e-06, "loss": 1.3903, "step": 1668 }, { "epoch": 0.21, "grad_norm": 7.111409448996431, "learning_rate": 9.196239504028736e-06, "loss": 1.4439, "step": 1669 }, { "epoch": 0.21, "grad_norm": 7.591395594553069, "learning_rate": 9.195143858428773e-06, "loss": 1.8785, "step": 1670 }, { "epoch": 0.21, "grad_norm": 6.570889515613279, "learning_rate": 9.194047531933825e-06, "loss": 1.6255, "step": 1671 }, { "epoch": 0.21, "grad_norm": 8.341634247752795, "learning_rate": 9.192950524721832e-06, "loss": 1.7488, "step": 1672 }, { "epoch": 0.21, "grad_norm": 7.297813293841213, "learning_rate": 9.19185283697084e-06, "loss": 1.4213, "step": 1673 }, { "epoch": 0.21, "grad_norm": 7.0882240070295985, "learning_rate": 9.190754468859016e-06, "loss": 1.6215, "step": 1674 }, { "epoch": 0.21, "grad_norm": 8.116700131000654, "learning_rate": 9.189655420564628e-06, "loss": 1.7439, "step": 1675 }, { "epoch": 0.21, "grad_norm": 6.979276069188081, "learning_rate": 9.188555692266057e-06, "loss": 1.4666, "step": 1676 }, { "epoch": 0.21, "grad_norm": 10.023902451537271, "learning_rate": 9.187455284141798e-06, "loss": 1.5427, "step": 1677 }, { "epoch": 0.21, "grad_norm": 7.157112606216444, "learning_rate": 9.186354196370448e-06, "loss": 1.3895, "step": 1678 }, { "epoch": 0.21, "grad_norm": 6.682777200661407, "learning_rate": 9.185252429130727e-06, "loss": 1.7372, "step": 1679 }, { "epoch": 0.21, "grad_norm": 11.836904562126664, "learning_rate": 9.184149982601451e-06, "loss": 1.6566, "step": 1680 }, { "epoch": 0.21, "grad_norm": 8.056214289328766, "learning_rate": 9.183046856961557e-06, "loss": 1.7137, "step": 1681 }, { "epoch": 0.21, "grad_norm": 7.697682132584184, "learning_rate": 9.181943052390087e-06, "loss": 1.7664, "step": 1682 }, { "epoch": 0.21, "grad_norm": 7.008506477727158, "learning_rate": 9.180838569066195e-06, "loss": 1.4806, "step": 1683 }, { "epoch": 0.21, "grad_norm": 6.8370089245468355, "learning_rate": 9.179733407169146e-06, "loss": 1.3902, "step": 1684 }, { "epoch": 0.21, "grad_norm": 7.517924054525527, "learning_rate": 9.17862756687831e-06, "loss": 1.6752, "step": 1685 }, { "epoch": 0.21, "grad_norm": 7.0690248319468445, "learning_rate": 9.177521048373176e-06, "loss": 1.7594, "step": 1686 }, { "epoch": 0.21, "grad_norm": 6.673456376149631, "learning_rate": 9.176413851833335e-06, "loss": 1.4665, "step": 1687 }, { "epoch": 0.21, "grad_norm": 7.043488300757727, "learning_rate": 9.175305977438491e-06, "loss": 1.6917, "step": 1688 }, { "epoch": 0.21, "grad_norm": 7.273406687763205, "learning_rate": 9.17419742536846e-06, "loss": 1.5841, "step": 1689 }, { "epoch": 0.21, "grad_norm": 7.220413806834574, "learning_rate": 9.173088195803164e-06, "loss": 1.5801, "step": 1690 }, { "epoch": 0.21, "grad_norm": 6.957125791129682, "learning_rate": 9.171978288922638e-06, "loss": 1.4832, "step": 1691 }, { "epoch": 0.21, "grad_norm": 7.488872211337887, "learning_rate": 9.170867704907026e-06, "loss": 1.6839, "step": 1692 }, { "epoch": 0.21, "grad_norm": 6.979756729655873, "learning_rate": 9.169756443936582e-06, "loss": 1.7841, "step": 1693 }, { "epoch": 0.21, "grad_norm": 8.008610025109085, "learning_rate": 9.168644506191671e-06, "loss": 1.5518, "step": 1694 }, { "epoch": 0.21, "grad_norm": 6.469527704379677, "learning_rate": 9.167531891852764e-06, "loss": 1.5084, "step": 1695 }, { "epoch": 0.21, "grad_norm": 7.736044749832444, "learning_rate": 9.166418601100445e-06, "loss": 1.618, "step": 1696 }, { "epoch": 0.21, "grad_norm": 7.312891309164047, "learning_rate": 9.16530463411541e-06, "loss": 1.3174, "step": 1697 }, { "epoch": 0.21, "grad_norm": 7.196173472268466, "learning_rate": 9.164189991078458e-06, "loss": 1.6323, "step": 1698 }, { "epoch": 0.21, "grad_norm": 7.822222245663657, "learning_rate": 9.163074672170504e-06, "loss": 1.697, "step": 1699 }, { "epoch": 0.21, "grad_norm": 8.138538682501459, "learning_rate": 9.161958677572573e-06, "loss": 1.5129, "step": 1700 }, { "epoch": 0.21, "grad_norm": 7.138599341282164, "learning_rate": 9.160842007465791e-06, "loss": 1.9145, "step": 1701 }, { "epoch": 0.21, "grad_norm": 7.258517846526544, "learning_rate": 9.159724662031404e-06, "loss": 1.4097, "step": 1702 }, { "epoch": 0.21, "grad_norm": 7.2634182679835115, "learning_rate": 9.158606641450762e-06, "loss": 1.8267, "step": 1703 }, { "epoch": 0.21, "grad_norm": 8.053859993596134, "learning_rate": 9.157487945905327e-06, "loss": 1.3935, "step": 1704 }, { "epoch": 0.21, "grad_norm": 7.439687320076676, "learning_rate": 9.156368575576667e-06, "loss": 1.5924, "step": 1705 }, { "epoch": 0.21, "grad_norm": 7.761430676893488, "learning_rate": 9.155248530646466e-06, "loss": 1.9367, "step": 1706 }, { "epoch": 0.21, "grad_norm": 7.190116877670779, "learning_rate": 9.154127811296508e-06, "loss": 1.5996, "step": 1707 }, { "epoch": 0.21, "grad_norm": 7.4915035588767696, "learning_rate": 9.153006417708697e-06, "loss": 1.6773, "step": 1708 }, { "epoch": 0.21, "grad_norm": 7.063896035406822, "learning_rate": 9.151884350065038e-06, "loss": 1.2559, "step": 1709 }, { "epoch": 0.21, "grad_norm": 8.236876194074956, "learning_rate": 9.150761608547652e-06, "loss": 1.9037, "step": 1710 }, { "epoch": 0.21, "grad_norm": 6.722428141993552, "learning_rate": 9.149638193338764e-06, "loss": 1.5871, "step": 1711 }, { "epoch": 0.21, "grad_norm": 7.465735372710614, "learning_rate": 9.148514104620711e-06, "loss": 1.4662, "step": 1712 }, { "epoch": 0.21, "grad_norm": 6.545677821486985, "learning_rate": 9.147389342575936e-06, "loss": 1.53, "step": 1713 }, { "epoch": 0.21, "grad_norm": 6.38810306437064, "learning_rate": 9.146263907386999e-06, "loss": 1.3656, "step": 1714 }, { "epoch": 0.21, "grad_norm": 7.754411498132887, "learning_rate": 9.145137799236561e-06, "loss": 1.8169, "step": 1715 }, { "epoch": 0.21, "grad_norm": 6.9003489301988425, "learning_rate": 9.144011018307397e-06, "loss": 1.5116, "step": 1716 }, { "epoch": 0.21, "grad_norm": 6.475074383807039, "learning_rate": 9.14288356478239e-06, "loss": 1.5924, "step": 1717 }, { "epoch": 0.21, "grad_norm": 6.741688065942491, "learning_rate": 9.141755438844529e-06, "loss": 1.5637, "step": 1718 }, { "epoch": 0.21, "grad_norm": 7.5879831212252125, "learning_rate": 9.14062664067692e-06, "loss": 1.7721, "step": 1719 }, { "epoch": 0.21, "grad_norm": 7.559637606309779, "learning_rate": 9.139497170462768e-06, "loss": 1.5818, "step": 1720 }, { "epoch": 0.21, "grad_norm": 8.016754520640859, "learning_rate": 9.138367028385396e-06, "loss": 1.4451, "step": 1721 }, { "epoch": 0.21, "grad_norm": 7.339680352291182, "learning_rate": 9.137236214628231e-06, "loss": 1.5327, "step": 1722 }, { "epoch": 0.21, "grad_norm": 7.274059496274957, "learning_rate": 9.13610472937481e-06, "loss": 1.4661, "step": 1723 }, { "epoch": 0.21, "grad_norm": 6.820459268030904, "learning_rate": 9.13497257280878e-06, "loss": 1.4015, "step": 1724 }, { "epoch": 0.21, "grad_norm": 7.554416946927527, "learning_rate": 9.133839745113895e-06, "loss": 1.5378, "step": 1725 }, { "epoch": 0.21, "grad_norm": 6.983181942850508, "learning_rate": 9.13270624647402e-06, "loss": 1.309, "step": 1726 }, { "epoch": 0.21, "grad_norm": 7.769753184508097, "learning_rate": 9.13157207707313e-06, "loss": 1.8195, "step": 1727 }, { "epoch": 0.21, "grad_norm": 6.738581927737578, "learning_rate": 9.130437237095302e-06, "loss": 1.5895, "step": 1728 }, { "epoch": 0.22, "grad_norm": 7.912932927279299, "learning_rate": 9.129301726724733e-06, "loss": 1.403, "step": 1729 }, { "epoch": 0.22, "grad_norm": 6.8068204488432125, "learning_rate": 9.128165546145717e-06, "loss": 1.2522, "step": 1730 }, { "epoch": 0.22, "grad_norm": 7.848800379390725, "learning_rate": 9.127028695542666e-06, "loss": 1.4177, "step": 1731 }, { "epoch": 0.22, "grad_norm": 7.118546139732806, "learning_rate": 9.125891175100095e-06, "loss": 1.626, "step": 1732 }, { "epoch": 0.22, "grad_norm": 6.851065190256259, "learning_rate": 9.124752985002632e-06, "loss": 1.4872, "step": 1733 }, { "epoch": 0.22, "grad_norm": 7.472072839504076, "learning_rate": 9.12361412543501e-06, "loss": 1.6064, "step": 1734 }, { "epoch": 0.22, "grad_norm": 6.990659374347812, "learning_rate": 9.122474596582073e-06, "loss": 1.6668, "step": 1735 }, { "epoch": 0.22, "grad_norm": 8.052674180318608, "learning_rate": 9.121334398628772e-06, "loss": 1.6402, "step": 1736 }, { "epoch": 0.22, "grad_norm": 6.98021899183303, "learning_rate": 9.120193531760166e-06, "loss": 1.4029, "step": 1737 }, { "epoch": 0.22, "grad_norm": 7.323941773923116, "learning_rate": 9.119051996161427e-06, "loss": 1.5201, "step": 1738 }, { "epoch": 0.22, "grad_norm": 7.1138364546809765, "learning_rate": 9.117909792017831e-06, "loss": 1.5186, "step": 1739 }, { "epoch": 0.22, "grad_norm": 7.091057567792945, "learning_rate": 9.116766919514765e-06, "loss": 1.3034, "step": 1740 }, { "epoch": 0.22, "grad_norm": 13.855903886153106, "learning_rate": 9.115623378837723e-06, "loss": 1.6282, "step": 1741 }, { "epoch": 0.22, "grad_norm": 6.798120019850731, "learning_rate": 9.114479170172307e-06, "loss": 1.742, "step": 1742 }, { "epoch": 0.22, "grad_norm": 6.285026297903537, "learning_rate": 9.113334293704229e-06, "loss": 1.2331, "step": 1743 }, { "epoch": 0.22, "grad_norm": 7.190365080122966, "learning_rate": 9.112188749619308e-06, "loss": 1.5963, "step": 1744 }, { "epoch": 0.22, "grad_norm": 7.13767999530823, "learning_rate": 9.111042538103477e-06, "loss": 1.5951, "step": 1745 }, { "epoch": 0.22, "grad_norm": 8.361529089194045, "learning_rate": 9.109895659342764e-06, "loss": 1.6809, "step": 1746 }, { "epoch": 0.22, "grad_norm": 7.99906830357116, "learning_rate": 9.10874811352332e-06, "loss": 1.3363, "step": 1747 }, { "epoch": 0.22, "grad_norm": 8.27154377320664, "learning_rate": 9.107599900831396e-06, "loss": 1.6705, "step": 1748 }, { "epoch": 0.22, "grad_norm": 7.7696088199600215, "learning_rate": 9.106451021453353e-06, "loss": 1.7905, "step": 1749 }, { "epoch": 0.22, "grad_norm": 8.306128902506533, "learning_rate": 9.105301475575662e-06, "loss": 1.8894, "step": 1750 }, { "epoch": 0.22, "grad_norm": 6.922648724509508, "learning_rate": 9.1041512633849e-06, "loss": 1.3167, "step": 1751 }, { "epoch": 0.22, "grad_norm": 6.941699690261511, "learning_rate": 9.10300038506775e-06, "loss": 1.6975, "step": 1752 }, { "epoch": 0.22, "grad_norm": 6.596290132231675, "learning_rate": 9.10184884081101e-06, "loss": 1.447, "step": 1753 }, { "epoch": 0.22, "grad_norm": 7.3415700996082505, "learning_rate": 9.10069663080158e-06, "loss": 1.3378, "step": 1754 }, { "epoch": 0.22, "grad_norm": 8.346634978373272, "learning_rate": 9.09954375522647e-06, "loss": 1.8877, "step": 1755 }, { "epoch": 0.22, "grad_norm": 6.205343254093656, "learning_rate": 9.098390214272798e-06, "loss": 1.4612, "step": 1756 }, { "epoch": 0.22, "grad_norm": 7.035439550325736, "learning_rate": 9.097236008127791e-06, "loss": 1.4375, "step": 1757 }, { "epoch": 0.22, "grad_norm": 6.5148387556187295, "learning_rate": 9.096081136978782e-06, "loss": 1.5462, "step": 1758 }, { "epoch": 0.22, "grad_norm": 9.482232552228359, "learning_rate": 9.094925601013214e-06, "loss": 1.9656, "step": 1759 }, { "epoch": 0.22, "grad_norm": 8.008771188854851, "learning_rate": 9.093769400418635e-06, "loss": 1.6268, "step": 1760 }, { "epoch": 0.22, "grad_norm": 7.345633854968158, "learning_rate": 9.092612535382705e-06, "loss": 1.5916, "step": 1761 }, { "epoch": 0.22, "grad_norm": 7.686215804849847, "learning_rate": 9.091455006093189e-06, "loss": 1.7434, "step": 1762 }, { "epoch": 0.22, "grad_norm": 6.5754267210990935, "learning_rate": 9.09029681273796e-06, "loss": 1.7613, "step": 1763 }, { "epoch": 0.22, "grad_norm": 6.751076084948549, "learning_rate": 9.089137955504998e-06, "loss": 1.5981, "step": 1764 }, { "epoch": 0.22, "grad_norm": 8.09359604999869, "learning_rate": 9.087978434582395e-06, "loss": 1.5458, "step": 1765 }, { "epoch": 0.22, "grad_norm": 6.848153750458205, "learning_rate": 9.086818250158342e-06, "loss": 1.5911, "step": 1766 }, { "epoch": 0.22, "grad_norm": 6.410268096484312, "learning_rate": 9.08565740242115e-06, "loss": 1.2513, "step": 1767 }, { "epoch": 0.22, "grad_norm": 6.981641533476455, "learning_rate": 9.084495891559227e-06, "loss": 1.3843, "step": 1768 }, { "epoch": 0.22, "grad_norm": 6.988251336689089, "learning_rate": 9.083333717761093e-06, "loss": 1.429, "step": 1769 }, { "epoch": 0.22, "grad_norm": 6.196793720037166, "learning_rate": 9.082170881215377e-06, "loss": 1.5678, "step": 1770 }, { "epoch": 0.22, "grad_norm": 7.594793683668648, "learning_rate": 9.081007382110812e-06, "loss": 1.7062, "step": 1771 }, { "epoch": 0.22, "grad_norm": 7.021274963370848, "learning_rate": 9.079843220636242e-06, "loss": 1.258, "step": 1772 }, { "epoch": 0.22, "grad_norm": 6.065046830472682, "learning_rate": 9.078678396980613e-06, "loss": 1.2506, "step": 1773 }, { "epoch": 0.22, "grad_norm": 7.11009518771902, "learning_rate": 9.077512911332989e-06, "loss": 1.8754, "step": 1774 }, { "epoch": 0.22, "grad_norm": 6.333554791834509, "learning_rate": 9.076346763882529e-06, "loss": 1.522, "step": 1775 }, { "epoch": 0.22, "grad_norm": 7.920028627606489, "learning_rate": 9.075179954818506e-06, "loss": 1.4339, "step": 1776 }, { "epoch": 0.22, "grad_norm": 8.278460139170404, "learning_rate": 9.074012484330301e-06, "loss": 1.9709, "step": 1777 }, { "epoch": 0.22, "grad_norm": 7.279534868200268, "learning_rate": 9.072844352607401e-06, "loss": 1.2776, "step": 1778 }, { "epoch": 0.22, "grad_norm": 5.991965074712696, "learning_rate": 9.0716755598394e-06, "loss": 1.2568, "step": 1779 }, { "epoch": 0.22, "grad_norm": 7.772387822646055, "learning_rate": 9.070506106215997e-06, "loss": 1.7871, "step": 1780 }, { "epoch": 0.22, "grad_norm": 9.31829192559843, "learning_rate": 9.069335991927005e-06, "loss": 1.3765, "step": 1781 }, { "epoch": 0.22, "grad_norm": 6.763699960286681, "learning_rate": 9.068165217162337e-06, "loss": 1.5635, "step": 1782 }, { "epoch": 0.22, "grad_norm": 7.257640875960606, "learning_rate": 9.066993782112018e-06, "loss": 1.3905, "step": 1783 }, { "epoch": 0.22, "grad_norm": 6.865209640486995, "learning_rate": 9.065821686966176e-06, "loss": 1.6373, "step": 1784 }, { "epoch": 0.22, "grad_norm": 7.10349690878974, "learning_rate": 9.064648931915052e-06, "loss": 1.6045, "step": 1785 }, { "epoch": 0.22, "grad_norm": 6.602395072913149, "learning_rate": 9.063475517148987e-06, "loss": 1.5375, "step": 1786 }, { "epoch": 0.22, "grad_norm": 8.41528126011676, "learning_rate": 9.062301442858434e-06, "loss": 1.8046, "step": 1787 }, { "epoch": 0.22, "grad_norm": 8.410882072702691, "learning_rate": 9.061126709233954e-06, "loss": 1.8037, "step": 1788 }, { "epoch": 0.22, "grad_norm": 7.0342024620307715, "learning_rate": 9.059951316466209e-06, "loss": 1.3953, "step": 1789 }, { "epoch": 0.22, "grad_norm": 7.737894055885337, "learning_rate": 9.058775264745976e-06, "loss": 1.4333, "step": 1790 }, { "epoch": 0.22, "grad_norm": 7.709417024877374, "learning_rate": 9.05759855426413e-06, "loss": 1.7473, "step": 1791 }, { "epoch": 0.22, "grad_norm": 6.855284008192558, "learning_rate": 9.05642118521166e-06, "loss": 1.5842, "step": 1792 }, { "epoch": 0.22, "grad_norm": 7.252238486906309, "learning_rate": 9.055243157779658e-06, "loss": 1.7665, "step": 1793 }, { "epoch": 0.22, "grad_norm": 6.804694276289282, "learning_rate": 9.054064472159328e-06, "loss": 1.4122, "step": 1794 }, { "epoch": 0.22, "grad_norm": 7.21754031496092, "learning_rate": 9.052885128541974e-06, "loss": 1.4455, "step": 1795 }, { "epoch": 0.22, "grad_norm": 7.745484175635602, "learning_rate": 9.05170512711901e-06, "loss": 1.2182, "step": 1796 }, { "epoch": 0.22, "grad_norm": 8.443089398973388, "learning_rate": 9.050524468081958e-06, "loss": 1.4837, "step": 1797 }, { "epoch": 0.22, "grad_norm": 8.539758439606874, "learning_rate": 9.049343151622446e-06, "loss": 1.4658, "step": 1798 }, { "epoch": 0.22, "grad_norm": 7.292952942657775, "learning_rate": 9.048161177932207e-06, "loss": 1.9189, "step": 1799 }, { "epoch": 0.22, "grad_norm": 8.17994752949066, "learning_rate": 9.04697854720308e-06, "loss": 1.5364, "step": 1800 }, { "epoch": 0.22, "grad_norm": 7.674749453696534, "learning_rate": 9.045795259627017e-06, "loss": 1.6876, "step": 1801 }, { "epoch": 0.22, "grad_norm": 7.636198667921498, "learning_rate": 9.044611315396069e-06, "loss": 1.5767, "step": 1802 }, { "epoch": 0.22, "grad_norm": 7.095153275482512, "learning_rate": 9.043426714702398e-06, "loss": 1.8939, "step": 1803 }, { "epoch": 0.22, "grad_norm": 7.315951024666214, "learning_rate": 9.042241457738268e-06, "loss": 1.7462, "step": 1804 }, { "epoch": 0.22, "grad_norm": 6.962548448689142, "learning_rate": 9.041055544696055e-06, "loss": 1.4016, "step": 1805 }, { "epoch": 0.22, "grad_norm": 7.121659958788608, "learning_rate": 9.039868975768242e-06, "loss": 1.6226, "step": 1806 }, { "epoch": 0.22, "grad_norm": 6.089358419245876, "learning_rate": 9.038681751147413e-06, "loss": 1.4169, "step": 1807 }, { "epoch": 0.22, "grad_norm": 6.753181931889405, "learning_rate": 9.037493871026259e-06, "loss": 1.5004, "step": 1808 }, { "epoch": 0.23, "grad_norm": 6.77924589917091, "learning_rate": 9.036305335597583e-06, "loss": 1.4282, "step": 1809 }, { "epoch": 0.23, "grad_norm": 8.830094780191667, "learning_rate": 9.035116145054292e-06, "loss": 1.8428, "step": 1810 }, { "epoch": 0.23, "grad_norm": 7.364010576047003, "learning_rate": 9.033926299589393e-06, "loss": 1.5697, "step": 1811 }, { "epoch": 0.23, "grad_norm": 7.343429905584983, "learning_rate": 9.032735799396009e-06, "loss": 1.6386, "step": 1812 }, { "epoch": 0.23, "grad_norm": 7.063066829711817, "learning_rate": 9.03154464466736e-06, "loss": 1.7615, "step": 1813 }, { "epoch": 0.23, "grad_norm": 8.108177668357532, "learning_rate": 9.030352835596782e-06, "loss": 1.299, "step": 1814 }, { "epoch": 0.23, "grad_norm": 6.851611045795069, "learning_rate": 9.029160372377712e-06, "loss": 1.6032, "step": 1815 }, { "epoch": 0.23, "grad_norm": 8.062174700299495, "learning_rate": 9.02796725520369e-06, "loss": 1.6936, "step": 1816 }, { "epoch": 0.23, "grad_norm": 7.396020877535412, "learning_rate": 9.026773484268368e-06, "loss": 1.6302, "step": 1817 }, { "epoch": 0.23, "grad_norm": 7.593291765304507, "learning_rate": 9.0255790597655e-06, "loss": 1.7756, "step": 1818 }, { "epoch": 0.23, "grad_norm": 8.084871264703903, "learning_rate": 9.024383981888947e-06, "loss": 1.3424, "step": 1819 }, { "epoch": 0.23, "grad_norm": 7.633542130225265, "learning_rate": 9.023188250832679e-06, "loss": 1.5668, "step": 1820 }, { "epoch": 0.23, "grad_norm": 9.298418614398845, "learning_rate": 9.021991866790768e-06, "loss": 1.4785, "step": 1821 }, { "epoch": 0.23, "grad_norm": 6.699459579594739, "learning_rate": 9.020794829957395e-06, "loss": 1.8559, "step": 1822 }, { "epoch": 0.23, "grad_norm": 6.74615063139876, "learning_rate": 9.019597140526846e-06, "loss": 1.3876, "step": 1823 }, { "epoch": 0.23, "grad_norm": 7.69705482091076, "learning_rate": 9.018398798693512e-06, "loss": 1.4895, "step": 1824 }, { "epoch": 0.23, "grad_norm": 6.804109411135982, "learning_rate": 9.01719980465189e-06, "loss": 1.5699, "step": 1825 }, { "epoch": 0.23, "grad_norm": 9.22752622836341, "learning_rate": 9.016000158596584e-06, "loss": 1.744, "step": 1826 }, { "epoch": 0.23, "grad_norm": 6.4585832417734705, "learning_rate": 9.0147998607223e-06, "loss": 1.1518, "step": 1827 }, { "epoch": 0.23, "grad_norm": 7.618096711476916, "learning_rate": 9.01359891122386e-06, "loss": 1.9518, "step": 1828 }, { "epoch": 0.23, "grad_norm": 6.475732799106416, "learning_rate": 9.012397310296179e-06, "loss": 1.5779, "step": 1829 }, { "epoch": 0.23, "grad_norm": 7.067592145295517, "learning_rate": 9.011195058134283e-06, "loss": 1.9323, "step": 1830 }, { "epoch": 0.23, "grad_norm": 6.657058911613968, "learning_rate": 9.009992154933309e-06, "loss": 1.4469, "step": 1831 }, { "epoch": 0.23, "grad_norm": 7.12548308619064, "learning_rate": 9.008788600888491e-06, "loss": 1.8113, "step": 1832 }, { "epoch": 0.23, "grad_norm": 8.03345178307184, "learning_rate": 9.007584396195173e-06, "loss": 1.6556, "step": 1833 }, { "epoch": 0.23, "grad_norm": 7.579658032075234, "learning_rate": 9.006379541048804e-06, "loss": 1.7639, "step": 1834 }, { "epoch": 0.23, "grad_norm": 8.16485597327567, "learning_rate": 9.00517403564494e-06, "loss": 1.6704, "step": 1835 }, { "epoch": 0.23, "grad_norm": 6.893900013731249, "learning_rate": 9.00396788017924e-06, "loss": 1.3304, "step": 1836 }, { "epoch": 0.23, "grad_norm": 8.385398336945231, "learning_rate": 9.00276107484747e-06, "loss": 1.5764, "step": 1837 }, { "epoch": 0.23, "grad_norm": 7.331149742915056, "learning_rate": 9.001553619845502e-06, "loss": 1.6372, "step": 1838 }, { "epoch": 0.23, "grad_norm": 7.081450288493388, "learning_rate": 9.000345515369312e-06, "loss": 1.7308, "step": 1839 }, { "epoch": 0.23, "grad_norm": 7.154837143467221, "learning_rate": 8.999136761614979e-06, "loss": 1.4235, "step": 1840 }, { "epoch": 0.23, "grad_norm": 6.493660953288515, "learning_rate": 8.997927358778695e-06, "loss": 1.2975, "step": 1841 }, { "epoch": 0.23, "grad_norm": 7.147915172538158, "learning_rate": 8.996717307056751e-06, "loss": 1.5998, "step": 1842 }, { "epoch": 0.23, "grad_norm": 7.803789640141037, "learning_rate": 8.995506606645546e-06, "loss": 1.7107, "step": 1843 }, { "epoch": 0.23, "grad_norm": 8.812790361124538, "learning_rate": 8.994295257741581e-06, "loss": 1.8968, "step": 1844 }, { "epoch": 0.23, "grad_norm": 7.98363025265323, "learning_rate": 8.993083260541467e-06, "loss": 1.5039, "step": 1845 }, { "epoch": 0.23, "grad_norm": 7.888583254613281, "learning_rate": 8.991870615241916e-06, "loss": 1.437, "step": 1846 }, { "epoch": 0.23, "grad_norm": 6.453334837932656, "learning_rate": 8.990657322039747e-06, "loss": 1.6343, "step": 1847 }, { "epoch": 0.23, "grad_norm": 7.254458901820773, "learning_rate": 8.989443381131885e-06, "loss": 1.4467, "step": 1848 }, { "epoch": 0.23, "grad_norm": 7.005232102987928, "learning_rate": 8.988228792715362e-06, "loss": 1.8225, "step": 1849 }, { "epoch": 0.23, "grad_norm": 6.355708424310986, "learning_rate": 8.987013556987307e-06, "loss": 1.5329, "step": 1850 }, { "epoch": 0.23, "grad_norm": 7.019526165791947, "learning_rate": 8.985797674144962e-06, "loss": 1.7365, "step": 1851 }, { "epoch": 0.23, "grad_norm": 7.212131144101945, "learning_rate": 8.984581144385673e-06, "loss": 1.5072, "step": 1852 }, { "epoch": 0.23, "grad_norm": 7.161249589437315, "learning_rate": 8.983363967906888e-06, "loss": 1.4634, "step": 1853 }, { "epoch": 0.23, "grad_norm": 6.815009971740238, "learning_rate": 8.98214614490616e-06, "loss": 1.4924, "step": 1854 }, { "epoch": 0.23, "grad_norm": 7.749853413263032, "learning_rate": 8.980927675581152e-06, "loss": 1.6205, "step": 1855 }, { "epoch": 0.23, "grad_norm": 6.0998301653465, "learning_rate": 8.979708560129625e-06, "loss": 1.6056, "step": 1856 }, { "epoch": 0.23, "grad_norm": 6.64667859929878, "learning_rate": 8.978488798749451e-06, "loss": 1.4366, "step": 1857 }, { "epoch": 0.23, "grad_norm": 5.766400457931669, "learning_rate": 8.9772683916386e-06, "loss": 1.4218, "step": 1858 }, { "epoch": 0.23, "grad_norm": 6.582308763198399, "learning_rate": 8.976047338995156e-06, "loss": 1.3069, "step": 1859 }, { "epoch": 0.23, "grad_norm": 7.052998919156637, "learning_rate": 8.974825641017297e-06, "loss": 1.5645, "step": 1860 }, { "epoch": 0.23, "grad_norm": 6.655868199829939, "learning_rate": 8.973603297903317e-06, "loss": 1.3444, "step": 1861 }, { "epoch": 0.23, "grad_norm": 7.293393490843982, "learning_rate": 8.972380309851606e-06, "loss": 1.5125, "step": 1862 }, { "epoch": 0.23, "grad_norm": 7.592727111382942, "learning_rate": 8.97115667706066e-06, "loss": 1.7692, "step": 1863 }, { "epoch": 0.23, "grad_norm": 7.468139877996896, "learning_rate": 8.969932399729083e-06, "loss": 1.7068, "step": 1864 }, { "epoch": 0.23, "grad_norm": 7.197158438577716, "learning_rate": 8.968707478055582e-06, "loss": 1.3723, "step": 1865 }, { "epoch": 0.23, "grad_norm": 7.639150794470938, "learning_rate": 8.967481912238971e-06, "loss": 1.6081, "step": 1866 }, { "epoch": 0.23, "grad_norm": 7.180954548820723, "learning_rate": 8.966255702478162e-06, "loss": 1.3888, "step": 1867 }, { "epoch": 0.23, "grad_norm": 7.039480460808609, "learning_rate": 8.965028848972178e-06, "loss": 1.6156, "step": 1868 }, { "epoch": 0.23, "grad_norm": 6.665214287025252, "learning_rate": 8.963801351920143e-06, "loss": 1.3432, "step": 1869 }, { "epoch": 0.23, "grad_norm": 6.557776821967282, "learning_rate": 8.962573211521289e-06, "loss": 1.3276, "step": 1870 }, { "epoch": 0.23, "grad_norm": 7.57734590032302, "learning_rate": 8.961344427974945e-06, "loss": 1.6956, "step": 1871 }, { "epoch": 0.23, "grad_norm": 6.914112909831765, "learning_rate": 8.960115001480554e-06, "loss": 1.4898, "step": 1872 }, { "epoch": 0.23, "grad_norm": 6.810288567169355, "learning_rate": 8.958884932237658e-06, "loss": 1.5859, "step": 1873 }, { "epoch": 0.23, "grad_norm": 7.684644094876788, "learning_rate": 8.957654220445901e-06, "loss": 1.5491, "step": 1874 }, { "epoch": 0.23, "grad_norm": 7.194162864748354, "learning_rate": 8.956422866305039e-06, "loss": 1.4452, "step": 1875 }, { "epoch": 0.23, "grad_norm": 8.711215581234889, "learning_rate": 8.955190870014923e-06, "loss": 1.5997, "step": 1876 }, { "epoch": 0.23, "grad_norm": 7.491376774054437, "learning_rate": 8.953958231775515e-06, "loss": 1.5235, "step": 1877 }, { "epoch": 0.23, "grad_norm": 6.740479945637894, "learning_rate": 8.952724951786878e-06, "loss": 1.7769, "step": 1878 }, { "epoch": 0.23, "grad_norm": 7.012676340131778, "learning_rate": 8.951491030249182e-06, "loss": 1.7731, "step": 1879 }, { "epoch": 0.23, "grad_norm": 7.236730896230488, "learning_rate": 8.9502564673627e-06, "loss": 1.9831, "step": 1880 }, { "epoch": 0.23, "grad_norm": 6.222475946371114, "learning_rate": 8.949021263327803e-06, "loss": 1.3827, "step": 1881 }, { "epoch": 0.23, "grad_norm": 6.132990629429032, "learning_rate": 8.947785418344977e-06, "loss": 1.2817, "step": 1882 }, { "epoch": 0.23, "grad_norm": 6.781029003877739, "learning_rate": 8.946548932614803e-06, "loss": 1.3069, "step": 1883 }, { "epoch": 0.23, "grad_norm": 6.80724522294729, "learning_rate": 8.945311806337972e-06, "loss": 1.1618, "step": 1884 }, { "epoch": 0.23, "grad_norm": 6.3586148355563425, "learning_rate": 8.944074039715276e-06, "loss": 1.598, "step": 1885 }, { "epoch": 0.23, "grad_norm": 6.693850333386372, "learning_rate": 8.942835632947609e-06, "loss": 1.5714, "step": 1886 }, { "epoch": 0.23, "grad_norm": 7.308669900040884, "learning_rate": 8.941596586235972e-06, "loss": 1.2761, "step": 1887 }, { "epoch": 0.23, "grad_norm": 6.497865702833452, "learning_rate": 8.940356899781473e-06, "loss": 1.3329, "step": 1888 }, { "epoch": 0.23, "grad_norm": 5.8995700698096485, "learning_rate": 8.939116573785315e-06, "loss": 1.4092, "step": 1889 }, { "epoch": 0.24, "grad_norm": 7.300831254745804, "learning_rate": 8.937875608448812e-06, "loss": 1.4468, "step": 1890 }, { "epoch": 0.24, "grad_norm": 7.337764173813319, "learning_rate": 8.936634003973379e-06, "loss": 1.682, "step": 1891 }, { "epoch": 0.24, "grad_norm": 7.335933959351164, "learning_rate": 8.935391760560537e-06, "loss": 1.354, "step": 1892 }, { "epoch": 0.24, "grad_norm": 7.842685417882735, "learning_rate": 8.934148878411906e-06, "loss": 1.6803, "step": 1893 }, { "epoch": 0.24, "grad_norm": 6.2174896696488755, "learning_rate": 8.932905357729213e-06, "loss": 1.2034, "step": 1894 }, { "epoch": 0.24, "grad_norm": 8.786371149031847, "learning_rate": 8.931661198714291e-06, "loss": 1.612, "step": 1895 }, { "epoch": 0.24, "grad_norm": 8.68821780485353, "learning_rate": 8.930416401569071e-06, "loss": 1.3569, "step": 1896 }, { "epoch": 0.24, "grad_norm": 7.38541325305852, "learning_rate": 8.929170966495595e-06, "loss": 1.5964, "step": 1897 }, { "epoch": 0.24, "grad_norm": 6.725405998859199, "learning_rate": 8.927924893695996e-06, "loss": 1.477, "step": 1898 }, { "epoch": 0.24, "grad_norm": 7.497097751854439, "learning_rate": 8.926678183372527e-06, "loss": 1.4787, "step": 1899 }, { "epoch": 0.24, "grad_norm": 9.280613223605306, "learning_rate": 8.925430835727529e-06, "loss": 1.6634, "step": 1900 }, { "epoch": 0.24, "grad_norm": 6.854408754075457, "learning_rate": 8.924182850963457e-06, "loss": 1.4279, "step": 1901 }, { "epoch": 0.24, "grad_norm": 8.037112028678738, "learning_rate": 8.922934229282868e-06, "loss": 1.7853, "step": 1902 }, { "epoch": 0.24, "grad_norm": 8.265408382761759, "learning_rate": 8.921684970888413e-06, "loss": 1.6764, "step": 1903 }, { "epoch": 0.24, "grad_norm": 8.06715133010411, "learning_rate": 8.920435075982861e-06, "loss": 1.5998, "step": 1904 }, { "epoch": 0.24, "grad_norm": 6.798324662078614, "learning_rate": 8.919184544769073e-06, "loss": 1.3718, "step": 1905 }, { "epoch": 0.24, "grad_norm": 7.420777761928666, "learning_rate": 8.91793337745002e-06, "loss": 1.6423, "step": 1906 }, { "epoch": 0.24, "grad_norm": 8.193154065899483, "learning_rate": 8.916681574228767e-06, "loss": 1.4749, "step": 1907 }, { "epoch": 0.24, "grad_norm": 6.759947278504975, "learning_rate": 8.915429135308496e-06, "loss": 1.4803, "step": 1908 }, { "epoch": 0.24, "grad_norm": 6.940107886570812, "learning_rate": 8.914176060892481e-06, "loss": 1.6353, "step": 1909 }, { "epoch": 0.24, "grad_norm": 5.818958744904566, "learning_rate": 8.912922351184103e-06, "loss": 1.6389, "step": 1910 }, { "epoch": 0.24, "grad_norm": 7.493727468113883, "learning_rate": 8.911668006386846e-06, "loss": 1.3377, "step": 1911 }, { "epoch": 0.24, "grad_norm": 7.691705599567122, "learning_rate": 8.910413026704298e-06, "loss": 1.4394, "step": 1912 }, { "epoch": 0.24, "grad_norm": 7.144533884956966, "learning_rate": 8.90915741234015e-06, "loss": 1.5015, "step": 1913 }, { "epoch": 0.24, "grad_norm": 6.4961717675606625, "learning_rate": 8.907901163498193e-06, "loss": 1.4873, "step": 1914 }, { "epoch": 0.24, "grad_norm": 8.385994842862482, "learning_rate": 8.906644280382325e-06, "loss": 1.6762, "step": 1915 }, { "epoch": 0.24, "grad_norm": 6.994264408245803, "learning_rate": 8.905386763196541e-06, "loss": 1.6965, "step": 1916 }, { "epoch": 0.24, "grad_norm": 8.0347719309413, "learning_rate": 8.90412861214495e-06, "loss": 1.646, "step": 1917 }, { "epoch": 0.24, "grad_norm": 8.066436403429622, "learning_rate": 8.902869827431749e-06, "loss": 1.8872, "step": 1918 }, { "epoch": 0.24, "grad_norm": 6.854983213155375, "learning_rate": 8.901610409261252e-06, "loss": 1.4535, "step": 1919 }, { "epoch": 0.24, "grad_norm": 6.6579039220615535, "learning_rate": 8.900350357837864e-06, "loss": 1.351, "step": 1920 }, { "epoch": 0.24, "grad_norm": 6.512668328461758, "learning_rate": 8.899089673366105e-06, "loss": 1.4713, "step": 1921 }, { "epoch": 0.24, "grad_norm": 7.275438283800283, "learning_rate": 8.897828356050587e-06, "loss": 1.8305, "step": 1922 }, { "epoch": 0.24, "grad_norm": 6.416093636624433, "learning_rate": 8.896566406096027e-06, "loss": 1.4277, "step": 1923 }, { "epoch": 0.24, "grad_norm": 7.121393430275167, "learning_rate": 8.895303823707248e-06, "loss": 1.7365, "step": 1924 }, { "epoch": 0.24, "grad_norm": 6.907168832308513, "learning_rate": 8.894040609089179e-06, "loss": 1.3515, "step": 1925 }, { "epoch": 0.24, "grad_norm": 5.952299824851991, "learning_rate": 8.892776762446838e-06, "loss": 1.2634, "step": 1926 }, { "epoch": 0.24, "grad_norm": 7.629802134767989, "learning_rate": 8.89151228398536e-06, "loss": 1.5113, "step": 1927 }, { "epoch": 0.24, "grad_norm": 7.092795914536396, "learning_rate": 8.890247173909976e-06, "loss": 1.6001, "step": 1928 }, { "epoch": 0.24, "grad_norm": 7.640550006900662, "learning_rate": 8.88898143242602e-06, "loss": 1.4685, "step": 1929 }, { "epoch": 0.24, "grad_norm": 6.574475725670384, "learning_rate": 8.887715059738929e-06, "loss": 1.137, "step": 1930 }, { "epoch": 0.24, "grad_norm": 7.428135043167321, "learning_rate": 8.88644805605424e-06, "loss": 1.4111, "step": 1931 }, { "epoch": 0.24, "grad_norm": 6.509579628045263, "learning_rate": 8.885180421577598e-06, "loss": 1.6075, "step": 1932 }, { "epoch": 0.24, "grad_norm": 7.243161625212483, "learning_rate": 8.883912156514747e-06, "loss": 1.6493, "step": 1933 }, { "epoch": 0.24, "grad_norm": 7.704238314357563, "learning_rate": 8.882643261071529e-06, "loss": 1.4541, "step": 1934 }, { "epoch": 0.24, "grad_norm": 7.042372302710571, "learning_rate": 8.881373735453897e-06, "loss": 1.7401, "step": 1935 }, { "epoch": 0.24, "grad_norm": 6.640570354837364, "learning_rate": 8.880103579867903e-06, "loss": 1.627, "step": 1936 }, { "epoch": 0.24, "grad_norm": 7.111012701881216, "learning_rate": 8.878832794519696e-06, "loss": 1.4032, "step": 1937 }, { "epoch": 0.24, "grad_norm": 10.184197889688622, "learning_rate": 8.877561379615533e-06, "loss": 1.4767, "step": 1938 }, { "epoch": 0.24, "grad_norm": 7.24002564177959, "learning_rate": 8.876289335361774e-06, "loss": 1.5265, "step": 1939 }, { "epoch": 0.24, "grad_norm": 7.226530485239629, "learning_rate": 8.875016661964875e-06, "loss": 1.4816, "step": 1940 }, { "epoch": 0.24, "grad_norm": 8.352482787536701, "learning_rate": 8.873743359631401e-06, "loss": 1.5402, "step": 1941 }, { "epoch": 0.24, "grad_norm": 7.09133558228607, "learning_rate": 8.872469428568016e-06, "loss": 1.4828, "step": 1942 }, { "epoch": 0.24, "grad_norm": 6.544685819923842, "learning_rate": 8.871194868981483e-06, "loss": 1.3435, "step": 1943 }, { "epoch": 0.24, "grad_norm": 7.966120975384796, "learning_rate": 8.869919681078673e-06, "loss": 1.4046, "step": 1944 }, { "epoch": 0.24, "grad_norm": 7.1127745484304326, "learning_rate": 8.868643865066556e-06, "loss": 1.4203, "step": 1945 }, { "epoch": 0.24, "grad_norm": 7.214401829293501, "learning_rate": 8.8673674211522e-06, "loss": 1.672, "step": 1946 }, { "epoch": 0.24, "grad_norm": 7.407721576404229, "learning_rate": 8.866090349542785e-06, "loss": 1.354, "step": 1947 }, { "epoch": 0.24, "grad_norm": 7.234144156992056, "learning_rate": 8.864812650445582e-06, "loss": 1.4848, "step": 1948 }, { "epoch": 0.24, "grad_norm": 6.8876945869161155, "learning_rate": 8.86353432406797e-06, "loss": 1.4544, "step": 1949 }, { "epoch": 0.24, "grad_norm": 7.038114380569798, "learning_rate": 8.86225537061743e-06, "loss": 1.3057, "step": 1950 }, { "epoch": 0.24, "grad_norm": 7.509971515630899, "learning_rate": 8.86097579030154e-06, "loss": 1.573, "step": 1951 }, { "epoch": 0.24, "grad_norm": 7.167294353005758, "learning_rate": 8.859695583327985e-06, "loss": 1.4231, "step": 1952 }, { "epoch": 0.24, "grad_norm": 7.2849290794684185, "learning_rate": 8.858414749904551e-06, "loss": 1.6105, "step": 1953 }, { "epoch": 0.24, "grad_norm": 7.554386521976901, "learning_rate": 8.857133290239123e-06, "loss": 1.7287, "step": 1954 }, { "epoch": 0.24, "grad_norm": 7.454927862484182, "learning_rate": 8.855851204539687e-06, "loss": 1.5683, "step": 1955 }, { "epoch": 0.24, "grad_norm": 6.947450341595674, "learning_rate": 8.854568493014335e-06, "loss": 1.3977, "step": 1956 }, { "epoch": 0.24, "grad_norm": 7.318728003963947, "learning_rate": 8.853285155871258e-06, "loss": 1.7278, "step": 1957 }, { "epoch": 0.24, "grad_norm": 7.456012429839485, "learning_rate": 8.852001193318748e-06, "loss": 1.8044, "step": 1958 }, { "epoch": 0.24, "grad_norm": 7.877201181539815, "learning_rate": 8.8507166055652e-06, "loss": 1.3611, "step": 1959 }, { "epoch": 0.24, "grad_norm": 6.9268856057190735, "learning_rate": 8.849431392819109e-06, "loss": 1.5083, "step": 1960 }, { "epoch": 0.24, "grad_norm": 7.537617687680475, "learning_rate": 8.848145555289071e-06, "loss": 1.5299, "step": 1961 }, { "epoch": 0.24, "grad_norm": 6.87956690465329, "learning_rate": 8.846859093183788e-06, "loss": 1.6837, "step": 1962 }, { "epoch": 0.24, "grad_norm": 6.5367555417939665, "learning_rate": 8.845572006712056e-06, "loss": 1.3788, "step": 1963 }, { "epoch": 0.24, "grad_norm": 7.278257899195676, "learning_rate": 8.844284296082776e-06, "loss": 1.6486, "step": 1964 }, { "epoch": 0.24, "grad_norm": 6.697242432717211, "learning_rate": 8.842995961504954e-06, "loss": 1.5158, "step": 1965 }, { "epoch": 0.24, "grad_norm": 7.257732450832689, "learning_rate": 8.841707003187695e-06, "loss": 1.4774, "step": 1966 }, { "epoch": 0.24, "grad_norm": 6.076132038837274, "learning_rate": 8.840417421340198e-06, "loss": 1.5103, "step": 1967 }, { "epoch": 0.24, "grad_norm": 7.118304451682792, "learning_rate": 8.839127216171772e-06, "loss": 1.6225, "step": 1968 }, { "epoch": 0.24, "grad_norm": 7.817905038764504, "learning_rate": 8.837836387891827e-06, "loss": 1.5583, "step": 1969 }, { "epoch": 0.25, "grad_norm": 5.996203371897669, "learning_rate": 8.836544936709868e-06, "loss": 1.3909, "step": 1970 }, { "epoch": 0.25, "grad_norm": 6.990582450832653, "learning_rate": 8.835252862835508e-06, "loss": 1.5921, "step": 1971 }, { "epoch": 0.25, "grad_norm": 7.491073852834062, "learning_rate": 8.833960166478454e-06, "loss": 1.4552, "step": 1972 }, { "epoch": 0.25, "grad_norm": 8.01567611069019, "learning_rate": 8.83266684784852e-06, "loss": 1.9895, "step": 1973 }, { "epoch": 0.25, "grad_norm": 7.101492536629152, "learning_rate": 8.831372907155617e-06, "loss": 1.5301, "step": 1974 }, { "epoch": 0.25, "grad_norm": 8.172948552043037, "learning_rate": 8.830078344609763e-06, "loss": 1.3746, "step": 1975 }, { "epoch": 0.25, "grad_norm": 6.841286797637124, "learning_rate": 8.828783160421067e-06, "loss": 1.449, "step": 1976 }, { "epoch": 0.25, "grad_norm": 6.654521401413598, "learning_rate": 8.82748735479975e-06, "loss": 1.3869, "step": 1977 }, { "epoch": 0.25, "grad_norm": 10.573123781835701, "learning_rate": 8.826190927956123e-06, "loss": 1.5504, "step": 1978 }, { "epoch": 0.25, "grad_norm": 7.2118550433354764, "learning_rate": 8.824893880100607e-06, "loss": 1.7337, "step": 1979 }, { "epoch": 0.25, "grad_norm": 9.514949101476214, "learning_rate": 8.823596211443719e-06, "loss": 1.4172, "step": 1980 }, { "epoch": 0.25, "grad_norm": 7.325043844109597, "learning_rate": 8.822297922196076e-06, "loss": 1.8533, "step": 1981 }, { "epoch": 0.25, "grad_norm": 7.787626152274091, "learning_rate": 8.820999012568402e-06, "loss": 1.6962, "step": 1982 }, { "epoch": 0.25, "grad_norm": 7.855005013678277, "learning_rate": 8.81969948277151e-06, "loss": 1.6666, "step": 1983 }, { "epoch": 0.25, "grad_norm": 6.663748650335387, "learning_rate": 8.81839933301633e-06, "loss": 1.5204, "step": 1984 }, { "epoch": 0.25, "grad_norm": 7.053706845391676, "learning_rate": 8.817098563513874e-06, "loss": 1.6936, "step": 1985 }, { "epoch": 0.25, "grad_norm": 6.850514325971996, "learning_rate": 8.815797174475273e-06, "loss": 1.4679, "step": 1986 }, { "epoch": 0.25, "grad_norm": 6.701430519568883, "learning_rate": 8.814495166111743e-06, "loss": 1.4757, "step": 1987 }, { "epoch": 0.25, "grad_norm": 7.391959844743167, "learning_rate": 8.81319253863461e-06, "loss": 1.8623, "step": 1988 }, { "epoch": 0.25, "grad_norm": 6.70667875636883, "learning_rate": 8.811889292255297e-06, "loss": 1.4851, "step": 1989 }, { "epoch": 0.25, "grad_norm": 7.50171217188488, "learning_rate": 8.810585427185328e-06, "loss": 1.8438, "step": 1990 }, { "epoch": 0.25, "grad_norm": 6.900886244646147, "learning_rate": 8.809280943636329e-06, "loss": 1.3879, "step": 1991 }, { "epoch": 0.25, "grad_norm": 7.195247106937324, "learning_rate": 8.807975841820023e-06, "loss": 1.64, "step": 1992 }, { "epoch": 0.25, "grad_norm": 6.61053001170947, "learning_rate": 8.806670121948234e-06, "loss": 1.3562, "step": 1993 }, { "epoch": 0.25, "grad_norm": 6.644287528696013, "learning_rate": 8.80536378423289e-06, "loss": 1.484, "step": 1994 }, { "epoch": 0.25, "grad_norm": 6.568641313651743, "learning_rate": 8.804056828886019e-06, "loss": 1.5931, "step": 1995 }, { "epoch": 0.25, "grad_norm": 7.106797324547359, "learning_rate": 8.80274925611974e-06, "loss": 1.4355, "step": 1996 }, { "epoch": 0.25, "grad_norm": 6.4708962502149046, "learning_rate": 8.801441066146286e-06, "loss": 1.3963, "step": 1997 }, { "epoch": 0.25, "grad_norm": 7.983870334571824, "learning_rate": 8.80013225917798e-06, "loss": 1.6796, "step": 1998 }, { "epoch": 0.25, "grad_norm": 9.950059815700783, "learning_rate": 8.79882283542725e-06, "loss": 1.4787, "step": 1999 }, { "epoch": 0.25, "grad_norm": 7.5951725883943375, "learning_rate": 8.797512795106622e-06, "loss": 1.783, "step": 2000 }, { "epoch": 0.25, "grad_norm": 7.401107997610314, "learning_rate": 8.796202138428722e-06, "loss": 1.5864, "step": 2001 }, { "epoch": 0.25, "grad_norm": 7.454672862290784, "learning_rate": 8.79489086560628e-06, "loss": 1.8653, "step": 2002 }, { "epoch": 0.25, "grad_norm": 7.433018957042579, "learning_rate": 8.793578976852118e-06, "loss": 1.7246, "step": 2003 }, { "epoch": 0.25, "grad_norm": 7.502543019971579, "learning_rate": 8.792266472379168e-06, "loss": 1.449, "step": 2004 }, { "epoch": 0.25, "grad_norm": 6.752850666919463, "learning_rate": 8.790953352400452e-06, "loss": 1.5448, "step": 2005 }, { "epoch": 0.25, "grad_norm": 5.990915725350933, "learning_rate": 8.7896396171291e-06, "loss": 1.43, "step": 2006 }, { "epoch": 0.25, "grad_norm": 6.6000869425064055, "learning_rate": 8.788325266778336e-06, "loss": 1.7596, "step": 2007 }, { "epoch": 0.25, "grad_norm": 7.225527793690743, "learning_rate": 8.787010301561488e-06, "loss": 1.5094, "step": 2008 }, { "epoch": 0.25, "grad_norm": 8.260132919520014, "learning_rate": 8.785694721691981e-06, "loss": 1.5835, "step": 2009 }, { "epoch": 0.25, "grad_norm": 7.3300792780271475, "learning_rate": 8.784378527383342e-06, "loss": 1.4601, "step": 2010 }, { "epoch": 0.25, "grad_norm": 8.171244561481, "learning_rate": 8.783061718849197e-06, "loss": 1.6201, "step": 2011 }, { "epoch": 0.25, "grad_norm": 7.102653105128977, "learning_rate": 8.78174429630327e-06, "loss": 1.5889, "step": 2012 }, { "epoch": 0.25, "grad_norm": 7.568736367177257, "learning_rate": 8.780426259959385e-06, "loss": 1.5471, "step": 2013 }, { "epoch": 0.25, "grad_norm": 6.864798724792219, "learning_rate": 8.779107610031468e-06, "loss": 1.5446, "step": 2014 }, { "epoch": 0.25, "grad_norm": 7.762392451343402, "learning_rate": 8.777788346733543e-06, "loss": 1.4449, "step": 2015 }, { "epoch": 0.25, "grad_norm": 7.154415344596559, "learning_rate": 8.776468470279735e-06, "loss": 1.6436, "step": 2016 }, { "epoch": 0.25, "grad_norm": 6.88287661862337, "learning_rate": 8.775147980884265e-06, "loss": 1.5059, "step": 2017 }, { "epoch": 0.25, "grad_norm": 6.683255602401333, "learning_rate": 8.773826878761456e-06, "loss": 1.4724, "step": 2018 }, { "epoch": 0.25, "grad_norm": 8.537793812187479, "learning_rate": 8.772505164125734e-06, "loss": 1.6037, "step": 2019 }, { "epoch": 0.25, "grad_norm": 6.881954708224423, "learning_rate": 8.771182837191614e-06, "loss": 1.5577, "step": 2020 }, { "epoch": 0.25, "grad_norm": 7.4678144611963555, "learning_rate": 8.76985989817372e-06, "loss": 1.386, "step": 2021 }, { "epoch": 0.25, "grad_norm": 7.303678855058957, "learning_rate": 8.768536347286774e-06, "loss": 1.4451, "step": 2022 }, { "epoch": 0.25, "grad_norm": 6.895541013846909, "learning_rate": 8.767212184745595e-06, "loss": 1.3847, "step": 2023 }, { "epoch": 0.25, "grad_norm": 7.0955938265274865, "learning_rate": 8.7658874107651e-06, "loss": 1.6178, "step": 2024 }, { "epoch": 0.25, "grad_norm": 7.6128979727391135, "learning_rate": 8.764562025560309e-06, "loss": 1.5883, "step": 2025 }, { "epoch": 0.25, "grad_norm": 6.8538481955461705, "learning_rate": 8.763236029346339e-06, "loss": 1.561, "step": 2026 }, { "epoch": 0.25, "grad_norm": 7.576204983128869, "learning_rate": 8.761909422338404e-06, "loss": 1.6871, "step": 2027 }, { "epoch": 0.25, "grad_norm": 8.419626100725539, "learning_rate": 8.760582204751824e-06, "loss": 1.4112, "step": 2028 }, { "epoch": 0.25, "grad_norm": 7.2152369865555555, "learning_rate": 8.75925437680201e-06, "loss": 1.5758, "step": 2029 }, { "epoch": 0.25, "grad_norm": 6.977629211713386, "learning_rate": 8.757925938704478e-06, "loss": 1.7533, "step": 2030 }, { "epoch": 0.25, "grad_norm": 6.872638173714682, "learning_rate": 8.75659689067484e-06, "loss": 1.3245, "step": 2031 }, { "epoch": 0.25, "grad_norm": 8.46909890980775, "learning_rate": 8.755267232928808e-06, "loss": 1.8728, "step": 2032 }, { "epoch": 0.25, "grad_norm": 6.407180634628666, "learning_rate": 8.753936965682193e-06, "loss": 1.2675, "step": 2033 }, { "epoch": 0.25, "grad_norm": 6.207540037968187, "learning_rate": 8.752606089150903e-06, "loss": 1.4688, "step": 2034 }, { "epoch": 0.25, "grad_norm": 6.642447306659013, "learning_rate": 8.75127460355095e-06, "loss": 1.2299, "step": 2035 }, { "epoch": 0.25, "grad_norm": 7.187379099631403, "learning_rate": 8.749942509098437e-06, "loss": 1.7758, "step": 2036 }, { "epoch": 0.25, "grad_norm": 6.714799042121469, "learning_rate": 8.748609806009575e-06, "loss": 1.354, "step": 2037 }, { "epoch": 0.25, "grad_norm": 6.834720617567051, "learning_rate": 8.747276494500665e-06, "loss": 1.3629, "step": 2038 }, { "epoch": 0.25, "grad_norm": 7.512902838107472, "learning_rate": 8.745942574788115e-06, "loss": 1.2949, "step": 2039 }, { "epoch": 0.25, "grad_norm": 7.644319660454887, "learning_rate": 8.744608047088423e-06, "loss": 1.6874, "step": 2040 }, { "epoch": 0.25, "grad_norm": 7.53120222414053, "learning_rate": 8.743272911618193e-06, "loss": 1.3839, "step": 2041 }, { "epoch": 0.25, "grad_norm": 7.826449052349631, "learning_rate": 8.741937168594124e-06, "loss": 1.5981, "step": 2042 }, { "epoch": 0.25, "grad_norm": 8.20189723708259, "learning_rate": 8.740600818233015e-06, "loss": 1.6661, "step": 2043 }, { "epoch": 0.25, "grad_norm": 8.104593987812608, "learning_rate": 8.739263860751764e-06, "loss": 1.6991, "step": 2044 }, { "epoch": 0.25, "grad_norm": 8.610305312123538, "learning_rate": 8.737926296367363e-06, "loss": 1.3237, "step": 2045 }, { "epoch": 0.25, "grad_norm": 6.665173642756507, "learning_rate": 8.736588125296907e-06, "loss": 1.5929, "step": 2046 }, { "epoch": 0.25, "grad_norm": 6.684629321323698, "learning_rate": 8.735249347757592e-06, "loss": 1.3833, "step": 2047 }, { "epoch": 0.25, "grad_norm": 7.521161825171303, "learning_rate": 8.733909963966709e-06, "loss": 1.1279, "step": 2048 }, { "epoch": 0.25, "grad_norm": 6.738595342249495, "learning_rate": 8.732569974141642e-06, "loss": 1.2394, "step": 2049 }, { "epoch": 0.25, "grad_norm": 7.74115255639403, "learning_rate": 8.731229378499884e-06, "loss": 1.7317, "step": 2050 }, { "epoch": 0.26, "grad_norm": 7.4300741696918085, "learning_rate": 8.729888177259017e-06, "loss": 1.4303, "step": 2051 }, { "epoch": 0.26, "grad_norm": 7.4466092149890555, "learning_rate": 8.728546370636729e-06, "loss": 1.6869, "step": 2052 }, { "epoch": 0.26, "grad_norm": 7.997703030245158, "learning_rate": 8.7272039588508e-06, "loss": 1.5732, "step": 2053 }, { "epoch": 0.26, "grad_norm": 6.735464511391652, "learning_rate": 8.725860942119114e-06, "loss": 1.2817, "step": 2054 }, { "epoch": 0.26, "grad_norm": 7.262166956222659, "learning_rate": 8.724517320659644e-06, "loss": 1.4334, "step": 2055 }, { "epoch": 0.26, "grad_norm": 9.867953309164644, "learning_rate": 8.723173094690475e-06, "loss": 1.4005, "step": 2056 }, { "epoch": 0.26, "grad_norm": 6.7373170585841065, "learning_rate": 8.721828264429776e-06, "loss": 1.5353, "step": 2057 }, { "epoch": 0.26, "grad_norm": 7.251936996207739, "learning_rate": 8.720482830095825e-06, "loss": 1.3791, "step": 2058 }, { "epoch": 0.26, "grad_norm": 7.470427807109488, "learning_rate": 8.719136791906989e-06, "loss": 1.5803, "step": 2059 }, { "epoch": 0.26, "grad_norm": 7.941321481873459, "learning_rate": 8.71779015008174e-06, "loss": 1.4259, "step": 2060 }, { "epoch": 0.26, "grad_norm": 7.474768770865681, "learning_rate": 8.716442904838645e-06, "loss": 1.484, "step": 2061 }, { "epoch": 0.26, "grad_norm": 6.19762616108581, "learning_rate": 8.715095056396369e-06, "loss": 1.2208, "step": 2062 }, { "epoch": 0.26, "grad_norm": 6.350921090737167, "learning_rate": 8.713746604973676e-06, "loss": 1.4135, "step": 2063 }, { "epoch": 0.26, "grad_norm": 6.886638852694391, "learning_rate": 8.712397550789427e-06, "loss": 1.4058, "step": 2064 }, { "epoch": 0.26, "grad_norm": 6.213418808428878, "learning_rate": 8.711047894062578e-06, "loss": 1.3599, "step": 2065 }, { "epoch": 0.26, "grad_norm": 7.479114452479256, "learning_rate": 8.70969763501219e-06, "loss": 1.6024, "step": 2066 }, { "epoch": 0.26, "grad_norm": 6.437724999966527, "learning_rate": 8.708346773857417e-06, "loss": 1.4228, "step": 2067 }, { "epoch": 0.26, "grad_norm": 6.9683033263961995, "learning_rate": 8.706995310817508e-06, "loss": 1.4799, "step": 2068 }, { "epoch": 0.26, "grad_norm": 7.672580003713315, "learning_rate": 8.705643246111817e-06, "loss": 1.6459, "step": 2069 }, { "epoch": 0.26, "grad_norm": 7.420346078995278, "learning_rate": 8.704290579959788e-06, "loss": 1.9511, "step": 2070 }, { "epoch": 0.26, "grad_norm": 7.041729207307923, "learning_rate": 8.702937312580967e-06, "loss": 1.3658, "step": 2071 }, { "epoch": 0.26, "grad_norm": 7.451469620344782, "learning_rate": 8.701583444195e-06, "loss": 1.3839, "step": 2072 }, { "epoch": 0.26, "grad_norm": 6.943532168626222, "learning_rate": 8.700228975021621e-06, "loss": 1.3707, "step": 2073 }, { "epoch": 0.26, "grad_norm": 6.761895346964152, "learning_rate": 8.698873905280674e-06, "loss": 1.344, "step": 2074 }, { "epoch": 0.26, "grad_norm": 7.06771757921348, "learning_rate": 8.697518235192091e-06, "loss": 1.2191, "step": 2075 }, { "epoch": 0.26, "grad_norm": 6.7518948236473255, "learning_rate": 8.696161964975907e-06, "loss": 1.3375, "step": 2076 }, { "epoch": 0.26, "grad_norm": 7.161095224176188, "learning_rate": 8.69480509485225e-06, "loss": 1.366, "step": 2077 }, { "epoch": 0.26, "grad_norm": 6.1429475795560355, "learning_rate": 8.693447625041347e-06, "loss": 1.6724, "step": 2078 }, { "epoch": 0.26, "grad_norm": 6.844029848766538, "learning_rate": 8.692089555763524e-06, "loss": 1.4194, "step": 2079 }, { "epoch": 0.26, "grad_norm": 6.702435922461526, "learning_rate": 8.690730887239204e-06, "loss": 1.67, "step": 2080 }, { "epoch": 0.26, "grad_norm": 6.922982028118482, "learning_rate": 8.689371619688905e-06, "loss": 1.7349, "step": 2081 }, { "epoch": 0.26, "grad_norm": 7.318895899923445, "learning_rate": 8.688011753333245e-06, "loss": 1.59, "step": 2082 }, { "epoch": 0.26, "grad_norm": 6.241685602627581, "learning_rate": 8.686651288392937e-06, "loss": 1.5764, "step": 2083 }, { "epoch": 0.26, "grad_norm": 7.047610889985137, "learning_rate": 8.685290225088791e-06, "loss": 1.4968, "step": 2084 }, { "epoch": 0.26, "grad_norm": 6.299874932859793, "learning_rate": 8.683928563641717e-06, "loss": 1.3873, "step": 2085 }, { "epoch": 0.26, "grad_norm": 7.3092941383454235, "learning_rate": 8.682566304272718e-06, "loss": 1.5607, "step": 2086 }, { "epoch": 0.26, "grad_norm": 7.798187145027084, "learning_rate": 8.681203447202897e-06, "loss": 1.5582, "step": 2087 }, { "epoch": 0.26, "grad_norm": 6.9470570987170595, "learning_rate": 8.679839992653457e-06, "loss": 1.1722, "step": 2088 }, { "epoch": 0.26, "grad_norm": 8.67572072132863, "learning_rate": 8.678475940845688e-06, "loss": 1.1755, "step": 2089 }, { "epoch": 0.26, "grad_norm": 7.466090789386474, "learning_rate": 8.677111292000985e-06, "loss": 1.283, "step": 2090 }, { "epoch": 0.26, "grad_norm": 8.132815048761245, "learning_rate": 8.67574604634084e-06, "loss": 1.561, "step": 2091 }, { "epoch": 0.26, "grad_norm": 7.1587158433902385, "learning_rate": 8.67438020408684e-06, "loss": 1.2717, "step": 2092 }, { "epoch": 0.26, "grad_norm": 7.161921757726486, "learning_rate": 8.673013765460666e-06, "loss": 1.1874, "step": 2093 }, { "epoch": 0.26, "grad_norm": 6.738272764259905, "learning_rate": 8.6716467306841e-06, "loss": 1.3484, "step": 2094 }, { "epoch": 0.26, "grad_norm": 7.032085902169154, "learning_rate": 8.67027909997902e-06, "loss": 1.4951, "step": 2095 }, { "epoch": 0.26, "grad_norm": 7.635641143494652, "learning_rate": 8.6689108735674e-06, "loss": 1.7634, "step": 2096 }, { "epoch": 0.26, "grad_norm": 6.617528322292227, "learning_rate": 8.66754205167131e-06, "loss": 1.4747, "step": 2097 }, { "epoch": 0.26, "grad_norm": 7.744160512709391, "learning_rate": 8.666172634512916e-06, "loss": 1.2826, "step": 2098 }, { "epoch": 0.26, "grad_norm": 6.63534976409375, "learning_rate": 8.664802622314486e-06, "loss": 1.3592, "step": 2099 }, { "epoch": 0.26, "grad_norm": 6.918194677248264, "learning_rate": 8.663432015298377e-06, "loss": 1.4899, "step": 2100 }, { "epoch": 0.26, "grad_norm": 7.86250453952712, "learning_rate": 8.662060813687046e-06, "loss": 1.1833, "step": 2101 }, { "epoch": 0.26, "grad_norm": 7.843597748839793, "learning_rate": 8.660689017703048e-06, "loss": 1.4198, "step": 2102 }, { "epoch": 0.26, "grad_norm": 8.185467827957671, "learning_rate": 8.659316627569032e-06, "loss": 1.5526, "step": 2103 }, { "epoch": 0.26, "grad_norm": 8.112254041197895, "learning_rate": 8.657943643507747e-06, "loss": 1.5964, "step": 2104 }, { "epoch": 0.26, "grad_norm": 7.39172844040915, "learning_rate": 8.656570065742033e-06, "loss": 1.5015, "step": 2105 }, { "epoch": 0.26, "grad_norm": 6.739151852772587, "learning_rate": 8.65519589449483e-06, "loss": 1.5122, "step": 2106 }, { "epoch": 0.26, "grad_norm": 7.251492810161331, "learning_rate": 8.653821129989173e-06, "loss": 1.5563, "step": 2107 }, { "epoch": 0.26, "grad_norm": 7.094222827921707, "learning_rate": 8.652445772448196e-06, "loss": 1.5435, "step": 2108 }, { "epoch": 0.26, "grad_norm": 8.472912265630466, "learning_rate": 8.651069822095126e-06, "loss": 1.5398, "step": 2109 }, { "epoch": 0.26, "grad_norm": 8.250093053407575, "learning_rate": 8.649693279153288e-06, "loss": 1.42, "step": 2110 }, { "epoch": 0.26, "grad_norm": 7.549828460297714, "learning_rate": 8.6483161438461e-06, "loss": 1.3313, "step": 2111 }, { "epoch": 0.26, "grad_norm": 11.773655762941727, "learning_rate": 8.64693841639708e-06, "loss": 1.3854, "step": 2112 }, { "epoch": 0.26, "grad_norm": 7.426742003482325, "learning_rate": 8.645560097029843e-06, "loss": 1.5133, "step": 2113 }, { "epoch": 0.26, "grad_norm": 7.523207148999803, "learning_rate": 8.644181185968093e-06, "loss": 1.3253, "step": 2114 }, { "epoch": 0.26, "grad_norm": 7.655507544803466, "learning_rate": 8.642801683435639e-06, "loss": 1.4606, "step": 2115 }, { "epoch": 0.26, "grad_norm": 8.196271200791546, "learning_rate": 8.641421589656383e-06, "loss": 1.4755, "step": 2116 }, { "epoch": 0.26, "grad_norm": 6.923566126540296, "learning_rate": 8.640040904854316e-06, "loss": 1.1995, "step": 2117 }, { "epoch": 0.26, "grad_norm": 7.926839598770444, "learning_rate": 8.638659629253536e-06, "loss": 1.43, "step": 2118 }, { "epoch": 0.26, "grad_norm": 7.0145520583512395, "learning_rate": 8.637277763078231e-06, "loss": 1.1826, "step": 2119 }, { "epoch": 0.26, "grad_norm": 7.075695822780005, "learning_rate": 8.635895306552683e-06, "loss": 1.6789, "step": 2120 }, { "epoch": 0.26, "grad_norm": 7.353720889751511, "learning_rate": 8.634512259901275e-06, "loss": 1.6094, "step": 2121 }, { "epoch": 0.26, "grad_norm": 6.804497562985488, "learning_rate": 8.633128623348478e-06, "loss": 1.237, "step": 2122 }, { "epoch": 0.26, "grad_norm": 6.918481173738226, "learning_rate": 8.631744397118872e-06, "loss": 1.3572, "step": 2123 }, { "epoch": 0.26, "grad_norm": 7.339563099211351, "learning_rate": 8.630359581437118e-06, "loss": 1.6332, "step": 2124 }, { "epoch": 0.26, "grad_norm": 6.651713215798594, "learning_rate": 8.628974176527982e-06, "loss": 1.4756, "step": 2125 }, { "epoch": 0.26, "grad_norm": 7.767831110235285, "learning_rate": 8.627588182616321e-06, "loss": 1.6346, "step": 2126 }, { "epoch": 0.26, "grad_norm": 6.987424888509775, "learning_rate": 8.626201599927094e-06, "loss": 1.1926, "step": 2127 }, { "epoch": 0.26, "grad_norm": 7.395317450657551, "learning_rate": 8.624814428685346e-06, "loss": 1.4401, "step": 2128 }, { "epoch": 0.26, "grad_norm": 7.026649796007137, "learning_rate": 8.623426669116223e-06, "loss": 1.6083, "step": 2129 }, { "epoch": 0.26, "grad_norm": 9.240047754895002, "learning_rate": 8.62203832144497e-06, "loss": 1.7153, "step": 2130 }, { "epoch": 0.27, "grad_norm": 7.060987389922649, "learning_rate": 8.620649385896919e-06, "loss": 1.7044, "step": 2131 }, { "epoch": 0.27, "grad_norm": 6.616023758151928, "learning_rate": 8.619259862697504e-06, "loss": 1.2718, "step": 2132 }, { "epoch": 0.27, "grad_norm": 10.037218308188516, "learning_rate": 8.617869752072252e-06, "loss": 1.7857, "step": 2133 }, { "epoch": 0.27, "grad_norm": 6.989378824065204, "learning_rate": 8.616479054246786e-06, "loss": 1.5069, "step": 2134 }, { "epoch": 0.27, "grad_norm": 6.190885805122959, "learning_rate": 8.615087769446822e-06, "loss": 1.6656, "step": 2135 }, { "epoch": 0.27, "grad_norm": 7.702757414976043, "learning_rate": 8.613695897898177e-06, "loss": 1.2143, "step": 2136 }, { "epoch": 0.27, "grad_norm": 7.403388798837552, "learning_rate": 8.612303439826758e-06, "loss": 1.5954, "step": 2137 }, { "epoch": 0.27, "grad_norm": 6.164283735545882, "learning_rate": 8.610910395458563e-06, "loss": 1.3842, "step": 2138 }, { "epoch": 0.27, "grad_norm": 7.027661299822432, "learning_rate": 8.609516765019699e-06, "loss": 1.4728, "step": 2139 }, { "epoch": 0.27, "grad_norm": 6.735151976415107, "learning_rate": 8.608122548736356e-06, "loss": 1.5098, "step": 2140 }, { "epoch": 0.27, "grad_norm": 8.044297198883003, "learning_rate": 8.606727746834824e-06, "loss": 1.4047, "step": 2141 }, { "epoch": 0.27, "grad_norm": 6.750294170504796, "learning_rate": 8.605332359541486e-06, "loss": 1.3086, "step": 2142 }, { "epoch": 0.27, "grad_norm": 6.96191642532821, "learning_rate": 8.603936387082821e-06, "loss": 1.6371, "step": 2143 }, { "epoch": 0.27, "grad_norm": 8.462751264157898, "learning_rate": 8.602539829685405e-06, "loss": 1.3912, "step": 2144 }, { "epoch": 0.27, "grad_norm": 7.303441273511973, "learning_rate": 8.601142687575905e-06, "loss": 1.5069, "step": 2145 }, { "epoch": 0.27, "grad_norm": 8.198506092276059, "learning_rate": 8.599744960981085e-06, "loss": 1.6624, "step": 2146 }, { "epoch": 0.27, "grad_norm": 7.096774265603814, "learning_rate": 8.598346650127805e-06, "loss": 1.2893, "step": 2147 }, { "epoch": 0.27, "grad_norm": 7.775225175936546, "learning_rate": 8.596947755243017e-06, "loss": 1.5906, "step": 2148 }, { "epoch": 0.27, "grad_norm": 7.26406875945325, "learning_rate": 8.59554827655377e-06, "loss": 1.3571, "step": 2149 }, { "epoch": 0.27, "grad_norm": 7.8358178877961695, "learning_rate": 8.594148214287209e-06, "loss": 1.679, "step": 2150 }, { "epoch": 0.27, "grad_norm": 5.994104796512303, "learning_rate": 8.592747568670567e-06, "loss": 1.4344, "step": 2151 }, { "epoch": 0.27, "grad_norm": 7.256355441028146, "learning_rate": 8.591346339931182e-06, "loss": 1.7386, "step": 2152 }, { "epoch": 0.27, "grad_norm": 7.123139534903631, "learning_rate": 8.589944528296476e-06, "loss": 1.2676, "step": 2153 }, { "epoch": 0.27, "grad_norm": 8.122065311335106, "learning_rate": 8.588542133993976e-06, "loss": 1.7661, "step": 2154 }, { "epoch": 0.27, "grad_norm": 8.412913010905529, "learning_rate": 8.587139157251295e-06, "loss": 1.556, "step": 2155 }, { "epoch": 0.27, "grad_norm": 8.817111445774428, "learning_rate": 8.585735598296144e-06, "loss": 1.5988, "step": 2156 }, { "epoch": 0.27, "grad_norm": 6.807854606601321, "learning_rate": 8.584331457356331e-06, "loss": 1.335, "step": 2157 }, { "epoch": 0.27, "grad_norm": 7.922653242452865, "learning_rate": 8.582926734659752e-06, "loss": 1.6086, "step": 2158 }, { "epoch": 0.27, "grad_norm": 7.283930345178207, "learning_rate": 8.581521430434403e-06, "loss": 1.4011, "step": 2159 }, { "epoch": 0.27, "grad_norm": 7.287264532269947, "learning_rate": 8.580115544908374e-06, "loss": 1.4184, "step": 2160 }, { "epoch": 0.27, "grad_norm": 7.440968234217916, "learning_rate": 8.578709078309845e-06, "loss": 1.4758, "step": 2161 }, { "epoch": 0.27, "grad_norm": 6.7830118340237515, "learning_rate": 8.577302030867096e-06, "loss": 1.5205, "step": 2162 }, { "epoch": 0.27, "grad_norm": 7.086383771732934, "learning_rate": 8.575894402808497e-06, "loss": 1.5435, "step": 2163 }, { "epoch": 0.27, "grad_norm": 7.880011342607986, "learning_rate": 8.574486194362517e-06, "loss": 1.5375, "step": 2164 }, { "epoch": 0.27, "grad_norm": 8.010460409527427, "learning_rate": 8.573077405757711e-06, "loss": 1.6191, "step": 2165 }, { "epoch": 0.27, "grad_norm": 6.443404699656489, "learning_rate": 8.571668037222736e-06, "loss": 1.6462, "step": 2166 }, { "epoch": 0.27, "grad_norm": 7.764323858933085, "learning_rate": 8.570258088986339e-06, "loss": 1.9607, "step": 2167 }, { "epoch": 0.27, "grad_norm": 7.4025957348261295, "learning_rate": 8.568847561277362e-06, "loss": 1.3691, "step": 2168 }, { "epoch": 0.27, "grad_norm": 6.374195696877536, "learning_rate": 8.567436454324745e-06, "loss": 1.2903, "step": 2169 }, { "epoch": 0.27, "grad_norm": 7.260046677672469, "learning_rate": 8.566024768357515e-06, "loss": 1.5267, "step": 2170 }, { "epoch": 0.27, "grad_norm": 7.053912358845905, "learning_rate": 8.564612503604796e-06, "loss": 1.4292, "step": 2171 }, { "epoch": 0.27, "grad_norm": 6.926266701447343, "learning_rate": 8.56319966029581e-06, "loss": 1.5378, "step": 2172 }, { "epoch": 0.27, "grad_norm": 6.444637555910401, "learning_rate": 8.561786238659866e-06, "loss": 1.1976, "step": 2173 }, { "epoch": 0.27, "grad_norm": 6.790068603159505, "learning_rate": 8.560372238926372e-06, "loss": 1.5572, "step": 2174 }, { "epoch": 0.27, "grad_norm": 6.566665991650368, "learning_rate": 8.558957661324827e-06, "loss": 1.558, "step": 2175 }, { "epoch": 0.27, "grad_norm": 7.072119202990964, "learning_rate": 8.557542506084824e-06, "loss": 1.2396, "step": 2176 }, { "epoch": 0.27, "grad_norm": 8.112044431860381, "learning_rate": 8.55612677343605e-06, "loss": 1.4829, "step": 2177 }, { "epoch": 0.27, "grad_norm": 6.451905403426403, "learning_rate": 8.554710463608292e-06, "loss": 1.5527, "step": 2178 }, { "epoch": 0.27, "grad_norm": 7.004392880472942, "learning_rate": 8.553293576831417e-06, "loss": 1.151, "step": 2179 }, { "epoch": 0.27, "grad_norm": 7.035936666938962, "learning_rate": 8.551876113335398e-06, "loss": 1.4405, "step": 2180 }, { "epoch": 0.27, "grad_norm": 7.403397343209865, "learning_rate": 8.550458073350296e-06, "loss": 1.5419, "step": 2181 }, { "epoch": 0.27, "grad_norm": 7.669738349761742, "learning_rate": 8.549039457106267e-06, "loss": 1.6336, "step": 2182 }, { "epoch": 0.27, "grad_norm": 6.768151508568347, "learning_rate": 8.54762026483356e-06, "loss": 1.1301, "step": 2183 }, { "epoch": 0.27, "grad_norm": 6.870482102501423, "learning_rate": 8.546200496762517e-06, "loss": 1.242, "step": 2184 }, { "epoch": 0.27, "grad_norm": 7.3430654745448924, "learning_rate": 8.544780153123577e-06, "loss": 1.5448, "step": 2185 }, { "epoch": 0.27, "grad_norm": 7.074216734257354, "learning_rate": 8.543359234147266e-06, "loss": 1.7062, "step": 2186 }, { "epoch": 0.27, "grad_norm": 7.108451954646075, "learning_rate": 8.54193774006421e-06, "loss": 1.3135, "step": 2187 }, { "epoch": 0.27, "grad_norm": 7.564369711468935, "learning_rate": 8.540515671105122e-06, "loss": 1.5407, "step": 2188 }, { "epoch": 0.27, "grad_norm": 9.791316833161849, "learning_rate": 8.539093027500815e-06, "loss": 1.8922, "step": 2189 }, { "epoch": 0.27, "grad_norm": 6.931701882846775, "learning_rate": 8.537669809482193e-06, "loss": 1.2959, "step": 2190 }, { "epoch": 0.27, "grad_norm": 7.126452615976473, "learning_rate": 8.536246017280245e-06, "loss": 1.4833, "step": 2191 }, { "epoch": 0.27, "grad_norm": 7.099242866603567, "learning_rate": 8.534821651126067e-06, "loss": 1.6916, "step": 2192 }, { "epoch": 0.27, "grad_norm": 6.85808708251736, "learning_rate": 8.53339671125084e-06, "loss": 1.4364, "step": 2193 }, { "epoch": 0.27, "grad_norm": 7.100885414680763, "learning_rate": 8.531971197885837e-06, "loss": 1.2116, "step": 2194 }, { "epoch": 0.27, "grad_norm": 7.3435272103523035, "learning_rate": 8.530545111262432e-06, "loss": 1.156, "step": 2195 }, { "epoch": 0.27, "grad_norm": 7.95861574614257, "learning_rate": 8.52911845161208e-06, "loss": 1.6878, "step": 2196 }, { "epoch": 0.27, "grad_norm": 8.829149852038332, "learning_rate": 8.527691219166341e-06, "loss": 1.7477, "step": 2197 }, { "epoch": 0.27, "grad_norm": 7.964810651617033, "learning_rate": 8.526263414156862e-06, "loss": 1.6433, "step": 2198 }, { "epoch": 0.27, "grad_norm": 7.357156024248922, "learning_rate": 8.52483503681538e-06, "loss": 1.2817, "step": 2199 }, { "epoch": 0.27, "grad_norm": 7.137653553623456, "learning_rate": 8.523406087373732e-06, "loss": 1.7586, "step": 2200 }, { "epoch": 0.27, "grad_norm": 7.083373934838315, "learning_rate": 8.521976566063843e-06, "loss": 1.4288, "step": 2201 }, { "epoch": 0.27, "grad_norm": 7.280651403102203, "learning_rate": 8.520546473117735e-06, "loss": 1.5481, "step": 2202 }, { "epoch": 0.27, "grad_norm": 7.004753291748892, "learning_rate": 8.519115808767514e-06, "loss": 1.388, "step": 2203 }, { "epoch": 0.27, "grad_norm": 6.402491184082387, "learning_rate": 8.517684573245391e-06, "loss": 1.137, "step": 2204 }, { "epoch": 0.27, "grad_norm": 7.855104874083077, "learning_rate": 8.516252766783659e-06, "loss": 1.3463, "step": 2205 }, { "epoch": 0.27, "grad_norm": 7.453251892775248, "learning_rate": 8.51482038961471e-06, "loss": 1.5475, "step": 2206 }, { "epoch": 0.27, "grad_norm": 12.511256151899627, "learning_rate": 8.513387441971028e-06, "loss": 1.344, "step": 2207 }, { "epoch": 0.27, "grad_norm": 6.189127850515589, "learning_rate": 8.511953924085185e-06, "loss": 1.8009, "step": 2208 }, { "epoch": 0.27, "grad_norm": 7.970379587510682, "learning_rate": 8.510519836189853e-06, "loss": 1.6314, "step": 2209 }, { "epoch": 0.27, "grad_norm": 6.731365857774621, "learning_rate": 8.509085178517788e-06, "loss": 1.6369, "step": 2210 }, { "epoch": 0.28, "grad_norm": 7.219605495400848, "learning_rate": 8.507649951301845e-06, "loss": 1.4522, "step": 2211 }, { "epoch": 0.28, "grad_norm": 6.630168521816854, "learning_rate": 8.50621415477497e-06, "loss": 1.1158, "step": 2212 }, { "epoch": 0.28, "grad_norm": 7.988186234439016, "learning_rate": 8.504777789170198e-06, "loss": 1.3383, "step": 2213 }, { "epoch": 0.28, "grad_norm": 6.8492816338935105, "learning_rate": 8.503340854720664e-06, "loss": 1.6136, "step": 2214 }, { "epoch": 0.28, "grad_norm": 6.8901762994090925, "learning_rate": 8.501903351659584e-06, "loss": 1.3168, "step": 2215 }, { "epoch": 0.28, "grad_norm": 7.025567420279859, "learning_rate": 8.500465280220278e-06, "loss": 1.5599, "step": 2216 }, { "epoch": 0.28, "grad_norm": 7.3268834239417036, "learning_rate": 8.499026640636152e-06, "loss": 1.9026, "step": 2217 }, { "epoch": 0.28, "grad_norm": 7.521508836820596, "learning_rate": 8.497587433140702e-06, "loss": 1.6022, "step": 2218 }, { "epoch": 0.28, "grad_norm": 6.896875501681599, "learning_rate": 8.496147657967521e-06, "loss": 1.2827, "step": 2219 }, { "epoch": 0.28, "grad_norm": 7.590994439177676, "learning_rate": 8.494707315350294e-06, "loss": 1.5916, "step": 2220 }, { "epoch": 0.28, "grad_norm": 7.903754752086731, "learning_rate": 8.493266405522796e-06, "loss": 1.6767, "step": 2221 }, { "epoch": 0.28, "grad_norm": 6.884250424214501, "learning_rate": 8.491824928718893e-06, "loss": 1.2297, "step": 2222 }, { "epoch": 0.28, "grad_norm": 6.535748905525587, "learning_rate": 8.490382885172545e-06, "loss": 1.4277, "step": 2223 }, { "epoch": 0.28, "grad_norm": 8.23689631773838, "learning_rate": 8.488940275117806e-06, "loss": 1.7758, "step": 2224 }, { "epoch": 0.28, "grad_norm": 8.924738348238279, "learning_rate": 8.487497098788817e-06, "loss": 1.4391, "step": 2225 }, { "epoch": 0.28, "grad_norm": 6.7193734531257725, "learning_rate": 8.486053356419814e-06, "loss": 1.5806, "step": 2226 }, { "epoch": 0.28, "grad_norm": 6.093509806596921, "learning_rate": 8.484609048245126e-06, "loss": 1.5179, "step": 2227 }, { "epoch": 0.28, "grad_norm": 7.997529113865869, "learning_rate": 8.48316417449917e-06, "loss": 1.7814, "step": 2228 }, { "epoch": 0.28, "grad_norm": 6.512265773356932, "learning_rate": 8.481718735416456e-06, "loss": 1.2731, "step": 2229 }, { "epoch": 0.28, "grad_norm": 6.847340814138879, "learning_rate": 8.480272731231591e-06, "loss": 1.5824, "step": 2230 }, { "epoch": 0.28, "grad_norm": 8.776105214679385, "learning_rate": 8.478826162179266e-06, "loss": 1.6884, "step": 2231 }, { "epoch": 0.28, "grad_norm": 6.676872628149906, "learning_rate": 8.477379028494268e-06, "loss": 1.5218, "step": 2232 }, { "epoch": 0.28, "grad_norm": 6.039931800657339, "learning_rate": 8.475931330411475e-06, "loss": 1.3968, "step": 2233 }, { "epoch": 0.28, "grad_norm": 6.603255973569436, "learning_rate": 8.474483068165857e-06, "loss": 1.4848, "step": 2234 }, { "epoch": 0.28, "grad_norm": 7.0509773336088974, "learning_rate": 8.473034241992474e-06, "loss": 1.4353, "step": 2235 }, { "epoch": 0.28, "grad_norm": 6.336083862813042, "learning_rate": 8.471584852126478e-06, "loss": 1.2922, "step": 2236 }, { "epoch": 0.28, "grad_norm": 7.72220480234595, "learning_rate": 8.470134898803113e-06, "loss": 1.7711, "step": 2237 }, { "epoch": 0.28, "grad_norm": 7.67527579196733, "learning_rate": 8.468684382257718e-06, "loss": 1.4693, "step": 2238 }, { "epoch": 0.28, "grad_norm": 6.967854318297582, "learning_rate": 8.467233302725714e-06, "loss": 1.4681, "step": 2239 }, { "epoch": 0.28, "grad_norm": 7.080460310973836, "learning_rate": 8.465781660442623e-06, "loss": 1.3343, "step": 2240 }, { "epoch": 0.28, "grad_norm": 7.3200293504007, "learning_rate": 8.464329455644056e-06, "loss": 1.4431, "step": 2241 }, { "epoch": 0.28, "grad_norm": 7.078524848662056, "learning_rate": 8.462876688565708e-06, "loss": 1.4798, "step": 2242 }, { "epoch": 0.28, "grad_norm": 7.301260123579229, "learning_rate": 8.461423359443377e-06, "loss": 1.4489, "step": 2243 }, { "epoch": 0.28, "grad_norm": 6.180063648928444, "learning_rate": 8.459969468512943e-06, "loss": 1.4097, "step": 2244 }, { "epoch": 0.28, "grad_norm": 6.543044255281278, "learning_rate": 8.458515016010381e-06, "loss": 1.5181, "step": 2245 }, { "epoch": 0.28, "grad_norm": 6.657248232804964, "learning_rate": 8.457060002171758e-06, "loss": 1.4636, "step": 2246 }, { "epoch": 0.28, "grad_norm": 6.987583239890663, "learning_rate": 8.45560442723323e-06, "loss": 1.6412, "step": 2247 }, { "epoch": 0.28, "grad_norm": 6.7552989026033075, "learning_rate": 8.454148291431044e-06, "loss": 1.3775, "step": 2248 }, { "epoch": 0.28, "grad_norm": 6.396115819838651, "learning_rate": 8.452691595001541e-06, "loss": 1.3207, "step": 2249 }, { "epoch": 0.28, "grad_norm": 7.301211585608634, "learning_rate": 8.45123433818115e-06, "loss": 1.5278, "step": 2250 }, { "epoch": 0.28, "grad_norm": 7.331054409723629, "learning_rate": 8.44977652120639e-06, "loss": 1.5355, "step": 2251 }, { "epoch": 0.28, "grad_norm": 8.9917354747884, "learning_rate": 8.448318144313875e-06, "loss": 1.6583, "step": 2252 }, { "epoch": 0.28, "grad_norm": 6.533483438408977, "learning_rate": 8.446859207740304e-06, "loss": 1.8133, "step": 2253 }, { "epoch": 0.28, "grad_norm": 6.903928913351334, "learning_rate": 8.445399711722476e-06, "loss": 1.5777, "step": 2254 }, { "epoch": 0.28, "grad_norm": 6.899516592539495, "learning_rate": 8.44393965649727e-06, "loss": 1.5505, "step": 2255 }, { "epoch": 0.28, "grad_norm": 6.876402047765932, "learning_rate": 8.442479042301666e-06, "loss": 1.3699, "step": 2256 }, { "epoch": 0.28, "grad_norm": 6.82123293774179, "learning_rate": 8.441017869372725e-06, "loss": 1.3794, "step": 2257 }, { "epoch": 0.28, "grad_norm": 6.436437488312111, "learning_rate": 8.439556137947607e-06, "loss": 1.4226, "step": 2258 }, { "epoch": 0.28, "grad_norm": 7.485266678019636, "learning_rate": 8.438093848263558e-06, "loss": 1.5195, "step": 2259 }, { "epoch": 0.28, "grad_norm": 6.431887703508485, "learning_rate": 8.436631000557915e-06, "loss": 1.4831, "step": 2260 }, { "epoch": 0.28, "grad_norm": 6.873360449762467, "learning_rate": 8.435167595068106e-06, "loss": 1.4372, "step": 2261 }, { "epoch": 0.28, "grad_norm": 7.516503412524328, "learning_rate": 8.43370363203165e-06, "loss": 1.5886, "step": 2262 }, { "epoch": 0.28, "grad_norm": 7.101517245187007, "learning_rate": 8.432239111686158e-06, "loss": 1.4765, "step": 2263 }, { "epoch": 0.28, "grad_norm": 7.516902179764751, "learning_rate": 8.430774034269326e-06, "loss": 1.7161, "step": 2264 }, { "epoch": 0.28, "grad_norm": 6.041787068972835, "learning_rate": 8.429308400018949e-06, "loss": 1.1097, "step": 2265 }, { "epoch": 0.28, "grad_norm": 6.459330920411768, "learning_rate": 8.427842209172903e-06, "loss": 1.3701, "step": 2266 }, { "epoch": 0.28, "grad_norm": 6.353228389977875, "learning_rate": 8.426375461969163e-06, "loss": 1.2984, "step": 2267 }, { "epoch": 0.28, "grad_norm": 6.904279510191372, "learning_rate": 8.424908158645788e-06, "loss": 1.4528, "step": 2268 }, { "epoch": 0.28, "grad_norm": 7.239790527367672, "learning_rate": 8.423440299440928e-06, "loss": 1.2102, "step": 2269 }, { "epoch": 0.28, "grad_norm": 7.994553677356178, "learning_rate": 8.421971884592825e-06, "loss": 1.6113, "step": 2270 }, { "epoch": 0.28, "grad_norm": 6.775145224792126, "learning_rate": 8.420502914339814e-06, "loss": 1.2873, "step": 2271 }, { "epoch": 0.28, "grad_norm": 7.117872010431934, "learning_rate": 8.419033388920315e-06, "loss": 1.4175, "step": 2272 }, { "epoch": 0.28, "grad_norm": 8.0049061588138, "learning_rate": 8.41756330857284e-06, "loss": 1.8554, "step": 2273 }, { "epoch": 0.28, "grad_norm": 7.143052165911168, "learning_rate": 8.416092673535992e-06, "loss": 1.1884, "step": 2274 }, { "epoch": 0.28, "grad_norm": 7.338021786899974, "learning_rate": 8.41462148404846e-06, "loss": 1.435, "step": 2275 }, { "epoch": 0.28, "grad_norm": 7.542687199319917, "learning_rate": 8.413149740349029e-06, "loss": 1.3977, "step": 2276 }, { "epoch": 0.28, "grad_norm": 6.888057642297405, "learning_rate": 8.41167744267657e-06, "loss": 1.5595, "step": 2277 }, { "epoch": 0.28, "grad_norm": 7.337442491542102, "learning_rate": 8.410204591270048e-06, "loss": 1.4454, "step": 2278 }, { "epoch": 0.28, "grad_norm": 7.095312161539265, "learning_rate": 8.40873118636851e-06, "loss": 1.5361, "step": 2279 }, { "epoch": 0.28, "grad_norm": 13.225219873523477, "learning_rate": 8.407257228211104e-06, "loss": 1.1876, "step": 2280 }, { "epoch": 0.28, "grad_norm": 8.378134512093142, "learning_rate": 8.405782717037055e-06, "loss": 1.5864, "step": 2281 }, { "epoch": 0.28, "grad_norm": 7.233216399860096, "learning_rate": 8.404307653085688e-06, "loss": 1.0485, "step": 2282 }, { "epoch": 0.28, "grad_norm": 6.742981901104493, "learning_rate": 8.402832036596412e-06, "loss": 1.3947, "step": 2283 }, { "epoch": 0.28, "grad_norm": 7.4324928117459494, "learning_rate": 8.40135586780873e-06, "loss": 1.5895, "step": 2284 }, { "epoch": 0.28, "grad_norm": 7.322691498073936, "learning_rate": 8.39987914696223e-06, "loss": 1.4515, "step": 2285 }, { "epoch": 0.28, "grad_norm": 6.957119653711382, "learning_rate": 8.398401874296595e-06, "loss": 1.7478, "step": 2286 }, { "epoch": 0.28, "grad_norm": 7.435536248381875, "learning_rate": 8.396924050051592e-06, "loss": 1.6144, "step": 2287 }, { "epoch": 0.28, "grad_norm": 6.236980701556918, "learning_rate": 8.395445674467078e-06, "loss": 1.6815, "step": 2288 }, { "epoch": 0.28, "grad_norm": 6.982182195799502, "learning_rate": 8.393966747783007e-06, "loss": 1.5329, "step": 2289 }, { "epoch": 0.28, "grad_norm": 6.257695932185393, "learning_rate": 8.392487270239413e-06, "loss": 1.1797, "step": 2290 }, { "epoch": 0.28, "grad_norm": 7.260450976755986, "learning_rate": 8.391007242076425e-06, "loss": 1.4662, "step": 2291 }, { "epoch": 0.29, "grad_norm": 7.550909681853157, "learning_rate": 8.38952666353426e-06, "loss": 1.6626, "step": 2292 }, { "epoch": 0.29, "grad_norm": 7.321933975832198, "learning_rate": 8.388045534853222e-06, "loss": 1.3937, "step": 2293 }, { "epoch": 0.29, "grad_norm": 6.4317721155763845, "learning_rate": 8.386563856273708e-06, "loss": 1.4195, "step": 2294 }, { "epoch": 0.29, "grad_norm": 7.178997737350607, "learning_rate": 8.385081628036202e-06, "loss": 1.7501, "step": 2295 }, { "epoch": 0.29, "grad_norm": 6.128926853919524, "learning_rate": 8.383598850381279e-06, "loss": 1.2719, "step": 2296 }, { "epoch": 0.29, "grad_norm": 7.215825175596666, "learning_rate": 8.3821155235496e-06, "loss": 1.4086, "step": 2297 }, { "epoch": 0.29, "grad_norm": 6.775597616057142, "learning_rate": 8.38063164778192e-06, "loss": 1.4196, "step": 2298 }, { "epoch": 0.29, "grad_norm": 7.7531394538168295, "learning_rate": 8.379147223319075e-06, "loss": 1.3715, "step": 2299 }, { "epoch": 0.29, "grad_norm": 6.6599373578472605, "learning_rate": 8.377662250402001e-06, "loss": 1.1785, "step": 2300 }, { "epoch": 0.29, "grad_norm": 7.1092879484540275, "learning_rate": 8.376176729271714e-06, "loss": 1.7228, "step": 2301 }, { "epoch": 0.29, "grad_norm": 6.430803396496845, "learning_rate": 8.374690660169324e-06, "loss": 1.3428, "step": 2302 }, { "epoch": 0.29, "grad_norm": 7.349599619410138, "learning_rate": 8.373204043336027e-06, "loss": 1.6598, "step": 2303 }, { "epoch": 0.29, "grad_norm": 7.685480411986309, "learning_rate": 8.37171687901311e-06, "loss": 1.6248, "step": 2304 }, { "epoch": 0.29, "grad_norm": 6.616097383984759, "learning_rate": 8.370229167441948e-06, "loss": 1.3515, "step": 2305 }, { "epoch": 0.29, "grad_norm": 7.219889682462717, "learning_rate": 8.368740908864004e-06, "loss": 1.4454, "step": 2306 }, { "epoch": 0.29, "grad_norm": 8.060661873193236, "learning_rate": 8.36725210352083e-06, "loss": 1.4789, "step": 2307 }, { "epoch": 0.29, "grad_norm": 7.1666970626489315, "learning_rate": 8.36576275165407e-06, "loss": 1.4841, "step": 2308 }, { "epoch": 0.29, "grad_norm": 10.41777091458765, "learning_rate": 8.36427285350545e-06, "loss": 1.344, "step": 2309 }, { "epoch": 0.29, "grad_norm": 6.765599611106724, "learning_rate": 8.362782409316793e-06, "loss": 1.3343, "step": 2310 }, { "epoch": 0.29, "grad_norm": 7.336167756232905, "learning_rate": 8.361291419330003e-06, "loss": 1.4091, "step": 2311 }, { "epoch": 0.29, "grad_norm": 6.7867914775695946, "learning_rate": 8.359799883787077e-06, "loss": 1.6376, "step": 2312 }, { "epoch": 0.29, "grad_norm": 6.746631623235307, "learning_rate": 8.358307802930098e-06, "loss": 1.4245, "step": 2313 }, { "epoch": 0.29, "grad_norm": 6.743070660940503, "learning_rate": 8.356815177001243e-06, "loss": 1.2521, "step": 2314 }, { "epoch": 0.29, "grad_norm": 7.028095083616423, "learning_rate": 8.355322006242771e-06, "loss": 1.5015, "step": 2315 }, { "epoch": 0.29, "grad_norm": 6.3240675461030005, "learning_rate": 8.353828290897031e-06, "loss": 1.5013, "step": 2316 }, { "epoch": 0.29, "grad_norm": 6.537463647743031, "learning_rate": 8.352334031206463e-06, "loss": 1.2849, "step": 2317 }, { "epoch": 0.29, "grad_norm": 7.144045607949415, "learning_rate": 8.35083922741359e-06, "loss": 1.7186, "step": 2318 }, { "epoch": 0.29, "grad_norm": 6.170842743035328, "learning_rate": 8.349343879761032e-06, "loss": 1.4173, "step": 2319 }, { "epoch": 0.29, "grad_norm": 7.35265532255448, "learning_rate": 8.347847988491488e-06, "loss": 1.3869, "step": 2320 }, { "epoch": 0.29, "grad_norm": 7.636378300809349, "learning_rate": 8.346351553847754e-06, "loss": 1.9171, "step": 2321 }, { "epoch": 0.29, "grad_norm": 6.519442700272106, "learning_rate": 8.344854576072706e-06, "loss": 1.4969, "step": 2322 }, { "epoch": 0.29, "grad_norm": 7.525830923777658, "learning_rate": 8.343357055409309e-06, "loss": 1.4293, "step": 2323 }, { "epoch": 0.29, "grad_norm": 5.933780517029924, "learning_rate": 8.341858992100625e-06, "loss": 1.5826, "step": 2324 }, { "epoch": 0.29, "grad_norm": 7.007459181680934, "learning_rate": 8.340360386389796e-06, "loss": 1.5966, "step": 2325 }, { "epoch": 0.29, "grad_norm": 6.544723794598282, "learning_rate": 8.33886123852005e-06, "loss": 1.3349, "step": 2326 }, { "epoch": 0.29, "grad_norm": 7.451986574600875, "learning_rate": 8.337361548734712e-06, "loss": 1.4621, "step": 2327 }, { "epoch": 0.29, "grad_norm": 7.139650572611688, "learning_rate": 8.33586131727719e-06, "loss": 1.3996, "step": 2328 }, { "epoch": 0.29, "grad_norm": 6.655549729045198, "learning_rate": 8.334360544390975e-06, "loss": 1.2871, "step": 2329 }, { "epoch": 0.29, "grad_norm": 7.774172803203995, "learning_rate": 8.332859230319657e-06, "loss": 1.4651, "step": 2330 }, { "epoch": 0.29, "grad_norm": 6.82312235434228, "learning_rate": 8.3313573753069e-06, "loss": 1.2738, "step": 2331 }, { "epoch": 0.29, "grad_norm": 6.236065541266635, "learning_rate": 8.329854979596472e-06, "loss": 1.455, "step": 2332 }, { "epoch": 0.29, "grad_norm": 7.101658106570234, "learning_rate": 8.328352043432212e-06, "loss": 1.4066, "step": 2333 }, { "epoch": 0.29, "grad_norm": 6.533918412892167, "learning_rate": 8.326848567058061e-06, "loss": 1.3358, "step": 2334 }, { "epoch": 0.29, "grad_norm": 8.911988554545733, "learning_rate": 8.325344550718037e-06, "loss": 1.705, "step": 2335 }, { "epoch": 0.29, "grad_norm": 6.8569555680855, "learning_rate": 8.323839994656253e-06, "loss": 1.5407, "step": 2336 }, { "epoch": 0.29, "grad_norm": 7.390901443624469, "learning_rate": 8.322334899116906e-06, "loss": 1.4865, "step": 2337 }, { "epoch": 0.29, "grad_norm": 7.481392394611362, "learning_rate": 8.320829264344281e-06, "loss": 1.4998, "step": 2338 }, { "epoch": 0.29, "grad_norm": 8.397257011492451, "learning_rate": 8.319323090582753e-06, "loss": 1.5174, "step": 2339 }, { "epoch": 0.29, "grad_norm": 6.7069687607552275, "learning_rate": 8.317816378076779e-06, "loss": 1.5021, "step": 2340 }, { "epoch": 0.29, "grad_norm": 6.632614907188989, "learning_rate": 8.316309127070907e-06, "loss": 1.2258, "step": 2341 }, { "epoch": 0.29, "grad_norm": 8.974576024307186, "learning_rate": 8.314801337809775e-06, "loss": 1.5618, "step": 2342 }, { "epoch": 0.29, "grad_norm": 7.604452675344447, "learning_rate": 8.313293010538101e-06, "loss": 1.7368, "step": 2343 }, { "epoch": 0.29, "grad_norm": 6.41335059095109, "learning_rate": 8.3117841455007e-06, "loss": 1.5764, "step": 2344 }, { "epoch": 0.29, "grad_norm": 7.224432020544994, "learning_rate": 8.310274742942466e-06, "loss": 1.8269, "step": 2345 }, { "epoch": 0.29, "grad_norm": 7.78824588896286, "learning_rate": 8.308764803108384e-06, "loss": 1.5864, "step": 2346 }, { "epoch": 0.29, "grad_norm": 8.03100331923216, "learning_rate": 8.307254326243527e-06, "loss": 1.7848, "step": 2347 }, { "epoch": 0.29, "grad_norm": 6.764857922327464, "learning_rate": 8.30574331259305e-06, "loss": 1.4013, "step": 2348 }, { "epoch": 0.29, "grad_norm": 7.206982364969483, "learning_rate": 8.304231762402203e-06, "loss": 1.7005, "step": 2349 }, { "epoch": 0.29, "grad_norm": 6.597833178811745, "learning_rate": 8.302719675916318e-06, "loss": 1.1702, "step": 2350 }, { "epoch": 0.29, "grad_norm": 6.333554624202728, "learning_rate": 8.301207053380813e-06, "loss": 1.1126, "step": 2351 }, { "epoch": 0.29, "grad_norm": 7.362478552219569, "learning_rate": 8.299693895041195e-06, "loss": 1.6757, "step": 2352 }, { "epoch": 0.29, "grad_norm": 7.377546854147125, "learning_rate": 8.29818020114306e-06, "loss": 1.4378, "step": 2353 }, { "epoch": 0.29, "grad_norm": 7.570388072832723, "learning_rate": 8.296665971932088e-06, "loss": 1.204, "step": 2354 }, { "epoch": 0.29, "grad_norm": 6.394784103346679, "learning_rate": 8.295151207654046e-06, "loss": 1.9228, "step": 2355 }, { "epoch": 0.29, "grad_norm": 6.330642822823784, "learning_rate": 8.29363590855479e-06, "loss": 1.2963, "step": 2356 }, { "epoch": 0.29, "grad_norm": 6.7018433498433385, "learning_rate": 8.292120074880262e-06, "loss": 1.7457, "step": 2357 }, { "epoch": 0.29, "grad_norm": 6.918328391583366, "learning_rate": 8.290603706876486e-06, "loss": 1.3536, "step": 2358 }, { "epoch": 0.29, "grad_norm": 6.175156973427529, "learning_rate": 8.289086804789581e-06, "loss": 1.3021, "step": 2359 }, { "epoch": 0.29, "grad_norm": 6.70043571181033, "learning_rate": 8.287569368865747e-06, "loss": 1.1423, "step": 2360 }, { "epoch": 0.29, "grad_norm": 7.24912910567205, "learning_rate": 8.286051399351274e-06, "loss": 1.5456, "step": 2361 }, { "epoch": 0.29, "grad_norm": 8.040018459333867, "learning_rate": 8.284532896492534e-06, "loss": 1.5876, "step": 2362 }, { "epoch": 0.29, "grad_norm": 7.572399175665992, "learning_rate": 8.28301386053599e-06, "loss": 1.6965, "step": 2363 }, { "epoch": 0.29, "grad_norm": 6.937294093921639, "learning_rate": 8.28149429172819e-06, "loss": 1.5934, "step": 2364 }, { "epoch": 0.29, "grad_norm": 6.600071836671228, "learning_rate": 8.279974190315766e-06, "loss": 1.3379, "step": 2365 }, { "epoch": 0.29, "grad_norm": 6.350725259387992, "learning_rate": 8.278453556545444e-06, "loss": 1.2153, "step": 2366 }, { "epoch": 0.29, "grad_norm": 6.751583697306291, "learning_rate": 8.276932390664026e-06, "loss": 1.4628, "step": 2367 }, { "epoch": 0.29, "grad_norm": 7.00244980529789, "learning_rate": 8.275410692918409e-06, "loss": 1.5484, "step": 2368 }, { "epoch": 0.29, "grad_norm": 6.310014351348905, "learning_rate": 8.273888463555571e-06, "loss": 1.2822, "step": 2369 }, { "epoch": 0.29, "grad_norm": 8.789568813801976, "learning_rate": 8.272365702822577e-06, "loss": 1.3654, "step": 2370 }, { "epoch": 0.29, "grad_norm": 7.3044221225763515, "learning_rate": 8.270842410966587e-06, "loss": 1.067, "step": 2371 }, { "epoch": 0.3, "grad_norm": 6.408547511940245, "learning_rate": 8.269318588234829e-06, "loss": 1.0257, "step": 2372 }, { "epoch": 0.3, "grad_norm": 8.37955673015019, "learning_rate": 8.267794234874634e-06, "loss": 1.6961, "step": 2373 }, { "epoch": 0.3, "grad_norm": 7.215932887093613, "learning_rate": 8.266269351133413e-06, "loss": 1.6163, "step": 2374 }, { "epoch": 0.3, "grad_norm": 5.871169553897236, "learning_rate": 8.26474393725866e-06, "loss": 1.2811, "step": 2375 }, { "epoch": 0.3, "grad_norm": 6.283786949398458, "learning_rate": 8.26321799349796e-06, "loss": 1.2707, "step": 2376 }, { "epoch": 0.3, "grad_norm": 8.001903036130697, "learning_rate": 8.261691520098985e-06, "loss": 1.6028, "step": 2377 }, { "epoch": 0.3, "grad_norm": 7.6434576327750055, "learning_rate": 8.260164517309483e-06, "loss": 1.6539, "step": 2378 }, { "epoch": 0.3, "grad_norm": 7.024743176358269, "learning_rate": 8.258636985377305e-06, "loss": 1.1092, "step": 2379 }, { "epoch": 0.3, "grad_norm": 7.35931163246753, "learning_rate": 8.257108924550366e-06, "loss": 1.8352, "step": 2380 }, { "epoch": 0.3, "grad_norm": 6.445898749085842, "learning_rate": 8.255580335076688e-06, "loss": 1.3617, "step": 2381 }, { "epoch": 0.3, "grad_norm": 6.104761272195153, "learning_rate": 8.254051217204364e-06, "loss": 1.2406, "step": 2382 }, { "epoch": 0.3, "grad_norm": 9.507288087878694, "learning_rate": 8.252521571181582e-06, "loss": 1.4581, "step": 2383 }, { "epoch": 0.3, "grad_norm": 6.854416412843832, "learning_rate": 8.250991397256609e-06, "loss": 1.418, "step": 2384 }, { "epoch": 0.3, "grad_norm": 7.404192638456912, "learning_rate": 8.249460695677802e-06, "loss": 1.1821, "step": 2385 }, { "epoch": 0.3, "grad_norm": 7.9577765403405865, "learning_rate": 8.247929466693604e-06, "loss": 1.513, "step": 2386 }, { "epoch": 0.3, "grad_norm": 6.589745076909579, "learning_rate": 8.24639771055254e-06, "loss": 1.1376, "step": 2387 }, { "epoch": 0.3, "grad_norm": 6.399300260285509, "learning_rate": 8.244865427503221e-06, "loss": 1.2515, "step": 2388 }, { "epoch": 0.3, "grad_norm": 8.202077779124513, "learning_rate": 8.243332617794348e-06, "loss": 1.6961, "step": 2389 }, { "epoch": 0.3, "grad_norm": 7.567550045401565, "learning_rate": 8.241799281674703e-06, "loss": 1.3706, "step": 2390 }, { "epoch": 0.3, "grad_norm": 6.933974926119043, "learning_rate": 8.240265419393157e-06, "loss": 1.5296, "step": 2391 }, { "epoch": 0.3, "grad_norm": 5.737046007122125, "learning_rate": 8.238731031198659e-06, "loss": 1.0623, "step": 2392 }, { "epoch": 0.3, "grad_norm": 7.144475917061442, "learning_rate": 8.237196117340254e-06, "loss": 1.2263, "step": 2393 }, { "epoch": 0.3, "grad_norm": 7.624217131010552, "learning_rate": 8.235660678067066e-06, "loss": 1.4444, "step": 2394 }, { "epoch": 0.3, "grad_norm": 8.360228872647745, "learning_rate": 8.234124713628303e-06, "loss": 1.2521, "step": 2395 }, { "epoch": 0.3, "grad_norm": 6.748474760965718, "learning_rate": 8.23258822427326e-06, "loss": 1.1742, "step": 2396 }, { "epoch": 0.3, "grad_norm": 7.126864019993332, "learning_rate": 8.231051210251324e-06, "loss": 1.4119, "step": 2397 }, { "epoch": 0.3, "grad_norm": 7.376625944275381, "learning_rate": 8.229513671811953e-06, "loss": 1.6189, "step": 2398 }, { "epoch": 0.3, "grad_norm": 6.9930359007184375, "learning_rate": 8.227975609204701e-06, "loss": 1.3861, "step": 2399 }, { "epoch": 0.3, "grad_norm": 7.0191413151922495, "learning_rate": 8.226437022679206e-06, "loss": 1.2776, "step": 2400 }, { "epoch": 0.3, "grad_norm": 7.086345355271732, "learning_rate": 8.224897912485189e-06, "loss": 1.3036, "step": 2401 }, { "epoch": 0.3, "grad_norm": 6.985366036965368, "learning_rate": 8.22335827887245e-06, "loss": 1.5728, "step": 2402 }, { "epoch": 0.3, "grad_norm": 7.660254208927277, "learning_rate": 8.221818122090888e-06, "loss": 1.358, "step": 2403 }, { "epoch": 0.3, "grad_norm": 7.710435573849577, "learning_rate": 8.220277442390475e-06, "loss": 1.6589, "step": 2404 }, { "epoch": 0.3, "grad_norm": 6.792200649585721, "learning_rate": 8.218736240021271e-06, "loss": 1.3567, "step": 2405 }, { "epoch": 0.3, "grad_norm": 8.320372302242323, "learning_rate": 8.217194515233425e-06, "loss": 1.4451, "step": 2406 }, { "epoch": 0.3, "grad_norm": 6.546510735391918, "learning_rate": 8.215652268277164e-06, "loss": 1.4981, "step": 2407 }, { "epoch": 0.3, "grad_norm": 7.968128694687536, "learning_rate": 8.214109499402805e-06, "loss": 1.6334, "step": 2408 }, { "epoch": 0.3, "grad_norm": 7.474767688338576, "learning_rate": 8.212566208860749e-06, "loss": 1.9035, "step": 2409 }, { "epoch": 0.3, "grad_norm": 7.098900104992832, "learning_rate": 8.211022396901475e-06, "loss": 1.486, "step": 2410 }, { "epoch": 0.3, "grad_norm": 7.2904816590709745, "learning_rate": 8.20947806377556e-06, "loss": 1.5539, "step": 2411 }, { "epoch": 0.3, "grad_norm": 7.1875008098306665, "learning_rate": 8.207933209733654e-06, "loss": 1.2923, "step": 2412 }, { "epoch": 0.3, "grad_norm": 7.0516001945024005, "learning_rate": 8.206387835026493e-06, "loss": 1.3744, "step": 2413 }, { "epoch": 0.3, "grad_norm": 7.820224124440359, "learning_rate": 8.204841939904904e-06, "loss": 1.4905, "step": 2414 }, { "epoch": 0.3, "grad_norm": 7.634863071134851, "learning_rate": 8.203295524619793e-06, "loss": 1.4509, "step": 2415 }, { "epoch": 0.3, "grad_norm": 6.932028217396032, "learning_rate": 8.20174858942215e-06, "loss": 1.4273, "step": 2416 }, { "epoch": 0.3, "grad_norm": 7.472943895369053, "learning_rate": 8.20020113456305e-06, "loss": 1.264, "step": 2417 }, { "epoch": 0.3, "grad_norm": 6.9971186210382355, "learning_rate": 8.19865316029366e-06, "loss": 1.1702, "step": 2418 }, { "epoch": 0.3, "grad_norm": 6.675104775641305, "learning_rate": 8.197104666865218e-06, "loss": 1.2476, "step": 2419 }, { "epoch": 0.3, "grad_norm": 7.485311565493542, "learning_rate": 8.195555654529057e-06, "loss": 1.4223, "step": 2420 }, { "epoch": 0.3, "grad_norm": 7.576588776385085, "learning_rate": 8.194006123536589e-06, "loss": 1.4175, "step": 2421 }, { "epoch": 0.3, "grad_norm": 6.480982416519783, "learning_rate": 8.19245607413931e-06, "loss": 1.3158, "step": 2422 }, { "epoch": 0.3, "grad_norm": 7.3083537823079405, "learning_rate": 8.190905506588803e-06, "loss": 1.5087, "step": 2423 }, { "epoch": 0.3, "grad_norm": 7.999685510798206, "learning_rate": 8.189354421136735e-06, "loss": 1.4162, "step": 2424 }, { "epoch": 0.3, "grad_norm": 8.402002750217816, "learning_rate": 8.187802818034852e-06, "loss": 1.5643, "step": 2425 }, { "epoch": 0.3, "grad_norm": 6.041414035500084, "learning_rate": 8.186250697534993e-06, "loss": 0.9289, "step": 2426 }, { "epoch": 0.3, "grad_norm": 7.581477199892656, "learning_rate": 8.184698059889069e-06, "loss": 1.6012, "step": 2427 }, { "epoch": 0.3, "grad_norm": 6.60601196144712, "learning_rate": 8.183144905349087e-06, "loss": 1.3749, "step": 2428 }, { "epoch": 0.3, "grad_norm": 7.830213110416741, "learning_rate": 8.181591234167132e-06, "loss": 1.6196, "step": 2429 }, { "epoch": 0.3, "grad_norm": 6.487121324715006, "learning_rate": 8.18003704659537e-06, "loss": 1.171, "step": 2430 }, { "epoch": 0.3, "grad_norm": 6.559387331075612, "learning_rate": 8.17848234288606e-06, "loss": 1.0679, "step": 2431 }, { "epoch": 0.3, "grad_norm": 7.824376253179768, "learning_rate": 8.17692712329153e-06, "loss": 1.5921, "step": 2432 }, { "epoch": 0.3, "grad_norm": 7.349510204080054, "learning_rate": 8.175371388064212e-06, "loss": 1.4297, "step": 2433 }, { "epoch": 0.3, "grad_norm": 8.664171788633642, "learning_rate": 8.173815137456602e-06, "loss": 1.749, "step": 2434 }, { "epoch": 0.3, "grad_norm": 6.414689703996819, "learning_rate": 8.17225837172129e-06, "loss": 1.3577, "step": 2435 }, { "epoch": 0.3, "grad_norm": 6.931686618225741, "learning_rate": 8.17070109111095e-06, "loss": 1.3278, "step": 2436 }, { "epoch": 0.3, "grad_norm": 7.43433387981203, "learning_rate": 8.169143295878335e-06, "loss": 1.5218, "step": 2437 }, { "epoch": 0.3, "grad_norm": 6.573401977651518, "learning_rate": 8.167584986276284e-06, "loss": 1.0743, "step": 2438 }, { "epoch": 0.3, "grad_norm": 6.56947254270211, "learning_rate": 8.166026162557719e-06, "loss": 1.2073, "step": 2439 }, { "epoch": 0.3, "grad_norm": 6.889240816385001, "learning_rate": 8.164466824975648e-06, "loss": 1.4728, "step": 2440 }, { "epoch": 0.3, "grad_norm": 6.091739339550392, "learning_rate": 8.162906973783157e-06, "loss": 1.1997, "step": 2441 }, { "epoch": 0.3, "grad_norm": 6.5550856472360195, "learning_rate": 8.161346609233418e-06, "loss": 1.3209, "step": 2442 }, { "epoch": 0.3, "grad_norm": 7.05739288873718, "learning_rate": 8.159785731579692e-06, "loss": 1.9141, "step": 2443 }, { "epoch": 0.3, "grad_norm": 6.900756459154884, "learning_rate": 8.158224341075314e-06, "loss": 1.459, "step": 2444 }, { "epoch": 0.3, "grad_norm": 9.890024818282239, "learning_rate": 8.156662437973705e-06, "loss": 1.4234, "step": 2445 }, { "epoch": 0.3, "grad_norm": 7.537266394162339, "learning_rate": 8.155100022528374e-06, "loss": 1.3884, "step": 2446 }, { "epoch": 0.3, "grad_norm": 6.581267714414615, "learning_rate": 8.153537094992907e-06, "loss": 1.5215, "step": 2447 }, { "epoch": 0.3, "grad_norm": 9.629581321431973, "learning_rate": 8.151973655620978e-06, "loss": 1.3313, "step": 2448 }, { "epoch": 0.3, "grad_norm": 6.411150914307147, "learning_rate": 8.150409704666338e-06, "loss": 1.2316, "step": 2449 }, { "epoch": 0.3, "grad_norm": 7.0860510481578665, "learning_rate": 8.14884524238283e-06, "loss": 1.5963, "step": 2450 }, { "epoch": 0.3, "grad_norm": 6.389494673951135, "learning_rate": 8.147280269024372e-06, "loss": 1.5529, "step": 2451 }, { "epoch": 0.3, "grad_norm": 7.380040621539318, "learning_rate": 8.14571478484497e-06, "loss": 1.5987, "step": 2452 }, { "epoch": 0.31, "grad_norm": 6.908760275631008, "learning_rate": 8.144148790098705e-06, "loss": 1.6293, "step": 2453 }, { "epoch": 0.31, "grad_norm": 7.7620261640510595, "learning_rate": 8.142582285039753e-06, "loss": 1.3632, "step": 2454 }, { "epoch": 0.31, "grad_norm": 8.236219507250878, "learning_rate": 8.141015269922363e-06, "loss": 1.4603, "step": 2455 }, { "epoch": 0.31, "grad_norm": 7.707708712169237, "learning_rate": 8.13944774500087e-06, "loss": 1.6635, "step": 2456 }, { "epoch": 0.31, "grad_norm": 6.9073653314671315, "learning_rate": 8.137879710529695e-06, "loss": 1.4713, "step": 2457 }, { "epoch": 0.31, "grad_norm": 7.406174332726825, "learning_rate": 8.136311166763335e-06, "loss": 1.3071, "step": 2458 }, { "epoch": 0.31, "grad_norm": 7.305132055697606, "learning_rate": 8.134742113956377e-06, "loss": 1.4985, "step": 2459 }, { "epoch": 0.31, "grad_norm": 6.863994197342224, "learning_rate": 8.133172552363484e-06, "loss": 1.6386, "step": 2460 }, { "epoch": 0.31, "grad_norm": 6.9787928847381435, "learning_rate": 8.131602482239405e-06, "loss": 1.4898, "step": 2461 }, { "epoch": 0.31, "grad_norm": 8.035694245553216, "learning_rate": 8.130031903838971e-06, "loss": 1.841, "step": 2462 }, { "epoch": 0.31, "grad_norm": 6.648342254526682, "learning_rate": 8.128460817417097e-06, "loss": 1.413, "step": 2463 }, { "epoch": 0.31, "grad_norm": 7.446124146130655, "learning_rate": 8.126889223228777e-06, "loss": 1.3824, "step": 2464 }, { "epoch": 0.31, "grad_norm": 6.200038488629108, "learning_rate": 8.12531712152909e-06, "loss": 1.5192, "step": 2465 }, { "epoch": 0.31, "grad_norm": 7.172487773750704, "learning_rate": 8.1237445125732e-06, "loss": 1.6256, "step": 2466 }, { "epoch": 0.31, "grad_norm": 6.598196166084969, "learning_rate": 8.122171396616344e-06, "loss": 1.3991, "step": 2467 }, { "epoch": 0.31, "grad_norm": 7.298575662500455, "learning_rate": 8.120597773913851e-06, "loss": 1.3158, "step": 2468 }, { "epoch": 0.31, "grad_norm": 6.2433852140850234, "learning_rate": 8.119023644721131e-06, "loss": 1.1639, "step": 2469 }, { "epoch": 0.31, "grad_norm": 7.591200791598165, "learning_rate": 8.117449009293668e-06, "loss": 1.3645, "step": 2470 }, { "epoch": 0.31, "grad_norm": 7.292337841049985, "learning_rate": 8.115873867887038e-06, "loss": 1.3701, "step": 2471 }, { "epoch": 0.31, "grad_norm": 6.6629628095801765, "learning_rate": 8.114298220756895e-06, "loss": 1.3652, "step": 2472 }, { "epoch": 0.31, "grad_norm": 7.753324449747056, "learning_rate": 8.112722068158974e-06, "loss": 1.2502, "step": 2473 }, { "epoch": 0.31, "grad_norm": 7.932307410657829, "learning_rate": 8.111145410349095e-06, "loss": 1.4097, "step": 2474 }, { "epoch": 0.31, "grad_norm": 8.110425785365766, "learning_rate": 8.109568247583155e-06, "loss": 1.4852, "step": 2475 }, { "epoch": 0.31, "grad_norm": 7.416119443463866, "learning_rate": 8.10799058011714e-06, "loss": 1.7498, "step": 2476 }, { "epoch": 0.31, "grad_norm": 6.8531883964294815, "learning_rate": 8.106412408207112e-06, "loss": 1.2754, "step": 2477 }, { "epoch": 0.31, "grad_norm": 6.181134050215058, "learning_rate": 8.104833732109217e-06, "loss": 1.127, "step": 2478 }, { "epoch": 0.31, "grad_norm": 7.1130149953742245, "learning_rate": 8.103254552079682e-06, "loss": 1.3561, "step": 2479 }, { "epoch": 0.31, "grad_norm": 9.364841593484892, "learning_rate": 8.101674868374821e-06, "loss": 1.4072, "step": 2480 }, { "epoch": 0.31, "grad_norm": 7.2609007380921256, "learning_rate": 8.100094681251018e-06, "loss": 1.186, "step": 2481 }, { "epoch": 0.31, "grad_norm": 7.202864194018219, "learning_rate": 8.098513990964754e-06, "loss": 1.3359, "step": 2482 }, { "epoch": 0.31, "grad_norm": 8.345235726385841, "learning_rate": 8.096932797772577e-06, "loss": 1.7457, "step": 2483 }, { "epoch": 0.31, "grad_norm": 7.6695391473000365, "learning_rate": 8.095351101931128e-06, "loss": 1.2982, "step": 2484 }, { "epoch": 0.31, "grad_norm": 6.488822563214282, "learning_rate": 8.093768903697125e-06, "loss": 1.1187, "step": 2485 }, { "epoch": 0.31, "grad_norm": 7.261037678468087, "learning_rate": 8.092186203327364e-06, "loss": 1.3832, "step": 2486 }, { "epoch": 0.31, "grad_norm": 7.7168027663824565, "learning_rate": 8.090603001078727e-06, "loss": 1.6526, "step": 2487 }, { "epoch": 0.31, "grad_norm": 6.818741598206621, "learning_rate": 8.08901929720818e-06, "loss": 1.3639, "step": 2488 }, { "epoch": 0.31, "grad_norm": 6.930775631089808, "learning_rate": 8.087435091972762e-06, "loss": 1.2627, "step": 2489 }, { "epoch": 0.31, "grad_norm": 6.802285584481066, "learning_rate": 8.085850385629601e-06, "loss": 1.1667, "step": 2490 }, { "epoch": 0.31, "grad_norm": 7.494005538812765, "learning_rate": 8.084265178435904e-06, "loss": 1.7314, "step": 2491 }, { "epoch": 0.31, "grad_norm": 6.8308867965148705, "learning_rate": 8.082679470648955e-06, "loss": 1.5652, "step": 2492 }, { "epoch": 0.31, "grad_norm": 7.0145764791739795, "learning_rate": 8.08109326252613e-06, "loss": 1.2988, "step": 2493 }, { "epoch": 0.31, "grad_norm": 7.567749374442639, "learning_rate": 8.079506554324874e-06, "loss": 1.5111, "step": 2494 }, { "epoch": 0.31, "grad_norm": 11.060942473847104, "learning_rate": 8.077919346302718e-06, "loss": 1.3712, "step": 2495 }, { "epoch": 0.31, "grad_norm": 7.750354739680476, "learning_rate": 8.076331638717278e-06, "loss": 1.2097, "step": 2496 }, { "epoch": 0.31, "grad_norm": 7.325321231544325, "learning_rate": 8.074743431826246e-06, "loss": 1.5787, "step": 2497 }, { "epoch": 0.31, "grad_norm": 8.167186081108511, "learning_rate": 8.073154725887398e-06, "loss": 1.5248, "step": 2498 }, { "epoch": 0.31, "grad_norm": 7.235483871142903, "learning_rate": 8.071565521158586e-06, "loss": 1.2965, "step": 2499 }, { "epoch": 0.31, "grad_norm": 6.733438562328479, "learning_rate": 8.069975817897752e-06, "loss": 1.488, "step": 2500 }, { "epoch": 0.31, "grad_norm": 7.584539522278346, "learning_rate": 8.06838561636291e-06, "loss": 1.1675, "step": 2501 }, { "epoch": 0.31, "grad_norm": 15.4299370208627, "learning_rate": 8.066794916812159e-06, "loss": 1.4014, "step": 2502 }, { "epoch": 0.31, "grad_norm": 7.5132019023324315, "learning_rate": 8.065203719503679e-06, "loss": 1.5948, "step": 2503 }, { "epoch": 0.31, "grad_norm": 6.752375371020854, "learning_rate": 8.06361202469573e-06, "loss": 1.5496, "step": 2504 }, { "epoch": 0.31, "grad_norm": 7.3563733450082385, "learning_rate": 8.062019832646652e-06, "loss": 1.2938, "step": 2505 }, { "epoch": 0.31, "grad_norm": 6.86942641210137, "learning_rate": 8.060427143614867e-06, "loss": 1.4759, "step": 2506 }, { "epoch": 0.31, "grad_norm": 6.3913013426394585, "learning_rate": 8.058833957858879e-06, "loss": 1.2718, "step": 2507 }, { "epoch": 0.31, "grad_norm": 8.012852477301697, "learning_rate": 8.057240275637268e-06, "loss": 1.6073, "step": 2508 }, { "epoch": 0.31, "grad_norm": 7.367956808779324, "learning_rate": 8.0556460972087e-06, "loss": 1.6376, "step": 2509 }, { "epoch": 0.31, "grad_norm": 8.103539948928766, "learning_rate": 8.054051422831918e-06, "loss": 1.2692, "step": 2510 }, { "epoch": 0.31, "grad_norm": 7.921705645840764, "learning_rate": 8.052456252765744e-06, "loss": 1.3598, "step": 2511 }, { "epoch": 0.31, "grad_norm": 8.721969709465364, "learning_rate": 8.050860587269084e-06, "loss": 1.4784, "step": 2512 }, { "epoch": 0.31, "grad_norm": 6.785878224389878, "learning_rate": 8.049264426600926e-06, "loss": 1.3224, "step": 2513 }, { "epoch": 0.31, "grad_norm": 7.866681897017653, "learning_rate": 8.047667771020335e-06, "loss": 1.6742, "step": 2514 }, { "epoch": 0.31, "grad_norm": 7.610292050675275, "learning_rate": 8.046070620786454e-06, "loss": 1.1149, "step": 2515 }, { "epoch": 0.31, "grad_norm": 6.470028047900308, "learning_rate": 8.044472976158513e-06, "loss": 0.8676, "step": 2516 }, { "epoch": 0.31, "grad_norm": 6.95655214544787, "learning_rate": 8.042874837395816e-06, "loss": 1.8731, "step": 2517 }, { "epoch": 0.31, "grad_norm": 7.269024264776739, "learning_rate": 8.041276204757752e-06, "loss": 1.0699, "step": 2518 }, { "epoch": 0.31, "grad_norm": 9.339836209566501, "learning_rate": 8.039677078503783e-06, "loss": 1.6486, "step": 2519 }, { "epoch": 0.31, "grad_norm": 7.07687644029901, "learning_rate": 8.038077458893463e-06, "loss": 1.5742, "step": 2520 }, { "epoch": 0.31, "grad_norm": 7.026917766941238, "learning_rate": 8.036477346186413e-06, "loss": 1.244, "step": 2521 }, { "epoch": 0.31, "grad_norm": 8.699209550999285, "learning_rate": 8.034876740642344e-06, "loss": 1.2518, "step": 2522 }, { "epoch": 0.31, "grad_norm": 8.472556657562748, "learning_rate": 8.033275642521041e-06, "loss": 1.363, "step": 2523 }, { "epoch": 0.31, "grad_norm": 7.017615987118221, "learning_rate": 8.031674052082372e-06, "loss": 1.327, "step": 2524 }, { "epoch": 0.31, "grad_norm": 7.185095522476147, "learning_rate": 8.030071969586284e-06, "loss": 1.3607, "step": 2525 }, { "epoch": 0.31, "grad_norm": 7.323308806918152, "learning_rate": 8.028469395292805e-06, "loss": 1.5948, "step": 2526 }, { "epoch": 0.31, "grad_norm": 8.304202002292772, "learning_rate": 8.026866329462038e-06, "loss": 1.5657, "step": 2527 }, { "epoch": 0.31, "grad_norm": 8.359063920381613, "learning_rate": 8.025262772354173e-06, "loss": 1.5351, "step": 2528 }, { "epoch": 0.31, "grad_norm": 7.015643512946307, "learning_rate": 8.023658724229474e-06, "loss": 1.0549, "step": 2529 }, { "epoch": 0.31, "grad_norm": 6.783088993585289, "learning_rate": 8.02205418534829e-06, "loss": 1.3528, "step": 2530 }, { "epoch": 0.31, "grad_norm": 8.305040859958448, "learning_rate": 8.02044915597104e-06, "loss": 1.6501, "step": 2531 }, { "epoch": 0.31, "grad_norm": 7.137344377638465, "learning_rate": 8.018843636358236e-06, "loss": 1.325, "step": 2532 }, { "epoch": 0.32, "grad_norm": 7.349684752455537, "learning_rate": 8.01723762677046e-06, "loss": 1.3105, "step": 2533 }, { "epoch": 0.32, "grad_norm": 7.422226708203929, "learning_rate": 8.015631127468377e-06, "loss": 1.6896, "step": 2534 }, { "epoch": 0.32, "grad_norm": 6.259776095401177, "learning_rate": 8.014024138712728e-06, "loss": 1.1138, "step": 2535 }, { "epoch": 0.32, "grad_norm": 7.820327553975295, "learning_rate": 8.01241666076434e-06, "loss": 1.6511, "step": 2536 }, { "epoch": 0.32, "grad_norm": 7.546487427772058, "learning_rate": 8.010808693884112e-06, "loss": 1.4596, "step": 2537 }, { "epoch": 0.32, "grad_norm": 6.176548913308544, "learning_rate": 8.009200238333028e-06, "loss": 1.3588, "step": 2538 }, { "epoch": 0.32, "grad_norm": 7.556252252854697, "learning_rate": 8.007591294372148e-06, "loss": 1.2927, "step": 2539 }, { "epoch": 0.32, "grad_norm": 7.1062552048919905, "learning_rate": 8.005981862262612e-06, "loss": 1.0981, "step": 2540 }, { "epoch": 0.32, "grad_norm": 7.096998097766152, "learning_rate": 8.004371942265644e-06, "loss": 1.0105, "step": 2541 }, { "epoch": 0.32, "grad_norm": 6.13122318800589, "learning_rate": 8.002761534642538e-06, "loss": 0.9393, "step": 2542 }, { "epoch": 0.32, "grad_norm": 6.3848675872554415, "learning_rate": 8.001150639654676e-06, "loss": 1.0635, "step": 2543 }, { "epoch": 0.32, "grad_norm": 7.060187426182775, "learning_rate": 7.99953925756351e-06, "loss": 1.5717, "step": 2544 }, { "epoch": 0.32, "grad_norm": 7.8423421466610375, "learning_rate": 7.99792738863058e-06, "loss": 1.5623, "step": 2545 }, { "epoch": 0.32, "grad_norm": 7.477717047592307, "learning_rate": 7.9963150331175e-06, "loss": 1.4006, "step": 2546 }, { "epoch": 0.32, "grad_norm": 7.203300765331945, "learning_rate": 7.994702191285968e-06, "loss": 1.5591, "step": 2547 }, { "epoch": 0.32, "grad_norm": 7.210987042788678, "learning_rate": 7.99308886339775e-06, "loss": 1.6179, "step": 2548 }, { "epoch": 0.32, "grad_norm": 6.23422145509374, "learning_rate": 7.991475049714703e-06, "loss": 1.2016, "step": 2549 }, { "epoch": 0.32, "grad_norm": 6.4052410086167315, "learning_rate": 7.989860750498757e-06, "loss": 1.3804, "step": 2550 }, { "epoch": 0.32, "grad_norm": 7.074578705288028, "learning_rate": 7.988245966011919e-06, "loss": 1.1696, "step": 2551 }, { "epoch": 0.32, "grad_norm": 6.509075917448096, "learning_rate": 7.986630696516281e-06, "loss": 1.7056, "step": 2552 }, { "epoch": 0.32, "grad_norm": 6.329030793109959, "learning_rate": 7.98501494227401e-06, "loss": 1.2802, "step": 2553 }, { "epoch": 0.32, "grad_norm": 6.802730779058811, "learning_rate": 7.98339870354735e-06, "loss": 1.4048, "step": 2554 }, { "epoch": 0.32, "grad_norm": 7.58951577691133, "learning_rate": 7.981781980598625e-06, "loss": 1.6124, "step": 2555 }, { "epoch": 0.32, "grad_norm": 7.599566275949652, "learning_rate": 7.98016477369024e-06, "loss": 1.4993, "step": 2556 }, { "epoch": 0.32, "grad_norm": 7.01331666655709, "learning_rate": 7.978547083084676e-06, "loss": 1.3789, "step": 2557 }, { "epoch": 0.32, "grad_norm": 6.249605280819974, "learning_rate": 7.976928909044492e-06, "loss": 1.0868, "step": 2558 }, { "epoch": 0.32, "grad_norm": 7.801332589823712, "learning_rate": 7.975310251832328e-06, "loss": 1.6771, "step": 2559 }, { "epoch": 0.32, "grad_norm": 7.451068131668422, "learning_rate": 7.973691111710901e-06, "loss": 1.4541, "step": 2560 }, { "epoch": 0.32, "grad_norm": 7.173840843380781, "learning_rate": 7.972071488943005e-06, "loss": 1.2665, "step": 2561 }, { "epoch": 0.32, "grad_norm": 8.128787018431522, "learning_rate": 7.970451383791515e-06, "loss": 0.915, "step": 2562 }, { "epoch": 0.32, "grad_norm": 6.982745733805105, "learning_rate": 7.968830796519383e-06, "loss": 1.5537, "step": 2563 }, { "epoch": 0.32, "grad_norm": 6.679787612505556, "learning_rate": 7.967209727389635e-06, "loss": 1.5156, "step": 2564 }, { "epoch": 0.32, "grad_norm": 7.1620384443884175, "learning_rate": 7.965588176665387e-06, "loss": 1.3673, "step": 2565 }, { "epoch": 0.32, "grad_norm": 6.713383395656048, "learning_rate": 7.963966144609821e-06, "loss": 1.3061, "step": 2566 }, { "epoch": 0.32, "grad_norm": 7.696739111682263, "learning_rate": 7.962343631486202e-06, "loss": 1.2174, "step": 2567 }, { "epoch": 0.32, "grad_norm": 7.358170013900699, "learning_rate": 7.960720637557875e-06, "loss": 1.0106, "step": 2568 }, { "epoch": 0.32, "grad_norm": 8.031626762909129, "learning_rate": 7.959097163088259e-06, "loss": 1.2445, "step": 2569 }, { "epoch": 0.32, "grad_norm": 6.863814613822814, "learning_rate": 7.957473208340851e-06, "loss": 1.2902, "step": 2570 }, { "epoch": 0.32, "grad_norm": 10.1275054841989, "learning_rate": 7.955848773579232e-06, "loss": 1.4728, "step": 2571 }, { "epoch": 0.32, "grad_norm": 7.325801553911452, "learning_rate": 7.954223859067055e-06, "loss": 1.5122, "step": 2572 }, { "epoch": 0.32, "grad_norm": 6.861979580590757, "learning_rate": 7.95259846506805e-06, "loss": 1.3002, "step": 2573 }, { "epoch": 0.32, "grad_norm": 7.304326270298974, "learning_rate": 7.950972591846031e-06, "loss": 1.6237, "step": 2574 }, { "epoch": 0.32, "grad_norm": 7.697990617904423, "learning_rate": 7.949346239664885e-06, "loss": 1.3649, "step": 2575 }, { "epoch": 0.32, "grad_norm": 7.122465131399863, "learning_rate": 7.947719408788577e-06, "loss": 1.3923, "step": 2576 }, { "epoch": 0.32, "grad_norm": 8.34530303576538, "learning_rate": 7.946092099481153e-06, "loss": 1.2928, "step": 2577 }, { "epoch": 0.32, "grad_norm": 6.21139360227686, "learning_rate": 7.94446431200673e-06, "loss": 1.3081, "step": 2578 }, { "epoch": 0.32, "grad_norm": 7.244455689145474, "learning_rate": 7.942836046629512e-06, "loss": 1.321, "step": 2579 }, { "epoch": 0.32, "grad_norm": 7.467756314926682, "learning_rate": 7.941207303613773e-06, "loss": 1.7659, "step": 2580 }, { "epoch": 0.32, "grad_norm": 7.117830798269684, "learning_rate": 7.939578083223866e-06, "loss": 1.3003, "step": 2581 }, { "epoch": 0.32, "grad_norm": 7.194333776170839, "learning_rate": 7.937948385724224e-06, "loss": 1.2807, "step": 2582 }, { "epoch": 0.32, "grad_norm": 6.685733603143652, "learning_rate": 7.936318211379355e-06, "loss": 1.1489, "step": 2583 }, { "epoch": 0.32, "grad_norm": 6.718847084253685, "learning_rate": 7.934687560453846e-06, "loss": 1.1857, "step": 2584 }, { "epoch": 0.32, "grad_norm": 6.734731065775103, "learning_rate": 7.93305643321236e-06, "loss": 0.967, "step": 2585 }, { "epoch": 0.32, "grad_norm": 7.222776584738374, "learning_rate": 7.931424829919638e-06, "loss": 1.3769, "step": 2586 }, { "epoch": 0.32, "grad_norm": 7.637516414181373, "learning_rate": 7.929792750840499e-06, "loss": 1.2561, "step": 2587 }, { "epoch": 0.32, "grad_norm": 8.073810245192808, "learning_rate": 7.928160196239836e-06, "loss": 1.4873, "step": 2588 }, { "epoch": 0.32, "grad_norm": 6.490242830145552, "learning_rate": 7.926527166382624e-06, "loss": 1.358, "step": 2589 }, { "epoch": 0.32, "grad_norm": 7.848411972215697, "learning_rate": 7.924893661533914e-06, "loss": 1.3936, "step": 2590 }, { "epoch": 0.32, "grad_norm": 6.927787452674781, "learning_rate": 7.923259681958832e-06, "loss": 1.1811, "step": 2591 }, { "epoch": 0.32, "grad_norm": 6.890904217082913, "learning_rate": 7.921625227922579e-06, "loss": 1.5354, "step": 2592 }, { "epoch": 0.32, "grad_norm": 7.488825187625767, "learning_rate": 7.91999029969044e-06, "loss": 1.2128, "step": 2593 }, { "epoch": 0.32, "grad_norm": 6.8830820148137, "learning_rate": 7.918354897527767e-06, "loss": 1.7404, "step": 2594 }, { "epoch": 0.32, "grad_norm": 7.053871006575014, "learning_rate": 7.916719021700002e-06, "loss": 1.4061, "step": 2595 }, { "epoch": 0.32, "grad_norm": 7.6142068103656895, "learning_rate": 7.91508267247265e-06, "loss": 1.5398, "step": 2596 }, { "epoch": 0.32, "grad_norm": 6.44561967065497, "learning_rate": 7.913445850111305e-06, "loss": 1.2654, "step": 2597 }, { "epoch": 0.32, "grad_norm": 8.936808462697062, "learning_rate": 7.911808554881629e-06, "loss": 1.3907, "step": 2598 }, { "epoch": 0.32, "grad_norm": 7.904674938191692, "learning_rate": 7.910170787049366e-06, "loss": 1.7443, "step": 2599 }, { "epoch": 0.32, "grad_norm": 8.849711056847122, "learning_rate": 7.908532546880334e-06, "loss": 1.2614, "step": 2600 }, { "epoch": 0.32, "grad_norm": 6.899287675767817, "learning_rate": 7.906893834640428e-06, "loss": 1.5696, "step": 2601 }, { "epoch": 0.32, "grad_norm": 7.791036608541082, "learning_rate": 7.905254650595618e-06, "loss": 1.5725, "step": 2602 }, { "epoch": 0.32, "grad_norm": 7.517109881477275, "learning_rate": 7.903614995011956e-06, "loss": 1.6594, "step": 2603 }, { "epoch": 0.32, "grad_norm": 7.509684099631672, "learning_rate": 7.90197486815557e-06, "loss": 1.7166, "step": 2604 }, { "epoch": 0.32, "grad_norm": 7.104302375827419, "learning_rate": 7.900334270292653e-06, "loss": 1.201, "step": 2605 }, { "epoch": 0.32, "grad_norm": 6.5864016353940835, "learning_rate": 7.898693201689489e-06, "loss": 1.3954, "step": 2606 }, { "epoch": 0.32, "grad_norm": 6.689897250931552, "learning_rate": 7.897051662612431e-06, "loss": 1.4697, "step": 2607 }, { "epoch": 0.32, "grad_norm": 8.062743891676522, "learning_rate": 7.89540965332791e-06, "loss": 1.513, "step": 2608 }, { "epoch": 0.32, "grad_norm": 6.98732389543067, "learning_rate": 7.893767174102435e-06, "loss": 1.29, "step": 2609 }, { "epoch": 0.32, "grad_norm": 6.725988771424949, "learning_rate": 7.892124225202587e-06, "loss": 1.5747, "step": 2610 }, { "epoch": 0.32, "grad_norm": 6.982525480220032, "learning_rate": 7.890480806895024e-06, "loss": 1.4582, "step": 2611 }, { "epoch": 0.32, "grad_norm": 6.964905691751929, "learning_rate": 7.888836919446485e-06, "loss": 1.7654, "step": 2612 }, { "epoch": 0.33, "grad_norm": 6.732409223077584, "learning_rate": 7.887192563123783e-06, "loss": 1.5151, "step": 2613 }, { "epoch": 0.33, "grad_norm": 7.289659091621633, "learning_rate": 7.885547738193804e-06, "loss": 1.1952, "step": 2614 }, { "epoch": 0.33, "grad_norm": 7.020532060819885, "learning_rate": 7.883902444923513e-06, "loss": 1.5716, "step": 2615 }, { "epoch": 0.33, "grad_norm": 7.665975733096022, "learning_rate": 7.882256683579948e-06, "loss": 1.3567, "step": 2616 }, { "epoch": 0.33, "grad_norm": 7.047968460874172, "learning_rate": 7.880610454430226e-06, "loss": 1.3781, "step": 2617 }, { "epoch": 0.33, "grad_norm": 6.828944785846057, "learning_rate": 7.878963757741542e-06, "loss": 1.3574, "step": 2618 }, { "epoch": 0.33, "grad_norm": 7.877405825779654, "learning_rate": 7.87731659378116e-06, "loss": 1.403, "step": 2619 }, { "epoch": 0.33, "grad_norm": 8.086784056483676, "learning_rate": 7.875668962816426e-06, "loss": 1.4703, "step": 2620 }, { "epoch": 0.33, "grad_norm": 6.528363383670398, "learning_rate": 7.87402086511476e-06, "loss": 0.9406, "step": 2621 }, { "epoch": 0.33, "grad_norm": 6.855491483445316, "learning_rate": 7.872372300943657e-06, "loss": 1.3295, "step": 2622 }, { "epoch": 0.33, "grad_norm": 8.082412971429124, "learning_rate": 7.870723270570686e-06, "loss": 1.6406, "step": 2623 }, { "epoch": 0.33, "grad_norm": 8.239170518996007, "learning_rate": 7.869073774263496e-06, "loss": 1.331, "step": 2624 }, { "epoch": 0.33, "grad_norm": 7.988930192595583, "learning_rate": 7.867423812289809e-06, "loss": 1.5416, "step": 2625 }, { "epoch": 0.33, "grad_norm": 7.328359605145863, "learning_rate": 7.86577338491742e-06, "loss": 1.2393, "step": 2626 }, { "epoch": 0.33, "grad_norm": 6.835515556723196, "learning_rate": 7.864122492414208e-06, "loss": 1.4935, "step": 2627 }, { "epoch": 0.33, "grad_norm": 7.466119979352822, "learning_rate": 7.86247113504812e-06, "loss": 1.7723, "step": 2628 }, { "epoch": 0.33, "grad_norm": 6.087754572670938, "learning_rate": 7.860819313087177e-06, "loss": 1.3202, "step": 2629 }, { "epoch": 0.33, "grad_norm": 7.712008745737667, "learning_rate": 7.859167026799481e-06, "loss": 1.8255, "step": 2630 }, { "epoch": 0.33, "grad_norm": 7.073346189224526, "learning_rate": 7.857514276453208e-06, "loss": 1.2781, "step": 2631 }, { "epoch": 0.33, "grad_norm": 7.41386786593447, "learning_rate": 7.855861062316609e-06, "loss": 1.6303, "step": 2632 }, { "epoch": 0.33, "grad_norm": 8.305719802712614, "learning_rate": 7.854207384658008e-06, "loss": 1.9142, "step": 2633 }, { "epoch": 0.33, "grad_norm": 6.532033098722959, "learning_rate": 7.852553243745806e-06, "loss": 1.4037, "step": 2634 }, { "epoch": 0.33, "grad_norm": 6.753522709032215, "learning_rate": 7.850898639848481e-06, "loss": 1.0233, "step": 2635 }, { "epoch": 0.33, "grad_norm": 8.878391842003552, "learning_rate": 7.849243573234582e-06, "loss": 1.3469, "step": 2636 }, { "epoch": 0.33, "grad_norm": 6.844920310188255, "learning_rate": 7.847588044172737e-06, "loss": 1.3545, "step": 2637 }, { "epoch": 0.33, "grad_norm": 7.2357423284194775, "learning_rate": 7.845932052931648e-06, "loss": 1.5227, "step": 2638 }, { "epoch": 0.33, "grad_norm": 7.042635326919785, "learning_rate": 7.84427559978009e-06, "loss": 1.5235, "step": 2639 }, { "epoch": 0.33, "grad_norm": 6.906986669401349, "learning_rate": 7.842618684986916e-06, "loss": 1.3883, "step": 2640 }, { "epoch": 0.33, "grad_norm": 7.211692307223672, "learning_rate": 7.840961308821052e-06, "loss": 1.2773, "step": 2641 }, { "epoch": 0.33, "grad_norm": 7.529191876000421, "learning_rate": 7.839303471551496e-06, "loss": 1.4454, "step": 2642 }, { "epoch": 0.33, "grad_norm": 7.054881029254966, "learning_rate": 7.837645173447329e-06, "loss": 1.6898, "step": 2643 }, { "epoch": 0.33, "grad_norm": 6.333938631175128, "learning_rate": 7.835986414777698e-06, "loss": 1.0162, "step": 2644 }, { "epoch": 0.33, "grad_norm": 6.785068508299839, "learning_rate": 7.834327195811833e-06, "loss": 1.4929, "step": 2645 }, { "epoch": 0.33, "grad_norm": 7.369592544085812, "learning_rate": 7.832667516819032e-06, "loss": 1.2913, "step": 2646 }, { "epoch": 0.33, "grad_norm": 7.0712664955126705, "learning_rate": 7.83100737806867e-06, "loss": 1.3261, "step": 2647 }, { "epoch": 0.33, "grad_norm": 7.018054912550773, "learning_rate": 7.829346779830194e-06, "loss": 1.5553, "step": 2648 }, { "epoch": 0.33, "grad_norm": 8.150542006578164, "learning_rate": 7.827685722373133e-06, "loss": 1.3532, "step": 2649 }, { "epoch": 0.33, "grad_norm": 6.8006706212644525, "learning_rate": 7.826024205967084e-06, "loss": 1.2228, "step": 2650 }, { "epoch": 0.33, "grad_norm": 8.959732034653866, "learning_rate": 7.824362230881719e-06, "loss": 1.6745, "step": 2651 }, { "epoch": 0.33, "grad_norm": 7.611326908615317, "learning_rate": 7.822699797386789e-06, "loss": 1.6622, "step": 2652 }, { "epoch": 0.33, "grad_norm": 7.3836051609593145, "learning_rate": 7.82103690575211e-06, "loss": 1.3379, "step": 2653 }, { "epoch": 0.33, "grad_norm": 6.804543177269342, "learning_rate": 7.819373556247585e-06, "loss": 1.5724, "step": 2654 }, { "epoch": 0.33, "grad_norm": 6.523707298429854, "learning_rate": 7.81770974914318e-06, "loss": 1.2132, "step": 2655 }, { "epoch": 0.33, "grad_norm": 6.220303005271411, "learning_rate": 7.816045484708945e-06, "loss": 1.1992, "step": 2656 }, { "epoch": 0.33, "grad_norm": 7.239272944187342, "learning_rate": 7.814380763214996e-06, "loss": 1.6709, "step": 2657 }, { "epoch": 0.33, "grad_norm": 6.398273719267144, "learning_rate": 7.812715584931527e-06, "loss": 0.8805, "step": 2658 }, { "epoch": 0.33, "grad_norm": 7.978570699558915, "learning_rate": 7.811049950128804e-06, "loss": 1.4811, "step": 2659 }, { "epoch": 0.33, "grad_norm": 6.414502732637083, "learning_rate": 7.809383859077172e-06, "loss": 1.4928, "step": 2660 }, { "epoch": 0.33, "grad_norm": 6.58630725300756, "learning_rate": 7.807717312047044e-06, "loss": 1.3168, "step": 2661 }, { "epoch": 0.33, "grad_norm": 5.706343698744604, "learning_rate": 7.806050309308912e-06, "loss": 1.3839, "step": 2662 }, { "epoch": 0.33, "grad_norm": 7.777579723464679, "learning_rate": 7.804382851133336e-06, "loss": 1.5363, "step": 2663 }, { "epoch": 0.33, "grad_norm": 6.934919636402274, "learning_rate": 7.80271493779096e-06, "loss": 1.4707, "step": 2664 }, { "epoch": 0.33, "grad_norm": 7.095900794799495, "learning_rate": 7.801046569552488e-06, "loss": 1.4207, "step": 2665 }, { "epoch": 0.33, "grad_norm": 8.821523883199726, "learning_rate": 7.799377746688712e-06, "loss": 1.4171, "step": 2666 }, { "epoch": 0.33, "grad_norm": 6.900010385961677, "learning_rate": 7.797708469470488e-06, "loss": 1.3414, "step": 2667 }, { "epoch": 0.33, "grad_norm": 6.698479468502642, "learning_rate": 7.79603873816875e-06, "loss": 1.2829, "step": 2668 }, { "epoch": 0.33, "grad_norm": 6.106295632624861, "learning_rate": 7.794368553054502e-06, "loss": 1.1379, "step": 2669 }, { "epoch": 0.33, "grad_norm": 6.787681855525483, "learning_rate": 7.792697914398825e-06, "loss": 1.1976, "step": 2670 }, { "epoch": 0.33, "grad_norm": 7.477475433813898, "learning_rate": 7.791026822472876e-06, "loss": 1.2813, "step": 2671 }, { "epoch": 0.33, "grad_norm": 7.135992389168859, "learning_rate": 7.78935527754788e-06, "loss": 1.2271, "step": 2672 }, { "epoch": 0.33, "grad_norm": 7.005066565253685, "learning_rate": 7.78768327989514e-06, "loss": 1.2928, "step": 2673 }, { "epoch": 0.33, "grad_norm": 7.953206668403036, "learning_rate": 7.786010829786025e-06, "loss": 1.6016, "step": 2674 }, { "epoch": 0.33, "grad_norm": 6.753229120755535, "learning_rate": 7.784337927491989e-06, "loss": 1.4443, "step": 2675 }, { "epoch": 0.33, "grad_norm": 5.865491638310411, "learning_rate": 7.78266457328455e-06, "loss": 1.0645, "step": 2676 }, { "epoch": 0.33, "grad_norm": 6.999403442414741, "learning_rate": 7.780990767435304e-06, "loss": 1.2071, "step": 2677 }, { "epoch": 0.33, "grad_norm": 6.568207752455333, "learning_rate": 7.779316510215919e-06, "loss": 1.2918, "step": 2678 }, { "epoch": 0.33, "grad_norm": 7.691269683831185, "learning_rate": 7.777641801898135e-06, "loss": 1.6437, "step": 2679 }, { "epoch": 0.33, "grad_norm": 7.527096151569531, "learning_rate": 7.775966642753768e-06, "loss": 1.477, "step": 2680 }, { "epoch": 0.33, "grad_norm": 8.879215280920517, "learning_rate": 7.774291033054703e-06, "loss": 1.6357, "step": 2681 }, { "epoch": 0.33, "grad_norm": 8.112570418943134, "learning_rate": 7.772614973072904e-06, "loss": 1.6742, "step": 2682 }, { "epoch": 0.33, "grad_norm": 7.569307630047219, "learning_rate": 7.770938463080401e-06, "loss": 1.4137, "step": 2683 }, { "epoch": 0.33, "grad_norm": 6.34937015325365, "learning_rate": 7.769261503349304e-06, "loss": 1.17, "step": 2684 }, { "epoch": 0.33, "grad_norm": 8.050668652728124, "learning_rate": 7.767584094151793e-06, "loss": 1.6019, "step": 2685 }, { "epoch": 0.33, "grad_norm": 7.406188092209524, "learning_rate": 7.76590623576012e-06, "loss": 1.5941, "step": 2686 }, { "epoch": 0.33, "grad_norm": 7.508112442283202, "learning_rate": 7.764227928446606e-06, "loss": 1.5292, "step": 2687 }, { "epoch": 0.33, "grad_norm": 7.07759278003364, "learning_rate": 7.762549172483657e-06, "loss": 1.372, "step": 2688 }, { "epoch": 0.33, "grad_norm": 6.316237885771164, "learning_rate": 7.76086996814374e-06, "loss": 1.2199, "step": 2689 }, { "epoch": 0.33, "grad_norm": 6.672416386505196, "learning_rate": 7.759190315699401e-06, "loss": 1.443, "step": 2690 }, { "epoch": 0.33, "grad_norm": 7.667137262647111, "learning_rate": 7.757510215423256e-06, "loss": 1.5373, "step": 2691 }, { "epoch": 0.33, "grad_norm": 6.817251167648101, "learning_rate": 7.755829667587993e-06, "loss": 1.5986, "step": 2692 }, { "epoch": 0.33, "grad_norm": 6.989237526542252, "learning_rate": 7.754148672466378e-06, "loss": 1.4184, "step": 2693 }, { "epoch": 0.34, "grad_norm": 6.236841732290855, "learning_rate": 7.75246723033124e-06, "loss": 1.5817, "step": 2694 }, { "epoch": 0.34, "grad_norm": 6.667664789305106, "learning_rate": 7.750785341455494e-06, "loss": 1.1417, "step": 2695 }, { "epoch": 0.34, "grad_norm": 7.579615071331574, "learning_rate": 7.749103006112113e-06, "loss": 1.5172, "step": 2696 }, { "epoch": 0.34, "grad_norm": 6.158657785688025, "learning_rate": 7.747420224574153e-06, "loss": 1.1741, "step": 2697 }, { "epoch": 0.34, "grad_norm": 6.376641551075682, "learning_rate": 7.745736997114737e-06, "loss": 1.2577, "step": 2698 }, { "epoch": 0.34, "grad_norm": 5.883484862507982, "learning_rate": 7.744053324007064e-06, "loss": 1.6284, "step": 2699 }, { "epoch": 0.34, "grad_norm": 6.858251698958605, "learning_rate": 7.742369205524402e-06, "loss": 1.2319, "step": 2700 }, { "epoch": 0.34, "grad_norm": 8.630899808855453, "learning_rate": 7.740684641940092e-06, "loss": 1.3506, "step": 2701 }, { "epoch": 0.34, "grad_norm": 7.413641556061818, "learning_rate": 7.738999633527548e-06, "loss": 1.3925, "step": 2702 }, { "epoch": 0.34, "grad_norm": 7.900788476035647, "learning_rate": 7.73731418056026e-06, "loss": 1.726, "step": 2703 }, { "epoch": 0.34, "grad_norm": 6.81060669067473, "learning_rate": 7.735628283311783e-06, "loss": 1.5294, "step": 2704 }, { "epoch": 0.34, "grad_norm": 7.972904342827301, "learning_rate": 7.733941942055745e-06, "loss": 1.5121, "step": 2705 }, { "epoch": 0.34, "grad_norm": 6.598719780435199, "learning_rate": 7.732255157065854e-06, "loss": 0.9139, "step": 2706 }, { "epoch": 0.34, "grad_norm": 6.747074096351215, "learning_rate": 7.730567928615881e-06, "loss": 1.2822, "step": 2707 }, { "epoch": 0.34, "grad_norm": 6.83027784129503, "learning_rate": 7.728880256979674e-06, "loss": 1.4143, "step": 2708 }, { "epoch": 0.34, "grad_norm": 6.863357409549864, "learning_rate": 7.727192142431149e-06, "loss": 0.9736, "step": 2709 }, { "epoch": 0.34, "grad_norm": 6.7891839954806645, "learning_rate": 7.7255035852443e-06, "loss": 1.1241, "step": 2710 }, { "epoch": 0.34, "grad_norm": 6.483185074561224, "learning_rate": 7.723814585693185e-06, "loss": 1.5585, "step": 2711 }, { "epoch": 0.34, "grad_norm": 8.292341775662386, "learning_rate": 7.722125144051942e-06, "loss": 1.5788, "step": 2712 }, { "epoch": 0.34, "grad_norm": 7.068328299348067, "learning_rate": 7.720435260594774e-06, "loss": 1.4734, "step": 2713 }, { "epoch": 0.34, "grad_norm": 6.161031600169325, "learning_rate": 7.71874493559596e-06, "loss": 1.0369, "step": 2714 }, { "epoch": 0.34, "grad_norm": 7.707882040732547, "learning_rate": 7.717054169329847e-06, "loss": 1.4768, "step": 2715 }, { "epoch": 0.34, "grad_norm": 6.536189513217759, "learning_rate": 7.715362962070857e-06, "loss": 1.3065, "step": 2716 }, { "epoch": 0.34, "grad_norm": 7.29853021392633, "learning_rate": 7.713671314093482e-06, "loss": 1.1754, "step": 2717 }, { "epoch": 0.34, "grad_norm": 7.390629108068784, "learning_rate": 7.711979225672289e-06, "loss": 1.3744, "step": 2718 }, { "epoch": 0.34, "grad_norm": 7.113849273175235, "learning_rate": 7.710286697081906e-06, "loss": 1.4662, "step": 2719 }, { "epoch": 0.34, "grad_norm": 7.103753212204974, "learning_rate": 7.708593728597047e-06, "loss": 1.3483, "step": 2720 }, { "epoch": 0.34, "grad_norm": 7.046430510127641, "learning_rate": 7.706900320492487e-06, "loss": 1.2744, "step": 2721 }, { "epoch": 0.34, "grad_norm": 6.815093347448948, "learning_rate": 7.705206473043075e-06, "loss": 1.5134, "step": 2722 }, { "epoch": 0.34, "grad_norm": 7.7627191044404675, "learning_rate": 7.703512186523732e-06, "loss": 1.3815, "step": 2723 }, { "epoch": 0.34, "grad_norm": 7.520691216074663, "learning_rate": 7.701817461209451e-06, "loss": 1.3374, "step": 2724 }, { "epoch": 0.34, "grad_norm": 7.955668550927575, "learning_rate": 7.700122297375293e-06, "loss": 1.2561, "step": 2725 }, { "epoch": 0.34, "grad_norm": 6.88409445550422, "learning_rate": 7.698426695296397e-06, "loss": 1.3686, "step": 2726 }, { "epoch": 0.34, "grad_norm": 6.857714761434239, "learning_rate": 7.696730655247963e-06, "loss": 1.2577, "step": 2727 }, { "epoch": 0.34, "grad_norm": 7.283504631685963, "learning_rate": 7.695034177505271e-06, "loss": 1.5354, "step": 2728 }, { "epoch": 0.34, "grad_norm": 7.644042935566368, "learning_rate": 7.69333726234367e-06, "loss": 1.4865, "step": 2729 }, { "epoch": 0.34, "grad_norm": 7.1338522823963, "learning_rate": 7.691639910038576e-06, "loss": 1.5494, "step": 2730 }, { "epoch": 0.34, "grad_norm": 6.864623867861465, "learning_rate": 7.68994212086548e-06, "loss": 1.3155, "step": 2731 }, { "epoch": 0.34, "grad_norm": 7.108301467763963, "learning_rate": 7.68824389509994e-06, "loss": 1.1367, "step": 2732 }, { "epoch": 0.34, "grad_norm": 6.960857011144508, "learning_rate": 7.686545233017591e-06, "loss": 1.1781, "step": 2733 }, { "epoch": 0.34, "grad_norm": 7.016381593750199, "learning_rate": 7.684846134894133e-06, "loss": 1.3712, "step": 2734 }, { "epoch": 0.34, "grad_norm": 6.500148294610393, "learning_rate": 7.68314660100534e-06, "loss": 1.1809, "step": 2735 }, { "epoch": 0.34, "grad_norm": 7.434466806030121, "learning_rate": 7.681446631627054e-06, "loss": 1.2353, "step": 2736 }, { "epoch": 0.34, "grad_norm": 6.041972867779719, "learning_rate": 7.679746227035192e-06, "loss": 1.1212, "step": 2737 }, { "epoch": 0.34, "grad_norm": 6.622579743706434, "learning_rate": 7.678045387505737e-06, "loss": 1.4278, "step": 2738 }, { "epoch": 0.34, "grad_norm": 7.70321765419264, "learning_rate": 7.676344113314746e-06, "loss": 1.3366, "step": 2739 }, { "epoch": 0.34, "grad_norm": 7.288327241340635, "learning_rate": 7.674642404738344e-06, "loss": 1.2819, "step": 2740 }, { "epoch": 0.34, "grad_norm": 7.564818963439315, "learning_rate": 7.67294026205273e-06, "loss": 1.2158, "step": 2741 }, { "epoch": 0.34, "grad_norm": 8.171896471699464, "learning_rate": 7.671237685534167e-06, "loss": 1.4145, "step": 2742 }, { "epoch": 0.34, "grad_norm": 6.860301661896185, "learning_rate": 7.669534675458997e-06, "loss": 1.002, "step": 2743 }, { "epoch": 0.34, "grad_norm": 6.870032238886595, "learning_rate": 7.667831232103624e-06, "loss": 1.3651, "step": 2744 }, { "epoch": 0.34, "grad_norm": 8.368424031802283, "learning_rate": 7.666127355744529e-06, "loss": 1.5282, "step": 2745 }, { "epoch": 0.34, "grad_norm": 8.765190077106535, "learning_rate": 7.66442304665826e-06, "loss": 1.496, "step": 2746 }, { "epoch": 0.34, "grad_norm": 7.094231511359037, "learning_rate": 7.662718305121437e-06, "loss": 1.2377, "step": 2747 }, { "epoch": 0.34, "grad_norm": 8.722587318064855, "learning_rate": 7.661013131410745e-06, "loss": 1.8388, "step": 2748 }, { "epoch": 0.34, "grad_norm": 7.378511211922922, "learning_rate": 7.659307525802946e-06, "loss": 1.0917, "step": 2749 }, { "epoch": 0.34, "grad_norm": 7.126042356547347, "learning_rate": 7.657601488574869e-06, "loss": 1.5583, "step": 2750 }, { "epoch": 0.34, "grad_norm": 7.2190519629346594, "learning_rate": 7.655895020003412e-06, "loss": 1.0322, "step": 2751 }, { "epoch": 0.34, "grad_norm": 9.094262121850788, "learning_rate": 7.654188120365546e-06, "loss": 1.502, "step": 2752 }, { "epoch": 0.34, "grad_norm": 6.772028239409124, "learning_rate": 7.65248078993831e-06, "loss": 1.2319, "step": 2753 }, { "epoch": 0.34, "grad_norm": 7.675340091602095, "learning_rate": 7.650773028998812e-06, "loss": 1.5103, "step": 2754 }, { "epoch": 0.34, "grad_norm": 8.466806515812719, "learning_rate": 7.649064837824231e-06, "loss": 1.0974, "step": 2755 }, { "epoch": 0.34, "grad_norm": 6.148687056511344, "learning_rate": 7.64735621669182e-06, "loss": 1.1063, "step": 2756 }, { "epoch": 0.34, "grad_norm": 6.796472352224985, "learning_rate": 7.645647165878889e-06, "loss": 1.2416, "step": 2757 }, { "epoch": 0.34, "grad_norm": 7.99920606698819, "learning_rate": 7.643937685662835e-06, "loss": 1.4884, "step": 2758 }, { "epoch": 0.34, "grad_norm": 6.489146304345211, "learning_rate": 7.64222777632111e-06, "loss": 1.3616, "step": 2759 }, { "epoch": 0.34, "grad_norm": 6.886492759441835, "learning_rate": 7.640517438131245e-06, "loss": 1.3697, "step": 2760 }, { "epoch": 0.34, "grad_norm": 7.428005796152919, "learning_rate": 7.638806671370837e-06, "loss": 1.7741, "step": 2761 }, { "epoch": 0.34, "grad_norm": 9.751324026491588, "learning_rate": 7.637095476317553e-06, "loss": 1.3233, "step": 2762 }, { "epoch": 0.34, "grad_norm": 6.735850450132658, "learning_rate": 7.63538385324913e-06, "loss": 1.2623, "step": 2763 }, { "epoch": 0.34, "grad_norm": 6.599962702889029, "learning_rate": 7.63367180244337e-06, "loss": 1.5401, "step": 2764 }, { "epoch": 0.34, "grad_norm": 6.337814864836164, "learning_rate": 7.631959324178152e-06, "loss": 1.3025, "step": 2765 }, { "epoch": 0.34, "grad_norm": 6.5675017662139314, "learning_rate": 7.63024641873142e-06, "loss": 1.0806, "step": 2766 }, { "epoch": 0.34, "grad_norm": 6.584958517902559, "learning_rate": 7.628533086381187e-06, "loss": 1.322, "step": 2767 }, { "epoch": 0.34, "grad_norm": 6.1781024035082694, "learning_rate": 7.626819327405537e-06, "loss": 1.5464, "step": 2768 }, { "epoch": 0.34, "grad_norm": 7.540589920048338, "learning_rate": 7.6251051420826224e-06, "loss": 1.3948, "step": 2769 }, { "epoch": 0.34, "grad_norm": 6.582653102326048, "learning_rate": 7.623390530690666e-06, "loss": 1.5344, "step": 2770 }, { "epoch": 0.34, "grad_norm": 7.231056364079571, "learning_rate": 7.621675493507957e-06, "loss": 1.5622, "step": 2771 }, { "epoch": 0.34, "grad_norm": 6.687754855438709, "learning_rate": 7.619960030812856e-06, "loss": 1.1653, "step": 2772 }, { "epoch": 0.34, "grad_norm": 6.779612598896735, "learning_rate": 7.618244142883793e-06, "loss": 1.1011, "step": 2773 }, { "epoch": 0.35, "grad_norm": 6.83794425756808, "learning_rate": 7.616527829999264e-06, "loss": 1.4985, "step": 2774 }, { "epoch": 0.35, "grad_norm": 6.5934049277829025, "learning_rate": 7.614811092437837e-06, "loss": 1.2831, "step": 2775 }, { "epoch": 0.35, "grad_norm": 7.406634337609798, "learning_rate": 7.613093930478148e-06, "loss": 1.4027, "step": 2776 }, { "epoch": 0.35, "grad_norm": 7.169508293148135, "learning_rate": 7.6113763443989045e-06, "loss": 1.2715, "step": 2777 }, { "epoch": 0.35, "grad_norm": 7.854633354412625, "learning_rate": 7.609658334478878e-06, "loss": 1.4047, "step": 2778 }, { "epoch": 0.35, "grad_norm": 7.152929092270405, "learning_rate": 7.607939900996908e-06, "loss": 1.3737, "step": 2779 }, { "epoch": 0.35, "grad_norm": 7.103139071598258, "learning_rate": 7.60622104423191e-06, "loss": 1.2875, "step": 2780 }, { "epoch": 0.35, "grad_norm": 6.826257807263852, "learning_rate": 7.604501764462862e-06, "loss": 1.066, "step": 2781 }, { "epoch": 0.35, "grad_norm": 6.950203390798716, "learning_rate": 7.602782061968812e-06, "loss": 1.6696, "step": 2782 }, { "epoch": 0.35, "grad_norm": 7.723052794671285, "learning_rate": 7.601061937028881e-06, "loss": 1.2311, "step": 2783 }, { "epoch": 0.35, "grad_norm": 7.339592513864543, "learning_rate": 7.59934138992225e-06, "loss": 1.4514, "step": 2784 }, { "epoch": 0.35, "grad_norm": 7.983182598473377, "learning_rate": 7.597620420928176e-06, "loss": 1.3828, "step": 2785 }, { "epoch": 0.35, "grad_norm": 6.9915093015543075, "learning_rate": 7.59589903032598e-06, "loss": 1.2978, "step": 2786 }, { "epoch": 0.35, "grad_norm": 7.231454739992533, "learning_rate": 7.5941772183950555e-06, "loss": 1.2442, "step": 2787 }, { "epoch": 0.35, "grad_norm": 7.663924354484735, "learning_rate": 7.592454985414861e-06, "loss": 1.4862, "step": 2788 }, { "epoch": 0.35, "grad_norm": 5.927085274931894, "learning_rate": 7.590732331664923e-06, "loss": 1.0594, "step": 2789 }, { "epoch": 0.35, "grad_norm": 6.987206189769354, "learning_rate": 7.58900925742484e-06, "loss": 1.2324, "step": 2790 }, { "epoch": 0.35, "grad_norm": 6.7947604566356326, "learning_rate": 7.587285762974275e-06, "loss": 1.3783, "step": 2791 }, { "epoch": 0.35, "grad_norm": 8.124020633959326, "learning_rate": 7.585561848592959e-06, "loss": 1.455, "step": 2792 }, { "epoch": 0.35, "grad_norm": 7.1195413443186, "learning_rate": 7.5838375145606965e-06, "loss": 1.7429, "step": 2793 }, { "epoch": 0.35, "grad_norm": 7.951976957851983, "learning_rate": 7.5821127611573565e-06, "loss": 1.2831, "step": 2794 }, { "epoch": 0.35, "grad_norm": 6.981346561209597, "learning_rate": 7.580387588662872e-06, "loss": 1.2901, "step": 2795 }, { "epoch": 0.35, "grad_norm": 6.5512724947717, "learning_rate": 7.5786619973572515e-06, "loss": 1.0614, "step": 2796 }, { "epoch": 0.35, "grad_norm": 7.838774220985667, "learning_rate": 7.576935987520566e-06, "loss": 1.4863, "step": 2797 }, { "epoch": 0.35, "grad_norm": 8.249134419703957, "learning_rate": 7.575209559432959e-06, "loss": 1.5687, "step": 2798 }, { "epoch": 0.35, "grad_norm": 7.74170598739493, "learning_rate": 7.573482713374635e-06, "loss": 1.5059, "step": 2799 }, { "epoch": 0.35, "grad_norm": 7.772492786204046, "learning_rate": 7.571755449625872e-06, "loss": 1.6667, "step": 2800 }, { "epoch": 0.35, "grad_norm": 7.5347585811614, "learning_rate": 7.570027768467019e-06, "loss": 1.2705, "step": 2801 }, { "epoch": 0.35, "grad_norm": 6.630898700195627, "learning_rate": 7.568299670178484e-06, "loss": 1.5867, "step": 2802 }, { "epoch": 0.35, "grad_norm": 6.7324658676161455, "learning_rate": 7.566571155040746e-06, "loss": 1.085, "step": 2803 }, { "epoch": 0.35, "grad_norm": 8.572248270070242, "learning_rate": 7.5648422233343564e-06, "loss": 1.2999, "step": 2804 }, { "epoch": 0.35, "grad_norm": 7.286983852452228, "learning_rate": 7.563112875339926e-06, "loss": 1.3414, "step": 2805 }, { "epoch": 0.35, "grad_norm": 6.5710592621311035, "learning_rate": 7.561383111338139e-06, "loss": 1.0789, "step": 2806 }, { "epoch": 0.35, "grad_norm": 6.806207977300851, "learning_rate": 7.559652931609749e-06, "loss": 1.3492, "step": 2807 }, { "epoch": 0.35, "grad_norm": 7.130442991017998, "learning_rate": 7.557922336435568e-06, "loss": 1.6603, "step": 2808 }, { "epoch": 0.35, "grad_norm": 6.8950348647328275, "learning_rate": 7.556191326096486e-06, "loss": 1.1478, "step": 2809 }, { "epoch": 0.35, "grad_norm": 7.474487672689925, "learning_rate": 7.5544599008734545e-06, "loss": 1.1491, "step": 2810 }, { "epoch": 0.35, "grad_norm": 7.16844932604981, "learning_rate": 7.552728061047492e-06, "loss": 1.5058, "step": 2811 }, { "epoch": 0.35, "grad_norm": 6.5647618552050995, "learning_rate": 7.550995806899683e-06, "loss": 1.3363, "step": 2812 }, { "epoch": 0.35, "grad_norm": 7.145911971038645, "learning_rate": 7.549263138711189e-06, "loss": 1.2991, "step": 2813 }, { "epoch": 0.35, "grad_norm": 7.660667634214868, "learning_rate": 7.547530056763226e-06, "loss": 1.3795, "step": 2814 }, { "epoch": 0.35, "grad_norm": 7.865441685915476, "learning_rate": 7.545796561337084e-06, "loss": 1.4288, "step": 2815 }, { "epoch": 0.35, "grad_norm": 7.2933013989586435, "learning_rate": 7.544062652714119e-06, "loss": 1.3805, "step": 2816 }, { "epoch": 0.35, "grad_norm": 7.115698938937411, "learning_rate": 7.542328331175755e-06, "loss": 1.276, "step": 2817 }, { "epoch": 0.35, "grad_norm": 6.9533797361983005, "learning_rate": 7.54059359700348e-06, "loss": 1.062, "step": 2818 }, { "epoch": 0.35, "grad_norm": 9.153677778590302, "learning_rate": 7.5388584504788555e-06, "loss": 1.1979, "step": 2819 }, { "epoch": 0.35, "grad_norm": 6.864967536152041, "learning_rate": 7.537122891883499e-06, "loss": 1.1394, "step": 2820 }, { "epoch": 0.35, "grad_norm": 7.353282333046606, "learning_rate": 7.535386921499106e-06, "loss": 1.1167, "step": 2821 }, { "epoch": 0.35, "grad_norm": 8.261793661648507, "learning_rate": 7.533650539607431e-06, "loss": 1.5132, "step": 2822 }, { "epoch": 0.35, "grad_norm": 7.80650059348563, "learning_rate": 7.5319137464902994e-06, "loss": 1.5201, "step": 2823 }, { "epoch": 0.35, "grad_norm": 7.344374587855901, "learning_rate": 7.530176542429602e-06, "loss": 1.3937, "step": 2824 }, { "epoch": 0.35, "grad_norm": 7.487714405540302, "learning_rate": 7.528438927707298e-06, "loss": 1.1649, "step": 2825 }, { "epoch": 0.35, "grad_norm": 6.925536383467162, "learning_rate": 7.52670090260541e-06, "loss": 1.6572, "step": 2826 }, { "epoch": 0.35, "grad_norm": 8.002057100276271, "learning_rate": 7.5249624674060315e-06, "loss": 1.2729, "step": 2827 }, { "epoch": 0.35, "grad_norm": 7.472767291276312, "learning_rate": 7.523223622391318e-06, "loss": 1.5416, "step": 2828 }, { "epoch": 0.35, "grad_norm": 6.767828584128604, "learning_rate": 7.5214843678434945e-06, "loss": 1.3989, "step": 2829 }, { "epoch": 0.35, "grad_norm": 7.59493745306087, "learning_rate": 7.519744704044851e-06, "loss": 1.5469, "step": 2830 }, { "epoch": 0.35, "grad_norm": 6.527474187845437, "learning_rate": 7.5180046312777466e-06, "loss": 1.4313, "step": 2831 }, { "epoch": 0.35, "grad_norm": 7.563446784360207, "learning_rate": 7.5162641498246e-06, "loss": 1.4009, "step": 2832 }, { "epoch": 0.35, "grad_norm": 8.186330213200536, "learning_rate": 7.514523259967905e-06, "loss": 1.3547, "step": 2833 }, { "epoch": 0.35, "grad_norm": 7.341528802182603, "learning_rate": 7.5127819619902175e-06, "loss": 1.3969, "step": 2834 }, { "epoch": 0.35, "grad_norm": 6.78337317516621, "learning_rate": 7.511040256174159e-06, "loss": 1.5131, "step": 2835 }, { "epoch": 0.35, "grad_norm": 6.57932887044608, "learning_rate": 7.509298142802417e-06, "loss": 1.3863, "step": 2836 }, { "epoch": 0.35, "grad_norm": 6.656677026742815, "learning_rate": 7.507555622157747e-06, "loss": 1.3747, "step": 2837 }, { "epoch": 0.35, "grad_norm": 6.643099511363714, "learning_rate": 7.505812694522969e-06, "loss": 1.5682, "step": 2838 }, { "epoch": 0.35, "grad_norm": 5.960092643576386, "learning_rate": 7.504069360180971e-06, "loss": 1.1448, "step": 2839 }, { "epoch": 0.35, "grad_norm": 9.44675120131409, "learning_rate": 7.502325619414706e-06, "loss": 1.2375, "step": 2840 }, { "epoch": 0.35, "grad_norm": 7.894498714667152, "learning_rate": 7.50058147250719e-06, "loss": 1.4314, "step": 2841 }, { "epoch": 0.35, "grad_norm": 7.464825109549149, "learning_rate": 7.49883691974151e-06, "loss": 1.6064, "step": 2842 }, { "epoch": 0.35, "grad_norm": 7.09598734690476, "learning_rate": 7.497091961400817e-06, "loss": 1.284, "step": 2843 }, { "epoch": 0.35, "grad_norm": 6.120234382294994, "learning_rate": 7.495346597768327e-06, "loss": 1.2223, "step": 2844 }, { "epoch": 0.35, "grad_norm": 6.428654618747203, "learning_rate": 7.493600829127321e-06, "loss": 1.2394, "step": 2845 }, { "epoch": 0.35, "grad_norm": 6.182867425880324, "learning_rate": 7.491854655761149e-06, "loss": 1.2924, "step": 2846 }, { "epoch": 0.35, "grad_norm": 7.72798970110893, "learning_rate": 7.49010807795322e-06, "loss": 1.7792, "step": 2847 }, { "epoch": 0.35, "grad_norm": 7.978362567441119, "learning_rate": 7.488361095987017e-06, "loss": 1.2828, "step": 2848 }, { "epoch": 0.35, "grad_norm": 6.686302784602924, "learning_rate": 7.486613710146085e-06, "loss": 1.0691, "step": 2849 }, { "epoch": 0.35, "grad_norm": 7.418289920233084, "learning_rate": 7.484865920714033e-06, "loss": 1.4068, "step": 2850 }, { "epoch": 0.35, "grad_norm": 7.826536881574781, "learning_rate": 7.4831177279745385e-06, "loss": 1.3449, "step": 2851 }, { "epoch": 0.35, "grad_norm": 7.281929718258246, "learning_rate": 7.48136913221134e-06, "loss": 1.4773, "step": 2852 }, { "epoch": 0.35, "grad_norm": 7.780056919584595, "learning_rate": 7.479620133708246e-06, "loss": 1.4059, "step": 2853 }, { "epoch": 0.35, "grad_norm": 6.427060952812747, "learning_rate": 7.477870732749129e-06, "loss": 1.157, "step": 2854 }, { "epoch": 0.36, "grad_norm": 7.862545238685554, "learning_rate": 7.476120929617927e-06, "loss": 1.7339, "step": 2855 }, { "epoch": 0.36, "grad_norm": 7.56156402716087, "learning_rate": 7.47437072459864e-06, "loss": 1.2869, "step": 2856 }, { "epoch": 0.36, "grad_norm": 6.9825759228647435, "learning_rate": 7.472620117975337e-06, "loss": 1.6036, "step": 2857 }, { "epoch": 0.36, "grad_norm": 7.3830483357416625, "learning_rate": 7.470869110032153e-06, "loss": 1.4564, "step": 2858 }, { "epoch": 0.36, "grad_norm": 6.902879920014462, "learning_rate": 7.469117701053286e-06, "loss": 1.2213, "step": 2859 }, { "epoch": 0.36, "grad_norm": 7.45800023974783, "learning_rate": 7.467365891322996e-06, "loss": 1.5092, "step": 2860 }, { "epoch": 0.36, "grad_norm": 7.684803536830059, "learning_rate": 7.465613681125613e-06, "loss": 1.628, "step": 2861 }, { "epoch": 0.36, "grad_norm": 7.677756597929845, "learning_rate": 7.463861070745532e-06, "loss": 1.2486, "step": 2862 }, { "epoch": 0.36, "grad_norm": 7.087363335727989, "learning_rate": 7.46210806046721e-06, "loss": 1.7449, "step": 2863 }, { "epoch": 0.36, "grad_norm": 5.989238057794182, "learning_rate": 7.460354650575172e-06, "loss": 1.2433, "step": 2864 }, { "epoch": 0.36, "grad_norm": 6.76705175697061, "learning_rate": 7.458600841354003e-06, "loss": 1.3442, "step": 2865 }, { "epoch": 0.36, "grad_norm": 10.623100761542121, "learning_rate": 7.456846633088358e-06, "loss": 1.4612, "step": 2866 }, { "epoch": 0.36, "grad_norm": 6.510174520358363, "learning_rate": 7.455092026062955e-06, "loss": 1.1039, "step": 2867 }, { "epoch": 0.36, "grad_norm": 6.965500854916697, "learning_rate": 7.453337020562577e-06, "loss": 1.6108, "step": 2868 }, { "epoch": 0.36, "grad_norm": 7.2042518559339115, "learning_rate": 7.451581616872066e-06, "loss": 1.509, "step": 2869 }, { "epoch": 0.36, "grad_norm": 6.2138513769176615, "learning_rate": 7.449825815276341e-06, "loss": 1.1687, "step": 2870 }, { "epoch": 0.36, "grad_norm": 6.366759532348596, "learning_rate": 7.448069616060371e-06, "loss": 1.3662, "step": 2871 }, { "epoch": 0.36, "grad_norm": 7.529595095123542, "learning_rate": 7.446313019509202e-06, "loss": 1.4339, "step": 2872 }, { "epoch": 0.36, "grad_norm": 7.147406139413279, "learning_rate": 7.444556025907939e-06, "loss": 1.2809, "step": 2873 }, { "epoch": 0.36, "grad_norm": 7.613341433036678, "learning_rate": 7.442798635541749e-06, "loss": 1.2971, "step": 2874 }, { "epoch": 0.36, "grad_norm": 11.521181437734871, "learning_rate": 7.441040848695868e-06, "loss": 1.1707, "step": 2875 }, { "epoch": 0.36, "grad_norm": 6.868804177772505, "learning_rate": 7.439282665655595e-06, "loss": 1.5318, "step": 2876 }, { "epoch": 0.36, "grad_norm": 7.875428447188996, "learning_rate": 7.437524086706292e-06, "loss": 1.705, "step": 2877 }, { "epoch": 0.36, "grad_norm": 7.93450705573469, "learning_rate": 7.435765112133385e-06, "loss": 1.184, "step": 2878 }, { "epoch": 0.36, "grad_norm": 7.113610649846795, "learning_rate": 7.4340057422223675e-06, "loss": 1.2218, "step": 2879 }, { "epoch": 0.36, "grad_norm": 7.248438675195639, "learning_rate": 7.432245977258793e-06, "loss": 1.4113, "step": 2880 }, { "epoch": 0.36, "grad_norm": 6.940747734892281, "learning_rate": 7.430485817528281e-06, "loss": 1.3493, "step": 2881 }, { "epoch": 0.36, "grad_norm": 7.314747992743733, "learning_rate": 7.428725263316519e-06, "loss": 1.4018, "step": 2882 }, { "epoch": 0.36, "grad_norm": 7.171647004169002, "learning_rate": 7.4269643149092485e-06, "loss": 1.6178, "step": 2883 }, { "epoch": 0.36, "grad_norm": 6.186906094648784, "learning_rate": 7.425202972592286e-06, "loss": 1.4331, "step": 2884 }, { "epoch": 0.36, "grad_norm": 8.404148683667326, "learning_rate": 7.423441236651506e-06, "loss": 1.3463, "step": 2885 }, { "epoch": 0.36, "grad_norm": 7.485007174804512, "learning_rate": 7.421679107372845e-06, "loss": 1.3949, "step": 2886 }, { "epoch": 0.36, "grad_norm": 6.110223906383893, "learning_rate": 7.4199165850423095e-06, "loss": 1.2897, "step": 2887 }, { "epoch": 0.36, "grad_norm": 6.861857828689118, "learning_rate": 7.418153669945966e-06, "loss": 1.3024, "step": 2888 }, { "epoch": 0.36, "grad_norm": 7.36837498174588, "learning_rate": 7.416390362369945e-06, "loss": 1.103, "step": 2889 }, { "epoch": 0.36, "grad_norm": 7.395577860865578, "learning_rate": 7.41462666260044e-06, "loss": 1.2096, "step": 2890 }, { "epoch": 0.36, "grad_norm": 6.901582509578967, "learning_rate": 7.4128625709237125e-06, "loss": 1.3686, "step": 2891 }, { "epoch": 0.36, "grad_norm": 6.466606279927256, "learning_rate": 7.411098087626081e-06, "loss": 1.2462, "step": 2892 }, { "epoch": 0.36, "grad_norm": 7.377887953321963, "learning_rate": 7.409333212993932e-06, "loss": 1.5129, "step": 2893 }, { "epoch": 0.36, "grad_norm": 6.398763456972229, "learning_rate": 7.407567947313714e-06, "loss": 1.2663, "step": 2894 }, { "epoch": 0.36, "grad_norm": 7.544058854193341, "learning_rate": 7.40580229087194e-06, "loss": 1.3508, "step": 2895 }, { "epoch": 0.36, "grad_norm": 6.445622802722631, "learning_rate": 7.404036243955185e-06, "loss": 1.1274, "step": 2896 }, { "epoch": 0.36, "grad_norm": 6.758148396964915, "learning_rate": 7.40226980685009e-06, "loss": 1.3519, "step": 2897 }, { "epoch": 0.36, "grad_norm": 7.308459160932898, "learning_rate": 7.400502979843354e-06, "loss": 1.5039, "step": 2898 }, { "epoch": 0.36, "grad_norm": 6.897345369541891, "learning_rate": 7.398735763221746e-06, "loss": 1.5381, "step": 2899 }, { "epoch": 0.36, "grad_norm": 6.3422448337544965, "learning_rate": 7.396968157272094e-06, "loss": 1.2, "step": 2900 }, { "epoch": 0.36, "grad_norm": 7.503572290488686, "learning_rate": 7.395200162281291e-06, "loss": 1.3033, "step": 2901 }, { "epoch": 0.36, "grad_norm": 7.113106168183693, "learning_rate": 7.3934317785362905e-06, "loss": 1.4535, "step": 2902 }, { "epoch": 0.36, "grad_norm": 7.520080559496638, "learning_rate": 7.391663006324113e-06, "loss": 1.319, "step": 2903 }, { "epoch": 0.36, "grad_norm": 6.466629957347591, "learning_rate": 7.389893845931836e-06, "loss": 0.9283, "step": 2904 }, { "epoch": 0.36, "grad_norm": 7.791643811784247, "learning_rate": 7.388124297646608e-06, "loss": 1.6172, "step": 2905 }, { "epoch": 0.36, "grad_norm": 7.1751395569681415, "learning_rate": 7.3863543617556365e-06, "loss": 1.3055, "step": 2906 }, { "epoch": 0.36, "grad_norm": 8.402737342544956, "learning_rate": 7.38458403854619e-06, "loss": 1.479, "step": 2907 }, { "epoch": 0.36, "grad_norm": 7.121358588751526, "learning_rate": 7.382813328305603e-06, "loss": 1.3026, "step": 2908 }, { "epoch": 0.36, "grad_norm": 8.48454045620521, "learning_rate": 7.381042231321269e-06, "loss": 1.5352, "step": 2909 }, { "epoch": 0.36, "grad_norm": 6.753207726954905, "learning_rate": 7.379270747880651e-06, "loss": 1.0404, "step": 2910 }, { "epoch": 0.36, "grad_norm": 7.82770992160556, "learning_rate": 7.377498878271266e-06, "loss": 1.6875, "step": 2911 }, { "epoch": 0.36, "grad_norm": 6.812002558943274, "learning_rate": 7.375726622780703e-06, "loss": 1.404, "step": 2912 }, { "epoch": 0.36, "grad_norm": 9.385330226456972, "learning_rate": 7.373953981696604e-06, "loss": 1.3008, "step": 2913 }, { "epoch": 0.36, "grad_norm": 6.922746475520488, "learning_rate": 7.372180955306682e-06, "loss": 1.1228, "step": 2914 }, { "epoch": 0.36, "grad_norm": 7.470713521617846, "learning_rate": 7.370407543898708e-06, "loss": 1.3051, "step": 2915 }, { "epoch": 0.36, "grad_norm": 11.474913773461667, "learning_rate": 7.368633747760515e-06, "loss": 1.3288, "step": 2916 }, { "epoch": 0.36, "grad_norm": 8.931769887828198, "learning_rate": 7.366859567180001e-06, "loss": 1.6011, "step": 2917 }, { "epoch": 0.36, "grad_norm": 7.883806152361969, "learning_rate": 7.365085002445126e-06, "loss": 1.6684, "step": 2918 }, { "epoch": 0.36, "grad_norm": 8.298771674650258, "learning_rate": 7.363310053843912e-06, "loss": 1.4568, "step": 2919 }, { "epoch": 0.36, "grad_norm": 7.401980504596427, "learning_rate": 7.3615347216644395e-06, "loss": 1.5045, "step": 2920 }, { "epoch": 0.36, "grad_norm": 6.227548314059527, "learning_rate": 7.359759006194859e-06, "loss": 1.1428, "step": 2921 }, { "epoch": 0.36, "grad_norm": 7.305356238640684, "learning_rate": 7.357982907723376e-06, "loss": 1.3813, "step": 2922 }, { "epoch": 0.36, "grad_norm": 6.903524692016631, "learning_rate": 7.356206426538262e-06, "loss": 1.2253, "step": 2923 }, { "epoch": 0.36, "grad_norm": 6.890618341907931, "learning_rate": 7.354429562927851e-06, "loss": 1.3574, "step": 2924 }, { "epoch": 0.36, "grad_norm": 7.895287922624681, "learning_rate": 7.352652317180536e-06, "loss": 1.5199, "step": 2925 }, { "epoch": 0.36, "grad_norm": 6.628504312946157, "learning_rate": 7.350874689584774e-06, "loss": 1.4434, "step": 2926 }, { "epoch": 0.36, "grad_norm": 7.579125403563312, "learning_rate": 7.349096680429084e-06, "loss": 1.6167, "step": 2927 }, { "epoch": 0.36, "grad_norm": 7.779608556422343, "learning_rate": 7.347318290002048e-06, "loss": 1.2132, "step": 2928 }, { "epoch": 0.36, "grad_norm": 6.786043412887394, "learning_rate": 7.345539518592305e-06, "loss": 1.549, "step": 2929 }, { "epoch": 0.36, "grad_norm": 7.2833361500610625, "learning_rate": 7.343760366488564e-06, "loss": 1.1954, "step": 2930 }, { "epoch": 0.36, "grad_norm": 5.489909942337669, "learning_rate": 7.341980833979588e-06, "loss": 1.117, "step": 2931 }, { "epoch": 0.36, "grad_norm": 6.5250263087655025, "learning_rate": 7.340200921354206e-06, "loss": 1.3476, "step": 2932 }, { "epoch": 0.36, "grad_norm": 6.972704160773317, "learning_rate": 7.338420628901308e-06, "loss": 1.2346, "step": 2933 }, { "epoch": 0.36, "grad_norm": 6.470747807976048, "learning_rate": 7.336639956909845e-06, "loss": 1.1643, "step": 2934 }, { "epoch": 0.37, "grad_norm": 8.068728881939283, "learning_rate": 7.334858905668829e-06, "loss": 1.3818, "step": 2935 }, { "epoch": 0.37, "grad_norm": 7.197129981743408, "learning_rate": 7.333077475467336e-06, "loss": 1.3235, "step": 2936 }, { "epoch": 0.37, "grad_norm": 7.276959464887171, "learning_rate": 7.3312956665945e-06, "loss": 1.4023, "step": 2937 }, { "epoch": 0.37, "grad_norm": 7.971474041294657, "learning_rate": 7.329513479339521e-06, "loss": 1.453, "step": 2938 }, { "epoch": 0.37, "grad_norm": 6.985337026565677, "learning_rate": 7.327730913991655e-06, "loss": 1.434, "step": 2939 }, { "epoch": 0.37, "grad_norm": 7.2513408837834135, "learning_rate": 7.325947970840223e-06, "loss": 1.3595, "step": 2940 }, { "epoch": 0.37, "grad_norm": 6.674145779795376, "learning_rate": 7.32416465017461e-06, "loss": 1.2304, "step": 2941 }, { "epoch": 0.37, "grad_norm": 7.488125969085522, "learning_rate": 7.322380952284254e-06, "loss": 1.3709, "step": 2942 }, { "epoch": 0.37, "grad_norm": 7.902284031481835, "learning_rate": 7.320596877458661e-06, "loss": 1.7721, "step": 2943 }, { "epoch": 0.37, "grad_norm": 7.3521728573251846, "learning_rate": 7.3188124259873946e-06, "loss": 1.4875, "step": 2944 }, { "epoch": 0.37, "grad_norm": 7.1113994275196415, "learning_rate": 7.317027598160085e-06, "loss": 1.223, "step": 2945 }, { "epoch": 0.37, "grad_norm": 7.215730029179533, "learning_rate": 7.315242394266415e-06, "loss": 1.2285, "step": 2946 }, { "epoch": 0.37, "grad_norm": 6.67525249046276, "learning_rate": 7.313456814596136e-06, "loss": 1.3352, "step": 2947 }, { "epoch": 0.37, "grad_norm": 7.385024398909831, "learning_rate": 7.311670859439057e-06, "loss": 1.7111, "step": 2948 }, { "epoch": 0.37, "grad_norm": 7.712521656937057, "learning_rate": 7.309884529085048e-06, "loss": 1.7407, "step": 2949 }, { "epoch": 0.37, "grad_norm": 5.813869732774425, "learning_rate": 7.30809782382404e-06, "loss": 1.0866, "step": 2950 }, { "epoch": 0.37, "grad_norm": 5.7268043700310045, "learning_rate": 7.306310743946024e-06, "loss": 1.231, "step": 2951 }, { "epoch": 0.37, "grad_norm": 6.793016705969294, "learning_rate": 7.304523289741057e-06, "loss": 1.2711, "step": 2952 }, { "epoch": 0.37, "grad_norm": 7.8166119741986595, "learning_rate": 7.302735461499247e-06, "loss": 1.1222, "step": 2953 }, { "epoch": 0.37, "grad_norm": 7.32319604556701, "learning_rate": 7.300947259510773e-06, "loss": 1.7696, "step": 2954 }, { "epoch": 0.37, "grad_norm": 6.636656283646482, "learning_rate": 7.299158684065868e-06, "loss": 1.4103, "step": 2955 }, { "epoch": 0.37, "grad_norm": 7.453712111566898, "learning_rate": 7.297369735454828e-06, "loss": 1.2556, "step": 2956 }, { "epoch": 0.37, "grad_norm": 12.961547762467527, "learning_rate": 7.2955804139680105e-06, "loss": 1.3383, "step": 2957 }, { "epoch": 0.37, "grad_norm": 7.651025179984964, "learning_rate": 7.29379071989583e-06, "loss": 1.4144, "step": 2958 }, { "epoch": 0.37, "grad_norm": 6.920342149343349, "learning_rate": 7.292000653528765e-06, "loss": 1.3244, "step": 2959 }, { "epoch": 0.37, "grad_norm": 7.3763008025344465, "learning_rate": 7.290210215157353e-06, "loss": 1.6266, "step": 2960 }, { "epoch": 0.37, "grad_norm": 6.605485799689356, "learning_rate": 7.288419405072191e-06, "loss": 1.263, "step": 2961 }, { "epoch": 0.37, "grad_norm": 7.574534774412999, "learning_rate": 7.286628223563939e-06, "loss": 1.3079, "step": 2962 }, { "epoch": 0.37, "grad_norm": 6.772949732493545, "learning_rate": 7.2848366709233146e-06, "loss": 1.2745, "step": 2963 }, { "epoch": 0.37, "grad_norm": 8.536589014630776, "learning_rate": 7.283044747441096e-06, "loss": 1.3069, "step": 2964 }, { "epoch": 0.37, "grad_norm": 6.372672927263807, "learning_rate": 7.281252453408125e-06, "loss": 1.2549, "step": 2965 }, { "epoch": 0.37, "grad_norm": 6.8061219090196845, "learning_rate": 7.279459789115297e-06, "loss": 1.4838, "step": 2966 }, { "epoch": 0.37, "grad_norm": 7.16547990544716, "learning_rate": 7.277666754853576e-06, "loss": 1.383, "step": 2967 }, { "epoch": 0.37, "grad_norm": 7.567270120529105, "learning_rate": 7.275873350913977e-06, "loss": 1.4896, "step": 2968 }, { "epoch": 0.37, "grad_norm": 8.42713996929094, "learning_rate": 7.274079577587582e-06, "loss": 1.428, "step": 2969 }, { "epoch": 0.37, "grad_norm": 7.508894743711066, "learning_rate": 7.2722854351655285e-06, "loss": 1.4887, "step": 2970 }, { "epoch": 0.37, "grad_norm": 8.848605767915037, "learning_rate": 7.270490923939015e-06, "loss": 1.5294, "step": 2971 }, { "epoch": 0.37, "grad_norm": 7.0238982676096215, "learning_rate": 7.268696044199305e-06, "loss": 1.1074, "step": 2972 }, { "epoch": 0.37, "grad_norm": 7.891120944831367, "learning_rate": 7.266900796237714e-06, "loss": 1.5549, "step": 2973 }, { "epoch": 0.37, "grad_norm": 8.521968809955709, "learning_rate": 7.26510518034562e-06, "loss": 1.2854, "step": 2974 }, { "epoch": 0.37, "grad_norm": 8.130903257275666, "learning_rate": 7.263309196814462e-06, "loss": 1.4077, "step": 2975 }, { "epoch": 0.37, "grad_norm": 7.056992539328299, "learning_rate": 7.261512845935741e-06, "loss": 1.5156, "step": 2976 }, { "epoch": 0.37, "grad_norm": 7.2813973598200175, "learning_rate": 7.25971612800101e-06, "loss": 1.5215, "step": 2977 }, { "epoch": 0.37, "grad_norm": 6.444046746649158, "learning_rate": 7.2579190433018895e-06, "loss": 1.4026, "step": 2978 }, { "epoch": 0.37, "grad_norm": 6.121658771973048, "learning_rate": 7.2561215921300545e-06, "loss": 1.1909, "step": 2979 }, { "epoch": 0.37, "grad_norm": 6.653636165419632, "learning_rate": 7.254323774777242e-06, "loss": 1.1308, "step": 2980 }, { "epoch": 0.37, "grad_norm": 8.208312723822445, "learning_rate": 7.252525591535249e-06, "loss": 1.3737, "step": 2981 }, { "epoch": 0.37, "grad_norm": 7.521728351005115, "learning_rate": 7.250727042695928e-06, "loss": 1.0859, "step": 2982 }, { "epoch": 0.37, "grad_norm": 8.029867671713024, "learning_rate": 7.248928128551194e-06, "loss": 1.6034, "step": 2983 }, { "epoch": 0.37, "grad_norm": 7.539058054580553, "learning_rate": 7.247128849393023e-06, "loss": 1.2656, "step": 2984 }, { "epoch": 0.37, "grad_norm": 6.632056552428981, "learning_rate": 7.245329205513444e-06, "loss": 1.0773, "step": 2985 }, { "epoch": 0.37, "grad_norm": 7.950255586965209, "learning_rate": 7.243529197204552e-06, "loss": 1.3676, "step": 2986 }, { "epoch": 0.37, "grad_norm": 6.520853640501035, "learning_rate": 7.2417288247584985e-06, "loss": 1.0789, "step": 2987 }, { "epoch": 0.37, "grad_norm": 7.926992449511775, "learning_rate": 7.239928088467491e-06, "loss": 1.5751, "step": 2988 }, { "epoch": 0.37, "grad_norm": 7.1200557945762, "learning_rate": 7.238126988623801e-06, "loss": 1.4802, "step": 2989 }, { "epoch": 0.37, "grad_norm": 6.749862554463937, "learning_rate": 7.2363255255197596e-06, "loss": 1.4494, "step": 2990 }, { "epoch": 0.37, "grad_norm": 7.166174897883759, "learning_rate": 7.2345236994477505e-06, "loss": 1.1168, "step": 2991 }, { "epoch": 0.37, "grad_norm": 7.057948670768718, "learning_rate": 7.23272151070022e-06, "loss": 1.3151, "step": 2992 }, { "epoch": 0.37, "grad_norm": 8.965829722951828, "learning_rate": 7.230918959569675e-06, "loss": 1.6043, "step": 2993 }, { "epoch": 0.37, "grad_norm": 7.60396978701187, "learning_rate": 7.229116046348678e-06, "loss": 1.1197, "step": 2994 }, { "epoch": 0.37, "grad_norm": 9.377517056331973, "learning_rate": 7.227312771329852e-06, "loss": 1.3248, "step": 2995 }, { "epoch": 0.37, "grad_norm": 7.52130341191522, "learning_rate": 7.225509134805879e-06, "loss": 1.44, "step": 2996 }, { "epoch": 0.37, "grad_norm": 7.149617381550289, "learning_rate": 7.2237051370695e-06, "loss": 1.2663, "step": 2997 }, { "epoch": 0.37, "grad_norm": 7.766393639605771, "learning_rate": 7.221900778413513e-06, "loss": 1.4174, "step": 2998 }, { "epoch": 0.37, "grad_norm": 7.461462829568352, "learning_rate": 7.220096059130774e-06, "loss": 1.5802, "step": 2999 }, { "epoch": 0.37, "grad_norm": 6.472000058926781, "learning_rate": 7.218290979514202e-06, "loss": 1.1397, "step": 3000 }, { "epoch": 0.37, "grad_norm": 6.605636714655705, "learning_rate": 7.216485539856767e-06, "loss": 1.6004, "step": 3001 }, { "epoch": 0.37, "grad_norm": 6.198445574835882, "learning_rate": 7.214679740451507e-06, "loss": 1.2785, "step": 3002 }, { "epoch": 0.37, "grad_norm": 6.881581456473118, "learning_rate": 7.212873581591508e-06, "loss": 1.4055, "step": 3003 }, { "epoch": 0.37, "grad_norm": 6.438157835937341, "learning_rate": 7.2110670635699235e-06, "loss": 1.0234, "step": 3004 }, { "epoch": 0.37, "grad_norm": 7.487033020997775, "learning_rate": 7.209260186679959e-06, "loss": 1.3207, "step": 3005 }, { "epoch": 0.37, "grad_norm": 12.119559615733195, "learning_rate": 7.207452951214883e-06, "loss": 1.411, "step": 3006 }, { "epoch": 0.37, "grad_norm": 6.855350945061209, "learning_rate": 7.205645357468016e-06, "loss": 1.1767, "step": 3007 }, { "epoch": 0.37, "grad_norm": 7.505179944827579, "learning_rate": 7.203837405732744e-06, "loss": 1.5689, "step": 3008 }, { "epoch": 0.37, "grad_norm": 6.545922454626935, "learning_rate": 7.202029096302507e-06, "loss": 1.3173, "step": 3009 }, { "epoch": 0.37, "grad_norm": 7.379613237719814, "learning_rate": 7.2002204294708025e-06, "loss": 1.0432, "step": 3010 }, { "epoch": 0.37, "grad_norm": 7.642548900871553, "learning_rate": 7.198411405531187e-06, "loss": 1.5205, "step": 3011 }, { "epoch": 0.37, "grad_norm": 7.452019651152293, "learning_rate": 7.196602024777275e-06, "loss": 1.583, "step": 3012 }, { "epoch": 0.37, "grad_norm": 5.975329812708787, "learning_rate": 7.19479228750274e-06, "loss": 1.2884, "step": 3013 }, { "epoch": 0.37, "grad_norm": 6.565532572402471, "learning_rate": 7.192982194001312e-06, "loss": 1.0765, "step": 3014 }, { "epoch": 0.38, "grad_norm": 7.459985939127417, "learning_rate": 7.191171744566779e-06, "loss": 1.4475, "step": 3015 }, { "epoch": 0.38, "grad_norm": 6.924692415515238, "learning_rate": 7.189360939492987e-06, "loss": 1.4023, "step": 3016 }, { "epoch": 0.38, "grad_norm": 6.238716692571666, "learning_rate": 7.187549779073839e-06, "loss": 1.1858, "step": 3017 }, { "epoch": 0.38, "grad_norm": 7.749985989238348, "learning_rate": 7.185738263603297e-06, "loss": 1.347, "step": 3018 }, { "epoch": 0.38, "grad_norm": 7.748363451506793, "learning_rate": 7.18392639337538e-06, "loss": 1.443, "step": 3019 }, { "epoch": 0.38, "grad_norm": 7.1037515511584095, "learning_rate": 7.1821141686841646e-06, "loss": 1.6149, "step": 3020 }, { "epoch": 0.38, "grad_norm": 6.6657388808558515, "learning_rate": 7.180301589823784e-06, "loss": 1.5438, "step": 3021 }, { "epoch": 0.38, "grad_norm": 6.8547127115626285, "learning_rate": 7.178488657088432e-06, "loss": 1.4104, "step": 3022 }, { "epoch": 0.38, "grad_norm": 7.14810302630672, "learning_rate": 7.176675370772354e-06, "loss": 1.2524, "step": 3023 }, { "epoch": 0.38, "grad_norm": 7.226627321535791, "learning_rate": 7.17486173116986e-06, "loss": 1.072, "step": 3024 }, { "epoch": 0.38, "grad_norm": 7.529377054081769, "learning_rate": 7.17304773857531e-06, "loss": 1.4479, "step": 3025 }, { "epoch": 0.38, "grad_norm": 6.948659456908665, "learning_rate": 7.171233393283128e-06, "loss": 1.1996, "step": 3026 }, { "epoch": 0.38, "grad_norm": 7.110627959401271, "learning_rate": 7.169418695587791e-06, "loss": 1.5274, "step": 3027 }, { "epoch": 0.38, "grad_norm": 6.889291926632822, "learning_rate": 7.167603645783835e-06, "loss": 1.2315, "step": 3028 }, { "epoch": 0.38, "grad_norm": 6.202951506688075, "learning_rate": 7.1657882441658535e-06, "loss": 1.3487, "step": 3029 }, { "epoch": 0.38, "grad_norm": 6.855935959501138, "learning_rate": 7.163972491028494e-06, "loss": 1.4725, "step": 3030 }, { "epoch": 0.38, "grad_norm": 7.6314362543142265, "learning_rate": 7.162156386666463e-06, "loss": 1.4685, "step": 3031 }, { "epoch": 0.38, "grad_norm": 6.582243556953762, "learning_rate": 7.160339931374527e-06, "loss": 1.1806, "step": 3032 }, { "epoch": 0.38, "grad_norm": 9.271319515610577, "learning_rate": 7.158523125447505e-06, "loss": 1.4211, "step": 3033 }, { "epoch": 0.38, "grad_norm": 5.787676631485372, "learning_rate": 7.156705969180274e-06, "loss": 0.8792, "step": 3034 }, { "epoch": 0.38, "grad_norm": 6.85211770025717, "learning_rate": 7.154888462867771e-06, "loss": 1.197, "step": 3035 }, { "epoch": 0.38, "grad_norm": 7.052892069767848, "learning_rate": 7.153070606804985e-06, "loss": 1.1036, "step": 3036 }, { "epoch": 0.38, "grad_norm": 7.105005227881272, "learning_rate": 7.151252401286963e-06, "loss": 1.443, "step": 3037 }, { "epoch": 0.38, "grad_norm": 7.206387845557656, "learning_rate": 7.149433846608814e-06, "loss": 1.2111, "step": 3038 }, { "epoch": 0.38, "grad_norm": 7.669504989959052, "learning_rate": 7.147614943065697e-06, "loss": 1.3371, "step": 3039 }, { "epoch": 0.38, "grad_norm": 8.088768041904263, "learning_rate": 7.145795690952828e-06, "loss": 1.5468, "step": 3040 }, { "epoch": 0.38, "grad_norm": 6.769702457318653, "learning_rate": 7.143976090565484e-06, "loss": 1.4494, "step": 3041 }, { "epoch": 0.38, "grad_norm": 8.136834734128847, "learning_rate": 7.142156142198997e-06, "loss": 1.2452, "step": 3042 }, { "epoch": 0.38, "grad_norm": 8.02473941743444, "learning_rate": 7.140335846148752e-06, "loss": 1.1308, "step": 3043 }, { "epoch": 0.38, "grad_norm": 6.579203517554048, "learning_rate": 7.138515202710197e-06, "loss": 1.3168, "step": 3044 }, { "epoch": 0.38, "grad_norm": 6.758514207065505, "learning_rate": 7.136694212178828e-06, "loss": 1.3474, "step": 3045 }, { "epoch": 0.38, "grad_norm": 7.28766521539317, "learning_rate": 7.134872874850203e-06, "loss": 1.3778, "step": 3046 }, { "epoch": 0.38, "grad_norm": 6.787765606341087, "learning_rate": 7.133051191019938e-06, "loss": 1.3116, "step": 3047 }, { "epoch": 0.38, "grad_norm": 6.651715274864986, "learning_rate": 7.1312291609837e-06, "loss": 1.0916, "step": 3048 }, { "epoch": 0.38, "grad_norm": 6.151015847738426, "learning_rate": 7.129406785037214e-06, "loss": 1.2372, "step": 3049 }, { "epoch": 0.38, "grad_norm": 7.256177229197652, "learning_rate": 7.127584063476264e-06, "loss": 1.4843, "step": 3050 }, { "epoch": 0.38, "grad_norm": 7.062986170915797, "learning_rate": 7.125760996596685e-06, "loss": 1.4931, "step": 3051 }, { "epoch": 0.38, "grad_norm": 7.427374948186064, "learning_rate": 7.123937584694372e-06, "loss": 1.1164, "step": 3052 }, { "epoch": 0.38, "grad_norm": 7.395290218398944, "learning_rate": 7.1221138280652755e-06, "loss": 1.1875, "step": 3053 }, { "epoch": 0.38, "grad_norm": 8.874889400299246, "learning_rate": 7.1202897270054e-06, "loss": 1.4002, "step": 3054 }, { "epoch": 0.38, "grad_norm": 7.5818884168565095, "learning_rate": 7.118465281810808e-06, "loss": 1.3575, "step": 3055 }, { "epoch": 0.38, "grad_norm": 7.345291061186066, "learning_rate": 7.116640492777617e-06, "loss": 1.313, "step": 3056 }, { "epoch": 0.38, "grad_norm": 8.90968343004141, "learning_rate": 7.1148153602019994e-06, "loss": 1.3585, "step": 3057 }, { "epoch": 0.38, "grad_norm": 7.639906659165473, "learning_rate": 7.112989884380186e-06, "loss": 1.4377, "step": 3058 }, { "epoch": 0.38, "grad_norm": 8.242880940404017, "learning_rate": 7.11116406560846e-06, "loss": 1.4269, "step": 3059 }, { "epoch": 0.38, "grad_norm": 7.1469500967587765, "learning_rate": 7.1093379041831625e-06, "loss": 1.118, "step": 3060 }, { "epoch": 0.38, "grad_norm": 6.320331316664949, "learning_rate": 7.10751140040069e-06, "loss": 0.8996, "step": 3061 }, { "epoch": 0.38, "grad_norm": 6.9164964212812, "learning_rate": 7.1056845545574935e-06, "loss": 1.306, "step": 3062 }, { "epoch": 0.38, "grad_norm": 6.372467783314111, "learning_rate": 7.103857366950081e-06, "loss": 1.2309, "step": 3063 }, { "epoch": 0.38, "grad_norm": 6.9141968268653216, "learning_rate": 7.102029837875013e-06, "loss": 1.3169, "step": 3064 }, { "epoch": 0.38, "grad_norm": 7.722422784263902, "learning_rate": 7.1002019676289095e-06, "loss": 1.3801, "step": 3065 }, { "epoch": 0.38, "grad_norm": 7.121842776898309, "learning_rate": 7.098373756508443e-06, "loss": 1.3951, "step": 3066 }, { "epoch": 0.38, "grad_norm": 6.367675871980896, "learning_rate": 7.0965452048103435e-06, "loss": 1.138, "step": 3067 }, { "epoch": 0.38, "grad_norm": 6.87526779376713, "learning_rate": 7.094716312831395e-06, "loss": 1.4084, "step": 3068 }, { "epoch": 0.38, "grad_norm": 6.821013719809869, "learning_rate": 7.092887080868433e-06, "loss": 1.5932, "step": 3069 }, { "epoch": 0.38, "grad_norm": 7.836534560827064, "learning_rate": 7.091057509218357e-06, "loss": 1.2083, "step": 3070 }, { "epoch": 0.38, "grad_norm": 6.418824867819507, "learning_rate": 7.089227598178114e-06, "loss": 1.1419, "step": 3071 }, { "epoch": 0.38, "grad_norm": 7.239263132032867, "learning_rate": 7.087397348044711e-06, "loss": 1.2547, "step": 3072 }, { "epoch": 0.38, "grad_norm": 7.566570637896618, "learning_rate": 7.085566759115203e-06, "loss": 1.7115, "step": 3073 }, { "epoch": 0.38, "grad_norm": 6.422881231999468, "learning_rate": 7.083735831686709e-06, "loss": 1.0975, "step": 3074 }, { "epoch": 0.38, "grad_norm": 6.350920757685998, "learning_rate": 7.081904566056397e-06, "loss": 1.2899, "step": 3075 }, { "epoch": 0.38, "grad_norm": 9.123625641836297, "learning_rate": 7.080072962521491e-06, "loss": 1.0908, "step": 3076 }, { "epoch": 0.38, "grad_norm": 6.2614128129962126, "learning_rate": 7.078241021379272e-06, "loss": 0.6804, "step": 3077 }, { "epoch": 0.38, "grad_norm": 7.959706513548462, "learning_rate": 7.076408742927072e-06, "loss": 1.2777, "step": 3078 }, { "epoch": 0.38, "grad_norm": 7.6398725269319945, "learning_rate": 7.074576127462281e-06, "loss": 1.3122, "step": 3079 }, { "epoch": 0.38, "grad_norm": 7.685989844154971, "learning_rate": 7.072743175282344e-06, "loss": 1.2301, "step": 3080 }, { "epoch": 0.38, "grad_norm": 6.610465148712688, "learning_rate": 7.070909886684757e-06, "loss": 1.1896, "step": 3081 }, { "epoch": 0.38, "grad_norm": 6.908030680803642, "learning_rate": 7.069076261967073e-06, "loss": 1.4817, "step": 3082 }, { "epoch": 0.38, "grad_norm": 6.8169004484478615, "learning_rate": 7.067242301426902e-06, "loss": 1.2739, "step": 3083 }, { "epoch": 0.38, "grad_norm": 7.382035546086754, "learning_rate": 7.065408005361902e-06, "loss": 1.5357, "step": 3084 }, { "epoch": 0.38, "grad_norm": 6.890078511681892, "learning_rate": 7.063573374069792e-06, "loss": 1.1682, "step": 3085 }, { "epoch": 0.38, "grad_norm": 6.693484799505776, "learning_rate": 7.061738407848342e-06, "loss": 1.2144, "step": 3086 }, { "epoch": 0.38, "grad_norm": 7.030018989626344, "learning_rate": 7.059903106995378e-06, "loss": 1.539, "step": 3087 }, { "epoch": 0.38, "grad_norm": 6.567147495207866, "learning_rate": 7.058067471808778e-06, "loss": 1.2832, "step": 3088 }, { "epoch": 0.38, "grad_norm": 7.7065666086082025, "learning_rate": 7.056231502586477e-06, "loss": 1.5619, "step": 3089 }, { "epoch": 0.38, "grad_norm": 8.12211375785371, "learning_rate": 7.054395199626463e-06, "loss": 1.6428, "step": 3090 }, { "epoch": 0.38, "grad_norm": 7.119870635987888, "learning_rate": 7.052558563226777e-06, "loss": 1.3446, "step": 3091 }, { "epoch": 0.38, "grad_norm": 7.1104985537800784, "learning_rate": 7.050721593685516e-06, "loss": 1.0999, "step": 3092 }, { "epoch": 0.38, "grad_norm": 8.943775942885116, "learning_rate": 7.048884291300828e-06, "loss": 1.9544, "step": 3093 }, { "epoch": 0.38, "grad_norm": 7.3710249941717, "learning_rate": 7.047046656370922e-06, "loss": 1.1946, "step": 3094 }, { "epoch": 0.38, "grad_norm": 9.211191187079914, "learning_rate": 7.045208689194052e-06, "loss": 1.1834, "step": 3095 }, { "epoch": 0.39, "grad_norm": 7.97463127801682, "learning_rate": 7.043370390068533e-06, "loss": 1.4201, "step": 3096 }, { "epoch": 0.39, "grad_norm": 7.193880606409011, "learning_rate": 7.041531759292729e-06, "loss": 1.4493, "step": 3097 }, { "epoch": 0.39, "grad_norm": 6.5916953798306634, "learning_rate": 7.039692797165061e-06, "loss": 1.1669, "step": 3098 }, { "epoch": 0.39, "grad_norm": 7.83066009966982, "learning_rate": 7.037853503984002e-06, "loss": 1.5507, "step": 3099 }, { "epoch": 0.39, "grad_norm": 6.401005314378438, "learning_rate": 7.0360138800480806e-06, "loss": 1.2273, "step": 3100 }, { "epoch": 0.39, "grad_norm": 7.948706324986523, "learning_rate": 7.0341739256558785e-06, "loss": 1.2685, "step": 3101 }, { "epoch": 0.39, "grad_norm": 7.195580425993789, "learning_rate": 7.032333641106027e-06, "loss": 1.4625, "step": 3102 }, { "epoch": 0.39, "grad_norm": 7.466602156930661, "learning_rate": 7.030493026697217e-06, "loss": 1.4978, "step": 3103 }, { "epoch": 0.39, "grad_norm": 7.063390310054983, "learning_rate": 7.0286520827281914e-06, "loss": 1.3008, "step": 3104 }, { "epoch": 0.39, "grad_norm": 6.4541894207144725, "learning_rate": 7.026810809497744e-06, "loss": 1.1727, "step": 3105 }, { "epoch": 0.39, "grad_norm": 7.226401187193324, "learning_rate": 7.024969207304721e-06, "loss": 1.2007, "step": 3106 }, { "epoch": 0.39, "grad_norm": 7.2385170502653695, "learning_rate": 7.0231272764480304e-06, "loss": 1.3006, "step": 3107 }, { "epoch": 0.39, "grad_norm": 7.519095343340609, "learning_rate": 7.021285017226623e-06, "loss": 1.2153, "step": 3108 }, { "epoch": 0.39, "grad_norm": 7.416833282043927, "learning_rate": 7.01944242993951e-06, "loss": 1.1504, "step": 3109 }, { "epoch": 0.39, "grad_norm": 6.964873015096004, "learning_rate": 7.0175995148857535e-06, "loss": 1.6657, "step": 3110 }, { "epoch": 0.39, "grad_norm": 6.645316073961686, "learning_rate": 7.015756272364467e-06, "loss": 1.5529, "step": 3111 }, { "epoch": 0.39, "grad_norm": 7.871990426089743, "learning_rate": 7.013912702674821e-06, "loss": 1.1793, "step": 3112 }, { "epoch": 0.39, "grad_norm": 7.027907633051961, "learning_rate": 7.012068806116035e-06, "loss": 1.415, "step": 3113 }, { "epoch": 0.39, "grad_norm": 8.387816221282408, "learning_rate": 7.010224582987386e-06, "loss": 1.3941, "step": 3114 }, { "epoch": 0.39, "grad_norm": 7.54825467528844, "learning_rate": 7.008380033588198e-06, "loss": 1.3251, "step": 3115 }, { "epoch": 0.39, "grad_norm": 7.590233070956722, "learning_rate": 7.0065351582178565e-06, "loss": 1.2827, "step": 3116 }, { "epoch": 0.39, "grad_norm": 7.264746897303788, "learning_rate": 7.00468995717579e-06, "loss": 1.2114, "step": 3117 }, { "epoch": 0.39, "grad_norm": 7.2466265273839, "learning_rate": 7.002844430761487e-06, "loss": 1.358, "step": 3118 }, { "epoch": 0.39, "grad_norm": 6.25709705842742, "learning_rate": 7.000998579274487e-06, "loss": 1.1936, "step": 3119 }, { "epoch": 0.39, "grad_norm": 7.426329957575425, "learning_rate": 6.9991524030143835e-06, "loss": 1.2602, "step": 3120 }, { "epoch": 0.39, "grad_norm": 6.106359088755422, "learning_rate": 6.997305902280816e-06, "loss": 1.0081, "step": 3121 }, { "epoch": 0.39, "grad_norm": 7.3716680402288155, "learning_rate": 6.995459077373486e-06, "loss": 1.2831, "step": 3122 }, { "epoch": 0.39, "grad_norm": 6.77992556038628, "learning_rate": 6.993611928592143e-06, "loss": 1.2619, "step": 3123 }, { "epoch": 0.39, "grad_norm": 7.442224869146485, "learning_rate": 6.9917644562365885e-06, "loss": 1.2044, "step": 3124 }, { "epoch": 0.39, "grad_norm": 6.790740947513769, "learning_rate": 6.989916660606677e-06, "loss": 1.3646, "step": 3125 }, { "epoch": 0.39, "grad_norm": 7.604071944758076, "learning_rate": 6.988068542002316e-06, "loss": 1.1521, "step": 3126 }, { "epoch": 0.39, "grad_norm": 7.59042838998222, "learning_rate": 6.986220100723467e-06, "loss": 1.1799, "step": 3127 }, { "epoch": 0.39, "grad_norm": 7.394781186034701, "learning_rate": 6.98437133707014e-06, "loss": 0.9612, "step": 3128 }, { "epoch": 0.39, "grad_norm": 6.832543927397344, "learning_rate": 6.982522251342402e-06, "loss": 1.3059, "step": 3129 }, { "epoch": 0.39, "grad_norm": 6.944993923290635, "learning_rate": 6.980672843840367e-06, "loss": 1.0586, "step": 3130 }, { "epoch": 0.39, "grad_norm": 7.803620369950858, "learning_rate": 6.978823114864206e-06, "loss": 1.3224, "step": 3131 }, { "epoch": 0.39, "grad_norm": 6.676698448418638, "learning_rate": 6.97697306471414e-06, "loss": 1.2831, "step": 3132 }, { "epoch": 0.39, "grad_norm": 6.537012748889769, "learning_rate": 6.9751226936904415e-06, "loss": 1.2566, "step": 3133 }, { "epoch": 0.39, "grad_norm": 7.6014632986565465, "learning_rate": 6.973272002093437e-06, "loss": 1.5295, "step": 3134 }, { "epoch": 0.39, "grad_norm": 9.829135807104024, "learning_rate": 6.971420990223503e-06, "loss": 1.6568, "step": 3135 }, { "epoch": 0.39, "grad_norm": 7.089045718257773, "learning_rate": 6.96956965838107e-06, "loss": 1.1665, "step": 3136 }, { "epoch": 0.39, "grad_norm": 7.685491262380775, "learning_rate": 6.967718006866617e-06, "loss": 1.3588, "step": 3137 }, { "epoch": 0.39, "grad_norm": 6.7929118507736845, "learning_rate": 6.9658660359806806e-06, "loss": 1.2851, "step": 3138 }, { "epoch": 0.39, "grad_norm": 7.9153124357816464, "learning_rate": 6.964013746023843e-06, "loss": 1.5008, "step": 3139 }, { "epoch": 0.39, "grad_norm": 6.85283270901661, "learning_rate": 6.9621611372967436e-06, "loss": 1.3039, "step": 3140 }, { "epoch": 0.39, "grad_norm": 7.280371400933804, "learning_rate": 6.9603082101000685e-06, "loss": 1.2545, "step": 3141 }, { "epoch": 0.39, "grad_norm": 8.014207454890979, "learning_rate": 6.95845496473456e-06, "loss": 1.7531, "step": 3142 }, { "epoch": 0.39, "grad_norm": 7.889180740963926, "learning_rate": 6.956601401501009e-06, "loss": 1.2336, "step": 3143 }, { "epoch": 0.39, "grad_norm": 6.820385747366435, "learning_rate": 6.95474752070026e-06, "loss": 1.3945, "step": 3144 }, { "epoch": 0.39, "grad_norm": 6.889423112128766, "learning_rate": 6.952893322633206e-06, "loss": 1.3153, "step": 3145 }, { "epoch": 0.39, "grad_norm": 7.099499892204131, "learning_rate": 6.951038807600795e-06, "loss": 1.314, "step": 3146 }, { "epoch": 0.39, "grad_norm": 6.759542841216756, "learning_rate": 6.949183975904027e-06, "loss": 0.8457, "step": 3147 }, { "epoch": 0.39, "grad_norm": 7.376720186136386, "learning_rate": 6.947328827843946e-06, "loss": 1.7601, "step": 3148 }, { "epoch": 0.39, "grad_norm": 7.717040705256296, "learning_rate": 6.94547336372166e-06, "loss": 1.372, "step": 3149 }, { "epoch": 0.39, "grad_norm": 6.183636858341724, "learning_rate": 6.943617583838314e-06, "loss": 1.2408, "step": 3150 }, { "epoch": 0.39, "grad_norm": 6.931464978520713, "learning_rate": 6.9417614884951156e-06, "loss": 1.1338, "step": 3151 }, { "epoch": 0.39, "grad_norm": 8.799464649867822, "learning_rate": 6.93990507799332e-06, "loss": 1.3776, "step": 3152 }, { "epoch": 0.39, "grad_norm": 6.723710888480347, "learning_rate": 6.93804835263423e-06, "loss": 1.2234, "step": 3153 }, { "epoch": 0.39, "grad_norm": 6.500267738066911, "learning_rate": 6.9361913127192026e-06, "loss": 1.1676, "step": 3154 }, { "epoch": 0.39, "grad_norm": 7.546725963430792, "learning_rate": 6.934333958549646e-06, "loss": 1.5848, "step": 3155 }, { "epoch": 0.39, "grad_norm": 6.344557746085594, "learning_rate": 6.932476290427022e-06, "loss": 1.3525, "step": 3156 }, { "epoch": 0.39, "grad_norm": 7.041166781956692, "learning_rate": 6.9306183086528355e-06, "loss": 1.3696, "step": 3157 }, { "epoch": 0.39, "grad_norm": 7.338857890469015, "learning_rate": 6.928760013528651e-06, "loss": 1.3363, "step": 3158 }, { "epoch": 0.39, "grad_norm": 6.971280284168217, "learning_rate": 6.926901405356078e-06, "loss": 1.3325, "step": 3159 }, { "epoch": 0.39, "grad_norm": 7.96758862659454, "learning_rate": 6.9250424844367795e-06, "loss": 1.6952, "step": 3160 }, { "epoch": 0.39, "grad_norm": 7.632489274161425, "learning_rate": 6.923183251072468e-06, "loss": 1.056, "step": 3161 }, { "epoch": 0.39, "grad_norm": 7.761730983055054, "learning_rate": 6.92132370556491e-06, "loss": 1.3628, "step": 3162 }, { "epoch": 0.39, "grad_norm": 6.784174582352303, "learning_rate": 6.919463848215916e-06, "loss": 1.3421, "step": 3163 }, { "epoch": 0.39, "grad_norm": 6.150232990416884, "learning_rate": 6.917603679327353e-06, "loss": 1.5304, "step": 3164 }, { "epoch": 0.39, "grad_norm": 6.623202253010177, "learning_rate": 6.915743199201138e-06, "loss": 1.1499, "step": 3165 }, { "epoch": 0.39, "grad_norm": 7.840122177105482, "learning_rate": 6.913882408139234e-06, "loss": 1.5805, "step": 3166 }, { "epoch": 0.39, "grad_norm": 7.4385944842460825, "learning_rate": 6.9120213064436615e-06, "loss": 1.33, "step": 3167 }, { "epoch": 0.39, "grad_norm": 7.56372263046594, "learning_rate": 6.910159894416485e-06, "loss": 0.9273, "step": 3168 }, { "epoch": 0.39, "grad_norm": 7.483456888543731, "learning_rate": 6.908298172359822e-06, "loss": 1.3005, "step": 3169 }, { "epoch": 0.39, "grad_norm": 6.698034764230802, "learning_rate": 6.906436140575841e-06, "loss": 1.38, "step": 3170 }, { "epoch": 0.39, "grad_norm": 7.082915243853786, "learning_rate": 6.9045737993667605e-06, "loss": 1.4592, "step": 3171 }, { "epoch": 0.39, "grad_norm": 7.429608038467086, "learning_rate": 6.9027111490348475e-06, "loss": 1.4697, "step": 3172 }, { "epoch": 0.39, "grad_norm": 6.9673184990191315, "learning_rate": 6.9008481898824206e-06, "loss": 1.3024, "step": 3173 }, { "epoch": 0.39, "grad_norm": 6.97299843663385, "learning_rate": 6.89898492221185e-06, "loss": 1.2187, "step": 3174 }, { "epoch": 0.39, "grad_norm": 6.860008312534382, "learning_rate": 6.897121346325552e-06, "loss": 1.4897, "step": 3175 }, { "epoch": 0.4, "grad_norm": 6.537331200479758, "learning_rate": 6.895257462525997e-06, "loss": 1.2094, "step": 3176 }, { "epoch": 0.4, "grad_norm": 7.058026950448199, "learning_rate": 6.893393271115703e-06, "loss": 1.2039, "step": 3177 }, { "epoch": 0.4, "grad_norm": 7.173153173977404, "learning_rate": 6.891528772397237e-06, "loss": 1.2502, "step": 3178 }, { "epoch": 0.4, "grad_norm": 7.63219277333443, "learning_rate": 6.889663966673219e-06, "loss": 1.2089, "step": 3179 }, { "epoch": 0.4, "grad_norm": 7.183391621932782, "learning_rate": 6.8877988542463195e-06, "loss": 1.2511, "step": 3180 }, { "epoch": 0.4, "grad_norm": 6.961787774703828, "learning_rate": 6.885933435419252e-06, "loss": 1.4187, "step": 3181 }, { "epoch": 0.4, "grad_norm": 6.549485094436257, "learning_rate": 6.884067710494788e-06, "loss": 1.1261, "step": 3182 }, { "epoch": 0.4, "grad_norm": 7.741061583971592, "learning_rate": 6.882201679775742e-06, "loss": 1.2449, "step": 3183 }, { "epoch": 0.4, "grad_norm": 6.508769872840415, "learning_rate": 6.880335343564982e-06, "loss": 1.279, "step": 3184 }, { "epoch": 0.4, "grad_norm": 6.833510340884731, "learning_rate": 6.878468702165427e-06, "loss": 1.2017, "step": 3185 }, { "epoch": 0.4, "grad_norm": 6.579732780776143, "learning_rate": 6.876601755880041e-06, "loss": 1.1082, "step": 3186 }, { "epoch": 0.4, "grad_norm": 7.345433099213599, "learning_rate": 6.8747345050118395e-06, "loss": 1.1967, "step": 3187 }, { "epoch": 0.4, "grad_norm": 7.148363882339698, "learning_rate": 6.872866949863888e-06, "loss": 1.2264, "step": 3188 }, { "epoch": 0.4, "grad_norm": 7.300914710380571, "learning_rate": 6.870999090739301e-06, "loss": 1.4715, "step": 3189 }, { "epoch": 0.4, "grad_norm": 7.295086280288811, "learning_rate": 6.869130927941243e-06, "loss": 1.3723, "step": 3190 }, { "epoch": 0.4, "grad_norm": 7.028325722637927, "learning_rate": 6.8672624617729264e-06, "loss": 1.1473, "step": 3191 }, { "epoch": 0.4, "grad_norm": 6.2746000287458354, "learning_rate": 6.865393692537614e-06, "loss": 0.9237, "step": 3192 }, { "epoch": 0.4, "grad_norm": 8.261025419836464, "learning_rate": 6.863524620538618e-06, "loss": 1.1742, "step": 3193 }, { "epoch": 0.4, "grad_norm": 6.902895154924453, "learning_rate": 6.861655246079295e-06, "loss": 1.1887, "step": 3194 }, { "epoch": 0.4, "grad_norm": 7.406565438353667, "learning_rate": 6.8597855694630615e-06, "loss": 1.3507, "step": 3195 }, { "epoch": 0.4, "grad_norm": 7.031917094112791, "learning_rate": 6.857915590993371e-06, "loss": 1.3086, "step": 3196 }, { "epoch": 0.4, "grad_norm": 6.879490079143116, "learning_rate": 6.856045310973733e-06, "loss": 0.8665, "step": 3197 }, { "epoch": 0.4, "grad_norm": 7.081356469819483, "learning_rate": 6.854174729707706e-06, "loss": 1.3156, "step": 3198 }, { "epoch": 0.4, "grad_norm": 8.052491786113691, "learning_rate": 6.852303847498891e-06, "loss": 1.3338, "step": 3199 }, { "epoch": 0.4, "grad_norm": 7.645712893925105, "learning_rate": 6.850432664650948e-06, "loss": 1.2527, "step": 3200 }, { "epoch": 0.4, "grad_norm": 7.592271873222584, "learning_rate": 6.848561181467578e-06, "loss": 1.2324, "step": 3201 }, { "epoch": 0.4, "grad_norm": 6.6020262675098484, "learning_rate": 6.846689398252532e-06, "loss": 1.0131, "step": 3202 }, { "epoch": 0.4, "grad_norm": 8.072541529796117, "learning_rate": 6.844817315309611e-06, "loss": 1.3571, "step": 3203 }, { "epoch": 0.4, "grad_norm": 8.59757574321529, "learning_rate": 6.8429449329426665e-06, "loss": 1.4284, "step": 3204 }, { "epoch": 0.4, "grad_norm": 8.224683050275582, "learning_rate": 6.8410722514555935e-06, "loss": 0.9768, "step": 3205 }, { "epoch": 0.4, "grad_norm": 6.695007160246424, "learning_rate": 6.8391992711523415e-06, "loss": 1.13, "step": 3206 }, { "epoch": 0.4, "grad_norm": 7.021097192321507, "learning_rate": 6.837325992336903e-06, "loss": 1.2459, "step": 3207 }, { "epoch": 0.4, "grad_norm": 6.4720862277180204, "learning_rate": 6.8354524153133225e-06, "loss": 1.4834, "step": 3208 }, { "epoch": 0.4, "grad_norm": 7.519493017529687, "learning_rate": 6.833578540385694e-06, "loss": 1.6792, "step": 3209 }, { "epoch": 0.4, "grad_norm": 6.209042863706544, "learning_rate": 6.831704367858154e-06, "loss": 1.0509, "step": 3210 }, { "epoch": 0.4, "grad_norm": 6.829559105554859, "learning_rate": 6.829829898034892e-06, "loss": 1.4153, "step": 3211 }, { "epoch": 0.4, "grad_norm": 6.737182225239016, "learning_rate": 6.827955131220146e-06, "loss": 1.2493, "step": 3212 }, { "epoch": 0.4, "grad_norm": 6.884634049430163, "learning_rate": 6.826080067718201e-06, "loss": 0.9696, "step": 3213 }, { "epoch": 0.4, "grad_norm": 6.698354110938295, "learning_rate": 6.82420470783339e-06, "loss": 1.1881, "step": 3214 }, { "epoch": 0.4, "grad_norm": 6.878763712507927, "learning_rate": 6.822329051870094e-06, "loss": 1.4575, "step": 3215 }, { "epoch": 0.4, "grad_norm": 6.686358333632487, "learning_rate": 6.82045310013274e-06, "loss": 1.2271, "step": 3216 }, { "epoch": 0.4, "grad_norm": 7.313982009623097, "learning_rate": 6.818576852925809e-06, "loss": 1.3286, "step": 3217 }, { "epoch": 0.4, "grad_norm": 7.256077679824202, "learning_rate": 6.816700310553826e-06, "loss": 1.5289, "step": 3218 }, { "epoch": 0.4, "grad_norm": 7.676066353259249, "learning_rate": 6.814823473321361e-06, "loss": 1.3368, "step": 3219 }, { "epoch": 0.4, "grad_norm": 7.8618729108043395, "learning_rate": 6.812946341533038e-06, "loss": 1.5736, "step": 3220 }, { "epoch": 0.4, "grad_norm": 7.0626869555847, "learning_rate": 6.811068915493524e-06, "loss": 1.2521, "step": 3221 }, { "epoch": 0.4, "grad_norm": 7.120960171659196, "learning_rate": 6.809191195507538e-06, "loss": 1.1196, "step": 3222 }, { "epoch": 0.4, "grad_norm": 5.6084149499382665, "learning_rate": 6.807313181879841e-06, "loss": 1.1127, "step": 3223 }, { "epoch": 0.4, "grad_norm": 7.481929916799767, "learning_rate": 6.805434874915249e-06, "loss": 1.3583, "step": 3224 }, { "epoch": 0.4, "grad_norm": 7.559954370136801, "learning_rate": 6.8035562749186165e-06, "loss": 1.4591, "step": 3225 }, { "epoch": 0.4, "grad_norm": 6.594521522728597, "learning_rate": 6.801677382194856e-06, "loss": 1.2588, "step": 3226 }, { "epoch": 0.4, "grad_norm": 7.749792646021396, "learning_rate": 6.799798197048918e-06, "loss": 1.4729, "step": 3227 }, { "epoch": 0.4, "grad_norm": 7.069564368923474, "learning_rate": 6.7979187197858064e-06, "loss": 1.3117, "step": 3228 }, { "epoch": 0.4, "grad_norm": 7.869337064543839, "learning_rate": 6.796038950710571e-06, "loss": 1.1603, "step": 3229 }, { "epoch": 0.4, "grad_norm": 7.326564114636915, "learning_rate": 6.794158890128308e-06, "loss": 1.1603, "step": 3230 }, { "epoch": 0.4, "grad_norm": 8.315226167564296, "learning_rate": 6.792278538344161e-06, "loss": 1.2588, "step": 3231 }, { "epoch": 0.4, "grad_norm": 7.364203616382472, "learning_rate": 6.790397895663323e-06, "loss": 1.2234, "step": 3232 }, { "epoch": 0.4, "grad_norm": 7.075397236253644, "learning_rate": 6.788516962391032e-06, "loss": 1.3789, "step": 3233 }, { "epoch": 0.4, "grad_norm": 6.762450146728834, "learning_rate": 6.786635738832574e-06, "loss": 1.3212, "step": 3234 }, { "epoch": 0.4, "grad_norm": 7.789739620447631, "learning_rate": 6.784754225293282e-06, "loss": 1.0042, "step": 3235 }, { "epoch": 0.4, "grad_norm": 6.3209611861473665, "learning_rate": 6.782872422078534e-06, "loss": 1.3158, "step": 3236 }, { "epoch": 0.4, "grad_norm": 6.907525315371903, "learning_rate": 6.780990329493762e-06, "loss": 1.1065, "step": 3237 }, { "epoch": 0.4, "grad_norm": 7.049239998547458, "learning_rate": 6.779107947844434e-06, "loss": 1.6311, "step": 3238 }, { "epoch": 0.4, "grad_norm": 6.890465870177423, "learning_rate": 6.777225277436076e-06, "loss": 1.2577, "step": 3239 }, { "epoch": 0.4, "grad_norm": 12.881212726937518, "learning_rate": 6.775342318574252e-06, "loss": 1.0433, "step": 3240 }, { "epoch": 0.4, "grad_norm": 6.864647444448446, "learning_rate": 6.773459071564578e-06, "loss": 1.1577, "step": 3241 }, { "epoch": 0.4, "grad_norm": 8.203263435663612, "learning_rate": 6.771575536712715e-06, "loss": 1.3358, "step": 3242 }, { "epoch": 0.4, "grad_norm": 6.89353444977224, "learning_rate": 6.769691714324375e-06, "loss": 1.2419, "step": 3243 }, { "epoch": 0.4, "grad_norm": 7.111386723225005, "learning_rate": 6.7678076047053056e-06, "loss": 1.3847, "step": 3244 }, { "epoch": 0.4, "grad_norm": 6.930313211593933, "learning_rate": 6.765923208161313e-06, "loss": 1.2855, "step": 3245 }, { "epoch": 0.4, "grad_norm": 7.076975885486087, "learning_rate": 6.764038524998246e-06, "loss": 1.2337, "step": 3246 }, { "epoch": 0.4, "grad_norm": 6.696570708287766, "learning_rate": 6.7621535555219955e-06, "loss": 1.1782, "step": 3247 }, { "epoch": 0.4, "grad_norm": 7.629823136481959, "learning_rate": 6.760268300038504e-06, "loss": 1.315, "step": 3248 }, { "epoch": 0.4, "grad_norm": 6.59591938457593, "learning_rate": 6.758382758853759e-06, "loss": 1.3219, "step": 3249 }, { "epoch": 0.4, "grad_norm": 6.466401831796938, "learning_rate": 6.756496932273795e-06, "loss": 1.0433, "step": 3250 }, { "epoch": 0.4, "grad_norm": 7.4951773971265, "learning_rate": 6.754610820604691e-06, "loss": 1.4142, "step": 3251 }, { "epoch": 0.4, "grad_norm": 6.85326791377405, "learning_rate": 6.752724424152575e-06, "loss": 1.5103, "step": 3252 }, { "epoch": 0.4, "grad_norm": 6.853298372641397, "learning_rate": 6.750837743223615e-06, "loss": 1.0098, "step": 3253 }, { "epoch": 0.4, "grad_norm": 6.301980349737015, "learning_rate": 6.7489507781240345e-06, "loss": 1.0541, "step": 3254 }, { "epoch": 0.4, "grad_norm": 6.721092776679157, "learning_rate": 6.747063529160097e-06, "loss": 1.287, "step": 3255 }, { "epoch": 0.4, "grad_norm": 6.863526275758656, "learning_rate": 6.745175996638114e-06, "loss": 1.3614, "step": 3256 }, { "epoch": 0.41, "grad_norm": 6.950601407236796, "learning_rate": 6.7432881808644405e-06, "loss": 1.1346, "step": 3257 }, { "epoch": 0.41, "grad_norm": 7.042927855874705, "learning_rate": 6.741400082145481e-06, "loss": 1.4319, "step": 3258 }, { "epoch": 0.41, "grad_norm": 7.55547094481074, "learning_rate": 6.739511700787683e-06, "loss": 1.4455, "step": 3259 }, { "epoch": 0.41, "grad_norm": 8.206672798731512, "learning_rate": 6.737623037097542e-06, "loss": 1.5001, "step": 3260 }, { "epoch": 0.41, "grad_norm": 7.4677147848946355, "learning_rate": 6.735734091381599e-06, "loss": 1.3571, "step": 3261 }, { "epoch": 0.41, "grad_norm": 6.894435938284899, "learning_rate": 6.73384486394644e-06, "loss": 1.1721, "step": 3262 }, { "epoch": 0.41, "grad_norm": 6.958349555184033, "learning_rate": 6.731955355098697e-06, "loss": 1.1352, "step": 3263 }, { "epoch": 0.41, "grad_norm": 7.024237269100211, "learning_rate": 6.730065565145047e-06, "loss": 1.23, "step": 3264 }, { "epoch": 0.41, "grad_norm": 6.971749315461588, "learning_rate": 6.728175494392215e-06, "loss": 1.5053, "step": 3265 }, { "epoch": 0.41, "grad_norm": 6.90106765238463, "learning_rate": 6.726285143146969e-06, "loss": 1.1058, "step": 3266 }, { "epoch": 0.41, "grad_norm": 5.548557932737351, "learning_rate": 6.724394511716123e-06, "loss": 0.8456, "step": 3267 }, { "epoch": 0.41, "grad_norm": 7.682098097229667, "learning_rate": 6.722503600406536e-06, "loss": 1.4046, "step": 3268 }, { "epoch": 0.41, "grad_norm": 6.452261458363907, "learning_rate": 6.720612409525115e-06, "loss": 1.2225, "step": 3269 }, { "epoch": 0.41, "grad_norm": 7.148247907422356, "learning_rate": 6.718720939378811e-06, "loss": 1.3898, "step": 3270 }, { "epoch": 0.41, "grad_norm": 8.193710000725947, "learning_rate": 6.716829190274618e-06, "loss": 1.5863, "step": 3271 }, { "epoch": 0.41, "grad_norm": 7.0424341603003775, "learning_rate": 6.714937162519582e-06, "loss": 1.2964, "step": 3272 }, { "epoch": 0.41, "grad_norm": 7.0279441767111415, "learning_rate": 6.713044856420781e-06, "loss": 1.4294, "step": 3273 }, { "epoch": 0.41, "grad_norm": 6.132086698382137, "learning_rate": 6.711152272285355e-06, "loss": 0.8062, "step": 3274 }, { "epoch": 0.41, "grad_norm": 7.098049423925363, "learning_rate": 6.7092594104204775e-06, "loss": 1.4013, "step": 3275 }, { "epoch": 0.41, "grad_norm": 7.188292533908898, "learning_rate": 6.707366271133371e-06, "loss": 1.2239, "step": 3276 }, { "epoch": 0.41, "grad_norm": 7.839209609216187, "learning_rate": 6.705472854731301e-06, "loss": 1.5116, "step": 3277 }, { "epoch": 0.41, "grad_norm": 8.134504161877715, "learning_rate": 6.703579161521581e-06, "loss": 1.647, "step": 3278 }, { "epoch": 0.41, "grad_norm": 7.223534316500262, "learning_rate": 6.7016851918115676e-06, "loss": 1.1778, "step": 3279 }, { "epoch": 0.41, "grad_norm": 6.806392378797176, "learning_rate": 6.699790945908662e-06, "loss": 1.0587, "step": 3280 }, { "epoch": 0.41, "grad_norm": 6.947039601585861, "learning_rate": 6.6978964241203105e-06, "loss": 1.2952, "step": 3281 }, { "epoch": 0.41, "grad_norm": 6.830908052914101, "learning_rate": 6.696001626754005e-06, "loss": 0.951, "step": 3282 }, { "epoch": 0.41, "grad_norm": 5.972534723360004, "learning_rate": 6.694106554117283e-06, "loss": 1.1029, "step": 3283 }, { "epoch": 0.41, "grad_norm": 6.40159475297843, "learning_rate": 6.692211206517723e-06, "loss": 1.1979, "step": 3284 }, { "epoch": 0.41, "grad_norm": 6.190946253887046, "learning_rate": 6.690315584262951e-06, "loss": 1.3368, "step": 3285 }, { "epoch": 0.41, "grad_norm": 7.300475354938414, "learning_rate": 6.688419687660636e-06, "loss": 1.5053, "step": 3286 }, { "epoch": 0.41, "grad_norm": 11.614077179381317, "learning_rate": 6.686523517018494e-06, "loss": 1.1181, "step": 3287 }, { "epoch": 0.41, "grad_norm": 6.610653221529385, "learning_rate": 6.684627072644285e-06, "loss": 1.4409, "step": 3288 }, { "epoch": 0.41, "grad_norm": 6.725413451237029, "learning_rate": 6.682730354845809e-06, "loss": 1.2462, "step": 3289 }, { "epoch": 0.41, "grad_norm": 7.893137649428737, "learning_rate": 6.680833363930918e-06, "loss": 1.6958, "step": 3290 }, { "epoch": 0.41, "grad_norm": 6.25262526125293, "learning_rate": 6.6789361002075e-06, "loss": 1.1361, "step": 3291 }, { "epoch": 0.41, "grad_norm": 6.389578413966869, "learning_rate": 6.677038563983493e-06, "loss": 0.8426, "step": 3292 }, { "epoch": 0.41, "grad_norm": 10.686280102298644, "learning_rate": 6.675140755566878e-06, "loss": 1.4477, "step": 3293 }, { "epoch": 0.41, "grad_norm": 7.379340154738099, "learning_rate": 6.67324267526568e-06, "loss": 1.1478, "step": 3294 }, { "epoch": 0.41, "grad_norm": 7.848728648612662, "learning_rate": 6.671344323387967e-06, "loss": 1.2298, "step": 3295 }, { "epoch": 0.41, "grad_norm": 6.3200716390402345, "learning_rate": 6.669445700241852e-06, "loss": 1.4042, "step": 3296 }, { "epoch": 0.41, "grad_norm": 6.708710096151954, "learning_rate": 6.667546806135493e-06, "loss": 1.135, "step": 3297 }, { "epoch": 0.41, "grad_norm": 7.239139338218638, "learning_rate": 6.665647641377089e-06, "loss": 1.0657, "step": 3298 }, { "epoch": 0.41, "grad_norm": 6.946231776969581, "learning_rate": 6.6637482062748884e-06, "loss": 1.3175, "step": 3299 }, { "epoch": 0.41, "grad_norm": 8.510505316333864, "learning_rate": 6.661848501137177e-06, "loss": 1.5041, "step": 3300 }, { "epoch": 0.41, "grad_norm": 6.760735533772271, "learning_rate": 6.6599485262722885e-06, "loss": 1.2747, "step": 3301 }, { "epoch": 0.41, "grad_norm": 7.1580697519385765, "learning_rate": 6.658048281988598e-06, "loss": 1.5863, "step": 3302 }, { "epoch": 0.41, "grad_norm": 7.002927580049122, "learning_rate": 6.6561477685945275e-06, "loss": 1.0708, "step": 3303 }, { "epoch": 0.41, "grad_norm": 7.800574760514426, "learning_rate": 6.654246986398539e-06, "loss": 1.1512, "step": 3304 }, { "epoch": 0.41, "grad_norm": 8.469311894271776, "learning_rate": 6.652345935709144e-06, "loss": 1.3986, "step": 3305 }, { "epoch": 0.41, "grad_norm": 7.39977734049119, "learning_rate": 6.6504446168348864e-06, "loss": 1.1687, "step": 3306 }, { "epoch": 0.41, "grad_norm": 7.020291534611604, "learning_rate": 6.648543030084368e-06, "loss": 1.8142, "step": 3307 }, { "epoch": 0.41, "grad_norm": 7.322457523160402, "learning_rate": 6.646641175766222e-06, "loss": 1.4363, "step": 3308 }, { "epoch": 0.41, "grad_norm": 6.99836579363666, "learning_rate": 6.644739054189132e-06, "loss": 1.2755, "step": 3309 }, { "epoch": 0.41, "grad_norm": 7.603135352255211, "learning_rate": 6.642836665661821e-06, "loss": 1.5371, "step": 3310 }, { "epoch": 0.41, "grad_norm": 7.424628079243526, "learning_rate": 6.64093401049306e-06, "loss": 1.118, "step": 3311 }, { "epoch": 0.41, "grad_norm": 6.428967930063724, "learning_rate": 6.639031088991658e-06, "loss": 1.1322, "step": 3312 }, { "epoch": 0.41, "grad_norm": 6.5968651709266215, "learning_rate": 6.637127901466471e-06, "loss": 1.0842, "step": 3313 }, { "epoch": 0.41, "grad_norm": 7.8855759572613495, "learning_rate": 6.635224448226397e-06, "loss": 1.5024, "step": 3314 }, { "epoch": 0.41, "grad_norm": 8.644887350254436, "learning_rate": 6.633320729580376e-06, "loss": 1.5296, "step": 3315 }, { "epoch": 0.41, "grad_norm": 7.202360955239574, "learning_rate": 6.631416745837391e-06, "loss": 1.2098, "step": 3316 }, { "epoch": 0.41, "grad_norm": 6.02216108545717, "learning_rate": 6.6295124973064715e-06, "loss": 1.2779, "step": 3317 }, { "epoch": 0.41, "grad_norm": 6.418165551565493, "learning_rate": 6.627607984296687e-06, "loss": 1.2113, "step": 3318 }, { "epoch": 0.41, "grad_norm": 7.667730728609564, "learning_rate": 6.625703207117149e-06, "loss": 1.5023, "step": 3319 }, { "epoch": 0.41, "grad_norm": 5.493511171297822, "learning_rate": 6.623798166077014e-06, "loss": 1.0976, "step": 3320 }, { "epoch": 0.41, "grad_norm": 5.9541466749393965, "learning_rate": 6.6218928614854815e-06, "loss": 1.4827, "step": 3321 }, { "epoch": 0.41, "grad_norm": 7.840618103464141, "learning_rate": 6.6199872936517915e-06, "loss": 1.6491, "step": 3322 }, { "epoch": 0.41, "grad_norm": 6.445048407558466, "learning_rate": 6.618081462885231e-06, "loss": 1.3283, "step": 3323 }, { "epoch": 0.41, "grad_norm": 7.8246124088040405, "learning_rate": 6.616175369495124e-06, "loss": 1.2539, "step": 3324 }, { "epoch": 0.41, "grad_norm": 6.835147814016342, "learning_rate": 6.61426901379084e-06, "loss": 1.3278, "step": 3325 }, { "epoch": 0.41, "grad_norm": 7.8102936735711594, "learning_rate": 6.612362396081792e-06, "loss": 1.1629, "step": 3326 }, { "epoch": 0.41, "grad_norm": 6.867589979069988, "learning_rate": 6.610455516677435e-06, "loss": 1.2181, "step": 3327 }, { "epoch": 0.41, "grad_norm": 7.491671852091165, "learning_rate": 6.608548375887265e-06, "loss": 1.6301, "step": 3328 }, { "epoch": 0.41, "grad_norm": 7.50394563797532, "learning_rate": 6.606640974020824e-06, "loss": 1.1942, "step": 3329 }, { "epoch": 0.41, "grad_norm": 7.516642002844632, "learning_rate": 6.6047333113876885e-06, "loss": 0.9544, "step": 3330 }, { "epoch": 0.41, "grad_norm": 7.890658954440843, "learning_rate": 6.602825388297487e-06, "loss": 1.0776, "step": 3331 }, { "epoch": 0.41, "grad_norm": 7.100513156799813, "learning_rate": 6.600917205059887e-06, "loss": 1.3363, "step": 3332 }, { "epoch": 0.41, "grad_norm": 6.7642494816385, "learning_rate": 6.599008761984593e-06, "loss": 1.342, "step": 3333 }, { "epoch": 0.41, "grad_norm": 6.9402290299387674, "learning_rate": 6.597100059381359e-06, "loss": 1.2436, "step": 3334 }, { "epoch": 0.41, "grad_norm": 6.351175565856537, "learning_rate": 6.595191097559976e-06, "loss": 1.0811, "step": 3335 }, { "epoch": 0.41, "grad_norm": 6.392832069166369, "learning_rate": 6.593281876830281e-06, "loss": 0.9747, "step": 3336 }, { "epoch": 0.42, "grad_norm": 6.921029956781781, "learning_rate": 6.591372397502149e-06, "loss": 1.2501, "step": 3337 }, { "epoch": 0.42, "grad_norm": 7.769183890018873, "learning_rate": 6.589462659885501e-06, "loss": 1.3825, "step": 3338 }, { "epoch": 0.42, "grad_norm": 9.625967278701395, "learning_rate": 6.587552664290296e-06, "loss": 1.1333, "step": 3339 }, { "epoch": 0.42, "grad_norm": 7.29933939228196, "learning_rate": 6.585642411026538e-06, "loss": 1.0828, "step": 3340 }, { "epoch": 0.42, "grad_norm": 8.0307453257761, "learning_rate": 6.583731900404271e-06, "loss": 1.4579, "step": 3341 }, { "epoch": 0.42, "grad_norm": 206.28560352517195, "learning_rate": 6.581821132733583e-06, "loss": 1.0781, "step": 3342 }, { "epoch": 0.42, "grad_norm": 18.101774353510592, "learning_rate": 6.5799101083246e-06, "loss": 1.3129, "step": 3343 }, { "epoch": 0.42, "grad_norm": 8.109694116999016, "learning_rate": 6.5779988274874925e-06, "loss": 1.4171, "step": 3344 }, { "epoch": 0.42, "grad_norm": 6.94036580940106, "learning_rate": 6.576087290532474e-06, "loss": 1.2076, "step": 3345 }, { "epoch": 0.42, "grad_norm": 7.599724861585616, "learning_rate": 6.574175497769794e-06, "loss": 1.0203, "step": 3346 }, { "epoch": 0.42, "grad_norm": 7.744272759579828, "learning_rate": 6.572263449509749e-06, "loss": 1.8873, "step": 3347 }, { "epoch": 0.42, "grad_norm": 6.555595912498585, "learning_rate": 6.570351146062675e-06, "loss": 0.7439, "step": 3348 }, { "epoch": 0.42, "grad_norm": 6.623966375297591, "learning_rate": 6.568438587738948e-06, "loss": 1.1603, "step": 3349 }, { "epoch": 0.42, "grad_norm": 6.845678230939311, "learning_rate": 6.566525774848988e-06, "loss": 1.1247, "step": 3350 }, { "epoch": 0.42, "grad_norm": 7.009603077475582, "learning_rate": 6.564612707703256e-06, "loss": 1.3261, "step": 3351 }, { "epoch": 0.42, "grad_norm": 7.5025337322918455, "learning_rate": 6.562699386612252e-06, "loss": 1.0199, "step": 3352 }, { "epoch": 0.42, "grad_norm": 7.172317196814871, "learning_rate": 6.5607858118865185e-06, "loss": 1.2093, "step": 3353 }, { "epoch": 0.42, "grad_norm": 7.687287185773236, "learning_rate": 6.558871983836639e-06, "loss": 1.5844, "step": 3354 }, { "epoch": 0.42, "grad_norm": 6.602875327880276, "learning_rate": 6.556957902773239e-06, "loss": 1.1162, "step": 3355 }, { "epoch": 0.42, "grad_norm": 7.505469242744738, "learning_rate": 6.555043569006986e-06, "loss": 1.5333, "step": 3356 }, { "epoch": 0.42, "grad_norm": 7.976470064957901, "learning_rate": 6.553128982848584e-06, "loss": 1.4716, "step": 3357 }, { "epoch": 0.42, "grad_norm": 8.60978716202553, "learning_rate": 6.551214144608782e-06, "loss": 1.6633, "step": 3358 }, { "epoch": 0.42, "grad_norm": 8.826549897997598, "learning_rate": 6.549299054598368e-06, "loss": 1.2351, "step": 3359 }, { "epoch": 0.42, "grad_norm": 6.637684052960058, "learning_rate": 6.547383713128175e-06, "loss": 1.5098, "step": 3360 }, { "epoch": 0.42, "grad_norm": 7.123341428082485, "learning_rate": 6.545468120509068e-06, "loss": 1.4817, "step": 3361 }, { "epoch": 0.42, "grad_norm": 7.938265249676804, "learning_rate": 6.5435522770519645e-06, "loss": 1.7713, "step": 3362 }, { "epoch": 0.42, "grad_norm": 7.401978591851219, "learning_rate": 6.541636183067811e-06, "loss": 1.2853, "step": 3363 }, { "epoch": 0.42, "grad_norm": 8.689493657215108, "learning_rate": 6.539719838867604e-06, "loss": 1.1306, "step": 3364 }, { "epoch": 0.42, "grad_norm": 7.0801050604837155, "learning_rate": 6.537803244762373e-06, "loss": 1.6912, "step": 3365 }, { "epoch": 0.42, "grad_norm": 6.871200664699808, "learning_rate": 6.535886401063196e-06, "loss": 1.3999, "step": 3366 }, { "epoch": 0.42, "grad_norm": 6.5278230762286285, "learning_rate": 6.533969308081185e-06, "loss": 1.0703, "step": 3367 }, { "epoch": 0.42, "grad_norm": 6.624858559452185, "learning_rate": 6.532051966127493e-06, "loss": 1.4991, "step": 3368 }, { "epoch": 0.42, "grad_norm": 6.880100720806356, "learning_rate": 6.530134375513319e-06, "loss": 1.1089, "step": 3369 }, { "epoch": 0.42, "grad_norm": 6.816670334644742, "learning_rate": 6.528216536549896e-06, "loss": 1.2964, "step": 3370 }, { "epoch": 0.42, "grad_norm": 7.339571818090004, "learning_rate": 6.526298449548502e-06, "loss": 1.594, "step": 3371 }, { "epoch": 0.42, "grad_norm": 7.142406047176489, "learning_rate": 6.5243801148204524e-06, "loss": 1.4402, "step": 3372 }, { "epoch": 0.42, "grad_norm": 6.96197498204895, "learning_rate": 6.5224615326771e-06, "loss": 1.5921, "step": 3373 }, { "epoch": 0.42, "grad_norm": 5.744185238446315, "learning_rate": 6.520542703429844e-06, "loss": 1.1844, "step": 3374 }, { "epoch": 0.42, "grad_norm": 8.731342077497874, "learning_rate": 6.518623627390123e-06, "loss": 1.4439, "step": 3375 }, { "epoch": 0.42, "grad_norm": 5.85309447519222, "learning_rate": 6.51670430486941e-06, "loss": 0.9569, "step": 3376 }, { "epoch": 0.42, "grad_norm": 7.008819469030888, "learning_rate": 6.514784736179222e-06, "loss": 1.2482, "step": 3377 }, { "epoch": 0.42, "grad_norm": 5.684079425848082, "learning_rate": 6.512864921631121e-06, "loss": 0.8552, "step": 3378 }, { "epoch": 0.42, "grad_norm": 6.421692456790443, "learning_rate": 6.510944861536697e-06, "loss": 0.9807, "step": 3379 }, { "epoch": 0.42, "grad_norm": 7.536648854283695, "learning_rate": 6.509024556207588e-06, "loss": 1.445, "step": 3380 }, { "epoch": 0.42, "grad_norm": 8.245753140967127, "learning_rate": 6.507104005955473e-06, "loss": 1.6461, "step": 3381 }, { "epoch": 0.42, "grad_norm": 7.408993445312183, "learning_rate": 6.505183211092063e-06, "loss": 1.1999, "step": 3382 }, { "epoch": 0.42, "grad_norm": 8.552018810055738, "learning_rate": 6.503262171929117e-06, "loss": 1.4716, "step": 3383 }, { "epoch": 0.42, "grad_norm": 6.877413402108163, "learning_rate": 6.501340888778431e-06, "loss": 1.1532, "step": 3384 }, { "epoch": 0.42, "grad_norm": 6.788198887705053, "learning_rate": 6.499419361951837e-06, "loss": 1.274, "step": 3385 }, { "epoch": 0.42, "grad_norm": 7.137836963016958, "learning_rate": 6.497497591761213e-06, "loss": 1.1775, "step": 3386 }, { "epoch": 0.42, "grad_norm": 7.579192531844159, "learning_rate": 6.4955755785184674e-06, "loss": 1.4524, "step": 3387 }, { "epoch": 0.42, "grad_norm": 6.351447010658591, "learning_rate": 6.493653322535558e-06, "loss": 1.2828, "step": 3388 }, { "epoch": 0.42, "grad_norm": 6.6435983114500665, "learning_rate": 6.491730824124478e-06, "loss": 1.2639, "step": 3389 }, { "epoch": 0.42, "grad_norm": 7.012981539179217, "learning_rate": 6.489808083597258e-06, "loss": 1.3358, "step": 3390 }, { "epoch": 0.42, "grad_norm": 7.2889511591871345, "learning_rate": 6.487885101265967e-06, "loss": 1.3079, "step": 3391 }, { "epoch": 0.42, "grad_norm": 7.186211429513817, "learning_rate": 6.4859618774427195e-06, "loss": 1.6005, "step": 3392 }, { "epoch": 0.42, "grad_norm": 7.102499508870087, "learning_rate": 6.4840384124396636e-06, "loss": 1.4014, "step": 3393 }, { "epoch": 0.42, "grad_norm": 7.917351179411186, "learning_rate": 6.482114706568987e-06, "loss": 1.205, "step": 3394 }, { "epoch": 0.42, "grad_norm": 7.143470292968464, "learning_rate": 6.480190760142921e-06, "loss": 1.258, "step": 3395 }, { "epoch": 0.42, "grad_norm": 7.887989671303191, "learning_rate": 6.47826657347373e-06, "loss": 1.0497, "step": 3396 }, { "epoch": 0.42, "grad_norm": 7.281540588797368, "learning_rate": 6.476342146873722e-06, "loss": 1.262, "step": 3397 }, { "epoch": 0.42, "grad_norm": 7.489970976039763, "learning_rate": 6.474417480655241e-06, "loss": 1.5079, "step": 3398 }, { "epoch": 0.42, "grad_norm": 5.231099935128385, "learning_rate": 6.472492575130671e-06, "loss": 1.2038, "step": 3399 }, { "epoch": 0.42, "grad_norm": 7.714820678614393, "learning_rate": 6.470567430612433e-06, "loss": 1.3428, "step": 3400 }, { "epoch": 0.42, "grad_norm": 7.124529871388006, "learning_rate": 6.468642047412992e-06, "loss": 1.4215, "step": 3401 }, { "epoch": 0.42, "grad_norm": 6.772353917609705, "learning_rate": 6.466716425844848e-06, "loss": 1.254, "step": 3402 }, { "epoch": 0.42, "grad_norm": 7.358487063474912, "learning_rate": 6.464790566220535e-06, "loss": 1.2876, "step": 3403 }, { "epoch": 0.42, "grad_norm": 7.734503097300807, "learning_rate": 6.4628644688526385e-06, "loss": 1.3482, "step": 3404 }, { "epoch": 0.42, "grad_norm": 7.762576470832001, "learning_rate": 6.4609381340537695e-06, "loss": 1.5358, "step": 3405 }, { "epoch": 0.42, "grad_norm": 6.788129214873334, "learning_rate": 6.459011562136582e-06, "loss": 1.2561, "step": 3406 }, { "epoch": 0.42, "grad_norm": 7.119578041770831, "learning_rate": 6.457084753413772e-06, "loss": 1.4594, "step": 3407 }, { "epoch": 0.42, "grad_norm": 6.4642217415074095, "learning_rate": 6.455157708198071e-06, "loss": 1.1696, "step": 3408 }, { "epoch": 0.42, "grad_norm": 6.766565628244938, "learning_rate": 6.453230426802247e-06, "loss": 1.3866, "step": 3409 }, { "epoch": 0.42, "grad_norm": 7.973714450280053, "learning_rate": 6.451302909539111e-06, "loss": 1.5389, "step": 3410 }, { "epoch": 0.42, "grad_norm": 7.306013151576965, "learning_rate": 6.449375156721507e-06, "loss": 1.4441, "step": 3411 }, { "epoch": 0.42, "grad_norm": 6.399383094869218, "learning_rate": 6.447447168662322e-06, "loss": 1.0843, "step": 3412 }, { "epoch": 0.42, "grad_norm": 6.4387548348698935, "learning_rate": 6.44551894567448e-06, "loss": 1.1619, "step": 3413 }, { "epoch": 0.42, "grad_norm": 6.996312123770963, "learning_rate": 6.443590488070939e-06, "loss": 1.3048, "step": 3414 }, { "epoch": 0.42, "grad_norm": 6.933723651074584, "learning_rate": 6.4416617961647e-06, "loss": 1.4767, "step": 3415 }, { "epoch": 0.42, "grad_norm": 6.692034261799542, "learning_rate": 6.4397328702688e-06, "loss": 1.3302, "step": 3416 }, { "epoch": 0.42, "grad_norm": 6.995078368254264, "learning_rate": 6.437803710696315e-06, "loss": 1.5849, "step": 3417 }, { "epoch": 0.43, "grad_norm": 5.818071503040927, "learning_rate": 6.435874317760358e-06, "loss": 0.9782, "step": 3418 }, { "epoch": 0.43, "grad_norm": 6.576658552093189, "learning_rate": 6.433944691774081e-06, "loss": 1.2867, "step": 3419 }, { "epoch": 0.43, "grad_norm": 7.146600344772545, "learning_rate": 6.432014833050671e-06, "loss": 1.3306, "step": 3420 }, { "epoch": 0.43, "grad_norm": 6.88443420040618, "learning_rate": 6.430084741903355e-06, "loss": 1.318, "step": 3421 }, { "epoch": 0.43, "grad_norm": 8.447922536285889, "learning_rate": 6.428154418645398e-06, "loss": 1.4095, "step": 3422 }, { "epoch": 0.43, "grad_norm": 6.4269671256347225, "learning_rate": 6.426223863590104e-06, "loss": 1.3724, "step": 3423 }, { "epoch": 0.43, "grad_norm": 6.1997455447524645, "learning_rate": 6.424293077050809e-06, "loss": 0.9118, "step": 3424 }, { "epoch": 0.43, "grad_norm": 6.697248264377314, "learning_rate": 6.422362059340892e-06, "loss": 1.3641, "step": 3425 }, { "epoch": 0.43, "grad_norm": 6.831182348935746, "learning_rate": 6.42043081077377e-06, "loss": 1.1524, "step": 3426 }, { "epoch": 0.43, "grad_norm": 6.2741124315945465, "learning_rate": 6.41849933166289e-06, "loss": 1.0413, "step": 3427 }, { "epoch": 0.43, "grad_norm": 5.865835097591246, "learning_rate": 6.416567622321749e-06, "loss": 0.9623, "step": 3428 }, { "epoch": 0.43, "grad_norm": 7.70091070634748, "learning_rate": 6.414635683063869e-06, "loss": 1.1945, "step": 3429 }, { "epoch": 0.43, "grad_norm": 9.053176993682426, "learning_rate": 6.412703514202814e-06, "loss": 1.3684, "step": 3430 }, { "epoch": 0.43, "grad_norm": 6.906366276686343, "learning_rate": 6.410771116052188e-06, "loss": 1.4194, "step": 3431 }, { "epoch": 0.43, "grad_norm": 6.858849797900948, "learning_rate": 6.408838488925629e-06, "loss": 1.3874, "step": 3432 }, { "epoch": 0.43, "grad_norm": 7.318802618880837, "learning_rate": 6.406905633136813e-06, "loss": 1.628, "step": 3433 }, { "epoch": 0.43, "grad_norm": 6.237623788153155, "learning_rate": 6.404972548999453e-06, "loss": 0.8763, "step": 3434 }, { "epoch": 0.43, "grad_norm": 7.534372907560679, "learning_rate": 6.4030392368273e-06, "loss": 1.265, "step": 3435 }, { "epoch": 0.43, "grad_norm": 7.769886852165851, "learning_rate": 6.40110569693414e-06, "loss": 1.1142, "step": 3436 }, { "epoch": 0.43, "grad_norm": 7.679355235717118, "learning_rate": 6.3991719296338e-06, "loss": 1.2845, "step": 3437 }, { "epoch": 0.43, "grad_norm": 6.916431288435756, "learning_rate": 6.397237935240137e-06, "loss": 1.2578, "step": 3438 }, { "epoch": 0.43, "grad_norm": 7.5261615501415395, "learning_rate": 6.395303714067051e-06, "loss": 1.1213, "step": 3439 }, { "epoch": 0.43, "grad_norm": 7.798685727488645, "learning_rate": 6.3933692664284764e-06, "loss": 1.4478, "step": 3440 }, { "epoch": 0.43, "grad_norm": 7.2085396253999745, "learning_rate": 6.3914345926383855e-06, "loss": 1.0622, "step": 3441 }, { "epoch": 0.43, "grad_norm": 7.3521904140837995, "learning_rate": 6.389499693010787e-06, "loss": 1.0012, "step": 3442 }, { "epoch": 0.43, "grad_norm": 6.292443738400061, "learning_rate": 6.387564567859724e-06, "loss": 1.0677, "step": 3443 }, { "epoch": 0.43, "grad_norm": 6.940527029184039, "learning_rate": 6.385629217499278e-06, "loss": 0.9386, "step": 3444 }, { "epoch": 0.43, "grad_norm": 9.959460796757162, "learning_rate": 6.383693642243568e-06, "loss": 1.3869, "step": 3445 }, { "epoch": 0.43, "grad_norm": 7.222514012284309, "learning_rate": 6.381757842406751e-06, "loss": 1.4795, "step": 3446 }, { "epoch": 0.43, "grad_norm": 7.078812475200138, "learning_rate": 6.379821818303014e-06, "loss": 1.2407, "step": 3447 }, { "epoch": 0.43, "grad_norm": 8.265531095412046, "learning_rate": 6.3778855702465835e-06, "loss": 1.8916, "step": 3448 }, { "epoch": 0.43, "grad_norm": 6.845466096007308, "learning_rate": 6.375949098551727e-06, "loss": 1.2149, "step": 3449 }, { "epoch": 0.43, "grad_norm": 7.498153058294246, "learning_rate": 6.3740124035327436e-06, "loss": 1.439, "step": 3450 }, { "epoch": 0.43, "grad_norm": 7.151715127272846, "learning_rate": 6.372075485503968e-06, "loss": 1.4465, "step": 3451 }, { "epoch": 0.43, "grad_norm": 6.887297316567678, "learning_rate": 6.370138344779775e-06, "loss": 1.1739, "step": 3452 }, { "epoch": 0.43, "grad_norm": 8.437457298300796, "learning_rate": 6.368200981674571e-06, "loss": 1.228, "step": 3453 }, { "epoch": 0.43, "grad_norm": 7.494906063823457, "learning_rate": 6.366263396502803e-06, "loss": 1.0722, "step": 3454 }, { "epoch": 0.43, "grad_norm": 6.960838554073114, "learning_rate": 6.364325589578948e-06, "loss": 1.2874, "step": 3455 }, { "epoch": 0.43, "grad_norm": 7.13594844608075, "learning_rate": 6.362387561217527e-06, "loss": 1.3633, "step": 3456 }, { "epoch": 0.43, "grad_norm": 7.096641002805458, "learning_rate": 6.36044931173309e-06, "loss": 1.0656, "step": 3457 }, { "epoch": 0.43, "grad_norm": 7.1654450991322065, "learning_rate": 6.358510841440227e-06, "loss": 1.2867, "step": 3458 }, { "epoch": 0.43, "grad_norm": 6.9684247822668075, "learning_rate": 6.3565721506535615e-06, "loss": 1.3111, "step": 3459 }, { "epoch": 0.43, "grad_norm": 6.639611473993795, "learning_rate": 6.354633239687754e-06, "loss": 1.3165, "step": 3460 }, { "epoch": 0.43, "grad_norm": 7.892343266331213, "learning_rate": 6.352694108857501e-06, "loss": 1.2403, "step": 3461 }, { "epoch": 0.43, "grad_norm": 7.901879050236811, "learning_rate": 6.350754758477534e-06, "loss": 1.2721, "step": 3462 }, { "epoch": 0.43, "grad_norm": 12.355482899222542, "learning_rate": 6.348815188862618e-06, "loss": 1.2907, "step": 3463 }, { "epoch": 0.43, "grad_norm": 6.103853209911089, "learning_rate": 6.346875400327559e-06, "loss": 1.4746, "step": 3464 }, { "epoch": 0.43, "grad_norm": 7.081362449144599, "learning_rate": 6.344935393187195e-06, "loss": 1.347, "step": 3465 }, { "epoch": 0.43, "grad_norm": 7.08701453059725, "learning_rate": 6.342995167756397e-06, "loss": 1.4815, "step": 3466 }, { "epoch": 0.43, "grad_norm": 7.901412040973769, "learning_rate": 6.341054724350078e-06, "loss": 1.2747, "step": 3467 }, { "epoch": 0.43, "grad_norm": 7.024380024456797, "learning_rate": 6.339114063283179e-06, "loss": 1.3468, "step": 3468 }, { "epoch": 0.43, "grad_norm": 6.291164637104379, "learning_rate": 6.337173184870683e-06, "loss": 1.2885, "step": 3469 }, { "epoch": 0.43, "grad_norm": 7.853243839030905, "learning_rate": 6.335232089427606e-06, "loss": 1.1787, "step": 3470 }, { "epoch": 0.43, "grad_norm": 6.840190293387808, "learning_rate": 6.333290777268995e-06, "loss": 0.9791, "step": 3471 }, { "epoch": 0.43, "grad_norm": 8.30669673130474, "learning_rate": 6.331349248709937e-06, "loss": 1.1789, "step": 3472 }, { "epoch": 0.43, "grad_norm": 7.31844537109696, "learning_rate": 6.329407504065552e-06, "loss": 1.3792, "step": 3473 }, { "epoch": 0.43, "grad_norm": 7.421912110328396, "learning_rate": 6.327465543650999e-06, "loss": 1.4221, "step": 3474 }, { "epoch": 0.43, "grad_norm": 7.720825163174911, "learning_rate": 6.325523367781466e-06, "loss": 1.2802, "step": 3475 }, { "epoch": 0.43, "grad_norm": 7.320406815899673, "learning_rate": 6.323580976772181e-06, "loss": 1.4574, "step": 3476 }, { "epoch": 0.43, "grad_norm": 8.016953961869103, "learning_rate": 6.321638370938403e-06, "loss": 1.4191, "step": 3477 }, { "epoch": 0.43, "grad_norm": 6.378174894530606, "learning_rate": 6.3196955505954286e-06, "loss": 1.1257, "step": 3478 }, { "epoch": 0.43, "grad_norm": 7.2633451253443715, "learning_rate": 6.317752516058586e-06, "loss": 1.184, "step": 3479 }, { "epoch": 0.43, "grad_norm": 6.918221738023453, "learning_rate": 6.315809267643244e-06, "loss": 1.4065, "step": 3480 }, { "epoch": 0.43, "grad_norm": 6.894503914845717, "learning_rate": 6.313865805664799e-06, "loss": 1.0442, "step": 3481 }, { "epoch": 0.43, "grad_norm": 7.328590825832025, "learning_rate": 6.3119221304386895e-06, "loss": 1.3603, "step": 3482 }, { "epoch": 0.43, "grad_norm": 7.153911053858232, "learning_rate": 6.309978242280382e-06, "loss": 1.1782, "step": 3483 }, { "epoch": 0.43, "grad_norm": 7.754291118581448, "learning_rate": 6.308034141505379e-06, "loss": 1.3304, "step": 3484 }, { "epoch": 0.43, "grad_norm": 6.909524301776387, "learning_rate": 6.3060898284292225e-06, "loss": 1.27, "step": 3485 }, { "epoch": 0.43, "grad_norm": 8.076163905307514, "learning_rate": 6.304145303367484e-06, "loss": 1.6858, "step": 3486 }, { "epoch": 0.43, "grad_norm": 7.063090562438185, "learning_rate": 6.302200566635769e-06, "loss": 1.1799, "step": 3487 }, { "epoch": 0.43, "grad_norm": 7.058207604995868, "learning_rate": 6.3002556185497195e-06, "loss": 1.2963, "step": 3488 }, { "epoch": 0.43, "grad_norm": 7.837262429784418, "learning_rate": 6.2983104594250134e-06, "loss": 0.9837, "step": 3489 }, { "epoch": 0.43, "grad_norm": 6.415568678859553, "learning_rate": 6.2963650895773566e-06, "loss": 1.138, "step": 3490 }, { "epoch": 0.43, "grad_norm": 7.193017183780361, "learning_rate": 6.294419509322498e-06, "loss": 1.3678, "step": 3491 }, { "epoch": 0.43, "grad_norm": 8.418027528239453, "learning_rate": 6.292473718976213e-06, "loss": 1.1287, "step": 3492 }, { "epoch": 0.43, "grad_norm": 5.700518847295446, "learning_rate": 6.290527718854316e-06, "loss": 1.1742, "step": 3493 }, { "epoch": 0.43, "grad_norm": 7.789809167406369, "learning_rate": 6.288581509272652e-06, "loss": 1.299, "step": 3494 }, { "epoch": 0.43, "grad_norm": 8.105649824175496, "learning_rate": 6.286635090547103e-06, "loss": 1.3644, "step": 3495 }, { "epoch": 0.43, "grad_norm": 6.406649016824686, "learning_rate": 6.2846884629935815e-06, "loss": 1.2012, "step": 3496 }, { "epoch": 0.43, "grad_norm": 6.5025937270401375, "learning_rate": 6.282741626928036e-06, "loss": 1.1682, "step": 3497 }, { "epoch": 0.44, "grad_norm": 7.346050610986858, "learning_rate": 6.2807945826664516e-06, "loss": 1.2118, "step": 3498 }, { "epoch": 0.44, "grad_norm": 7.829932519263309, "learning_rate": 6.278847330524842e-06, "loss": 1.6659, "step": 3499 }, { "epoch": 0.44, "grad_norm": 5.864678343034291, "learning_rate": 6.276899870819258e-06, "loss": 0.9851, "step": 3500 }, { "epoch": 0.44, "grad_norm": 7.027857407876725, "learning_rate": 6.274952203865782e-06, "loss": 1.5546, "step": 3501 }, { "epoch": 0.44, "grad_norm": 7.689732164916915, "learning_rate": 6.27300432998053e-06, "loss": 1.2992, "step": 3502 }, { "epoch": 0.44, "grad_norm": 6.524443028951719, "learning_rate": 6.271056249479657e-06, "loss": 1.117, "step": 3503 }, { "epoch": 0.44, "grad_norm": 7.480825755723675, "learning_rate": 6.269107962679344e-06, "loss": 1.1334, "step": 3504 }, { "epoch": 0.44, "grad_norm": 7.659297322914285, "learning_rate": 6.267159469895807e-06, "loss": 1.2546, "step": 3505 }, { "epoch": 0.44, "grad_norm": 8.08917101771872, "learning_rate": 6.2652107714453004e-06, "loss": 1.7523, "step": 3506 }, { "epoch": 0.44, "grad_norm": 7.34473987830839, "learning_rate": 6.263261867644108e-06, "loss": 1.1601, "step": 3507 }, { "epoch": 0.44, "grad_norm": 7.115951941202747, "learning_rate": 6.261312758808547e-06, "loss": 1.1376, "step": 3508 }, { "epoch": 0.44, "grad_norm": 7.2206316675447875, "learning_rate": 6.259363445254969e-06, "loss": 0.8995, "step": 3509 }, { "epoch": 0.44, "grad_norm": 7.443733041295461, "learning_rate": 6.257413927299758e-06, "loss": 1.1894, "step": 3510 }, { "epoch": 0.44, "grad_norm": 6.9500839580037805, "learning_rate": 6.255464205259332e-06, "loss": 1.0297, "step": 3511 }, { "epoch": 0.44, "grad_norm": 7.0363670105645, "learning_rate": 6.253514279450139e-06, "loss": 1.268, "step": 3512 }, { "epoch": 0.44, "grad_norm": 6.694535477503124, "learning_rate": 6.251564150188668e-06, "loss": 1.3689, "step": 3513 }, { "epoch": 0.44, "grad_norm": 6.574654722519605, "learning_rate": 6.249613817791431e-06, "loss": 1.2173, "step": 3514 }, { "epoch": 0.44, "grad_norm": 6.869752642121985, "learning_rate": 6.247663282574979e-06, "loss": 1.2387, "step": 3515 }, { "epoch": 0.44, "grad_norm": 7.857625935469405, "learning_rate": 6.245712544855896e-06, "loss": 1.1376, "step": 3516 }, { "epoch": 0.44, "grad_norm": 9.54620180749406, "learning_rate": 6.243761604950796e-06, "loss": 1.2719, "step": 3517 }, { "epoch": 0.44, "grad_norm": 6.4321786854326914, "learning_rate": 6.241810463176329e-06, "loss": 1.1187, "step": 3518 }, { "epoch": 0.44, "grad_norm": 6.775508779473878, "learning_rate": 6.239859119849174e-06, "loss": 1.0958, "step": 3519 }, { "epoch": 0.44, "grad_norm": 7.4223833920635665, "learning_rate": 6.237907575286045e-06, "loss": 1.1243, "step": 3520 }, { "epoch": 0.44, "grad_norm": 7.673091680543685, "learning_rate": 6.235955829803689e-06, "loss": 1.3852, "step": 3521 }, { "epoch": 0.44, "grad_norm": 7.630429972150363, "learning_rate": 6.234003883718887e-06, "loss": 0.914, "step": 3522 }, { "epoch": 0.44, "grad_norm": 7.741800356745674, "learning_rate": 6.232051737348447e-06, "loss": 1.3543, "step": 3523 }, { "epoch": 0.44, "grad_norm": 7.397087228638278, "learning_rate": 6.230099391009215e-06, "loss": 1.265, "step": 3524 }, { "epoch": 0.44, "grad_norm": 6.533488896396269, "learning_rate": 6.228146845018068e-06, "loss": 0.883, "step": 3525 }, { "epoch": 0.44, "grad_norm": 7.584563230187274, "learning_rate": 6.226194099691914e-06, "loss": 1.3283, "step": 3526 }, { "epoch": 0.44, "grad_norm": 7.672112622297961, "learning_rate": 6.224241155347696e-06, "loss": 1.3296, "step": 3527 }, { "epoch": 0.44, "grad_norm": 7.455166980722397, "learning_rate": 6.2222880123023855e-06, "loss": 1.3496, "step": 3528 }, { "epoch": 0.44, "grad_norm": 6.570531250289163, "learning_rate": 6.22033467087299e-06, "loss": 1.0293, "step": 3529 }, { "epoch": 0.44, "grad_norm": 7.094164903145434, "learning_rate": 6.218381131376545e-06, "loss": 1.5586, "step": 3530 }, { "epoch": 0.44, "grad_norm": 7.8112725184502265, "learning_rate": 6.216427394130124e-06, "loss": 1.1562, "step": 3531 }, { "epoch": 0.44, "grad_norm": 7.369190991622276, "learning_rate": 6.214473459450828e-06, "loss": 1.5183, "step": 3532 }, { "epoch": 0.44, "grad_norm": 7.728384225743382, "learning_rate": 6.212519327655793e-06, "loss": 1.2873, "step": 3533 }, { "epoch": 0.44, "grad_norm": 6.569626924041685, "learning_rate": 6.210564999062181e-06, "loss": 1.5058, "step": 3534 }, { "epoch": 0.44, "grad_norm": 6.848169009190652, "learning_rate": 6.208610473987194e-06, "loss": 0.9749, "step": 3535 }, { "epoch": 0.44, "grad_norm": 7.1308122723394005, "learning_rate": 6.206655752748062e-06, "loss": 1.3788, "step": 3536 }, { "epoch": 0.44, "grad_norm": 6.758387442132492, "learning_rate": 6.204700835662045e-06, "loss": 1.3861, "step": 3537 }, { "epoch": 0.44, "grad_norm": 7.3117695287893945, "learning_rate": 6.202745723046439e-06, "loss": 1.2766, "step": 3538 }, { "epoch": 0.44, "grad_norm": 6.842365208975412, "learning_rate": 6.200790415218568e-06, "loss": 1.2568, "step": 3539 }, { "epoch": 0.44, "grad_norm": 7.360094156286928, "learning_rate": 6.198834912495791e-06, "loss": 1.5508, "step": 3540 }, { "epoch": 0.44, "grad_norm": 7.2835940102269525, "learning_rate": 6.196879215195495e-06, "loss": 1.4631, "step": 3541 }, { "epoch": 0.44, "grad_norm": 7.645776939604188, "learning_rate": 6.194923323635102e-06, "loss": 1.1268, "step": 3542 }, { "epoch": 0.44, "grad_norm": 6.7115091009498355, "learning_rate": 6.192967238132062e-06, "loss": 0.9565, "step": 3543 }, { "epoch": 0.44, "grad_norm": 7.1901653410923405, "learning_rate": 6.19101095900386e-06, "loss": 1.5319, "step": 3544 }, { "epoch": 0.44, "grad_norm": 6.808807148855743, "learning_rate": 6.18905448656801e-06, "loss": 1.3347, "step": 3545 }, { "epoch": 0.44, "grad_norm": 6.190648670231093, "learning_rate": 6.187097821142061e-06, "loss": 0.9847, "step": 3546 }, { "epoch": 0.44, "grad_norm": 6.099992071930621, "learning_rate": 6.185140963043586e-06, "loss": 0.8469, "step": 3547 }, { "epoch": 0.44, "grad_norm": 7.091201879635365, "learning_rate": 6.183183912590197e-06, "loss": 1.2016, "step": 3548 }, { "epoch": 0.44, "grad_norm": 7.2675231266329074, "learning_rate": 6.181226670099534e-06, "loss": 1.1032, "step": 3549 }, { "epoch": 0.44, "grad_norm": 7.293308898375087, "learning_rate": 6.179269235889267e-06, "loss": 1.039, "step": 3550 }, { "epoch": 0.44, "grad_norm": 6.620476870735579, "learning_rate": 6.177311610277101e-06, "loss": 1.072, "step": 3551 }, { "epoch": 0.44, "grad_norm": 7.281492351251421, "learning_rate": 6.175353793580765e-06, "loss": 1.1949, "step": 3552 }, { "epoch": 0.44, "grad_norm": 7.357200290300931, "learning_rate": 6.173395786118026e-06, "loss": 1.2833, "step": 3553 }, { "epoch": 0.44, "grad_norm": 8.83546227465218, "learning_rate": 6.1714375882066806e-06, "loss": 1.2585, "step": 3554 }, { "epoch": 0.44, "grad_norm": 6.708796461781564, "learning_rate": 6.169479200164553e-06, "loss": 1.2454, "step": 3555 }, { "epoch": 0.44, "grad_norm": 7.2191224733493, "learning_rate": 6.167520622309501e-06, "loss": 1.5173, "step": 3556 }, { "epoch": 0.44, "grad_norm": 6.836774950533314, "learning_rate": 6.165561854959412e-06, "loss": 1.3151, "step": 3557 }, { "epoch": 0.44, "grad_norm": 7.20060684416213, "learning_rate": 6.163602898432209e-06, "loss": 1.18, "step": 3558 }, { "epoch": 0.44, "grad_norm": 7.847532752290031, "learning_rate": 6.161643753045835e-06, "loss": 1.2688, "step": 3559 }, { "epoch": 0.44, "grad_norm": 7.535867582850259, "learning_rate": 6.159684419118274e-06, "loss": 1.1831, "step": 3560 }, { "epoch": 0.44, "grad_norm": 6.779038623387533, "learning_rate": 6.157724896967535e-06, "loss": 0.9502, "step": 3561 }, { "epoch": 0.44, "grad_norm": 7.355625603703551, "learning_rate": 6.155765186911661e-06, "loss": 1.3464, "step": 3562 }, { "epoch": 0.44, "grad_norm": 8.368951049783893, "learning_rate": 6.1538052892687215e-06, "loss": 1.2534, "step": 3563 }, { "epoch": 0.44, "grad_norm": 7.6606543456372025, "learning_rate": 6.151845204356821e-06, "loss": 1.2895, "step": 3564 }, { "epoch": 0.44, "grad_norm": 8.054371160929062, "learning_rate": 6.14988493249409e-06, "loss": 1.2661, "step": 3565 }, { "epoch": 0.44, "grad_norm": 5.9796789548460225, "learning_rate": 6.147924473998694e-06, "loss": 1.1267, "step": 3566 }, { "epoch": 0.44, "grad_norm": 7.621744328457999, "learning_rate": 6.145963829188823e-06, "loss": 1.6553, "step": 3567 }, { "epoch": 0.44, "grad_norm": 8.77444846579189, "learning_rate": 6.144002998382706e-06, "loss": 1.7162, "step": 3568 }, { "epoch": 0.44, "grad_norm": 7.121662991127395, "learning_rate": 6.142041981898589e-06, "loss": 1.2454, "step": 3569 }, { "epoch": 0.44, "grad_norm": 6.661897343436449, "learning_rate": 6.1400807800547616e-06, "loss": 0.9907, "step": 3570 }, { "epoch": 0.44, "grad_norm": 6.208936904761622, "learning_rate": 6.138119393169534e-06, "loss": 1.2903, "step": 3571 }, { "epoch": 0.44, "grad_norm": 7.175569918962642, "learning_rate": 6.136157821561252e-06, "loss": 1.6163, "step": 3572 }, { "epoch": 0.44, "grad_norm": 6.847976097561341, "learning_rate": 6.1341960655482904e-06, "loss": 1.0236, "step": 3573 }, { "epoch": 0.44, "grad_norm": 8.688278570469588, "learning_rate": 6.1322341254490515e-06, "loss": 1.3683, "step": 3574 }, { "epoch": 0.44, "grad_norm": 7.128758730543469, "learning_rate": 6.130272001581969e-06, "loss": 1.2108, "step": 3575 }, { "epoch": 0.44, "grad_norm": 7.42916611793038, "learning_rate": 6.1283096942655085e-06, "loss": 0.9759, "step": 3576 }, { "epoch": 0.44, "grad_norm": 5.73036839302839, "learning_rate": 6.1263472038181605e-06, "loss": 0.7911, "step": 3577 }, { "epoch": 0.45, "grad_norm": 6.120800433040115, "learning_rate": 6.124384530558449e-06, "loss": 1.2373, "step": 3578 }, { "epoch": 0.45, "grad_norm": 7.426959868606076, "learning_rate": 6.122421674804927e-06, "loss": 1.1521, "step": 3579 }, { "epoch": 0.45, "grad_norm": 7.904582703355664, "learning_rate": 6.120458636876176e-06, "loss": 1.3847, "step": 3580 }, { "epoch": 0.45, "grad_norm": 7.521305091495103, "learning_rate": 6.118495417090809e-06, "loss": 1.6299, "step": 3581 }, { "epoch": 0.45, "grad_norm": 7.7891134224651255, "learning_rate": 6.116532015767468e-06, "loss": 1.3322, "step": 3582 }, { "epoch": 0.45, "grad_norm": 7.375130640155928, "learning_rate": 6.114568433224822e-06, "loss": 1.2297, "step": 3583 }, { "epoch": 0.45, "grad_norm": 7.574464871015278, "learning_rate": 6.112604669781572e-06, "loss": 1.4865, "step": 3584 }, { "epoch": 0.45, "grad_norm": 10.031491231588497, "learning_rate": 6.110640725756449e-06, "loss": 1.1764, "step": 3585 }, { "epoch": 0.45, "grad_norm": 7.502909368307909, "learning_rate": 6.10867660146821e-06, "loss": 1.1304, "step": 3586 }, { "epoch": 0.45, "grad_norm": 7.242399642568838, "learning_rate": 6.106712297235643e-06, "loss": 1.1659, "step": 3587 }, { "epoch": 0.45, "grad_norm": 6.893726673757669, "learning_rate": 6.104747813377567e-06, "loss": 1.4673, "step": 3588 }, { "epoch": 0.45, "grad_norm": 7.094315439779966, "learning_rate": 6.1027831502128275e-06, "loss": 1.1171, "step": 3589 }, { "epoch": 0.45, "grad_norm": 6.6285698186462, "learning_rate": 6.100818308060301e-06, "loss": 1.1849, "step": 3590 }, { "epoch": 0.45, "grad_norm": 6.744894535928525, "learning_rate": 6.098853287238892e-06, "loss": 1.1546, "step": 3591 }, { "epoch": 0.45, "grad_norm": 5.910676678717255, "learning_rate": 6.096888088067534e-06, "loss": 1.2773, "step": 3592 }, { "epoch": 0.45, "grad_norm": 6.350166759546181, "learning_rate": 6.0949227108651886e-06, "loss": 1.093, "step": 3593 }, { "epoch": 0.45, "grad_norm": 6.501687657125138, "learning_rate": 6.092957155950851e-06, "loss": 1.2342, "step": 3594 }, { "epoch": 0.45, "grad_norm": 7.222497885367381, "learning_rate": 6.0909914236435355e-06, "loss": 1.1288, "step": 3595 }, { "epoch": 0.45, "grad_norm": 6.829653098919249, "learning_rate": 6.089025514262295e-06, "loss": 1.133, "step": 3596 }, { "epoch": 0.45, "grad_norm": 7.817175584831544, "learning_rate": 6.08705942812621e-06, "loss": 1.4369, "step": 3597 }, { "epoch": 0.45, "grad_norm": 6.995482687488987, "learning_rate": 6.085093165554383e-06, "loss": 1.1631, "step": 3598 }, { "epoch": 0.45, "grad_norm": 7.716989704789978, "learning_rate": 6.0831267268659514e-06, "loss": 1.3414, "step": 3599 }, { "epoch": 0.45, "grad_norm": 6.528625810179099, "learning_rate": 6.081160112380078e-06, "loss": 1.2054, "step": 3600 }, { "epoch": 0.45, "grad_norm": 6.997129084553375, "learning_rate": 6.079193322415954e-06, "loss": 1.3408, "step": 3601 }, { "epoch": 0.45, "grad_norm": 7.219122324994812, "learning_rate": 6.077226357292802e-06, "loss": 1.169, "step": 3602 }, { "epoch": 0.45, "grad_norm": 6.505947535826873, "learning_rate": 6.075259217329873e-06, "loss": 1.1884, "step": 3603 }, { "epoch": 0.45, "grad_norm": 7.8952767535384565, "learning_rate": 6.07329190284644e-06, "loss": 1.3528, "step": 3604 }, { "epoch": 0.45, "grad_norm": 7.459593305774026, "learning_rate": 6.071324414161813e-06, "loss": 1.3224, "step": 3605 }, { "epoch": 0.45, "grad_norm": 7.350598977639833, "learning_rate": 6.0693567515953255e-06, "loss": 1.1726, "step": 3606 }, { "epoch": 0.45, "grad_norm": 7.692326762490109, "learning_rate": 6.067388915466338e-06, "loss": 1.4035, "step": 3607 }, { "epoch": 0.45, "grad_norm": 7.660825348138225, "learning_rate": 6.065420906094244e-06, "loss": 1.2485, "step": 3608 }, { "epoch": 0.45, "grad_norm": 7.983777702065226, "learning_rate": 6.063452723798461e-06, "loss": 1.6134, "step": 3609 }, { "epoch": 0.45, "grad_norm": 7.129683721083444, "learning_rate": 6.061484368898434e-06, "loss": 1.324, "step": 3610 }, { "epoch": 0.45, "grad_norm": 6.28051331450249, "learning_rate": 6.05951584171364e-06, "loss": 0.9321, "step": 3611 }, { "epoch": 0.45, "grad_norm": 6.8205279263840435, "learning_rate": 6.057547142563582e-06, "loss": 1.111, "step": 3612 }, { "epoch": 0.45, "grad_norm": 6.248766097988826, "learning_rate": 6.055578271767789e-06, "loss": 1.0633, "step": 3613 }, { "epoch": 0.45, "grad_norm": 6.850364147517065, "learning_rate": 6.053609229645821e-06, "loss": 1.3023, "step": 3614 }, { "epoch": 0.45, "grad_norm": 7.997691913202565, "learning_rate": 6.051640016517265e-06, "loss": 1.0443, "step": 3615 }, { "epoch": 0.45, "grad_norm": 7.018460628445309, "learning_rate": 6.0496706327017355e-06, "loss": 1.2332, "step": 3616 }, { "epoch": 0.45, "grad_norm": 6.687838385537428, "learning_rate": 6.047701078518871e-06, "loss": 0.9514, "step": 3617 }, { "epoch": 0.45, "grad_norm": 6.868177382407135, "learning_rate": 6.045731354288343e-06, "loss": 0.9309, "step": 3618 }, { "epoch": 0.45, "grad_norm": 6.45318469848263, "learning_rate": 6.043761460329849e-06, "loss": 1.2469, "step": 3619 }, { "epoch": 0.45, "grad_norm": 7.796611525933019, "learning_rate": 6.041791396963113e-06, "loss": 1.1275, "step": 3620 }, { "epoch": 0.45, "grad_norm": 5.731536448948687, "learning_rate": 6.039821164507889e-06, "loss": 0.8587, "step": 3621 }, { "epoch": 0.45, "grad_norm": 6.829378522732393, "learning_rate": 6.037850763283952e-06, "loss": 1.2447, "step": 3622 }, { "epoch": 0.45, "grad_norm": 7.141995596012477, "learning_rate": 6.0358801936111145e-06, "loss": 0.9769, "step": 3623 }, { "epoch": 0.45, "grad_norm": 8.213023939627886, "learning_rate": 6.033909455809208e-06, "loss": 1.5441, "step": 3624 }, { "epoch": 0.45, "grad_norm": 7.088342415868019, "learning_rate": 6.031938550198096e-06, "loss": 0.9942, "step": 3625 }, { "epoch": 0.45, "grad_norm": 6.908431177573143, "learning_rate": 6.029967477097662e-06, "loss": 1.204, "step": 3626 }, { "epoch": 0.45, "grad_norm": 6.755795334598131, "learning_rate": 6.02799623682783e-06, "loss": 0.9949, "step": 3627 }, { "epoch": 0.45, "grad_norm": 6.635306399586155, "learning_rate": 6.026024829708537e-06, "loss": 1.2591, "step": 3628 }, { "epoch": 0.45, "grad_norm": 7.667025640732781, "learning_rate": 6.024053256059754e-06, "loss": 1.0003, "step": 3629 }, { "epoch": 0.45, "grad_norm": 6.960593048778529, "learning_rate": 6.022081516201483e-06, "loss": 1.3149, "step": 3630 }, { "epoch": 0.45, "grad_norm": 6.825618979841573, "learning_rate": 6.020109610453741e-06, "loss": 1.0329, "step": 3631 }, { "epoch": 0.45, "grad_norm": 7.257700407056252, "learning_rate": 6.018137539136586e-06, "loss": 1.3029, "step": 3632 }, { "epoch": 0.45, "grad_norm": 7.631700716847006, "learning_rate": 6.016165302570093e-06, "loss": 1.4853, "step": 3633 }, { "epoch": 0.45, "grad_norm": 6.989802763174267, "learning_rate": 6.0141929010743665e-06, "loss": 1.1255, "step": 3634 }, { "epoch": 0.45, "grad_norm": 6.268973050570147, "learning_rate": 6.012220334969536e-06, "loss": 0.8213, "step": 3635 }, { "epoch": 0.45, "grad_norm": 6.623275580081514, "learning_rate": 6.010247604575766e-06, "loss": 1.1012, "step": 3636 }, { "epoch": 0.45, "grad_norm": 8.358401850806686, "learning_rate": 6.008274710213235e-06, "loss": 1.1818, "step": 3637 }, { "epoch": 0.45, "grad_norm": 7.811432722712692, "learning_rate": 6.00630165220216e-06, "loss": 1.5143, "step": 3638 }, { "epoch": 0.45, "grad_norm": 7.50674296758118, "learning_rate": 6.004328430862776e-06, "loss": 1.3886, "step": 3639 }, { "epoch": 0.45, "grad_norm": 6.550507649416402, "learning_rate": 6.002355046515348e-06, "loss": 1.5386, "step": 3640 }, { "epoch": 0.45, "grad_norm": 7.532602187737812, "learning_rate": 6.000381499480169e-06, "loss": 1.4994, "step": 3641 }, { "epoch": 0.45, "grad_norm": 6.021650740502985, "learning_rate": 5.998407790077555e-06, "loss": 1.3753, "step": 3642 }, { "epoch": 0.45, "grad_norm": 6.891467771351762, "learning_rate": 5.996433918627848e-06, "loss": 0.8811, "step": 3643 }, { "epoch": 0.45, "grad_norm": 6.842807974000047, "learning_rate": 5.994459885451423e-06, "loss": 0.9125, "step": 3644 }, { "epoch": 0.45, "grad_norm": 7.09229423342624, "learning_rate": 5.992485690868672e-06, "loss": 1.2139, "step": 3645 }, { "epoch": 0.45, "grad_norm": 7.229776021234171, "learning_rate": 5.99051133520002e-06, "loss": 1.3534, "step": 3646 }, { "epoch": 0.45, "grad_norm": 6.158192088313871, "learning_rate": 5.988536818765915e-06, "loss": 0.9483, "step": 3647 }, { "epoch": 0.45, "grad_norm": 6.73951464267371, "learning_rate": 5.9865621418868335e-06, "loss": 1.2974, "step": 3648 }, { "epoch": 0.45, "grad_norm": 8.581990610159062, "learning_rate": 5.984587304883275e-06, "loss": 1.1683, "step": 3649 }, { "epoch": 0.45, "grad_norm": 8.481382130705363, "learning_rate": 5.982612308075766e-06, "loss": 1.5407, "step": 3650 }, { "epoch": 0.45, "grad_norm": 7.3379914071779355, "learning_rate": 5.9806371517848605e-06, "loss": 1.0714, "step": 3651 }, { "epoch": 0.45, "grad_norm": 6.995823464794284, "learning_rate": 5.978661836331136e-06, "loss": 1.1812, "step": 3652 }, { "epoch": 0.45, "grad_norm": 8.107601746030456, "learning_rate": 5.976686362035198e-06, "loss": 1.2336, "step": 3653 }, { "epoch": 0.45, "grad_norm": 7.391738284991049, "learning_rate": 5.974710729217677e-06, "loss": 1.0371, "step": 3654 }, { "epoch": 0.45, "grad_norm": 7.160713232257715, "learning_rate": 5.972734938199228e-06, "loss": 0.8545, "step": 3655 }, { "epoch": 0.45, "grad_norm": 7.783536729900602, "learning_rate": 5.970758989300536e-06, "loss": 1.3814, "step": 3656 }, { "epoch": 0.45, "grad_norm": 7.052344210732115, "learning_rate": 5.968782882842305e-06, "loss": 1.1333, "step": 3657 }, { "epoch": 0.45, "grad_norm": 7.119308110771002, "learning_rate": 5.966806619145268e-06, "loss": 1.3495, "step": 3658 }, { "epoch": 0.46, "grad_norm": 8.09987892371841, "learning_rate": 5.964830198530184e-06, "loss": 1.3448, "step": 3659 }, { "epoch": 0.46, "grad_norm": 6.672355620998941, "learning_rate": 5.962853621317839e-06, "loss": 1.142, "step": 3660 }, { "epoch": 0.46, "grad_norm": 8.01336661398207, "learning_rate": 5.96087688782904e-06, "loss": 1.3548, "step": 3661 }, { "epoch": 0.46, "grad_norm": 7.299195822581143, "learning_rate": 5.958899998384622e-06, "loss": 1.2569, "step": 3662 }, { "epoch": 0.46, "grad_norm": 6.239032620422572, "learning_rate": 5.956922953305447e-06, "loss": 1.0967, "step": 3663 }, { "epoch": 0.46, "grad_norm": 6.533779575600587, "learning_rate": 5.954945752912398e-06, "loss": 1.0047, "step": 3664 }, { "epoch": 0.46, "grad_norm": 7.2314762730197835, "learning_rate": 5.952968397526388e-06, "loss": 1.5122, "step": 3665 }, { "epoch": 0.46, "grad_norm": 6.665804101172314, "learning_rate": 5.950990887468351e-06, "loss": 1.371, "step": 3666 }, { "epoch": 0.46, "grad_norm": 7.140305038530554, "learning_rate": 5.949013223059246e-06, "loss": 0.9829, "step": 3667 }, { "epoch": 0.46, "grad_norm": 7.268129062900478, "learning_rate": 5.947035404620062e-06, "loss": 1.2037, "step": 3668 }, { "epoch": 0.46, "grad_norm": 7.451951909120552, "learning_rate": 5.945057432471809e-06, "loss": 1.4823, "step": 3669 }, { "epoch": 0.46, "grad_norm": 8.12148751194061, "learning_rate": 5.943079306935521e-06, "loss": 1.2431, "step": 3670 }, { "epoch": 0.46, "grad_norm": 7.372227615248302, "learning_rate": 5.941101028332261e-06, "loss": 1.1973, "step": 3671 }, { "epoch": 0.46, "grad_norm": 6.4502653143806645, "learning_rate": 5.9391225969831145e-06, "loss": 1.0204, "step": 3672 }, { "epoch": 0.46, "grad_norm": 8.225434245708467, "learning_rate": 5.937144013209191e-06, "loss": 1.5761, "step": 3673 }, { "epoch": 0.46, "grad_norm": 6.817739435745992, "learning_rate": 5.935165277331624e-06, "loss": 1.1259, "step": 3674 }, { "epoch": 0.46, "grad_norm": 7.403367715225796, "learning_rate": 5.933186389671576e-06, "loss": 1.0857, "step": 3675 }, { "epoch": 0.46, "grad_norm": 7.402077697276403, "learning_rate": 5.931207350550229e-06, "loss": 0.978, "step": 3676 }, { "epoch": 0.46, "grad_norm": 7.145116999520339, "learning_rate": 5.929228160288794e-06, "loss": 1.2145, "step": 3677 }, { "epoch": 0.46, "grad_norm": 6.057078472939347, "learning_rate": 5.927248819208503e-06, "loss": 1.0737, "step": 3678 }, { "epoch": 0.46, "grad_norm": 6.343037711554947, "learning_rate": 5.925269327630615e-06, "loss": 0.8441, "step": 3679 }, { "epoch": 0.46, "grad_norm": 6.894076190021748, "learning_rate": 5.9232896858764126e-06, "loss": 1.4309, "step": 3680 }, { "epoch": 0.46, "grad_norm": 6.89134602405498, "learning_rate": 5.921309894267201e-06, "loss": 1.1077, "step": 3681 }, { "epoch": 0.46, "grad_norm": 7.4318696774501865, "learning_rate": 5.919329953124313e-06, "loss": 1.1832, "step": 3682 }, { "epoch": 0.46, "grad_norm": 7.419964983488942, "learning_rate": 5.9173498627691024e-06, "loss": 1.0755, "step": 3683 }, { "epoch": 0.46, "grad_norm": 6.7869015150202054, "learning_rate": 5.915369623522951e-06, "loss": 1.0773, "step": 3684 }, { "epoch": 0.46, "grad_norm": 7.9216067950223685, "learning_rate": 5.91338923570726e-06, "loss": 1.1018, "step": 3685 }, { "epoch": 0.46, "grad_norm": 7.755107641878577, "learning_rate": 5.911408699643458e-06, "loss": 1.2732, "step": 3686 }, { "epoch": 0.46, "grad_norm": 7.944719775771972, "learning_rate": 5.909428015652998e-06, "loss": 1.1234, "step": 3687 }, { "epoch": 0.46, "grad_norm": 11.389588729564853, "learning_rate": 5.907447184057356e-06, "loss": 1.0698, "step": 3688 }, { "epoch": 0.46, "grad_norm": 6.726253491758928, "learning_rate": 5.905466205178031e-06, "loss": 1.1323, "step": 3689 }, { "epoch": 0.46, "grad_norm": 8.670809635750983, "learning_rate": 5.9034850793365474e-06, "loss": 1.547, "step": 3690 }, { "epoch": 0.46, "grad_norm": 7.5430571157205, "learning_rate": 5.90150380685445e-06, "loss": 1.3108, "step": 3691 }, { "epoch": 0.46, "grad_norm": 7.8941401870160615, "learning_rate": 5.899522388053315e-06, "loss": 1.6498, "step": 3692 }, { "epoch": 0.46, "grad_norm": 8.261872478651082, "learning_rate": 5.897540823254735e-06, "loss": 1.1451, "step": 3693 }, { "epoch": 0.46, "grad_norm": 8.393651454901185, "learning_rate": 5.895559112780328e-06, "loss": 1.1886, "step": 3694 }, { "epoch": 0.46, "grad_norm": 7.418828426050206, "learning_rate": 5.893577256951738e-06, "loss": 0.9988, "step": 3695 }, { "epoch": 0.46, "grad_norm": 5.986543572284861, "learning_rate": 5.891595256090632e-06, "loss": 1.0111, "step": 3696 }, { "epoch": 0.46, "grad_norm": 7.793719850667911, "learning_rate": 5.889613110518697e-06, "loss": 1.4101, "step": 3697 }, { "epoch": 0.46, "grad_norm": 7.93384520923751, "learning_rate": 5.887630820557649e-06, "loss": 1.2542, "step": 3698 }, { "epoch": 0.46, "grad_norm": 7.872786569798431, "learning_rate": 5.885648386529224e-06, "loss": 1.5088, "step": 3699 }, { "epoch": 0.46, "grad_norm": 7.666923901018515, "learning_rate": 5.883665808755179e-06, "loss": 1.4308, "step": 3700 }, { "epoch": 0.46, "grad_norm": 6.298906961069707, "learning_rate": 5.8816830875573e-06, "loss": 1.1635, "step": 3701 }, { "epoch": 0.46, "grad_norm": 6.861400130307411, "learning_rate": 5.879700223257395e-06, "loss": 0.9248, "step": 3702 }, { "epoch": 0.46, "grad_norm": 7.184534942416117, "learning_rate": 5.87771721617729e-06, "loss": 1.1498, "step": 3703 }, { "epoch": 0.46, "grad_norm": 7.31889604212872, "learning_rate": 5.875734066638841e-06, "loss": 1.211, "step": 3704 }, { "epoch": 0.46, "grad_norm": 6.54066656585364, "learning_rate": 5.8737507749639234e-06, "loss": 0.9795, "step": 3705 }, { "epoch": 0.46, "grad_norm": 7.811592923293467, "learning_rate": 5.871767341474436e-06, "loss": 1.0446, "step": 3706 }, { "epoch": 0.46, "grad_norm": 5.685551705586309, "learning_rate": 5.8697837664923e-06, "loss": 0.9092, "step": 3707 }, { "epoch": 0.46, "grad_norm": 6.898445133901412, "learning_rate": 5.867800050339463e-06, "loss": 1.0679, "step": 3708 }, { "epoch": 0.46, "grad_norm": 7.545243066839292, "learning_rate": 5.8658161933378895e-06, "loss": 1.3581, "step": 3709 }, { "epoch": 0.46, "grad_norm": 7.047825935107384, "learning_rate": 5.8638321958095734e-06, "loss": 1.2757, "step": 3710 }, { "epoch": 0.46, "grad_norm": 7.257936303216995, "learning_rate": 5.861848058076528e-06, "loss": 1.2076, "step": 3711 }, { "epoch": 0.46, "grad_norm": 6.877448568420068, "learning_rate": 5.859863780460789e-06, "loss": 1.304, "step": 3712 }, { "epoch": 0.46, "grad_norm": 5.972228160862044, "learning_rate": 5.857879363284417e-06, "loss": 0.9119, "step": 3713 }, { "epoch": 0.46, "grad_norm": 7.183884121590328, "learning_rate": 5.855894806869493e-06, "loss": 1.1575, "step": 3714 }, { "epoch": 0.46, "grad_norm": 7.247629287478536, "learning_rate": 5.85391011153812e-06, "loss": 1.138, "step": 3715 }, { "epoch": 0.46, "grad_norm": 9.725188345037667, "learning_rate": 5.851925277612427e-06, "loss": 0.9138, "step": 3716 }, { "epoch": 0.46, "grad_norm": 7.2279799500139505, "learning_rate": 5.849940305414563e-06, "loss": 1.3459, "step": 3717 }, { "epoch": 0.46, "grad_norm": 6.444901208190972, "learning_rate": 5.847955195266699e-06, "loss": 1.1948, "step": 3718 }, { "epoch": 0.46, "grad_norm": 7.93575363001223, "learning_rate": 5.84596994749103e-06, "loss": 1.2964, "step": 3719 }, { "epoch": 0.46, "grad_norm": 5.873900910485178, "learning_rate": 5.843984562409775e-06, "loss": 0.9956, "step": 3720 }, { "epoch": 0.46, "grad_norm": 7.335033614235449, "learning_rate": 5.841999040345168e-06, "loss": 1.2473, "step": 3721 }, { "epoch": 0.46, "grad_norm": 6.318556658527696, "learning_rate": 5.840013381619475e-06, "loss": 0.8014, "step": 3722 }, { "epoch": 0.46, "grad_norm": 7.464237648110732, "learning_rate": 5.838027586554977e-06, "loss": 1.1536, "step": 3723 }, { "epoch": 0.46, "grad_norm": 6.509695985136334, "learning_rate": 5.836041655473978e-06, "loss": 0.8643, "step": 3724 }, { "epoch": 0.46, "grad_norm": 6.5584077070455225, "learning_rate": 5.834055588698808e-06, "loss": 1.2123, "step": 3725 }, { "epoch": 0.46, "grad_norm": 6.43308016919892, "learning_rate": 5.832069386551817e-06, "loss": 0.9268, "step": 3726 }, { "epoch": 0.46, "grad_norm": 7.393676507332999, "learning_rate": 5.830083049355374e-06, "loss": 1.2649, "step": 3727 }, { "epoch": 0.46, "grad_norm": 8.709525738517947, "learning_rate": 5.828096577431874e-06, "loss": 1.2963, "step": 3728 }, { "epoch": 0.46, "grad_norm": 8.039198631752024, "learning_rate": 5.826109971103734e-06, "loss": 1.1568, "step": 3729 }, { "epoch": 0.46, "grad_norm": 7.121541093827099, "learning_rate": 5.824123230693388e-06, "loss": 1.3047, "step": 3730 }, { "epoch": 0.46, "grad_norm": 7.563764462666253, "learning_rate": 5.822136356523297e-06, "loss": 1.2533, "step": 3731 }, { "epoch": 0.46, "grad_norm": 7.454137391186715, "learning_rate": 5.820149348915941e-06, "loss": 1.0656, "step": 3732 }, { "epoch": 0.46, "grad_norm": 7.081672484614805, "learning_rate": 5.818162208193821e-06, "loss": 1.0543, "step": 3733 }, { "epoch": 0.46, "grad_norm": 8.236925498996783, "learning_rate": 5.816174934679463e-06, "loss": 1.6825, "step": 3734 }, { "epoch": 0.46, "grad_norm": 6.684403734568684, "learning_rate": 5.814187528695412e-06, "loss": 0.962, "step": 3735 }, { "epoch": 0.46, "grad_norm": 7.9113509752891265, "learning_rate": 5.812199990564234e-06, "loss": 1.303, "step": 3736 }, { "epoch": 0.46, "grad_norm": 8.12213144425694, "learning_rate": 5.810212320608518e-06, "loss": 1.2423, "step": 3737 }, { "epoch": 0.46, "grad_norm": 6.491266805500375, "learning_rate": 5.8082245191508755e-06, "loss": 1.1496, "step": 3738 }, { "epoch": 0.47, "grad_norm": 7.589405298726832, "learning_rate": 5.806236586513935e-06, "loss": 1.0364, "step": 3739 }, { "epoch": 0.47, "grad_norm": 6.691947555103057, "learning_rate": 5.804248523020349e-06, "loss": 1.2903, "step": 3740 }, { "epoch": 0.47, "grad_norm": 6.821306208213904, "learning_rate": 5.802260328992793e-06, "loss": 1.0056, "step": 3741 }, { "epoch": 0.47, "grad_norm": 9.088076137770221, "learning_rate": 5.800272004753961e-06, "loss": 1.53, "step": 3742 }, { "epoch": 0.47, "grad_norm": 6.433910756895996, "learning_rate": 5.798283550626568e-06, "loss": 1.1806, "step": 3743 }, { "epoch": 0.47, "grad_norm": 7.6843967980353804, "learning_rate": 5.796294966933352e-06, "loss": 1.5486, "step": 3744 }, { "epoch": 0.47, "grad_norm": 7.267002151930757, "learning_rate": 5.7943062539970716e-06, "loss": 1.2238, "step": 3745 }, { "epoch": 0.47, "grad_norm": 7.722911061098347, "learning_rate": 5.7923174121405055e-06, "loss": 1.137, "step": 3746 }, { "epoch": 0.47, "grad_norm": 8.552851871710502, "learning_rate": 5.7903284416864546e-06, "loss": 1.5841, "step": 3747 }, { "epoch": 0.47, "grad_norm": 9.936617545040246, "learning_rate": 5.788339342957736e-06, "loss": 1.3044, "step": 3748 }, { "epoch": 0.47, "grad_norm": 6.59725681433404, "learning_rate": 5.786350116277195e-06, "loss": 0.928, "step": 3749 }, { "epoch": 0.47, "grad_norm": 8.726939407155593, "learning_rate": 5.784360761967695e-06, "loss": 1.408, "step": 3750 }, { "epoch": 0.47, "grad_norm": 9.982116216905364, "learning_rate": 5.782371280352116e-06, "loss": 1.3585, "step": 3751 }, { "epoch": 0.47, "grad_norm": 5.737236955978347, "learning_rate": 5.780381671753362e-06, "loss": 0.9998, "step": 3752 }, { "epoch": 0.47, "grad_norm": 7.485795450697081, "learning_rate": 5.778391936494361e-06, "loss": 0.989, "step": 3753 }, { "epoch": 0.47, "grad_norm": 5.68619196836881, "learning_rate": 5.776402074898053e-06, "loss": 0.808, "step": 3754 }, { "epoch": 0.47, "grad_norm": 7.61137207786145, "learning_rate": 5.774412087287409e-06, "loss": 1.7236, "step": 3755 }, { "epoch": 0.47, "grad_norm": 7.144627134874178, "learning_rate": 5.772421973985412e-06, "loss": 1.0932, "step": 3756 }, { "epoch": 0.47, "grad_norm": 6.35148164593327, "learning_rate": 5.770431735315066e-06, "loss": 0.8995, "step": 3757 }, { "epoch": 0.47, "grad_norm": 7.059693044154666, "learning_rate": 5.768441371599401e-06, "loss": 1.0636, "step": 3758 }, { "epoch": 0.47, "grad_norm": 6.840438167126896, "learning_rate": 5.766450883161464e-06, "loss": 1.006, "step": 3759 }, { "epoch": 0.47, "grad_norm": 8.309753161718316, "learning_rate": 5.764460270324321e-06, "loss": 1.3206, "step": 3760 }, { "epoch": 0.47, "grad_norm": 7.754663185770332, "learning_rate": 5.762469533411059e-06, "loss": 1.5221, "step": 3761 }, { "epoch": 0.47, "grad_norm": 6.551685474487566, "learning_rate": 5.760478672744789e-06, "loss": 1.1385, "step": 3762 }, { "epoch": 0.47, "grad_norm": 7.645575179655622, "learning_rate": 5.758487688648635e-06, "loss": 1.2384, "step": 3763 }, { "epoch": 0.47, "grad_norm": 7.21158675235357, "learning_rate": 5.756496581445745e-06, "loss": 1.3484, "step": 3764 }, { "epoch": 0.47, "grad_norm": 6.690388424499975, "learning_rate": 5.7545053514592885e-06, "loss": 1.2468, "step": 3765 }, { "epoch": 0.47, "grad_norm": 6.939270458112775, "learning_rate": 5.752513999012452e-06, "loss": 0.9962, "step": 3766 }, { "epoch": 0.47, "grad_norm": 8.010927367895688, "learning_rate": 5.750522524428442e-06, "loss": 1.0404, "step": 3767 }, { "epoch": 0.47, "grad_norm": 8.399625415207568, "learning_rate": 5.7485309280304894e-06, "loss": 1.1564, "step": 3768 }, { "epoch": 0.47, "grad_norm": 7.591454583092433, "learning_rate": 5.746539210141836e-06, "loss": 1.3721, "step": 3769 }, { "epoch": 0.47, "grad_norm": 7.051324121876554, "learning_rate": 5.744547371085752e-06, "loss": 1.2731, "step": 3770 }, { "epoch": 0.47, "grad_norm": 7.427354553349353, "learning_rate": 5.742555411185526e-06, "loss": 1.0972, "step": 3771 }, { "epoch": 0.47, "grad_norm": 7.780781050287128, "learning_rate": 5.7405633307644586e-06, "loss": 1.4857, "step": 3772 }, { "epoch": 0.47, "grad_norm": 7.265668725276835, "learning_rate": 5.738571130145878e-06, "loss": 1.3868, "step": 3773 }, { "epoch": 0.47, "grad_norm": 6.435212578462368, "learning_rate": 5.73657880965313e-06, "loss": 1.1012, "step": 3774 }, { "epoch": 0.47, "grad_norm": 7.557144859623457, "learning_rate": 5.734586369609579e-06, "loss": 1.1486, "step": 3775 }, { "epoch": 0.47, "grad_norm": 7.613846321823865, "learning_rate": 5.732593810338609e-06, "loss": 1.318, "step": 3776 }, { "epoch": 0.47, "grad_norm": 7.372433957186627, "learning_rate": 5.730601132163623e-06, "loss": 1.5416, "step": 3777 }, { "epoch": 0.47, "grad_norm": 7.135707099163322, "learning_rate": 5.728608335408044e-06, "loss": 1.3146, "step": 3778 }, { "epoch": 0.47, "grad_norm": 7.233696068952381, "learning_rate": 5.726615420395314e-06, "loss": 1.2655, "step": 3779 }, { "epoch": 0.47, "grad_norm": 7.194720488134677, "learning_rate": 5.724622387448894e-06, "loss": 1.3876, "step": 3780 }, { "epoch": 0.47, "grad_norm": 8.560506321730005, "learning_rate": 5.722629236892262e-06, "loss": 0.922, "step": 3781 }, { "epoch": 0.47, "grad_norm": 7.413500075623331, "learning_rate": 5.72063596904892e-06, "loss": 1.0115, "step": 3782 }, { "epoch": 0.47, "grad_norm": 6.927621106144314, "learning_rate": 5.718642584242388e-06, "loss": 1.0024, "step": 3783 }, { "epoch": 0.47, "grad_norm": 7.3197090949643036, "learning_rate": 5.716649082796199e-06, "loss": 1.4089, "step": 3784 }, { "epoch": 0.47, "grad_norm": 6.9112275001633465, "learning_rate": 5.71465546503391e-06, "loss": 1.4372, "step": 3785 }, { "epoch": 0.47, "grad_norm": 6.350708915064333, "learning_rate": 5.7126617312791e-06, "loss": 1.2277, "step": 3786 }, { "epoch": 0.47, "grad_norm": 7.927632789576358, "learning_rate": 5.710667881855361e-06, "loss": 1.0856, "step": 3787 }, { "epoch": 0.47, "grad_norm": 7.322038864150666, "learning_rate": 5.708673917086303e-06, "loss": 1.3778, "step": 3788 }, { "epoch": 0.47, "grad_norm": 6.53707734917034, "learning_rate": 5.7066798372955615e-06, "loss": 1.0703, "step": 3789 }, { "epoch": 0.47, "grad_norm": 7.011928702871116, "learning_rate": 5.704685642806782e-06, "loss": 0.9765, "step": 3790 }, { "epoch": 0.47, "grad_norm": 9.112220008512082, "learning_rate": 5.702691333943638e-06, "loss": 1.452, "step": 3791 }, { "epoch": 0.47, "grad_norm": 8.954129496939014, "learning_rate": 5.700696911029816e-06, "loss": 1.3862, "step": 3792 }, { "epoch": 0.47, "grad_norm": 7.25146622010233, "learning_rate": 5.698702374389019e-06, "loss": 1.4617, "step": 3793 }, { "epoch": 0.47, "grad_norm": 6.771516147951393, "learning_rate": 5.696707724344973e-06, "loss": 1.0613, "step": 3794 }, { "epoch": 0.47, "grad_norm": 7.301086873187493, "learning_rate": 5.694712961221423e-06, "loss": 1.0157, "step": 3795 }, { "epoch": 0.47, "grad_norm": 6.846139269445387, "learning_rate": 5.692718085342127e-06, "loss": 1.2062, "step": 3796 }, { "epoch": 0.47, "grad_norm": 6.8218267561935555, "learning_rate": 5.690723097030863e-06, "loss": 0.8921, "step": 3797 }, { "epoch": 0.47, "grad_norm": 6.714110204458978, "learning_rate": 5.688727996611434e-06, "loss": 1.2766, "step": 3798 }, { "epoch": 0.47, "grad_norm": 7.025151689949892, "learning_rate": 5.68673278440765e-06, "loss": 1.6053, "step": 3799 }, { "epoch": 0.47, "grad_norm": 10.263322396322065, "learning_rate": 5.684737460743349e-06, "loss": 1.4152, "step": 3800 }, { "epoch": 0.47, "grad_norm": 5.963374830599548, "learning_rate": 5.682742025942382e-06, "loss": 1.0318, "step": 3801 }, { "epoch": 0.47, "grad_norm": 7.0577440885471505, "learning_rate": 5.680746480328617e-06, "loss": 1.0488, "step": 3802 }, { "epoch": 0.47, "grad_norm": 7.5023937763359045, "learning_rate": 5.678750824225944e-06, "loss": 1.3996, "step": 3803 }, { "epoch": 0.47, "grad_norm": 7.532193561301867, "learning_rate": 5.676755057958272e-06, "loss": 1.0019, "step": 3804 }, { "epoch": 0.47, "grad_norm": 6.736407427751683, "learning_rate": 5.6747591818495185e-06, "loss": 0.9051, "step": 3805 }, { "epoch": 0.47, "grad_norm": 6.697495073362067, "learning_rate": 5.672763196223628e-06, "loss": 1.2084, "step": 3806 }, { "epoch": 0.47, "grad_norm": 6.656083622609074, "learning_rate": 5.6707671014045615e-06, "loss": 1.1628, "step": 3807 }, { "epoch": 0.47, "grad_norm": 7.898243832397909, "learning_rate": 5.668770897716294e-06, "loss": 1.128, "step": 3808 }, { "epoch": 0.47, "grad_norm": 8.370152741839116, "learning_rate": 5.666774585482822e-06, "loss": 1.2085, "step": 3809 }, { "epoch": 0.47, "grad_norm": 7.747333092460203, "learning_rate": 5.664778165028158e-06, "loss": 1.6193, "step": 3810 }, { "epoch": 0.47, "grad_norm": 6.776476823415381, "learning_rate": 5.662781636676329e-06, "loss": 0.9392, "step": 3811 }, { "epoch": 0.47, "grad_norm": 7.010896455543129, "learning_rate": 5.6607850007513876e-06, "loss": 0.8567, "step": 3812 }, { "epoch": 0.47, "grad_norm": 7.332262939844623, "learning_rate": 5.658788257577395e-06, "loss": 1.2295, "step": 3813 }, { "epoch": 0.47, "grad_norm": 6.1013677493894205, "learning_rate": 5.656791407478434e-06, "loss": 0.7536, "step": 3814 }, { "epoch": 0.47, "grad_norm": 7.447053416280338, "learning_rate": 5.654794450778606e-06, "loss": 1.232, "step": 3815 }, { "epoch": 0.47, "grad_norm": 7.751439384411479, "learning_rate": 5.652797387802028e-06, "loss": 1.1557, "step": 3816 }, { "epoch": 0.47, "grad_norm": 6.439095257227618, "learning_rate": 5.650800218872832e-06, "loss": 0.9941, "step": 3817 }, { "epoch": 0.47, "grad_norm": 6.784567313924208, "learning_rate": 5.648802944315171e-06, "loss": 0.8331, "step": 3818 }, { "epoch": 0.47, "grad_norm": 7.06996617521022, "learning_rate": 5.646805564453216e-06, "loss": 1.2968, "step": 3819 }, { "epoch": 0.48, "grad_norm": 8.901880554341053, "learning_rate": 5.64480807961115e-06, "loss": 1.3151, "step": 3820 }, { "epoch": 0.48, "grad_norm": 7.323527840199495, "learning_rate": 5.642810490113176e-06, "loss": 1.3003, "step": 3821 }, { "epoch": 0.48, "grad_norm": 7.658782191164983, "learning_rate": 5.6408127962835135e-06, "loss": 1.0078, "step": 3822 }, { "epoch": 0.48, "grad_norm": 8.644906463588184, "learning_rate": 5.6388149984464004e-06, "loss": 1.5339, "step": 3823 }, { "epoch": 0.48, "grad_norm": 7.899436304923085, "learning_rate": 5.636817096926088e-06, "loss": 1.2253, "step": 3824 }, { "epoch": 0.48, "grad_norm": 7.470941072578524, "learning_rate": 5.634819092046851e-06, "loss": 1.1294, "step": 3825 }, { "epoch": 0.48, "grad_norm": 6.707974743428382, "learning_rate": 5.632820984132973e-06, "loss": 1.0442, "step": 3826 }, { "epoch": 0.48, "grad_norm": 8.149445061281758, "learning_rate": 5.630822773508759e-06, "loss": 1.1534, "step": 3827 }, { "epoch": 0.48, "grad_norm": 7.659163340793592, "learning_rate": 5.628824460498532e-06, "loss": 1.3985, "step": 3828 }, { "epoch": 0.48, "grad_norm": 6.968602523907104, "learning_rate": 5.626826045426625e-06, "loss": 1.2889, "step": 3829 }, { "epoch": 0.48, "grad_norm": 6.534218943869651, "learning_rate": 5.624827528617393e-06, "loss": 1.2467, "step": 3830 }, { "epoch": 0.48, "grad_norm": 6.347244674726057, "learning_rate": 5.6228289103952084e-06, "loss": 1.1994, "step": 3831 }, { "epoch": 0.48, "grad_norm": 6.274213730317362, "learning_rate": 5.620830191084455e-06, "loss": 1.2454, "step": 3832 }, { "epoch": 0.48, "grad_norm": 5.942997080389691, "learning_rate": 5.6188313710095375e-06, "loss": 0.926, "step": 3833 }, { "epoch": 0.48, "grad_norm": 6.657914353385139, "learning_rate": 5.616832450494876e-06, "loss": 1.3465, "step": 3834 }, { "epoch": 0.48, "grad_norm": 5.9470836203841495, "learning_rate": 5.614833429864906e-06, "loss": 0.8938, "step": 3835 }, { "epoch": 0.48, "grad_norm": 6.956943718523864, "learning_rate": 5.6128343094440804e-06, "loss": 1.1718, "step": 3836 }, { "epoch": 0.48, "grad_norm": 7.263973372134467, "learning_rate": 5.610835089556866e-06, "loss": 1.2629, "step": 3837 }, { "epoch": 0.48, "grad_norm": 6.330077748676498, "learning_rate": 5.608835770527748e-06, "loss": 1.4398, "step": 3838 }, { "epoch": 0.48, "grad_norm": 7.119574348071008, "learning_rate": 5.6068363526812245e-06, "loss": 1.2865, "step": 3839 }, { "epoch": 0.48, "grad_norm": 7.683755406168671, "learning_rate": 5.604836836341816e-06, "loss": 1.2121, "step": 3840 }, { "epoch": 0.48, "grad_norm": 6.454743702881112, "learning_rate": 5.602837221834051e-06, "loss": 1.4639, "step": 3841 }, { "epoch": 0.48, "grad_norm": 7.6958505370446275, "learning_rate": 5.600837509482482e-06, "loss": 1.439, "step": 3842 }, { "epoch": 0.48, "grad_norm": 6.508380008683005, "learning_rate": 5.5988376996116714e-06, "loss": 0.8508, "step": 3843 }, { "epoch": 0.48, "grad_norm": 5.929808078022044, "learning_rate": 5.596837792546199e-06, "loss": 0.8652, "step": 3844 }, { "epoch": 0.48, "grad_norm": 8.560676422375943, "learning_rate": 5.5948377886106596e-06, "loss": 1.3853, "step": 3845 }, { "epoch": 0.48, "grad_norm": 6.308305376036877, "learning_rate": 5.5928376881296684e-06, "loss": 0.9029, "step": 3846 }, { "epoch": 0.48, "grad_norm": 8.216732250309684, "learning_rate": 5.5908374914278495e-06, "loss": 1.1848, "step": 3847 }, { "epoch": 0.48, "grad_norm": 6.612016585436929, "learning_rate": 5.588837198829848e-06, "loss": 1.0528, "step": 3848 }, { "epoch": 0.48, "grad_norm": 6.93232370278794, "learning_rate": 5.586836810660321e-06, "loss": 1.2359, "step": 3849 }, { "epoch": 0.48, "grad_norm": 7.295403564874343, "learning_rate": 5.584836327243943e-06, "loss": 1.1107, "step": 3850 }, { "epoch": 0.48, "grad_norm": 7.615619890757838, "learning_rate": 5.582835748905404e-06, "loss": 1.028, "step": 3851 }, { "epoch": 0.48, "grad_norm": 8.771456245099499, "learning_rate": 5.580835075969408e-06, "loss": 1.3602, "step": 3852 }, { "epoch": 0.48, "grad_norm": 6.663907032158555, "learning_rate": 5.5788343087606765e-06, "loss": 1.3681, "step": 3853 }, { "epoch": 0.48, "grad_norm": 8.338481746177777, "learning_rate": 5.576833447603943e-06, "loss": 1.2136, "step": 3854 }, { "epoch": 0.48, "grad_norm": 6.77872162795014, "learning_rate": 5.5748324928239616e-06, "loss": 1.0227, "step": 3855 }, { "epoch": 0.48, "grad_norm": 7.780377371708933, "learning_rate": 5.572831444745494e-06, "loss": 0.9886, "step": 3856 }, { "epoch": 0.48, "grad_norm": 6.6920094692485925, "learning_rate": 5.570830303693324e-06, "loss": 1.2624, "step": 3857 }, { "epoch": 0.48, "grad_norm": 6.158563322814183, "learning_rate": 5.5688290699922485e-06, "loss": 1.4019, "step": 3858 }, { "epoch": 0.48, "grad_norm": 9.494213953460136, "learning_rate": 5.566827743967077e-06, "loss": 1.4804, "step": 3859 }, { "epoch": 0.48, "grad_norm": 6.009401385213196, "learning_rate": 5.564826325942636e-06, "loss": 1.1223, "step": 3860 }, { "epoch": 0.48, "grad_norm": 6.560739797164752, "learning_rate": 5.562824816243769e-06, "loss": 1.0915, "step": 3861 }, { "epoch": 0.48, "grad_norm": 10.260032426439377, "learning_rate": 5.560823215195328e-06, "loss": 0.978, "step": 3862 }, { "epoch": 0.48, "grad_norm": 7.672400025850954, "learning_rate": 5.5588215231221865e-06, "loss": 1.3203, "step": 3863 }, { "epoch": 0.48, "grad_norm": 6.889258248889502, "learning_rate": 5.556819740349231e-06, "loss": 1.1435, "step": 3864 }, { "epoch": 0.48, "grad_norm": 7.030888792454473, "learning_rate": 5.554817867201359e-06, "loss": 1.2952, "step": 3865 }, { "epoch": 0.48, "grad_norm": 6.0314596631554265, "learning_rate": 5.552815904003488e-06, "loss": 0.6096, "step": 3866 }, { "epoch": 0.48, "grad_norm": 7.542342169066547, "learning_rate": 5.550813851080547e-06, "loss": 1.4162, "step": 3867 }, { "epoch": 0.48, "grad_norm": 6.213669130524384, "learning_rate": 5.5488117087574785e-06, "loss": 0.7765, "step": 3868 }, { "epoch": 0.48, "grad_norm": 7.437003821304307, "learning_rate": 5.546809477359245e-06, "loss": 1.2835, "step": 3869 }, { "epoch": 0.48, "grad_norm": 7.153938671985275, "learning_rate": 5.5448071572108175e-06, "loss": 1.2663, "step": 3870 }, { "epoch": 0.48, "grad_norm": 6.864175867800499, "learning_rate": 5.542804748637182e-06, "loss": 0.889, "step": 3871 }, { "epoch": 0.48, "grad_norm": 8.188227146830958, "learning_rate": 5.540802251963342e-06, "loss": 1.2315, "step": 3872 }, { "epoch": 0.48, "grad_norm": 7.9972974692399585, "learning_rate": 5.538799667514315e-06, "loss": 1.3988, "step": 3873 }, { "epoch": 0.48, "grad_norm": 7.163303170415484, "learning_rate": 5.53679699561513e-06, "loss": 1.2121, "step": 3874 }, { "epoch": 0.48, "grad_norm": 7.668176657468609, "learning_rate": 5.5347942365908315e-06, "loss": 1.2426, "step": 3875 }, { "epoch": 0.48, "grad_norm": 7.529788409326356, "learning_rate": 5.53279139076648e-06, "loss": 1.3914, "step": 3876 }, { "epoch": 0.48, "grad_norm": 7.196888673491739, "learning_rate": 5.530788458467147e-06, "loss": 1.0958, "step": 3877 }, { "epoch": 0.48, "grad_norm": 7.861995292171068, "learning_rate": 5.528785440017919e-06, "loss": 1.1467, "step": 3878 }, { "epoch": 0.48, "grad_norm": 6.840735885806106, "learning_rate": 5.526782335743898e-06, "loss": 1.0646, "step": 3879 }, { "epoch": 0.48, "grad_norm": 6.568620297225996, "learning_rate": 5.524779145970198e-06, "loss": 0.9819, "step": 3880 }, { "epoch": 0.48, "grad_norm": 7.347438129464995, "learning_rate": 5.522775871021949e-06, "loss": 1.3614, "step": 3881 }, { "epoch": 0.48, "grad_norm": 7.396806880542534, "learning_rate": 5.520772511224293e-06, "loss": 1.1778, "step": 3882 }, { "epoch": 0.48, "grad_norm": 5.839583564291719, "learning_rate": 5.5187690669023855e-06, "loss": 0.5818, "step": 3883 }, { "epoch": 0.48, "grad_norm": 6.5319925029370225, "learning_rate": 5.5167655383813965e-06, "loss": 0.8693, "step": 3884 }, { "epoch": 0.48, "grad_norm": 6.040507329262205, "learning_rate": 5.514761925986514e-06, "loss": 1.0429, "step": 3885 }, { "epoch": 0.48, "grad_norm": 7.367368162620081, "learning_rate": 5.51275823004293e-06, "loss": 1.199, "step": 3886 }, { "epoch": 0.48, "grad_norm": 6.439592908157939, "learning_rate": 5.510754450875856e-06, "loss": 1.0952, "step": 3887 }, { "epoch": 0.48, "grad_norm": 7.114737880926593, "learning_rate": 5.508750588810519e-06, "loss": 1.3459, "step": 3888 }, { "epoch": 0.48, "grad_norm": 6.815724776762098, "learning_rate": 5.506746644172154e-06, "loss": 1.4011, "step": 3889 }, { "epoch": 0.48, "grad_norm": 7.435346925588585, "learning_rate": 5.504742617286014e-06, "loss": 1.2585, "step": 3890 }, { "epoch": 0.48, "grad_norm": 10.803092672076966, "learning_rate": 5.502738508477365e-06, "loss": 1.3261, "step": 3891 }, { "epoch": 0.48, "grad_norm": 7.269428647593704, "learning_rate": 5.500734318071483e-06, "loss": 1.4096, "step": 3892 }, { "epoch": 0.48, "grad_norm": 7.3739031631883, "learning_rate": 5.498730046393659e-06, "loss": 0.9911, "step": 3893 }, { "epoch": 0.48, "grad_norm": 7.812071685455185, "learning_rate": 5.496725693769198e-06, "loss": 1.1552, "step": 3894 }, { "epoch": 0.48, "grad_norm": 7.5277879093564275, "learning_rate": 5.494721260523417e-06, "loss": 1.3556, "step": 3895 }, { "epoch": 0.48, "grad_norm": 8.49553323268379, "learning_rate": 5.492716746981646e-06, "loss": 1.5877, "step": 3896 }, { "epoch": 0.48, "grad_norm": 7.181715099280721, "learning_rate": 5.490712153469232e-06, "loss": 1.0966, "step": 3897 }, { "epoch": 0.48, "grad_norm": 8.20021875366005, "learning_rate": 5.488707480311527e-06, "loss": 1.3043, "step": 3898 }, { "epoch": 0.48, "grad_norm": 8.571414929056022, "learning_rate": 5.486702727833903e-06, "loss": 1.4145, "step": 3899 }, { "epoch": 0.49, "grad_norm": 7.697299196266378, "learning_rate": 5.4846978963617425e-06, "loss": 1.103, "step": 3900 }, { "epoch": 0.49, "grad_norm": 7.932431296611609, "learning_rate": 5.48269298622044e-06, "loss": 0.8863, "step": 3901 }, { "epoch": 0.49, "grad_norm": 7.676461303541907, "learning_rate": 5.480687997735404e-06, "loss": 1.4517, "step": 3902 }, { "epoch": 0.49, "grad_norm": 7.022910057954097, "learning_rate": 5.478682931232053e-06, "loss": 1.2831, "step": 3903 }, { "epoch": 0.49, "grad_norm": 7.42342858580545, "learning_rate": 5.476677787035823e-06, "loss": 1.4482, "step": 3904 }, { "epoch": 0.49, "grad_norm": 7.1189404437533526, "learning_rate": 5.47467256547216e-06, "loss": 1.009, "step": 3905 }, { "epoch": 0.49, "grad_norm": 5.950622666380751, "learning_rate": 5.472667266866521e-06, "loss": 0.9372, "step": 3906 }, { "epoch": 0.49, "grad_norm": 8.2793367608209, "learning_rate": 5.470661891544378e-06, "loss": 0.9811, "step": 3907 }, { "epoch": 0.49, "grad_norm": 7.40134771424265, "learning_rate": 5.468656439831214e-06, "loss": 1.4757, "step": 3908 }, { "epoch": 0.49, "grad_norm": 6.910213472386008, "learning_rate": 5.466650912052526e-06, "loss": 1.3239, "step": 3909 }, { "epoch": 0.49, "grad_norm": 7.250664984568205, "learning_rate": 5.46464530853382e-06, "loss": 0.9022, "step": 3910 }, { "epoch": 0.49, "grad_norm": 6.499283257089737, "learning_rate": 5.462639629600618e-06, "loss": 1.0734, "step": 3911 }, { "epoch": 0.49, "grad_norm": 6.826488964363927, "learning_rate": 5.460633875578454e-06, "loss": 1.0088, "step": 3912 }, { "epoch": 0.49, "grad_norm": 6.855524915477439, "learning_rate": 5.4586280467928694e-06, "loss": 1.0312, "step": 3913 }, { "epoch": 0.49, "grad_norm": 7.097633915800434, "learning_rate": 5.456622143569423e-06, "loss": 1.1945, "step": 3914 }, { "epoch": 0.49, "grad_norm": 7.387957012847609, "learning_rate": 5.454616166233686e-06, "loss": 1.2689, "step": 3915 }, { "epoch": 0.49, "grad_norm": 6.217861936198142, "learning_rate": 5.452610115111238e-06, "loss": 1.0475, "step": 3916 }, { "epoch": 0.49, "grad_norm": 6.480466815353098, "learning_rate": 5.45060399052767e-06, "loss": 0.9815, "step": 3917 }, { "epoch": 0.49, "grad_norm": 7.377815620849435, "learning_rate": 5.448597792808592e-06, "loss": 1.3437, "step": 3918 }, { "epoch": 0.49, "grad_norm": 7.000023948683544, "learning_rate": 5.446591522279616e-06, "loss": 0.95, "step": 3919 }, { "epoch": 0.49, "grad_norm": 7.642904999382642, "learning_rate": 5.444585179266373e-06, "loss": 1.4528, "step": 3920 }, { "epoch": 0.49, "grad_norm": 6.195060637002851, "learning_rate": 5.442578764094505e-06, "loss": 1.1429, "step": 3921 }, { "epoch": 0.49, "grad_norm": 6.659373087908234, "learning_rate": 5.440572277089661e-06, "loss": 1.1723, "step": 3922 }, { "epoch": 0.49, "grad_norm": 6.743106576239731, "learning_rate": 5.4385657185775065e-06, "loss": 1.2011, "step": 3923 }, { "epoch": 0.49, "grad_norm": 7.6214990306894554, "learning_rate": 5.4365590888837175e-06, "loss": 1.4337, "step": 3924 }, { "epoch": 0.49, "grad_norm": 6.339178103534094, "learning_rate": 5.43455238833398e-06, "loss": 1.4737, "step": 3925 }, { "epoch": 0.49, "grad_norm": 6.5412956187263696, "learning_rate": 5.4325456172539945e-06, "loss": 1.1969, "step": 3926 }, { "epoch": 0.49, "grad_norm": 5.889770182344809, "learning_rate": 5.4305387759694695e-06, "loss": 1.0879, "step": 3927 }, { "epoch": 0.49, "grad_norm": 8.167590432038873, "learning_rate": 5.428531864806126e-06, "loss": 1.082, "step": 3928 }, { "epoch": 0.49, "grad_norm": 6.98584605261284, "learning_rate": 5.426524884089697e-06, "loss": 0.8953, "step": 3929 }, { "epoch": 0.49, "grad_norm": 9.518818088856476, "learning_rate": 5.424517834145928e-06, "loss": 1.2894, "step": 3930 }, { "epoch": 0.49, "grad_norm": 6.7984211694515935, "learning_rate": 5.4225107153005715e-06, "loss": 1.2502, "step": 3931 }, { "epoch": 0.49, "grad_norm": 6.110128033738063, "learning_rate": 5.420503527879397e-06, "loss": 0.6468, "step": 3932 }, { "epoch": 0.49, "grad_norm": 7.397207909123826, "learning_rate": 5.41849627220818e-06, "loss": 1.1735, "step": 3933 }, { "epoch": 0.49, "grad_norm": 18.313315721049957, "learning_rate": 5.416488948612711e-06, "loss": 1.2611, "step": 3934 }, { "epoch": 0.49, "grad_norm": 7.294751239315694, "learning_rate": 5.414481557418788e-06, "loss": 1.4883, "step": 3935 }, { "epoch": 0.49, "grad_norm": 6.630421444266564, "learning_rate": 5.412474098952223e-06, "loss": 0.8558, "step": 3936 }, { "epoch": 0.49, "grad_norm": 6.166660573607759, "learning_rate": 5.410466573538835e-06, "loss": 0.8066, "step": 3937 }, { "epoch": 0.49, "grad_norm": 6.484398695195367, "learning_rate": 5.408458981504458e-06, "loss": 0.9657, "step": 3938 }, { "epoch": 0.49, "grad_norm": 6.956230715412006, "learning_rate": 5.406451323174938e-06, "loss": 1.1445, "step": 3939 }, { "epoch": 0.49, "grad_norm": 7.4091068764408, "learning_rate": 5.404443598876125e-06, "loss": 1.31, "step": 3940 }, { "epoch": 0.49, "grad_norm": 6.934187592491291, "learning_rate": 5.402435808933886e-06, "loss": 1.1037, "step": 3941 }, { "epoch": 0.49, "grad_norm": 7.554188173762802, "learning_rate": 5.400427953674097e-06, "loss": 1.2819, "step": 3942 }, { "epoch": 0.49, "grad_norm": 8.132734644028984, "learning_rate": 5.39842003342264e-06, "loss": 1.3625, "step": 3943 }, { "epoch": 0.49, "grad_norm": 6.875293741960835, "learning_rate": 5.396412048505414e-06, "loss": 1.1224, "step": 3944 }, { "epoch": 0.49, "grad_norm": 8.225816582179705, "learning_rate": 5.394403999248327e-06, "loss": 1.1649, "step": 3945 }, { "epoch": 0.49, "grad_norm": 7.140417139106704, "learning_rate": 5.392395885977296e-06, "loss": 0.9442, "step": 3946 }, { "epoch": 0.49, "grad_norm": 6.411521721869352, "learning_rate": 5.390387709018249e-06, "loss": 0.979, "step": 3947 }, { "epoch": 0.49, "grad_norm": 6.340672715352818, "learning_rate": 5.388379468697124e-06, "loss": 0.8521, "step": 3948 }, { "epoch": 0.49, "grad_norm": 8.723010787677014, "learning_rate": 5.386371165339868e-06, "loss": 1.4114, "step": 3949 }, { "epoch": 0.49, "grad_norm": 8.316011102458075, "learning_rate": 5.384362799272441e-06, "loss": 1.2892, "step": 3950 }, { "epoch": 0.49, "grad_norm": 6.977354417775045, "learning_rate": 5.382354370820813e-06, "loss": 1.0366, "step": 3951 }, { "epoch": 0.49, "grad_norm": 7.341333468406921, "learning_rate": 5.3803458803109606e-06, "loss": 1.0923, "step": 3952 }, { "epoch": 0.49, "grad_norm": 7.398546533476707, "learning_rate": 5.378337328068874e-06, "loss": 1.1143, "step": 3953 }, { "epoch": 0.49, "grad_norm": 6.570935924123186, "learning_rate": 5.376328714420553e-06, "loss": 1.0431, "step": 3954 }, { "epoch": 0.49, "grad_norm": 6.752638415086165, "learning_rate": 5.3743200396920055e-06, "loss": 1.1957, "step": 3955 }, { "epoch": 0.49, "grad_norm": 6.05591136767196, "learning_rate": 5.372311304209252e-06, "loss": 1.1878, "step": 3956 }, { "epoch": 0.49, "grad_norm": 7.75310500464622, "learning_rate": 5.370302508298319e-06, "loss": 1.1537, "step": 3957 }, { "epoch": 0.49, "grad_norm": 7.045043083924495, "learning_rate": 5.3682936522852495e-06, "loss": 0.8994, "step": 3958 }, { "epoch": 0.49, "grad_norm": 7.054109302662891, "learning_rate": 5.3662847364960855e-06, "loss": 1.2796, "step": 3959 }, { "epoch": 0.49, "grad_norm": 8.951258975344079, "learning_rate": 5.364275761256891e-06, "loss": 1.3342, "step": 3960 }, { "epoch": 0.49, "grad_norm": 7.190092137927333, "learning_rate": 5.36226672689373e-06, "loss": 1.252, "step": 3961 }, { "epoch": 0.49, "grad_norm": 8.557976567604026, "learning_rate": 5.360257633732682e-06, "loss": 1.3951, "step": 3962 }, { "epoch": 0.49, "grad_norm": 7.771538504813463, "learning_rate": 5.358248482099832e-06, "loss": 1.369, "step": 3963 }, { "epoch": 0.49, "grad_norm": 6.132244841206086, "learning_rate": 5.3562392723212785e-06, "loss": 0.8532, "step": 3964 }, { "epoch": 0.49, "grad_norm": 6.987015046419487, "learning_rate": 5.354230004723125e-06, "loss": 0.8306, "step": 3965 }, { "epoch": 0.49, "grad_norm": 6.409399687772281, "learning_rate": 5.352220679631491e-06, "loss": 1.2661, "step": 3966 }, { "epoch": 0.49, "grad_norm": 5.573625721444415, "learning_rate": 5.350211297372496e-06, "loss": 0.8704, "step": 3967 }, { "epoch": 0.49, "grad_norm": 7.091054966245592, "learning_rate": 5.348201858272276e-06, "loss": 1.0603, "step": 3968 }, { "epoch": 0.49, "grad_norm": 7.311485979032078, "learning_rate": 5.346192362656975e-06, "loss": 1.5255, "step": 3969 }, { "epoch": 0.49, "grad_norm": 7.002122823909366, "learning_rate": 5.3441828108527425e-06, "loss": 0.8773, "step": 3970 }, { "epoch": 0.49, "grad_norm": 6.550841100041979, "learning_rate": 5.342173203185742e-06, "loss": 0.9379, "step": 3971 }, { "epoch": 0.49, "grad_norm": 7.673422517668238, "learning_rate": 5.340163539982144e-06, "loss": 1.1755, "step": 3972 }, { "epoch": 0.49, "grad_norm": 6.049377172109944, "learning_rate": 5.338153821568127e-06, "loss": 1.1795, "step": 3973 }, { "epoch": 0.49, "grad_norm": 6.706813454145026, "learning_rate": 5.33614404826988e-06, "loss": 0.9867, "step": 3974 }, { "epoch": 0.49, "grad_norm": 7.083377733092305, "learning_rate": 5.3341342204136015e-06, "loss": 1.1052, "step": 3975 }, { "epoch": 0.49, "grad_norm": 8.235804793037953, "learning_rate": 5.332124338325494e-06, "loss": 1.5141, "step": 3976 }, { "epoch": 0.49, "grad_norm": 6.766886569152812, "learning_rate": 5.330114402331774e-06, "loss": 1.0602, "step": 3977 }, { "epoch": 0.49, "grad_norm": 6.761351586460719, "learning_rate": 5.328104412758668e-06, "loss": 1.1301, "step": 3978 }, { "epoch": 0.49, "grad_norm": 7.22118887016914, "learning_rate": 5.326094369932405e-06, "loss": 0.9837, "step": 3979 }, { "epoch": 0.5, "grad_norm": 6.28978381288516, "learning_rate": 5.324084274179228e-06, "loss": 1.0657, "step": 3980 }, { "epoch": 0.5, "grad_norm": 8.32241702977319, "learning_rate": 5.322074125825385e-06, "loss": 1.2606, "step": 3981 }, { "epoch": 0.5, "grad_norm": 7.337311255182966, "learning_rate": 5.3200639251971355e-06, "loss": 1.1682, "step": 3982 }, { "epoch": 0.5, "grad_norm": 6.92923481768463, "learning_rate": 5.318053672620747e-06, "loss": 0.8338, "step": 3983 }, { "epoch": 0.5, "grad_norm": 6.614579358109489, "learning_rate": 5.316043368422492e-06, "loss": 1.1425, "step": 3984 }, { "epoch": 0.5, "grad_norm": 7.238070998670676, "learning_rate": 5.314033012928654e-06, "loss": 1.0659, "step": 3985 }, { "epoch": 0.5, "grad_norm": 7.714903191123649, "learning_rate": 5.312022606465527e-06, "loss": 1.086, "step": 3986 }, { "epoch": 0.5, "grad_norm": 7.78738833253719, "learning_rate": 5.310012149359411e-06, "loss": 1.2187, "step": 3987 }, { "epoch": 0.5, "grad_norm": 7.856044087530598, "learning_rate": 5.308001641936612e-06, "loss": 1.1223, "step": 3988 }, { "epoch": 0.5, "grad_norm": 7.711130099863697, "learning_rate": 5.305991084523448e-06, "loss": 1.3783, "step": 3989 }, { "epoch": 0.5, "grad_norm": 8.570753748407176, "learning_rate": 5.303980477446245e-06, "loss": 1.2369, "step": 3990 }, { "epoch": 0.5, "grad_norm": 5.74789384929065, "learning_rate": 5.301969821031333e-06, "loss": 0.7257, "step": 3991 }, { "epoch": 0.5, "grad_norm": 6.730613459965789, "learning_rate": 5.299959115605053e-06, "loss": 0.9321, "step": 3992 }, { "epoch": 0.5, "grad_norm": 9.419585409660742, "learning_rate": 5.297948361493754e-06, "loss": 1.4793, "step": 3993 }, { "epoch": 0.5, "grad_norm": 8.493235504718065, "learning_rate": 5.295937559023794e-06, "loss": 1.1649, "step": 3994 }, { "epoch": 0.5, "grad_norm": 6.853786644066395, "learning_rate": 5.293926708521534e-06, "loss": 0.9773, "step": 3995 }, { "epoch": 0.5, "grad_norm": 7.739751875903645, "learning_rate": 5.291915810313349e-06, "loss": 1.4351, "step": 3996 }, { "epoch": 0.5, "grad_norm": 6.914061408635845, "learning_rate": 5.289904864725619e-06, "loss": 1.1154, "step": 3997 }, { "epoch": 0.5, "grad_norm": 7.825993931357119, "learning_rate": 5.287893872084729e-06, "loss": 1.2372, "step": 3998 }, { "epoch": 0.5, "grad_norm": 8.013207393565983, "learning_rate": 5.285882832717079e-06, "loss": 1.1608, "step": 3999 }, { "epoch": 0.5, "grad_norm": 7.237237279508866, "learning_rate": 5.283871746949065e-06, "loss": 1.0905, "step": 4000 }, { "epoch": 0.5, "grad_norm": 7.272258455870241, "learning_rate": 5.2818606151071015e-06, "loss": 1.2029, "step": 4001 }, { "epoch": 0.5, "grad_norm": 6.955042954046882, "learning_rate": 5.279849437517607e-06, "loss": 0.9146, "step": 4002 }, { "epoch": 0.5, "grad_norm": 7.602508781810984, "learning_rate": 5.277838214507003e-06, "loss": 1.1958, "step": 4003 }, { "epoch": 0.5, "grad_norm": 7.987038572306938, "learning_rate": 5.275826946401725e-06, "loss": 1.2668, "step": 4004 }, { "epoch": 0.5, "grad_norm": 6.455827012436777, "learning_rate": 5.2738156335282145e-06, "loss": 1.0913, "step": 4005 }, { "epoch": 0.5, "grad_norm": 7.624412739438399, "learning_rate": 5.271804276212914e-06, "loss": 1.4584, "step": 4006 }, { "epoch": 0.5, "grad_norm": 6.462311308853741, "learning_rate": 5.2697928747822825e-06, "loss": 1.1439, "step": 4007 }, { "epoch": 0.5, "grad_norm": 6.893708188436526, "learning_rate": 5.26778142956278e-06, "loss": 0.9127, "step": 4008 }, { "epoch": 0.5, "grad_norm": 7.000909817862087, "learning_rate": 5.265769940880871e-06, "loss": 1.0464, "step": 4009 }, { "epoch": 0.5, "grad_norm": 6.199051187913513, "learning_rate": 5.263758409063037e-06, "loss": 0.9555, "step": 4010 }, { "epoch": 0.5, "grad_norm": 10.382473534531561, "learning_rate": 5.261746834435759e-06, "loss": 1.1334, "step": 4011 }, { "epoch": 0.5, "grad_norm": 6.736203272051082, "learning_rate": 5.259735217325525e-06, "loss": 1.0506, "step": 4012 }, { "epoch": 0.5, "grad_norm": 7.131731481253235, "learning_rate": 5.257723558058833e-06, "loss": 1.2545, "step": 4013 }, { "epoch": 0.5, "grad_norm": 7.644997639342972, "learning_rate": 5.255711856962186e-06, "loss": 1.0171, "step": 4014 }, { "epoch": 0.5, "grad_norm": 6.988623492379718, "learning_rate": 5.253700114362096e-06, "loss": 1.1791, "step": 4015 }, { "epoch": 0.5, "grad_norm": 7.538726281036797, "learning_rate": 5.251688330585076e-06, "loss": 1.2425, "step": 4016 }, { "epoch": 0.5, "grad_norm": 6.541648923689651, "learning_rate": 5.2496765059576534e-06, "loss": 0.9442, "step": 4017 }, { "epoch": 0.5, "grad_norm": 6.990012031642818, "learning_rate": 5.247664640806356e-06, "loss": 0.9896, "step": 4018 }, { "epoch": 0.5, "grad_norm": 6.499642527307588, "learning_rate": 5.24565273545772e-06, "loss": 1.2708, "step": 4019 }, { "epoch": 0.5, "grad_norm": 6.874026450427717, "learning_rate": 5.243640790238292e-06, "loss": 0.9383, "step": 4020 }, { "epoch": 0.5, "grad_norm": 7.865672985696872, "learning_rate": 5.24162880547462e-06, "loss": 1.1715, "step": 4021 }, { "epoch": 0.5, "grad_norm": 7.078567877109842, "learning_rate": 5.23961678149326e-06, "loss": 1.0864, "step": 4022 }, { "epoch": 0.5, "grad_norm": 7.378646265804988, "learning_rate": 5.237604718620776e-06, "loss": 1.2815, "step": 4023 }, { "epoch": 0.5, "grad_norm": 7.235086105058207, "learning_rate": 5.235592617183733e-06, "loss": 0.8349, "step": 4024 }, { "epoch": 0.5, "grad_norm": 7.11135671400903, "learning_rate": 5.233580477508711e-06, "loss": 1.2799, "step": 4025 }, { "epoch": 0.5, "grad_norm": 7.727861037218242, "learning_rate": 5.23156829992229e-06, "loss": 1.1556, "step": 4026 }, { "epoch": 0.5, "grad_norm": 7.124807840420694, "learning_rate": 5.229556084751054e-06, "loss": 1.0676, "step": 4027 }, { "epoch": 0.5, "grad_norm": 6.8550767413912475, "learning_rate": 5.2275438323216e-06, "loss": 1.1722, "step": 4028 }, { "epoch": 0.5, "grad_norm": 6.157034652963288, "learning_rate": 5.225531542960528e-06, "loss": 1.0447, "step": 4029 }, { "epoch": 0.5, "grad_norm": 7.3220081662194, "learning_rate": 5.223519216994442e-06, "loss": 1.1152, "step": 4030 }, { "epoch": 0.5, "grad_norm": 7.352332142766396, "learning_rate": 5.221506854749955e-06, "loss": 1.1501, "step": 4031 }, { "epoch": 0.5, "grad_norm": 7.048433639957885, "learning_rate": 5.219494456553684e-06, "loss": 1.1078, "step": 4032 }, { "epoch": 0.5, "grad_norm": 6.619838495205482, "learning_rate": 5.217482022732252e-06, "loss": 1.2045, "step": 4033 }, { "epoch": 0.5, "grad_norm": 7.530639590832693, "learning_rate": 5.2154695536122865e-06, "loss": 1.0554, "step": 4034 }, { "epoch": 0.5, "grad_norm": 7.783346827794166, "learning_rate": 5.213457049520426e-06, "loss": 1.1973, "step": 4035 }, { "epoch": 0.5, "grad_norm": 6.9281148471645, "learning_rate": 5.211444510783309e-06, "loss": 1.4863, "step": 4036 }, { "epoch": 0.5, "grad_norm": 7.6421681878603005, "learning_rate": 5.209431937727581e-06, "loss": 1.0942, "step": 4037 }, { "epoch": 0.5, "grad_norm": 7.232444823609341, "learning_rate": 5.207419330679895e-06, "loss": 1.1578, "step": 4038 }, { "epoch": 0.5, "grad_norm": 6.832229217771915, "learning_rate": 5.205406689966906e-06, "loss": 0.9125, "step": 4039 }, { "epoch": 0.5, "grad_norm": 10.568956354735036, "learning_rate": 5.203394015915281e-06, "loss": 1.398, "step": 4040 }, { "epoch": 0.5, "grad_norm": 6.804770782775656, "learning_rate": 5.201381308851685e-06, "loss": 1.2993, "step": 4041 }, { "epoch": 0.5, "grad_norm": 7.519883885892597, "learning_rate": 5.19936856910279e-06, "loss": 1.2156, "step": 4042 }, { "epoch": 0.5, "grad_norm": 7.007548304047545, "learning_rate": 5.197355796995277e-06, "loss": 1.2012, "step": 4043 }, { "epoch": 0.5, "grad_norm": 7.56815630986614, "learning_rate": 5.195342992855832e-06, "loss": 1.0277, "step": 4044 }, { "epoch": 0.5, "grad_norm": 7.440001290419303, "learning_rate": 5.193330157011139e-06, "loss": 1.1626, "step": 4045 }, { "epoch": 0.5, "grad_norm": 8.245169037812278, "learning_rate": 5.191317289787896e-06, "loss": 1.4095, "step": 4046 }, { "epoch": 0.5, "grad_norm": 7.9661507865582175, "learning_rate": 5.189304391512803e-06, "loss": 1.2576, "step": 4047 }, { "epoch": 0.5, "grad_norm": 5.980066363317201, "learning_rate": 5.1872914625125625e-06, "loss": 1.0334, "step": 4048 }, { "epoch": 0.5, "grad_norm": 9.41542575136978, "learning_rate": 5.185278503113884e-06, "loss": 1.2568, "step": 4049 }, { "epoch": 0.5, "grad_norm": 8.015307242845635, "learning_rate": 5.183265513643484e-06, "loss": 1.1217, "step": 4050 }, { "epoch": 0.5, "grad_norm": 7.014070632157698, "learning_rate": 5.18125249442808e-06, "loss": 1.1949, "step": 4051 }, { "epoch": 0.5, "grad_norm": 7.247618761034366, "learning_rate": 5.179239445794395e-06, "loss": 1.5638, "step": 4052 }, { "epoch": 0.5, "grad_norm": 6.530863587907661, "learning_rate": 5.17722636806916e-06, "loss": 1.2681, "step": 4053 }, { "epoch": 0.5, "grad_norm": 9.748044096560198, "learning_rate": 5.1752132615791085e-06, "loss": 1.2655, "step": 4054 }, { "epoch": 0.5, "grad_norm": 6.615261967282607, "learning_rate": 5.1732001266509775e-06, "loss": 0.8119, "step": 4055 }, { "epoch": 0.5, "grad_norm": 6.102780496422473, "learning_rate": 5.171186963611513e-06, "loss": 0.9006, "step": 4056 }, { "epoch": 0.5, "grad_norm": 7.423011077800382, "learning_rate": 5.169173772787458e-06, "loss": 1.174, "step": 4057 }, { "epoch": 0.5, "grad_norm": 8.431213816403838, "learning_rate": 5.167160554505567e-06, "loss": 1.1867, "step": 4058 }, { "epoch": 0.5, "grad_norm": 7.395053561851366, "learning_rate": 5.165147309092596e-06, "loss": 1.3307, "step": 4059 }, { "epoch": 0.5, "grad_norm": 8.081846542439926, "learning_rate": 5.163134036875307e-06, "loss": 1.3193, "step": 4060 }, { "epoch": 0.51, "grad_norm": 6.263157757365764, "learning_rate": 5.161120738180462e-06, "loss": 1.0036, "step": 4061 }, { "epoch": 0.51, "grad_norm": 6.397315625577877, "learning_rate": 5.159107413334834e-06, "loss": 0.7885, "step": 4062 }, { "epoch": 0.51, "grad_norm": 6.60596784107888, "learning_rate": 5.157094062665195e-06, "loss": 0.8715, "step": 4063 }, { "epoch": 0.51, "grad_norm": 6.4116802657565986, "learning_rate": 5.1550806864983235e-06, "loss": 1.1465, "step": 4064 }, { "epoch": 0.51, "grad_norm": 8.166058348352234, "learning_rate": 5.153067285161002e-06, "loss": 1.1107, "step": 4065 }, { "epoch": 0.51, "grad_norm": 8.54435435514244, "learning_rate": 5.151053858980014e-06, "loss": 1.3317, "step": 4066 }, { "epoch": 0.51, "grad_norm": 8.32876890173857, "learning_rate": 5.149040408282152e-06, "loss": 1.3466, "step": 4067 }, { "epoch": 0.51, "grad_norm": 7.659186249683715, "learning_rate": 5.147026933394211e-06, "loss": 1.1409, "step": 4068 }, { "epoch": 0.51, "grad_norm": 7.384649940664721, "learning_rate": 5.145013434642987e-06, "loss": 1.3716, "step": 4069 }, { "epoch": 0.51, "grad_norm": 7.375324497081739, "learning_rate": 5.142999912355282e-06, "loss": 1.4516, "step": 4070 }, { "epoch": 0.51, "grad_norm": 7.112881603447969, "learning_rate": 5.140986366857904e-06, "loss": 1.2005, "step": 4071 }, { "epoch": 0.51, "grad_norm": 6.314054324677326, "learning_rate": 5.138972798477661e-06, "loss": 0.7989, "step": 4072 }, { "epoch": 0.51, "grad_norm": 6.675934652823707, "learning_rate": 5.136959207541365e-06, "loss": 1.0608, "step": 4073 }, { "epoch": 0.51, "grad_norm": 6.271264848173644, "learning_rate": 5.134945594375837e-06, "loss": 0.794, "step": 4074 }, { "epoch": 0.51, "grad_norm": 8.168754150783567, "learning_rate": 5.132931959307892e-06, "loss": 1.4683, "step": 4075 }, { "epoch": 0.51, "grad_norm": 6.910547927645689, "learning_rate": 5.1309183026643576e-06, "loss": 1.1219, "step": 4076 }, { "epoch": 0.51, "grad_norm": 5.134415795652555, "learning_rate": 5.128904624772061e-06, "loss": 0.7685, "step": 4077 }, { "epoch": 0.51, "grad_norm": 7.600870042087603, "learning_rate": 5.126890925957832e-06, "loss": 1.0128, "step": 4078 }, { "epoch": 0.51, "grad_norm": 6.975819236194821, "learning_rate": 5.124877206548505e-06, "loss": 1.1516, "step": 4079 }, { "epoch": 0.51, "grad_norm": 6.940409601203015, "learning_rate": 5.122863466870922e-06, "loss": 1.3498, "step": 4080 }, { "epoch": 0.51, "grad_norm": 7.29063855192545, "learning_rate": 5.120849707251918e-06, "loss": 1.255, "step": 4081 }, { "epoch": 0.51, "grad_norm": 6.472563462070199, "learning_rate": 5.118835928018341e-06, "loss": 1.1001, "step": 4082 }, { "epoch": 0.51, "grad_norm": 6.8832409138912185, "learning_rate": 5.116822129497038e-06, "loss": 0.9759, "step": 4083 }, { "epoch": 0.51, "grad_norm": 7.536509860090411, "learning_rate": 5.114808312014857e-06, "loss": 1.245, "step": 4084 }, { "epoch": 0.51, "grad_norm": 6.165066224979949, "learning_rate": 5.1127944758986545e-06, "loss": 1.0132, "step": 4085 }, { "epoch": 0.51, "grad_norm": 6.68923623610015, "learning_rate": 5.110780621475287e-06, "loss": 1.1361, "step": 4086 }, { "epoch": 0.51, "grad_norm": 7.366614569466846, "learning_rate": 5.108766749071614e-06, "loss": 0.9496, "step": 4087 }, { "epoch": 0.51, "grad_norm": 7.072895028794679, "learning_rate": 5.106752859014496e-06, "loss": 0.9025, "step": 4088 }, { "epoch": 0.51, "grad_norm": 6.649710120923273, "learning_rate": 5.104738951630804e-06, "loss": 1.1822, "step": 4089 }, { "epoch": 0.51, "grad_norm": 5.846100543345003, "learning_rate": 5.102725027247399e-06, "loss": 0.8075, "step": 4090 }, { "epoch": 0.51, "grad_norm": 7.234164884011394, "learning_rate": 5.100711086191157e-06, "loss": 0.8021, "step": 4091 }, { "epoch": 0.51, "grad_norm": 6.899493163962765, "learning_rate": 5.098697128788951e-06, "loss": 1.1524, "step": 4092 }, { "epoch": 0.51, "grad_norm": 7.7894210542216475, "learning_rate": 5.0966831553676545e-06, "loss": 1.528, "step": 4093 }, { "epoch": 0.51, "grad_norm": 7.965653176847485, "learning_rate": 5.0946691662541496e-06, "loss": 1.2179, "step": 4094 }, { "epoch": 0.51, "grad_norm": 7.057365846306175, "learning_rate": 5.092655161775317e-06, "loss": 1.1665, "step": 4095 }, { "epoch": 0.51, "grad_norm": 7.621641974265626, "learning_rate": 5.0906411422580405e-06, "loss": 1.5858, "step": 4096 }, { "epoch": 0.51, "grad_norm": 6.720979573121718, "learning_rate": 5.088627108029207e-06, "loss": 1.0934, "step": 4097 }, { "epoch": 0.51, "grad_norm": 6.560081581336576, "learning_rate": 5.086613059415706e-06, "loss": 1.1145, "step": 4098 }, { "epoch": 0.51, "grad_norm": 7.959362503490319, "learning_rate": 5.084598996744426e-06, "loss": 1.4982, "step": 4099 }, { "epoch": 0.51, "grad_norm": 7.810545547579748, "learning_rate": 5.082584920342262e-06, "loss": 1.0711, "step": 4100 }, { "epoch": 0.51, "grad_norm": 7.6532280915067545, "learning_rate": 5.080570830536111e-06, "loss": 0.9066, "step": 4101 }, { "epoch": 0.51, "grad_norm": 8.138131126543607, "learning_rate": 5.0785567276528695e-06, "loss": 1.1204, "step": 4102 }, { "epoch": 0.51, "grad_norm": 7.003245619570274, "learning_rate": 5.076542612019437e-06, "loss": 0.9966, "step": 4103 }, { "epoch": 0.51, "grad_norm": 6.480112684301249, "learning_rate": 5.0745284839627185e-06, "loss": 1.0229, "step": 4104 }, { "epoch": 0.51, "grad_norm": 7.650187679821789, "learning_rate": 5.0725143438096154e-06, "loss": 1.308, "step": 4105 }, { "epoch": 0.51, "grad_norm": 6.741312405247021, "learning_rate": 5.070500191887034e-06, "loss": 1.1299, "step": 4106 }, { "epoch": 0.51, "grad_norm": 6.88245286373686, "learning_rate": 5.068486028521883e-06, "loss": 0.9842, "step": 4107 }, { "epoch": 0.51, "grad_norm": 7.16238733300302, "learning_rate": 5.06647185404107e-06, "loss": 1.0965, "step": 4108 }, { "epoch": 0.51, "grad_norm": 7.338952898617764, "learning_rate": 5.0644576687715095e-06, "loss": 1.2338, "step": 4109 }, { "epoch": 0.51, "grad_norm": 7.958699608215351, "learning_rate": 5.062443473040115e-06, "loss": 1.4212, "step": 4110 }, { "epoch": 0.51, "grad_norm": 7.3179036252936704, "learning_rate": 5.060429267173799e-06, "loss": 1.4482, "step": 4111 }, { "epoch": 0.51, "grad_norm": 7.707967805121122, "learning_rate": 5.05841505149948e-06, "loss": 1.1741, "step": 4112 }, { "epoch": 0.51, "grad_norm": 6.569394096445711, "learning_rate": 5.056400826344078e-06, "loss": 1.3775, "step": 4113 }, { "epoch": 0.51, "grad_norm": 7.105795277749617, "learning_rate": 5.054386592034508e-06, "loss": 1.1778, "step": 4114 }, { "epoch": 0.51, "grad_norm": 6.62941755099159, "learning_rate": 5.052372348897695e-06, "loss": 1.1199, "step": 4115 }, { "epoch": 0.51, "grad_norm": 8.188356926913329, "learning_rate": 5.050358097260562e-06, "loss": 1.1864, "step": 4116 }, { "epoch": 0.51, "grad_norm": 7.158950612865394, "learning_rate": 5.04834383745003e-06, "loss": 1.2831, "step": 4117 }, { "epoch": 0.51, "grad_norm": 7.14267575552977, "learning_rate": 5.046329569793027e-06, "loss": 1.0845, "step": 4118 }, { "epoch": 0.51, "grad_norm": 6.357811035582972, "learning_rate": 5.044315294616481e-06, "loss": 0.9103, "step": 4119 }, { "epoch": 0.51, "grad_norm": 7.448686351771975, "learning_rate": 5.042301012247317e-06, "loss": 1.1018, "step": 4120 }, { "epoch": 0.51, "grad_norm": 8.469920990648621, "learning_rate": 5.040286723012467e-06, "loss": 1.6424, "step": 4121 }, { "epoch": 0.51, "grad_norm": 6.6907317285063, "learning_rate": 5.038272427238858e-06, "loss": 0.9924, "step": 4122 }, { "epoch": 0.51, "grad_norm": 7.785815462393574, "learning_rate": 5.036258125253423e-06, "loss": 1.5035, "step": 4123 }, { "epoch": 0.51, "grad_norm": 7.551314864342461, "learning_rate": 5.034243817383094e-06, "loss": 1.3255, "step": 4124 }, { "epoch": 0.51, "grad_norm": 5.840889110197306, "learning_rate": 5.032229503954807e-06, "loss": 0.6532, "step": 4125 }, { "epoch": 0.51, "grad_norm": 6.438002206925105, "learning_rate": 5.030215185295491e-06, "loss": 0.9564, "step": 4126 }, { "epoch": 0.51, "grad_norm": 6.68485720498124, "learning_rate": 5.028200861732083e-06, "loss": 0.9671, "step": 4127 }, { "epoch": 0.51, "grad_norm": 7.494167582454221, "learning_rate": 5.0261865335915225e-06, "loss": 1.3392, "step": 4128 }, { "epoch": 0.51, "grad_norm": 7.530529655461214, "learning_rate": 5.024172201200743e-06, "loss": 1.364, "step": 4129 }, { "epoch": 0.51, "grad_norm": 7.939605965884079, "learning_rate": 5.02215786488668e-06, "loss": 1.2747, "step": 4130 }, { "epoch": 0.51, "grad_norm": 7.938899672807934, "learning_rate": 5.020143524976275e-06, "loss": 1.3151, "step": 4131 }, { "epoch": 0.51, "grad_norm": 8.074096881090039, "learning_rate": 5.0181291817964635e-06, "loss": 1.2542, "step": 4132 }, { "epoch": 0.51, "grad_norm": 9.005950532890507, "learning_rate": 5.016114835674186e-06, "loss": 1.6661, "step": 4133 }, { "epoch": 0.51, "grad_norm": 7.080635953192988, "learning_rate": 5.014100486936383e-06, "loss": 1.1688, "step": 4134 }, { "epoch": 0.51, "grad_norm": 7.452409632874269, "learning_rate": 5.012086135909991e-06, "loss": 1.2591, "step": 4135 }, { "epoch": 0.51, "grad_norm": 6.190045736943634, "learning_rate": 5.010071782921954e-06, "loss": 0.9749, "step": 4136 }, { "epoch": 0.51, "grad_norm": 6.173455127293543, "learning_rate": 5.008057428299213e-06, "loss": 1.1703, "step": 4137 }, { "epoch": 0.51, "grad_norm": 6.7963515207652385, "learning_rate": 5.006043072368704e-06, "loss": 1.1507, "step": 4138 }, { "epoch": 0.51, "grad_norm": 7.286307793052436, "learning_rate": 5.0040287154573715e-06, "loss": 1.1709, "step": 4139 }, { "epoch": 0.51, "grad_norm": 5.403464483468694, "learning_rate": 5.002014357892158e-06, "loss": 0.9603, "step": 4140 }, { "epoch": 0.52, "grad_norm": 7.552986220637135, "learning_rate": 5e-06, "loss": 1.4009, "step": 4141 }, { "epoch": 0.52, "grad_norm": 6.617532676853811, "learning_rate": 4.997985642107845e-06, "loss": 1.0291, "step": 4142 }, { "epoch": 0.52, "grad_norm": 6.80453560950925, "learning_rate": 4.99597128454263e-06, "loss": 1.2371, "step": 4143 }, { "epoch": 0.52, "grad_norm": 7.793476274328617, "learning_rate": 4.993956927631298e-06, "loss": 1.2786, "step": 4144 }, { "epoch": 0.52, "grad_norm": 6.121574118043658, "learning_rate": 4.9919425717007884e-06, "loss": 1.0432, "step": 4145 }, { "epoch": 0.52, "grad_norm": 6.391419644949426, "learning_rate": 4.989928217078047e-06, "loss": 1.0144, "step": 4146 }, { "epoch": 0.52, "grad_norm": 6.65054429912239, "learning_rate": 4.987913864090009e-06, "loss": 1.0339, "step": 4147 }, { "epoch": 0.52, "grad_norm": 7.278801332956755, "learning_rate": 4.985899513063618e-06, "loss": 1.2256, "step": 4148 }, { "epoch": 0.52, "grad_norm": 6.698807966894739, "learning_rate": 4.983885164325814e-06, "loss": 1.1758, "step": 4149 }, { "epoch": 0.52, "grad_norm": 7.194461463465861, "learning_rate": 4.981870818203537e-06, "loss": 1.1423, "step": 4150 }, { "epoch": 0.52, "grad_norm": 6.308387042904766, "learning_rate": 4.979856475023727e-06, "loss": 1.0855, "step": 4151 }, { "epoch": 0.52, "grad_norm": 7.083107780181116, "learning_rate": 4.977842135113321e-06, "loss": 0.8821, "step": 4152 }, { "epoch": 0.52, "grad_norm": 7.054105621197241, "learning_rate": 4.9758277987992596e-06, "loss": 1.1805, "step": 4153 }, { "epoch": 0.52, "grad_norm": 6.9072693866393, "learning_rate": 4.9738134664084775e-06, "loss": 1.0726, "step": 4154 }, { "epoch": 0.52, "grad_norm": 7.172934768381804, "learning_rate": 4.971799138267918e-06, "loss": 1.2129, "step": 4155 }, { "epoch": 0.52, "grad_norm": 7.284943146862981, "learning_rate": 4.96978481470451e-06, "loss": 1.2931, "step": 4156 }, { "epoch": 0.52, "grad_norm": 7.597008581900943, "learning_rate": 4.967770496045196e-06, "loss": 1.1265, "step": 4157 }, { "epoch": 0.52, "grad_norm": 8.297676936867267, "learning_rate": 4.965756182616907e-06, "loss": 1.2965, "step": 4158 }, { "epoch": 0.52, "grad_norm": 7.269535172721976, "learning_rate": 4.963741874746578e-06, "loss": 1.2048, "step": 4159 }, { "epoch": 0.52, "grad_norm": 5.8372289481128625, "learning_rate": 4.961727572761144e-06, "loss": 0.8724, "step": 4160 }, { "epoch": 0.52, "grad_norm": 7.400335979990324, "learning_rate": 4.959713276987535e-06, "loss": 1.1717, "step": 4161 }, { "epoch": 0.52, "grad_norm": 6.450325328102365, "learning_rate": 4.957698987752684e-06, "loss": 1.0606, "step": 4162 }, { "epoch": 0.52, "grad_norm": 7.335482559448443, "learning_rate": 4.955684705383519e-06, "loss": 1.3453, "step": 4163 }, { "epoch": 0.52, "grad_norm": 7.013403686674356, "learning_rate": 4.9536704302069736e-06, "loss": 1.2473, "step": 4164 }, { "epoch": 0.52, "grad_norm": 7.506094079872789, "learning_rate": 4.95165616254997e-06, "loss": 1.1495, "step": 4165 }, { "epoch": 0.52, "grad_norm": 7.396276492241152, "learning_rate": 4.94964190273944e-06, "loss": 1.2098, "step": 4166 }, { "epoch": 0.52, "grad_norm": 7.1293016247990195, "learning_rate": 4.947627651102307e-06, "loss": 0.7751, "step": 4167 }, { "epoch": 0.52, "grad_norm": 8.343287425685748, "learning_rate": 4.945613407965494e-06, "loss": 1.1489, "step": 4168 }, { "epoch": 0.52, "grad_norm": 6.38687528330113, "learning_rate": 4.943599173655924e-06, "loss": 0.8298, "step": 4169 }, { "epoch": 0.52, "grad_norm": 7.556617581965195, "learning_rate": 4.9415849485005205e-06, "loss": 1.2306, "step": 4170 }, { "epoch": 0.52, "grad_norm": 6.34624484936924, "learning_rate": 4.939570732826203e-06, "loss": 0.9236, "step": 4171 }, { "epoch": 0.52, "grad_norm": 7.098130881306494, "learning_rate": 4.9375565269598865e-06, "loss": 1.0222, "step": 4172 }, { "epoch": 0.52, "grad_norm": 6.623725895814701, "learning_rate": 4.935542331228491e-06, "loss": 1.0153, "step": 4173 }, { "epoch": 0.52, "grad_norm": 7.154189584784748, "learning_rate": 4.933528145958932e-06, "loss": 0.9513, "step": 4174 }, { "epoch": 0.52, "grad_norm": 7.9399014449927385, "learning_rate": 4.931513971478119e-06, "loss": 1.0794, "step": 4175 }, { "epoch": 0.52, "grad_norm": 7.931862993874377, "learning_rate": 4.929499808112969e-06, "loss": 1.2664, "step": 4176 }, { "epoch": 0.52, "grad_norm": 6.725897175971271, "learning_rate": 4.927485656190386e-06, "loss": 1.0328, "step": 4177 }, { "epoch": 0.52, "grad_norm": 7.288458302769841, "learning_rate": 4.925471516037283e-06, "loss": 1.0175, "step": 4178 }, { "epoch": 0.52, "grad_norm": 6.29487888035646, "learning_rate": 4.923457387980563e-06, "loss": 0.7411, "step": 4179 }, { "epoch": 0.52, "grad_norm": 7.614023071043973, "learning_rate": 4.921443272347131e-06, "loss": 1.217, "step": 4180 }, { "epoch": 0.52, "grad_norm": 7.5020186564262135, "learning_rate": 4.91942916946389e-06, "loss": 1.3407, "step": 4181 }, { "epoch": 0.52, "grad_norm": 7.701170941297545, "learning_rate": 4.91741507965774e-06, "loss": 1.3106, "step": 4182 }, { "epoch": 0.52, "grad_norm": 6.7359456688224775, "learning_rate": 4.915401003255577e-06, "loss": 1.3199, "step": 4183 }, { "epoch": 0.52, "grad_norm": 7.73863220154665, "learning_rate": 4.913386940584296e-06, "loss": 1.3461, "step": 4184 }, { "epoch": 0.52, "grad_norm": 9.700515844622272, "learning_rate": 4.911372891970796e-06, "loss": 1.0787, "step": 4185 }, { "epoch": 0.52, "grad_norm": 8.39034167396905, "learning_rate": 4.9093588577419595e-06, "loss": 1.5726, "step": 4186 }, { "epoch": 0.52, "grad_norm": 7.124612177698425, "learning_rate": 4.907344838224684e-06, "loss": 1.2527, "step": 4187 }, { "epoch": 0.52, "grad_norm": 7.257414554099096, "learning_rate": 4.905330833745851e-06, "loss": 1.1374, "step": 4188 }, { "epoch": 0.52, "grad_norm": 6.5045310498267614, "learning_rate": 4.903316844632347e-06, "loss": 1.152, "step": 4189 }, { "epoch": 0.52, "grad_norm": 7.941982326095762, "learning_rate": 4.9013028712110526e-06, "loss": 1.0908, "step": 4190 }, { "epoch": 0.52, "grad_norm": 7.447817716978012, "learning_rate": 4.899288913808844e-06, "loss": 1.2557, "step": 4191 }, { "epoch": 0.52, "grad_norm": 7.800815084622353, "learning_rate": 4.897274972752602e-06, "loss": 1.2222, "step": 4192 }, { "epoch": 0.52, "grad_norm": 6.00262167871202, "learning_rate": 4.895261048369197e-06, "loss": 0.8605, "step": 4193 }, { "epoch": 0.52, "grad_norm": 7.001719207854625, "learning_rate": 4.8932471409855045e-06, "loss": 0.8924, "step": 4194 }, { "epoch": 0.52, "grad_norm": 6.295724408696917, "learning_rate": 4.891233250928386e-06, "loss": 1.1312, "step": 4195 }, { "epoch": 0.52, "grad_norm": 7.214666723322393, "learning_rate": 4.8892193785247135e-06, "loss": 1.0555, "step": 4196 }, { "epoch": 0.52, "grad_norm": 7.241055835854803, "learning_rate": 4.8872055241013455e-06, "loss": 1.0295, "step": 4197 }, { "epoch": 0.52, "grad_norm": 7.511445998074829, "learning_rate": 4.8851916879851445e-06, "loss": 1.1214, "step": 4198 }, { "epoch": 0.52, "grad_norm": 6.413891294177102, "learning_rate": 4.8831778705029644e-06, "loss": 1.0276, "step": 4199 }, { "epoch": 0.52, "grad_norm": 7.3820437029082635, "learning_rate": 4.88116407198166e-06, "loss": 1.2257, "step": 4200 }, { "epoch": 0.52, "grad_norm": 7.626113685140828, "learning_rate": 4.879150292748084e-06, "loss": 1.2326, "step": 4201 }, { "epoch": 0.52, "grad_norm": 7.905093348442066, "learning_rate": 4.877136533129079e-06, "loss": 1.2465, "step": 4202 }, { "epoch": 0.52, "grad_norm": 6.3970635347195985, "learning_rate": 4.875122793451496e-06, "loss": 1.0873, "step": 4203 }, { "epoch": 0.52, "grad_norm": 7.224901640802448, "learning_rate": 4.873109074042169e-06, "loss": 1.0482, "step": 4204 }, { "epoch": 0.52, "grad_norm": 7.252853327556815, "learning_rate": 4.871095375227941e-06, "loss": 1.268, "step": 4205 }, { "epoch": 0.52, "grad_norm": 7.637935664046721, "learning_rate": 4.869081697335643e-06, "loss": 1.2417, "step": 4206 }, { "epoch": 0.52, "grad_norm": 7.083935521514835, "learning_rate": 4.867068040692109e-06, "loss": 1.3001, "step": 4207 }, { "epoch": 0.52, "grad_norm": 6.575292129701769, "learning_rate": 4.865054405624166e-06, "loss": 0.9453, "step": 4208 }, { "epoch": 0.52, "grad_norm": 8.304564614510895, "learning_rate": 4.863040792458636e-06, "loss": 1.4949, "step": 4209 }, { "epoch": 0.52, "grad_norm": 7.0338965333437065, "learning_rate": 4.861027201522341e-06, "loss": 1.1978, "step": 4210 }, { "epoch": 0.52, "grad_norm": 7.193549459491062, "learning_rate": 4.859013633142096e-06, "loss": 1.0898, "step": 4211 }, { "epoch": 0.52, "grad_norm": 6.502955125144753, "learning_rate": 4.8570000876447185e-06, "loss": 1.2453, "step": 4212 }, { "epoch": 0.52, "grad_norm": 9.16693146435837, "learning_rate": 4.854986565357013e-06, "loss": 1.2417, "step": 4213 }, { "epoch": 0.52, "grad_norm": 8.11217316836057, "learning_rate": 4.85297306660579e-06, "loss": 1.0267, "step": 4214 }, { "epoch": 0.52, "grad_norm": 6.82375707794743, "learning_rate": 4.850959591717849e-06, "loss": 1.2764, "step": 4215 }, { "epoch": 0.52, "grad_norm": 8.210021957178602, "learning_rate": 4.8489461410199864e-06, "loss": 1.137, "step": 4216 }, { "epoch": 0.52, "grad_norm": 7.161092785127246, "learning_rate": 4.8469327148390005e-06, "loss": 1.0291, "step": 4217 }, { "epoch": 0.52, "grad_norm": 7.359719490500919, "learning_rate": 4.844919313501677e-06, "loss": 1.3596, "step": 4218 }, { "epoch": 0.52, "grad_norm": 8.18445670731447, "learning_rate": 4.842905937334806e-06, "loss": 1.2807, "step": 4219 }, { "epoch": 0.52, "grad_norm": 7.132923599411205, "learning_rate": 4.840892586665165e-06, "loss": 1.378, "step": 4220 }, { "epoch": 0.53, "grad_norm": 7.125126683636772, "learning_rate": 4.838879261819539e-06, "loss": 1.3611, "step": 4221 }, { "epoch": 0.53, "grad_norm": 7.939495422762576, "learning_rate": 4.836865963124694e-06, "loss": 1.31, "step": 4222 }, { "epoch": 0.53, "grad_norm": 6.979125074793841, "learning_rate": 4.8348526909074046e-06, "loss": 1.0983, "step": 4223 }, { "epoch": 0.53, "grad_norm": 6.964962832999563, "learning_rate": 4.832839445494435e-06, "loss": 1.1429, "step": 4224 }, { "epoch": 0.53, "grad_norm": 6.4015505977096625, "learning_rate": 4.830826227212543e-06, "loss": 1.0092, "step": 4225 }, { "epoch": 0.53, "grad_norm": 8.297008722718099, "learning_rate": 4.82881303638849e-06, "loss": 1.2304, "step": 4226 }, { "epoch": 0.53, "grad_norm": 6.746780280963342, "learning_rate": 4.826799873349023e-06, "loss": 0.9928, "step": 4227 }, { "epoch": 0.53, "grad_norm": 8.14065194743012, "learning_rate": 4.824786738420893e-06, "loss": 1.1268, "step": 4228 }, { "epoch": 0.53, "grad_norm": 6.462254084572075, "learning_rate": 4.82277363193084e-06, "loss": 1.2574, "step": 4229 }, { "epoch": 0.53, "grad_norm": 7.122884393399005, "learning_rate": 4.820760554205607e-06, "loss": 1.2693, "step": 4230 }, { "epoch": 0.53, "grad_norm": 7.150586992446739, "learning_rate": 4.818747505571923e-06, "loss": 1.1906, "step": 4231 }, { "epoch": 0.53, "grad_norm": 7.209360756832252, "learning_rate": 4.816734486356518e-06, "loss": 1.0084, "step": 4232 }, { "epoch": 0.53, "grad_norm": 7.022030589245008, "learning_rate": 4.8147214968861175e-06, "loss": 1.0221, "step": 4233 }, { "epoch": 0.53, "grad_norm": 7.1420476240040855, "learning_rate": 4.812708537487438e-06, "loss": 1.1045, "step": 4234 }, { "epoch": 0.53, "grad_norm": 7.349127562922637, "learning_rate": 4.810695608487199e-06, "loss": 0.9454, "step": 4235 }, { "epoch": 0.53, "grad_norm": 6.528993743670022, "learning_rate": 4.808682710212104e-06, "loss": 1.1006, "step": 4236 }, { "epoch": 0.53, "grad_norm": 7.2576746039775815, "learning_rate": 4.806669842988862e-06, "loss": 1.1639, "step": 4237 }, { "epoch": 0.53, "grad_norm": 8.137309207661783, "learning_rate": 4.8046570071441704e-06, "loss": 0.8267, "step": 4238 }, { "epoch": 0.53, "grad_norm": 6.663231673450604, "learning_rate": 4.802644203004724e-06, "loss": 1.2571, "step": 4239 }, { "epoch": 0.53, "grad_norm": 6.041446267858211, "learning_rate": 4.800631430897212e-06, "loss": 0.7156, "step": 4240 }, { "epoch": 0.53, "grad_norm": 8.281628211551979, "learning_rate": 4.7986186911483166e-06, "loss": 1.0093, "step": 4241 }, { "epoch": 0.53, "grad_norm": 7.335258993191719, "learning_rate": 4.7966059840847216e-06, "loss": 1.1321, "step": 4242 }, { "epoch": 0.53, "grad_norm": 7.566505190547188, "learning_rate": 4.794593310033094e-06, "loss": 1.263, "step": 4243 }, { "epoch": 0.53, "grad_norm": 7.527458368898604, "learning_rate": 4.792580669320106e-06, "loss": 1.3003, "step": 4244 }, { "epoch": 0.53, "grad_norm": 7.811879149463108, "learning_rate": 4.79056806227242e-06, "loss": 1.2605, "step": 4245 }, { "epoch": 0.53, "grad_norm": 8.350672477272619, "learning_rate": 4.788555489216692e-06, "loss": 1.3912, "step": 4246 }, { "epoch": 0.53, "grad_norm": 7.463307468925997, "learning_rate": 4.786542950479576e-06, "loss": 1.2917, "step": 4247 }, { "epoch": 0.53, "grad_norm": 6.52244898190693, "learning_rate": 4.784530446387714e-06, "loss": 0.8816, "step": 4248 }, { "epoch": 0.53, "grad_norm": 7.435547197472882, "learning_rate": 4.782517977267751e-06, "loss": 0.7504, "step": 4249 }, { "epoch": 0.53, "grad_norm": 7.7485727369848325, "learning_rate": 4.780505543446317e-06, "loss": 1.2304, "step": 4250 }, { "epoch": 0.53, "grad_norm": 6.986459176558435, "learning_rate": 4.778493145250047e-06, "loss": 1.172, "step": 4251 }, { "epoch": 0.53, "grad_norm": 8.020504630153061, "learning_rate": 4.7764807830055584e-06, "loss": 1.244, "step": 4252 }, { "epoch": 0.53, "grad_norm": 8.30047860346315, "learning_rate": 4.7744684570394734e-06, "loss": 1.005, "step": 4253 }, { "epoch": 0.53, "grad_norm": 8.05534292937712, "learning_rate": 4.7724561676784e-06, "loss": 1.1508, "step": 4254 }, { "epoch": 0.53, "grad_norm": 7.829700142916105, "learning_rate": 4.7704439152489475e-06, "loss": 1.2043, "step": 4255 }, { "epoch": 0.53, "grad_norm": 7.322750430489215, "learning_rate": 4.768431700077714e-06, "loss": 1.2114, "step": 4256 }, { "epoch": 0.53, "grad_norm": 7.489509068338132, "learning_rate": 4.766419522491291e-06, "loss": 1.1141, "step": 4257 }, { "epoch": 0.53, "grad_norm": 7.078392592602177, "learning_rate": 4.764407382816268e-06, "loss": 1.1461, "step": 4258 }, { "epoch": 0.53, "grad_norm": 10.491732561553052, "learning_rate": 4.762395281379226e-06, "loss": 1.285, "step": 4259 }, { "epoch": 0.53, "grad_norm": 7.586941518783628, "learning_rate": 4.760383218506742e-06, "loss": 1.3451, "step": 4260 }, { "epoch": 0.53, "grad_norm": 6.426952747491188, "learning_rate": 4.75837119452538e-06, "loss": 1.0285, "step": 4261 }, { "epoch": 0.53, "grad_norm": 6.7525053200270815, "learning_rate": 4.756359209761708e-06, "loss": 1.127, "step": 4262 }, { "epoch": 0.53, "grad_norm": 6.669164299632626, "learning_rate": 4.75434726454228e-06, "loss": 1.0089, "step": 4263 }, { "epoch": 0.53, "grad_norm": 8.033836831279702, "learning_rate": 4.752335359193646e-06, "loss": 1.0592, "step": 4264 }, { "epoch": 0.53, "grad_norm": 7.305623202657595, "learning_rate": 4.750323494042349e-06, "loss": 1.2298, "step": 4265 }, { "epoch": 0.53, "grad_norm": 6.3031267965792095, "learning_rate": 4.748311669414926e-06, "loss": 0.9329, "step": 4266 }, { "epoch": 0.53, "grad_norm": 6.569054655320031, "learning_rate": 4.7462998856379065e-06, "loss": 1.0006, "step": 4267 }, { "epoch": 0.53, "grad_norm": 6.769337348786256, "learning_rate": 4.7442881430378144e-06, "loss": 1.0757, "step": 4268 }, { "epoch": 0.53, "grad_norm": 10.139038509789854, "learning_rate": 4.742276441941169e-06, "loss": 1.0228, "step": 4269 }, { "epoch": 0.53, "grad_norm": 6.313269377023376, "learning_rate": 4.740264782674475e-06, "loss": 0.9373, "step": 4270 }, { "epoch": 0.53, "grad_norm": 7.2926295295199655, "learning_rate": 4.738253165564243e-06, "loss": 1.2685, "step": 4271 }, { "epoch": 0.53, "grad_norm": 6.789400091599826, "learning_rate": 4.736241590936965e-06, "loss": 1.1204, "step": 4272 }, { "epoch": 0.53, "grad_norm": 6.989450962388572, "learning_rate": 4.7342300591191295e-06, "loss": 0.9474, "step": 4273 }, { "epoch": 0.53, "grad_norm": 7.142584106364201, "learning_rate": 4.732218570437224e-06, "loss": 1.0375, "step": 4274 }, { "epoch": 0.53, "grad_norm": 9.092917759183356, "learning_rate": 4.730207125217719e-06, "loss": 1.6002, "step": 4275 }, { "epoch": 0.53, "grad_norm": 7.6948363882936155, "learning_rate": 4.7281957237870865e-06, "loss": 1.4027, "step": 4276 }, { "epoch": 0.53, "grad_norm": 8.235116078067367, "learning_rate": 4.726184366471786e-06, "loss": 0.9511, "step": 4277 }, { "epoch": 0.53, "grad_norm": 6.353229751873773, "learning_rate": 4.724173053598276e-06, "loss": 0.9124, "step": 4278 }, { "epoch": 0.53, "grad_norm": 7.434907331848723, "learning_rate": 4.722161785492997e-06, "loss": 1.2984, "step": 4279 }, { "epoch": 0.53, "grad_norm": 6.9798318568880235, "learning_rate": 4.720150562482395e-06, "loss": 0.7418, "step": 4280 }, { "epoch": 0.53, "grad_norm": 6.989894221452927, "learning_rate": 4.7181393848929e-06, "loss": 0.8, "step": 4281 }, { "epoch": 0.53, "grad_norm": 7.3538341337682676, "learning_rate": 4.716128253050937e-06, "loss": 1.2749, "step": 4282 }, { "epoch": 0.53, "grad_norm": 8.198028084807317, "learning_rate": 4.714117167282925e-06, "loss": 1.3594, "step": 4283 }, { "epoch": 0.53, "grad_norm": 7.660353146772675, "learning_rate": 4.712106127915272e-06, "loss": 1.2352, "step": 4284 }, { "epoch": 0.53, "grad_norm": 8.02403497625928, "learning_rate": 4.710095135274383e-06, "loss": 0.9537, "step": 4285 }, { "epoch": 0.53, "grad_norm": 7.5457950821939255, "learning_rate": 4.708084189686651e-06, "loss": 1.3959, "step": 4286 }, { "epoch": 0.53, "grad_norm": 6.8184861824583045, "learning_rate": 4.706073291478467e-06, "loss": 1.1875, "step": 4287 }, { "epoch": 0.53, "grad_norm": 5.411922656737199, "learning_rate": 4.704062440976209e-06, "loss": 0.6002, "step": 4288 }, { "epoch": 0.53, "grad_norm": 7.459115902150936, "learning_rate": 4.702051638506247e-06, "loss": 1.065, "step": 4289 }, { "epoch": 0.53, "grad_norm": 7.556879642934288, "learning_rate": 4.700040884394949e-06, "loss": 0.987, "step": 4290 }, { "epoch": 0.53, "grad_norm": 8.153365985464562, "learning_rate": 4.698030178968669e-06, "loss": 1.2739, "step": 4291 }, { "epoch": 0.53, "grad_norm": 6.375489819395151, "learning_rate": 4.696019522553757e-06, "loss": 0.9841, "step": 4292 }, { "epoch": 0.53, "grad_norm": 7.383517945936245, "learning_rate": 4.694008915476552e-06, "loss": 1.2485, "step": 4293 }, { "epoch": 0.53, "grad_norm": 7.272029118772029, "learning_rate": 4.69199835806339e-06, "loss": 1.2538, "step": 4294 }, { "epoch": 0.53, "grad_norm": 7.06061512563798, "learning_rate": 4.6899878506405904e-06, "loss": 1.1847, "step": 4295 }, { "epoch": 0.53, "grad_norm": 7.03772116577368, "learning_rate": 4.687977393534475e-06, "loss": 1.0696, "step": 4296 }, { "epoch": 0.53, "grad_norm": 6.523562815869111, "learning_rate": 4.6859669870713484e-06, "loss": 0.949, "step": 4297 }, { "epoch": 0.53, "grad_norm": 7.058386617853442, "learning_rate": 4.68395663157751e-06, "loss": 1.1742, "step": 4298 }, { "epoch": 0.53, "grad_norm": 7.423053229672438, "learning_rate": 4.6819463273792565e-06, "loss": 1.0987, "step": 4299 }, { "epoch": 0.53, "grad_norm": 6.913150766133195, "learning_rate": 4.679936074802865e-06, "loss": 0.9609, "step": 4300 }, { "epoch": 0.53, "grad_norm": 6.796125562182647, "learning_rate": 4.677925874174616e-06, "loss": 1.1877, "step": 4301 }, { "epoch": 0.54, "grad_norm": 6.870141019222875, "learning_rate": 4.675915725820773e-06, "loss": 1.1352, "step": 4302 }, { "epoch": 0.54, "grad_norm": 7.5065836647118385, "learning_rate": 4.673905630067596e-06, "loss": 1.153, "step": 4303 }, { "epoch": 0.54, "grad_norm": 6.756461652937691, "learning_rate": 4.671895587241334e-06, "loss": 0.9684, "step": 4304 }, { "epoch": 0.54, "grad_norm": 8.376664532389185, "learning_rate": 4.669885597668227e-06, "loss": 1.2187, "step": 4305 }, { "epoch": 0.54, "grad_norm": 7.336363304473213, "learning_rate": 4.667875661674509e-06, "loss": 1.0677, "step": 4306 }, { "epoch": 0.54, "grad_norm": 6.3935765812865215, "learning_rate": 4.6658657795864e-06, "loss": 1.177, "step": 4307 }, { "epoch": 0.54, "grad_norm": 7.207009260642097, "learning_rate": 4.6638559517301216e-06, "loss": 1.3471, "step": 4308 }, { "epoch": 0.54, "grad_norm": 8.077212027263908, "learning_rate": 4.661846178431873e-06, "loss": 0.9856, "step": 4309 }, { "epoch": 0.54, "grad_norm": 6.7344740382062485, "learning_rate": 4.659836460017857e-06, "loss": 0.9818, "step": 4310 }, { "epoch": 0.54, "grad_norm": 7.452548304374172, "learning_rate": 4.657826796814259e-06, "loss": 1.0917, "step": 4311 }, { "epoch": 0.54, "grad_norm": 7.129640776725427, "learning_rate": 4.655817189147258e-06, "loss": 1.3984, "step": 4312 }, { "epoch": 0.54, "grad_norm": 6.96639715285299, "learning_rate": 4.653807637343028e-06, "loss": 0.9781, "step": 4313 }, { "epoch": 0.54, "grad_norm": 8.09088651908722, "learning_rate": 4.651798141727725e-06, "loss": 1.5346, "step": 4314 }, { "epoch": 0.54, "grad_norm": 7.547288801692991, "learning_rate": 4.649788702627506e-06, "loss": 1.2883, "step": 4315 }, { "epoch": 0.54, "grad_norm": 7.403053021631139, "learning_rate": 4.64777932036851e-06, "loss": 1.4292, "step": 4316 }, { "epoch": 0.54, "grad_norm": 7.345429795614255, "learning_rate": 4.645769995276876e-06, "loss": 1.2678, "step": 4317 }, { "epoch": 0.54, "grad_norm": 7.391218776225658, "learning_rate": 4.643760727678722e-06, "loss": 1.1778, "step": 4318 }, { "epoch": 0.54, "grad_norm": 7.113705086795898, "learning_rate": 4.6417515179001686e-06, "loss": 1.1455, "step": 4319 }, { "epoch": 0.54, "grad_norm": 6.7156766668587915, "learning_rate": 4.639742366267319e-06, "loss": 1.0684, "step": 4320 }, { "epoch": 0.54, "grad_norm": 7.237403368728543, "learning_rate": 4.637733273106272e-06, "loss": 1.4037, "step": 4321 }, { "epoch": 0.54, "grad_norm": 7.539226432457346, "learning_rate": 4.635724238743111e-06, "loss": 1.36, "step": 4322 }, { "epoch": 0.54, "grad_norm": 7.480667993461808, "learning_rate": 4.633715263503915e-06, "loss": 1.1404, "step": 4323 }, { "epoch": 0.54, "grad_norm": 6.779613368107125, "learning_rate": 4.631706347714754e-06, "loss": 1.1086, "step": 4324 }, { "epoch": 0.54, "grad_norm": 7.901633138372594, "learning_rate": 4.629697491701681e-06, "loss": 1.3448, "step": 4325 }, { "epoch": 0.54, "grad_norm": 6.015512818036188, "learning_rate": 4.62768869579075e-06, "loss": 0.8268, "step": 4326 }, { "epoch": 0.54, "grad_norm": 6.211467918931019, "learning_rate": 4.625679960307995e-06, "loss": 0.7453, "step": 4327 }, { "epoch": 0.54, "grad_norm": 6.360097742080352, "learning_rate": 4.623671285579448e-06, "loss": 0.6479, "step": 4328 }, { "epoch": 0.54, "grad_norm": 7.734614524803638, "learning_rate": 4.621662671931127e-06, "loss": 1.3293, "step": 4329 }, { "epoch": 0.54, "grad_norm": 7.808343631883726, "learning_rate": 4.619654119689041e-06, "loss": 1.3419, "step": 4330 }, { "epoch": 0.54, "grad_norm": 7.447129909913415, "learning_rate": 4.61764562917919e-06, "loss": 1.3309, "step": 4331 }, { "epoch": 0.54, "grad_norm": 8.205914124897111, "learning_rate": 4.61563720072756e-06, "loss": 1.0651, "step": 4332 }, { "epoch": 0.54, "grad_norm": 8.263780677728402, "learning_rate": 4.613628834660135e-06, "loss": 1.4129, "step": 4333 }, { "epoch": 0.54, "grad_norm": 6.849924680073393, "learning_rate": 4.611620531302877e-06, "loss": 1.1132, "step": 4334 }, { "epoch": 0.54, "grad_norm": 7.462585346767363, "learning_rate": 4.609612290981753e-06, "loss": 1.1372, "step": 4335 }, { "epoch": 0.54, "grad_norm": 6.534184732531488, "learning_rate": 4.607604114022703e-06, "loss": 0.8279, "step": 4336 }, { "epoch": 0.54, "grad_norm": 6.001263229539449, "learning_rate": 4.6055960007516734e-06, "loss": 0.8086, "step": 4337 }, { "epoch": 0.54, "grad_norm": 6.757916467838377, "learning_rate": 4.603587951494587e-06, "loss": 0.9769, "step": 4338 }, { "epoch": 0.54, "grad_norm": 7.125840630446187, "learning_rate": 4.601579966577362e-06, "loss": 1.1588, "step": 4339 }, { "epoch": 0.54, "grad_norm": 6.441393719051667, "learning_rate": 4.5995720463259065e-06, "loss": 0.9996, "step": 4340 }, { "epoch": 0.54, "grad_norm": 6.7763815346594685, "learning_rate": 4.597564191066115e-06, "loss": 1.1178, "step": 4341 }, { "epoch": 0.54, "grad_norm": 7.310080206504425, "learning_rate": 4.595556401123876e-06, "loss": 1.1078, "step": 4342 }, { "epoch": 0.54, "grad_norm": 7.267213565718032, "learning_rate": 4.593548676825062e-06, "loss": 1.0437, "step": 4343 }, { "epoch": 0.54, "grad_norm": 7.134047992623577, "learning_rate": 4.591541018495543e-06, "loss": 1.1771, "step": 4344 }, { "epoch": 0.54, "grad_norm": 6.134656299057262, "learning_rate": 4.589533426461168e-06, "loss": 0.7038, "step": 4345 }, { "epoch": 0.54, "grad_norm": 7.663236061402761, "learning_rate": 4.587525901047778e-06, "loss": 1.3213, "step": 4346 }, { "epoch": 0.54, "grad_norm": 7.536500637012055, "learning_rate": 4.585518442581213e-06, "loss": 1.198, "step": 4347 }, { "epoch": 0.54, "grad_norm": 6.9543651399176944, "learning_rate": 4.5835110513872895e-06, "loss": 1.137, "step": 4348 }, { "epoch": 0.54, "grad_norm": 7.50606375137481, "learning_rate": 4.5815037277918205e-06, "loss": 0.798, "step": 4349 }, { "epoch": 0.54, "grad_norm": 7.600573338792672, "learning_rate": 4.579496472120604e-06, "loss": 0.9846, "step": 4350 }, { "epoch": 0.54, "grad_norm": 6.75434666723982, "learning_rate": 4.577489284699429e-06, "loss": 1.0699, "step": 4351 }, { "epoch": 0.54, "grad_norm": 7.16553948327097, "learning_rate": 4.575482165854073e-06, "loss": 1.0426, "step": 4352 }, { "epoch": 0.54, "grad_norm": 7.309374960356512, "learning_rate": 4.573475115910305e-06, "loss": 1.2021, "step": 4353 }, { "epoch": 0.54, "grad_norm": 5.963863557146407, "learning_rate": 4.571468135193877e-06, "loss": 0.9443, "step": 4354 }, { "epoch": 0.54, "grad_norm": 7.106068015188058, "learning_rate": 4.569461224030531e-06, "loss": 1.1883, "step": 4355 }, { "epoch": 0.54, "grad_norm": 7.9750718689160065, "learning_rate": 4.567454382746008e-06, "loss": 1.0915, "step": 4356 }, { "epoch": 0.54, "grad_norm": 7.561496508818063, "learning_rate": 4.56544761166602e-06, "loss": 1.0406, "step": 4357 }, { "epoch": 0.54, "grad_norm": 7.0339675262264345, "learning_rate": 4.563440911116283e-06, "loss": 1.1349, "step": 4358 }, { "epoch": 0.54, "grad_norm": 6.924850440432363, "learning_rate": 4.561434281422494e-06, "loss": 1.1977, "step": 4359 }, { "epoch": 0.54, "grad_norm": 7.8306179302608205, "learning_rate": 4.5594277229103405e-06, "loss": 1.1908, "step": 4360 }, { "epoch": 0.54, "grad_norm": 7.751290687567995, "learning_rate": 4.557421235905498e-06, "loss": 1.3705, "step": 4361 }, { "epoch": 0.54, "grad_norm": 7.196609033300798, "learning_rate": 4.5554148207336275e-06, "loss": 0.9484, "step": 4362 }, { "epoch": 0.54, "grad_norm": 7.146130214388568, "learning_rate": 4.553408477720386e-06, "loss": 1.2759, "step": 4363 }, { "epoch": 0.54, "grad_norm": 6.224704100387172, "learning_rate": 4.5514022071914085e-06, "loss": 0.9946, "step": 4364 }, { "epoch": 0.54, "grad_norm": 7.227844147279108, "learning_rate": 4.549396009472331e-06, "loss": 0.9983, "step": 4365 }, { "epoch": 0.54, "grad_norm": 7.781948893850753, "learning_rate": 4.547389884888763e-06, "loss": 1.2457, "step": 4366 }, { "epoch": 0.54, "grad_norm": 7.648987149497182, "learning_rate": 4.545383833766315e-06, "loss": 1.0541, "step": 4367 }, { "epoch": 0.54, "grad_norm": 5.756456752349553, "learning_rate": 4.543377856430577e-06, "loss": 0.9253, "step": 4368 }, { "epoch": 0.54, "grad_norm": 6.577765498451657, "learning_rate": 4.541371953207132e-06, "loss": 0.8602, "step": 4369 }, { "epoch": 0.54, "grad_norm": 7.069815276722731, "learning_rate": 4.5393661244215484e-06, "loss": 1.0964, "step": 4370 }, { "epoch": 0.54, "grad_norm": 6.6211796610846925, "learning_rate": 4.537360370399383e-06, "loss": 1.1596, "step": 4371 }, { "epoch": 0.54, "grad_norm": 6.850793767713443, "learning_rate": 4.535354691466181e-06, "loss": 1.1803, "step": 4372 }, { "epoch": 0.54, "grad_norm": 8.147849218369746, "learning_rate": 4.533349087947474e-06, "loss": 1.125, "step": 4373 }, { "epoch": 0.54, "grad_norm": 6.192824226874214, "learning_rate": 4.531343560168788e-06, "loss": 1.056, "step": 4374 }, { "epoch": 0.54, "grad_norm": 7.421810579998091, "learning_rate": 4.529338108455622e-06, "loss": 1.3292, "step": 4375 }, { "epoch": 0.54, "grad_norm": 6.8603314819684655, "learning_rate": 4.5273327331334796e-06, "loss": 1.0992, "step": 4376 }, { "epoch": 0.54, "grad_norm": 8.587142447580502, "learning_rate": 4.52532743452784e-06, "loss": 1.1545, "step": 4377 }, { "epoch": 0.54, "grad_norm": 7.218708998836021, "learning_rate": 4.523322212964178e-06, "loss": 1.1531, "step": 4378 }, { "epoch": 0.54, "grad_norm": 6.807404634008102, "learning_rate": 4.521317068767949e-06, "loss": 1.1597, "step": 4379 }, { "epoch": 0.54, "grad_norm": 7.586311616026468, "learning_rate": 4.519312002264599e-06, "loss": 1.2177, "step": 4380 }, { "epoch": 0.54, "grad_norm": 6.6650825384774635, "learning_rate": 4.517307013779562e-06, "loss": 0.9962, "step": 4381 }, { "epoch": 0.55, "grad_norm": 7.458483300855107, "learning_rate": 4.515302103638258e-06, "loss": 1.0573, "step": 4382 }, { "epoch": 0.55, "grad_norm": 6.277591188262525, "learning_rate": 4.513297272166099e-06, "loss": 1.0357, "step": 4383 }, { "epoch": 0.55, "grad_norm": 7.292402820845383, "learning_rate": 4.5112925196884734e-06, "loss": 1.2711, "step": 4384 }, { "epoch": 0.55, "grad_norm": 7.278456545890892, "learning_rate": 4.50928784653077e-06, "loss": 1.1051, "step": 4385 }, { "epoch": 0.55, "grad_norm": 6.530449710394131, "learning_rate": 4.507283253018355e-06, "loss": 0.9371, "step": 4386 }, { "epoch": 0.55, "grad_norm": 7.510243998872073, "learning_rate": 4.505278739476585e-06, "loss": 1.403, "step": 4387 }, { "epoch": 0.55, "grad_norm": 7.153073103839199, "learning_rate": 4.503274306230804e-06, "loss": 1.2808, "step": 4388 }, { "epoch": 0.55, "grad_norm": 7.308814910387325, "learning_rate": 4.501269953606343e-06, "loss": 1.0471, "step": 4389 }, { "epoch": 0.55, "grad_norm": 7.857139105562495, "learning_rate": 4.499265681928519e-06, "loss": 1.072, "step": 4390 }, { "epoch": 0.55, "grad_norm": 6.331060013380664, "learning_rate": 4.497261491522636e-06, "loss": 1.0742, "step": 4391 }, { "epoch": 0.55, "grad_norm": 7.900794848633998, "learning_rate": 4.4952573827139875e-06, "loss": 1.4437, "step": 4392 }, { "epoch": 0.55, "grad_norm": 7.279786989121533, "learning_rate": 4.493253355827846e-06, "loss": 1.0468, "step": 4393 }, { "epoch": 0.55, "grad_norm": 6.8858710295095715, "learning_rate": 4.491249411189483e-06, "loss": 0.8368, "step": 4394 }, { "epoch": 0.55, "grad_norm": 8.265920042207433, "learning_rate": 4.489245549124146e-06, "loss": 1.0594, "step": 4395 }, { "epoch": 0.55, "grad_norm": 5.968922463617523, "learning_rate": 4.487241769957073e-06, "loss": 0.6912, "step": 4396 }, { "epoch": 0.55, "grad_norm": 7.174335182322584, "learning_rate": 4.485238074013488e-06, "loss": 1.075, "step": 4397 }, { "epoch": 0.55, "grad_norm": 6.444066876638451, "learning_rate": 4.483234461618603e-06, "loss": 1.1777, "step": 4398 }, { "epoch": 0.55, "grad_norm": 6.231438190409306, "learning_rate": 4.481230933097616e-06, "loss": 0.8408, "step": 4399 }, { "epoch": 0.55, "grad_norm": 7.910460866169644, "learning_rate": 4.479227488775707e-06, "loss": 1.4382, "step": 4400 }, { "epoch": 0.55, "grad_norm": 7.770837781889916, "learning_rate": 4.477224128978052e-06, "loss": 1.0654, "step": 4401 }, { "epoch": 0.55, "grad_norm": 8.426666214583234, "learning_rate": 4.475220854029804e-06, "loss": 1.1264, "step": 4402 }, { "epoch": 0.55, "grad_norm": 6.240325161857041, "learning_rate": 4.473217664256103e-06, "loss": 0.8848, "step": 4403 }, { "epoch": 0.55, "grad_norm": 6.968528535746932, "learning_rate": 4.471214559982083e-06, "loss": 1.2932, "step": 4404 }, { "epoch": 0.55, "grad_norm": 7.821736282251847, "learning_rate": 4.469211541532855e-06, "loss": 1.5259, "step": 4405 }, { "epoch": 0.55, "grad_norm": 6.795710298684724, "learning_rate": 4.4672086092335215e-06, "loss": 0.9866, "step": 4406 }, { "epoch": 0.55, "grad_norm": 6.861668827105829, "learning_rate": 4.465205763409169e-06, "loss": 1.2426, "step": 4407 }, { "epoch": 0.55, "grad_norm": 7.744707151867036, "learning_rate": 4.463203004384872e-06, "loss": 1.1689, "step": 4408 }, { "epoch": 0.55, "grad_norm": 7.799717716350104, "learning_rate": 4.461200332485684e-06, "loss": 1.4175, "step": 4409 }, { "epoch": 0.55, "grad_norm": 7.488825098431496, "learning_rate": 4.459197748036659e-06, "loss": 1.5112, "step": 4410 }, { "epoch": 0.55, "grad_norm": 6.48772918284964, "learning_rate": 4.45719525136282e-06, "loss": 1.1926, "step": 4411 }, { "epoch": 0.55, "grad_norm": 8.50320238012362, "learning_rate": 4.455192842789184e-06, "loss": 1.4085, "step": 4412 }, { "epoch": 0.55, "grad_norm": 6.500198128315734, "learning_rate": 4.453190522640756e-06, "loss": 1.0516, "step": 4413 }, { "epoch": 0.55, "grad_norm": 6.355559042714051, "learning_rate": 4.4511882912425214e-06, "loss": 0.8085, "step": 4414 }, { "epoch": 0.55, "grad_norm": 6.423878359266024, "learning_rate": 4.449186148919455e-06, "loss": 0.8205, "step": 4415 }, { "epoch": 0.55, "grad_norm": 6.826699762105456, "learning_rate": 4.447184095996513e-06, "loss": 1.0357, "step": 4416 }, { "epoch": 0.55, "grad_norm": 8.593023237802807, "learning_rate": 4.445182132798642e-06, "loss": 1.0955, "step": 4417 }, { "epoch": 0.55, "grad_norm": 6.810315245693576, "learning_rate": 4.443180259650772e-06, "loss": 1.0146, "step": 4418 }, { "epoch": 0.55, "grad_norm": 7.084665165443339, "learning_rate": 4.441178476877815e-06, "loss": 1.147, "step": 4419 }, { "epoch": 0.55, "grad_norm": 7.152246804693797, "learning_rate": 4.439176784804674e-06, "loss": 1.0089, "step": 4420 }, { "epoch": 0.55, "grad_norm": 7.894467743393735, "learning_rate": 4.437175183756233e-06, "loss": 1.3126, "step": 4421 }, { "epoch": 0.55, "grad_norm": 7.166337378541831, "learning_rate": 4.435173674057366e-06, "loss": 1.1761, "step": 4422 }, { "epoch": 0.55, "grad_norm": 7.299053605912965, "learning_rate": 4.433172256032924e-06, "loss": 1.2932, "step": 4423 }, { "epoch": 0.55, "grad_norm": 6.17927139196405, "learning_rate": 4.431170930007752e-06, "loss": 1.062, "step": 4424 }, { "epoch": 0.55, "grad_norm": 7.329799611853846, "learning_rate": 4.429169696306676e-06, "loss": 1.2874, "step": 4425 }, { "epoch": 0.55, "grad_norm": 6.49890513089342, "learning_rate": 4.427168555254507e-06, "loss": 0.9908, "step": 4426 }, { "epoch": 0.55, "grad_norm": 8.424015383358215, "learning_rate": 4.425167507176042e-06, "loss": 1.332, "step": 4427 }, { "epoch": 0.55, "grad_norm": 7.552444922179637, "learning_rate": 4.4231665523960574e-06, "loss": 1.0734, "step": 4428 }, { "epoch": 0.55, "grad_norm": 7.675884425207098, "learning_rate": 4.421165691239326e-06, "loss": 1.1559, "step": 4429 }, { "epoch": 0.55, "grad_norm": 7.544917746139315, "learning_rate": 4.419164924030592e-06, "loss": 1.0171, "step": 4430 }, { "epoch": 0.55, "grad_norm": 7.284989927960741, "learning_rate": 4.417164251094598e-06, "loss": 1.2823, "step": 4431 }, { "epoch": 0.55, "grad_norm": 7.352210225916279, "learning_rate": 4.415163672756057e-06, "loss": 0.9486, "step": 4432 }, { "epoch": 0.55, "grad_norm": 7.002020532343996, "learning_rate": 4.413163189339679e-06, "loss": 1.2226, "step": 4433 }, { "epoch": 0.55, "grad_norm": 7.303787578005008, "learning_rate": 4.411162801170152e-06, "loss": 0.9707, "step": 4434 }, { "epoch": 0.55, "grad_norm": 7.358379158085623, "learning_rate": 4.409162508572151e-06, "loss": 1.0914, "step": 4435 }, { "epoch": 0.55, "grad_norm": 7.190945638751971, "learning_rate": 4.407162311870334e-06, "loss": 1.0296, "step": 4436 }, { "epoch": 0.55, "grad_norm": 7.184599496203093, "learning_rate": 4.405162211389341e-06, "loss": 1.3076, "step": 4437 }, { "epoch": 0.55, "grad_norm": 7.83625663017327, "learning_rate": 4.403162207453804e-06, "loss": 1.1407, "step": 4438 }, { "epoch": 0.55, "grad_norm": 7.532582679913653, "learning_rate": 4.401162300388329e-06, "loss": 1.1606, "step": 4439 }, { "epoch": 0.55, "grad_norm": 7.649825645316168, "learning_rate": 4.3991624905175204e-06, "loss": 0.9974, "step": 4440 }, { "epoch": 0.55, "grad_norm": 6.9500354499288575, "learning_rate": 4.3971627781659485e-06, "loss": 1.1891, "step": 4441 }, { "epoch": 0.55, "grad_norm": 7.688719902475785, "learning_rate": 4.395163163658186e-06, "loss": 1.2326, "step": 4442 }, { "epoch": 0.55, "grad_norm": 5.424977514026484, "learning_rate": 4.393163647318777e-06, "loss": 0.5168, "step": 4443 }, { "epoch": 0.55, "grad_norm": 7.940893912990251, "learning_rate": 4.391164229472255e-06, "loss": 1.5023, "step": 4444 }, { "epoch": 0.55, "grad_norm": 7.332445059295958, "learning_rate": 4.389164910443136e-06, "loss": 1.1272, "step": 4445 }, { "epoch": 0.55, "grad_norm": 6.839761043368552, "learning_rate": 4.38716569055592e-06, "loss": 1.228, "step": 4446 }, { "epoch": 0.55, "grad_norm": 7.771343395981489, "learning_rate": 4.385166570135095e-06, "loss": 1.49, "step": 4447 }, { "epoch": 0.55, "grad_norm": 5.890058501465121, "learning_rate": 4.383167549505123e-06, "loss": 0.7251, "step": 4448 }, { "epoch": 0.55, "grad_norm": 7.4295725441174785, "learning_rate": 4.381168628990463e-06, "loss": 1.0976, "step": 4449 }, { "epoch": 0.55, "grad_norm": 7.342406396791055, "learning_rate": 4.379169808915545e-06, "loss": 1.1382, "step": 4450 }, { "epoch": 0.55, "grad_norm": 8.56181435873748, "learning_rate": 4.377171089604794e-06, "loss": 1.0569, "step": 4451 }, { "epoch": 0.55, "grad_norm": 7.031880361251951, "learning_rate": 4.375172471382609e-06, "loss": 1.1091, "step": 4452 }, { "epoch": 0.55, "grad_norm": 7.500386921518493, "learning_rate": 4.373173954573377e-06, "loss": 1.3008, "step": 4453 }, { "epoch": 0.55, "grad_norm": 7.133667398517603, "learning_rate": 4.371175539501471e-06, "loss": 1.1629, "step": 4454 }, { "epoch": 0.55, "grad_norm": 7.413795721717892, "learning_rate": 4.369177226491242e-06, "loss": 1.2425, "step": 4455 }, { "epoch": 0.55, "grad_norm": 6.682273231420417, "learning_rate": 4.367179015867028e-06, "loss": 1.1005, "step": 4456 }, { "epoch": 0.55, "grad_norm": 6.58181819021239, "learning_rate": 4.36518090795315e-06, "loss": 1.0075, "step": 4457 }, { "epoch": 0.55, "grad_norm": 5.982071619084826, "learning_rate": 4.3631829030739124e-06, "loss": 0.9127, "step": 4458 }, { "epoch": 0.55, "grad_norm": 6.308794524920886, "learning_rate": 4.361185001553603e-06, "loss": 1.0656, "step": 4459 }, { "epoch": 0.55, "grad_norm": 6.707048775205098, "learning_rate": 4.359187203716488e-06, "loss": 1.1949, "step": 4460 }, { "epoch": 0.55, "grad_norm": 6.033219707949914, "learning_rate": 4.357189509886826e-06, "loss": 1.0546, "step": 4461 }, { "epoch": 0.55, "grad_norm": 6.187053562058346, "learning_rate": 4.355191920388851e-06, "loss": 0.8753, "step": 4462 }, { "epoch": 0.56, "grad_norm": 7.182529263680151, "learning_rate": 4.3531944355467855e-06, "loss": 0.8346, "step": 4463 }, { "epoch": 0.56, "grad_norm": 6.1795736640306655, "learning_rate": 4.351197055684829e-06, "loss": 0.9809, "step": 4464 }, { "epoch": 0.56, "grad_norm": 6.311732535351031, "learning_rate": 4.349199781127169e-06, "loss": 0.9983, "step": 4465 }, { "epoch": 0.56, "grad_norm": 6.695843435013735, "learning_rate": 4.347202612197973e-06, "loss": 0.7775, "step": 4466 }, { "epoch": 0.56, "grad_norm": 8.830973440722456, "learning_rate": 4.345205549221395e-06, "loss": 1.5327, "step": 4467 }, { "epoch": 0.56, "grad_norm": 7.078525774227721, "learning_rate": 4.343208592521568e-06, "loss": 0.8156, "step": 4468 }, { "epoch": 0.56, "grad_norm": 6.863680950852403, "learning_rate": 4.341211742422606e-06, "loss": 1.076, "step": 4469 }, { "epoch": 0.56, "grad_norm": 6.932308036022359, "learning_rate": 4.339214999248614e-06, "loss": 0.9456, "step": 4470 }, { "epoch": 0.56, "grad_norm": 6.26307739247336, "learning_rate": 4.337218363323671e-06, "loss": 0.8113, "step": 4471 }, { "epoch": 0.56, "grad_norm": 8.114573768300307, "learning_rate": 4.335221834971845e-06, "loss": 1.2591, "step": 4472 }, { "epoch": 0.56, "grad_norm": 9.972769208088934, "learning_rate": 4.333225414517178e-06, "loss": 1.383, "step": 4473 }, { "epoch": 0.56, "grad_norm": 7.493244428387276, "learning_rate": 4.331229102283707e-06, "loss": 1.1583, "step": 4474 }, { "epoch": 0.56, "grad_norm": 8.307257312366605, "learning_rate": 4.329232898595441e-06, "loss": 1.1992, "step": 4475 }, { "epoch": 0.56, "grad_norm": 5.913961755886156, "learning_rate": 4.327236803776373e-06, "loss": 0.6966, "step": 4476 }, { "epoch": 0.56, "grad_norm": 7.180174326746378, "learning_rate": 4.325240818150485e-06, "loss": 0.9691, "step": 4477 }, { "epoch": 0.56, "grad_norm": 7.03355726469038, "learning_rate": 4.32324494204173e-06, "loss": 1.1097, "step": 4478 }, { "epoch": 0.56, "grad_norm": 7.089764603105008, "learning_rate": 4.321249175774057e-06, "loss": 1.0603, "step": 4479 }, { "epoch": 0.56, "grad_norm": 6.771416979309275, "learning_rate": 4.319253519671383e-06, "loss": 1.133, "step": 4480 }, { "epoch": 0.56, "grad_norm": 10.252026154647675, "learning_rate": 4.317257974057619e-06, "loss": 1.1797, "step": 4481 }, { "epoch": 0.56, "grad_norm": 6.507342043574264, "learning_rate": 4.3152625392566516e-06, "loss": 1.2637, "step": 4482 }, { "epoch": 0.56, "grad_norm": 6.109813818651525, "learning_rate": 4.3132672155923505e-06, "loss": 0.7629, "step": 4483 }, { "epoch": 0.56, "grad_norm": 6.045224926767705, "learning_rate": 4.311272003388569e-06, "loss": 0.8254, "step": 4484 }, { "epoch": 0.56, "grad_norm": 6.4801892661112115, "learning_rate": 4.309276902969138e-06, "loss": 1.0406, "step": 4485 }, { "epoch": 0.56, "grad_norm": 7.069340037981442, "learning_rate": 4.307281914657876e-06, "loss": 1.1296, "step": 4486 }, { "epoch": 0.56, "grad_norm": 7.237238901440246, "learning_rate": 4.305287038778578e-06, "loss": 1.393, "step": 4487 }, { "epoch": 0.56, "grad_norm": 7.774718897208878, "learning_rate": 4.3032922756550275e-06, "loss": 1.3321, "step": 4488 }, { "epoch": 0.56, "grad_norm": 7.83390004126551, "learning_rate": 4.301297625610981e-06, "loss": 1.2969, "step": 4489 }, { "epoch": 0.56, "grad_norm": 7.498110670174991, "learning_rate": 4.299303088970186e-06, "loss": 1.0345, "step": 4490 }, { "epoch": 0.56, "grad_norm": 7.497276123055087, "learning_rate": 4.297308666056363e-06, "loss": 1.3047, "step": 4491 }, { "epoch": 0.56, "grad_norm": 5.506040400367845, "learning_rate": 4.295314357193219e-06, "loss": 0.6595, "step": 4492 }, { "epoch": 0.56, "grad_norm": 8.285561567619993, "learning_rate": 4.293320162704442e-06, "loss": 1.1172, "step": 4493 }, { "epoch": 0.56, "grad_norm": 6.0308584428092225, "learning_rate": 4.291326082913699e-06, "loss": 1.1411, "step": 4494 }, { "epoch": 0.56, "grad_norm": 6.868448987404491, "learning_rate": 4.289332118144642e-06, "loss": 0.9838, "step": 4495 }, { "epoch": 0.56, "grad_norm": 6.615882228101285, "learning_rate": 4.2873382687209e-06, "loss": 1.3156, "step": 4496 }, { "epoch": 0.56, "grad_norm": 7.436564714023864, "learning_rate": 4.285344534966091e-06, "loss": 1.289, "step": 4497 }, { "epoch": 0.56, "grad_norm": 7.415945455542449, "learning_rate": 4.283350917203802e-06, "loss": 1.328, "step": 4498 }, { "epoch": 0.56, "grad_norm": 7.1720741196439155, "learning_rate": 4.281357415757615e-06, "loss": 0.9721, "step": 4499 }, { "epoch": 0.56, "grad_norm": 7.75647767454659, "learning_rate": 4.279364030951081e-06, "loss": 1.121, "step": 4500 }, { "epoch": 0.56, "grad_norm": 8.620988030640005, "learning_rate": 4.277370763107739e-06, "loss": 1.53, "step": 4501 }, { "epoch": 0.56, "grad_norm": 7.456915733178308, "learning_rate": 4.275377612551109e-06, "loss": 1.2372, "step": 4502 }, { "epoch": 0.56, "grad_norm": 6.902509070488166, "learning_rate": 4.273384579604687e-06, "loss": 1.2486, "step": 4503 }, { "epoch": 0.56, "grad_norm": 6.35910992355815, "learning_rate": 4.271391664591957e-06, "loss": 0.6626, "step": 4504 }, { "epoch": 0.56, "grad_norm": 8.34978625661634, "learning_rate": 4.269398867836377e-06, "loss": 1.2085, "step": 4505 }, { "epoch": 0.56, "grad_norm": 6.000410125574306, "learning_rate": 4.2674061896613924e-06, "loss": 0.803, "step": 4506 }, { "epoch": 0.56, "grad_norm": 6.507991735403453, "learning_rate": 4.265413630390421e-06, "loss": 1.1534, "step": 4507 }, { "epoch": 0.56, "grad_norm": 7.0583659024639624, "learning_rate": 4.263421190346871e-06, "loss": 0.9476, "step": 4508 }, { "epoch": 0.56, "grad_norm": 6.53393761270035, "learning_rate": 4.2614288698541234e-06, "loss": 1.012, "step": 4509 }, { "epoch": 0.56, "grad_norm": 5.907735663445673, "learning_rate": 4.259436669235543e-06, "loss": 0.8725, "step": 4510 }, { "epoch": 0.56, "grad_norm": 6.772728464653023, "learning_rate": 4.257444588814477e-06, "loss": 1.0767, "step": 4511 }, { "epoch": 0.56, "grad_norm": 6.989048740036183, "learning_rate": 4.255452628914248e-06, "loss": 1.2874, "step": 4512 }, { "epoch": 0.56, "grad_norm": 7.143302222671674, "learning_rate": 4.253460789858166e-06, "loss": 1.1771, "step": 4513 }, { "epoch": 0.56, "grad_norm": 7.5810426625700105, "learning_rate": 4.251469071969511e-06, "loss": 1.2686, "step": 4514 }, { "epoch": 0.56, "grad_norm": 7.0806928646546154, "learning_rate": 4.2494774755715595e-06, "loss": 1.0367, "step": 4515 }, { "epoch": 0.56, "grad_norm": 7.002598041813544, "learning_rate": 4.247486000987551e-06, "loss": 1.0866, "step": 4516 }, { "epoch": 0.56, "grad_norm": 6.784216954919511, "learning_rate": 4.245494648540712e-06, "loss": 1.0002, "step": 4517 }, { "epoch": 0.56, "grad_norm": 6.324251173586583, "learning_rate": 4.2435034185542565e-06, "loss": 1.2666, "step": 4518 }, { "epoch": 0.56, "grad_norm": 7.923239789482707, "learning_rate": 4.2415123113513665e-06, "loss": 1.0193, "step": 4519 }, { "epoch": 0.56, "grad_norm": 8.138083985766874, "learning_rate": 4.2395213272552125e-06, "loss": 1.2182, "step": 4520 }, { "epoch": 0.56, "grad_norm": 5.783685786169437, "learning_rate": 4.23753046658894e-06, "loss": 0.6769, "step": 4521 }, { "epoch": 0.56, "grad_norm": 6.847421743458096, "learning_rate": 4.23553972967568e-06, "loss": 0.9417, "step": 4522 }, { "epoch": 0.56, "grad_norm": 7.497907526959912, "learning_rate": 4.233549116838536e-06, "loss": 1.0391, "step": 4523 }, { "epoch": 0.56, "grad_norm": 6.639156277694514, "learning_rate": 4.2315586284006005e-06, "loss": 0.9598, "step": 4524 }, { "epoch": 0.56, "grad_norm": 7.284848051808703, "learning_rate": 4.2295682646849365e-06, "loss": 0.8197, "step": 4525 }, { "epoch": 0.56, "grad_norm": 8.79404778269587, "learning_rate": 4.22757802601459e-06, "loss": 1.4597, "step": 4526 }, { "epoch": 0.56, "grad_norm": 7.1439575698465045, "learning_rate": 4.225587912712593e-06, "loss": 0.9836, "step": 4527 }, { "epoch": 0.56, "grad_norm": 6.760462686184006, "learning_rate": 4.223597925101947e-06, "loss": 0.7724, "step": 4528 }, { "epoch": 0.56, "grad_norm": 7.471133024624641, "learning_rate": 4.22160806350564e-06, "loss": 0.9278, "step": 4529 }, { "epoch": 0.56, "grad_norm": 7.198066471269798, "learning_rate": 4.219618328246638e-06, "loss": 1.1593, "step": 4530 }, { "epoch": 0.56, "grad_norm": 6.28856457361521, "learning_rate": 4.217628719647886e-06, "loss": 1.1713, "step": 4531 }, { "epoch": 0.56, "grad_norm": 7.786239046767647, "learning_rate": 4.215639238032308e-06, "loss": 1.2584, "step": 4532 }, { "epoch": 0.56, "grad_norm": 7.942615647442839, "learning_rate": 4.213649883722806e-06, "loss": 1.4533, "step": 4533 }, { "epoch": 0.56, "grad_norm": 7.331233903447509, "learning_rate": 4.211660657042266e-06, "loss": 1.3268, "step": 4534 }, { "epoch": 0.56, "grad_norm": 7.1651919732155465, "learning_rate": 4.209671558313547e-06, "loss": 1.2384, "step": 4535 }, { "epoch": 0.56, "grad_norm": 6.943349912866793, "learning_rate": 4.207682587859495e-06, "loss": 1.1925, "step": 4536 }, { "epoch": 0.56, "grad_norm": 7.23909017823378, "learning_rate": 4.205693746002929e-06, "loss": 1.2614, "step": 4537 }, { "epoch": 0.56, "grad_norm": 6.7054276144309455, "learning_rate": 4.203705033066649e-06, "loss": 0.872, "step": 4538 }, { "epoch": 0.56, "grad_norm": 9.557648076704035, "learning_rate": 4.201716449373433e-06, "loss": 1.342, "step": 4539 }, { "epoch": 0.56, "grad_norm": 7.607524566040396, "learning_rate": 4.199727995246041e-06, "loss": 1.3717, "step": 4540 }, { "epoch": 0.56, "grad_norm": 6.538498709611852, "learning_rate": 4.197739671007209e-06, "loss": 0.8538, "step": 4541 }, { "epoch": 0.56, "grad_norm": 7.366740549561133, "learning_rate": 4.195751476979652e-06, "loss": 1.0125, "step": 4542 }, { "epoch": 0.57, "grad_norm": 8.009522391039088, "learning_rate": 4.193763413486067e-06, "loss": 1.1773, "step": 4543 }, { "epoch": 0.57, "grad_norm": 6.577393352339413, "learning_rate": 4.191775480849125e-06, "loss": 0.947, "step": 4544 }, { "epoch": 0.57, "grad_norm": 6.8737443577837505, "learning_rate": 4.1897876793914825e-06, "loss": 1.041, "step": 4545 }, { "epoch": 0.57, "grad_norm": 7.9733567498329645, "learning_rate": 4.187800009435766e-06, "loss": 1.1971, "step": 4546 }, { "epoch": 0.57, "grad_norm": 5.889070854683266, "learning_rate": 4.185812471304589e-06, "loss": 1.1502, "step": 4547 }, { "epoch": 0.57, "grad_norm": 6.865435644585306, "learning_rate": 4.183825065320538e-06, "loss": 1.0572, "step": 4548 }, { "epoch": 0.57, "grad_norm": 7.010841120195167, "learning_rate": 4.1818377918061804e-06, "loss": 0.8294, "step": 4549 }, { "epoch": 0.57, "grad_norm": 7.469189129785823, "learning_rate": 4.179850651084062e-06, "loss": 1.1809, "step": 4550 }, { "epoch": 0.57, "grad_norm": 7.589194405444836, "learning_rate": 4.177863643476705e-06, "loss": 0.9661, "step": 4551 }, { "epoch": 0.57, "grad_norm": 8.22630890412705, "learning_rate": 4.175876769306614e-06, "loss": 1.3745, "step": 4552 }, { "epoch": 0.57, "grad_norm": 8.335392135350018, "learning_rate": 4.1738900288962665e-06, "loss": 1.1284, "step": 4553 }, { "epoch": 0.57, "grad_norm": 6.981664443445061, "learning_rate": 4.171903422568128e-06, "loss": 1.1991, "step": 4554 }, { "epoch": 0.57, "grad_norm": 6.922191456190942, "learning_rate": 4.169916950644626e-06, "loss": 1.0478, "step": 4555 }, { "epoch": 0.57, "grad_norm": 6.3727482173383665, "learning_rate": 4.167930613448184e-06, "loss": 0.8315, "step": 4556 }, { "epoch": 0.57, "grad_norm": 6.496531965517115, "learning_rate": 4.1659444113011935e-06, "loss": 0.8087, "step": 4557 }, { "epoch": 0.57, "grad_norm": 7.54413492302196, "learning_rate": 4.1639583445260235e-06, "loss": 0.9897, "step": 4558 }, { "epoch": 0.57, "grad_norm": 12.233081464828556, "learning_rate": 4.161972413445026e-06, "loss": 1.1161, "step": 4559 }, { "epoch": 0.57, "grad_norm": 7.406424907963368, "learning_rate": 4.1599866183805265e-06, "loss": 1.2166, "step": 4560 }, { "epoch": 0.57, "grad_norm": 6.9757901547473065, "learning_rate": 4.158000959654833e-06, "loss": 1.1468, "step": 4561 }, { "epoch": 0.57, "grad_norm": 7.5723274321398035, "learning_rate": 4.156015437590226e-06, "loss": 0.9162, "step": 4562 }, { "epoch": 0.57, "grad_norm": 9.531002129649353, "learning_rate": 4.154030052508971e-06, "loss": 0.9733, "step": 4563 }, { "epoch": 0.57, "grad_norm": 6.8074684308217535, "learning_rate": 4.152044804733301e-06, "loss": 1.129, "step": 4564 }, { "epoch": 0.57, "grad_norm": 8.675738570956357, "learning_rate": 4.150059694585438e-06, "loss": 1.2921, "step": 4565 }, { "epoch": 0.57, "grad_norm": 8.290920261814438, "learning_rate": 4.148074722387575e-06, "loss": 1.4337, "step": 4566 }, { "epoch": 0.57, "grad_norm": 7.434319006133057, "learning_rate": 4.146089888461881e-06, "loss": 1.4631, "step": 4567 }, { "epoch": 0.57, "grad_norm": 7.617674265849355, "learning_rate": 4.1441051931305095e-06, "loss": 1.3505, "step": 4568 }, { "epoch": 0.57, "grad_norm": 6.930193505521901, "learning_rate": 4.142120636715584e-06, "loss": 1.2411, "step": 4569 }, { "epoch": 0.57, "grad_norm": 7.19716289669022, "learning_rate": 4.1401362195392124e-06, "loss": 1.2481, "step": 4570 }, { "epoch": 0.57, "grad_norm": 7.38835306524844, "learning_rate": 4.138151941923472e-06, "loss": 1.2201, "step": 4571 }, { "epoch": 0.57, "grad_norm": 6.200399409229822, "learning_rate": 4.136167804190428e-06, "loss": 0.9045, "step": 4572 }, { "epoch": 0.57, "grad_norm": 6.301417391437858, "learning_rate": 4.134183806662113e-06, "loss": 0.9129, "step": 4573 }, { "epoch": 0.57, "grad_norm": 7.28503354776808, "learning_rate": 4.132199949660539e-06, "loss": 0.9381, "step": 4574 }, { "epoch": 0.57, "grad_norm": 7.315549590031127, "learning_rate": 4.130216233507702e-06, "loss": 1.3233, "step": 4575 }, { "epoch": 0.57, "grad_norm": 7.336805154349041, "learning_rate": 4.128232658525566e-06, "loss": 1.2037, "step": 4576 }, { "epoch": 0.57, "grad_norm": 7.6170692854175375, "learning_rate": 4.126249225036078e-06, "loss": 1.2587, "step": 4577 }, { "epoch": 0.57, "grad_norm": 7.597152526147462, "learning_rate": 4.124265933361159e-06, "loss": 1.0829, "step": 4578 }, { "epoch": 0.57, "grad_norm": 6.695112651728933, "learning_rate": 4.122282783822711e-06, "loss": 1.1905, "step": 4579 }, { "epoch": 0.57, "grad_norm": 9.021140238675633, "learning_rate": 4.120299776742605e-06, "loss": 1.1616, "step": 4580 }, { "epoch": 0.57, "grad_norm": 7.5712189094870785, "learning_rate": 4.1183169124427005e-06, "loss": 1.191, "step": 4581 }, { "epoch": 0.57, "grad_norm": 6.414929237455807, "learning_rate": 4.116334191244823e-06, "loss": 0.8872, "step": 4582 }, { "epoch": 0.57, "grad_norm": 6.586720596892836, "learning_rate": 4.114351613470777e-06, "loss": 0.9224, "step": 4583 }, { "epoch": 0.57, "grad_norm": 6.548039499259718, "learning_rate": 4.112369179442352e-06, "loss": 1.0977, "step": 4584 }, { "epoch": 0.57, "grad_norm": 7.060810595326451, "learning_rate": 4.1103868894813034e-06, "loss": 1.156, "step": 4585 }, { "epoch": 0.57, "grad_norm": 6.612141267109039, "learning_rate": 4.10840474390937e-06, "loss": 1.2314, "step": 4586 }, { "epoch": 0.57, "grad_norm": 6.370004635076029, "learning_rate": 4.1064227430482625e-06, "loss": 1.1678, "step": 4587 }, { "epoch": 0.57, "grad_norm": 7.399694571968294, "learning_rate": 4.1044408872196736e-06, "loss": 0.9405, "step": 4588 }, { "epoch": 0.57, "grad_norm": 6.634096437363113, "learning_rate": 4.102459176745267e-06, "loss": 0.9749, "step": 4589 }, { "epoch": 0.57, "grad_norm": 7.537655770172898, "learning_rate": 4.100477611946687e-06, "loss": 1.135, "step": 4590 }, { "epoch": 0.57, "grad_norm": 8.95657140352387, "learning_rate": 4.098496193145552e-06, "loss": 0.8633, "step": 4591 }, { "epoch": 0.57, "grad_norm": 7.513082745573403, "learning_rate": 4.096514920663455e-06, "loss": 1.133, "step": 4592 }, { "epoch": 0.57, "grad_norm": 6.875056178774618, "learning_rate": 4.094533794821971e-06, "loss": 1.035, "step": 4593 }, { "epoch": 0.57, "grad_norm": 7.567059483583511, "learning_rate": 4.092552815942644e-06, "loss": 1.169, "step": 4594 }, { "epoch": 0.57, "grad_norm": 8.393788402307981, "learning_rate": 4.090571984347003e-06, "loss": 1.4876, "step": 4595 }, { "epoch": 0.57, "grad_norm": 6.206068169348774, "learning_rate": 4.088591300356543e-06, "loss": 0.9151, "step": 4596 }, { "epoch": 0.57, "grad_norm": 8.456776713612992, "learning_rate": 4.086610764292742e-06, "loss": 1.1383, "step": 4597 }, { "epoch": 0.57, "grad_norm": 7.054411181297542, "learning_rate": 4.084630376477051e-06, "loss": 0.8274, "step": 4598 }, { "epoch": 0.57, "grad_norm": 7.941283317518153, "learning_rate": 4.082650137230898e-06, "loss": 1.1097, "step": 4599 }, { "epoch": 0.57, "grad_norm": 7.418561679669632, "learning_rate": 4.0806700468756895e-06, "loss": 1.0514, "step": 4600 }, { "epoch": 0.57, "grad_norm": 6.452413930712972, "learning_rate": 4.078690105732799e-06, "loss": 0.9959, "step": 4601 }, { "epoch": 0.57, "grad_norm": 7.275750202252596, "learning_rate": 4.07671031412359e-06, "loss": 0.9857, "step": 4602 }, { "epoch": 0.57, "grad_norm": 7.2936469652190965, "learning_rate": 4.074730672369386e-06, "loss": 1.5206, "step": 4603 }, { "epoch": 0.57, "grad_norm": 7.186787529867618, "learning_rate": 4.072751180791498e-06, "loss": 1.3515, "step": 4604 }, { "epoch": 0.57, "grad_norm": 6.94635419551507, "learning_rate": 4.070771839711208e-06, "loss": 0.9064, "step": 4605 }, { "epoch": 0.57, "grad_norm": 7.743916910430595, "learning_rate": 4.068792649449772e-06, "loss": 1.2238, "step": 4606 }, { "epoch": 0.57, "grad_norm": 7.027689138185255, "learning_rate": 4.066813610328427e-06, "loss": 1.0636, "step": 4607 }, { "epoch": 0.57, "grad_norm": 7.226607237940224, "learning_rate": 4.064834722668378e-06, "loss": 1.0377, "step": 4608 }, { "epoch": 0.57, "grad_norm": 7.166987274466326, "learning_rate": 4.062855986790812e-06, "loss": 1.0442, "step": 4609 }, { "epoch": 0.57, "grad_norm": 7.269053932683464, "learning_rate": 4.060877403016886e-06, "loss": 1.2114, "step": 4610 }, { "epoch": 0.57, "grad_norm": 7.5932248967140215, "learning_rate": 4.058898971667741e-06, "loss": 1.0751, "step": 4611 }, { "epoch": 0.57, "grad_norm": 7.085322595191666, "learning_rate": 4.056920693064479e-06, "loss": 0.7987, "step": 4612 }, { "epoch": 0.57, "grad_norm": 8.090954112899992, "learning_rate": 4.054942567528193e-06, "loss": 1.0877, "step": 4613 }, { "epoch": 0.57, "grad_norm": 7.678859024630106, "learning_rate": 4.0529645953799404e-06, "loss": 1.217, "step": 4614 }, { "epoch": 0.57, "grad_norm": 8.531034487955122, "learning_rate": 4.050986776940755e-06, "loss": 1.2762, "step": 4615 }, { "epoch": 0.57, "grad_norm": 7.819756403504974, "learning_rate": 4.049009112531652e-06, "loss": 1.333, "step": 4616 }, { "epoch": 0.57, "grad_norm": 9.078002956719088, "learning_rate": 4.047031602473613e-06, "loss": 1.3313, "step": 4617 }, { "epoch": 0.57, "grad_norm": 6.909843743647086, "learning_rate": 4.045054247087603e-06, "loss": 0.7744, "step": 4618 }, { "epoch": 0.57, "grad_norm": 7.039806718472107, "learning_rate": 4.043077046694553e-06, "loss": 1.015, "step": 4619 }, { "epoch": 0.57, "grad_norm": 6.260961296914123, "learning_rate": 4.041100001615379e-06, "loss": 0.8988, "step": 4620 }, { "epoch": 0.57, "grad_norm": 6.9762456156979376, "learning_rate": 4.039123112170961e-06, "loss": 1.0354, "step": 4621 }, { "epoch": 0.57, "grad_norm": 6.9975313407366, "learning_rate": 4.037146378682162e-06, "loss": 1.3105, "step": 4622 }, { "epoch": 0.57, "grad_norm": 7.670304969547596, "learning_rate": 4.035169801469817e-06, "loss": 1.2817, "step": 4623 }, { "epoch": 0.58, "grad_norm": 7.229778390353353, "learning_rate": 4.033193380854733e-06, "loss": 1.1406, "step": 4624 }, { "epoch": 0.58, "grad_norm": 7.2339938665623995, "learning_rate": 4.031217117157698e-06, "loss": 0.9031, "step": 4625 }, { "epoch": 0.58, "grad_norm": 6.73839635728877, "learning_rate": 4.029241010699465e-06, "loss": 0.7914, "step": 4626 }, { "epoch": 0.58, "grad_norm": 7.236877093351567, "learning_rate": 4.027265061800773e-06, "loss": 1.0122, "step": 4627 }, { "epoch": 0.58, "grad_norm": 9.379202552941491, "learning_rate": 4.0252892707823235e-06, "loss": 1.2218, "step": 4628 }, { "epoch": 0.58, "grad_norm": 6.669617064658325, "learning_rate": 4.023313637964804e-06, "loss": 1.1413, "step": 4629 }, { "epoch": 0.58, "grad_norm": 6.526325661621076, "learning_rate": 4.021338163668867e-06, "loss": 0.7158, "step": 4630 }, { "epoch": 0.58, "grad_norm": 6.674717908488879, "learning_rate": 4.019362848215141e-06, "loss": 1.2647, "step": 4631 }, { "epoch": 0.58, "grad_norm": 7.47166330422935, "learning_rate": 4.017387691924235e-06, "loss": 0.9851, "step": 4632 }, { "epoch": 0.58, "grad_norm": 8.141531935522677, "learning_rate": 4.015412695116726e-06, "loss": 1.0162, "step": 4633 }, { "epoch": 0.58, "grad_norm": 8.45162491366797, "learning_rate": 4.013437858113168e-06, "loss": 1.255, "step": 4634 }, { "epoch": 0.58, "grad_norm": 8.031400375534806, "learning_rate": 4.011463181234086e-06, "loss": 1.4501, "step": 4635 }, { "epoch": 0.58, "grad_norm": 8.862007238459698, "learning_rate": 4.009488664799982e-06, "loss": 1.3696, "step": 4636 }, { "epoch": 0.58, "grad_norm": 7.353473387845594, "learning_rate": 4.007514309131328e-06, "loss": 1.0322, "step": 4637 }, { "epoch": 0.58, "grad_norm": 7.061292958120872, "learning_rate": 4.00554011454858e-06, "loss": 1.1754, "step": 4638 }, { "epoch": 0.58, "grad_norm": 7.395434810441802, "learning_rate": 4.003566081372154e-06, "loss": 1.1067, "step": 4639 }, { "epoch": 0.58, "grad_norm": 7.286975995653064, "learning_rate": 4.001592209922447e-06, "loss": 1.3014, "step": 4640 }, { "epoch": 0.58, "grad_norm": 8.043442713846098, "learning_rate": 3.999618500519833e-06, "loss": 1.3527, "step": 4641 }, { "epoch": 0.58, "grad_norm": 6.25505176021345, "learning_rate": 3.997644953484652e-06, "loss": 0.9642, "step": 4642 }, { "epoch": 0.58, "grad_norm": 7.070734419513632, "learning_rate": 3.9956715691372255e-06, "loss": 1.2387, "step": 4643 }, { "epoch": 0.58, "grad_norm": 7.570645564194706, "learning_rate": 3.993698347797841e-06, "loss": 1.3765, "step": 4644 }, { "epoch": 0.58, "grad_norm": 6.9189898625248505, "learning_rate": 3.991725289786766e-06, "loss": 1.2884, "step": 4645 }, { "epoch": 0.58, "grad_norm": 9.419029552861632, "learning_rate": 3.989752395424237e-06, "loss": 1.0536, "step": 4646 }, { "epoch": 0.58, "grad_norm": 6.571144878306125, "learning_rate": 3.987779665030465e-06, "loss": 0.8442, "step": 4647 }, { "epoch": 0.58, "grad_norm": 7.188996221044726, "learning_rate": 3.985807098925637e-06, "loss": 0.8177, "step": 4648 }, { "epoch": 0.58, "grad_norm": 6.729270498899382, "learning_rate": 3.983834697429908e-06, "loss": 0.9923, "step": 4649 }, { "epoch": 0.58, "grad_norm": 7.641056414601487, "learning_rate": 3.981862460863415e-06, "loss": 0.9287, "step": 4650 }, { "epoch": 0.58, "grad_norm": 7.8065379068738485, "learning_rate": 3.979890389546258e-06, "loss": 1.0887, "step": 4651 }, { "epoch": 0.58, "grad_norm": 6.668944022887418, "learning_rate": 3.977918483798519e-06, "loss": 1.1935, "step": 4652 }, { "epoch": 0.58, "grad_norm": 10.410597968294411, "learning_rate": 3.975946743940246e-06, "loss": 0.8067, "step": 4653 }, { "epoch": 0.58, "grad_norm": 6.478396880030237, "learning_rate": 3.973975170291465e-06, "loss": 1.181, "step": 4654 }, { "epoch": 0.58, "grad_norm": 7.644604157206049, "learning_rate": 3.9720037631721735e-06, "loss": 1.2971, "step": 4655 }, { "epoch": 0.58, "grad_norm": 6.873472036993938, "learning_rate": 3.970032522902338e-06, "loss": 0.8639, "step": 4656 }, { "epoch": 0.58, "grad_norm": 7.281168866361253, "learning_rate": 3.968061449801908e-06, "loss": 1.2932, "step": 4657 }, { "epoch": 0.58, "grad_norm": 7.8784735411849995, "learning_rate": 3.966090544190792e-06, "loss": 1.1394, "step": 4658 }, { "epoch": 0.58, "grad_norm": 6.620339877922317, "learning_rate": 3.964119806388887e-06, "loss": 1.0164, "step": 4659 }, { "epoch": 0.58, "grad_norm": 6.664710746781671, "learning_rate": 3.962149236716048e-06, "loss": 1.0441, "step": 4660 }, { "epoch": 0.58, "grad_norm": 7.623005830633639, "learning_rate": 3.960178835492114e-06, "loss": 1.0022, "step": 4661 }, { "epoch": 0.58, "grad_norm": 11.61742638311963, "learning_rate": 3.958208603036889e-06, "loss": 1.127, "step": 4662 }, { "epoch": 0.58, "grad_norm": 8.734663889815847, "learning_rate": 3.956238539670153e-06, "loss": 1.2855, "step": 4663 }, { "epoch": 0.58, "grad_norm": 7.4382719768551855, "learning_rate": 3.954268645711659e-06, "loss": 1.1454, "step": 4664 }, { "epoch": 0.58, "grad_norm": 7.862526711484591, "learning_rate": 3.952298921481132e-06, "loss": 1.2599, "step": 4665 }, { "epoch": 0.58, "grad_norm": 6.5567345225261775, "learning_rate": 3.950329367298268e-06, "loss": 0.8178, "step": 4666 }, { "epoch": 0.58, "grad_norm": 7.841104223616007, "learning_rate": 3.948359983482735e-06, "loss": 1.1668, "step": 4667 }, { "epoch": 0.58, "grad_norm": 7.276965926176497, "learning_rate": 3.946390770354179e-06, "loss": 1.0946, "step": 4668 }, { "epoch": 0.58, "grad_norm": 7.787356266435841, "learning_rate": 3.944421728232211e-06, "loss": 1.2426, "step": 4669 }, { "epoch": 0.58, "grad_norm": 6.791796762427119, "learning_rate": 3.942452857436419e-06, "loss": 1.2989, "step": 4670 }, { "epoch": 0.58, "grad_norm": 7.823481408264359, "learning_rate": 3.940484158286362e-06, "loss": 1.3884, "step": 4671 }, { "epoch": 0.58, "grad_norm": 7.267916719560244, "learning_rate": 3.938515631101567e-06, "loss": 0.9951, "step": 4672 }, { "epoch": 0.58, "grad_norm": 7.7978961646634675, "learning_rate": 3.936547276201542e-06, "loss": 1.2514, "step": 4673 }, { "epoch": 0.58, "grad_norm": 7.415245222391334, "learning_rate": 3.934579093905758e-06, "loss": 1.2774, "step": 4674 }, { "epoch": 0.58, "grad_norm": 6.232115951521356, "learning_rate": 3.932611084533663e-06, "loss": 0.7952, "step": 4675 }, { "epoch": 0.58, "grad_norm": 8.402296461817532, "learning_rate": 3.930643248404675e-06, "loss": 1.1894, "step": 4676 }, { "epoch": 0.58, "grad_norm": 8.005086010744705, "learning_rate": 3.928675585838188e-06, "loss": 1.4238, "step": 4677 }, { "epoch": 0.58, "grad_norm": 7.233784503925877, "learning_rate": 3.9267080971535595e-06, "loss": 0.9813, "step": 4678 }, { "epoch": 0.58, "grad_norm": 8.11926373995178, "learning_rate": 3.924740782670129e-06, "loss": 1.2999, "step": 4679 }, { "epoch": 0.58, "grad_norm": 7.257131753033789, "learning_rate": 3.9227736427071995e-06, "loss": 0.8915, "step": 4680 }, { "epoch": 0.58, "grad_norm": 7.110754214551729, "learning_rate": 3.920806677584047e-06, "loss": 1.061, "step": 4681 }, { "epoch": 0.58, "grad_norm": 5.897666205149153, "learning_rate": 3.918839887619925e-06, "loss": 1.1363, "step": 4682 }, { "epoch": 0.58, "grad_norm": 6.742147677124593, "learning_rate": 3.91687327313405e-06, "loss": 0.9885, "step": 4683 }, { "epoch": 0.58, "grad_norm": 7.431290461204053, "learning_rate": 3.9149068344456185e-06, "loss": 1.068, "step": 4684 }, { "epoch": 0.58, "grad_norm": 6.762047179638904, "learning_rate": 3.91294057187379e-06, "loss": 1.0782, "step": 4685 }, { "epoch": 0.58, "grad_norm": 9.850173642041252, "learning_rate": 3.9109744857377056e-06, "loss": 0.7399, "step": 4686 }, { "epoch": 0.58, "grad_norm": 6.8248684827589505, "learning_rate": 3.909008576356467e-06, "loss": 1.1273, "step": 4687 }, { "epoch": 0.58, "grad_norm": 7.056852852649351, "learning_rate": 3.907042844049152e-06, "loss": 0.8572, "step": 4688 }, { "epoch": 0.58, "grad_norm": 7.534392775940267, "learning_rate": 3.905077289134812e-06, "loss": 1.1859, "step": 4689 }, { "epoch": 0.58, "grad_norm": 6.265895265098737, "learning_rate": 3.903111911932468e-06, "loss": 0.9961, "step": 4690 }, { "epoch": 0.58, "grad_norm": 6.5988120421705005, "learning_rate": 3.90114671276111e-06, "loss": 1.0927, "step": 4691 }, { "epoch": 0.58, "grad_norm": 7.526264642311622, "learning_rate": 3.8991816919397e-06, "loss": 1.018, "step": 4692 }, { "epoch": 0.58, "grad_norm": 6.754532279041184, "learning_rate": 3.897216849787174e-06, "loss": 1.3156, "step": 4693 }, { "epoch": 0.58, "grad_norm": 7.354097375266978, "learning_rate": 3.895252186622433e-06, "loss": 1.2081, "step": 4694 }, { "epoch": 0.58, "grad_norm": 6.947965551326087, "learning_rate": 3.8932877027643584e-06, "loss": 0.9476, "step": 4695 }, { "epoch": 0.58, "grad_norm": 7.699520318697423, "learning_rate": 3.8913233985317926e-06, "loss": 1.31, "step": 4696 }, { "epoch": 0.58, "grad_norm": 11.232861883070818, "learning_rate": 3.889359274243552e-06, "loss": 1.0508, "step": 4697 }, { "epoch": 0.58, "grad_norm": 7.337314484753224, "learning_rate": 3.887395330218429e-06, "loss": 1.0153, "step": 4698 }, { "epoch": 0.58, "grad_norm": 7.457696301135867, "learning_rate": 3.885431566775179e-06, "loss": 1.1093, "step": 4699 }, { "epoch": 0.58, "grad_norm": 6.657320302554863, "learning_rate": 3.883467984232533e-06, "loss": 0.7434, "step": 4700 }, { "epoch": 0.58, "grad_norm": 9.002838733338471, "learning_rate": 3.8815045829091915e-06, "loss": 1.2045, "step": 4701 }, { "epoch": 0.58, "grad_norm": 7.452805884935878, "learning_rate": 3.879541363123825e-06, "loss": 1.1989, "step": 4702 }, { "epoch": 0.58, "grad_norm": 7.151831463897267, "learning_rate": 3.877578325195076e-06, "loss": 1.1184, "step": 4703 }, { "epoch": 0.59, "grad_norm": 8.310821893926562, "learning_rate": 3.875615469441552e-06, "loss": 1.2754, "step": 4704 }, { "epoch": 0.59, "grad_norm": 7.367466452397048, "learning_rate": 3.873652796181842e-06, "loss": 1.2103, "step": 4705 }, { "epoch": 0.59, "grad_norm": 7.707325042761155, "learning_rate": 3.871690305734493e-06, "loss": 1.3026, "step": 4706 }, { "epoch": 0.59, "grad_norm": 7.645988989066659, "learning_rate": 3.869727998418032e-06, "loss": 1.1856, "step": 4707 }, { "epoch": 0.59, "grad_norm": 7.752993698030875, "learning_rate": 3.867765874550949e-06, "loss": 1.051, "step": 4708 }, { "epoch": 0.59, "grad_norm": 6.988220457110303, "learning_rate": 3.86580393445171e-06, "loss": 1.0045, "step": 4709 }, { "epoch": 0.59, "grad_norm": 7.6839117155317815, "learning_rate": 3.863842178438748e-06, "loss": 1.0004, "step": 4710 }, { "epoch": 0.59, "grad_norm": 6.956216120058965, "learning_rate": 3.861880606830468e-06, "loss": 0.8978, "step": 4711 }, { "epoch": 0.59, "grad_norm": 7.582839859603141, "learning_rate": 3.859919219945242e-06, "loss": 1.0089, "step": 4712 }, { "epoch": 0.59, "grad_norm": 6.939014523314941, "learning_rate": 3.857958018101413e-06, "loss": 1.2246, "step": 4713 }, { "epoch": 0.59, "grad_norm": 8.076923851307873, "learning_rate": 3.855997001617298e-06, "loss": 1.0712, "step": 4714 }, { "epoch": 0.59, "grad_norm": 6.938946302397536, "learning_rate": 3.854036170811176e-06, "loss": 1.0382, "step": 4715 }, { "epoch": 0.59, "grad_norm": 7.074699967349572, "learning_rate": 3.852075526001307e-06, "loss": 1.1507, "step": 4716 }, { "epoch": 0.59, "grad_norm": 6.4856811565640085, "learning_rate": 3.850115067505909e-06, "loss": 0.9325, "step": 4717 }, { "epoch": 0.59, "grad_norm": 7.861094491860718, "learning_rate": 3.84815479564318e-06, "loss": 1.2585, "step": 4718 }, { "epoch": 0.59, "grad_norm": 7.6607458725369275, "learning_rate": 3.84619471073128e-06, "loss": 1.1861, "step": 4719 }, { "epoch": 0.59, "grad_norm": 6.160727395277563, "learning_rate": 3.844234813088341e-06, "loss": 1.0147, "step": 4720 }, { "epoch": 0.59, "grad_norm": 7.744723263480435, "learning_rate": 3.842275103032466e-06, "loss": 1.1569, "step": 4721 }, { "epoch": 0.59, "grad_norm": 6.963398734437968, "learning_rate": 3.840315580881728e-06, "loss": 1.0555, "step": 4722 }, { "epoch": 0.59, "grad_norm": 6.932712513768758, "learning_rate": 3.838356246954168e-06, "loss": 0.8657, "step": 4723 }, { "epoch": 0.59, "grad_norm": 8.403554075819267, "learning_rate": 3.836397101567792e-06, "loss": 1.2141, "step": 4724 }, { "epoch": 0.59, "grad_norm": 8.101619069431019, "learning_rate": 3.834438145040589e-06, "loss": 1.0835, "step": 4725 }, { "epoch": 0.59, "grad_norm": 6.883649467034025, "learning_rate": 3.8324793776905e-06, "loss": 1.1502, "step": 4726 }, { "epoch": 0.59, "grad_norm": 7.626174308227287, "learning_rate": 3.830520799835448e-06, "loss": 1.2453, "step": 4727 }, { "epoch": 0.59, "grad_norm": 8.391020769416983, "learning_rate": 3.828562411793322e-06, "loss": 1.2526, "step": 4728 }, { "epoch": 0.59, "grad_norm": 6.649781031037459, "learning_rate": 3.826604213881975e-06, "loss": 0.9926, "step": 4729 }, { "epoch": 0.59, "grad_norm": 7.639002100493408, "learning_rate": 3.824646206419237e-06, "loss": 1.1229, "step": 4730 }, { "epoch": 0.59, "grad_norm": 6.85899148225627, "learning_rate": 3.822688389722902e-06, "loss": 1.2505, "step": 4731 }, { "epoch": 0.59, "grad_norm": 6.3605221091674276, "learning_rate": 3.820730764110734e-06, "loss": 0.6944, "step": 4732 }, { "epoch": 0.59, "grad_norm": 7.731927878390792, "learning_rate": 3.818773329900466e-06, "loss": 0.9794, "step": 4733 }, { "epoch": 0.59, "grad_norm": 7.753257160907898, "learning_rate": 3.816816087409805e-06, "loss": 0.949, "step": 4734 }, { "epoch": 0.59, "grad_norm": 8.105688302587563, "learning_rate": 3.814859036956414e-06, "loss": 1.2489, "step": 4735 }, { "epoch": 0.59, "grad_norm": 7.706204269254252, "learning_rate": 3.812902178857941e-06, "loss": 1.377, "step": 4736 }, { "epoch": 0.59, "grad_norm": 7.816214673283169, "learning_rate": 3.810945513431991e-06, "loss": 1.1639, "step": 4737 }, { "epoch": 0.59, "grad_norm": 6.500842933713402, "learning_rate": 3.8089890409961416e-06, "loss": 0.8957, "step": 4738 }, { "epoch": 0.59, "grad_norm": 6.654327647097569, "learning_rate": 3.8070327618679406e-06, "loss": 1.0528, "step": 4739 }, { "epoch": 0.59, "grad_norm": 7.072856039108048, "learning_rate": 3.8050766763649004e-06, "loss": 1.0528, "step": 4740 }, { "epoch": 0.59, "grad_norm": 7.707225359461424, "learning_rate": 3.803120784804507e-06, "loss": 1.2761, "step": 4741 }, { "epoch": 0.59, "grad_norm": 7.401263931901672, "learning_rate": 3.8011650875042093e-06, "loss": 1.251, "step": 4742 }, { "epoch": 0.59, "grad_norm": 8.454003097600971, "learning_rate": 3.7992095847814337e-06, "loss": 1.3773, "step": 4743 }, { "epoch": 0.59, "grad_norm": 9.456187812743394, "learning_rate": 3.7972542769535636e-06, "loss": 1.279, "step": 4744 }, { "epoch": 0.59, "grad_norm": 6.863260177641934, "learning_rate": 3.7952991643379554e-06, "loss": 0.7415, "step": 4745 }, { "epoch": 0.59, "grad_norm": 6.641066114434392, "learning_rate": 3.79334424725194e-06, "loss": 1.1246, "step": 4746 }, { "epoch": 0.59, "grad_norm": 6.786479036438607, "learning_rate": 3.7913895260128064e-06, "loss": 0.9688, "step": 4747 }, { "epoch": 0.59, "grad_norm": 7.503936416972698, "learning_rate": 3.7894350009378205e-06, "loss": 1.0213, "step": 4748 }, { "epoch": 0.59, "grad_norm": 7.695258634644699, "learning_rate": 3.787480672344209e-06, "loss": 1.2001, "step": 4749 }, { "epoch": 0.59, "grad_norm": 7.090042613578131, "learning_rate": 3.785526540549173e-06, "loss": 1.3413, "step": 4750 }, { "epoch": 0.59, "grad_norm": 8.089342024614284, "learning_rate": 3.783572605869875e-06, "loss": 1.4766, "step": 4751 }, { "epoch": 0.59, "grad_norm": 12.534461683504869, "learning_rate": 3.7816188686234562e-06, "loss": 1.053, "step": 4752 }, { "epoch": 0.59, "grad_norm": 7.0987401156547145, "learning_rate": 3.779665329127013e-06, "loss": 0.8801, "step": 4753 }, { "epoch": 0.59, "grad_norm": 6.718816206290637, "learning_rate": 3.7777119876976153e-06, "loss": 0.8978, "step": 4754 }, { "epoch": 0.59, "grad_norm": 6.535733872264154, "learning_rate": 3.7757588446523057e-06, "loss": 0.9728, "step": 4755 }, { "epoch": 0.59, "grad_norm": 7.403782367025457, "learning_rate": 3.773805900308086e-06, "loss": 0.917, "step": 4756 }, { "epoch": 0.59, "grad_norm": 6.15527731311066, "learning_rate": 3.771853154981934e-06, "loss": 0.9479, "step": 4757 }, { "epoch": 0.59, "grad_norm": 5.892746549089618, "learning_rate": 3.7699006089907856e-06, "loss": 0.7684, "step": 4758 }, { "epoch": 0.59, "grad_norm": 10.046274704612587, "learning_rate": 3.767948262651555e-06, "loss": 0.7661, "step": 4759 }, { "epoch": 0.59, "grad_norm": 6.3594615452371475, "learning_rate": 3.7659961162811157e-06, "loss": 0.8007, "step": 4760 }, { "epoch": 0.59, "grad_norm": 7.692901776954227, "learning_rate": 3.764044170196313e-06, "loss": 1.0157, "step": 4761 }, { "epoch": 0.59, "grad_norm": 7.095921939926845, "learning_rate": 3.7620924247139577e-06, "loss": 0.9, "step": 4762 }, { "epoch": 0.59, "grad_norm": 6.918227379078793, "learning_rate": 3.7601408801508276e-06, "loss": 1.1537, "step": 4763 }, { "epoch": 0.59, "grad_norm": 8.16308931880706, "learning_rate": 3.758189536823673e-06, "loss": 1.4226, "step": 4764 }, { "epoch": 0.59, "grad_norm": 7.11600195922587, "learning_rate": 3.7562383950492043e-06, "loss": 0.9987, "step": 4765 }, { "epoch": 0.59, "grad_norm": 5.833615198542507, "learning_rate": 3.7542874551441054e-06, "loss": 0.6941, "step": 4766 }, { "epoch": 0.59, "grad_norm": 7.802808939736562, "learning_rate": 3.7523367174250214e-06, "loss": 1.1118, "step": 4767 }, { "epoch": 0.59, "grad_norm": 6.957293236780196, "learning_rate": 3.7503861822085706e-06, "loss": 1.1575, "step": 4768 }, { "epoch": 0.59, "grad_norm": 7.150164790233015, "learning_rate": 3.748435849811335e-06, "loss": 1.0746, "step": 4769 }, { "epoch": 0.59, "grad_norm": 6.657868195808875, "learning_rate": 3.7464857205498615e-06, "loss": 0.8543, "step": 4770 }, { "epoch": 0.59, "grad_norm": 8.13834883549852, "learning_rate": 3.7445357947406714e-06, "loss": 1.2671, "step": 4771 }, { "epoch": 0.59, "grad_norm": 7.2116805740961185, "learning_rate": 3.742586072700243e-06, "loss": 1.0409, "step": 4772 }, { "epoch": 0.59, "grad_norm": 7.290448124382393, "learning_rate": 3.740636554745032e-06, "loss": 1.0217, "step": 4773 }, { "epoch": 0.59, "grad_norm": 8.283595600003899, "learning_rate": 3.7386872411914532e-06, "loss": 1.3055, "step": 4774 }, { "epoch": 0.59, "grad_norm": 7.267224357720374, "learning_rate": 3.7367381323558927e-06, "loss": 1.0672, "step": 4775 }, { "epoch": 0.59, "grad_norm": 7.433008929043253, "learning_rate": 3.7347892285547012e-06, "loss": 0.9361, "step": 4776 }, { "epoch": 0.59, "grad_norm": 6.389031512068266, "learning_rate": 3.7328405301041943e-06, "loss": 0.81, "step": 4777 }, { "epoch": 0.59, "grad_norm": 7.517711630082026, "learning_rate": 3.730892037320659e-06, "loss": 1.0072, "step": 4778 }, { "epoch": 0.59, "grad_norm": 6.918275765280966, "learning_rate": 3.7289437505203447e-06, "loss": 1.1714, "step": 4779 }, { "epoch": 0.59, "grad_norm": 7.897377045489972, "learning_rate": 3.726995670019471e-06, "loss": 1.3222, "step": 4780 }, { "epoch": 0.59, "grad_norm": 6.1244855877939015, "learning_rate": 3.7250477961342192e-06, "loss": 0.9982, "step": 4781 }, { "epoch": 0.59, "grad_norm": 6.91054035333159, "learning_rate": 3.7231001291807445e-06, "loss": 0.9515, "step": 4782 }, { "epoch": 0.59, "grad_norm": 7.532385694342037, "learning_rate": 3.721152669475158e-06, "loss": 1.1274, "step": 4783 }, { "epoch": 0.6, "grad_norm": 7.388117502449727, "learning_rate": 3.719205417333549e-06, "loss": 0.8857, "step": 4784 }, { "epoch": 0.6, "grad_norm": 6.640423600552168, "learning_rate": 3.717258373071965e-06, "loss": 0.8708, "step": 4785 }, { "epoch": 0.6, "grad_norm": 6.240030997805979, "learning_rate": 3.71531153700642e-06, "loss": 0.8796, "step": 4786 }, { "epoch": 0.6, "grad_norm": 8.997981435542565, "learning_rate": 3.7133649094528996e-06, "loss": 1.1974, "step": 4787 }, { "epoch": 0.6, "grad_norm": 7.014671388951135, "learning_rate": 3.7114184907273494e-06, "loss": 1.1102, "step": 4788 }, { "epoch": 0.6, "grad_norm": 7.3897577254440465, "learning_rate": 3.7094722811456862e-06, "loss": 1.1249, "step": 4789 }, { "epoch": 0.6, "grad_norm": 7.233175637115436, "learning_rate": 3.707526281023787e-06, "loss": 0.8684, "step": 4790 }, { "epoch": 0.6, "grad_norm": 8.636535253554316, "learning_rate": 3.705580490677504e-06, "loss": 1.287, "step": 4791 }, { "epoch": 0.6, "grad_norm": 6.361495597187878, "learning_rate": 3.7036349104226434e-06, "loss": 1.0481, "step": 4792 }, { "epoch": 0.6, "grad_norm": 6.0625265972238145, "learning_rate": 3.7016895405749887e-06, "loss": 0.7862, "step": 4793 }, { "epoch": 0.6, "grad_norm": 9.480064973192482, "learning_rate": 3.699744381450282e-06, "loss": 1.2215, "step": 4794 }, { "epoch": 0.6, "grad_norm": 7.402894629629661, "learning_rate": 3.6977994333642326e-06, "loss": 1.235, "step": 4795 }, { "epoch": 0.6, "grad_norm": 6.902200599767298, "learning_rate": 3.695854696632518e-06, "loss": 0.83, "step": 4796 }, { "epoch": 0.6, "grad_norm": 6.676295849425797, "learning_rate": 3.6939101715707783e-06, "loss": 1.1627, "step": 4797 }, { "epoch": 0.6, "grad_norm": 7.959549438832257, "learning_rate": 3.6919658584946215e-06, "loss": 1.4077, "step": 4798 }, { "epoch": 0.6, "grad_norm": 7.6277453673288775, "learning_rate": 3.6900217577196183e-06, "loss": 0.948, "step": 4799 }, { "epoch": 0.6, "grad_norm": 6.93099490473935, "learning_rate": 3.688077869561313e-06, "loss": 1.0507, "step": 4800 }, { "epoch": 0.6, "grad_norm": 7.333370096079489, "learning_rate": 3.6861341943352026e-06, "loss": 0.8638, "step": 4801 }, { "epoch": 0.6, "grad_norm": 6.61237437223497, "learning_rate": 3.6841907323567572e-06, "loss": 0.7019, "step": 4802 }, { "epoch": 0.6, "grad_norm": 6.551125848282291, "learning_rate": 3.6822474839414153e-06, "loss": 1.3031, "step": 4803 }, { "epoch": 0.6, "grad_norm": 7.608828751458042, "learning_rate": 3.6803044494045735e-06, "loss": 0.7829, "step": 4804 }, { "epoch": 0.6, "grad_norm": 6.0701353809466205, "learning_rate": 3.6783616290615986e-06, "loss": 0.8348, "step": 4805 }, { "epoch": 0.6, "grad_norm": 7.813466141041074, "learning_rate": 3.67641902322782e-06, "loss": 1.3268, "step": 4806 }, { "epoch": 0.6, "grad_norm": 8.786169862729023, "learning_rate": 3.674476632218535e-06, "loss": 1.3205, "step": 4807 }, { "epoch": 0.6, "grad_norm": 8.297060819125573, "learning_rate": 3.6725344563490005e-06, "loss": 1.1598, "step": 4808 }, { "epoch": 0.6, "grad_norm": 7.187860392503719, "learning_rate": 3.6705924959344485e-06, "loss": 0.9413, "step": 4809 }, { "epoch": 0.6, "grad_norm": 7.092951504467992, "learning_rate": 3.668650751290066e-06, "loss": 1.2538, "step": 4810 }, { "epoch": 0.6, "grad_norm": 9.20366031944392, "learning_rate": 3.6667092227310066e-06, "loss": 1.3194, "step": 4811 }, { "epoch": 0.6, "grad_norm": 8.525592281917833, "learning_rate": 3.6647679105723964e-06, "loss": 0.7764, "step": 4812 }, { "epoch": 0.6, "grad_norm": 7.7424237358286705, "learning_rate": 3.662826815129317e-06, "loss": 1.466, "step": 4813 }, { "epoch": 0.6, "grad_norm": 6.513582500852906, "learning_rate": 3.6608859367168213e-06, "loss": 1.0436, "step": 4814 }, { "epoch": 0.6, "grad_norm": 8.195377532323777, "learning_rate": 3.658945275649923e-06, "loss": 1.3676, "step": 4815 }, { "epoch": 0.6, "grad_norm": 7.1478307238946535, "learning_rate": 3.6570048322436046e-06, "loss": 0.8437, "step": 4816 }, { "epoch": 0.6, "grad_norm": 7.815474782479913, "learning_rate": 3.6550646068128083e-06, "loss": 1.153, "step": 4817 }, { "epoch": 0.6, "grad_norm": 8.816259853603684, "learning_rate": 3.6531245996724423e-06, "loss": 1.1787, "step": 4818 }, { "epoch": 0.6, "grad_norm": 7.754837099906442, "learning_rate": 3.6511848111373836e-06, "loss": 1.3314, "step": 4819 }, { "epoch": 0.6, "grad_norm": 5.9063792864154, "learning_rate": 3.6492452415224675e-06, "loss": 0.757, "step": 4820 }, { "epoch": 0.6, "grad_norm": 7.812491811901939, "learning_rate": 3.6473058911425007e-06, "loss": 1.0846, "step": 4821 }, { "epoch": 0.6, "grad_norm": 8.17538352162312, "learning_rate": 3.645366760312247e-06, "loss": 1.16, "step": 4822 }, { "epoch": 0.6, "grad_norm": 5.929823604701374, "learning_rate": 3.6434278493464397e-06, "loss": 0.8367, "step": 4823 }, { "epoch": 0.6, "grad_norm": 7.444034328330484, "learning_rate": 3.641489158559774e-06, "loss": 1.2232, "step": 4824 }, { "epoch": 0.6, "grad_norm": 7.8919252643800135, "learning_rate": 3.639550688266911e-06, "loss": 1.1488, "step": 4825 }, { "epoch": 0.6, "grad_norm": 8.421716645285555, "learning_rate": 3.637612438782475e-06, "loss": 1.4658, "step": 4826 }, { "epoch": 0.6, "grad_norm": 7.1918133294464, "learning_rate": 3.6356744104210528e-06, "loss": 1.0733, "step": 4827 }, { "epoch": 0.6, "grad_norm": 6.4451739374493515, "learning_rate": 3.6337366034972e-06, "loss": 0.9234, "step": 4828 }, { "epoch": 0.6, "grad_norm": 8.364790746988522, "learning_rate": 3.6317990183254294e-06, "loss": 1.2287, "step": 4829 }, { "epoch": 0.6, "grad_norm": 6.691886602169591, "learning_rate": 3.629861655220226e-06, "loss": 1.023, "step": 4830 }, { "epoch": 0.6, "grad_norm": 7.8696728022564395, "learning_rate": 3.6279245144960316e-06, "loss": 0.9308, "step": 4831 }, { "epoch": 0.6, "grad_norm": 7.560746488494237, "learning_rate": 3.6259875964672577e-06, "loss": 1.1345, "step": 4832 }, { "epoch": 0.6, "grad_norm": 7.029724478621388, "learning_rate": 3.6240509014482742e-06, "loss": 1.0596, "step": 4833 }, { "epoch": 0.6, "grad_norm": 8.677850338412926, "learning_rate": 3.6221144297534178e-06, "loss": 1.0732, "step": 4834 }, { "epoch": 0.6, "grad_norm": 6.51954025183151, "learning_rate": 3.6201781816969896e-06, "loss": 0.936, "step": 4835 }, { "epoch": 0.6, "grad_norm": 6.78262978565767, "learning_rate": 3.6182421575932516e-06, "loss": 1.0521, "step": 4836 }, { "epoch": 0.6, "grad_norm": 7.400011251098342, "learning_rate": 3.6163063577564327e-06, "loss": 1.192, "step": 4837 }, { "epoch": 0.6, "grad_norm": 7.318360832057814, "learning_rate": 3.6143707825007224e-06, "loss": 0.8426, "step": 4838 }, { "epoch": 0.6, "grad_norm": 7.735841159930092, "learning_rate": 3.6124354321402777e-06, "loss": 1.0017, "step": 4839 }, { "epoch": 0.6, "grad_norm": 12.091884516671115, "learning_rate": 3.6105003069892142e-06, "loss": 0.9469, "step": 4840 }, { "epoch": 0.6, "grad_norm": 7.662257109898139, "learning_rate": 3.608565407361615e-06, "loss": 1.4477, "step": 4841 }, { "epoch": 0.6, "grad_norm": 7.841933636437167, "learning_rate": 3.6066307335715252e-06, "loss": 1.2657, "step": 4842 }, { "epoch": 0.6, "grad_norm": 7.638065649290922, "learning_rate": 3.6046962859329504e-06, "loss": 1.3047, "step": 4843 }, { "epoch": 0.6, "grad_norm": 7.354065167032733, "learning_rate": 3.6027620647598653e-06, "loss": 1.0763, "step": 4844 }, { "epoch": 0.6, "grad_norm": 8.420026178919331, "learning_rate": 3.6008280703662023e-06, "loss": 1.0676, "step": 4845 }, { "epoch": 0.6, "grad_norm": 7.580931964303676, "learning_rate": 3.598894303065861e-06, "loss": 1.3841, "step": 4846 }, { "epoch": 0.6, "grad_norm": 6.849481009817735, "learning_rate": 3.5969607631727e-06, "loss": 1.2201, "step": 4847 }, { "epoch": 0.6, "grad_norm": 8.67804936636302, "learning_rate": 3.595027451000549e-06, "loss": 1.2261, "step": 4848 }, { "epoch": 0.6, "grad_norm": 7.064633924868057, "learning_rate": 3.593094366863187e-06, "loss": 0.8677, "step": 4849 }, { "epoch": 0.6, "grad_norm": 7.776338537186358, "learning_rate": 3.5911615110743723e-06, "loss": 1.2217, "step": 4850 }, { "epoch": 0.6, "grad_norm": 5.851738400605782, "learning_rate": 3.589228883947814e-06, "loss": 0.6932, "step": 4851 }, { "epoch": 0.6, "grad_norm": 6.534598929921327, "learning_rate": 3.587296485797187e-06, "loss": 0.7765, "step": 4852 }, { "epoch": 0.6, "grad_norm": 7.707399824843648, "learning_rate": 3.5853643169361334e-06, "loss": 1.1858, "step": 4853 }, { "epoch": 0.6, "grad_norm": 7.3788147097116275, "learning_rate": 3.583432377678252e-06, "loss": 0.7806, "step": 4854 }, { "epoch": 0.6, "grad_norm": 7.351394690134993, "learning_rate": 3.58150066833711e-06, "loss": 0.8846, "step": 4855 }, { "epoch": 0.6, "grad_norm": 7.716599958422539, "learning_rate": 3.5795691892262306e-06, "loss": 1.3049, "step": 4856 }, { "epoch": 0.6, "grad_norm": 5.604779802699663, "learning_rate": 3.5776379406591088e-06, "loss": 0.6302, "step": 4857 }, { "epoch": 0.6, "grad_norm": 6.668464816762925, "learning_rate": 3.575706922949194e-06, "loss": 1.2518, "step": 4858 }, { "epoch": 0.6, "grad_norm": 6.61450003492311, "learning_rate": 3.5737761364098977e-06, "loss": 0.7383, "step": 4859 }, { "epoch": 0.6, "grad_norm": 6.9971723967661115, "learning_rate": 3.5718455813546026e-06, "loss": 0.8877, "step": 4860 }, { "epoch": 0.6, "grad_norm": 6.861656134955055, "learning_rate": 3.5699152580966456e-06, "loss": 1.1226, "step": 4861 }, { "epoch": 0.6, "grad_norm": 6.005885854958624, "learning_rate": 3.567985166949331e-06, "loss": 0.8511, "step": 4862 }, { "epoch": 0.6, "grad_norm": 7.598675808200962, "learning_rate": 3.5660553082259208e-06, "loss": 0.9151, "step": 4863 }, { "epoch": 0.6, "grad_norm": 6.84246987299128, "learning_rate": 3.564125682239643e-06, "loss": 1.1626, "step": 4864 }, { "epoch": 0.61, "grad_norm": 7.0462661235778565, "learning_rate": 3.562196289303684e-06, "loss": 0.7678, "step": 4865 }, { "epoch": 0.61, "grad_norm": 8.455852965050509, "learning_rate": 3.5602671297312013e-06, "loss": 1.374, "step": 4866 }, { "epoch": 0.61, "grad_norm": 7.347915112662913, "learning_rate": 3.5583382038353025e-06, "loss": 1.209, "step": 4867 }, { "epoch": 0.61, "grad_norm": 7.595477093476798, "learning_rate": 3.556409511929062e-06, "loss": 1.5578, "step": 4868 }, { "epoch": 0.61, "grad_norm": 7.614057498943941, "learning_rate": 3.554481054325522e-06, "loss": 0.8832, "step": 4869 }, { "epoch": 0.61, "grad_norm": 8.952183977297237, "learning_rate": 3.5525528313376783e-06, "loss": 1.0914, "step": 4870 }, { "epoch": 0.61, "grad_norm": 8.101431994258835, "learning_rate": 3.550624843278494e-06, "loss": 0.9418, "step": 4871 }, { "epoch": 0.61, "grad_norm": 8.89707486842097, "learning_rate": 3.5486970904608908e-06, "loss": 1.3116, "step": 4872 }, { "epoch": 0.61, "grad_norm": 6.475491036938188, "learning_rate": 3.546769573197755e-06, "loss": 0.7037, "step": 4873 }, { "epoch": 0.61, "grad_norm": 7.089059023294228, "learning_rate": 3.544842291801932e-06, "loss": 0.7573, "step": 4874 }, { "epoch": 0.61, "grad_norm": 6.098522977782624, "learning_rate": 3.54291524658623e-06, "loss": 0.8311, "step": 4875 }, { "epoch": 0.61, "grad_norm": 7.263945183416988, "learning_rate": 3.540988437863421e-06, "loss": 0.9986, "step": 4876 }, { "epoch": 0.61, "grad_norm": 5.648842503687606, "learning_rate": 3.539061865946233e-06, "loss": 1.0626, "step": 4877 }, { "epoch": 0.61, "grad_norm": 8.601855860673597, "learning_rate": 3.537135531147363e-06, "loss": 1.2583, "step": 4878 }, { "epoch": 0.61, "grad_norm": 6.553877843753342, "learning_rate": 3.535209433779464e-06, "loss": 1.0644, "step": 4879 }, { "epoch": 0.61, "grad_norm": 6.7988314630414655, "learning_rate": 3.533283574155154e-06, "loss": 1.0604, "step": 4880 }, { "epoch": 0.61, "grad_norm": 5.821975589582733, "learning_rate": 3.5313579525870084e-06, "loss": 0.8154, "step": 4881 }, { "epoch": 0.61, "grad_norm": 8.150952509130887, "learning_rate": 3.5294325693875675e-06, "loss": 1.132, "step": 4882 }, { "epoch": 0.61, "grad_norm": 8.247696575792649, "learning_rate": 3.527507424869332e-06, "loss": 1.1459, "step": 4883 }, { "epoch": 0.61, "grad_norm": 5.106719595139707, "learning_rate": 3.5255825193447614e-06, "loss": 0.6839, "step": 4884 }, { "epoch": 0.61, "grad_norm": 8.612604607546139, "learning_rate": 3.52365785312628e-06, "loss": 1.0688, "step": 4885 }, { "epoch": 0.61, "grad_norm": 7.983094311224434, "learning_rate": 3.5217334265262705e-06, "loss": 1.275, "step": 4886 }, { "epoch": 0.61, "grad_norm": 7.078599979551266, "learning_rate": 3.51980923985708e-06, "loss": 0.9383, "step": 4887 }, { "epoch": 0.61, "grad_norm": 7.2421676342292285, "learning_rate": 3.5178852934310124e-06, "loss": 1.0864, "step": 4888 }, { "epoch": 0.61, "grad_norm": 7.021002379755827, "learning_rate": 3.515961587560338e-06, "loss": 1.0038, "step": 4889 }, { "epoch": 0.61, "grad_norm": 8.310847232003198, "learning_rate": 3.5140381225572826e-06, "loss": 1.4419, "step": 4890 }, { "epoch": 0.61, "grad_norm": 7.480120094050279, "learning_rate": 3.5121148987340344e-06, "loss": 1.0825, "step": 4891 }, { "epoch": 0.61, "grad_norm": 7.347036007959976, "learning_rate": 3.5101919164027453e-06, "loss": 1.0801, "step": 4892 }, { "epoch": 0.61, "grad_norm": 6.66033212707981, "learning_rate": 3.5082691758755234e-06, "loss": 0.8987, "step": 4893 }, { "epoch": 0.61, "grad_norm": 8.44217875587478, "learning_rate": 3.506346677464443e-06, "loss": 0.9641, "step": 4894 }, { "epoch": 0.61, "grad_norm": 8.127399877900514, "learning_rate": 3.5044244214815325e-06, "loss": 1.2133, "step": 4895 }, { "epoch": 0.61, "grad_norm": 6.4888383624640324, "learning_rate": 3.5025024082387894e-06, "loss": 0.933, "step": 4896 }, { "epoch": 0.61, "grad_norm": 7.350681472724244, "learning_rate": 3.5005806380481634e-06, "loss": 0.9411, "step": 4897 }, { "epoch": 0.61, "grad_norm": 6.813884542884887, "learning_rate": 3.49865911122157e-06, "loss": 1.3355, "step": 4898 }, { "epoch": 0.61, "grad_norm": 5.897038284625791, "learning_rate": 3.4967378280708843e-06, "loss": 0.8153, "step": 4899 }, { "epoch": 0.61, "grad_norm": 7.178951371784468, "learning_rate": 3.4948167889079375e-06, "loss": 1.3045, "step": 4900 }, { "epoch": 0.61, "grad_norm": 7.266109995818813, "learning_rate": 3.4928959940445295e-06, "loss": 1.2803, "step": 4901 }, { "epoch": 0.61, "grad_norm": 6.461813527863543, "learning_rate": 3.4909754437924125e-06, "loss": 0.7183, "step": 4902 }, { "epoch": 0.61, "grad_norm": 6.565268697482076, "learning_rate": 3.4890551384633053e-06, "loss": 0.8739, "step": 4903 }, { "epoch": 0.61, "grad_norm": 6.523671291810503, "learning_rate": 3.4871350783688795e-06, "loss": 0.8403, "step": 4904 }, { "epoch": 0.61, "grad_norm": 9.121445426536743, "learning_rate": 3.485215263820778e-06, "loss": 1.0126, "step": 4905 }, { "epoch": 0.61, "grad_norm": 6.921279586393992, "learning_rate": 3.483295695130591e-06, "loss": 1.0511, "step": 4906 }, { "epoch": 0.61, "grad_norm": 6.4312813433471385, "learning_rate": 3.4813763726098786e-06, "loss": 1.1926, "step": 4907 }, { "epoch": 0.61, "grad_norm": 8.55171244475188, "learning_rate": 3.4794572965701573e-06, "loss": 1.1984, "step": 4908 }, { "epoch": 0.61, "grad_norm": 7.6392027328840015, "learning_rate": 3.4775384673229017e-06, "loss": 1.4814, "step": 4909 }, { "epoch": 0.61, "grad_norm": 7.093109798966103, "learning_rate": 3.475619885179551e-06, "loss": 1.3042, "step": 4910 }, { "epoch": 0.61, "grad_norm": 7.036420419549678, "learning_rate": 3.4737015504514993e-06, "loss": 0.9647, "step": 4911 }, { "epoch": 0.61, "grad_norm": 6.339564760234139, "learning_rate": 3.4717834634501046e-06, "loss": 1.2637, "step": 4912 }, { "epoch": 0.61, "grad_norm": 7.172078884450735, "learning_rate": 3.46986562448668e-06, "loss": 0.8772, "step": 4913 }, { "epoch": 0.61, "grad_norm": 5.644113167313792, "learning_rate": 3.467948033872508e-06, "loss": 0.6801, "step": 4914 }, { "epoch": 0.61, "grad_norm": 6.561900249471246, "learning_rate": 3.466030691918818e-06, "loss": 0.8619, "step": 4915 }, { "epoch": 0.61, "grad_norm": 7.749839028049226, "learning_rate": 3.4641135989368045e-06, "loss": 1.1742, "step": 4916 }, { "epoch": 0.61, "grad_norm": 8.343130223050785, "learning_rate": 3.4621967552376277e-06, "loss": 1.0737, "step": 4917 }, { "epoch": 0.61, "grad_norm": 7.260167461212373, "learning_rate": 3.4602801611323977e-06, "loss": 1.2298, "step": 4918 }, { "epoch": 0.61, "grad_norm": 7.475894917601626, "learning_rate": 3.458363816932191e-06, "loss": 0.8607, "step": 4919 }, { "epoch": 0.61, "grad_norm": 7.771863421393, "learning_rate": 3.4564477229480376e-06, "loss": 1.0604, "step": 4920 }, { "epoch": 0.61, "grad_norm": 7.021620134250192, "learning_rate": 3.4545318794909334e-06, "loss": 0.9085, "step": 4921 }, { "epoch": 0.61, "grad_norm": 8.064845223750856, "learning_rate": 3.4526162868718262e-06, "loss": 1.3653, "step": 4922 }, { "epoch": 0.61, "grad_norm": 5.679841064819927, "learning_rate": 3.450700945401633e-06, "loss": 0.6479, "step": 4923 }, { "epoch": 0.61, "grad_norm": 7.767671391333607, "learning_rate": 3.448785855391221e-06, "loss": 1.0399, "step": 4924 }, { "epoch": 0.61, "grad_norm": 7.40820490193422, "learning_rate": 3.4468710171514175e-06, "loss": 0.8386, "step": 4925 }, { "epoch": 0.61, "grad_norm": 7.334431797729987, "learning_rate": 3.4449564309930163e-06, "loss": 1.1111, "step": 4926 }, { "epoch": 0.61, "grad_norm": 6.59088958064727, "learning_rate": 3.443042097226761e-06, "loss": 0.9744, "step": 4927 }, { "epoch": 0.61, "grad_norm": 8.279912265483658, "learning_rate": 3.441128016163362e-06, "loss": 1.0317, "step": 4928 }, { "epoch": 0.61, "grad_norm": 7.010811548478017, "learning_rate": 3.4392141881134828e-06, "loss": 1.0309, "step": 4929 }, { "epoch": 0.61, "grad_norm": 7.981366410804689, "learning_rate": 3.4373006133877498e-06, "loss": 1.0828, "step": 4930 }, { "epoch": 0.61, "grad_norm": 7.8755237021461815, "learning_rate": 3.4353872922967455e-06, "loss": 1.3283, "step": 4931 }, { "epoch": 0.61, "grad_norm": 6.13609953450888, "learning_rate": 3.4334742251510127e-06, "loss": 0.827, "step": 4932 }, { "epoch": 0.61, "grad_norm": 6.897847698604183, "learning_rate": 3.431561412261054e-06, "loss": 1.0055, "step": 4933 }, { "epoch": 0.61, "grad_norm": 8.933803590289978, "learning_rate": 3.4296488539373265e-06, "loss": 1.1375, "step": 4934 }, { "epoch": 0.61, "grad_norm": 7.5213245935288615, "learning_rate": 3.4277365504902526e-06, "loss": 1.1192, "step": 4935 }, { "epoch": 0.61, "grad_norm": 7.859061701140829, "learning_rate": 3.425824502230207e-06, "loss": 1.1073, "step": 4936 }, { "epoch": 0.61, "grad_norm": 6.742137566796028, "learning_rate": 3.4239127094675277e-06, "loss": 0.9755, "step": 4937 }, { "epoch": 0.61, "grad_norm": 7.322719047449746, "learning_rate": 3.422001172512508e-06, "loss": 1.0586, "step": 4938 }, { "epoch": 0.61, "grad_norm": 6.595222310952581, "learning_rate": 3.420089891675401e-06, "loss": 0.7504, "step": 4939 }, { "epoch": 0.61, "grad_norm": 6.115412809868749, "learning_rate": 3.418178867266419e-06, "loss": 0.903, "step": 4940 }, { "epoch": 0.61, "grad_norm": 7.261577328180091, "learning_rate": 3.41626809959573e-06, "loss": 1.0072, "step": 4941 }, { "epoch": 0.61, "grad_norm": 7.387606150946317, "learning_rate": 3.414357588973464e-06, "loss": 0.957, "step": 4942 }, { "epoch": 0.61, "grad_norm": 6.883874963831038, "learning_rate": 3.4124473357097044e-06, "loss": 1.0516, "step": 4943 }, { "epoch": 0.61, "grad_norm": 7.312371743322032, "learning_rate": 3.4105373401145004e-06, "loss": 0.9693, "step": 4944 }, { "epoch": 0.62, "grad_norm": 7.1999477135878545, "learning_rate": 3.4086276024978515e-06, "loss": 0.8519, "step": 4945 }, { "epoch": 0.62, "grad_norm": 6.791880268598384, "learning_rate": 3.40671812316972e-06, "loss": 1.0301, "step": 4946 }, { "epoch": 0.62, "grad_norm": 7.951406979763232, "learning_rate": 3.4048089024400255e-06, "loss": 1.2432, "step": 4947 }, { "epoch": 0.62, "grad_norm": 6.562705065156959, "learning_rate": 3.4028999406186426e-06, "loss": 0.7986, "step": 4948 }, { "epoch": 0.62, "grad_norm": 7.01588374183275, "learning_rate": 3.4009912380154087e-06, "loss": 0.9994, "step": 4949 }, { "epoch": 0.62, "grad_norm": 7.3976869156725265, "learning_rate": 3.399082794940115e-06, "loss": 1.0958, "step": 4950 }, { "epoch": 0.62, "grad_norm": 7.13081175032188, "learning_rate": 3.397174611702514e-06, "loss": 1.2838, "step": 4951 }, { "epoch": 0.62, "grad_norm": 7.754381594588832, "learning_rate": 3.3952666886123114e-06, "loss": 1.515, "step": 4952 }, { "epoch": 0.62, "grad_norm": 7.5648153365244255, "learning_rate": 3.393359025979178e-06, "loss": 1.0601, "step": 4953 }, { "epoch": 0.62, "grad_norm": 7.780963110422213, "learning_rate": 3.391451624112735e-06, "loss": 1.1232, "step": 4954 }, { "epoch": 0.62, "grad_norm": 8.003640603205401, "learning_rate": 3.389544483322566e-06, "loss": 0.9737, "step": 4955 }, { "epoch": 0.62, "grad_norm": 8.188127539876126, "learning_rate": 3.3876376039182097e-06, "loss": 1.2493, "step": 4956 }, { "epoch": 0.62, "grad_norm": 7.672023800527241, "learning_rate": 3.3857309862091612e-06, "loss": 1.1173, "step": 4957 }, { "epoch": 0.62, "grad_norm": 6.419667609235491, "learning_rate": 3.383824630504878e-06, "loss": 0.7533, "step": 4958 }, { "epoch": 0.62, "grad_norm": 6.218380828912165, "learning_rate": 3.3819185371147704e-06, "loss": 1.0797, "step": 4959 }, { "epoch": 0.62, "grad_norm": 7.252545843599836, "learning_rate": 3.3800127063482097e-06, "loss": 0.8609, "step": 4960 }, { "epoch": 0.62, "grad_norm": 6.316601007818719, "learning_rate": 3.3781071385145185e-06, "loss": 1.0775, "step": 4961 }, { "epoch": 0.62, "grad_norm": 8.268601734378915, "learning_rate": 3.376201833922987e-06, "loss": 1.1624, "step": 4962 }, { "epoch": 0.62, "grad_norm": 7.720518300987581, "learning_rate": 3.3742967928828517e-06, "loss": 1.1221, "step": 4963 }, { "epoch": 0.62, "grad_norm": 6.577023402239551, "learning_rate": 3.372392015703314e-06, "loss": 0.8967, "step": 4964 }, { "epoch": 0.62, "grad_norm": 6.607821266681979, "learning_rate": 3.37048750269353e-06, "loss": 0.6933, "step": 4965 }, { "epoch": 0.62, "grad_norm": 8.434486471283241, "learning_rate": 3.3685832541626094e-06, "loss": 1.2456, "step": 4966 }, { "epoch": 0.62, "grad_norm": 7.827714498920124, "learning_rate": 3.366679270419626e-06, "loss": 1.1497, "step": 4967 }, { "epoch": 0.62, "grad_norm": 7.149011182092399, "learning_rate": 3.364775551773605e-06, "loss": 1.0765, "step": 4968 }, { "epoch": 0.62, "grad_norm": 7.406897666350475, "learning_rate": 3.3628720985335306e-06, "loss": 0.9612, "step": 4969 }, { "epoch": 0.62, "grad_norm": 5.7015126797143205, "learning_rate": 3.3609689110083417e-06, "loss": 0.9031, "step": 4970 }, { "epoch": 0.62, "grad_norm": 6.450444534652956, "learning_rate": 3.3590659895069415e-06, "loss": 1.0219, "step": 4971 }, { "epoch": 0.62, "grad_norm": 6.9937280180703345, "learning_rate": 3.3571633343381804e-06, "loss": 0.9578, "step": 4972 }, { "epoch": 0.62, "grad_norm": 7.531913411514067, "learning_rate": 3.3552609458108693e-06, "loss": 0.9991, "step": 4973 }, { "epoch": 0.62, "grad_norm": 7.967239342139623, "learning_rate": 3.35335882423378e-06, "loss": 1.2182, "step": 4974 }, { "epoch": 0.62, "grad_norm": 7.284270120284617, "learning_rate": 3.3514569699156334e-06, "loss": 1.1131, "step": 4975 }, { "epoch": 0.62, "grad_norm": 5.746159898067612, "learning_rate": 3.3495553831651144e-06, "loss": 0.7644, "step": 4976 }, { "epoch": 0.62, "grad_norm": 8.037857549992985, "learning_rate": 3.3476540642908584e-06, "loss": 1.2233, "step": 4977 }, { "epoch": 0.62, "grad_norm": 6.438661492609012, "learning_rate": 3.3457530136014615e-06, "loss": 0.7765, "step": 4978 }, { "epoch": 0.62, "grad_norm": 7.4561368844325555, "learning_rate": 3.343852231405473e-06, "loss": 0.8662, "step": 4979 }, { "epoch": 0.62, "grad_norm": 6.140583901070939, "learning_rate": 3.3419517180114035e-06, "loss": 0.7988, "step": 4980 }, { "epoch": 0.62, "grad_norm": 6.525920010528341, "learning_rate": 3.3400514737277144e-06, "loss": 0.8613, "step": 4981 }, { "epoch": 0.62, "grad_norm": 7.829152214681996, "learning_rate": 3.338151498862824e-06, "loss": 1.0505, "step": 4982 }, { "epoch": 0.62, "grad_norm": 7.025425565590529, "learning_rate": 3.336251793725113e-06, "loss": 0.8725, "step": 4983 }, { "epoch": 0.62, "grad_norm": 6.950835961571623, "learning_rate": 3.334352358622911e-06, "loss": 0.9606, "step": 4984 }, { "epoch": 0.62, "grad_norm": 7.989685083770485, "learning_rate": 3.3324531938645087e-06, "loss": 1.1705, "step": 4985 }, { "epoch": 0.62, "grad_norm": 8.20837830930351, "learning_rate": 3.3305542997581485e-06, "loss": 1.4304, "step": 4986 }, { "epoch": 0.62, "grad_norm": 6.031191283561274, "learning_rate": 3.328655676612035e-06, "loss": 0.8925, "step": 4987 }, { "epoch": 0.62, "grad_norm": 7.500853895332002, "learning_rate": 3.326757324734322e-06, "loss": 1.1462, "step": 4988 }, { "epoch": 0.62, "grad_norm": 6.901559137969641, "learning_rate": 3.3248592444331236e-06, "loss": 0.8491, "step": 4989 }, { "epoch": 0.62, "grad_norm": 7.280182895103531, "learning_rate": 3.3229614360165096e-06, "loss": 1.3639, "step": 4990 }, { "epoch": 0.62, "grad_norm": 6.744728210771407, "learning_rate": 3.3210638997925017e-06, "loss": 0.9694, "step": 4991 }, { "epoch": 0.62, "grad_norm": 8.347135953249365, "learning_rate": 3.3191666360690844e-06, "loss": 0.8809, "step": 4992 }, { "epoch": 0.62, "grad_norm": 8.120100218304378, "learning_rate": 3.3172696451541912e-06, "loss": 0.9152, "step": 4993 }, { "epoch": 0.62, "grad_norm": 8.273803513536135, "learning_rate": 3.3153729273557168e-06, "loss": 1.1508, "step": 4994 }, { "epoch": 0.62, "grad_norm": 6.684191735333981, "learning_rate": 3.3134764829815064e-06, "loss": 1.0774, "step": 4995 }, { "epoch": 0.62, "grad_norm": 6.724902583646144, "learning_rate": 3.3115803123393654e-06, "loss": 0.7392, "step": 4996 }, { "epoch": 0.62, "grad_norm": 7.6153996332382, "learning_rate": 3.3096844157370522e-06, "loss": 0.8724, "step": 4997 }, { "epoch": 0.62, "grad_norm": 7.193913251036538, "learning_rate": 3.3077887934822796e-06, "loss": 1.0244, "step": 4998 }, { "epoch": 0.62, "grad_norm": 6.339855573795074, "learning_rate": 3.3058934458827195e-06, "loss": 0.5776, "step": 4999 }, { "epoch": 0.62, "grad_norm": 7.503455145307826, "learning_rate": 3.3039983732459953e-06, "loss": 1.0897, "step": 5000 }, { "epoch": 0.62, "grad_norm": 8.739263277296697, "learning_rate": 3.3021035758796904e-06, "loss": 1.2114, "step": 5001 }, { "epoch": 0.62, "grad_norm": 7.101900857612586, "learning_rate": 3.300209054091339e-06, "loss": 0.9043, "step": 5002 }, { "epoch": 0.62, "grad_norm": 7.5784420454170265, "learning_rate": 3.298314808188434e-06, "loss": 1.2062, "step": 5003 }, { "epoch": 0.62, "grad_norm": 8.522536646972183, "learning_rate": 3.2964208384784206e-06, "loss": 1.2732, "step": 5004 }, { "epoch": 0.62, "grad_norm": 6.877560850577354, "learning_rate": 3.2945271452687e-06, "loss": 0.9281, "step": 5005 }, { "epoch": 0.62, "grad_norm": 6.925101161701804, "learning_rate": 3.292633728866631e-06, "loss": 0.6329, "step": 5006 }, { "epoch": 0.62, "grad_norm": 6.839932606601598, "learning_rate": 3.2907405895795237e-06, "loss": 0.7889, "step": 5007 }, { "epoch": 0.62, "grad_norm": 8.206082654632006, "learning_rate": 3.288847727714646e-06, "loss": 0.9822, "step": 5008 }, { "epoch": 0.62, "grad_norm": 7.250346045737997, "learning_rate": 3.2869551435792185e-06, "loss": 0.9647, "step": 5009 }, { "epoch": 0.62, "grad_norm": 6.768638769823692, "learning_rate": 3.2850628374804204e-06, "loss": 0.8198, "step": 5010 }, { "epoch": 0.62, "grad_norm": 7.63512403903724, "learning_rate": 3.2831708097253817e-06, "loss": 0.957, "step": 5011 }, { "epoch": 0.62, "grad_norm": 9.641642169410273, "learning_rate": 3.2812790606211903e-06, "loss": 1.0751, "step": 5012 }, { "epoch": 0.62, "grad_norm": 7.015730007694692, "learning_rate": 3.279387590474886e-06, "loss": 0.7772, "step": 5013 }, { "epoch": 0.62, "grad_norm": 8.281797976820695, "learning_rate": 3.277496399593465e-06, "loss": 0.7895, "step": 5014 }, { "epoch": 0.62, "grad_norm": 6.884119333275225, "learning_rate": 3.2756054882838793e-06, "loss": 1.0197, "step": 5015 }, { "epoch": 0.62, "grad_norm": 6.574528639122123, "learning_rate": 3.273714856853033e-06, "loss": 0.842, "step": 5016 }, { "epoch": 0.62, "grad_norm": 7.11932752202648, "learning_rate": 3.271824505607787e-06, "loss": 0.8753, "step": 5017 }, { "epoch": 0.62, "grad_norm": 6.766002722381533, "learning_rate": 3.2699344348549527e-06, "loss": 1.0211, "step": 5018 }, { "epoch": 0.62, "grad_norm": 6.801507185667898, "learning_rate": 3.2680446449013037e-06, "loss": 0.9055, "step": 5019 }, { "epoch": 0.62, "grad_norm": 7.323529731316672, "learning_rate": 3.2661551360535605e-06, "loss": 0.9859, "step": 5020 }, { "epoch": 0.62, "grad_norm": 7.838128385101783, "learning_rate": 3.2642659086184016e-06, "loss": 1.1115, "step": 5021 }, { "epoch": 0.62, "grad_norm": 6.545183149218938, "learning_rate": 3.2623769629024594e-06, "loss": 0.7005, "step": 5022 }, { "epoch": 0.62, "grad_norm": 7.634400202881424, "learning_rate": 3.260488299212319e-06, "loss": 1.0251, "step": 5023 }, { "epoch": 0.62, "grad_norm": 8.041361322704843, "learning_rate": 3.258599917854521e-06, "loss": 1.1343, "step": 5024 }, { "epoch": 0.62, "grad_norm": 7.867425741572747, "learning_rate": 3.256711819135561e-06, "loss": 1.2745, "step": 5025 }, { "epoch": 0.63, "grad_norm": 7.0884427040083455, "learning_rate": 3.254824003361888e-06, "loss": 1.0425, "step": 5026 }, { "epoch": 0.63, "grad_norm": 6.957744096795884, "learning_rate": 3.2529364708399024e-06, "loss": 1.1064, "step": 5027 }, { "epoch": 0.63, "grad_norm": 7.118390106117682, "learning_rate": 3.251049221875967e-06, "loss": 0.9332, "step": 5028 }, { "epoch": 0.63, "grad_norm": 7.920552569873825, "learning_rate": 3.249162256776387e-06, "loss": 1.2168, "step": 5029 }, { "epoch": 0.63, "grad_norm": 7.974726825391908, "learning_rate": 3.247275575847427e-06, "loss": 1.2353, "step": 5030 }, { "epoch": 0.63, "grad_norm": 5.3348129089411715, "learning_rate": 3.245389179395311e-06, "loss": 0.6532, "step": 5031 }, { "epoch": 0.63, "grad_norm": 7.834171430520801, "learning_rate": 3.243503067726206e-06, "loss": 1.0131, "step": 5032 }, { "epoch": 0.63, "grad_norm": 6.7235556228880204, "learning_rate": 3.241617241146242e-06, "loss": 0.9571, "step": 5033 }, { "epoch": 0.63, "grad_norm": 8.088285220723469, "learning_rate": 3.239731699961497e-06, "loss": 1.0053, "step": 5034 }, { "epoch": 0.63, "grad_norm": 6.403067295257292, "learning_rate": 3.2378464444780066e-06, "loss": 0.9214, "step": 5035 }, { "epoch": 0.63, "grad_norm": 7.678500807982722, "learning_rate": 3.235961475001755e-06, "loss": 1.0829, "step": 5036 }, { "epoch": 0.63, "grad_norm": 7.03354905505619, "learning_rate": 3.2340767918386883e-06, "loss": 1.2566, "step": 5037 }, { "epoch": 0.63, "grad_norm": 7.467274518106049, "learning_rate": 3.2321923952946965e-06, "loss": 1.0331, "step": 5038 }, { "epoch": 0.63, "grad_norm": 7.73680037897138, "learning_rate": 3.2303082856756275e-06, "loss": 1.1477, "step": 5039 }, { "epoch": 0.63, "grad_norm": 7.892394646320422, "learning_rate": 3.2284244632872854e-06, "loss": 1.326, "step": 5040 }, { "epoch": 0.63, "grad_norm": 7.24633622583086, "learning_rate": 3.2265409284354234e-06, "loss": 1.0793, "step": 5041 }, { "epoch": 0.63, "grad_norm": 7.823489700070621, "learning_rate": 3.22465768142575e-06, "loss": 0.9592, "step": 5042 }, { "epoch": 0.63, "grad_norm": 6.6581098364052185, "learning_rate": 3.222774722563926e-06, "loss": 0.8928, "step": 5043 }, { "epoch": 0.63, "grad_norm": 7.630688840195031, "learning_rate": 3.2208920521555677e-06, "loss": 1.228, "step": 5044 }, { "epoch": 0.63, "grad_norm": 7.32924166308402, "learning_rate": 3.2190096705062412e-06, "loss": 1.2272, "step": 5045 }, { "epoch": 0.63, "grad_norm": 7.059283557736981, "learning_rate": 3.217127577921467e-06, "loss": 1.0083, "step": 5046 }, { "epoch": 0.63, "grad_norm": 9.016398866336768, "learning_rate": 3.2152457747067206e-06, "loss": 1.1062, "step": 5047 }, { "epoch": 0.63, "grad_norm": 6.292870738950701, "learning_rate": 3.2133642611674265e-06, "loss": 0.8875, "step": 5048 }, { "epoch": 0.63, "grad_norm": 7.150155285363499, "learning_rate": 3.211483037608969e-06, "loss": 1.2678, "step": 5049 }, { "epoch": 0.63, "grad_norm": 8.10476716269108, "learning_rate": 3.2096021043366775e-06, "loss": 0.9869, "step": 5050 }, { "epoch": 0.63, "grad_norm": 7.737815234582565, "learning_rate": 3.20772146165584e-06, "loss": 1.2307, "step": 5051 }, { "epoch": 0.63, "grad_norm": 6.837529545651859, "learning_rate": 3.2058411098716937e-06, "loss": 0.9222, "step": 5052 }, { "epoch": 0.63, "grad_norm": 8.039980423576534, "learning_rate": 3.203961049289431e-06, "loss": 1.1699, "step": 5053 }, { "epoch": 0.63, "grad_norm": 6.45273840967221, "learning_rate": 3.2020812802141952e-06, "loss": 1.0812, "step": 5054 }, { "epoch": 0.63, "grad_norm": 5.724685799472264, "learning_rate": 3.200201802951084e-06, "loss": 0.7249, "step": 5055 }, { "epoch": 0.63, "grad_norm": 8.217028311771031, "learning_rate": 3.1983226178051465e-06, "loss": 1.2488, "step": 5056 }, { "epoch": 0.63, "grad_norm": 7.55271377744457, "learning_rate": 3.1964437250813835e-06, "loss": 1.1459, "step": 5057 }, { "epoch": 0.63, "grad_norm": 7.639672646420612, "learning_rate": 3.194565125084753e-06, "loss": 1.3479, "step": 5058 }, { "epoch": 0.63, "grad_norm": 6.55144154292412, "learning_rate": 3.1926868181201586e-06, "loss": 0.771, "step": 5059 }, { "epoch": 0.63, "grad_norm": 8.501554850185967, "learning_rate": 3.1908088044924634e-06, "loss": 1.2306, "step": 5060 }, { "epoch": 0.63, "grad_norm": 7.294888789528945, "learning_rate": 3.1889310845064768e-06, "loss": 1.1364, "step": 5061 }, { "epoch": 0.63, "grad_norm": 6.512355570236632, "learning_rate": 3.187053658466963e-06, "loss": 1.0399, "step": 5062 }, { "epoch": 0.63, "grad_norm": 17.38646321912353, "learning_rate": 3.1851765266786405e-06, "loss": 0.8158, "step": 5063 }, { "epoch": 0.63, "grad_norm": 9.220815768130409, "learning_rate": 3.183299689446176e-06, "loss": 1.374, "step": 5064 }, { "epoch": 0.63, "grad_norm": 7.473889478226787, "learning_rate": 3.181423147074192e-06, "loss": 0.8166, "step": 5065 }, { "epoch": 0.63, "grad_norm": 6.969971433977603, "learning_rate": 3.1795468998672594e-06, "loss": 1.1141, "step": 5066 }, { "epoch": 0.63, "grad_norm": 5.726022534915236, "learning_rate": 3.177670948129908e-06, "loss": 0.6594, "step": 5067 }, { "epoch": 0.63, "grad_norm": 7.218994127964467, "learning_rate": 3.1757952921666106e-06, "loss": 1.0043, "step": 5068 }, { "epoch": 0.63, "grad_norm": 7.247933578968887, "learning_rate": 3.1739199322818e-06, "loss": 1.0316, "step": 5069 }, { "epoch": 0.63, "grad_norm": 7.461887957500442, "learning_rate": 3.1720448687798556e-06, "loss": 0.9184, "step": 5070 }, { "epoch": 0.63, "grad_norm": 7.036658738822162, "learning_rate": 3.170170101965109e-06, "loss": 0.9459, "step": 5071 }, { "epoch": 0.63, "grad_norm": 8.05674306284606, "learning_rate": 3.1682956321418484e-06, "loss": 1.1351, "step": 5072 }, { "epoch": 0.63, "grad_norm": 8.01984842928001, "learning_rate": 3.1664214596143084e-06, "loss": 1.2363, "step": 5073 }, { "epoch": 0.63, "grad_norm": 9.30124785339358, "learning_rate": 3.1645475846866784e-06, "loss": 1.3033, "step": 5074 }, { "epoch": 0.63, "grad_norm": 7.25385851149175, "learning_rate": 3.162674007663097e-06, "loss": 1.0011, "step": 5075 }, { "epoch": 0.63, "grad_norm": 6.436663553029244, "learning_rate": 3.1608007288476593e-06, "loss": 0.915, "step": 5076 }, { "epoch": 0.63, "grad_norm": 7.890217424235713, "learning_rate": 3.1589277485444065e-06, "loss": 1.1734, "step": 5077 }, { "epoch": 0.63, "grad_norm": 6.7046668786032635, "learning_rate": 3.1570550670573348e-06, "loss": 1.1017, "step": 5078 }, { "epoch": 0.63, "grad_norm": 6.425235623888763, "learning_rate": 3.15518268469039e-06, "loss": 0.9511, "step": 5079 }, { "epoch": 0.63, "grad_norm": 8.054301358980128, "learning_rate": 3.1533106017474695e-06, "loss": 1.1354, "step": 5080 }, { "epoch": 0.63, "grad_norm": 6.372129935912876, "learning_rate": 3.151438818532424e-06, "loss": 0.8461, "step": 5081 }, { "epoch": 0.63, "grad_norm": 7.464662427596598, "learning_rate": 3.149567335349053e-06, "loss": 1.1422, "step": 5082 }, { "epoch": 0.63, "grad_norm": 6.677507902049005, "learning_rate": 3.14769615250111e-06, "loss": 0.7614, "step": 5083 }, { "epoch": 0.63, "grad_norm": 6.258107305207901, "learning_rate": 3.1458252702922954e-06, "loss": 0.8077, "step": 5084 }, { "epoch": 0.63, "grad_norm": 7.234252784714042, "learning_rate": 3.143954689026268e-06, "loss": 1.1998, "step": 5085 }, { "epoch": 0.63, "grad_norm": 8.147387430800313, "learning_rate": 3.1420844090066315e-06, "loss": 1.1366, "step": 5086 }, { "epoch": 0.63, "grad_norm": 6.933193469018878, "learning_rate": 3.14021443053694e-06, "loss": 0.9107, "step": 5087 }, { "epoch": 0.63, "grad_norm": 8.084471859697764, "learning_rate": 3.138344753920706e-06, "loss": 0.8931, "step": 5088 }, { "epoch": 0.63, "grad_norm": 7.373436401890735, "learning_rate": 3.1364753794613845e-06, "loss": 1.2567, "step": 5089 }, { "epoch": 0.63, "grad_norm": 7.333422385756127, "learning_rate": 3.134606307462388e-06, "loss": 1.2256, "step": 5090 }, { "epoch": 0.63, "grad_norm": 7.507294092553211, "learning_rate": 3.132737538227075e-06, "loss": 0.8763, "step": 5091 }, { "epoch": 0.63, "grad_norm": 6.73863265994542, "learning_rate": 3.1308690720587587e-06, "loss": 1.0856, "step": 5092 }, { "epoch": 0.63, "grad_norm": 6.147338700018098, "learning_rate": 3.1290009092606988e-06, "loss": 1.013, "step": 5093 }, { "epoch": 0.63, "grad_norm": 7.331976267393696, "learning_rate": 3.1271330501361143e-06, "loss": 1.2213, "step": 5094 }, { "epoch": 0.63, "grad_norm": 7.655906194437276, "learning_rate": 3.1252654949881634e-06, "loss": 1.094, "step": 5095 }, { "epoch": 0.63, "grad_norm": 7.363505274209336, "learning_rate": 3.12339824411996e-06, "loss": 1.2748, "step": 5096 }, { "epoch": 0.63, "grad_norm": 8.018606991018359, "learning_rate": 3.121531297834574e-06, "loss": 1.045, "step": 5097 }, { "epoch": 0.63, "grad_norm": 6.849778128151583, "learning_rate": 3.119664656435018e-06, "loss": 0.9964, "step": 5098 }, { "epoch": 0.63, "grad_norm": 8.758213603572129, "learning_rate": 3.1177983202242595e-06, "loss": 1.3048, "step": 5099 }, { "epoch": 0.63, "grad_norm": 7.472087230321474, "learning_rate": 3.1159322895052135e-06, "loss": 0.9714, "step": 5100 }, { "epoch": 0.63, "grad_norm": 7.268793090659114, "learning_rate": 3.1140665645807493e-06, "loss": 1.0523, "step": 5101 }, { "epoch": 0.63, "grad_norm": 6.291453598180116, "learning_rate": 3.1122011457536834e-06, "loss": 1.0516, "step": 5102 }, { "epoch": 0.63, "grad_norm": 8.615010786584456, "learning_rate": 3.1103360333267818e-06, "loss": 1.0686, "step": 5103 }, { "epoch": 0.63, "grad_norm": 6.452076469339997, "learning_rate": 3.108471227602765e-06, "loss": 0.8452, "step": 5104 }, { "epoch": 0.63, "grad_norm": 8.774239084174654, "learning_rate": 3.106606728884299e-06, "loss": 1.0872, "step": 5105 }, { "epoch": 0.64, "grad_norm": 7.273887077426776, "learning_rate": 3.104742537474005e-06, "loss": 0.9908, "step": 5106 }, { "epoch": 0.64, "grad_norm": 6.883443522752786, "learning_rate": 3.1028786536744495e-06, "loss": 0.7826, "step": 5107 }, { "epoch": 0.64, "grad_norm": 6.001430039966391, "learning_rate": 3.101015077788152e-06, "loss": 0.6941, "step": 5108 }, { "epoch": 0.64, "grad_norm": 6.661780304726104, "learning_rate": 3.09915181011758e-06, "loss": 0.9525, "step": 5109 }, { "epoch": 0.64, "grad_norm": 7.158451640187897, "learning_rate": 3.097288850965154e-06, "loss": 1.1311, "step": 5110 }, { "epoch": 0.64, "grad_norm": 7.685137269863461, "learning_rate": 3.095426200633241e-06, "loss": 1.3281, "step": 5111 }, { "epoch": 0.64, "grad_norm": 8.030243557074769, "learning_rate": 3.09356385942416e-06, "loss": 1.3429, "step": 5112 }, { "epoch": 0.64, "grad_norm": 6.902874822704695, "learning_rate": 3.09170182764018e-06, "loss": 0.9359, "step": 5113 }, { "epoch": 0.64, "grad_norm": 6.686317799910952, "learning_rate": 3.089840105583516e-06, "loss": 1.031, "step": 5114 }, { "epoch": 0.64, "grad_norm": 6.91886743218818, "learning_rate": 3.0879786935563393e-06, "loss": 0.881, "step": 5115 }, { "epoch": 0.64, "grad_norm": 7.659301691043597, "learning_rate": 3.0861175918607654e-06, "loss": 1.0452, "step": 5116 }, { "epoch": 0.64, "grad_norm": 7.889525202263665, "learning_rate": 3.0842568007988637e-06, "loss": 1.1013, "step": 5117 }, { "epoch": 0.64, "grad_norm": 6.552355734953079, "learning_rate": 3.082396320672648e-06, "loss": 1.1632, "step": 5118 }, { "epoch": 0.64, "grad_norm": 7.758474128417766, "learning_rate": 3.080536151784085e-06, "loss": 0.7474, "step": 5119 }, { "epoch": 0.64, "grad_norm": 7.816146398929389, "learning_rate": 3.0786762944350925e-06, "loss": 1.0599, "step": 5120 }, { "epoch": 0.64, "grad_norm": 8.108151554601545, "learning_rate": 3.0768167489275325e-06, "loss": 1.1696, "step": 5121 }, { "epoch": 0.64, "grad_norm": 8.344601076580512, "learning_rate": 3.0749575155632226e-06, "loss": 1.1204, "step": 5122 }, { "epoch": 0.64, "grad_norm": 7.641212598958625, "learning_rate": 3.073098594643923e-06, "loss": 0.8275, "step": 5123 }, { "epoch": 0.64, "grad_norm": 8.730418520633998, "learning_rate": 3.0712399864713505e-06, "loss": 1.2036, "step": 5124 }, { "epoch": 0.64, "grad_norm": 6.272999880015942, "learning_rate": 3.069381691347165e-06, "loss": 0.8638, "step": 5125 }, { "epoch": 0.64, "grad_norm": 7.633288415837974, "learning_rate": 3.0675237095729795e-06, "loss": 1.1469, "step": 5126 }, { "epoch": 0.64, "grad_norm": 7.423870186074798, "learning_rate": 3.065666041450355e-06, "loss": 1.0254, "step": 5127 }, { "epoch": 0.64, "grad_norm": 6.541668853197243, "learning_rate": 3.0638086872807987e-06, "loss": 0.9145, "step": 5128 }, { "epoch": 0.64, "grad_norm": 7.973424641470473, "learning_rate": 3.0619516473657722e-06, "loss": 0.9631, "step": 5129 }, { "epoch": 0.64, "grad_norm": 7.636723491388468, "learning_rate": 3.0600949220066815e-06, "loss": 1.1135, "step": 5130 }, { "epoch": 0.64, "grad_norm": 6.637593983553805, "learning_rate": 3.0582385115048853e-06, "loss": 0.8304, "step": 5131 }, { "epoch": 0.64, "grad_norm": 6.747590677122914, "learning_rate": 3.0563824161616857e-06, "loss": 1.2941, "step": 5132 }, { "epoch": 0.64, "grad_norm": 6.560910424316468, "learning_rate": 3.0545266362783416e-06, "loss": 0.801, "step": 5133 }, { "epoch": 0.64, "grad_norm": 7.404100208638634, "learning_rate": 3.052671172156053e-06, "loss": 1.2636, "step": 5134 }, { "epoch": 0.64, "grad_norm": 6.719457337056238, "learning_rate": 3.050816024095975e-06, "loss": 1.1372, "step": 5135 }, { "epoch": 0.64, "grad_norm": 8.651791022847052, "learning_rate": 3.0489611923992056e-06, "loss": 1.3059, "step": 5136 }, { "epoch": 0.64, "grad_norm": 8.287540243065264, "learning_rate": 3.0471066773667957e-06, "loss": 1.3554, "step": 5137 }, { "epoch": 0.64, "grad_norm": 7.568773681118446, "learning_rate": 3.045252479299743e-06, "loss": 0.9001, "step": 5138 }, { "epoch": 0.64, "grad_norm": 7.471927962553153, "learning_rate": 3.0433985984989923e-06, "loss": 0.8101, "step": 5139 }, { "epoch": 0.64, "grad_norm": 7.664810727884328, "learning_rate": 3.041545035265442e-06, "loss": 1.1296, "step": 5140 }, { "epoch": 0.64, "grad_norm": 7.790756540227467, "learning_rate": 3.039691789899932e-06, "loss": 1.08, "step": 5141 }, { "epoch": 0.64, "grad_norm": 7.931175370887564, "learning_rate": 3.037838862703258e-06, "loss": 1.6774, "step": 5142 }, { "epoch": 0.64, "grad_norm": 7.052237088698433, "learning_rate": 3.035986253976159e-06, "loss": 1.3349, "step": 5143 }, { "epoch": 0.64, "grad_norm": 7.565721129535214, "learning_rate": 3.0341339640193207e-06, "loss": 0.9683, "step": 5144 }, { "epoch": 0.64, "grad_norm": 7.144974956602499, "learning_rate": 3.0322819931333848e-06, "loss": 1.0898, "step": 5145 }, { "epoch": 0.64, "grad_norm": 5.9033505176130365, "learning_rate": 3.0304303416189317e-06, "loss": 0.8117, "step": 5146 }, { "epoch": 0.64, "grad_norm": 7.93305524625642, "learning_rate": 3.028579009776499e-06, "loss": 1.1345, "step": 5147 }, { "epoch": 0.64, "grad_norm": 7.6915412126070555, "learning_rate": 3.0267279979065645e-06, "loss": 1.0811, "step": 5148 }, { "epoch": 0.64, "grad_norm": 6.274372067909861, "learning_rate": 3.0248773063095606e-06, "loss": 0.8786, "step": 5149 }, { "epoch": 0.64, "grad_norm": 8.956621736793892, "learning_rate": 3.0230269352858597e-06, "loss": 1.1348, "step": 5150 }, { "epoch": 0.64, "grad_norm": 6.940815742669744, "learning_rate": 3.0211768851357958e-06, "loss": 0.9446, "step": 5151 }, { "epoch": 0.64, "grad_norm": 7.266587179070879, "learning_rate": 3.019327156159635e-06, "loss": 0.8377, "step": 5152 }, { "epoch": 0.64, "grad_norm": 6.371929721115616, "learning_rate": 3.017477748657599e-06, "loss": 0.8132, "step": 5153 }, { "epoch": 0.64, "grad_norm": 6.845649052825259, "learning_rate": 3.015628662929861e-06, "loss": 0.9358, "step": 5154 }, { "epoch": 0.64, "grad_norm": 6.629459010904344, "learning_rate": 3.0137798992765345e-06, "loss": 0.9009, "step": 5155 }, { "epoch": 0.64, "grad_norm": 7.6386957343020505, "learning_rate": 3.0119314579976854e-06, "loss": 0.9579, "step": 5156 }, { "epoch": 0.64, "grad_norm": 6.48280791697481, "learning_rate": 3.0100833393933244e-06, "loss": 0.8536, "step": 5157 }, { "epoch": 0.64, "grad_norm": 7.1619260155508915, "learning_rate": 3.0082355437634136e-06, "loss": 1.1847, "step": 5158 }, { "epoch": 0.64, "grad_norm": 6.630122548229815, "learning_rate": 3.0063880714078592e-06, "loss": 0.8498, "step": 5159 }, { "epoch": 0.64, "grad_norm": 6.409311650121668, "learning_rate": 3.004540922626515e-06, "loss": 0.9186, "step": 5160 }, { "epoch": 0.64, "grad_norm": 8.638148100762146, "learning_rate": 3.0026940977191853e-06, "loss": 1.2147, "step": 5161 }, { "epoch": 0.64, "grad_norm": 6.731083956629137, "learning_rate": 3.000847596985618e-06, "loss": 1.2033, "step": 5162 }, { "epoch": 0.64, "grad_norm": 8.237421821406796, "learning_rate": 2.9990014207255134e-06, "loss": 1.0691, "step": 5163 }, { "epoch": 0.64, "grad_norm": 6.819623776913041, "learning_rate": 2.9971555692385134e-06, "loss": 0.8614, "step": 5164 }, { "epoch": 0.64, "grad_norm": 7.264255388248198, "learning_rate": 2.9953100428242115e-06, "loss": 0.9672, "step": 5165 }, { "epoch": 0.64, "grad_norm": 7.353540948150671, "learning_rate": 2.9934648417821456e-06, "loss": 0.9613, "step": 5166 }, { "epoch": 0.64, "grad_norm": 6.571267545285419, "learning_rate": 2.991619966411803e-06, "loss": 0.9467, "step": 5167 }, { "epoch": 0.64, "grad_norm": 7.2789688063048015, "learning_rate": 2.989775417012617e-06, "loss": 0.7086, "step": 5168 }, { "epoch": 0.64, "grad_norm": 6.893551705231239, "learning_rate": 2.9879311938839663e-06, "loss": 0.8219, "step": 5169 }, { "epoch": 0.64, "grad_norm": 6.912261951207187, "learning_rate": 2.9860872973251815e-06, "loss": 0.671, "step": 5170 }, { "epoch": 0.64, "grad_norm": 7.984994076451821, "learning_rate": 2.984243727635533e-06, "loss": 1.0158, "step": 5171 }, { "epoch": 0.64, "grad_norm": 7.217083789992397, "learning_rate": 2.9824004851142474e-06, "loss": 0.773, "step": 5172 }, { "epoch": 0.64, "grad_norm": 7.162942648155719, "learning_rate": 2.98055757006049e-06, "loss": 1.1763, "step": 5173 }, { "epoch": 0.64, "grad_norm": 6.49788998987277, "learning_rate": 2.9787149827733774e-06, "loss": 0.9736, "step": 5174 }, { "epoch": 0.64, "grad_norm": 6.035505138142798, "learning_rate": 2.9768727235519712e-06, "loss": 0.7997, "step": 5175 }, { "epoch": 0.64, "grad_norm": 6.848691634672223, "learning_rate": 2.975030792695279e-06, "loss": 1.0021, "step": 5176 }, { "epoch": 0.64, "grad_norm": 7.261310366845935, "learning_rate": 2.9731891905022593e-06, "loss": 1.0672, "step": 5177 }, { "epoch": 0.64, "grad_norm": 7.6073815821719455, "learning_rate": 2.9713479172718107e-06, "loss": 1.1938, "step": 5178 }, { "epoch": 0.64, "grad_norm": 7.086848961550033, "learning_rate": 2.969506973302785e-06, "loss": 1.0806, "step": 5179 }, { "epoch": 0.64, "grad_norm": 9.26140475725372, "learning_rate": 2.9676663588939735e-06, "loss": 1.245, "step": 5180 }, { "epoch": 0.64, "grad_norm": 7.413288076273214, "learning_rate": 2.9658260743441236e-06, "loss": 1.1102, "step": 5181 }, { "epoch": 0.64, "grad_norm": 7.557569295201061, "learning_rate": 2.9639861199519194e-06, "loss": 0.93, "step": 5182 }, { "epoch": 0.64, "grad_norm": 7.1808412556604715, "learning_rate": 2.9621464960159984e-06, "loss": 1.057, "step": 5183 }, { "epoch": 0.64, "grad_norm": 8.376569959647881, "learning_rate": 2.960307202834941e-06, "loss": 1.1109, "step": 5184 }, { "epoch": 0.64, "grad_norm": 7.573614859827187, "learning_rate": 2.958468240707272e-06, "loss": 0.9922, "step": 5185 }, { "epoch": 0.65, "grad_norm": 8.071068644832232, "learning_rate": 2.9566296099314694e-06, "loss": 0.9275, "step": 5186 }, { "epoch": 0.65, "grad_norm": 9.114604690857986, "learning_rate": 2.954791310805949e-06, "loss": 1.1035, "step": 5187 }, { "epoch": 0.65, "grad_norm": 6.567537136892897, "learning_rate": 2.95295334362908e-06, "loss": 0.6435, "step": 5188 }, { "epoch": 0.65, "grad_norm": 8.541200189472946, "learning_rate": 2.9511157086991717e-06, "loss": 1.2107, "step": 5189 }, { "epoch": 0.65, "grad_norm": 6.076878235894326, "learning_rate": 2.9492784063144856e-06, "loss": 0.7995, "step": 5190 }, { "epoch": 0.65, "grad_norm": 7.478324200254508, "learning_rate": 2.947441436773224e-06, "loss": 1.1985, "step": 5191 }, { "epoch": 0.65, "grad_norm": 6.270460618658271, "learning_rate": 2.9456048003735384e-06, "loss": 0.8721, "step": 5192 }, { "epoch": 0.65, "grad_norm": 7.413008414815554, "learning_rate": 2.943768497413524e-06, "loss": 1.3415, "step": 5193 }, { "epoch": 0.65, "grad_norm": 7.321087457832808, "learning_rate": 2.9419325281912223e-06, "loss": 1.2564, "step": 5194 }, { "epoch": 0.65, "grad_norm": 8.558028511140504, "learning_rate": 2.9400968930046236e-06, "loss": 1.0808, "step": 5195 }, { "epoch": 0.65, "grad_norm": 7.863613829929677, "learning_rate": 2.938261592151659e-06, "loss": 0.8839, "step": 5196 }, { "epoch": 0.65, "grad_norm": 7.8620871014458595, "learning_rate": 2.9364266259302098e-06, "loss": 1.1919, "step": 5197 }, { "epoch": 0.65, "grad_norm": 7.31370781525864, "learning_rate": 2.9345919946380985e-06, "loss": 0.6918, "step": 5198 }, { "epoch": 0.65, "grad_norm": 8.013294671706737, "learning_rate": 2.9327576985730997e-06, "loss": 0.9301, "step": 5199 }, { "epoch": 0.65, "grad_norm": 9.692448867539493, "learning_rate": 2.9309237380329294e-06, "loss": 1.3734, "step": 5200 }, { "epoch": 0.65, "grad_norm": 6.86574894722783, "learning_rate": 2.9290901133152446e-06, "loss": 0.9109, "step": 5201 }, { "epoch": 0.65, "grad_norm": 7.67084246341751, "learning_rate": 2.927256824717658e-06, "loss": 0.9773, "step": 5202 }, { "epoch": 0.65, "grad_norm": 7.2189208947304335, "learning_rate": 2.925423872537719e-06, "loss": 0.7734, "step": 5203 }, { "epoch": 0.65, "grad_norm": 9.01404328762323, "learning_rate": 2.9235912570729297e-06, "loss": 1.0725, "step": 5204 }, { "epoch": 0.65, "grad_norm": 6.563881699852165, "learning_rate": 2.9217589786207296e-06, "loss": 0.9594, "step": 5205 }, { "epoch": 0.65, "grad_norm": 7.459057324870908, "learning_rate": 2.9199270374785104e-06, "loss": 0.9, "step": 5206 }, { "epoch": 0.65, "grad_norm": 8.55474983103973, "learning_rate": 2.918095433943604e-06, "loss": 1.0134, "step": 5207 }, { "epoch": 0.65, "grad_norm": 7.538832379649102, "learning_rate": 2.916264168313292e-06, "loss": 0.9263, "step": 5208 }, { "epoch": 0.65, "grad_norm": 6.925340519653504, "learning_rate": 2.9144332408847986e-06, "loss": 0.9484, "step": 5209 }, { "epoch": 0.65, "grad_norm": 7.3022773994228904, "learning_rate": 2.912602651955292e-06, "loss": 1.0485, "step": 5210 }, { "epoch": 0.65, "grad_norm": 7.340972679524602, "learning_rate": 2.9107724018218865e-06, "loss": 1.1494, "step": 5211 }, { "epoch": 0.65, "grad_norm": 7.003773347455224, "learning_rate": 2.9089424907816433e-06, "loss": 1.0221, "step": 5212 }, { "epoch": 0.65, "grad_norm": 8.591026673936536, "learning_rate": 2.9071129191315684e-06, "loss": 1.2213, "step": 5213 }, { "epoch": 0.65, "grad_norm": 7.718051556195271, "learning_rate": 2.9052836871686063e-06, "loss": 1.0506, "step": 5214 }, { "epoch": 0.65, "grad_norm": 8.062260401731791, "learning_rate": 2.903454795189658e-06, "loss": 0.833, "step": 5215 }, { "epoch": 0.65, "grad_norm": 6.437534449248686, "learning_rate": 2.9016262434915577e-06, "loss": 0.8562, "step": 5216 }, { "epoch": 0.65, "grad_norm": 7.57340341542766, "learning_rate": 2.8997980323710917e-06, "loss": 1.2236, "step": 5217 }, { "epoch": 0.65, "grad_norm": 8.918032195960393, "learning_rate": 2.8979701621249895e-06, "loss": 1.3098, "step": 5218 }, { "epoch": 0.65, "grad_norm": 6.299316240054171, "learning_rate": 2.896142633049922e-06, "loss": 0.8001, "step": 5219 }, { "epoch": 0.65, "grad_norm": 7.305033254884612, "learning_rate": 2.894315445442508e-06, "loss": 1.2079, "step": 5220 }, { "epoch": 0.65, "grad_norm": 6.930479445186204, "learning_rate": 2.8924885995993115e-06, "loss": 1.3584, "step": 5221 }, { "epoch": 0.65, "grad_norm": 11.268272423569574, "learning_rate": 2.8906620958168396e-06, "loss": 0.9424, "step": 5222 }, { "epoch": 0.65, "grad_norm": 8.426264274869407, "learning_rate": 2.8888359343915396e-06, "loss": 0.9456, "step": 5223 }, { "epoch": 0.65, "grad_norm": 6.56253810240979, "learning_rate": 2.8870101156198155e-06, "loss": 0.7273, "step": 5224 }, { "epoch": 0.65, "grad_norm": 6.693811915662176, "learning_rate": 2.885184639798001e-06, "loss": 0.9676, "step": 5225 }, { "epoch": 0.65, "grad_norm": 8.578786712538662, "learning_rate": 2.8833595072223842e-06, "loss": 1.0357, "step": 5226 }, { "epoch": 0.65, "grad_norm": 6.260135225268313, "learning_rate": 2.881534718189194e-06, "loss": 0.6716, "step": 5227 }, { "epoch": 0.65, "grad_norm": 9.041143078016018, "learning_rate": 2.879710272994602e-06, "loss": 1.3124, "step": 5228 }, { "epoch": 0.65, "grad_norm": 7.654025443966513, "learning_rate": 2.877886171934726e-06, "loss": 1.0674, "step": 5229 }, { "epoch": 0.65, "grad_norm": 7.028699244306599, "learning_rate": 2.876062415305629e-06, "loss": 0.8689, "step": 5230 }, { "epoch": 0.65, "grad_norm": 7.7918002242592355, "learning_rate": 2.8742390034033174e-06, "loss": 1.135, "step": 5231 }, { "epoch": 0.65, "grad_norm": 6.544528581911695, "learning_rate": 2.8724159365237383e-06, "loss": 0.9258, "step": 5232 }, { "epoch": 0.65, "grad_norm": 6.704296840501192, "learning_rate": 2.870593214962787e-06, "loss": 0.732, "step": 5233 }, { "epoch": 0.65, "grad_norm": 8.370704359585808, "learning_rate": 2.8687708390163005e-06, "loss": 1.0379, "step": 5234 }, { "epoch": 0.65, "grad_norm": 7.484725479710246, "learning_rate": 2.8669488089800625e-06, "loss": 0.8609, "step": 5235 }, { "epoch": 0.65, "grad_norm": 8.154234679832017, "learning_rate": 2.865127125149798e-06, "loss": 0.8995, "step": 5236 }, { "epoch": 0.65, "grad_norm": 7.586177204879745, "learning_rate": 2.863305787821174e-06, "loss": 0.9153, "step": 5237 }, { "epoch": 0.65, "grad_norm": 7.735589916380567, "learning_rate": 2.8614847972898054e-06, "loss": 1.0857, "step": 5238 }, { "epoch": 0.65, "grad_norm": 7.038202791285047, "learning_rate": 2.859664153851248e-06, "loss": 0.8652, "step": 5239 }, { "epoch": 0.65, "grad_norm": 7.34760658005635, "learning_rate": 2.8578438578010053e-06, "loss": 0.9932, "step": 5240 }, { "epoch": 0.65, "grad_norm": 5.303120756837237, "learning_rate": 2.856023909434518e-06, "loss": 0.6667, "step": 5241 }, { "epoch": 0.65, "grad_norm": 8.382879384195226, "learning_rate": 2.854204309047174e-06, "loss": 1.2645, "step": 5242 }, { "epoch": 0.65, "grad_norm": 12.464009953502742, "learning_rate": 2.852385056934307e-06, "loss": 1.3114, "step": 5243 }, { "epoch": 0.65, "grad_norm": 7.237521805381343, "learning_rate": 2.8505661533911867e-06, "loss": 0.794, "step": 5244 }, { "epoch": 0.65, "grad_norm": 6.77660778679953, "learning_rate": 2.8487475987130374e-06, "loss": 0.8241, "step": 5245 }, { "epoch": 0.65, "grad_norm": 8.545735699752656, "learning_rate": 2.846929393195017e-06, "loss": 1.0161, "step": 5246 }, { "epoch": 0.65, "grad_norm": 7.115116568645703, "learning_rate": 2.8451115371322302e-06, "loss": 0.9092, "step": 5247 }, { "epoch": 0.65, "grad_norm": 6.705544585245344, "learning_rate": 2.8432940308197256e-06, "loss": 1.084, "step": 5248 }, { "epoch": 0.65, "grad_norm": 7.6484872281841865, "learning_rate": 2.8414768745524967e-06, "loss": 0.9126, "step": 5249 }, { "epoch": 0.65, "grad_norm": 9.682313120617584, "learning_rate": 2.839660068625474e-06, "loss": 1.1163, "step": 5250 }, { "epoch": 0.65, "grad_norm": 7.187185397012004, "learning_rate": 2.8378436133335373e-06, "loss": 0.5613, "step": 5251 }, { "epoch": 0.65, "grad_norm": 7.322763883776838, "learning_rate": 2.8360275089715094e-06, "loss": 1.0207, "step": 5252 }, { "epoch": 0.65, "grad_norm": 7.309808485861459, "learning_rate": 2.8342117558341473e-06, "loss": 0.7632, "step": 5253 }, { "epoch": 0.65, "grad_norm": 7.568137381384038, "learning_rate": 2.8323963542161665e-06, "loss": 1.0464, "step": 5254 }, { "epoch": 0.65, "grad_norm": 7.440447989139382, "learning_rate": 2.83058130441221e-06, "loss": 0.8869, "step": 5255 }, { "epoch": 0.65, "grad_norm": 9.306647889297043, "learning_rate": 2.828766606716873e-06, "loss": 1.1989, "step": 5256 }, { "epoch": 0.65, "grad_norm": 8.260932622636261, "learning_rate": 2.8269522614246923e-06, "loss": 1.3658, "step": 5257 }, { "epoch": 0.65, "grad_norm": 7.111356031676972, "learning_rate": 2.8251382688301433e-06, "loss": 1.0634, "step": 5258 }, { "epoch": 0.65, "grad_norm": 7.746311382064683, "learning_rate": 2.8233246292276473e-06, "loss": 1.0506, "step": 5259 }, { "epoch": 0.65, "grad_norm": 6.19172889348848, "learning_rate": 2.82151134291157e-06, "loss": 0.8394, "step": 5260 }, { "epoch": 0.65, "grad_norm": 7.08401424478069, "learning_rate": 2.8196984101762182e-06, "loss": 0.9148, "step": 5261 }, { "epoch": 0.65, "grad_norm": 7.832973194902057, "learning_rate": 2.8178858313158354e-06, "loss": 1.1514, "step": 5262 }, { "epoch": 0.65, "grad_norm": 7.40494177020366, "learning_rate": 2.8160736066246214e-06, "loss": 1.1295, "step": 5263 }, { "epoch": 0.65, "grad_norm": 7.376278407189612, "learning_rate": 2.814261736396704e-06, "loss": 1.0197, "step": 5264 }, { "epoch": 0.65, "grad_norm": 6.535436146111684, "learning_rate": 2.8124502209261615e-06, "loss": 0.8267, "step": 5265 }, { "epoch": 0.65, "grad_norm": 7.804745130215362, "learning_rate": 2.8106390605070153e-06, "loss": 1.1363, "step": 5266 }, { "epoch": 0.66, "grad_norm": 6.194475148095169, "learning_rate": 2.8088282554332224e-06, "loss": 0.9381, "step": 5267 }, { "epoch": 0.66, "grad_norm": 8.917698651837252, "learning_rate": 2.807017805998689e-06, "loss": 1.5461, "step": 5268 }, { "epoch": 0.66, "grad_norm": 6.997669000737521, "learning_rate": 2.8052077124972603e-06, "loss": 0.8389, "step": 5269 }, { "epoch": 0.66, "grad_norm": 7.77470086351236, "learning_rate": 2.803397975222727e-06, "loss": 1.0309, "step": 5270 }, { "epoch": 0.66, "grad_norm": 8.588220293886076, "learning_rate": 2.801588594468813e-06, "loss": 0.9928, "step": 5271 }, { "epoch": 0.66, "grad_norm": 8.198647659499779, "learning_rate": 2.799779570529199e-06, "loss": 1.105, "step": 5272 }, { "epoch": 0.66, "grad_norm": 6.241115031529843, "learning_rate": 2.797970903697494e-06, "loss": 0.91, "step": 5273 }, { "epoch": 0.66, "grad_norm": 7.454553762910656, "learning_rate": 2.7961625942672555e-06, "loss": 1.0574, "step": 5274 }, { "epoch": 0.66, "grad_norm": 7.137576699758145, "learning_rate": 2.7943546425319857e-06, "loss": 1.3481, "step": 5275 }, { "epoch": 0.66, "grad_norm": 7.632560991589597, "learning_rate": 2.792547048785119e-06, "loss": 1.1398, "step": 5276 }, { "epoch": 0.66, "grad_norm": 8.73553886496091, "learning_rate": 2.7907398133200418e-06, "loss": 1.3525, "step": 5277 }, { "epoch": 0.66, "grad_norm": 7.8877525442106124, "learning_rate": 2.7889329364300777e-06, "loss": 1.1088, "step": 5278 }, { "epoch": 0.66, "grad_norm": 8.598261044495827, "learning_rate": 2.7871264184084946e-06, "loss": 0.903, "step": 5279 }, { "epoch": 0.66, "grad_norm": 7.894139593310478, "learning_rate": 2.7853202595484942e-06, "loss": 0.8688, "step": 5280 }, { "epoch": 0.66, "grad_norm": 8.87172504721701, "learning_rate": 2.783514460143234e-06, "loss": 1.0839, "step": 5281 }, { "epoch": 0.66, "grad_norm": 9.199413165233606, "learning_rate": 2.7817090204857997e-06, "loss": 1.2976, "step": 5282 }, { "epoch": 0.66, "grad_norm": 8.251864004318499, "learning_rate": 2.7799039408692262e-06, "loss": 0.92, "step": 5283 }, { "epoch": 0.66, "grad_norm": 6.242072955156169, "learning_rate": 2.77809922158649e-06, "loss": 1.0489, "step": 5284 }, { "epoch": 0.66, "grad_norm": 7.833620149592906, "learning_rate": 2.776294862930502e-06, "loss": 0.9354, "step": 5285 }, { "epoch": 0.66, "grad_norm": 7.455553767942661, "learning_rate": 2.7744908651941214e-06, "loss": 0.8754, "step": 5286 }, { "epoch": 0.66, "grad_norm": 6.645917551024936, "learning_rate": 2.7726872286701487e-06, "loss": 1.0296, "step": 5287 }, { "epoch": 0.66, "grad_norm": 7.081487370607297, "learning_rate": 2.770883953651325e-06, "loss": 0.947, "step": 5288 }, { "epoch": 0.66, "grad_norm": 7.682731441552253, "learning_rate": 2.7690810404303276e-06, "loss": 0.9784, "step": 5289 }, { "epoch": 0.66, "grad_norm": 7.4389937200824034, "learning_rate": 2.767278489299782e-06, "loss": 1.0085, "step": 5290 }, { "epoch": 0.66, "grad_norm": 8.58814600192265, "learning_rate": 2.765476300552251e-06, "loss": 1.3016, "step": 5291 }, { "epoch": 0.66, "grad_norm": 6.67996898115021, "learning_rate": 2.763674474480241e-06, "loss": 0.8705, "step": 5292 }, { "epoch": 0.66, "grad_norm": 8.789275333651483, "learning_rate": 2.7618730113761993e-06, "loss": 1.1055, "step": 5293 }, { "epoch": 0.66, "grad_norm": 5.764213456033777, "learning_rate": 2.7600719115325105e-06, "loss": 0.9096, "step": 5294 }, { "epoch": 0.66, "grad_norm": 8.322114616428143, "learning_rate": 2.7582711752415036e-06, "loss": 1.4828, "step": 5295 }, { "epoch": 0.66, "grad_norm": 6.573840722083225, "learning_rate": 2.756470802795449e-06, "loss": 0.8266, "step": 5296 }, { "epoch": 0.66, "grad_norm": 7.481454139088215, "learning_rate": 2.754670794486559e-06, "loss": 1.0683, "step": 5297 }, { "epoch": 0.66, "grad_norm": 7.558781958590451, "learning_rate": 2.7528711506069805e-06, "loss": 0.8727, "step": 5298 }, { "epoch": 0.66, "grad_norm": 6.811802944242443, "learning_rate": 2.751071871448807e-06, "loss": 0.8245, "step": 5299 }, { "epoch": 0.66, "grad_norm": 7.552181574241908, "learning_rate": 2.7492729573040756e-06, "loss": 0.8076, "step": 5300 }, { "epoch": 0.66, "grad_norm": 6.868640552105781, "learning_rate": 2.747474408464752e-06, "loss": 0.9309, "step": 5301 }, { "epoch": 0.66, "grad_norm": 7.35811484037328, "learning_rate": 2.74567622522276e-06, "loss": 0.8695, "step": 5302 }, { "epoch": 0.66, "grad_norm": 5.844580614835523, "learning_rate": 2.743878407869947e-06, "loss": 0.768, "step": 5303 }, { "epoch": 0.66, "grad_norm": 6.580689231520714, "learning_rate": 2.742080956698112e-06, "loss": 0.9044, "step": 5304 }, { "epoch": 0.66, "grad_norm": 8.008909742952229, "learning_rate": 2.7402838719989905e-06, "loss": 1.3115, "step": 5305 }, { "epoch": 0.66, "grad_norm": 8.6111187711007, "learning_rate": 2.738487154064262e-06, "loss": 1.302, "step": 5306 }, { "epoch": 0.66, "grad_norm": 8.03479235924753, "learning_rate": 2.736690803185539e-06, "loss": 1.2573, "step": 5307 }, { "epoch": 0.66, "grad_norm": 7.8020384908171705, "learning_rate": 2.734894819654381e-06, "loss": 1.3973, "step": 5308 }, { "epoch": 0.66, "grad_norm": 8.500735024782335, "learning_rate": 2.7330992037622893e-06, "loss": 1.0582, "step": 5309 }, { "epoch": 0.66, "grad_norm": 8.22058748282902, "learning_rate": 2.7313039558006952e-06, "loss": 0.9905, "step": 5310 }, { "epoch": 0.66, "grad_norm": 6.948016687700261, "learning_rate": 2.7295090760609853e-06, "loss": 0.8817, "step": 5311 }, { "epoch": 0.66, "grad_norm": 6.4236743555894105, "learning_rate": 2.727714564834474e-06, "loss": 0.9953, "step": 5312 }, { "epoch": 0.66, "grad_norm": 7.860121503492068, "learning_rate": 2.7259204224124197e-06, "loss": 1.1026, "step": 5313 }, { "epoch": 0.66, "grad_norm": 7.334099352328932, "learning_rate": 2.724126649086026e-06, "loss": 0.9236, "step": 5314 }, { "epoch": 0.66, "grad_norm": 7.031948355589525, "learning_rate": 2.722333245146427e-06, "loss": 0.927, "step": 5315 }, { "epoch": 0.66, "grad_norm": 6.885865531369113, "learning_rate": 2.7205402108847035e-06, "loss": 0.7034, "step": 5316 }, { "epoch": 0.66, "grad_norm": 7.7412379806433105, "learning_rate": 2.7187475465918768e-06, "loss": 0.7384, "step": 5317 }, { "epoch": 0.66, "grad_norm": 7.266076250044627, "learning_rate": 2.716955252558906e-06, "loss": 1.106, "step": 5318 }, { "epoch": 0.66, "grad_norm": 7.1928573198008845, "learning_rate": 2.7151633290766867e-06, "loss": 1.0976, "step": 5319 }, { "epoch": 0.66, "grad_norm": 7.5613491466164096, "learning_rate": 2.7133717764360634e-06, "loss": 1.1573, "step": 5320 }, { "epoch": 0.66, "grad_norm": 7.533608770738081, "learning_rate": 2.711580594927811e-06, "loss": 0.9324, "step": 5321 }, { "epoch": 0.66, "grad_norm": 7.296467055011365, "learning_rate": 2.7097897848426492e-06, "loss": 0.8086, "step": 5322 }, { "epoch": 0.66, "grad_norm": 8.339226591383666, "learning_rate": 2.707999346471238e-06, "loss": 0.9811, "step": 5323 }, { "epoch": 0.66, "grad_norm": 9.057573870119821, "learning_rate": 2.7062092801041717e-06, "loss": 0.8772, "step": 5324 }, { "epoch": 0.66, "grad_norm": 12.92116939422039, "learning_rate": 2.704419586031991e-06, "loss": 1.5659, "step": 5325 }, { "epoch": 0.66, "grad_norm": 7.098672855426301, "learning_rate": 2.702630264545172e-06, "loss": 1.16, "step": 5326 }, { "epoch": 0.66, "grad_norm": 7.51181229877217, "learning_rate": 2.700841315934134e-06, "loss": 1.0082, "step": 5327 }, { "epoch": 0.66, "grad_norm": 6.780326800186746, "learning_rate": 2.6990527404892265e-06, "loss": 0.9157, "step": 5328 }, { "epoch": 0.66, "grad_norm": 8.816259109826111, "learning_rate": 2.697264538500754e-06, "loss": 1.1001, "step": 5329 }, { "epoch": 0.66, "grad_norm": 9.010900207266962, "learning_rate": 2.695476710258945e-06, "loss": 0.861, "step": 5330 }, { "epoch": 0.66, "grad_norm": 7.662996985536857, "learning_rate": 2.693689256053976e-06, "loss": 0.9092, "step": 5331 }, { "epoch": 0.66, "grad_norm": 8.182216140432613, "learning_rate": 2.691902176175963e-06, "loss": 1.1943, "step": 5332 }, { "epoch": 0.66, "grad_norm": 7.610981896586773, "learning_rate": 2.6901154709149547e-06, "loss": 1.2377, "step": 5333 }, { "epoch": 0.66, "grad_norm": 7.5114972764355095, "learning_rate": 2.6883291405609446e-06, "loss": 0.8785, "step": 5334 }, { "epoch": 0.66, "grad_norm": 7.113029715209903, "learning_rate": 2.6865431854038648e-06, "loss": 1.1093, "step": 5335 }, { "epoch": 0.66, "grad_norm": 7.606248365098239, "learning_rate": 2.6847576057335872e-06, "loss": 0.9736, "step": 5336 }, { "epoch": 0.66, "grad_norm": 8.461871154639915, "learning_rate": 2.6829724018399155e-06, "loss": 1.2117, "step": 5337 }, { "epoch": 0.66, "grad_norm": 6.5799725922318135, "learning_rate": 2.6811875740126063e-06, "loss": 0.8476, "step": 5338 }, { "epoch": 0.66, "grad_norm": 6.852270224570119, "learning_rate": 2.6794031225413407e-06, "loss": 0.9716, "step": 5339 }, { "epoch": 0.66, "grad_norm": 7.121715102305211, "learning_rate": 2.6776190477157472e-06, "loss": 0.8863, "step": 5340 }, { "epoch": 0.66, "grad_norm": 6.4799666658971145, "learning_rate": 2.6758353498253926e-06, "loss": 0.8035, "step": 5341 }, { "epoch": 0.66, "grad_norm": 7.291369635932592, "learning_rate": 2.674052029159777e-06, "loss": 1.1485, "step": 5342 }, { "epoch": 0.66, "grad_norm": 6.512858527983567, "learning_rate": 2.6722690860083455e-06, "loss": 0.7709, "step": 5343 }, { "epoch": 0.66, "grad_norm": 6.738802484016993, "learning_rate": 2.67048652066048e-06, "loss": 0.7897, "step": 5344 }, { "epoch": 0.66, "grad_norm": 8.571125567788474, "learning_rate": 2.6687043334055017e-06, "loss": 0.958, "step": 5345 }, { "epoch": 0.66, "grad_norm": 6.9591155272411385, "learning_rate": 2.6669225245326658e-06, "loss": 1.0116, "step": 5346 }, { "epoch": 0.67, "grad_norm": 7.6606904579029855, "learning_rate": 2.6651410943311716e-06, "loss": 0.9638, "step": 5347 }, { "epoch": 0.67, "grad_norm": 6.501929078365205, "learning_rate": 2.6633600430901563e-06, "loss": 0.8318, "step": 5348 }, { "epoch": 0.67, "grad_norm": 6.659358447209052, "learning_rate": 2.6615793710986928e-06, "loss": 0.8158, "step": 5349 }, { "epoch": 0.67, "grad_norm": 7.833421208005519, "learning_rate": 2.6597990786457955e-06, "loss": 0.8232, "step": 5350 }, { "epoch": 0.67, "grad_norm": 7.313684212910248, "learning_rate": 2.6580191660204136e-06, "loss": 1.0909, "step": 5351 }, { "epoch": 0.67, "grad_norm": 6.28352479481675, "learning_rate": 2.656239633511437e-06, "loss": 0.8395, "step": 5352 }, { "epoch": 0.67, "grad_norm": 6.573943356962498, "learning_rate": 2.6544604814076946e-06, "loss": 1.0754, "step": 5353 }, { "epoch": 0.67, "grad_norm": 7.798825006009495, "learning_rate": 2.6526817099979552e-06, "loss": 0.9867, "step": 5354 }, { "epoch": 0.67, "grad_norm": 6.7456620163572625, "learning_rate": 2.6509033195709178e-06, "loss": 0.8504, "step": 5355 }, { "epoch": 0.67, "grad_norm": 7.003296773974347, "learning_rate": 2.6491253104152277e-06, "loss": 1.0409, "step": 5356 }, { "epoch": 0.67, "grad_norm": 6.841635345620618, "learning_rate": 2.6473476828194654e-06, "loss": 1.1034, "step": 5357 }, { "epoch": 0.67, "grad_norm": 8.748503255654526, "learning_rate": 2.6455704370721498e-06, "loss": 1.2254, "step": 5358 }, { "epoch": 0.67, "grad_norm": 8.319831290000412, "learning_rate": 2.643793573461739e-06, "loss": 0.7912, "step": 5359 }, { "epoch": 0.67, "grad_norm": 6.849886463349563, "learning_rate": 2.6420170922766253e-06, "loss": 0.878, "step": 5360 }, { "epoch": 0.67, "grad_norm": 5.934534568340102, "learning_rate": 2.640240993805142e-06, "loss": 0.7152, "step": 5361 }, { "epoch": 0.67, "grad_norm": 8.071280377846, "learning_rate": 2.6384652783355625e-06, "loss": 1.5945, "step": 5362 }, { "epoch": 0.67, "grad_norm": 6.874276431914977, "learning_rate": 2.6366899461560915e-06, "loss": 0.9813, "step": 5363 }, { "epoch": 0.67, "grad_norm": 7.090871437197312, "learning_rate": 2.6349149975548757e-06, "loss": 0.9363, "step": 5364 }, { "epoch": 0.67, "grad_norm": 7.48833412647036, "learning_rate": 2.6331404328199994e-06, "loss": 0.7834, "step": 5365 }, { "epoch": 0.67, "grad_norm": 9.693463223894819, "learning_rate": 2.631366252239488e-06, "loss": 1.3781, "step": 5366 }, { "epoch": 0.67, "grad_norm": 6.990346149676539, "learning_rate": 2.6295924561012932e-06, "loss": 0.7335, "step": 5367 }, { "epoch": 0.67, "grad_norm": 7.358627233103369, "learning_rate": 2.6278190446933195e-06, "loss": 1.2282, "step": 5368 }, { "epoch": 0.67, "grad_norm": 12.082124616191654, "learning_rate": 2.626046018303397e-06, "loss": 1.1717, "step": 5369 }, { "epoch": 0.67, "grad_norm": 6.782400104680824, "learning_rate": 2.6242733772192984e-06, "loss": 0.992, "step": 5370 }, { "epoch": 0.67, "grad_norm": 6.948596337740748, "learning_rate": 2.6225011217287355e-06, "loss": 0.8545, "step": 5371 }, { "epoch": 0.67, "grad_norm": 7.6419360701211225, "learning_rate": 2.6207292521193516e-06, "loss": 1.0562, "step": 5372 }, { "epoch": 0.67, "grad_norm": 7.968436923423985, "learning_rate": 2.6189577686787317e-06, "loss": 1.0138, "step": 5373 }, { "epoch": 0.67, "grad_norm": 6.984861856829821, "learning_rate": 2.617186671694398e-06, "loss": 0.7978, "step": 5374 }, { "epoch": 0.67, "grad_norm": 10.138466583592221, "learning_rate": 2.615415961453812e-06, "loss": 1.1589, "step": 5375 }, { "epoch": 0.67, "grad_norm": 6.835380718341109, "learning_rate": 2.6136456382443635e-06, "loss": 0.9452, "step": 5376 }, { "epoch": 0.67, "grad_norm": 7.568113125615517, "learning_rate": 2.6118757023533932e-06, "loss": 1.066, "step": 5377 }, { "epoch": 0.67, "grad_norm": 7.380296931429701, "learning_rate": 2.610106154068165e-06, "loss": 0.8605, "step": 5378 }, { "epoch": 0.67, "grad_norm": 6.8366562413605925, "learning_rate": 2.6083369936758894e-06, "loss": 0.8701, "step": 5379 }, { "epoch": 0.67, "grad_norm": 8.677015054947928, "learning_rate": 2.6065682214637124e-06, "loss": 1.43, "step": 5380 }, { "epoch": 0.67, "grad_norm": 8.38716048191584, "learning_rate": 2.604799837718711e-06, "loss": 0.9339, "step": 5381 }, { "epoch": 0.67, "grad_norm": 6.453090399506439, "learning_rate": 2.6030318427279067e-06, "loss": 0.9988, "step": 5382 }, { "epoch": 0.67, "grad_norm": 7.568447049798195, "learning_rate": 2.601264236778255e-06, "loss": 1.064, "step": 5383 }, { "epoch": 0.67, "grad_norm": 6.935935222791941, "learning_rate": 2.599497020156648e-06, "loss": 0.9093, "step": 5384 }, { "epoch": 0.67, "grad_norm": 8.555315562991723, "learning_rate": 2.597730193149911e-06, "loss": 1.122, "step": 5385 }, { "epoch": 0.67, "grad_norm": 7.201180798681249, "learning_rate": 2.5959637560448165e-06, "loss": 0.9339, "step": 5386 }, { "epoch": 0.67, "grad_norm": 6.287155830763279, "learning_rate": 2.5941977091280614e-06, "loss": 1.0788, "step": 5387 }, { "epoch": 0.67, "grad_norm": 7.959120172898379, "learning_rate": 2.5924320526862866e-06, "loss": 1.2833, "step": 5388 }, { "epoch": 0.67, "grad_norm": 8.538007048476757, "learning_rate": 2.590666787006071e-06, "loss": 0.8538, "step": 5389 }, { "epoch": 0.67, "grad_norm": 8.510230863346695, "learning_rate": 2.588901912373921e-06, "loss": 1.0045, "step": 5390 }, { "epoch": 0.67, "grad_norm": 6.972621080101569, "learning_rate": 2.587137429076289e-06, "loss": 1.1567, "step": 5391 }, { "epoch": 0.67, "grad_norm": 8.232412064164507, "learning_rate": 2.58537333739956e-06, "loss": 1.1912, "step": 5392 }, { "epoch": 0.67, "grad_norm": 7.551688970933809, "learning_rate": 2.5836096376300572e-06, "loss": 0.8823, "step": 5393 }, { "epoch": 0.67, "grad_norm": 7.767277301321147, "learning_rate": 2.581846330054034e-06, "loss": 1.3886, "step": 5394 }, { "epoch": 0.67, "grad_norm": 6.7848438479032565, "learning_rate": 2.580083414957692e-06, "loss": 1.0136, "step": 5395 }, { "epoch": 0.67, "grad_norm": 7.902319031072881, "learning_rate": 2.5783208926271562e-06, "loss": 1.0489, "step": 5396 }, { "epoch": 0.67, "grad_norm": 8.499406229367743, "learning_rate": 2.5765587633484957e-06, "loss": 0.8211, "step": 5397 }, { "epoch": 0.67, "grad_norm": 7.429251053212407, "learning_rate": 2.574797027407716e-06, "loss": 0.8723, "step": 5398 }, { "epoch": 0.67, "grad_norm": 6.951929548256885, "learning_rate": 2.5730356850907523e-06, "loss": 0.9439, "step": 5399 }, { "epoch": 0.67, "grad_norm": 7.76278870890365, "learning_rate": 2.571274736683483e-06, "loss": 1.1273, "step": 5400 }, { "epoch": 0.67, "grad_norm": 6.87306374115828, "learning_rate": 2.5695141824717183e-06, "loss": 1.1139, "step": 5401 }, { "epoch": 0.67, "grad_norm": 7.304420089603648, "learning_rate": 2.567754022741209e-06, "loss": 0.7448, "step": 5402 }, { "epoch": 0.67, "grad_norm": 7.651686136998783, "learning_rate": 2.565994257777634e-06, "loss": 0.8828, "step": 5403 }, { "epoch": 0.67, "grad_norm": 8.105986587177403, "learning_rate": 2.5642348878666155e-06, "loss": 0.9672, "step": 5404 }, { "epoch": 0.67, "grad_norm": 6.962694547869925, "learning_rate": 2.5624759132937093e-06, "loss": 1.0464, "step": 5405 }, { "epoch": 0.67, "grad_norm": 6.8764771363429835, "learning_rate": 2.560717334344405e-06, "loss": 0.8484, "step": 5406 }, { "epoch": 0.67, "grad_norm": 6.411268071771782, "learning_rate": 2.5589591513041332e-06, "loss": 1.0358, "step": 5407 }, { "epoch": 0.67, "grad_norm": 7.111054453437832, "learning_rate": 2.557201364458252e-06, "loss": 0.9125, "step": 5408 }, { "epoch": 0.67, "grad_norm": 6.677919038933841, "learning_rate": 2.555443974092062e-06, "loss": 1.0128, "step": 5409 }, { "epoch": 0.67, "grad_norm": 6.965082632000775, "learning_rate": 2.5536869804907984e-06, "loss": 0.8465, "step": 5410 }, { "epoch": 0.67, "grad_norm": 7.86748442815723, "learning_rate": 2.551930383939631e-06, "loss": 1.1616, "step": 5411 }, { "epoch": 0.67, "grad_norm": 8.818094115896365, "learning_rate": 2.5501741847236627e-06, "loss": 1.2419, "step": 5412 }, { "epoch": 0.67, "grad_norm": 5.780525381637531, "learning_rate": 2.5484183831279353e-06, "loss": 0.7534, "step": 5413 }, { "epoch": 0.67, "grad_norm": 7.4418764554853585, "learning_rate": 2.5466629794374254e-06, "loss": 0.9037, "step": 5414 }, { "epoch": 0.67, "grad_norm": 7.347442950040076, "learning_rate": 2.5449079739370454e-06, "loss": 0.9626, "step": 5415 }, { "epoch": 0.67, "grad_norm": 7.203223293287464, "learning_rate": 2.5431533669116426e-06, "loss": 0.9927, "step": 5416 }, { "epoch": 0.67, "grad_norm": 7.587522098126098, "learning_rate": 2.5413991586459973e-06, "loss": 1.3136, "step": 5417 }, { "epoch": 0.67, "grad_norm": 7.691302180045096, "learning_rate": 2.5396453494248292e-06, "loss": 1.122, "step": 5418 }, { "epoch": 0.67, "grad_norm": 6.187592186019819, "learning_rate": 2.5378919395327917e-06, "loss": 0.9037, "step": 5419 }, { "epoch": 0.67, "grad_norm": 7.7197199454786025, "learning_rate": 2.5361389292544697e-06, "loss": 1.0376, "step": 5420 }, { "epoch": 0.67, "grad_norm": 8.742826221345867, "learning_rate": 2.5343863188743883e-06, "loss": 1.2322, "step": 5421 }, { "epoch": 0.67, "grad_norm": 8.665974183453704, "learning_rate": 2.532634108677006e-06, "loss": 1.0425, "step": 5422 }, { "epoch": 0.67, "grad_norm": 8.418798257280208, "learning_rate": 2.5308822989467187e-06, "loss": 1.1311, "step": 5423 }, { "epoch": 0.67, "grad_norm": 6.5293341766372865, "learning_rate": 2.5291308899678476e-06, "loss": 1.0885, "step": 5424 }, { "epoch": 0.67, "grad_norm": 7.515576154824337, "learning_rate": 2.527379882024664e-06, "loss": 1.0249, "step": 5425 }, { "epoch": 0.67, "grad_norm": 10.700563532858808, "learning_rate": 2.5256292754013623e-06, "loss": 1.3625, "step": 5426 }, { "epoch": 0.68, "grad_norm": 6.011268830987423, "learning_rate": 2.5238790703820747e-06, "loss": 0.7707, "step": 5427 }, { "epoch": 0.68, "grad_norm": 6.810700684807323, "learning_rate": 2.5221292672508733e-06, "loss": 1.1167, "step": 5428 }, { "epoch": 0.68, "grad_norm": 7.573923530227719, "learning_rate": 2.5203798662917555e-06, "loss": 0.9314, "step": 5429 }, { "epoch": 0.68, "grad_norm": 7.177905066820054, "learning_rate": 2.5186308677886615e-06, "loss": 0.9745, "step": 5430 }, { "epoch": 0.68, "grad_norm": 7.282542764822348, "learning_rate": 2.5168822720254636e-06, "loss": 0.8384, "step": 5431 }, { "epoch": 0.68, "grad_norm": 7.167210056623535, "learning_rate": 2.5151340792859693e-06, "loss": 1.3217, "step": 5432 }, { "epoch": 0.68, "grad_norm": 6.766243500061389, "learning_rate": 2.513386289853915e-06, "loss": 0.841, "step": 5433 }, { "epoch": 0.68, "grad_norm": 6.807832228836329, "learning_rate": 2.5116389040129845e-06, "loss": 0.7385, "step": 5434 }, { "epoch": 0.68, "grad_norm": 6.618685502823863, "learning_rate": 2.509891922046782e-06, "loss": 0.7674, "step": 5435 }, { "epoch": 0.68, "grad_norm": 6.592653761426294, "learning_rate": 2.508145344238854e-06, "loss": 0.8473, "step": 5436 }, { "epoch": 0.68, "grad_norm": 6.916304283978867, "learning_rate": 2.5063991708726815e-06, "loss": 0.7795, "step": 5437 }, { "epoch": 0.68, "grad_norm": 5.820953674726026, "learning_rate": 2.504653402231675e-06, "loss": 0.7558, "step": 5438 }, { "epoch": 0.68, "grad_norm": 7.707558830278691, "learning_rate": 2.5029080385991834e-06, "loss": 0.8083, "step": 5439 }, { "epoch": 0.68, "grad_norm": 6.779198562408011, "learning_rate": 2.5011630802584897e-06, "loss": 0.8119, "step": 5440 }, { "epoch": 0.68, "grad_norm": 6.626957734800608, "learning_rate": 2.4994185274928124e-06, "loss": 0.9508, "step": 5441 }, { "epoch": 0.68, "grad_norm": 8.15692333547396, "learning_rate": 2.4976743805852947e-06, "loss": 1.2301, "step": 5442 }, { "epoch": 0.68, "grad_norm": 6.71145711036978, "learning_rate": 2.4959306398190304e-06, "loss": 0.7828, "step": 5443 }, { "epoch": 0.68, "grad_norm": 7.280625047405975, "learning_rate": 2.4941873054770322e-06, "loss": 0.9441, "step": 5444 }, { "epoch": 0.68, "grad_norm": 5.908666188456708, "learning_rate": 2.492444377842254e-06, "loss": 1.1135, "step": 5445 }, { "epoch": 0.68, "grad_norm": 7.123617360841644, "learning_rate": 2.4907018571975857e-06, "loss": 0.8592, "step": 5446 }, { "epoch": 0.68, "grad_norm": 6.737365722943362, "learning_rate": 2.4889597438258436e-06, "loss": 1.1413, "step": 5447 }, { "epoch": 0.68, "grad_norm": 6.2777772724885095, "learning_rate": 2.4872180380097837e-06, "loss": 0.6871, "step": 5448 }, { "epoch": 0.68, "grad_norm": 7.45581718190125, "learning_rate": 2.485476740032095e-06, "loss": 1.046, "step": 5449 }, { "epoch": 0.68, "grad_norm": 8.350957863943455, "learning_rate": 2.483735850175402e-06, "loss": 1.2274, "step": 5450 }, { "epoch": 0.68, "grad_norm": 6.687483829594496, "learning_rate": 2.4819953687222547e-06, "loss": 0.6005, "step": 5451 }, { "epoch": 0.68, "grad_norm": 8.139520949818932, "learning_rate": 2.4802552959551503e-06, "loss": 1.2768, "step": 5452 }, { "epoch": 0.68, "grad_norm": 8.954926948167614, "learning_rate": 2.4785156321565063e-06, "loss": 1.1359, "step": 5453 }, { "epoch": 0.68, "grad_norm": 6.446783389758241, "learning_rate": 2.4767763776086824e-06, "loss": 0.7361, "step": 5454 }, { "epoch": 0.68, "grad_norm": 8.658093494693455, "learning_rate": 2.4750375325939702e-06, "loss": 1.2429, "step": 5455 }, { "epoch": 0.68, "grad_norm": 7.192701825346585, "learning_rate": 2.4732990973945904e-06, "loss": 0.9674, "step": 5456 }, { "epoch": 0.68, "grad_norm": 7.2275995035943685, "learning_rate": 2.471561072292703e-06, "loss": 1.1357, "step": 5457 }, { "epoch": 0.68, "grad_norm": 7.709675666926339, "learning_rate": 2.4698234575703983e-06, "loss": 0.8634, "step": 5458 }, { "epoch": 0.68, "grad_norm": 8.24818924225939, "learning_rate": 2.4680862535097035e-06, "loss": 0.993, "step": 5459 }, { "epoch": 0.68, "grad_norm": 7.602641966329551, "learning_rate": 2.4663494603925712e-06, "loss": 0.8679, "step": 5460 }, { "epoch": 0.68, "grad_norm": 6.7852367656768084, "learning_rate": 2.4646130785008958e-06, "loss": 0.8934, "step": 5461 }, { "epoch": 0.68, "grad_norm": 8.778210563159888, "learning_rate": 2.4628771081165015e-06, "loss": 1.0654, "step": 5462 }, { "epoch": 0.68, "grad_norm": 6.531103280284467, "learning_rate": 2.4611415495211454e-06, "loss": 0.7234, "step": 5463 }, { "epoch": 0.68, "grad_norm": 7.519679857619453, "learning_rate": 2.4594064029965197e-06, "loss": 0.7243, "step": 5464 }, { "epoch": 0.68, "grad_norm": 7.393239097295917, "learning_rate": 2.457671668824246e-06, "loss": 0.8662, "step": 5465 }, { "epoch": 0.68, "grad_norm": 6.607924059070878, "learning_rate": 2.455937347285881e-06, "loss": 0.8595, "step": 5466 }, { "epoch": 0.68, "grad_norm": 7.330189326652301, "learning_rate": 2.4542034386629165e-06, "loss": 0.8165, "step": 5467 }, { "epoch": 0.68, "grad_norm": 7.345449885602254, "learning_rate": 2.4524699432367764e-06, "loss": 0.9335, "step": 5468 }, { "epoch": 0.68, "grad_norm": 5.695123900792349, "learning_rate": 2.450736861288813e-06, "loss": 0.569, "step": 5469 }, { "epoch": 0.68, "grad_norm": 7.628969869932227, "learning_rate": 2.449004193100317e-06, "loss": 0.954, "step": 5470 }, { "epoch": 0.68, "grad_norm": 7.306335591192682, "learning_rate": 2.44727193895251e-06, "loss": 0.7106, "step": 5471 }, { "epoch": 0.68, "grad_norm": 6.9567388057299295, "learning_rate": 2.4455400991265467e-06, "loss": 0.9219, "step": 5472 }, { "epoch": 0.68, "grad_norm": 7.8582745675488175, "learning_rate": 2.4438086739035155e-06, "loss": 0.9991, "step": 5473 }, { "epoch": 0.68, "grad_norm": 8.011744962505283, "learning_rate": 2.442077663564433e-06, "loss": 1.183, "step": 5474 }, { "epoch": 0.68, "grad_norm": 9.54084274499221, "learning_rate": 2.4403470683902524e-06, "loss": 1.3627, "step": 5475 }, { "epoch": 0.68, "grad_norm": 7.478864986696547, "learning_rate": 2.4386168886618628e-06, "loss": 0.8884, "step": 5476 }, { "epoch": 0.68, "grad_norm": 7.774640416822696, "learning_rate": 2.436887124660077e-06, "loss": 0.8774, "step": 5477 }, { "epoch": 0.68, "grad_norm": 7.885548318565556, "learning_rate": 2.4351577766656465e-06, "loss": 1.1063, "step": 5478 }, { "epoch": 0.68, "grad_norm": 7.6940961553555445, "learning_rate": 2.4334288449592554e-06, "loss": 0.9675, "step": 5479 }, { "epoch": 0.68, "grad_norm": 6.4184456961157155, "learning_rate": 2.4317003298215175e-06, "loss": 0.7671, "step": 5480 }, { "epoch": 0.68, "grad_norm": 5.988297799925814, "learning_rate": 2.429972231532981e-06, "loss": 0.8676, "step": 5481 }, { "epoch": 0.68, "grad_norm": 8.76863202968807, "learning_rate": 2.428244550374128e-06, "loss": 1.0796, "step": 5482 }, { "epoch": 0.68, "grad_norm": 7.398699134294452, "learning_rate": 2.4265172866253668e-06, "loss": 1.1942, "step": 5483 }, { "epoch": 0.68, "grad_norm": 8.546608587274878, "learning_rate": 2.424790440567043e-06, "loss": 1.2651, "step": 5484 }, { "epoch": 0.68, "grad_norm": 6.743506969297592, "learning_rate": 2.4230640124794364e-06, "loss": 0.7604, "step": 5485 }, { "epoch": 0.68, "grad_norm": 5.1313703780969355, "learning_rate": 2.42133800264275e-06, "loss": 0.6838, "step": 5486 }, { "epoch": 0.68, "grad_norm": 7.953938290897593, "learning_rate": 2.4196124113371288e-06, "loss": 1.2943, "step": 5487 }, { "epoch": 0.68, "grad_norm": 7.156508217755856, "learning_rate": 2.417887238842645e-06, "loss": 0.9661, "step": 5488 }, { "epoch": 0.68, "grad_norm": 6.669754123144207, "learning_rate": 2.4161624854393043e-06, "loss": 0.773, "step": 5489 }, { "epoch": 0.68, "grad_norm": 7.710071000867334, "learning_rate": 2.41443815140704e-06, "loss": 1.2882, "step": 5490 }, { "epoch": 0.68, "grad_norm": 6.7032492734634905, "learning_rate": 2.4127142370257274e-06, "loss": 0.7736, "step": 5491 }, { "epoch": 0.68, "grad_norm": 7.107260934583822, "learning_rate": 2.4109907425751616e-06, "loss": 0.897, "step": 5492 }, { "epoch": 0.68, "grad_norm": 7.276440998755082, "learning_rate": 2.409267668335078e-06, "loss": 1.0739, "step": 5493 }, { "epoch": 0.68, "grad_norm": 8.18955057427779, "learning_rate": 2.407545014585142e-06, "loss": 1.0147, "step": 5494 }, { "epoch": 0.68, "grad_norm": 5.873639457522321, "learning_rate": 2.405822781604946e-06, "loss": 0.6804, "step": 5495 }, { "epoch": 0.68, "grad_norm": 7.092554758853365, "learning_rate": 2.404100969674021e-06, "loss": 0.8589, "step": 5496 }, { "epoch": 0.68, "grad_norm": 8.484659375824558, "learning_rate": 2.4023795790718252e-06, "loss": 0.9934, "step": 5497 }, { "epoch": 0.68, "grad_norm": 6.482952825350839, "learning_rate": 2.4006586100777523e-06, "loss": 0.6802, "step": 5498 }, { "epoch": 0.68, "grad_norm": 6.598386814973918, "learning_rate": 2.3989380629711197e-06, "loss": 1.1626, "step": 5499 }, { "epoch": 0.68, "grad_norm": 8.547484226906084, "learning_rate": 2.3972179380311888e-06, "loss": 1.2701, "step": 5500 }, { "epoch": 0.68, "grad_norm": 6.7049101810684375, "learning_rate": 2.3954982355371394e-06, "loss": 0.7716, "step": 5501 }, { "epoch": 0.68, "grad_norm": 8.192523904070647, "learning_rate": 2.3937789557680914e-06, "loss": 1.3561, "step": 5502 }, { "epoch": 0.68, "grad_norm": 6.930523180479399, "learning_rate": 2.3920600990030944e-06, "loss": 1.1827, "step": 5503 }, { "epoch": 0.68, "grad_norm": 8.39796344862505, "learning_rate": 2.3903416655211252e-06, "loss": 1.0582, "step": 5504 }, { "epoch": 0.68, "grad_norm": 7.236050336023892, "learning_rate": 2.3886236556010968e-06, "loss": 0.9958, "step": 5505 }, { "epoch": 0.68, "grad_norm": 8.975362921467577, "learning_rate": 2.3869060695218513e-06, "loss": 1.2611, "step": 5506 }, { "epoch": 0.68, "grad_norm": 7.3474728843916095, "learning_rate": 2.385188907562165e-06, "loss": 1.0728, "step": 5507 }, { "epoch": 0.69, "grad_norm": 8.104511833611593, "learning_rate": 2.3834721700007363e-06, "loss": 0.8971, "step": 5508 }, { "epoch": 0.69, "grad_norm": 8.606507149832565, "learning_rate": 2.381755857116209e-06, "loss": 1.1043, "step": 5509 }, { "epoch": 0.69, "grad_norm": 7.600416597095433, "learning_rate": 2.3800399691871453e-06, "loss": 1.2222, "step": 5510 }, { "epoch": 0.69, "grad_norm": 8.594544385369005, "learning_rate": 2.378324506492044e-06, "loss": 1.0309, "step": 5511 }, { "epoch": 0.69, "grad_norm": 7.093108654263093, "learning_rate": 2.3766094693093366e-06, "loss": 0.9618, "step": 5512 }, { "epoch": 0.69, "grad_norm": 8.159145937786839, "learning_rate": 2.3748948579173792e-06, "loss": 1.0874, "step": 5513 }, { "epoch": 0.69, "grad_norm": 7.201277752798462, "learning_rate": 2.3731806725944644e-06, "loss": 1.1492, "step": 5514 }, { "epoch": 0.69, "grad_norm": 7.829527234190366, "learning_rate": 2.371466913618814e-06, "loss": 1.1247, "step": 5515 }, { "epoch": 0.69, "grad_norm": 7.949943267554963, "learning_rate": 2.369753581268583e-06, "loss": 1.3154, "step": 5516 }, { "epoch": 0.69, "grad_norm": 7.23329240107259, "learning_rate": 2.36804067582185e-06, "loss": 1.1137, "step": 5517 }, { "epoch": 0.69, "grad_norm": 8.832184330528545, "learning_rate": 2.3663281975566317e-06, "loss": 0.9803, "step": 5518 }, { "epoch": 0.69, "grad_norm": 5.7958265141346805, "learning_rate": 2.364616146750872e-06, "loss": 0.5407, "step": 5519 }, { "epoch": 0.69, "grad_norm": 5.52021758573661, "learning_rate": 2.362904523682447e-06, "loss": 0.6326, "step": 5520 }, { "epoch": 0.69, "grad_norm": 6.584514540998481, "learning_rate": 2.3611933286291637e-06, "loss": 0.8619, "step": 5521 }, { "epoch": 0.69, "grad_norm": 8.724643417578763, "learning_rate": 2.3594825618687557e-06, "loss": 0.9704, "step": 5522 }, { "epoch": 0.69, "grad_norm": 7.825195828811543, "learning_rate": 2.357772223678891e-06, "loss": 0.981, "step": 5523 }, { "epoch": 0.69, "grad_norm": 7.055604883282922, "learning_rate": 2.356062314337166e-06, "loss": 1.0586, "step": 5524 }, { "epoch": 0.69, "grad_norm": 7.679638902807885, "learning_rate": 2.3543528341211127e-06, "loss": 1.2363, "step": 5525 }, { "epoch": 0.69, "grad_norm": 7.416426741833359, "learning_rate": 2.3526437833081834e-06, "loss": 1.5227, "step": 5526 }, { "epoch": 0.69, "grad_norm": 6.476517900574041, "learning_rate": 2.350935162175769e-06, "loss": 0.7337, "step": 5527 }, { "epoch": 0.69, "grad_norm": 6.611099171898404, "learning_rate": 2.3492269710011884e-06, "loss": 0.6668, "step": 5528 }, { "epoch": 0.69, "grad_norm": 6.696780708387588, "learning_rate": 2.3475192100616902e-06, "loss": 0.7295, "step": 5529 }, { "epoch": 0.69, "grad_norm": 9.97513700586033, "learning_rate": 2.3458118796344553e-06, "loss": 1.3699, "step": 5530 }, { "epoch": 0.69, "grad_norm": 7.014458157152479, "learning_rate": 2.3441049799965886e-06, "loss": 0.9002, "step": 5531 }, { "epoch": 0.69, "grad_norm": 6.028303891775879, "learning_rate": 2.3423985114251326e-06, "loss": 0.747, "step": 5532 }, { "epoch": 0.69, "grad_norm": 7.650291343375791, "learning_rate": 2.3406924741970566e-06, "loss": 1.1041, "step": 5533 }, { "epoch": 0.69, "grad_norm": 6.167073741005326, "learning_rate": 2.3389868685892573e-06, "loss": 0.8741, "step": 5534 }, { "epoch": 0.69, "grad_norm": 6.4021110505537004, "learning_rate": 2.3372816948785654e-06, "loss": 0.9639, "step": 5535 }, { "epoch": 0.69, "grad_norm": 6.847916869647533, "learning_rate": 2.3355769533417404e-06, "loss": 0.9867, "step": 5536 }, { "epoch": 0.69, "grad_norm": 7.120154448052862, "learning_rate": 2.333872644255471e-06, "loss": 1.1199, "step": 5537 }, { "epoch": 0.69, "grad_norm": 5.375241868211922, "learning_rate": 2.3321687678963758e-06, "loss": 0.6352, "step": 5538 }, { "epoch": 0.69, "grad_norm": 7.75314847511314, "learning_rate": 2.3304653245410054e-06, "loss": 1.4113, "step": 5539 }, { "epoch": 0.69, "grad_norm": 8.643017264180822, "learning_rate": 2.3287623144658337e-06, "loss": 1.0899, "step": 5540 }, { "epoch": 0.69, "grad_norm": 7.295513323158918, "learning_rate": 2.3270597379472713e-06, "loss": 1.1496, "step": 5541 }, { "epoch": 0.69, "grad_norm": 9.170841645363247, "learning_rate": 2.3253575952616576e-06, "loss": 1.0698, "step": 5542 }, { "epoch": 0.69, "grad_norm": 10.004305428727148, "learning_rate": 2.3236558866852553e-06, "loss": 0.8478, "step": 5543 }, { "epoch": 0.69, "grad_norm": 6.404950733978257, "learning_rate": 2.3219546124942637e-06, "loss": 0.9455, "step": 5544 }, { "epoch": 0.69, "grad_norm": 8.445746957868314, "learning_rate": 2.3202537729648087e-06, "loss": 1.0534, "step": 5545 }, { "epoch": 0.69, "grad_norm": 6.754966866570655, "learning_rate": 2.318553368372948e-06, "loss": 0.9687, "step": 5546 }, { "epoch": 0.69, "grad_norm": 7.468777128669065, "learning_rate": 2.3168533989946606e-06, "loss": 1.0383, "step": 5547 }, { "epoch": 0.69, "grad_norm": 6.916537935767413, "learning_rate": 2.3151538651058687e-06, "loss": 0.715, "step": 5548 }, { "epoch": 0.69, "grad_norm": 8.69617571108172, "learning_rate": 2.3134547669824104e-06, "loss": 0.8417, "step": 5549 }, { "epoch": 0.69, "grad_norm": 7.704135559768505, "learning_rate": 2.3117561049000602e-06, "loss": 1.3352, "step": 5550 }, { "epoch": 0.69, "grad_norm": 7.158445308736757, "learning_rate": 2.310057879134523e-06, "loss": 0.8701, "step": 5551 }, { "epoch": 0.69, "grad_norm": 7.350127482420008, "learning_rate": 2.3083600899614258e-06, "loss": 0.8821, "step": 5552 }, { "epoch": 0.69, "grad_norm": 7.146605901388358, "learning_rate": 2.3066627376563312e-06, "loss": 0.9291, "step": 5553 }, { "epoch": 0.69, "grad_norm": 6.955412348726583, "learning_rate": 2.3049658224947285e-06, "loss": 1.1863, "step": 5554 }, { "epoch": 0.69, "grad_norm": 6.746942484005451, "learning_rate": 2.303269344752039e-06, "loss": 0.7833, "step": 5555 }, { "epoch": 0.69, "grad_norm": 6.6022636666898125, "learning_rate": 2.301573304703604e-06, "loss": 0.9358, "step": 5556 }, { "epoch": 0.69, "grad_norm": 8.538610233061139, "learning_rate": 2.2998777026247084e-06, "loss": 1.0467, "step": 5557 }, { "epoch": 0.69, "grad_norm": 6.577443601113839, "learning_rate": 2.298182538790551e-06, "loss": 0.8661, "step": 5558 }, { "epoch": 0.69, "grad_norm": 8.286375120794329, "learning_rate": 2.29648781347627e-06, "loss": 1.0991, "step": 5559 }, { "epoch": 0.69, "grad_norm": 9.045455439308974, "learning_rate": 2.2947935269569283e-06, "loss": 0.9372, "step": 5560 }, { "epoch": 0.69, "grad_norm": 8.051479066424358, "learning_rate": 2.2930996795075154e-06, "loss": 1.1141, "step": 5561 }, { "epoch": 0.69, "grad_norm": 7.383018480752964, "learning_rate": 2.2914062714029545e-06, "loss": 0.7905, "step": 5562 }, { "epoch": 0.69, "grad_norm": 7.521262536530376, "learning_rate": 2.2897133029180944e-06, "loss": 0.9697, "step": 5563 }, { "epoch": 0.69, "grad_norm": 8.737568019611368, "learning_rate": 2.288020774327715e-06, "loss": 0.7935, "step": 5564 }, { "epoch": 0.69, "grad_norm": 7.799129897061949, "learning_rate": 2.2863286859065177e-06, "loss": 0.9124, "step": 5565 }, { "epoch": 0.69, "grad_norm": 7.077451695798893, "learning_rate": 2.2846370379291444e-06, "loss": 0.9366, "step": 5566 }, { "epoch": 0.69, "grad_norm": 7.34425972492835, "learning_rate": 2.2829458306701545e-06, "loss": 0.8415, "step": 5567 }, { "epoch": 0.69, "grad_norm": 7.311072758255354, "learning_rate": 2.2812550644040416e-06, "loss": 0.9988, "step": 5568 }, { "epoch": 0.69, "grad_norm": 8.109769240603491, "learning_rate": 2.2795647394052284e-06, "loss": 0.9223, "step": 5569 }, { "epoch": 0.69, "grad_norm": 8.995344528520361, "learning_rate": 2.27787485594806e-06, "loss": 1.0763, "step": 5570 }, { "epoch": 0.69, "grad_norm": 7.541115938044446, "learning_rate": 2.276185414306816e-06, "loss": 1.0036, "step": 5571 }, { "epoch": 0.69, "grad_norm": 8.352000560011525, "learning_rate": 2.274496414755702e-06, "loss": 1.0127, "step": 5572 }, { "epoch": 0.69, "grad_norm": 7.1918810435239156, "learning_rate": 2.2728078575688534e-06, "loss": 1.0178, "step": 5573 }, { "epoch": 0.69, "grad_norm": 6.207761295490915, "learning_rate": 2.2711197430203287e-06, "loss": 0.9163, "step": 5574 }, { "epoch": 0.69, "grad_norm": 7.625502743284341, "learning_rate": 2.2694320713841208e-06, "loss": 1.0169, "step": 5575 }, { "epoch": 0.69, "grad_norm": 6.928190099252302, "learning_rate": 2.267744842934147e-06, "loss": 0.939, "step": 5576 }, { "epoch": 0.69, "grad_norm": 12.472774672585384, "learning_rate": 2.266058057944255e-06, "loss": 1.0592, "step": 5577 }, { "epoch": 0.69, "grad_norm": 8.216365348415879, "learning_rate": 2.26437171668822e-06, "loss": 1.0002, "step": 5578 }, { "epoch": 0.69, "grad_norm": 7.452949881748489, "learning_rate": 2.2626858194397417e-06, "loss": 0.909, "step": 5579 }, { "epoch": 0.69, "grad_norm": 7.322476468510491, "learning_rate": 2.2610003664724516e-06, "loss": 1.0612, "step": 5580 }, { "epoch": 0.69, "grad_norm": 7.469815621355512, "learning_rate": 2.2593153580599087e-06, "loss": 1.2429, "step": 5581 }, { "epoch": 0.69, "grad_norm": 7.4708210592449245, "learning_rate": 2.257630794475601e-06, "loss": 0.9866, "step": 5582 }, { "epoch": 0.69, "grad_norm": 7.739031173346153, "learning_rate": 2.255946675992938e-06, "loss": 1.2215, "step": 5583 }, { "epoch": 0.69, "grad_norm": 7.766188596056296, "learning_rate": 2.254263002885264e-06, "loss": 0.9966, "step": 5584 }, { "epoch": 0.69, "grad_norm": 7.4519005435022185, "learning_rate": 2.252579775425848e-06, "loss": 1.0712, "step": 5585 }, { "epoch": 0.69, "grad_norm": 7.789450615591923, "learning_rate": 2.2508969938878874e-06, "loss": 0.839, "step": 5586 }, { "epoch": 0.69, "grad_norm": 8.973048626678443, "learning_rate": 2.2492146585445084e-06, "loss": 1.0536, "step": 5587 }, { "epoch": 0.7, "grad_norm": 9.404294698538303, "learning_rate": 2.2475327696687603e-06, "loss": 1.1149, "step": 5588 }, { "epoch": 0.7, "grad_norm": 7.569897986724842, "learning_rate": 2.245851327533624e-06, "loss": 0.8652, "step": 5589 }, { "epoch": 0.7, "grad_norm": 7.089694805932787, "learning_rate": 2.2441703324120095e-06, "loss": 1.0311, "step": 5590 }, { "epoch": 0.7, "grad_norm": 7.295507030618713, "learning_rate": 2.2424897845767468e-06, "loss": 0.8182, "step": 5591 }, { "epoch": 0.7, "grad_norm": 9.169581457498422, "learning_rate": 2.240809684300601e-06, "loss": 1.0368, "step": 5592 }, { "epoch": 0.7, "grad_norm": 7.728414943984491, "learning_rate": 2.239130031856261e-06, "loss": 1.1831, "step": 5593 }, { "epoch": 0.7, "grad_norm": 7.218689543680638, "learning_rate": 2.237450827516344e-06, "loss": 1.0907, "step": 5594 }, { "epoch": 0.7, "grad_norm": 7.4667117158686915, "learning_rate": 2.2357720715533936e-06, "loss": 0.7462, "step": 5595 }, { "epoch": 0.7, "grad_norm": 6.544231725587409, "learning_rate": 2.2340937642398837e-06, "loss": 0.594, "step": 5596 }, { "epoch": 0.7, "grad_norm": 7.386810804172014, "learning_rate": 2.2324159058482086e-06, "loss": 0.9046, "step": 5597 }, { "epoch": 0.7, "grad_norm": 8.318926774603174, "learning_rate": 2.230738496650696e-06, "loss": 1.478, "step": 5598 }, { "epoch": 0.7, "grad_norm": 8.880722718039927, "learning_rate": 2.2290615369196006e-06, "loss": 1.427, "step": 5599 }, { "epoch": 0.7, "grad_norm": 6.1339515854302435, "learning_rate": 2.2273850269270985e-06, "loss": 0.9711, "step": 5600 }, { "epoch": 0.7, "grad_norm": 7.799455009526418, "learning_rate": 2.225708966945298e-06, "loss": 1.1209, "step": 5601 }, { "epoch": 0.7, "grad_norm": 7.028741220017212, "learning_rate": 2.2240333572462337e-06, "loss": 0.9513, "step": 5602 }, { "epoch": 0.7, "grad_norm": 7.420101923399622, "learning_rate": 2.2223581981018653e-06, "loss": 0.7355, "step": 5603 }, { "epoch": 0.7, "grad_norm": 7.520928036431411, "learning_rate": 2.2206834897840814e-06, "loss": 1.1287, "step": 5604 }, { "epoch": 0.7, "grad_norm": 7.419989681058859, "learning_rate": 2.2190092325646973e-06, "loss": 1.2326, "step": 5605 }, { "epoch": 0.7, "grad_norm": 7.83513690698936, "learning_rate": 2.2173354267154514e-06, "loss": 1.1507, "step": 5606 }, { "epoch": 0.7, "grad_norm": 6.34096410777285, "learning_rate": 2.2156620725080125e-06, "loss": 0.7976, "step": 5607 }, { "epoch": 0.7, "grad_norm": 8.380398232927384, "learning_rate": 2.2139891702139772e-06, "loss": 0.9283, "step": 5608 }, { "epoch": 0.7, "grad_norm": 6.732678554555582, "learning_rate": 2.212316720104864e-06, "loss": 0.8136, "step": 5609 }, { "epoch": 0.7, "grad_norm": 7.405533930045669, "learning_rate": 2.2106447224521215e-06, "loss": 0.9777, "step": 5610 }, { "epoch": 0.7, "grad_norm": 8.752846975975492, "learning_rate": 2.208973177527125e-06, "loss": 1.3338, "step": 5611 }, { "epoch": 0.7, "grad_norm": 8.91123648803862, "learning_rate": 2.2073020856011773e-06, "loss": 0.8719, "step": 5612 }, { "epoch": 0.7, "grad_norm": 6.476196422899501, "learning_rate": 2.2056314469454993e-06, "loss": 0.6753, "step": 5613 }, { "epoch": 0.7, "grad_norm": 6.920078860885444, "learning_rate": 2.2039612618312535e-06, "loss": 1.3196, "step": 5614 }, { "epoch": 0.7, "grad_norm": 7.079634499994337, "learning_rate": 2.2022915305295134e-06, "loss": 0.7021, "step": 5615 }, { "epoch": 0.7, "grad_norm": 6.1509725090387475, "learning_rate": 2.2006222533112887e-06, "loss": 0.8537, "step": 5616 }, { "epoch": 0.7, "grad_norm": 6.903362314359044, "learning_rate": 2.198953430447513e-06, "loss": 1.1743, "step": 5617 }, { "epoch": 0.7, "grad_norm": 7.456155659731786, "learning_rate": 2.1972850622090426e-06, "loss": 0.9912, "step": 5618 }, { "epoch": 0.7, "grad_norm": 6.212528417283683, "learning_rate": 2.1956171488666644e-06, "loss": 0.8512, "step": 5619 }, { "epoch": 0.7, "grad_norm": 6.747756273560983, "learning_rate": 2.1939496906910894e-06, "loss": 0.8113, "step": 5620 }, { "epoch": 0.7, "grad_norm": 6.481271151018585, "learning_rate": 2.1922826879529584e-06, "loss": 0.6088, "step": 5621 }, { "epoch": 0.7, "grad_norm": 8.768430374641698, "learning_rate": 2.1906161409228288e-06, "loss": 0.8562, "step": 5622 }, { "epoch": 0.7, "grad_norm": 5.882106656839223, "learning_rate": 2.1889500498711976e-06, "loss": 0.802, "step": 5623 }, { "epoch": 0.7, "grad_norm": 7.367343716541569, "learning_rate": 2.1872844150684747e-06, "loss": 0.5489, "step": 5624 }, { "epoch": 0.7, "grad_norm": 6.5816877904700135, "learning_rate": 2.185619236785005e-06, "loss": 0.8178, "step": 5625 }, { "epoch": 0.7, "grad_norm": 7.743740865974344, "learning_rate": 2.1839545152910562e-06, "loss": 0.8745, "step": 5626 }, { "epoch": 0.7, "grad_norm": 7.4045773610674495, "learning_rate": 2.18229025085682e-06, "loss": 0.8295, "step": 5627 }, { "epoch": 0.7, "grad_norm": 13.003677574059356, "learning_rate": 2.1806264437524162e-06, "loss": 1.1558, "step": 5628 }, { "epoch": 0.7, "grad_norm": 7.731007010570596, "learning_rate": 2.17896309424789e-06, "loss": 0.9005, "step": 5629 }, { "epoch": 0.7, "grad_norm": 8.65045789409166, "learning_rate": 2.177300202613215e-06, "loss": 1.1773, "step": 5630 }, { "epoch": 0.7, "grad_norm": 8.368509865013303, "learning_rate": 2.1756377691182827e-06, "loss": 1.1366, "step": 5631 }, { "epoch": 0.7, "grad_norm": 6.086930012975313, "learning_rate": 2.1739757940329177e-06, "loss": 0.7532, "step": 5632 }, { "epoch": 0.7, "grad_norm": 6.657872258428628, "learning_rate": 2.172314277626868e-06, "loss": 0.8824, "step": 5633 }, { "epoch": 0.7, "grad_norm": 8.067231013447033, "learning_rate": 2.170653220169806e-06, "loss": 0.9166, "step": 5634 }, { "epoch": 0.7, "grad_norm": 7.069920465652297, "learning_rate": 2.1689926219313336e-06, "loss": 1.0193, "step": 5635 }, { "epoch": 0.7, "grad_norm": 6.157760859157222, "learning_rate": 2.16733248318097e-06, "loss": 0.7834, "step": 5636 }, { "epoch": 0.7, "grad_norm": 8.180355851543341, "learning_rate": 2.1656728041881674e-06, "loss": 1.0621, "step": 5637 }, { "epoch": 0.7, "grad_norm": 8.86878087200589, "learning_rate": 2.1640135852223017e-06, "loss": 1.4167, "step": 5638 }, { "epoch": 0.7, "grad_norm": 8.517756284927172, "learning_rate": 2.1623548265526734e-06, "loss": 1.021, "step": 5639 }, { "epoch": 0.7, "grad_norm": 8.48444088728381, "learning_rate": 2.1606965284485052e-06, "loss": 0.9932, "step": 5640 }, { "epoch": 0.7, "grad_norm": 7.200232776586898, "learning_rate": 2.159038691178951e-06, "loss": 0.7163, "step": 5641 }, { "epoch": 0.7, "grad_norm": 7.400741537536331, "learning_rate": 2.157381315013085e-06, "loss": 1.2344, "step": 5642 }, { "epoch": 0.7, "grad_norm": 6.29017240954335, "learning_rate": 2.15572440021991e-06, "loss": 0.6089, "step": 5643 }, { "epoch": 0.7, "grad_norm": 7.768900194524409, "learning_rate": 2.1540679470683536e-06, "loss": 0.9585, "step": 5644 }, { "epoch": 0.7, "grad_norm": 7.6655927153787875, "learning_rate": 2.1524119558272634e-06, "loss": 1.3609, "step": 5645 }, { "epoch": 0.7, "grad_norm": 7.573523006860468, "learning_rate": 2.1507564267654187e-06, "loss": 0.9308, "step": 5646 }, { "epoch": 0.7, "grad_norm": 8.181151952718801, "learning_rate": 2.1491013601515217e-06, "loss": 1.1275, "step": 5647 }, { "epoch": 0.7, "grad_norm": 6.424612358237312, "learning_rate": 2.1474467562541956e-06, "loss": 1.0109, "step": 5648 }, { "epoch": 0.7, "grad_norm": 7.554035151507947, "learning_rate": 2.145792615341994e-06, "loss": 0.9482, "step": 5649 }, { "epoch": 0.7, "grad_norm": 7.639104585343546, "learning_rate": 2.144138937683392e-06, "loss": 1.1452, "step": 5650 }, { "epoch": 0.7, "grad_norm": 7.051044947898372, "learning_rate": 2.142485723546792e-06, "loss": 0.9794, "step": 5651 }, { "epoch": 0.7, "grad_norm": 7.836944525802468, "learning_rate": 2.140832973200519e-06, "loss": 0.9392, "step": 5652 }, { "epoch": 0.7, "grad_norm": 8.038277501816692, "learning_rate": 2.139180686912825e-06, "loss": 0.9685, "step": 5653 }, { "epoch": 0.7, "grad_norm": 7.184573988615559, "learning_rate": 2.137528864951882e-06, "loss": 0.9637, "step": 5654 }, { "epoch": 0.7, "grad_norm": 8.41547653674082, "learning_rate": 2.1358775075857923e-06, "loss": 1.1813, "step": 5655 }, { "epoch": 0.7, "grad_norm": 7.362638008320967, "learning_rate": 2.134226615082581e-06, "loss": 1.128, "step": 5656 }, { "epoch": 0.7, "grad_norm": 8.47602763955718, "learning_rate": 2.1325761877101938e-06, "loss": 1.3775, "step": 5657 }, { "epoch": 0.7, "grad_norm": 7.8138213380328, "learning_rate": 2.130926225736506e-06, "loss": 1.0226, "step": 5658 }, { "epoch": 0.7, "grad_norm": 8.134668840260721, "learning_rate": 2.1292767294293155e-06, "loss": 1.2093, "step": 5659 }, { "epoch": 0.7, "grad_norm": 6.612476939788806, "learning_rate": 2.127627699056345e-06, "loss": 1.1514, "step": 5660 }, { "epoch": 0.7, "grad_norm": 7.587730308473502, "learning_rate": 2.1259791348852405e-06, "loss": 1.0006, "step": 5661 }, { "epoch": 0.7, "grad_norm": 8.829925369317756, "learning_rate": 2.1243310371835753e-06, "loss": 1.2021, "step": 5662 }, { "epoch": 0.7, "grad_norm": 6.44919817378396, "learning_rate": 2.1226834062188413e-06, "loss": 0.8141, "step": 5663 }, { "epoch": 0.7, "grad_norm": 8.79901785902825, "learning_rate": 2.1210362422584595e-06, "loss": 1.1667, "step": 5664 }, { "epoch": 0.7, "grad_norm": 8.353966007279153, "learning_rate": 2.119389545569776e-06, "loss": 1.0221, "step": 5665 }, { "epoch": 0.7, "grad_norm": 7.22507716072179, "learning_rate": 2.117743316420055e-06, "loss": 1.2152, "step": 5666 }, { "epoch": 0.7, "grad_norm": 6.445696820685294, "learning_rate": 2.11609755507649e-06, "loss": 0.6714, "step": 5667 }, { "epoch": 0.7, "grad_norm": 7.508158244970352, "learning_rate": 2.1144522618061973e-06, "loss": 1.0264, "step": 5668 }, { "epoch": 0.71, "grad_norm": 7.043781727314199, "learning_rate": 2.112807436876219e-06, "loss": 0.9212, "step": 5669 }, { "epoch": 0.71, "grad_norm": 7.198473966682302, "learning_rate": 2.1111630805535137e-06, "loss": 0.8524, "step": 5670 }, { "epoch": 0.71, "grad_norm": 8.328025734788989, "learning_rate": 2.1095191931049774e-06, "loss": 1.2141, "step": 5671 }, { "epoch": 0.71, "grad_norm": 8.087893631909768, "learning_rate": 2.1078757747974156e-06, "loss": 1.2301, "step": 5672 }, { "epoch": 0.71, "grad_norm": 8.89200099115882, "learning_rate": 2.1062328258975663e-06, "loss": 1.1518, "step": 5673 }, { "epoch": 0.71, "grad_norm": 7.358979331789453, "learning_rate": 2.1045903466720915e-06, "loss": 0.8818, "step": 5674 }, { "epoch": 0.71, "grad_norm": 8.356756833505381, "learning_rate": 2.1029483373875702e-06, "loss": 0.9762, "step": 5675 }, { "epoch": 0.71, "grad_norm": 7.786573855937218, "learning_rate": 2.1013067983105125e-06, "loss": 0.9982, "step": 5676 }, { "epoch": 0.71, "grad_norm": 7.206006818112111, "learning_rate": 2.099665729707348e-06, "loss": 0.793, "step": 5677 }, { "epoch": 0.71, "grad_norm": 7.9349424577903, "learning_rate": 2.098025131844434e-06, "loss": 1.2035, "step": 5678 }, { "epoch": 0.71, "grad_norm": 6.309418936174547, "learning_rate": 2.0963850049880425e-06, "loss": 0.8278, "step": 5679 }, { "epoch": 0.71, "grad_norm": 8.375457250238739, "learning_rate": 2.094745349404383e-06, "loss": 1.0346, "step": 5680 }, { "epoch": 0.71, "grad_norm": 6.797200258479205, "learning_rate": 2.093106165359574e-06, "loss": 1.0083, "step": 5681 }, { "epoch": 0.71, "grad_norm": 7.895890613413377, "learning_rate": 2.0914674531196673e-06, "loss": 1.5469, "step": 5682 }, { "epoch": 0.71, "grad_norm": 7.6415585903467385, "learning_rate": 2.089829212950636e-06, "loss": 0.8717, "step": 5683 }, { "epoch": 0.71, "grad_norm": 6.282869532386937, "learning_rate": 2.088191445118372e-06, "loss": 0.8, "step": 5684 }, { "epoch": 0.71, "grad_norm": 9.33851798962582, "learning_rate": 2.0865541498886965e-06, "loss": 0.9711, "step": 5685 }, { "epoch": 0.71, "grad_norm": 8.544168866272331, "learning_rate": 2.0849173275273504e-06, "loss": 0.7877, "step": 5686 }, { "epoch": 0.71, "grad_norm": 7.660770648223702, "learning_rate": 2.083280978300001e-06, "loss": 1.2236, "step": 5687 }, { "epoch": 0.71, "grad_norm": 7.051518442929429, "learning_rate": 2.0816451024722344e-06, "loss": 0.7266, "step": 5688 }, { "epoch": 0.71, "grad_norm": 4.945317111381169, "learning_rate": 2.080009700309563e-06, "loss": 0.7772, "step": 5689 }, { "epoch": 0.71, "grad_norm": 8.23298438410209, "learning_rate": 2.078374772077422e-06, "loss": 1.1561, "step": 5690 }, { "epoch": 0.71, "grad_norm": 7.79792825062254, "learning_rate": 2.0767403180411686e-06, "loss": 0.8601, "step": 5691 }, { "epoch": 0.71, "grad_norm": 7.557566033725328, "learning_rate": 2.075106338466087e-06, "loss": 0.9189, "step": 5692 }, { "epoch": 0.71, "grad_norm": 7.15936648245612, "learning_rate": 2.0734728336173754e-06, "loss": 0.8864, "step": 5693 }, { "epoch": 0.71, "grad_norm": 8.751047853633395, "learning_rate": 2.0718398037601645e-06, "loss": 0.7984, "step": 5694 }, { "epoch": 0.71, "grad_norm": 7.24967553066696, "learning_rate": 2.0702072491595023e-06, "loss": 1.1356, "step": 5695 }, { "epoch": 0.71, "grad_norm": 6.616911094188815, "learning_rate": 2.0685751700803647e-06, "loss": 0.7416, "step": 5696 }, { "epoch": 0.71, "grad_norm": 9.09862700700453, "learning_rate": 2.0669435667876427e-06, "loss": 0.9267, "step": 5697 }, { "epoch": 0.71, "grad_norm": 8.334036889419455, "learning_rate": 2.065312439546156e-06, "loss": 1.345, "step": 5698 }, { "epoch": 0.71, "grad_norm": 7.8120725933695265, "learning_rate": 2.0636817886206463e-06, "loss": 0.953, "step": 5699 }, { "epoch": 0.71, "grad_norm": 8.113427958533881, "learning_rate": 2.0620516142757767e-06, "loss": 1.3327, "step": 5700 }, { "epoch": 0.71, "grad_norm": 8.705697644841552, "learning_rate": 2.060421916776136e-06, "loss": 1.1696, "step": 5701 }, { "epoch": 0.71, "grad_norm": 7.269193374217374, "learning_rate": 2.0587926963862287e-06, "loss": 1.0, "step": 5702 }, { "epoch": 0.71, "grad_norm": 7.859716808679012, "learning_rate": 2.0571639533704892e-06, "loss": 1.2701, "step": 5703 }, { "epoch": 0.71, "grad_norm": 8.25093609583749, "learning_rate": 2.0555356879932715e-06, "loss": 1.3117, "step": 5704 }, { "epoch": 0.71, "grad_norm": 7.368446496776558, "learning_rate": 2.0539079005188494e-06, "loss": 1.0385, "step": 5705 }, { "epoch": 0.71, "grad_norm": 5.87065640235579, "learning_rate": 2.052280591211424e-06, "loss": 0.7869, "step": 5706 }, { "epoch": 0.71, "grad_norm": 6.822158256944469, "learning_rate": 2.050653760335117e-06, "loss": 0.8955, "step": 5707 }, { "epoch": 0.71, "grad_norm": 6.90942480082285, "learning_rate": 2.0490274081539695e-06, "loss": 0.727, "step": 5708 }, { "epoch": 0.71, "grad_norm": 6.649979704786663, "learning_rate": 2.0474015349319505e-06, "loss": 0.6753, "step": 5709 }, { "epoch": 0.71, "grad_norm": 6.108796184465183, "learning_rate": 2.045776140932948e-06, "loss": 0.7587, "step": 5710 }, { "epoch": 0.71, "grad_norm": 6.869662804093532, "learning_rate": 2.0441512264207697e-06, "loss": 0.677, "step": 5711 }, { "epoch": 0.71, "grad_norm": 7.703392248457174, "learning_rate": 2.0425267916591494e-06, "loss": 0.9445, "step": 5712 }, { "epoch": 0.71, "grad_norm": 8.677797023718751, "learning_rate": 2.0409028369117445e-06, "loss": 1.2015, "step": 5713 }, { "epoch": 0.71, "grad_norm": 7.451409046498012, "learning_rate": 2.039279362442127e-06, "loss": 0.955, "step": 5714 }, { "epoch": 0.71, "grad_norm": 7.952299790710598, "learning_rate": 2.0376563685137984e-06, "loss": 0.8534, "step": 5715 }, { "epoch": 0.71, "grad_norm": 7.733014818381558, "learning_rate": 2.03603385539018e-06, "loss": 0.9754, "step": 5716 }, { "epoch": 0.71, "grad_norm": 7.852568237613428, "learning_rate": 2.0344118233346133e-06, "loss": 1.3245, "step": 5717 }, { "epoch": 0.71, "grad_norm": 6.894663578271059, "learning_rate": 2.0327902726103636e-06, "loss": 0.8377, "step": 5718 }, { "epoch": 0.71, "grad_norm": 6.946946911285317, "learning_rate": 2.0311692034806204e-06, "loss": 1.1304, "step": 5719 }, { "epoch": 0.71, "grad_norm": 7.95934083422123, "learning_rate": 2.029548616208487e-06, "loss": 1.214, "step": 5720 }, { "epoch": 0.71, "grad_norm": 8.392955013376309, "learning_rate": 2.0279285110569963e-06, "loss": 1.3541, "step": 5721 }, { "epoch": 0.71, "grad_norm": 7.562184841252564, "learning_rate": 2.0263088882891017e-06, "loss": 0.8256, "step": 5722 }, { "epoch": 0.71, "grad_norm": 8.05825814706175, "learning_rate": 2.0246897481676735e-06, "loss": 1.0398, "step": 5723 }, { "epoch": 0.71, "grad_norm": 5.726502515925439, "learning_rate": 2.023071090955509e-06, "loss": 0.728, "step": 5724 }, { "epoch": 0.71, "grad_norm": 7.112098682711975, "learning_rate": 2.0214529169153253e-06, "loss": 0.8187, "step": 5725 }, { "epoch": 0.71, "grad_norm": 8.299987147884279, "learning_rate": 2.019835226309762e-06, "loss": 1.46, "step": 5726 }, { "epoch": 0.71, "grad_norm": 6.925918875539858, "learning_rate": 2.018218019401375e-06, "loss": 0.9987, "step": 5727 }, { "epoch": 0.71, "grad_norm": 6.726503899210235, "learning_rate": 2.0166012964526522e-06, "loss": 0.8549, "step": 5728 }, { "epoch": 0.71, "grad_norm": 6.769953446792271, "learning_rate": 2.0149850577259916e-06, "loss": 0.8113, "step": 5729 }, { "epoch": 0.71, "grad_norm": 8.252602554408318, "learning_rate": 2.013369303483719e-06, "loss": 1.0442, "step": 5730 }, { "epoch": 0.71, "grad_norm": 7.205468835862481, "learning_rate": 2.0117540339880825e-06, "loss": 0.8888, "step": 5731 }, { "epoch": 0.71, "grad_norm": 8.009979019740586, "learning_rate": 2.0101392495012457e-06, "loss": 0.9882, "step": 5732 }, { "epoch": 0.71, "grad_norm": 7.349236541632101, "learning_rate": 2.008524950285299e-06, "loss": 1.179, "step": 5733 }, { "epoch": 0.71, "grad_norm": 6.535707747199044, "learning_rate": 2.006911136602251e-06, "loss": 0.8065, "step": 5734 }, { "epoch": 0.71, "grad_norm": 7.166847521584121, "learning_rate": 2.005297808714036e-06, "loss": 1.0412, "step": 5735 }, { "epoch": 0.71, "grad_norm": 8.218915328794463, "learning_rate": 2.0036849668824985e-06, "loss": 0.8394, "step": 5736 }, { "epoch": 0.71, "grad_norm": 6.192393110498699, "learning_rate": 2.0020726113694204e-06, "loss": 0.6873, "step": 5737 }, { "epoch": 0.71, "grad_norm": 6.059022051173662, "learning_rate": 2.0004607424364907e-06, "loss": 0.543, "step": 5738 }, { "epoch": 0.71, "grad_norm": 7.841148225721911, "learning_rate": 1.9988493603453257e-06, "loss": 1.1615, "step": 5739 }, { "epoch": 0.71, "grad_norm": 6.19673226027549, "learning_rate": 1.9972384653574634e-06, "loss": 0.5947, "step": 5740 }, { "epoch": 0.71, "grad_norm": 7.087532571198194, "learning_rate": 1.9956280577343573e-06, "loss": 0.8392, "step": 5741 }, { "epoch": 0.71, "grad_norm": 7.361610337401203, "learning_rate": 1.9940181377373874e-06, "loss": 0.9702, "step": 5742 }, { "epoch": 0.71, "grad_norm": 6.034380888305064, "learning_rate": 1.992408705627853e-06, "loss": 0.7166, "step": 5743 }, { "epoch": 0.71, "grad_norm": 8.624916985450145, "learning_rate": 1.990799761666975e-06, "loss": 1.1146, "step": 5744 }, { "epoch": 0.71, "grad_norm": 7.788495745236911, "learning_rate": 1.9891913061158906e-06, "loss": 1.0187, "step": 5745 }, { "epoch": 0.71, "grad_norm": 11.402404595689209, "learning_rate": 1.9875833392356624e-06, "loss": 0.9233, "step": 5746 }, { "epoch": 0.71, "grad_norm": 7.202574734292438, "learning_rate": 1.985975861287273e-06, "loss": 0.8315, "step": 5747 }, { "epoch": 0.71, "grad_norm": 7.749619791168681, "learning_rate": 1.9843688725316247e-06, "loss": 0.8377, "step": 5748 }, { "epoch": 0.72, "grad_norm": 7.2322847751681625, "learning_rate": 1.9827623732295414e-06, "loss": 1.038, "step": 5749 }, { "epoch": 0.72, "grad_norm": 6.7760653965123065, "learning_rate": 1.9811563636417647e-06, "loss": 0.9875, "step": 5750 }, { "epoch": 0.72, "grad_norm": 7.80449120336898, "learning_rate": 1.97955084402896e-06, "loss": 0.7697, "step": 5751 }, { "epoch": 0.72, "grad_norm": 7.381196069041541, "learning_rate": 1.9779458146517123e-06, "loss": 0.9474, "step": 5752 }, { "epoch": 0.72, "grad_norm": 7.436315588349855, "learning_rate": 1.9763412757705276e-06, "loss": 1.148, "step": 5753 }, { "epoch": 0.72, "grad_norm": 7.299380311624286, "learning_rate": 1.9747372276458287e-06, "loss": 0.7987, "step": 5754 }, { "epoch": 0.72, "grad_norm": 7.19051698552164, "learning_rate": 1.9731336705379627e-06, "loss": 0.6652, "step": 5755 }, { "epoch": 0.72, "grad_norm": 7.947665875404551, "learning_rate": 1.9715306047071964e-06, "loss": 1.2077, "step": 5756 }, { "epoch": 0.72, "grad_norm": 8.629786705045895, "learning_rate": 1.9699280304137157e-06, "loss": 1.0247, "step": 5757 }, { "epoch": 0.72, "grad_norm": 7.517211985264053, "learning_rate": 1.9683259479176294e-06, "loss": 0.9458, "step": 5758 }, { "epoch": 0.72, "grad_norm": 7.3062630863615015, "learning_rate": 1.9667243574789598e-06, "loss": 0.6706, "step": 5759 }, { "epoch": 0.72, "grad_norm": 6.7105913936002874, "learning_rate": 1.965123259357657e-06, "loss": 0.6612, "step": 5760 }, { "epoch": 0.72, "grad_norm": 8.108620747702567, "learning_rate": 1.9635226538135894e-06, "loss": 0.8351, "step": 5761 }, { "epoch": 0.72, "grad_norm": 7.745258002786883, "learning_rate": 1.9619225411065395e-06, "loss": 0.972, "step": 5762 }, { "epoch": 0.72, "grad_norm": 7.351270474893895, "learning_rate": 1.960322921496218e-06, "loss": 0.876, "step": 5763 }, { "epoch": 0.72, "grad_norm": 8.31991505407077, "learning_rate": 1.9587237952422506e-06, "loss": 1.1145, "step": 5764 }, { "epoch": 0.72, "grad_norm": 8.441029594772859, "learning_rate": 1.9571251626041847e-06, "loss": 0.8765, "step": 5765 }, { "epoch": 0.72, "grad_norm": 7.526046289043925, "learning_rate": 1.9555270238414875e-06, "loss": 1.2066, "step": 5766 }, { "epoch": 0.72, "grad_norm": 8.005375205175485, "learning_rate": 1.953929379213547e-06, "loss": 1.1523, "step": 5767 }, { "epoch": 0.72, "grad_norm": 7.669435310834957, "learning_rate": 1.952332228979667e-06, "loss": 0.9383, "step": 5768 }, { "epoch": 0.72, "grad_norm": 7.96547996544576, "learning_rate": 1.9507355733990745e-06, "loss": 0.9719, "step": 5769 }, { "epoch": 0.72, "grad_norm": 7.284023579850988, "learning_rate": 1.9491394127309178e-06, "loss": 1.0214, "step": 5770 }, { "epoch": 0.72, "grad_norm": 9.725738418595958, "learning_rate": 1.9475437472342596e-06, "loss": 0.9573, "step": 5771 }, { "epoch": 0.72, "grad_norm": 6.76033621076739, "learning_rate": 1.945948577168086e-06, "loss": 0.8596, "step": 5772 }, { "epoch": 0.72, "grad_norm": 6.068360702164329, "learning_rate": 1.9443539027913022e-06, "loss": 0.6979, "step": 5773 }, { "epoch": 0.72, "grad_norm": 7.982917828411762, "learning_rate": 1.942759724362733e-06, "loss": 0.9028, "step": 5774 }, { "epoch": 0.72, "grad_norm": 5.643036284319473, "learning_rate": 1.9411660421411216e-06, "loss": 0.582, "step": 5775 }, { "epoch": 0.72, "grad_norm": 6.674841835143923, "learning_rate": 1.939572856385134e-06, "loss": 0.567, "step": 5776 }, { "epoch": 0.72, "grad_norm": 7.481683673963843, "learning_rate": 1.937980167353349e-06, "loss": 1.0768, "step": 5777 }, { "epoch": 0.72, "grad_norm": 8.452314844959444, "learning_rate": 1.9363879753042715e-06, "loss": 1.1228, "step": 5778 }, { "epoch": 0.72, "grad_norm": 8.461178879903688, "learning_rate": 1.9347962804963238e-06, "loss": 1.309, "step": 5779 }, { "epoch": 0.72, "grad_norm": 7.998149713552538, "learning_rate": 1.933205083187843e-06, "loss": 1.1229, "step": 5780 }, { "epoch": 0.72, "grad_norm": 8.63807315122844, "learning_rate": 1.931614383637092e-06, "loss": 0.9803, "step": 5781 }, { "epoch": 0.72, "grad_norm": 8.410214920658344, "learning_rate": 1.930024182102249e-06, "loss": 1.0957, "step": 5782 }, { "epoch": 0.72, "grad_norm": 7.132503716076036, "learning_rate": 1.9284344788414137e-06, "loss": 1.0828, "step": 5783 }, { "epoch": 0.72, "grad_norm": 7.4788689230906416, "learning_rate": 1.9268452741126033e-06, "loss": 0.988, "step": 5784 }, { "epoch": 0.72, "grad_norm": 8.338277676478127, "learning_rate": 1.925256568173755e-06, "loss": 1.3617, "step": 5785 }, { "epoch": 0.72, "grad_norm": 6.447433530039869, "learning_rate": 1.923668361282723e-06, "loss": 0.6693, "step": 5786 }, { "epoch": 0.72, "grad_norm": 9.295433439391973, "learning_rate": 1.9220806536972824e-06, "loss": 1.164, "step": 5787 }, { "epoch": 0.72, "grad_norm": 9.018526201266303, "learning_rate": 1.920493445675129e-06, "loss": 1.2495, "step": 5788 }, { "epoch": 0.72, "grad_norm": 6.492200152200832, "learning_rate": 1.9189067374738723e-06, "loss": 0.9186, "step": 5789 }, { "epoch": 0.72, "grad_norm": 6.9001339843616725, "learning_rate": 1.917320529351045e-06, "loss": 0.868, "step": 5790 }, { "epoch": 0.72, "grad_norm": 8.195286076241356, "learning_rate": 1.9157348215640974e-06, "loss": 1.0742, "step": 5791 }, { "epoch": 0.72, "grad_norm": 7.012414645896322, "learning_rate": 1.9141496143704014e-06, "loss": 0.8901, "step": 5792 }, { "epoch": 0.72, "grad_norm": 7.865784987402999, "learning_rate": 1.9125649080272383e-06, "loss": 1.1264, "step": 5793 }, { "epoch": 0.72, "grad_norm": 8.833086584221862, "learning_rate": 1.9109807027918226e-06, "loss": 1.0395, "step": 5794 }, { "epoch": 0.72, "grad_norm": 9.580606956495338, "learning_rate": 1.9093969989212735e-06, "loss": 1.0384, "step": 5795 }, { "epoch": 0.72, "grad_norm": 6.862982558563486, "learning_rate": 1.907813796672637e-06, "loss": 0.9788, "step": 5796 }, { "epoch": 0.72, "grad_norm": 8.44685293209522, "learning_rate": 1.906231096302878e-06, "loss": 1.3065, "step": 5797 }, { "epoch": 0.72, "grad_norm": 8.061072546178035, "learning_rate": 1.9046488980688727e-06, "loss": 0.7548, "step": 5798 }, { "epoch": 0.72, "grad_norm": 6.586219656860169, "learning_rate": 1.9030672022274233e-06, "loss": 0.7427, "step": 5799 }, { "epoch": 0.72, "grad_norm": 13.48798314237581, "learning_rate": 1.9014860090352477e-06, "loss": 0.7576, "step": 5800 }, { "epoch": 0.72, "grad_norm": 7.677545050600655, "learning_rate": 1.8999053187489836e-06, "loss": 1.1055, "step": 5801 }, { "epoch": 0.72, "grad_norm": 5.260315165175075, "learning_rate": 1.8983251316251822e-06, "loss": 0.544, "step": 5802 }, { "epoch": 0.72, "grad_norm": 6.4520004474628765, "learning_rate": 1.896745447920319e-06, "loss": 0.678, "step": 5803 }, { "epoch": 0.72, "grad_norm": 8.431961641225655, "learning_rate": 1.8951662678907845e-06, "loss": 1.2563, "step": 5804 }, { "epoch": 0.72, "grad_norm": 6.809440717576274, "learning_rate": 1.8935875917928893e-06, "loss": 0.9789, "step": 5805 }, { "epoch": 0.72, "grad_norm": 8.94870473509577, "learning_rate": 1.892009419882862e-06, "loss": 1.2833, "step": 5806 }, { "epoch": 0.72, "grad_norm": 7.743113321249665, "learning_rate": 1.8904317524168458e-06, "loss": 0.8516, "step": 5807 }, { "epoch": 0.72, "grad_norm": 7.611626669743821, "learning_rate": 1.8888545896509064e-06, "loss": 1.0498, "step": 5808 }, { "epoch": 0.72, "grad_norm": 5.985522583816668, "learning_rate": 1.8872779318410262e-06, "loss": 0.7566, "step": 5809 }, { "epoch": 0.72, "grad_norm": 8.564803491159035, "learning_rate": 1.8857017792431065e-06, "loss": 0.9418, "step": 5810 }, { "epoch": 0.72, "grad_norm": 7.831332617834449, "learning_rate": 1.8841261321129629e-06, "loss": 0.8054, "step": 5811 }, { "epoch": 0.72, "grad_norm": 8.600910865114963, "learning_rate": 1.8825509907063328e-06, "loss": 0.9217, "step": 5812 }, { "epoch": 0.72, "grad_norm": 7.647002543893038, "learning_rate": 1.8809763552788707e-06, "loss": 1.0353, "step": 5813 }, { "epoch": 0.72, "grad_norm": 5.978341792959838, "learning_rate": 1.8794022260861483e-06, "loss": 0.954, "step": 5814 }, { "epoch": 0.72, "grad_norm": 8.174100919564077, "learning_rate": 1.877828603383658e-06, "loss": 0.7673, "step": 5815 }, { "epoch": 0.72, "grad_norm": 6.190229382103745, "learning_rate": 1.8762554874268025e-06, "loss": 0.9377, "step": 5816 }, { "epoch": 0.72, "grad_norm": 6.895794413706725, "learning_rate": 1.8746828784709098e-06, "loss": 0.9189, "step": 5817 }, { "epoch": 0.72, "grad_norm": 7.253699027270779, "learning_rate": 1.8731107767712252e-06, "loss": 0.7266, "step": 5818 }, { "epoch": 0.72, "grad_norm": 7.800489499898389, "learning_rate": 1.8715391825829055e-06, "loss": 0.9611, "step": 5819 }, { "epoch": 0.72, "grad_norm": 9.639177510271567, "learning_rate": 1.8699680961610307e-06, "loss": 1.2382, "step": 5820 }, { "epoch": 0.72, "grad_norm": 7.055913858296071, "learning_rate": 1.8683975177605968e-06, "loss": 0.8597, "step": 5821 }, { "epoch": 0.72, "grad_norm": 7.352546869759753, "learning_rate": 1.8668274476365173e-06, "loss": 1.0883, "step": 5822 }, { "epoch": 0.72, "grad_norm": 8.937971846453268, "learning_rate": 1.8652578860436238e-06, "loss": 0.9318, "step": 5823 }, { "epoch": 0.72, "grad_norm": 7.320473045259121, "learning_rate": 1.863688833236666e-06, "loss": 0.9818, "step": 5824 }, { "epoch": 0.72, "grad_norm": 7.165617011383311, "learning_rate": 1.8621202894703067e-06, "loss": 1.1548, "step": 5825 }, { "epoch": 0.72, "grad_norm": 7.742615159834531, "learning_rate": 1.86055225499913e-06, "loss": 1.1236, "step": 5826 }, { "epoch": 0.72, "grad_norm": 7.7390063557719015, "learning_rate": 1.8589847300776399e-06, "loss": 0.8583, "step": 5827 }, { "epoch": 0.72, "grad_norm": 8.95025693591185, "learning_rate": 1.8574177149602496e-06, "loss": 1.1991, "step": 5828 }, { "epoch": 0.72, "grad_norm": 8.405341292666787, "learning_rate": 1.8558512099012964e-06, "loss": 0.8447, "step": 5829 }, { "epoch": 0.73, "grad_norm": 6.99547421286452, "learning_rate": 1.8542852151550329e-06, "loss": 0.9682, "step": 5830 }, { "epoch": 0.73, "grad_norm": 7.97386212470743, "learning_rate": 1.8527197309756285e-06, "loss": 1.3323, "step": 5831 }, { "epoch": 0.73, "grad_norm": 8.060763466879113, "learning_rate": 1.85115475761717e-06, "loss": 1.2298, "step": 5832 }, { "epoch": 0.73, "grad_norm": 7.977876239329818, "learning_rate": 1.8495902953336625e-06, "loss": 0.9848, "step": 5833 }, { "epoch": 0.73, "grad_norm": 8.411907961492151, "learning_rate": 1.8480263443790241e-06, "loss": 1.1624, "step": 5834 }, { "epoch": 0.73, "grad_norm": 6.750828357999516, "learning_rate": 1.8464629050070941e-06, "loss": 0.5632, "step": 5835 }, { "epoch": 0.73, "grad_norm": 7.246929623613918, "learning_rate": 1.8448999774716291e-06, "loss": 0.8449, "step": 5836 }, { "epoch": 0.73, "grad_norm": 7.5468747143193085, "learning_rate": 1.8433375620262973e-06, "loss": 0.6253, "step": 5837 }, { "epoch": 0.73, "grad_norm": 5.1477683660823255, "learning_rate": 1.8417756589246888e-06, "loss": 0.6025, "step": 5838 }, { "epoch": 0.73, "grad_norm": 8.175450833865536, "learning_rate": 1.8402142684203095e-06, "loss": 1.2231, "step": 5839 }, { "epoch": 0.73, "grad_norm": 8.634006539220358, "learning_rate": 1.8386533907665815e-06, "loss": 0.9032, "step": 5840 }, { "epoch": 0.73, "grad_norm": 6.955479753517562, "learning_rate": 1.837093026216844e-06, "loss": 0.8842, "step": 5841 }, { "epoch": 0.73, "grad_norm": 7.597913775680036, "learning_rate": 1.835533175024355e-06, "loss": 1.106, "step": 5842 }, { "epoch": 0.73, "grad_norm": 6.573976170702158, "learning_rate": 1.8339738374422823e-06, "loss": 0.986, "step": 5843 }, { "epoch": 0.73, "grad_norm": 6.839796959813083, "learning_rate": 1.8324150137237174e-06, "loss": 0.6954, "step": 5844 }, { "epoch": 0.73, "grad_norm": 8.609846352629118, "learning_rate": 1.830856704121668e-06, "loss": 1.0916, "step": 5845 }, { "epoch": 0.73, "grad_norm": 7.360371579573928, "learning_rate": 1.8292989088890522e-06, "loss": 0.759, "step": 5846 }, { "epoch": 0.73, "grad_norm": 10.091246823746078, "learning_rate": 1.8277416282787108e-06, "loss": 0.8222, "step": 5847 }, { "epoch": 0.73, "grad_norm": 8.111270893543468, "learning_rate": 1.8261848625433993e-06, "loss": 1.1434, "step": 5848 }, { "epoch": 0.73, "grad_norm": 7.527343253454421, "learning_rate": 1.8246286119357903e-06, "loss": 1.2034, "step": 5849 }, { "epoch": 0.73, "grad_norm": 7.5559162727500295, "learning_rate": 1.8230728767084681e-06, "loss": 1.2487, "step": 5850 }, { "epoch": 0.73, "grad_norm": 7.831807501505738, "learning_rate": 1.8215176571139425e-06, "loss": 1.2, "step": 5851 }, { "epoch": 0.73, "grad_norm": 6.9421239953129135, "learning_rate": 1.8199629534046299e-06, "loss": 0.628, "step": 5852 }, { "epoch": 0.73, "grad_norm": 7.121080417301748, "learning_rate": 1.8184087658328692e-06, "loss": 0.9293, "step": 5853 }, { "epoch": 0.73, "grad_norm": 6.192155580200836, "learning_rate": 1.8168550946509144e-06, "loss": 0.7851, "step": 5854 }, { "epoch": 0.73, "grad_norm": 6.535737687244748, "learning_rate": 1.8153019401109324e-06, "loss": 0.8145, "step": 5855 }, { "epoch": 0.73, "grad_norm": 7.652796275686151, "learning_rate": 1.8137493024650094e-06, "loss": 0.9491, "step": 5856 }, { "epoch": 0.73, "grad_norm": 8.022361543471927, "learning_rate": 1.8121971819651485e-06, "loss": 1.1662, "step": 5857 }, { "epoch": 0.73, "grad_norm": 7.28822139482956, "learning_rate": 1.8106455788632676e-06, "loss": 0.8352, "step": 5858 }, { "epoch": 0.73, "grad_norm": 8.677888581578655, "learning_rate": 1.809094493411198e-06, "loss": 1.0789, "step": 5859 }, { "epoch": 0.73, "grad_norm": 7.766575944752728, "learning_rate": 1.8075439258606914e-06, "loss": 0.7304, "step": 5860 }, { "epoch": 0.73, "grad_norm": 7.06444693510365, "learning_rate": 1.8059938764634127e-06, "loss": 0.6023, "step": 5861 }, { "epoch": 0.73, "grad_norm": 8.09431830343951, "learning_rate": 1.8044443454709436e-06, "loss": 1.2532, "step": 5862 }, { "epoch": 0.73, "grad_norm": 8.369157209145818, "learning_rate": 1.802895333134783e-06, "loss": 1.0408, "step": 5863 }, { "epoch": 0.73, "grad_norm": 9.313952722610908, "learning_rate": 1.8013468397063416e-06, "loss": 1.1562, "step": 5864 }, { "epoch": 0.73, "grad_norm": 8.308689667946005, "learning_rate": 1.7997988654369497e-06, "loss": 0.9577, "step": 5865 }, { "epoch": 0.73, "grad_norm": 6.617609076375526, "learning_rate": 1.798251410577852e-06, "loss": 0.8417, "step": 5866 }, { "epoch": 0.73, "grad_norm": 7.838822354384654, "learning_rate": 1.7967044753802103e-06, "loss": 0.9901, "step": 5867 }, { "epoch": 0.73, "grad_norm": 8.241175979980973, "learning_rate": 1.795158060095098e-06, "loss": 0.9972, "step": 5868 }, { "epoch": 0.73, "grad_norm": 6.839036441642912, "learning_rate": 1.7936121649735078e-06, "loss": 0.8541, "step": 5869 }, { "epoch": 0.73, "grad_norm": 7.438372648696305, "learning_rate": 1.792066790266348e-06, "loss": 1.1664, "step": 5870 }, { "epoch": 0.73, "grad_norm": 6.527043513147024, "learning_rate": 1.7905219362244402e-06, "loss": 0.7855, "step": 5871 }, { "epoch": 0.73, "grad_norm": 7.71441237078178, "learning_rate": 1.788977603098525e-06, "loss": 1.0081, "step": 5872 }, { "epoch": 0.73, "grad_norm": 7.642792217820639, "learning_rate": 1.7874337911392536e-06, "loss": 0.8643, "step": 5873 }, { "epoch": 0.73, "grad_norm": 9.733022957608355, "learning_rate": 1.7858905005971955e-06, "loss": 0.9667, "step": 5874 }, { "epoch": 0.73, "grad_norm": 7.7228774988044115, "learning_rate": 1.784347731722838e-06, "loss": 1.0064, "step": 5875 }, { "epoch": 0.73, "grad_norm": 7.5056880454092685, "learning_rate": 1.7828054847665772e-06, "loss": 0.8953, "step": 5876 }, { "epoch": 0.73, "grad_norm": 7.791567927230154, "learning_rate": 1.7812637599787298e-06, "loss": 0.7836, "step": 5877 }, { "epoch": 0.73, "grad_norm": 7.542811244143136, "learning_rate": 1.7797225576095262e-06, "loss": 0.8806, "step": 5878 }, { "epoch": 0.73, "grad_norm": 7.406538463512458, "learning_rate": 1.7781818779091126e-06, "loss": 0.779, "step": 5879 }, { "epoch": 0.73, "grad_norm": 7.423671592087555, "learning_rate": 1.7766417211275495e-06, "loss": 1.0488, "step": 5880 }, { "epoch": 0.73, "grad_norm": 5.739659890601174, "learning_rate": 1.7751020875148146e-06, "loss": 0.6886, "step": 5881 }, { "epoch": 0.73, "grad_norm": 7.5850874687806575, "learning_rate": 1.7735629773207952e-06, "loss": 0.9289, "step": 5882 }, { "epoch": 0.73, "grad_norm": 7.0682389830898735, "learning_rate": 1.7720243907952995e-06, "loss": 0.9014, "step": 5883 }, { "epoch": 0.73, "grad_norm": 6.852148297430389, "learning_rate": 1.7704863281880496e-06, "loss": 0.8806, "step": 5884 }, { "epoch": 0.73, "grad_norm": 8.38563299618672, "learning_rate": 1.768948789748679e-06, "loss": 1.0039, "step": 5885 }, { "epoch": 0.73, "grad_norm": 7.235105631037719, "learning_rate": 1.7674117757267401e-06, "loss": 0.9676, "step": 5886 }, { "epoch": 0.73, "grad_norm": 6.38421962202883, "learning_rate": 1.7658752863716988e-06, "loss": 0.8658, "step": 5887 }, { "epoch": 0.73, "grad_norm": 8.614274186134564, "learning_rate": 1.7643393219329359e-06, "loss": 1.1174, "step": 5888 }, { "epoch": 0.73, "grad_norm": 8.545146478657438, "learning_rate": 1.762803882659746e-06, "loss": 1.3103, "step": 5889 }, { "epoch": 0.73, "grad_norm": 7.974993175986364, "learning_rate": 1.7612689688013417e-06, "loss": 1.0537, "step": 5890 }, { "epoch": 0.73, "grad_norm": 7.46536616733226, "learning_rate": 1.759734580606845e-06, "loss": 1.1057, "step": 5891 }, { "epoch": 0.73, "grad_norm": 6.921743497962384, "learning_rate": 1.7582007183252969e-06, "loss": 0.852, "step": 5892 }, { "epoch": 0.73, "grad_norm": 6.535517678712168, "learning_rate": 1.7566673822056536e-06, "loss": 0.7393, "step": 5893 }, { "epoch": 0.73, "grad_norm": 7.985056545837258, "learning_rate": 1.75513457249678e-06, "loss": 0.8585, "step": 5894 }, { "epoch": 0.73, "grad_norm": 8.343321909733614, "learning_rate": 1.7536022894474619e-06, "loss": 0.9783, "step": 5895 }, { "epoch": 0.73, "grad_norm": 9.605665509908274, "learning_rate": 1.7520705333063969e-06, "loss": 1.179, "step": 5896 }, { "epoch": 0.73, "grad_norm": 7.833222988917871, "learning_rate": 1.7505393043221974e-06, "loss": 0.991, "step": 5897 }, { "epoch": 0.73, "grad_norm": 6.363823460835332, "learning_rate": 1.749008602743391e-06, "loss": 0.7452, "step": 5898 }, { "epoch": 0.73, "grad_norm": 6.7616426520625375, "learning_rate": 1.7474784288184198e-06, "loss": 1.0212, "step": 5899 }, { "epoch": 0.73, "grad_norm": 8.014545551676763, "learning_rate": 1.7459487827956373e-06, "loss": 0.7984, "step": 5900 }, { "epoch": 0.73, "grad_norm": 8.834727528638131, "learning_rate": 1.7444196649233135e-06, "loss": 1.0155, "step": 5901 }, { "epoch": 0.73, "grad_norm": 5.7268581798311144, "learning_rate": 1.7428910754496358e-06, "loss": 0.5098, "step": 5902 }, { "epoch": 0.73, "grad_norm": 5.853425267162043, "learning_rate": 1.7413630146226984e-06, "loss": 0.6708, "step": 5903 }, { "epoch": 0.73, "grad_norm": 6.980011521305462, "learning_rate": 1.7398354826905167e-06, "loss": 0.7836, "step": 5904 }, { "epoch": 0.73, "grad_norm": 6.824770798459683, "learning_rate": 1.7383084799010164e-06, "loss": 1.0034, "step": 5905 }, { "epoch": 0.73, "grad_norm": 7.451664147231238, "learning_rate": 1.7367820065020392e-06, "loss": 1.1014, "step": 5906 }, { "epoch": 0.73, "grad_norm": 8.21994407238061, "learning_rate": 1.7352560627413395e-06, "loss": 1.0423, "step": 5907 }, { "epoch": 0.73, "grad_norm": 8.447366017151834, "learning_rate": 1.7337306488665888e-06, "loss": 1.0147, "step": 5908 }, { "epoch": 0.73, "grad_norm": 7.792733147214253, "learning_rate": 1.7322057651253672e-06, "loss": 1.3195, "step": 5909 }, { "epoch": 0.74, "grad_norm": 6.230636361109052, "learning_rate": 1.7306814117651715e-06, "loss": 0.9635, "step": 5910 }, { "epoch": 0.74, "grad_norm": 6.84327780988611, "learning_rate": 1.7291575890334166e-06, "loss": 0.6957, "step": 5911 }, { "epoch": 0.74, "grad_norm": 9.467212962879865, "learning_rate": 1.7276342971774225e-06, "loss": 1.0169, "step": 5912 }, { "epoch": 0.74, "grad_norm": 7.1175304870491285, "learning_rate": 1.7261115364444304e-06, "loss": 0.8198, "step": 5913 }, { "epoch": 0.74, "grad_norm": 7.9115421257164815, "learning_rate": 1.7245893070815922e-06, "loss": 0.8823, "step": 5914 }, { "epoch": 0.74, "grad_norm": 6.507650240656259, "learning_rate": 1.723067609335976e-06, "loss": 0.7805, "step": 5915 }, { "epoch": 0.74, "grad_norm": 7.906060770466862, "learning_rate": 1.721546443454558e-06, "loss": 1.0342, "step": 5916 }, { "epoch": 0.74, "grad_norm": 7.6206911822564365, "learning_rate": 1.7200258096842342e-06, "loss": 1.1315, "step": 5917 }, { "epoch": 0.74, "grad_norm": 7.332122666722046, "learning_rate": 1.7185057082718115e-06, "loss": 1.0311, "step": 5918 }, { "epoch": 0.74, "grad_norm": 6.952988156102764, "learning_rate": 1.7169861394640108e-06, "loss": 0.9526, "step": 5919 }, { "epoch": 0.74, "grad_norm": 7.105065249420177, "learning_rate": 1.7154671035074682e-06, "loss": 0.9459, "step": 5920 }, { "epoch": 0.74, "grad_norm": 7.366926126087524, "learning_rate": 1.7139486006487283e-06, "loss": 0.9575, "step": 5921 }, { "epoch": 0.74, "grad_norm": 6.902376380489357, "learning_rate": 1.7124306311342536e-06, "loss": 1.0522, "step": 5922 }, { "epoch": 0.74, "grad_norm": 10.044540418518881, "learning_rate": 1.7109131952104196e-06, "loss": 1.2863, "step": 5923 }, { "epoch": 0.74, "grad_norm": 7.1358848694217505, "learning_rate": 1.709396293123516e-06, "loss": 1.044, "step": 5924 }, { "epoch": 0.74, "grad_norm": 7.731014842050064, "learning_rate": 1.707879925119741e-06, "loss": 1.2748, "step": 5925 }, { "epoch": 0.74, "grad_norm": 8.862857868755768, "learning_rate": 1.7063640914452113e-06, "loss": 1.2868, "step": 5926 }, { "epoch": 0.74, "grad_norm": 8.27090196126991, "learning_rate": 1.7048487923459544e-06, "loss": 1.0426, "step": 5927 }, { "epoch": 0.74, "grad_norm": 6.831669831942169, "learning_rate": 1.7033340280679128e-06, "loss": 0.4972, "step": 5928 }, { "epoch": 0.74, "grad_norm": 7.853240318749423, "learning_rate": 1.7018197988569417e-06, "loss": 1.1459, "step": 5929 }, { "epoch": 0.74, "grad_norm": 7.262362123647092, "learning_rate": 1.7003061049588065e-06, "loss": 1.0932, "step": 5930 }, { "epoch": 0.74, "grad_norm": 7.592502811926839, "learning_rate": 1.6987929466191889e-06, "loss": 1.1219, "step": 5931 }, { "epoch": 0.74, "grad_norm": 6.436704292959608, "learning_rate": 1.697280324083685e-06, "loss": 0.7545, "step": 5932 }, { "epoch": 0.74, "grad_norm": 6.8272907521032105, "learning_rate": 1.6957682375977986e-06, "loss": 0.7789, "step": 5933 }, { "epoch": 0.74, "grad_norm": 8.738214392662819, "learning_rate": 1.6942566874069506e-06, "loss": 1.0139, "step": 5934 }, { "epoch": 0.74, "grad_norm": 7.177783499347855, "learning_rate": 1.692745673756474e-06, "loss": 1.1351, "step": 5935 }, { "epoch": 0.74, "grad_norm": 6.723868191963561, "learning_rate": 1.6912351968916157e-06, "loss": 0.5457, "step": 5936 }, { "epoch": 0.74, "grad_norm": 6.826943374948314, "learning_rate": 1.689725257057534e-06, "loss": 1.0309, "step": 5937 }, { "epoch": 0.74, "grad_norm": 7.09031894471779, "learning_rate": 1.688215854499301e-06, "loss": 0.9123, "step": 5938 }, { "epoch": 0.74, "grad_norm": 7.329938298828167, "learning_rate": 1.6867069894618993e-06, "loss": 0.7303, "step": 5939 }, { "epoch": 0.74, "grad_norm": 8.508156278251176, "learning_rate": 1.6851986621902267e-06, "loss": 1.2607, "step": 5940 }, { "epoch": 0.74, "grad_norm": 7.125471080060795, "learning_rate": 1.6836908729290952e-06, "loss": 1.0649, "step": 5941 }, { "epoch": 0.74, "grad_norm": 6.741980234823364, "learning_rate": 1.6821836219232235e-06, "loss": 0.553, "step": 5942 }, { "epoch": 0.74, "grad_norm": 7.746994898635353, "learning_rate": 1.6806769094172487e-06, "loss": 0.756, "step": 5943 }, { "epoch": 0.74, "grad_norm": 6.27072466340677, "learning_rate": 1.6791707356557191e-06, "loss": 0.6871, "step": 5944 }, { "epoch": 0.74, "grad_norm": 8.078074124034165, "learning_rate": 1.6776651008830935e-06, "loss": 1.0182, "step": 5945 }, { "epoch": 0.74, "grad_norm": 7.196804081149406, "learning_rate": 1.6761600053437465e-06, "loss": 1.1098, "step": 5946 }, { "epoch": 0.74, "grad_norm": 8.2695503909974, "learning_rate": 1.6746554492819638e-06, "loss": 1.0949, "step": 5947 }, { "epoch": 0.74, "grad_norm": 7.490919594968587, "learning_rate": 1.6731514329419408e-06, "loss": 0.8447, "step": 5948 }, { "epoch": 0.74, "grad_norm": 7.415635070142045, "learning_rate": 1.6716479565677885e-06, "loss": 0.8126, "step": 5949 }, { "epoch": 0.74, "grad_norm": 7.430957784938795, "learning_rate": 1.6701450204035307e-06, "loss": 0.9253, "step": 5950 }, { "epoch": 0.74, "grad_norm": 8.746742082991126, "learning_rate": 1.6686426246931004e-06, "loss": 1.2935, "step": 5951 }, { "epoch": 0.74, "grad_norm": 7.0125966354441065, "learning_rate": 1.6671407696803454e-06, "loss": 0.7271, "step": 5952 }, { "epoch": 0.74, "grad_norm": 7.092479029136181, "learning_rate": 1.665639455609025e-06, "loss": 0.7871, "step": 5953 }, { "epoch": 0.74, "grad_norm": 6.512257299111101, "learning_rate": 1.6641386827228107e-06, "loss": 0.6632, "step": 5954 }, { "epoch": 0.74, "grad_norm": 6.784209671002042, "learning_rate": 1.6626384512652865e-06, "loss": 0.7394, "step": 5955 }, { "epoch": 0.74, "grad_norm": 7.345423633619345, "learning_rate": 1.6611387614799502e-06, "loss": 0.9714, "step": 5956 }, { "epoch": 0.74, "grad_norm": 7.090926086778088, "learning_rate": 1.6596396136102061e-06, "loss": 0.7468, "step": 5957 }, { "epoch": 0.74, "grad_norm": 8.004120227279044, "learning_rate": 1.6581410078993754e-06, "loss": 1.2791, "step": 5958 }, { "epoch": 0.74, "grad_norm": 6.582655172701581, "learning_rate": 1.6566429445906923e-06, "loss": 0.6219, "step": 5959 }, { "epoch": 0.74, "grad_norm": 6.915793037008937, "learning_rate": 1.6551454239272973e-06, "loss": 0.8444, "step": 5960 }, { "epoch": 0.74, "grad_norm": 7.760971519831862, "learning_rate": 1.653648446152248e-06, "loss": 1.1683, "step": 5961 }, { "epoch": 0.74, "grad_norm": 6.66203812645366, "learning_rate": 1.6521520115085115e-06, "loss": 0.7357, "step": 5962 }, { "epoch": 0.74, "grad_norm": 5.419762745878408, "learning_rate": 1.650656120238968e-06, "loss": 0.8196, "step": 5963 }, { "epoch": 0.74, "grad_norm": 6.016311451905913, "learning_rate": 1.649160772586409e-06, "loss": 0.5371, "step": 5964 }, { "epoch": 0.74, "grad_norm": 6.442724810290603, "learning_rate": 1.6476659687935393e-06, "loss": 0.7188, "step": 5965 }, { "epoch": 0.74, "grad_norm": 8.533068888681756, "learning_rate": 1.64617170910297e-06, "loss": 1.1251, "step": 5966 }, { "epoch": 0.74, "grad_norm": 8.108080746397654, "learning_rate": 1.6446779937572299e-06, "loss": 1.0924, "step": 5967 }, { "epoch": 0.74, "grad_norm": 7.805440981098618, "learning_rate": 1.6431848229987586e-06, "loss": 0.9453, "step": 5968 }, { "epoch": 0.74, "grad_norm": 6.831973786046416, "learning_rate": 1.6416921970699023e-06, "loss": 0.9374, "step": 5969 }, { "epoch": 0.74, "grad_norm": 5.683535004452241, "learning_rate": 1.640200116212925e-06, "loss": 0.7127, "step": 5970 }, { "epoch": 0.74, "grad_norm": 6.651374083565638, "learning_rate": 1.6387085806699987e-06, "loss": 0.8747, "step": 5971 }, { "epoch": 0.74, "grad_norm": 7.21247556618134, "learning_rate": 1.63721759068321e-06, "loss": 1.4081, "step": 5972 }, { "epoch": 0.74, "grad_norm": 8.249468180901301, "learning_rate": 1.6357271464945517e-06, "loss": 1.1147, "step": 5973 }, { "epoch": 0.74, "grad_norm": 7.495543568418827, "learning_rate": 1.6342372483459323e-06, "loss": 1.2613, "step": 5974 }, { "epoch": 0.74, "grad_norm": 6.990966668167194, "learning_rate": 1.6327478964791705e-06, "loss": 0.9259, "step": 5975 }, { "epoch": 0.74, "grad_norm": 6.805640803411766, "learning_rate": 1.6312590911359972e-06, "loss": 0.7175, "step": 5976 }, { "epoch": 0.74, "grad_norm": 7.8832033900349, "learning_rate": 1.6297708325580536e-06, "loss": 0.817, "step": 5977 }, { "epoch": 0.74, "grad_norm": 7.613555394927017, "learning_rate": 1.6282831209868905e-06, "loss": 1.1696, "step": 5978 }, { "epoch": 0.74, "grad_norm": 6.691916019062038, "learning_rate": 1.626795956663973e-06, "loss": 0.8912, "step": 5979 }, { "epoch": 0.74, "grad_norm": 7.974420136465641, "learning_rate": 1.6253093398306757e-06, "loss": 1.0979, "step": 5980 }, { "epoch": 0.74, "grad_norm": 6.979902262592967, "learning_rate": 1.623823270728287e-06, "loss": 0.9282, "step": 5981 }, { "epoch": 0.74, "grad_norm": 9.1330036722357, "learning_rate": 1.6223377495980003e-06, "loss": 1.099, "step": 5982 }, { "epoch": 0.74, "grad_norm": 8.263130938300032, "learning_rate": 1.6208527766809252e-06, "loss": 0.9767, "step": 5983 }, { "epoch": 0.74, "grad_norm": 7.556225504512814, "learning_rate": 1.619368352218082e-06, "loss": 0.84, "step": 5984 }, { "epoch": 0.74, "grad_norm": 7.475572484236809, "learning_rate": 1.6178844764503998e-06, "loss": 0.7797, "step": 5985 }, { "epoch": 0.74, "grad_norm": 9.232869021865413, "learning_rate": 1.6164011496187226e-06, "loss": 0.9976, "step": 5986 }, { "epoch": 0.74, "grad_norm": 7.583969198936061, "learning_rate": 1.6149183719637984e-06, "loss": 0.9985, "step": 5987 }, { "epoch": 0.74, "grad_norm": 7.7083611559349805, "learning_rate": 1.6134361437262924e-06, "loss": 0.9724, "step": 5988 }, { "epoch": 0.74, "grad_norm": 8.663155251965478, "learning_rate": 1.61195446514678e-06, "loss": 0.7498, "step": 5989 }, { "epoch": 0.75, "grad_norm": 7.945060845817601, "learning_rate": 1.610473336465742e-06, "loss": 0.884, "step": 5990 }, { "epoch": 0.75, "grad_norm": 8.116826358906955, "learning_rate": 1.608992757923576e-06, "loss": 0.9298, "step": 5991 }, { "epoch": 0.75, "grad_norm": 6.5915825659963305, "learning_rate": 1.6075127297605869e-06, "loss": 0.5061, "step": 5992 }, { "epoch": 0.75, "grad_norm": 7.0416293087176465, "learning_rate": 1.606033252216993e-06, "loss": 0.8139, "step": 5993 }, { "epoch": 0.75, "grad_norm": 7.346836326850587, "learning_rate": 1.604554325532921e-06, "loss": 1.045, "step": 5994 }, { "epoch": 0.75, "grad_norm": 7.021638067561093, "learning_rate": 1.6030759499484106e-06, "loss": 0.5773, "step": 5995 }, { "epoch": 0.75, "grad_norm": 6.883697866534524, "learning_rate": 1.601598125703407e-06, "loss": 0.7912, "step": 5996 }, { "epoch": 0.75, "grad_norm": 7.5423343656496264, "learning_rate": 1.6001208530377703e-06, "loss": 0.9658, "step": 5997 }, { "epoch": 0.75, "grad_norm": 7.284897369362996, "learning_rate": 1.5986441321912728e-06, "loss": 1.0389, "step": 5998 }, { "epoch": 0.75, "grad_norm": 6.579798629687353, "learning_rate": 1.5971679634035897e-06, "loss": 0.7898, "step": 5999 }, { "epoch": 0.75, "grad_norm": 7.828237591726074, "learning_rate": 1.5956923469143143e-06, "loss": 0.8539, "step": 6000 }, { "epoch": 0.75, "grad_norm": 6.046404569711753, "learning_rate": 1.5942172829629466e-06, "loss": 0.5499, "step": 6001 }, { "epoch": 0.75, "grad_norm": 5.971236821846037, "learning_rate": 1.5927427717888977e-06, "loss": 0.5832, "step": 6002 }, { "epoch": 0.75, "grad_norm": 7.799529241528319, "learning_rate": 1.5912688136314886e-06, "loss": 0.7482, "step": 6003 }, { "epoch": 0.75, "grad_norm": 7.179369730004088, "learning_rate": 1.5897954087299534e-06, "loss": 0.819, "step": 6004 }, { "epoch": 0.75, "grad_norm": 7.756792664925699, "learning_rate": 1.5883225573234295e-06, "loss": 1.2276, "step": 6005 }, { "epoch": 0.75, "grad_norm": 6.705284310894783, "learning_rate": 1.5868502596509716e-06, "loss": 0.9218, "step": 6006 }, { "epoch": 0.75, "grad_norm": 7.551141638150958, "learning_rate": 1.5853785159515422e-06, "loss": 1.0167, "step": 6007 }, { "epoch": 0.75, "grad_norm": 8.189947881020357, "learning_rate": 1.5839073264640109e-06, "loss": 1.0402, "step": 6008 }, { "epoch": 0.75, "grad_norm": 10.57251853939412, "learning_rate": 1.5824366914271611e-06, "loss": 0.6156, "step": 6009 }, { "epoch": 0.75, "grad_norm": 6.9542334271547785, "learning_rate": 1.5809666110796856e-06, "loss": 1.1194, "step": 6010 }, { "epoch": 0.75, "grad_norm": 8.17325576694272, "learning_rate": 1.5794970856601855e-06, "loss": 1.0663, "step": 6011 }, { "epoch": 0.75, "grad_norm": 6.60007415001683, "learning_rate": 1.5780281154071742e-06, "loss": 0.5922, "step": 6012 }, { "epoch": 0.75, "grad_norm": 7.939093847831966, "learning_rate": 1.5765597005590737e-06, "loss": 0.7697, "step": 6013 }, { "epoch": 0.75, "grad_norm": 6.456398476344836, "learning_rate": 1.5750918413542138e-06, "loss": 0.6914, "step": 6014 }, { "epoch": 0.75, "grad_norm": 7.520756942504221, "learning_rate": 1.5736245380308379e-06, "loss": 1.0143, "step": 6015 }, { "epoch": 0.75, "grad_norm": 9.180472776234712, "learning_rate": 1.572157790827098e-06, "loss": 1.1149, "step": 6016 }, { "epoch": 0.75, "grad_norm": 8.396258976955767, "learning_rate": 1.570691599981053e-06, "loss": 1.0349, "step": 6017 }, { "epoch": 0.75, "grad_norm": 9.4941520318139, "learning_rate": 1.5692259657306741e-06, "loss": 1.1457, "step": 6018 }, { "epoch": 0.75, "grad_norm": 6.013728009191721, "learning_rate": 1.5677608883138433e-06, "loss": 0.9431, "step": 6019 }, { "epoch": 0.75, "grad_norm": 6.848392117736283, "learning_rate": 1.56629636796835e-06, "loss": 1.1389, "step": 6020 }, { "epoch": 0.75, "grad_norm": 6.1684547831985554, "learning_rate": 1.5648324049318947e-06, "loss": 0.585, "step": 6021 }, { "epoch": 0.75, "grad_norm": 6.2536129419502275, "learning_rate": 1.563368999442087e-06, "loss": 0.7987, "step": 6022 }, { "epoch": 0.75, "grad_norm": 8.168061669238885, "learning_rate": 1.561906151736443e-06, "loss": 1.22, "step": 6023 }, { "epoch": 0.75, "grad_norm": 8.08610831821841, "learning_rate": 1.5604438620523932e-06, "loss": 0.9726, "step": 6024 }, { "epoch": 0.75, "grad_norm": 7.798018267302895, "learning_rate": 1.5589821306272762e-06, "loss": 1.216, "step": 6025 }, { "epoch": 0.75, "grad_norm": 8.102684262194096, "learning_rate": 1.5575209576983357e-06, "loss": 1.1044, "step": 6026 }, { "epoch": 0.75, "grad_norm": 8.233237291824139, "learning_rate": 1.5560603435027304e-06, "loss": 0.7751, "step": 6027 }, { "epoch": 0.75, "grad_norm": 8.753937450157629, "learning_rate": 1.5546002882775252e-06, "loss": 0.9029, "step": 6028 }, { "epoch": 0.75, "grad_norm": 9.189540620309856, "learning_rate": 1.5531407922596958e-06, "loss": 1.4025, "step": 6029 }, { "epoch": 0.75, "grad_norm": 7.415542022706103, "learning_rate": 1.5516818556861285e-06, "loss": 1.0143, "step": 6030 }, { "epoch": 0.75, "grad_norm": 7.304343361057231, "learning_rate": 1.550223478793612e-06, "loss": 0.7975, "step": 6031 }, { "epoch": 0.75, "grad_norm": 8.090666390333434, "learning_rate": 1.548765661818852e-06, "loss": 0.9289, "step": 6032 }, { "epoch": 0.75, "grad_norm": 7.159177200556087, "learning_rate": 1.5473084049984593e-06, "loss": 0.9521, "step": 6033 }, { "epoch": 0.75, "grad_norm": 8.16362401960558, "learning_rate": 1.5458517085689574e-06, "loss": 0.7916, "step": 6034 }, { "epoch": 0.75, "grad_norm": 7.680447935049727, "learning_rate": 1.5443955727667715e-06, "loss": 0.8213, "step": 6035 }, { "epoch": 0.75, "grad_norm": 6.892891616701031, "learning_rate": 1.5429399978282428e-06, "loss": 0.7725, "step": 6036 }, { "epoch": 0.75, "grad_norm": 6.86139841419019, "learning_rate": 1.5414849839896195e-06, "loss": 0.9041, "step": 6037 }, { "epoch": 0.75, "grad_norm": 8.002991856154201, "learning_rate": 1.5400305314870596e-06, "loss": 0.9794, "step": 6038 }, { "epoch": 0.75, "grad_norm": 8.146967535813845, "learning_rate": 1.5385766405566249e-06, "loss": 0.995, "step": 6039 }, { "epoch": 0.75, "grad_norm": 6.912576120998694, "learning_rate": 1.537123311434293e-06, "loss": 1.0385, "step": 6040 }, { "epoch": 0.75, "grad_norm": 6.863288375728985, "learning_rate": 1.5356705443559462e-06, "loss": 0.7863, "step": 6041 }, { "epoch": 0.75, "grad_norm": 6.23284603145064, "learning_rate": 1.5342183395573767e-06, "loss": 0.8532, "step": 6042 }, { "epoch": 0.75, "grad_norm": 5.720347623390987, "learning_rate": 1.5327666972742872e-06, "loss": 0.5934, "step": 6043 }, { "epoch": 0.75, "grad_norm": 7.537863451868832, "learning_rate": 1.5313156177422845e-06, "loss": 0.9745, "step": 6044 }, { "epoch": 0.75, "grad_norm": 10.17404629073535, "learning_rate": 1.5298651011968868e-06, "loss": 0.872, "step": 6045 }, { "epoch": 0.75, "grad_norm": 7.146147072223796, "learning_rate": 1.5284151478735244e-06, "loss": 1.0113, "step": 6046 }, { "epoch": 0.75, "grad_norm": 6.682464837452263, "learning_rate": 1.5269657580075287e-06, "loss": 0.8098, "step": 6047 }, { "epoch": 0.75, "grad_norm": 6.592650589910723, "learning_rate": 1.5255169318341444e-06, "loss": 0.9682, "step": 6048 }, { "epoch": 0.75, "grad_norm": 6.830435601766406, "learning_rate": 1.524068669588526e-06, "loss": 0.7547, "step": 6049 }, { "epoch": 0.75, "grad_norm": 6.710952726801638, "learning_rate": 1.5226209715057327e-06, "loss": 0.7649, "step": 6050 }, { "epoch": 0.75, "grad_norm": 7.969909438766694, "learning_rate": 1.5211738378207347e-06, "loss": 1.3015, "step": 6051 }, { "epoch": 0.75, "grad_norm": 7.8043860518245305, "learning_rate": 1.5197272687684106e-06, "loss": 1.1331, "step": 6052 }, { "epoch": 0.75, "grad_norm": 9.230279940243587, "learning_rate": 1.5182812645835442e-06, "loss": 1.4424, "step": 6053 }, { "epoch": 0.75, "grad_norm": 5.3179908517646055, "learning_rate": 1.5168358255008314e-06, "loss": 0.5293, "step": 6054 }, { "epoch": 0.75, "grad_norm": 7.256074107795304, "learning_rate": 1.5153909517548764e-06, "loss": 0.7346, "step": 6055 }, { "epoch": 0.75, "grad_norm": 6.724197668395929, "learning_rate": 1.513946643580187e-06, "loss": 0.7952, "step": 6056 }, { "epoch": 0.75, "grad_norm": 7.235387906832479, "learning_rate": 1.5125029012111841e-06, "loss": 0.8773, "step": 6057 }, { "epoch": 0.75, "grad_norm": 7.249607789749853, "learning_rate": 1.5110597248821946e-06, "loss": 0.9122, "step": 6058 }, { "epoch": 0.75, "grad_norm": 6.592677207886724, "learning_rate": 1.5096171148274546e-06, "loss": 0.8905, "step": 6059 }, { "epoch": 0.75, "grad_norm": 8.273162640657167, "learning_rate": 1.5081750712811073e-06, "loss": 0.9324, "step": 6060 }, { "epoch": 0.75, "grad_norm": 9.37259557351981, "learning_rate": 1.5067335944772055e-06, "loss": 0.927, "step": 6061 }, { "epoch": 0.75, "grad_norm": 7.391505196118463, "learning_rate": 1.5052926846497068e-06, "loss": 0.766, "step": 6062 }, { "epoch": 0.75, "grad_norm": 8.306736208102741, "learning_rate": 1.503852342032479e-06, "loss": 0.9134, "step": 6063 }, { "epoch": 0.75, "grad_norm": 6.90228098865876, "learning_rate": 1.5024125668593004e-06, "loss": 0.748, "step": 6064 }, { "epoch": 0.75, "grad_norm": 7.65625096241402, "learning_rate": 1.500973359363851e-06, "loss": 0.898, "step": 6065 }, { "epoch": 0.75, "grad_norm": 7.491726560435348, "learning_rate": 1.4995347197797227e-06, "loss": 1.0622, "step": 6066 }, { "epoch": 0.75, "grad_norm": 7.4247257244796705, "learning_rate": 1.4980966483404164e-06, "loss": 0.8405, "step": 6067 }, { "epoch": 0.75, "grad_norm": 7.142695330690357, "learning_rate": 1.4966591452793378e-06, "loss": 0.8529, "step": 6068 }, { "epoch": 0.75, "grad_norm": 7.282736326313206, "learning_rate": 1.4952222108298015e-06, "loss": 0.7754, "step": 6069 }, { "epoch": 0.75, "grad_norm": 6.33115690183603, "learning_rate": 1.4937858452250325e-06, "loss": 0.7878, "step": 6070 }, { "epoch": 0.76, "grad_norm": 7.104219244337066, "learning_rate": 1.492350048698157e-06, "loss": 1.0794, "step": 6071 }, { "epoch": 0.76, "grad_norm": 8.471440787357087, "learning_rate": 1.4909148214822134e-06, "loss": 0.9346, "step": 6072 }, { "epoch": 0.76, "grad_norm": 7.096765318657301, "learning_rate": 1.4894801638101502e-06, "loss": 0.8872, "step": 6073 }, { "epoch": 0.76, "grad_norm": 8.13184763796968, "learning_rate": 1.4880460759148163e-06, "loss": 0.9211, "step": 6074 }, { "epoch": 0.76, "grad_norm": 7.539184447535033, "learning_rate": 1.4866125580289737e-06, "loss": 1.344, "step": 6075 }, { "epoch": 0.76, "grad_norm": 7.916910344736676, "learning_rate": 1.4851796103852906e-06, "loss": 0.947, "step": 6076 }, { "epoch": 0.76, "grad_norm": 5.616924153147525, "learning_rate": 1.4837472332163415e-06, "loss": 0.7327, "step": 6077 }, { "epoch": 0.76, "grad_norm": 7.8322802473176605, "learning_rate": 1.4823154267546096e-06, "loss": 1.0328, "step": 6078 }, { "epoch": 0.76, "grad_norm": 7.291820725014869, "learning_rate": 1.4808841912324866e-06, "loss": 0.84, "step": 6079 }, { "epoch": 0.76, "grad_norm": 7.2852341151946165, "learning_rate": 1.4794535268822674e-06, "loss": 0.9907, "step": 6080 }, { "epoch": 0.76, "grad_norm": 7.088334007881328, "learning_rate": 1.4780234339361572e-06, "loss": 0.6659, "step": 6081 }, { "epoch": 0.76, "grad_norm": 7.473017889054465, "learning_rate": 1.4765939126262696e-06, "loss": 1.0489, "step": 6082 }, { "epoch": 0.76, "grad_norm": 7.062048466466114, "learning_rate": 1.4751649631846216e-06, "loss": 1.0658, "step": 6083 }, { "epoch": 0.76, "grad_norm": 7.944096420414592, "learning_rate": 1.4737365858431402e-06, "loss": 0.9207, "step": 6084 }, { "epoch": 0.76, "grad_norm": 7.168181951918402, "learning_rate": 1.4723087808336595e-06, "loss": 0.8539, "step": 6085 }, { "epoch": 0.76, "grad_norm": 7.400790017987083, "learning_rate": 1.4708815483879203e-06, "loss": 0.9999, "step": 6086 }, { "epoch": 0.76, "grad_norm": 5.826062469156613, "learning_rate": 1.469454888737571e-06, "loss": 0.7174, "step": 6087 }, { "epoch": 0.76, "grad_norm": 6.593121941331746, "learning_rate": 1.4680288021141636e-06, "loss": 0.9805, "step": 6088 }, { "epoch": 0.76, "grad_norm": 7.481379267272096, "learning_rate": 1.4666032887491616e-06, "loss": 0.9331, "step": 6089 }, { "epoch": 0.76, "grad_norm": 7.613757981533236, "learning_rate": 1.4651783488739334e-06, "loss": 0.8061, "step": 6090 }, { "epoch": 0.76, "grad_norm": 7.595616100312831, "learning_rate": 1.4637539827197567e-06, "loss": 0.7437, "step": 6091 }, { "epoch": 0.76, "grad_norm": 10.10884575301076, "learning_rate": 1.46233019051781e-06, "loss": 0.8996, "step": 6092 }, { "epoch": 0.76, "grad_norm": 7.742167943457869, "learning_rate": 1.4609069724991853e-06, "loss": 1.0075, "step": 6093 }, { "epoch": 0.76, "grad_norm": 7.9747528770858, "learning_rate": 1.4594843288948773e-06, "loss": 0.8813, "step": 6094 }, { "epoch": 0.76, "grad_norm": 7.309973562961697, "learning_rate": 1.458062259935792e-06, "loss": 0.6501, "step": 6095 }, { "epoch": 0.76, "grad_norm": 8.5146558292764, "learning_rate": 1.4566407658527349e-06, "loss": 1.2186, "step": 6096 }, { "epoch": 0.76, "grad_norm": 9.507836255770423, "learning_rate": 1.455219846876424e-06, "loss": 1.1866, "step": 6097 }, { "epoch": 0.76, "grad_norm": 8.642127006967359, "learning_rate": 1.453799503237483e-06, "loss": 0.9074, "step": 6098 }, { "epoch": 0.76, "grad_norm": 10.325402238602575, "learning_rate": 1.4523797351664404e-06, "loss": 0.9236, "step": 6099 }, { "epoch": 0.76, "grad_norm": 9.320334767425821, "learning_rate": 1.4509605428937345e-06, "loss": 1.1947, "step": 6100 }, { "epoch": 0.76, "grad_norm": 8.491280630911119, "learning_rate": 1.449541926649705e-06, "loss": 1.1435, "step": 6101 }, { "epoch": 0.76, "grad_norm": 8.5362533266068, "learning_rate": 1.448123886664603e-06, "loss": 1.0219, "step": 6102 }, { "epoch": 0.76, "grad_norm": 6.905860913698486, "learning_rate": 1.4467064231685851e-06, "loss": 0.9841, "step": 6103 }, { "epoch": 0.76, "grad_norm": 6.626360620391359, "learning_rate": 1.4452895363917107e-06, "loss": 0.6621, "step": 6104 }, { "epoch": 0.76, "grad_norm": 6.954586669764853, "learning_rate": 1.4438732265639498e-06, "loss": 1.111, "step": 6105 }, { "epoch": 0.76, "grad_norm": 7.480174806564262, "learning_rate": 1.442457493915177e-06, "loss": 1.0378, "step": 6106 }, { "epoch": 0.76, "grad_norm": 6.309763396106513, "learning_rate": 1.4410423386751743e-06, "loss": 1.0259, "step": 6107 }, { "epoch": 0.76, "grad_norm": 8.200110728442517, "learning_rate": 1.4396277610736287e-06, "loss": 0.8528, "step": 6108 }, { "epoch": 0.76, "grad_norm": 7.623676644824056, "learning_rate": 1.4382137613401349e-06, "loss": 0.8798, "step": 6109 }, { "epoch": 0.76, "grad_norm": 8.666306248812125, "learning_rate": 1.4368003397041908e-06, "loss": 1.093, "step": 6110 }, { "epoch": 0.76, "grad_norm": 6.950800713100012, "learning_rate": 1.4353874963952036e-06, "loss": 1.0054, "step": 6111 }, { "epoch": 0.76, "grad_norm": 7.588936412270139, "learning_rate": 1.4339752316424876e-06, "loss": 1.0179, "step": 6112 }, { "epoch": 0.76, "grad_norm": 6.585969978827577, "learning_rate": 1.4325635456752573e-06, "loss": 0.6006, "step": 6113 }, { "epoch": 0.76, "grad_norm": 8.576071320949705, "learning_rate": 1.431152438722639e-06, "loss": 1.3811, "step": 6114 }, { "epoch": 0.76, "grad_norm": 7.015866387539722, "learning_rate": 1.4297419110136628e-06, "loss": 0.6153, "step": 6115 }, { "epoch": 0.76, "grad_norm": 8.15211075218068, "learning_rate": 1.4283319627772657e-06, "loss": 0.825, "step": 6116 }, { "epoch": 0.76, "grad_norm": 6.550585968030283, "learning_rate": 1.4269225942422904e-06, "loss": 1.0036, "step": 6117 }, { "epoch": 0.76, "grad_norm": 6.855940052202526, "learning_rate": 1.4255138056374856e-06, "loss": 0.7163, "step": 6118 }, { "epoch": 0.76, "grad_norm": 6.613645927177532, "learning_rate": 1.424105597191503e-06, "loss": 0.8261, "step": 6119 }, { "epoch": 0.76, "grad_norm": 7.3815087134532975, "learning_rate": 1.422697969132904e-06, "loss": 0.6667, "step": 6120 }, { "epoch": 0.76, "grad_norm": 8.694862353800245, "learning_rate": 1.421290921690156e-06, "loss": 1.1742, "step": 6121 }, { "epoch": 0.76, "grad_norm": 6.658952245968198, "learning_rate": 1.419884455091628e-06, "loss": 0.8851, "step": 6122 }, { "epoch": 0.76, "grad_norm": 6.425411326364675, "learning_rate": 1.418478569565598e-06, "loss": 0.7283, "step": 6123 }, { "epoch": 0.76, "grad_norm": 7.908873741552127, "learning_rate": 1.4170732653402492e-06, "loss": 1.175, "step": 6124 }, { "epoch": 0.76, "grad_norm": 8.989769413876216, "learning_rate": 1.41566854264367e-06, "loss": 0.5736, "step": 6125 }, { "epoch": 0.76, "grad_norm": 7.558053598311973, "learning_rate": 1.4142644017038555e-06, "loss": 0.9712, "step": 6126 }, { "epoch": 0.76, "grad_norm": 6.739900859302644, "learning_rate": 1.412860842748706e-06, "loss": 0.7619, "step": 6127 }, { "epoch": 0.76, "grad_norm": 8.433380394816627, "learning_rate": 1.4114578660060252e-06, "loss": 1.034, "step": 6128 }, { "epoch": 0.76, "grad_norm": 8.280151523595311, "learning_rate": 1.4100554717035242e-06, "loss": 0.984, "step": 6129 }, { "epoch": 0.76, "grad_norm": 8.961134384838932, "learning_rate": 1.408653660068821e-06, "loss": 0.8073, "step": 6130 }, { "epoch": 0.76, "grad_norm": 8.401091952164528, "learning_rate": 1.4072524313294344e-06, "loss": 1.1905, "step": 6131 }, { "epoch": 0.76, "grad_norm": 7.772828270956294, "learning_rate": 1.405851785712794e-06, "loss": 0.8999, "step": 6132 }, { "epoch": 0.76, "grad_norm": 8.162552377607746, "learning_rate": 1.4044517234462313e-06, "loss": 1.1903, "step": 6133 }, { "epoch": 0.76, "grad_norm": 7.217160455061038, "learning_rate": 1.403052244756984e-06, "loss": 0.8031, "step": 6134 }, { "epoch": 0.76, "grad_norm": 8.773274043143209, "learning_rate": 1.4016533498721957e-06, "loss": 1.2553, "step": 6135 }, { "epoch": 0.76, "grad_norm": 7.144354995284264, "learning_rate": 1.4002550390189162e-06, "loss": 1.0683, "step": 6136 }, { "epoch": 0.76, "grad_norm": 6.769383313725188, "learning_rate": 1.398857312424096e-06, "loss": 0.6063, "step": 6137 }, { "epoch": 0.76, "grad_norm": 7.003733110770394, "learning_rate": 1.3974601703145958e-06, "loss": 0.5791, "step": 6138 }, { "epoch": 0.76, "grad_norm": 6.889285749600775, "learning_rate": 1.39606361291718e-06, "loss": 0.7622, "step": 6139 }, { "epoch": 0.76, "grad_norm": 8.025775743733092, "learning_rate": 1.3946676404585158e-06, "loss": 1.0149, "step": 6140 }, { "epoch": 0.76, "grad_norm": 8.248416805579277, "learning_rate": 1.393272253165177e-06, "loss": 1.0494, "step": 6141 }, { "epoch": 0.76, "grad_norm": 7.96536011359978, "learning_rate": 1.3918774512636445e-06, "loss": 1.1027, "step": 6142 }, { "epoch": 0.76, "grad_norm": 7.9170000085903744, "learning_rate": 1.3904832349803011e-06, "loss": 0.942, "step": 6143 }, { "epoch": 0.76, "grad_norm": 8.568419493797812, "learning_rate": 1.3890896045414376e-06, "loss": 0.9763, "step": 6144 }, { "epoch": 0.76, "grad_norm": 7.855550190125075, "learning_rate": 1.3876965601732456e-06, "loss": 0.9457, "step": 6145 }, { "epoch": 0.76, "grad_norm": 7.192532611690134, "learning_rate": 1.3863041021018242e-06, "loss": 0.9102, "step": 6146 }, { "epoch": 0.76, "grad_norm": 7.888458835893654, "learning_rate": 1.3849122305531775e-06, "loss": 0.8656, "step": 6147 }, { "epoch": 0.76, "grad_norm": 7.644407036055543, "learning_rate": 1.3835209457532162e-06, "loss": 0.7437, "step": 6148 }, { "epoch": 0.76, "grad_norm": 8.57235635958339, "learning_rate": 1.3821302479277493e-06, "loss": 1.2423, "step": 6149 }, { "epoch": 0.76, "grad_norm": 8.27157550610635, "learning_rate": 1.380740137302497e-06, "loss": 0.8079, "step": 6150 }, { "epoch": 0.77, "grad_norm": 6.574896645574573, "learning_rate": 1.379350614103082e-06, "loss": 0.85, "step": 6151 }, { "epoch": 0.77, "grad_norm": 9.055019283958325, "learning_rate": 1.3779616785550326e-06, "loss": 1.0011, "step": 6152 }, { "epoch": 0.77, "grad_norm": 7.260345658098952, "learning_rate": 1.3765733308837776e-06, "loss": 0.8994, "step": 6153 }, { "epoch": 0.77, "grad_norm": 7.667866166339653, "learning_rate": 1.3751855713146556e-06, "loss": 0.9395, "step": 6154 }, { "epoch": 0.77, "grad_norm": 6.911594631677139, "learning_rate": 1.373798400072907e-06, "loss": 0.9211, "step": 6155 }, { "epoch": 0.77, "grad_norm": 8.253422940348628, "learning_rate": 1.3724118173836775e-06, "loss": 1.3132, "step": 6156 }, { "epoch": 0.77, "grad_norm": 6.2437820156491375, "learning_rate": 1.3710258234720191e-06, "loss": 0.8121, "step": 6157 }, { "epoch": 0.77, "grad_norm": 7.447571443268703, "learning_rate": 1.3696404185628825e-06, "loss": 1.0633, "step": 6158 }, { "epoch": 0.77, "grad_norm": 5.803457581519982, "learning_rate": 1.368255602881129e-06, "loss": 0.6345, "step": 6159 }, { "epoch": 0.77, "grad_norm": 7.250989659624346, "learning_rate": 1.3668713766515224e-06, "loss": 0.9361, "step": 6160 }, { "epoch": 0.77, "grad_norm": 7.243620066114873, "learning_rate": 1.3654877400987281e-06, "loss": 0.8604, "step": 6161 }, { "epoch": 0.77, "grad_norm": 7.227893035389863, "learning_rate": 1.3641046934473185e-06, "loss": 0.9105, "step": 6162 }, { "epoch": 0.77, "grad_norm": 6.943744498957228, "learning_rate": 1.3627222369217701e-06, "loss": 0.9037, "step": 6163 }, { "epoch": 0.77, "grad_norm": 8.276978924099645, "learning_rate": 1.361340370746464e-06, "loss": 0.8096, "step": 6164 }, { "epoch": 0.77, "grad_norm": 6.282081607147705, "learning_rate": 1.3599590951456832e-06, "loss": 0.7741, "step": 6165 }, { "epoch": 0.77, "grad_norm": 8.103111780840287, "learning_rate": 1.3585784103436195e-06, "loss": 1.0933, "step": 6166 }, { "epoch": 0.77, "grad_norm": 8.152207902537125, "learning_rate": 1.357198316564361e-06, "loss": 0.905, "step": 6167 }, { "epoch": 0.77, "grad_norm": 8.133328264643671, "learning_rate": 1.3558188140319074e-06, "loss": 0.9105, "step": 6168 }, { "epoch": 0.77, "grad_norm": 7.432691712306532, "learning_rate": 1.35443990297016e-06, "loss": 0.8509, "step": 6169 }, { "epoch": 0.77, "grad_norm": 7.998067271940793, "learning_rate": 1.3530615836029215e-06, "loss": 1.2243, "step": 6170 }, { "epoch": 0.77, "grad_norm": 6.458666999615361, "learning_rate": 1.3516838561539019e-06, "loss": 0.9891, "step": 6171 }, { "epoch": 0.77, "grad_norm": 7.964751900003888, "learning_rate": 1.350306720846714e-06, "loss": 0.929, "step": 6172 }, { "epoch": 0.77, "grad_norm": 8.656827152633818, "learning_rate": 1.348930177904874e-06, "loss": 1.0392, "step": 6173 }, { "epoch": 0.77, "grad_norm": 6.457227584888046, "learning_rate": 1.3475542275518033e-06, "loss": 0.613, "step": 6174 }, { "epoch": 0.77, "grad_norm": 8.408712271803447, "learning_rate": 1.3461788700108275e-06, "loss": 1.2832, "step": 6175 }, { "epoch": 0.77, "grad_norm": 7.714623602972508, "learning_rate": 1.3448041055051713e-06, "loss": 1.2229, "step": 6176 }, { "epoch": 0.77, "grad_norm": 5.640994217978003, "learning_rate": 1.3434299342579682e-06, "loss": 0.6838, "step": 6177 }, { "epoch": 0.77, "grad_norm": 7.873274647647572, "learning_rate": 1.342056356492255e-06, "loss": 0.8066, "step": 6178 }, { "epoch": 0.77, "grad_norm": 8.390553889239007, "learning_rate": 1.3406833724309686e-06, "loss": 0.8424, "step": 6179 }, { "epoch": 0.77, "grad_norm": 8.054046522320473, "learning_rate": 1.339310982296953e-06, "loss": 0.767, "step": 6180 }, { "epoch": 0.77, "grad_norm": 7.760940993398398, "learning_rate": 1.3379391863129547e-06, "loss": 0.9273, "step": 6181 }, { "epoch": 0.77, "grad_norm": 9.020588557866962, "learning_rate": 1.3365679847016238e-06, "loss": 1.05, "step": 6182 }, { "epoch": 0.77, "grad_norm": 9.398404937500258, "learning_rate": 1.3351973776855142e-06, "loss": 0.9623, "step": 6183 }, { "epoch": 0.77, "grad_norm": 6.291608743547253, "learning_rate": 1.3338273654870843e-06, "loss": 0.9095, "step": 6184 }, { "epoch": 0.77, "grad_norm": 6.2762425338654495, "learning_rate": 1.332457948328691e-06, "loss": 0.7672, "step": 6185 }, { "epoch": 0.77, "grad_norm": 7.07275781836096, "learning_rate": 1.3310891264326003e-06, "loss": 0.7496, "step": 6186 }, { "epoch": 0.77, "grad_norm": 7.842767045373658, "learning_rate": 1.329720900020981e-06, "loss": 1.0012, "step": 6187 }, { "epoch": 0.77, "grad_norm": 8.029390725385769, "learning_rate": 1.3283532693159008e-06, "loss": 0.9021, "step": 6188 }, { "epoch": 0.77, "grad_norm": 8.897053773514354, "learning_rate": 1.3269862345393353e-06, "loss": 1.0289, "step": 6189 }, { "epoch": 0.77, "grad_norm": 8.367134587993743, "learning_rate": 1.3256197959131616e-06, "loss": 0.9367, "step": 6190 }, { "epoch": 0.77, "grad_norm": 8.161272944312744, "learning_rate": 1.3242539536591603e-06, "loss": 0.9663, "step": 6191 }, { "epoch": 0.77, "grad_norm": 6.26013867368466, "learning_rate": 1.3228887079990155e-06, "loss": 0.6587, "step": 6192 }, { "epoch": 0.77, "grad_norm": 7.40270566214662, "learning_rate": 1.321524059154315e-06, "loss": 1.0611, "step": 6193 }, { "epoch": 0.77, "grad_norm": 7.968955735236427, "learning_rate": 1.320160007346546e-06, "loss": 0.8246, "step": 6194 }, { "epoch": 0.77, "grad_norm": 8.216189948624852, "learning_rate": 1.318796552797103e-06, "loss": 0.6581, "step": 6195 }, { "epoch": 0.77, "grad_norm": 7.470727932278073, "learning_rate": 1.3174336957272843e-06, "loss": 1.0087, "step": 6196 }, { "epoch": 0.77, "grad_norm": 8.24922617731567, "learning_rate": 1.3160714363582855e-06, "loss": 1.2064, "step": 6197 }, { "epoch": 0.77, "grad_norm": 7.95157296228788, "learning_rate": 1.3147097749112103e-06, "loss": 0.7644, "step": 6198 }, { "epoch": 0.77, "grad_norm": 6.500051205901057, "learning_rate": 1.3133487116070643e-06, "loss": 0.7213, "step": 6199 }, { "epoch": 0.77, "grad_norm": 8.393440454147578, "learning_rate": 1.3119882466667556e-06, "loss": 1.083, "step": 6200 }, { "epoch": 0.77, "grad_norm": 6.426500455557489, "learning_rate": 1.3106283803110964e-06, "loss": 0.5654, "step": 6201 }, { "epoch": 0.77, "grad_norm": 6.992641437537432, "learning_rate": 1.309269112760797e-06, "loss": 0.6222, "step": 6202 }, { "epoch": 0.77, "grad_norm": 12.983796012957354, "learning_rate": 1.3079104442364766e-06, "loss": 0.9458, "step": 6203 }, { "epoch": 0.77, "grad_norm": 6.668371478585894, "learning_rate": 1.306552374958654e-06, "loss": 0.6275, "step": 6204 }, { "epoch": 0.77, "grad_norm": 10.595120740600684, "learning_rate": 1.3051949051477525e-06, "loss": 0.9879, "step": 6205 }, { "epoch": 0.77, "grad_norm": 7.1741339151831784, "learning_rate": 1.3038380350240948e-06, "loss": 0.9013, "step": 6206 }, { "epoch": 0.77, "grad_norm": 5.86503178110639, "learning_rate": 1.302481764807909e-06, "loss": 0.5902, "step": 6207 }, { "epoch": 0.77, "grad_norm": 7.913236464312624, "learning_rate": 1.3011260947193261e-06, "loss": 1.2727, "step": 6208 }, { "epoch": 0.77, "grad_norm": 8.439435285043254, "learning_rate": 1.2997710249783785e-06, "loss": 1.323, "step": 6209 }, { "epoch": 0.77, "grad_norm": 7.447835268148365, "learning_rate": 1.2984165558050027e-06, "loss": 1.0729, "step": 6210 }, { "epoch": 0.77, "grad_norm": 8.132992568698551, "learning_rate": 1.2970626874190334e-06, "loss": 1.1316, "step": 6211 }, { "epoch": 0.77, "grad_norm": 7.535639056090623, "learning_rate": 1.2957094200402131e-06, "loss": 1.0128, "step": 6212 }, { "epoch": 0.77, "grad_norm": 5.962875353447242, "learning_rate": 1.2943567538881841e-06, "loss": 0.8298, "step": 6213 }, { "epoch": 0.77, "grad_norm": 6.5946056083182905, "learning_rate": 1.2930046891824932e-06, "loss": 0.9034, "step": 6214 }, { "epoch": 0.77, "grad_norm": 8.20894395559542, "learning_rate": 1.2916532261425847e-06, "loss": 0.9863, "step": 6215 }, { "epoch": 0.77, "grad_norm": 6.4568356104679525, "learning_rate": 1.2903023649878105e-06, "loss": 0.5318, "step": 6216 }, { "epoch": 0.77, "grad_norm": 7.370037696445746, "learning_rate": 1.2889521059374237e-06, "loss": 0.9808, "step": 6217 }, { "epoch": 0.77, "grad_norm": 6.949447104255821, "learning_rate": 1.287602449210576e-06, "loss": 1.1241, "step": 6218 }, { "epoch": 0.77, "grad_norm": 9.016147475503391, "learning_rate": 1.286253395026326e-06, "loss": 0.9872, "step": 6219 }, { "epoch": 0.77, "grad_norm": 7.2365575027470275, "learning_rate": 1.2849049436036325e-06, "loss": 0.8092, "step": 6220 }, { "epoch": 0.77, "grad_norm": 8.466013281002079, "learning_rate": 1.2835570951613568e-06, "loss": 1.4466, "step": 6221 }, { "epoch": 0.77, "grad_norm": 8.785436065463053, "learning_rate": 1.2822098499182612e-06, "loss": 1.0706, "step": 6222 }, { "epoch": 0.77, "grad_norm": 8.242624661132842, "learning_rate": 1.2808632080930134e-06, "loss": 0.9966, "step": 6223 }, { "epoch": 0.77, "grad_norm": 8.494372315812152, "learning_rate": 1.2795171699041775e-06, "loss": 1.0456, "step": 6224 }, { "epoch": 0.77, "grad_norm": 7.45116039093327, "learning_rate": 1.2781717355702244e-06, "loss": 1.2438, "step": 6225 }, { "epoch": 0.77, "grad_norm": 8.7123817859857, "learning_rate": 1.2768269053095273e-06, "loss": 1.1004, "step": 6226 }, { "epoch": 0.77, "grad_norm": 7.925009446134832, "learning_rate": 1.2754826793403563e-06, "loss": 1.1071, "step": 6227 }, { "epoch": 0.77, "grad_norm": 7.682875059350042, "learning_rate": 1.2741390578808881e-06, "loss": 0.9632, "step": 6228 }, { "epoch": 0.77, "grad_norm": 6.681243461950727, "learning_rate": 1.2727960411492007e-06, "loss": 0.682, "step": 6229 }, { "epoch": 0.77, "grad_norm": 8.596685605856631, "learning_rate": 1.2714536293632717e-06, "loss": 1.0437, "step": 6230 }, { "epoch": 0.78, "grad_norm": 7.947165723741571, "learning_rate": 1.2701118227409832e-06, "loss": 1.0491, "step": 6231 }, { "epoch": 0.78, "grad_norm": 7.373516272371008, "learning_rate": 1.2687706215001184e-06, "loss": 0.9445, "step": 6232 }, { "epoch": 0.78, "grad_norm": 7.378728811790026, "learning_rate": 1.267430025858359e-06, "loss": 0.9804, "step": 6233 }, { "epoch": 0.78, "grad_norm": 7.8213824735075095, "learning_rate": 1.2660900360332927e-06, "loss": 1.0345, "step": 6234 }, { "epoch": 0.78, "grad_norm": 9.40852204035573, "learning_rate": 1.2647506522424086e-06, "loss": 1.1542, "step": 6235 }, { "epoch": 0.78, "grad_norm": 8.086521097890596, "learning_rate": 1.2634118747030932e-06, "loss": 1.0694, "step": 6236 }, { "epoch": 0.78, "grad_norm": 7.631564122336955, "learning_rate": 1.2620737036326391e-06, "loss": 0.868, "step": 6237 }, { "epoch": 0.78, "grad_norm": 6.886060697801457, "learning_rate": 1.2607361392482386e-06, "loss": 0.7826, "step": 6238 }, { "epoch": 0.78, "grad_norm": 7.294774532696786, "learning_rate": 1.2593991817669859e-06, "loss": 0.9166, "step": 6239 }, { "epoch": 0.78, "grad_norm": 5.9439539774414625, "learning_rate": 1.2580628314058762e-06, "loss": 0.608, "step": 6240 }, { "epoch": 0.78, "grad_norm": 7.365672221674175, "learning_rate": 1.256727088381809e-06, "loss": 0.9143, "step": 6241 }, { "epoch": 0.78, "grad_norm": 6.378250424803412, "learning_rate": 1.2553919529115782e-06, "loss": 0.5768, "step": 6242 }, { "epoch": 0.78, "grad_norm": 6.496576628197443, "learning_rate": 1.2540574252118864e-06, "loss": 0.7107, "step": 6243 }, { "epoch": 0.78, "grad_norm": 7.671450074933941, "learning_rate": 1.2527235054993364e-06, "loss": 0.8244, "step": 6244 }, { "epoch": 0.78, "grad_norm": 5.956299577963251, "learning_rate": 1.2513901939904266e-06, "loss": 0.75, "step": 6245 }, { "epoch": 0.78, "grad_norm": 7.722087669704942, "learning_rate": 1.2500574909015633e-06, "loss": 0.9234, "step": 6246 }, { "epoch": 0.78, "grad_norm": 9.05913755963554, "learning_rate": 1.2487253964490514e-06, "loss": 1.0751, "step": 6247 }, { "epoch": 0.78, "grad_norm": 3.8700058267209214, "learning_rate": 1.2473939108490974e-06, "loss": 0.3115, "step": 6248 }, { "epoch": 0.78, "grad_norm": 8.30778400151884, "learning_rate": 1.2460630343178076e-06, "loss": 1.3445, "step": 6249 }, { "epoch": 0.78, "grad_norm": 5.983833931808254, "learning_rate": 1.2447327670711934e-06, "loss": 0.8768, "step": 6250 }, { "epoch": 0.78, "grad_norm": 7.182194699185446, "learning_rate": 1.2434031093251609e-06, "loss": 0.6989, "step": 6251 }, { "epoch": 0.78, "grad_norm": 7.216817228228401, "learning_rate": 1.2420740612955223e-06, "loss": 0.7244, "step": 6252 }, { "epoch": 0.78, "grad_norm": 11.969836719369418, "learning_rate": 1.2407456231979915e-06, "loss": 1.0498, "step": 6253 }, { "epoch": 0.78, "grad_norm": 6.786724302504079, "learning_rate": 1.2394177952481773e-06, "loss": 0.9938, "step": 6254 }, { "epoch": 0.78, "grad_norm": 8.24924952969541, "learning_rate": 1.238090577661596e-06, "loss": 1.3867, "step": 6255 }, { "epoch": 0.78, "grad_norm": 7.854293864052789, "learning_rate": 1.2367639706536622e-06, "loss": 0.7919, "step": 6256 }, { "epoch": 0.78, "grad_norm": 7.197829870116718, "learning_rate": 1.235437974439691e-06, "loss": 1.0511, "step": 6257 }, { "epoch": 0.78, "grad_norm": 8.906739834808663, "learning_rate": 1.2341125892349009e-06, "loss": 0.9241, "step": 6258 }, { "epoch": 0.78, "grad_norm": 7.421475559191355, "learning_rate": 1.232787815254406e-06, "loss": 0.9664, "step": 6259 }, { "epoch": 0.78, "grad_norm": 8.84134090434974, "learning_rate": 1.2314636527132261e-06, "loss": 1.0099, "step": 6260 }, { "epoch": 0.78, "grad_norm": 8.019004087833288, "learning_rate": 1.2301401018262798e-06, "loss": 1.1179, "step": 6261 }, { "epoch": 0.78, "grad_norm": 9.364796426461039, "learning_rate": 1.2288171628083883e-06, "loss": 1.2498, "step": 6262 }, { "epoch": 0.78, "grad_norm": 6.376683704796591, "learning_rate": 1.227494835874269e-06, "loss": 0.9905, "step": 6263 }, { "epoch": 0.78, "grad_norm": 7.477842849263771, "learning_rate": 1.2261731212385442e-06, "loss": 0.8281, "step": 6264 }, { "epoch": 0.78, "grad_norm": 7.416198379427804, "learning_rate": 1.2248520191157359e-06, "loss": 0.8871, "step": 6265 }, { "epoch": 0.78, "grad_norm": 7.0215570783793995, "learning_rate": 1.2235315297202655e-06, "loss": 1.1916, "step": 6266 }, { "epoch": 0.78, "grad_norm": 7.269813598948592, "learning_rate": 1.2222116532664579e-06, "loss": 1.1601, "step": 6267 }, { "epoch": 0.78, "grad_norm": 6.565821640523772, "learning_rate": 1.2208923899685331e-06, "loss": 0.7147, "step": 6268 }, { "epoch": 0.78, "grad_norm": 7.816767050075859, "learning_rate": 1.2195737400406165e-06, "loss": 0.8722, "step": 6269 }, { "epoch": 0.78, "grad_norm": 7.514348691342447, "learning_rate": 1.2182557036967318e-06, "loss": 0.8332, "step": 6270 }, { "epoch": 0.78, "grad_norm": 6.697778480044468, "learning_rate": 1.2169382811508051e-06, "loss": 1.0324, "step": 6271 }, { "epoch": 0.78, "grad_norm": 7.652185898208494, "learning_rate": 1.2156214726166589e-06, "loss": 0.806, "step": 6272 }, { "epoch": 0.78, "grad_norm": 7.285931537053844, "learning_rate": 1.2143052783080196e-06, "loss": 0.776, "step": 6273 }, { "epoch": 0.78, "grad_norm": 7.852454899837336, "learning_rate": 1.2129896984385142e-06, "loss": 1.2154, "step": 6274 }, { "epoch": 0.78, "grad_norm": 6.97459452137965, "learning_rate": 1.211674733221666e-06, "loss": 0.7615, "step": 6275 }, { "epoch": 0.78, "grad_norm": 7.390460676085216, "learning_rate": 1.210360382870902e-06, "loss": 0.9587, "step": 6276 }, { "epoch": 0.78, "grad_norm": 7.653829360641308, "learning_rate": 1.2090466475995493e-06, "loss": 0.9419, "step": 6277 }, { "epoch": 0.78, "grad_norm": 8.819024460636658, "learning_rate": 1.2077335276208335e-06, "loss": 0.9738, "step": 6278 }, { "epoch": 0.78, "grad_norm": 6.448630063842917, "learning_rate": 1.2064210231478817e-06, "loss": 0.6304, "step": 6279 }, { "epoch": 0.78, "grad_norm": 7.5585341228187595, "learning_rate": 1.205109134393722e-06, "loss": 0.8615, "step": 6280 }, { "epoch": 0.78, "grad_norm": 8.806578235321412, "learning_rate": 1.2037978615712786e-06, "loss": 1.0768, "step": 6281 }, { "epoch": 0.78, "grad_norm": 9.225333910777326, "learning_rate": 1.2024872048933789e-06, "loss": 1.0153, "step": 6282 }, { "epoch": 0.78, "grad_norm": 6.176350427411344, "learning_rate": 1.201177164572752e-06, "loss": 0.7481, "step": 6283 }, { "epoch": 0.78, "grad_norm": 7.05776860212822, "learning_rate": 1.1998677408220216e-06, "loss": 1.15, "step": 6284 }, { "epoch": 0.78, "grad_norm": 8.82835103691083, "learning_rate": 1.1985589338537157e-06, "loss": 1.0836, "step": 6285 }, { "epoch": 0.78, "grad_norm": 7.9345148665088745, "learning_rate": 1.1972507438802605e-06, "loss": 0.988, "step": 6286 }, { "epoch": 0.78, "grad_norm": 7.446732939661458, "learning_rate": 1.195943171113983e-06, "loss": 1.0246, "step": 6287 }, { "epoch": 0.78, "grad_norm": 7.912988378853286, "learning_rate": 1.1946362157671094e-06, "loss": 1.1502, "step": 6288 }, { "epoch": 0.78, "grad_norm": 7.196702831583766, "learning_rate": 1.193329878051767e-06, "loss": 0.9557, "step": 6289 }, { "epoch": 0.78, "grad_norm": 7.021354734576192, "learning_rate": 1.192024158179979e-06, "loss": 0.8666, "step": 6290 }, { "epoch": 0.78, "grad_norm": 8.493340761391453, "learning_rate": 1.1907190563636723e-06, "loss": 1.228, "step": 6291 }, { "epoch": 0.78, "grad_norm": 7.665044083471569, "learning_rate": 1.1894145728146732e-06, "loss": 0.8101, "step": 6292 }, { "epoch": 0.78, "grad_norm": 7.446294099868211, "learning_rate": 1.1881107077447047e-06, "loss": 0.9245, "step": 6293 }, { "epoch": 0.78, "grad_norm": 7.992592921763523, "learning_rate": 1.1868074613653912e-06, "loss": 0.9094, "step": 6294 }, { "epoch": 0.78, "grad_norm": 8.393808897452624, "learning_rate": 1.185504833888258e-06, "loss": 0.9203, "step": 6295 }, { "epoch": 0.78, "grad_norm": 7.276059184554427, "learning_rate": 1.1842028255247278e-06, "loss": 0.9149, "step": 6296 }, { "epoch": 0.78, "grad_norm": 8.792419998647132, "learning_rate": 1.1829014364861252e-06, "loss": 1.1928, "step": 6297 }, { "epoch": 0.78, "grad_norm": 7.99850703448294, "learning_rate": 1.1816006669836726e-06, "loss": 0.9291, "step": 6298 }, { "epoch": 0.78, "grad_norm": 6.652366735333053, "learning_rate": 1.18030051722849e-06, "loss": 0.9273, "step": 6299 }, { "epoch": 0.78, "grad_norm": 6.9060911130776885, "learning_rate": 1.1790009874316005e-06, "loss": 0.871, "step": 6300 }, { "epoch": 0.78, "grad_norm": 6.276666662310294, "learning_rate": 1.1777020778039255e-06, "loss": 0.5081, "step": 6301 }, { "epoch": 0.78, "grad_norm": 7.828689154635954, "learning_rate": 1.1764037885562834e-06, "loss": 0.9767, "step": 6302 }, { "epoch": 0.78, "grad_norm": 10.104212531015891, "learning_rate": 1.1751061198993947e-06, "loss": 1.0962, "step": 6303 }, { "epoch": 0.78, "grad_norm": 8.807891438183564, "learning_rate": 1.1738090720438782e-06, "loss": 0.9226, "step": 6304 }, { "epoch": 0.78, "grad_norm": 7.589248055700643, "learning_rate": 1.1725126452002517e-06, "loss": 0.8622, "step": 6305 }, { "epoch": 0.78, "grad_norm": 6.800666652678707, "learning_rate": 1.1712168395789325e-06, "loss": 0.7576, "step": 6306 }, { "epoch": 0.78, "grad_norm": 5.9764326192205175, "learning_rate": 1.1699216553902388e-06, "loss": 0.8714, "step": 6307 }, { "epoch": 0.78, "grad_norm": 9.021018998703692, "learning_rate": 1.1686270928443832e-06, "loss": 1.2646, "step": 6308 }, { "epoch": 0.78, "grad_norm": 9.378929917172748, "learning_rate": 1.1673331521514813e-06, "loss": 0.8123, "step": 6309 }, { "epoch": 0.78, "grad_norm": 7.458058392140768, "learning_rate": 1.1660398335215484e-06, "loss": 0.9312, "step": 6310 }, { "epoch": 0.78, "grad_norm": 7.2497623115024865, "learning_rate": 1.1647471371644943e-06, "loss": 1.0232, "step": 6311 }, { "epoch": 0.79, "grad_norm": 5.939938874100004, "learning_rate": 1.1634550632901326e-06, "loss": 0.5736, "step": 6312 }, { "epoch": 0.79, "grad_norm": 7.97079639589032, "learning_rate": 1.1621636121081742e-06, "loss": 1.2274, "step": 6313 }, { "epoch": 0.79, "grad_norm": 7.303949846798132, "learning_rate": 1.160872783828228e-06, "loss": 0.5504, "step": 6314 }, { "epoch": 0.79, "grad_norm": 8.426429387141171, "learning_rate": 1.1595825786598041e-06, "loss": 1.1828, "step": 6315 }, { "epoch": 0.79, "grad_norm": 8.11702456601074, "learning_rate": 1.1582929968123073e-06, "loss": 1.0598, "step": 6316 }, { "epoch": 0.79, "grad_norm": 7.948383736950368, "learning_rate": 1.157004038495046e-06, "loss": 1.0617, "step": 6317 }, { "epoch": 0.79, "grad_norm": 8.34065048813838, "learning_rate": 1.155715703917224e-06, "loss": 1.1046, "step": 6318 }, { "epoch": 0.79, "grad_norm": 7.673316168097314, "learning_rate": 1.154427993287947e-06, "loss": 0.8407, "step": 6319 }, { "epoch": 0.79, "grad_norm": 6.604305730078402, "learning_rate": 1.1531409068162147e-06, "loss": 0.8617, "step": 6320 }, { "epoch": 0.79, "grad_norm": 13.594585177000438, "learning_rate": 1.1518544447109298e-06, "loss": 1.2345, "step": 6321 }, { "epoch": 0.79, "grad_norm": 5.457813847267186, "learning_rate": 1.150568607180892e-06, "loss": 0.7738, "step": 6322 }, { "epoch": 0.79, "grad_norm": 8.133545174942466, "learning_rate": 1.1492833944348003e-06, "loss": 1.179, "step": 6323 }, { "epoch": 0.79, "grad_norm": 8.368821124620839, "learning_rate": 1.1479988066812531e-06, "loss": 1.1502, "step": 6324 }, { "epoch": 0.79, "grad_norm": 8.401185053174101, "learning_rate": 1.1467148441287423e-06, "loss": 1.2421, "step": 6325 }, { "epoch": 0.79, "grad_norm": 7.271102320790227, "learning_rate": 1.1454315069856654e-06, "loss": 1.1334, "step": 6326 }, { "epoch": 0.79, "grad_norm": 7.45969443444666, "learning_rate": 1.1441487954603131e-06, "loss": 0.8049, "step": 6327 }, { "epoch": 0.79, "grad_norm": 8.576263658478062, "learning_rate": 1.1428667097608793e-06, "loss": 0.8767, "step": 6328 }, { "epoch": 0.79, "grad_norm": 7.84257280202563, "learning_rate": 1.1415852500954499e-06, "loss": 0.7386, "step": 6329 }, { "epoch": 0.79, "grad_norm": 8.14851764609486, "learning_rate": 1.140304416672015e-06, "loss": 1.2298, "step": 6330 }, { "epoch": 0.79, "grad_norm": 7.564070833947921, "learning_rate": 1.1390242096984622e-06, "loss": 0.609, "step": 6331 }, { "epoch": 0.79, "grad_norm": 6.422508978760736, "learning_rate": 1.1377446293825717e-06, "loss": 0.6487, "step": 6332 }, { "epoch": 0.79, "grad_norm": 7.212336705363411, "learning_rate": 1.136465675932032e-06, "loss": 0.9131, "step": 6333 }, { "epoch": 0.79, "grad_norm": 6.892013582694894, "learning_rate": 1.1351873495544202e-06, "loss": 0.9098, "step": 6334 }, { "epoch": 0.79, "grad_norm": 6.810014526808766, "learning_rate": 1.1339096504572173e-06, "loss": 1.0327, "step": 6335 }, { "epoch": 0.79, "grad_norm": 6.590354385481652, "learning_rate": 1.1326325788478003e-06, "loss": 0.8374, "step": 6336 }, { "epoch": 0.79, "grad_norm": 7.223317896485059, "learning_rate": 1.1313561349334472e-06, "loss": 0.8708, "step": 6337 }, { "epoch": 0.79, "grad_norm": 6.820275526961284, "learning_rate": 1.130080318921329e-06, "loss": 1.0493, "step": 6338 }, { "epoch": 0.79, "grad_norm": 9.07717456695718, "learning_rate": 1.1288051310185182e-06, "loss": 1.0444, "step": 6339 }, { "epoch": 0.79, "grad_norm": 6.236291998078365, "learning_rate": 1.1275305714319866e-06, "loss": 0.7417, "step": 6340 }, { "epoch": 0.79, "grad_norm": 7.841216676454368, "learning_rate": 1.1262566403686003e-06, "loss": 0.9865, "step": 6341 }, { "epoch": 0.79, "grad_norm": 7.877582245370671, "learning_rate": 1.124983338035126e-06, "loss": 0.9447, "step": 6342 }, { "epoch": 0.79, "grad_norm": 7.780568970817361, "learning_rate": 1.123710664638228e-06, "loss": 0.8848, "step": 6343 }, { "epoch": 0.79, "grad_norm": 5.950068361267421, "learning_rate": 1.1224386203844674e-06, "loss": 0.9417, "step": 6344 }, { "epoch": 0.79, "grad_norm": 7.4810229403480815, "learning_rate": 1.1211672054803053e-06, "loss": 1.1228, "step": 6345 }, { "epoch": 0.79, "grad_norm": 7.479065802386136, "learning_rate": 1.1198964201320994e-06, "loss": 1.0322, "step": 6346 }, { "epoch": 0.79, "grad_norm": 8.356450829647047, "learning_rate": 1.1186262645461033e-06, "loss": 1.2493, "step": 6347 }, { "epoch": 0.79, "grad_norm": 6.644360251940601, "learning_rate": 1.1173567389284712e-06, "loss": 0.9668, "step": 6348 }, { "epoch": 0.79, "grad_norm": 7.690615341339174, "learning_rate": 1.1160878434852557e-06, "loss": 0.8688, "step": 6349 }, { "epoch": 0.79, "grad_norm": 8.331619744794402, "learning_rate": 1.114819578422403e-06, "loss": 1.1745, "step": 6350 }, { "epoch": 0.79, "grad_norm": 7.149129977945347, "learning_rate": 1.1135519439457604e-06, "loss": 0.7543, "step": 6351 }, { "epoch": 0.79, "grad_norm": 7.524898761130065, "learning_rate": 1.1122849402610724e-06, "loss": 0.8526, "step": 6352 }, { "epoch": 0.79, "grad_norm": 6.975988676396395, "learning_rate": 1.1110185675739804e-06, "loss": 0.5415, "step": 6353 }, { "epoch": 0.79, "grad_norm": 7.95631238947218, "learning_rate": 1.1097528260900236e-06, "loss": 1.1613, "step": 6354 }, { "epoch": 0.79, "grad_norm": 5.765350978516115, "learning_rate": 1.1084877160146407e-06, "loss": 0.596, "step": 6355 }, { "epoch": 0.79, "grad_norm": 7.217204604785241, "learning_rate": 1.1072232375531628e-06, "loss": 0.8587, "step": 6356 }, { "epoch": 0.79, "grad_norm": 6.352096207150414, "learning_rate": 1.105959390910823e-06, "loss": 0.6216, "step": 6357 }, { "epoch": 0.79, "grad_norm": 7.4629623968085435, "learning_rate": 1.1046961762927527e-06, "loss": 1.035, "step": 6358 }, { "epoch": 0.79, "grad_norm": 7.056137959099582, "learning_rate": 1.1034335939039748e-06, "loss": 0.8566, "step": 6359 }, { "epoch": 0.79, "grad_norm": 5.590095553462641, "learning_rate": 1.1021716439494157e-06, "loss": 0.5059, "step": 6360 }, { "epoch": 0.79, "grad_norm": 7.578463162750346, "learning_rate": 1.100910326633896e-06, "loss": 1.0043, "step": 6361 }, { "epoch": 0.79, "grad_norm": 7.71159439689919, "learning_rate": 1.0996496421621356e-06, "loss": 0.8307, "step": 6362 }, { "epoch": 0.79, "grad_norm": 7.881750321764691, "learning_rate": 1.0983895907387494e-06, "loss": 0.7147, "step": 6363 }, { "epoch": 0.79, "grad_norm": 7.701275520730476, "learning_rate": 1.0971301725682526e-06, "loss": 1.0007, "step": 6364 }, { "epoch": 0.79, "grad_norm": 7.230300905355169, "learning_rate": 1.0958713878550531e-06, "loss": 0.8979, "step": 6365 }, { "epoch": 0.79, "grad_norm": 6.412617435814691, "learning_rate": 1.0946132368034595e-06, "loss": 0.6517, "step": 6366 }, { "epoch": 0.79, "grad_norm": 7.811877700759981, "learning_rate": 1.093355719617678e-06, "loss": 1.0406, "step": 6367 }, { "epoch": 0.79, "grad_norm": 7.63466606548504, "learning_rate": 1.0920988365018087e-06, "loss": 0.717, "step": 6368 }, { "epoch": 0.79, "grad_norm": 7.47260198112607, "learning_rate": 1.0908425876598512e-06, "loss": 0.9742, "step": 6369 }, { "epoch": 0.79, "grad_norm": 5.988147188463314, "learning_rate": 1.0895869732957021e-06, "loss": 0.7498, "step": 6370 }, { "epoch": 0.79, "grad_norm": 8.14433000810367, "learning_rate": 1.0883319936131542e-06, "loss": 1.083, "step": 6371 }, { "epoch": 0.79, "grad_norm": 7.427051608260936, "learning_rate": 1.0870776488158991e-06, "loss": 1.0342, "step": 6372 }, { "epoch": 0.79, "grad_norm": 7.849027582283282, "learning_rate": 1.0858239391075208e-06, "loss": 0.9149, "step": 6373 }, { "epoch": 0.79, "grad_norm": 8.479728765325678, "learning_rate": 1.0845708646915054e-06, "loss": 1.2053, "step": 6374 }, { "epoch": 0.79, "grad_norm": 7.75901570853853, "learning_rate": 1.0833184257712331e-06, "loss": 0.9269, "step": 6375 }, { "epoch": 0.79, "grad_norm": 7.299618160974475, "learning_rate": 1.0820666225499838e-06, "loss": 1.1922, "step": 6376 }, { "epoch": 0.79, "grad_norm": 7.6999746537128635, "learning_rate": 1.0808154552309286e-06, "loss": 0.9642, "step": 6377 }, { "epoch": 0.79, "grad_norm": 6.553601269760744, "learning_rate": 1.0795649240171402e-06, "loss": 0.7726, "step": 6378 }, { "epoch": 0.79, "grad_norm": 5.960400063316856, "learning_rate": 1.0783150291115874e-06, "loss": 0.6677, "step": 6379 }, { "epoch": 0.79, "grad_norm": 9.211264349481441, "learning_rate": 1.0770657707171345e-06, "loss": 0.9318, "step": 6380 }, { "epoch": 0.79, "grad_norm": 7.989916210561004, "learning_rate": 1.0758171490365444e-06, "loss": 1.1796, "step": 6381 }, { "epoch": 0.79, "grad_norm": 6.805734802846645, "learning_rate": 1.0745691642724725e-06, "loss": 0.7332, "step": 6382 }, { "epoch": 0.79, "grad_norm": 11.171393862382658, "learning_rate": 1.0733218166274756e-06, "loss": 0.7589, "step": 6383 }, { "epoch": 0.79, "grad_norm": 7.6614642078676765, "learning_rate": 1.0720751063040041e-06, "loss": 0.8497, "step": 6384 }, { "epoch": 0.79, "grad_norm": 7.56362604291725, "learning_rate": 1.070829033504408e-06, "loss": 0.9713, "step": 6385 }, { "epoch": 0.79, "grad_norm": 9.03683113810709, "learning_rate": 1.0695835984309294e-06, "loss": 1.1701, "step": 6386 }, { "epoch": 0.79, "grad_norm": 8.954415682479397, "learning_rate": 1.06833880128571e-06, "loss": 1.2141, "step": 6387 }, { "epoch": 0.79, "grad_norm": 7.815535685902482, "learning_rate": 1.0670946422707883e-06, "loss": 1.1284, "step": 6388 }, { "epoch": 0.79, "grad_norm": 7.103690610335312, "learning_rate": 1.065851121588095e-06, "loss": 1.1343, "step": 6389 }, { "epoch": 0.79, "grad_norm": 7.906227097212068, "learning_rate": 1.0646082394394653e-06, "loss": 1.4577, "step": 6390 }, { "epoch": 0.79, "grad_norm": 5.661260079593615, "learning_rate": 1.0633659960266218e-06, "loss": 0.6931, "step": 6391 }, { "epoch": 0.8, "grad_norm": 7.871755375275243, "learning_rate": 1.0621243915511887e-06, "loss": 0.6543, "step": 6392 }, { "epoch": 0.8, "grad_norm": 18.6639899470768, "learning_rate": 1.0608834262146855e-06, "loss": 0.685, "step": 6393 }, { "epoch": 0.8, "grad_norm": 7.026072030742321, "learning_rate": 1.0596431002185293e-06, "loss": 0.5911, "step": 6394 }, { "epoch": 0.8, "grad_norm": 8.08255644519032, "learning_rate": 1.058403413764028e-06, "loss": 1.0077, "step": 6395 }, { "epoch": 0.8, "grad_norm": 8.054352302145606, "learning_rate": 1.0571643670523924e-06, "loss": 0.842, "step": 6396 }, { "epoch": 0.8, "grad_norm": 8.23882145055319, "learning_rate": 1.0559259602847272e-06, "loss": 0.9145, "step": 6397 }, { "epoch": 0.8, "grad_norm": 6.881060761447533, "learning_rate": 1.0546881936620296e-06, "loss": 1.1989, "step": 6398 }, { "epoch": 0.8, "grad_norm": 7.66079938775639, "learning_rate": 1.0534510673851984e-06, "loss": 1.2752, "step": 6399 }, { "epoch": 0.8, "grad_norm": 7.313707371597091, "learning_rate": 1.0522145816550249e-06, "loss": 1.0706, "step": 6400 }, { "epoch": 0.8, "grad_norm": 6.932292138233072, "learning_rate": 1.050978736672198e-06, "loss": 0.9937, "step": 6401 }, { "epoch": 0.8, "grad_norm": 7.988256080658134, "learning_rate": 1.0497435326373023e-06, "loss": 0.927, "step": 6402 }, { "epoch": 0.8, "grad_norm": 9.518059185584317, "learning_rate": 1.0485089697508194e-06, "loss": 1.1884, "step": 6403 }, { "epoch": 0.8, "grad_norm": 8.022314501702379, "learning_rate": 1.0472750482131228e-06, "loss": 0.8425, "step": 6404 }, { "epoch": 0.8, "grad_norm": 6.0379102193316685, "learning_rate": 1.046041768224486e-06, "loss": 0.4983, "step": 6405 }, { "epoch": 0.8, "grad_norm": 7.463352301005753, "learning_rate": 1.0448091299850793e-06, "loss": 1.0091, "step": 6406 }, { "epoch": 0.8, "grad_norm": 7.393622728724438, "learning_rate": 1.0435771336949634e-06, "loss": 0.9358, "step": 6407 }, { "epoch": 0.8, "grad_norm": 8.628875946426144, "learning_rate": 1.0423457795540998e-06, "loss": 0.7433, "step": 6408 }, { "epoch": 0.8, "grad_norm": 6.634195798242417, "learning_rate": 1.0411150677623438e-06, "loss": 0.6313, "step": 6409 }, { "epoch": 0.8, "grad_norm": 7.133445511875645, "learning_rate": 1.0398849985194465e-06, "loss": 0.6898, "step": 6410 }, { "epoch": 0.8, "grad_norm": 7.739859482196719, "learning_rate": 1.0386555720250547e-06, "loss": 1.0276, "step": 6411 }, { "epoch": 0.8, "grad_norm": 7.2690036357291286, "learning_rate": 1.0374267884787138e-06, "loss": 0.9333, "step": 6412 }, { "epoch": 0.8, "grad_norm": 7.531078642665635, "learning_rate": 1.0361986480798575e-06, "loss": 0.6384, "step": 6413 }, { "epoch": 0.8, "grad_norm": 7.2534194694830205, "learning_rate": 1.0349711510278226e-06, "loss": 1.2042, "step": 6414 }, { "epoch": 0.8, "grad_norm": 6.642378483311148, "learning_rate": 1.0337442975218399e-06, "loss": 0.7275, "step": 6415 }, { "epoch": 0.8, "grad_norm": 7.288284535329324, "learning_rate": 1.0325180877610313e-06, "loss": 1.0048, "step": 6416 }, { "epoch": 0.8, "grad_norm": 8.290051863059462, "learning_rate": 1.0312925219444187e-06, "loss": 0.9168, "step": 6417 }, { "epoch": 0.8, "grad_norm": 6.509675510903243, "learning_rate": 1.030067600270918e-06, "loss": 1.006, "step": 6418 }, { "epoch": 0.8, "grad_norm": 7.307053999717512, "learning_rate": 1.0288433229393418e-06, "loss": 0.8028, "step": 6419 }, { "epoch": 0.8, "grad_norm": 6.720639116983523, "learning_rate": 1.027619690148396e-06, "loss": 0.8572, "step": 6420 }, { "epoch": 0.8, "grad_norm": 7.029984515441714, "learning_rate": 1.0263967020966847e-06, "loss": 0.8607, "step": 6421 }, { "epoch": 0.8, "grad_norm": 7.1063278467384405, "learning_rate": 1.025174358982703e-06, "loss": 0.752, "step": 6422 }, { "epoch": 0.8, "grad_norm": 7.972174669479994, "learning_rate": 1.023952661004845e-06, "loss": 0.7993, "step": 6423 }, { "epoch": 0.8, "grad_norm": 9.687981653777959, "learning_rate": 1.0227316083614009e-06, "loss": 0.8619, "step": 6424 }, { "epoch": 0.8, "grad_norm": 6.222966203179098, "learning_rate": 1.0215112012505512e-06, "loss": 1.0131, "step": 6425 }, { "epoch": 0.8, "grad_norm": 7.841133993100777, "learning_rate": 1.0202914398703757e-06, "loss": 1.0164, "step": 6426 }, { "epoch": 0.8, "grad_norm": 7.075635527323229, "learning_rate": 1.0190723244188488e-06, "loss": 0.7109, "step": 6427 }, { "epoch": 0.8, "grad_norm": 7.010558162392601, "learning_rate": 1.0178538550938398e-06, "loss": 0.9314, "step": 6428 }, { "epoch": 0.8, "grad_norm": 6.913754730937058, "learning_rate": 1.016636032093114e-06, "loss": 0.7086, "step": 6429 }, { "epoch": 0.8, "grad_norm": 8.403076830318271, "learning_rate": 1.0154188556143286e-06, "loss": 1.2637, "step": 6430 }, { "epoch": 0.8, "grad_norm": 7.784604432308379, "learning_rate": 1.0142023258550387e-06, "loss": 0.8088, "step": 6431 }, { "epoch": 0.8, "grad_norm": 6.9617665659623365, "learning_rate": 1.0129864430126946e-06, "loss": 0.5321, "step": 6432 }, { "epoch": 0.8, "grad_norm": 6.579138205290287, "learning_rate": 1.0117712072846414e-06, "loss": 0.841, "step": 6433 }, { "epoch": 0.8, "grad_norm": 8.342803611292624, "learning_rate": 1.0105566188681159e-06, "loss": 1.1292, "step": 6434 }, { "epoch": 0.8, "grad_norm": 5.799245529663847, "learning_rate": 1.0093426779602545e-06, "loss": 0.5219, "step": 6435 }, { "epoch": 0.8, "grad_norm": 8.303517979300617, "learning_rate": 1.0081293847580858e-06, "loss": 0.9503, "step": 6436 }, { "epoch": 0.8, "grad_norm": 7.82842308438677, "learning_rate": 1.006916739458535e-06, "loss": 0.9498, "step": 6437 }, { "epoch": 0.8, "grad_norm": 7.407351720996177, "learning_rate": 1.0057047422584209e-06, "loss": 1.1691, "step": 6438 }, { "epoch": 0.8, "grad_norm": 7.849059980078966, "learning_rate": 1.004493393354456e-06, "loss": 0.8357, "step": 6439 }, { "epoch": 0.8, "grad_norm": 7.27835210636504, "learning_rate": 1.0032826929432492e-06, "loss": 0.8616, "step": 6440 }, { "epoch": 0.8, "grad_norm": 6.5098432384364955, "learning_rate": 1.002072641221305e-06, "loss": 0.8341, "step": 6441 }, { "epoch": 0.8, "grad_norm": 6.2212972464467375, "learning_rate": 1.000863238385022e-06, "loss": 0.7042, "step": 6442 }, { "epoch": 0.8, "grad_norm": 7.334532417615168, "learning_rate": 9.996544846306906e-07, "loss": 0.8889, "step": 6443 }, { "epoch": 0.8, "grad_norm": 9.37009387707428, "learning_rate": 9.984463801544992e-07, "loss": 0.9654, "step": 6444 }, { "epoch": 0.8, "grad_norm": 6.3255408476088775, "learning_rate": 9.972389251525316e-07, "loss": 0.6695, "step": 6445 }, { "epoch": 0.8, "grad_norm": 7.978276363412533, "learning_rate": 9.960321198207601e-07, "loss": 0.826, "step": 6446 }, { "epoch": 0.8, "grad_norm": 8.301657963684704, "learning_rate": 9.948259643550611e-07, "loss": 1.0841, "step": 6447 }, { "epoch": 0.8, "grad_norm": 7.948572265568916, "learning_rate": 9.936204589511966e-07, "loss": 0.8637, "step": 6448 }, { "epoch": 0.8, "grad_norm": 6.8150122517765155, "learning_rate": 9.924156038048282e-07, "loss": 0.7775, "step": 6449 }, { "epoch": 0.8, "grad_norm": 8.296195899385891, "learning_rate": 9.912113991115103e-07, "loss": 1.4093, "step": 6450 }, { "epoch": 0.8, "grad_norm": 7.398923562915318, "learning_rate": 9.900078450666929e-07, "loss": 1.2178, "step": 6451 }, { "epoch": 0.8, "grad_norm": 6.1553464723797795, "learning_rate": 9.888049418657174e-07, "loss": 0.6821, "step": 6452 }, { "epoch": 0.8, "grad_norm": 6.824217674809985, "learning_rate": 9.876026897038227e-07, "loss": 0.743, "step": 6453 }, { "epoch": 0.8, "grad_norm": 7.080794533326512, "learning_rate": 9.864010887761421e-07, "loss": 0.6963, "step": 6454 }, { "epoch": 0.8, "grad_norm": 7.865832900665423, "learning_rate": 9.852001392776988e-07, "loss": 1.2693, "step": 6455 }, { "epoch": 0.8, "grad_norm": 7.032091535950616, "learning_rate": 9.839998414034185e-07, "loss": 0.9291, "step": 6456 }, { "epoch": 0.8, "grad_norm": 7.696547843077292, "learning_rate": 9.828001953481114e-07, "loss": 0.9305, "step": 6457 }, { "epoch": 0.8, "grad_norm": 7.606443454628082, "learning_rate": 9.81601201306489e-07, "loss": 1.0271, "step": 6458 }, { "epoch": 0.8, "grad_norm": 6.455559964452474, "learning_rate": 9.804028594731542e-07, "loss": 0.8788, "step": 6459 }, { "epoch": 0.8, "grad_norm": 7.459039579332543, "learning_rate": 9.792051700426058e-07, "loss": 0.8539, "step": 6460 }, { "epoch": 0.8, "grad_norm": 5.9648976499783135, "learning_rate": 9.780081332092328e-07, "loss": 0.5896, "step": 6461 }, { "epoch": 0.8, "grad_norm": 9.108535610312913, "learning_rate": 9.768117491673223e-07, "loss": 1.3302, "step": 6462 }, { "epoch": 0.8, "grad_norm": 7.558590192223414, "learning_rate": 9.756160181110559e-07, "loss": 1.2035, "step": 6463 }, { "epoch": 0.8, "grad_norm": 7.972873748286248, "learning_rate": 9.744209402345035e-07, "loss": 1.1817, "step": 6464 }, { "epoch": 0.8, "grad_norm": 7.701395076241374, "learning_rate": 9.732265157316344e-07, "loss": 0.8782, "step": 6465 }, { "epoch": 0.8, "grad_norm": 10.241083468049096, "learning_rate": 9.720327447963113e-07, "loss": 0.5923, "step": 6466 }, { "epoch": 0.8, "grad_norm": 6.72256009824317, "learning_rate": 9.708396276222886e-07, "loss": 0.7302, "step": 6467 }, { "epoch": 0.8, "grad_norm": 7.315788841685916, "learning_rate": 9.696471644032169e-07, "loss": 0.8085, "step": 6468 }, { "epoch": 0.8, "grad_norm": 7.4576267772843785, "learning_rate": 9.684553553326399e-07, "loss": 0.8483, "step": 6469 }, { "epoch": 0.8, "grad_norm": 7.83147922798443, "learning_rate": 9.67264200603993e-07, "loss": 0.823, "step": 6470 }, { "epoch": 0.8, "grad_norm": 7.4461594711733605, "learning_rate": 9.660737004106075e-07, "loss": 0.9983, "step": 6471 }, { "epoch": 0.8, "grad_norm": 9.027216073381883, "learning_rate": 9.648838549457101e-07, "loss": 0.9846, "step": 6472 }, { "epoch": 0.81, "grad_norm": 6.6751919590344, "learning_rate": 9.63694664402417e-07, "loss": 0.7216, "step": 6473 }, { "epoch": 0.81, "grad_norm": 7.49788331725494, "learning_rate": 9.62506128973741e-07, "loss": 0.6434, "step": 6474 }, { "epoch": 0.81, "grad_norm": 7.492540252477053, "learning_rate": 9.613182488525886e-07, "loss": 1.0504, "step": 6475 }, { "epoch": 0.81, "grad_norm": 13.51608487989284, "learning_rate": 9.601310242317586e-07, "loss": 0.8063, "step": 6476 }, { "epoch": 0.81, "grad_norm": 7.276032400683627, "learning_rate": 9.58944455303944e-07, "loss": 0.9707, "step": 6477 }, { "epoch": 0.81, "grad_norm": 7.2166194356895845, "learning_rate": 9.577585422617335e-07, "loss": 0.7657, "step": 6478 }, { "epoch": 0.81, "grad_norm": 7.49838505014865, "learning_rate": 9.56573285297605e-07, "loss": 0.9547, "step": 6479 }, { "epoch": 0.81, "grad_norm": 9.082451849341734, "learning_rate": 9.553886846039324e-07, "loss": 1.1666, "step": 6480 }, { "epoch": 0.81, "grad_norm": 7.317877892768834, "learning_rate": 9.542047403729848e-07, "loss": 0.8087, "step": 6481 }, { "epoch": 0.81, "grad_norm": 8.335954956475952, "learning_rate": 9.530214527969206e-07, "loss": 1.1877, "step": 6482 }, { "epoch": 0.81, "grad_norm": 7.9728295644161395, "learning_rate": 9.518388220677949e-07, "loss": 1.1762, "step": 6483 }, { "epoch": 0.81, "grad_norm": 7.176694754899893, "learning_rate": 9.506568483775547e-07, "loss": 0.8264, "step": 6484 }, { "epoch": 0.81, "grad_norm": 7.334339918654, "learning_rate": 9.494755319180421e-07, "loss": 1.0344, "step": 6485 }, { "epoch": 0.81, "grad_norm": 9.758607569686161, "learning_rate": 9.482948728809909e-07, "loss": 1.3995, "step": 6486 }, { "epoch": 0.81, "grad_norm": 8.416343359099686, "learning_rate": 9.471148714580275e-07, "loss": 1.1368, "step": 6487 }, { "epoch": 0.81, "grad_norm": 8.418683722682047, "learning_rate": 9.459355278406735e-07, "loss": 1.0536, "step": 6488 }, { "epoch": 0.81, "grad_norm": 6.270608286131649, "learning_rate": 9.447568422203418e-07, "loss": 0.8418, "step": 6489 }, { "epoch": 0.81, "grad_norm": 7.691473700102015, "learning_rate": 9.435788147883423e-07, "loss": 1.1746, "step": 6490 }, { "epoch": 0.81, "grad_norm": 5.5040598666030025, "learning_rate": 9.424014457358721e-07, "loss": 0.5813, "step": 6491 }, { "epoch": 0.81, "grad_norm": 6.355515758393881, "learning_rate": 9.412247352540266e-07, "loss": 0.6546, "step": 6492 }, { "epoch": 0.81, "grad_norm": 6.655781491045564, "learning_rate": 9.400486835337913e-07, "loss": 0.7036, "step": 6493 }, { "epoch": 0.81, "grad_norm": 7.467295229556411, "learning_rate": 9.38873290766047e-07, "loss": 0.8754, "step": 6494 }, { "epoch": 0.81, "grad_norm": 8.196526450770108, "learning_rate": 9.376985571415664e-07, "loss": 1.0184, "step": 6495 }, { "epoch": 0.81, "grad_norm": 7.809248057931514, "learning_rate": 9.365244828510145e-07, "loss": 1.1461, "step": 6496 }, { "epoch": 0.81, "grad_norm": 6.728575022912974, "learning_rate": 9.353510680849493e-07, "loss": 0.9914, "step": 6497 }, { "epoch": 0.81, "grad_norm": 8.841571661291455, "learning_rate": 9.341783130338239e-07, "loss": 1.3342, "step": 6498 }, { "epoch": 0.81, "grad_norm": 8.20460851340954, "learning_rate": 9.330062178879839e-07, "loss": 1.1321, "step": 6499 }, { "epoch": 0.81, "grad_norm": 8.227460690833533, "learning_rate": 9.318347828376639e-07, "loss": 1.2197, "step": 6500 }, { "epoch": 0.81, "grad_norm": 8.984316200195098, "learning_rate": 9.306640080729962e-07, "loss": 1.1367, "step": 6501 }, { "epoch": 0.81, "grad_norm": 7.901765158441092, "learning_rate": 9.294938937840048e-07, "loss": 1.2308, "step": 6502 }, { "epoch": 0.81, "grad_norm": 7.675608521586698, "learning_rate": 9.283244401606012e-07, "loss": 1.1552, "step": 6503 }, { "epoch": 0.81, "grad_norm": 5.760043666781078, "learning_rate": 9.271556473926008e-07, "loss": 0.4922, "step": 6504 }, { "epoch": 0.81, "grad_norm": 6.988152057661283, "learning_rate": 9.259875156697002e-07, "loss": 1.0262, "step": 6505 }, { "epoch": 0.81, "grad_norm": 7.038102749435534, "learning_rate": 9.248200451814948e-07, "loss": 0.4158, "step": 6506 }, { "epoch": 0.81, "grad_norm": 8.23935389548401, "learning_rate": 9.236532361174727e-07, "loss": 1.087, "step": 6507 }, { "epoch": 0.81, "grad_norm": 8.010620785835261, "learning_rate": 9.224870886670134e-07, "loss": 0.8555, "step": 6508 }, { "epoch": 0.81, "grad_norm": 6.411455973969533, "learning_rate": 9.213216030193867e-07, "loss": 0.6233, "step": 6509 }, { "epoch": 0.81, "grad_norm": 6.278881770894432, "learning_rate": 9.201567793637595e-07, "loss": 0.7913, "step": 6510 }, { "epoch": 0.81, "grad_norm": 8.39636560222325, "learning_rate": 9.189926178891895e-07, "loss": 1.0528, "step": 6511 }, { "epoch": 0.81, "grad_norm": 8.253674042976781, "learning_rate": 9.178291187846233e-07, "loss": 0.9849, "step": 6512 }, { "epoch": 0.81, "grad_norm": 7.518935489855378, "learning_rate": 9.166662822389078e-07, "loss": 1.0363, "step": 6513 }, { "epoch": 0.81, "grad_norm": 7.606392432487802, "learning_rate": 9.15504108440774e-07, "loss": 0.685, "step": 6514 }, { "epoch": 0.81, "grad_norm": 7.300105519046278, "learning_rate": 9.143425975788511e-07, "loss": 1.0974, "step": 6515 }, { "epoch": 0.81, "grad_norm": 9.650153283195083, "learning_rate": 9.131817498416579e-07, "loss": 1.2043, "step": 6516 }, { "epoch": 0.81, "grad_norm": 8.485616857205263, "learning_rate": 9.120215654176079e-07, "loss": 0.9786, "step": 6517 }, { "epoch": 0.81, "grad_norm": 7.949521085019548, "learning_rate": 9.108620444950034e-07, "loss": 1.1806, "step": 6518 }, { "epoch": 0.81, "grad_norm": 6.825744547179324, "learning_rate": 9.097031872620416e-07, "loss": 0.7191, "step": 6519 }, { "epoch": 0.81, "grad_norm": 7.317365293456581, "learning_rate": 9.085449939068125e-07, "loss": 0.8761, "step": 6520 }, { "epoch": 0.81, "grad_norm": 7.1659631867520845, "learning_rate": 9.073874646172958e-07, "loss": 0.7466, "step": 6521 }, { "epoch": 0.81, "grad_norm": 7.831336178627502, "learning_rate": 9.062305995813652e-07, "loss": 1.0317, "step": 6522 }, { "epoch": 0.81, "grad_norm": 10.224368163393294, "learning_rate": 9.050743989867871e-07, "loss": 0.9667, "step": 6523 }, { "epoch": 0.81, "grad_norm": 7.4064883074983605, "learning_rate": 9.039188630212187e-07, "loss": 0.8703, "step": 6524 }, { "epoch": 0.81, "grad_norm": 7.740650410670225, "learning_rate": 9.027639918722092e-07, "loss": 0.9759, "step": 6525 }, { "epoch": 0.81, "grad_norm": 9.273107453490075, "learning_rate": 9.016097857272033e-07, "loss": 0.7389, "step": 6526 }, { "epoch": 0.81, "grad_norm": 7.073163859081866, "learning_rate": 9.004562447735315e-07, "loss": 0.9238, "step": 6527 }, { "epoch": 0.81, "grad_norm": 6.842929939271821, "learning_rate": 8.993033691984215e-07, "loss": 0.635, "step": 6528 }, { "epoch": 0.81, "grad_norm": 7.91241472194664, "learning_rate": 8.981511591889919e-07, "loss": 0.9453, "step": 6529 }, { "epoch": 0.81, "grad_norm": 8.279399028284656, "learning_rate": 8.969996149322513e-07, "loss": 1.0776, "step": 6530 }, { "epoch": 0.81, "grad_norm": 5.752918774001046, "learning_rate": 8.958487366151026e-07, "loss": 0.5409, "step": 6531 }, { "epoch": 0.81, "grad_norm": 6.3468704875722395, "learning_rate": 8.946985244243395e-07, "loss": 0.5552, "step": 6532 }, { "epoch": 0.81, "grad_norm": 6.750111935821993, "learning_rate": 8.935489785466472e-07, "loss": 0.8978, "step": 6533 }, { "epoch": 0.81, "grad_norm": 6.355886395656245, "learning_rate": 8.924000991686044e-07, "loss": 0.686, "step": 6534 }, { "epoch": 0.81, "grad_norm": 8.836946618747417, "learning_rate": 8.912518864766817e-07, "loss": 1.0192, "step": 6535 }, { "epoch": 0.81, "grad_norm": 7.7808301625018075, "learning_rate": 8.901043406572374e-07, "loss": 0.9235, "step": 6536 }, { "epoch": 0.81, "grad_norm": 7.887822528666151, "learning_rate": 8.889574618965252e-07, "loss": 0.9282, "step": 6537 }, { "epoch": 0.81, "grad_norm": 7.486983143048684, "learning_rate": 8.878112503806923e-07, "loss": 0.5825, "step": 6538 }, { "epoch": 0.81, "grad_norm": 7.117750986772633, "learning_rate": 8.866657062957723e-07, "loss": 0.9285, "step": 6539 }, { "epoch": 0.81, "grad_norm": 7.58777176174044, "learning_rate": 8.855208298276946e-07, "loss": 1.3673, "step": 6540 }, { "epoch": 0.81, "grad_norm": 7.645787844824561, "learning_rate": 8.843766211622784e-07, "loss": 1.2741, "step": 6541 }, { "epoch": 0.81, "grad_norm": 7.460197835027896, "learning_rate": 8.832330804852351e-07, "loss": 1.0001, "step": 6542 }, { "epoch": 0.81, "grad_norm": 5.729817219743797, "learning_rate": 8.820902079821697e-07, "loss": 0.8196, "step": 6543 }, { "epoch": 0.81, "grad_norm": 8.509536999677628, "learning_rate": 8.809480038385737e-07, "loss": 0.9339, "step": 6544 }, { "epoch": 0.81, "grad_norm": 9.453040208855304, "learning_rate": 8.798064682398344e-07, "loss": 1.3538, "step": 6545 }, { "epoch": 0.81, "grad_norm": 7.168320651960178, "learning_rate": 8.786656013712297e-07, "loss": 0.9181, "step": 6546 }, { "epoch": 0.81, "grad_norm": 7.7248664923288235, "learning_rate": 8.775254034179292e-07, "loss": 0.9012, "step": 6547 }, { "epoch": 0.81, "grad_norm": 7.971396584219796, "learning_rate": 8.763858745649911e-07, "loss": 1.0235, "step": 6548 }, { "epoch": 0.81, "grad_norm": 6.838180086433475, "learning_rate": 8.752470149973686e-07, "loss": 0.7583, "step": 6549 }, { "epoch": 0.81, "grad_norm": 7.045417867321577, "learning_rate": 8.741088248999052e-07, "loss": 0.7425, "step": 6550 }, { "epoch": 0.81, "grad_norm": 7.122903065573472, "learning_rate": 8.729713044573346e-07, "loss": 0.863, "step": 6551 }, { "epoch": 0.81, "grad_norm": 7.001516284933652, "learning_rate": 8.718344538542839e-07, "loss": 0.8717, "step": 6552 }, { "epoch": 0.82, "grad_norm": 9.123161587344327, "learning_rate": 8.706982732752689e-07, "loss": 1.2089, "step": 6553 }, { "epoch": 0.82, "grad_norm": 7.555019769008848, "learning_rate": 8.695627629046982e-07, "loss": 0.9917, "step": 6554 }, { "epoch": 0.82, "grad_norm": 7.88754371795742, "learning_rate": 8.684279229268716e-07, "loss": 1.2022, "step": 6555 }, { "epoch": 0.82, "grad_norm": 5.5721366921672875, "learning_rate": 8.672937535259812e-07, "loss": 0.4507, "step": 6556 }, { "epoch": 0.82, "grad_norm": 9.765642789707226, "learning_rate": 8.661602548861065e-07, "loss": 1.375, "step": 6557 }, { "epoch": 0.82, "grad_norm": 8.625953597337803, "learning_rate": 8.650274271912212e-07, "loss": 0.8154, "step": 6558 }, { "epoch": 0.82, "grad_norm": 6.348984379142585, "learning_rate": 8.638952706251918e-07, "loss": 0.7955, "step": 6559 }, { "epoch": 0.82, "grad_norm": 9.609570968645002, "learning_rate": 8.627637853717696e-07, "loss": 1.2228, "step": 6560 }, { "epoch": 0.82, "grad_norm": 7.353906033149408, "learning_rate": 8.616329716146049e-07, "loss": 1.08, "step": 6561 }, { "epoch": 0.82, "grad_norm": 8.160459217753088, "learning_rate": 8.605028295372325e-07, "loss": 1.1782, "step": 6562 }, { "epoch": 0.82, "grad_norm": 9.171903083892854, "learning_rate": 8.593733593230813e-07, "loss": 1.1337, "step": 6563 }, { "epoch": 0.82, "grad_norm": 6.457763348044495, "learning_rate": 8.582445611554707e-07, "loss": 0.8158, "step": 6564 }, { "epoch": 0.82, "grad_norm": 7.383936434131693, "learning_rate": 8.57116435217612e-07, "loss": 0.8672, "step": 6565 }, { "epoch": 0.82, "grad_norm": 6.8231451234894, "learning_rate": 8.559889816926037e-07, "loss": 1.1251, "step": 6566 }, { "epoch": 0.82, "grad_norm": 6.676180705934885, "learning_rate": 8.548622007634394e-07, "loss": 0.7809, "step": 6567 }, { "epoch": 0.82, "grad_norm": 6.020916340319557, "learning_rate": 8.537360926130029e-07, "loss": 0.7748, "step": 6568 }, { "epoch": 0.82, "grad_norm": 6.145725541445596, "learning_rate": 8.526106574240633e-07, "loss": 0.5565, "step": 6569 }, { "epoch": 0.82, "grad_norm": 6.59094991676361, "learning_rate": 8.51485895379291e-07, "loss": 0.7439, "step": 6570 }, { "epoch": 0.82, "grad_norm": 8.033798468321724, "learning_rate": 8.503618066612368e-07, "loss": 0.9934, "step": 6571 }, { "epoch": 0.82, "grad_norm": 6.437216206437578, "learning_rate": 8.492383914523483e-07, "loss": 0.8658, "step": 6572 }, { "epoch": 0.82, "grad_norm": 6.963102098686678, "learning_rate": 8.481156499349608e-07, "loss": 0.7792, "step": 6573 }, { "epoch": 0.82, "grad_norm": 7.963069829732219, "learning_rate": 8.469935822913039e-07, "loss": 1.0954, "step": 6574 }, { "epoch": 0.82, "grad_norm": 6.103975089954645, "learning_rate": 8.45872188703492e-07, "loss": 0.9403, "step": 6575 }, { "epoch": 0.82, "grad_norm": 9.037124736754423, "learning_rate": 8.447514693535359e-07, "loss": 1.0785, "step": 6576 }, { "epoch": 0.82, "grad_norm": 7.379883652170491, "learning_rate": 8.43631424423334e-07, "loss": 0.9542, "step": 6577 }, { "epoch": 0.82, "grad_norm": 6.2014312208357785, "learning_rate": 8.425120540946752e-07, "loss": 0.7649, "step": 6578 }, { "epoch": 0.82, "grad_norm": 7.734528780060793, "learning_rate": 8.41393358549239e-07, "loss": 0.9324, "step": 6579 }, { "epoch": 0.82, "grad_norm": 7.939884669053959, "learning_rate": 8.402753379685968e-07, "loss": 1.2185, "step": 6580 }, { "epoch": 0.82, "grad_norm": 8.381184080630396, "learning_rate": 8.391579925342097e-07, "loss": 1.0597, "step": 6581 }, { "epoch": 0.82, "grad_norm": 7.803985963901327, "learning_rate": 8.380413224274286e-07, "loss": 0.759, "step": 6582 }, { "epoch": 0.82, "grad_norm": 7.018092534656266, "learning_rate": 8.369253278294965e-07, "loss": 1.0908, "step": 6583 }, { "epoch": 0.82, "grad_norm": 6.9867392438511615, "learning_rate": 8.358100089215426e-07, "loss": 0.8102, "step": 6584 }, { "epoch": 0.82, "grad_norm": 7.835343953736637, "learning_rate": 8.346953658845913e-07, "loss": 1.0285, "step": 6585 }, { "epoch": 0.82, "grad_norm": 7.797624273512139, "learning_rate": 8.335813988995567e-07, "loss": 0.9159, "step": 6586 }, { "epoch": 0.82, "grad_norm": 9.733884725629697, "learning_rate": 8.324681081472379e-07, "loss": 1.3263, "step": 6587 }, { "epoch": 0.82, "grad_norm": 7.936743856943871, "learning_rate": 8.313554938083313e-07, "loss": 0.877, "step": 6588 }, { "epoch": 0.82, "grad_norm": 6.584441343903289, "learning_rate": 8.302435560634187e-07, "loss": 0.6119, "step": 6589 }, { "epoch": 0.82, "grad_norm": 6.177619268163285, "learning_rate": 8.291322950929742e-07, "loss": 0.6817, "step": 6590 }, { "epoch": 0.82, "grad_norm": 7.693927024244764, "learning_rate": 8.280217110773625e-07, "loss": 0.9021, "step": 6591 }, { "epoch": 0.82, "grad_norm": 6.12763900524339, "learning_rate": 8.269118041968377e-07, "loss": 0.8758, "step": 6592 }, { "epoch": 0.82, "grad_norm": 7.4369598404884005, "learning_rate": 8.258025746315412e-07, "loss": 1.1424, "step": 6593 }, { "epoch": 0.82, "grad_norm": 6.898522177615792, "learning_rate": 8.246940225615091e-07, "loss": 0.7111, "step": 6594 }, { "epoch": 0.82, "grad_norm": 7.929493866634991, "learning_rate": 8.235861481666668e-07, "loss": 0.8023, "step": 6595 }, { "epoch": 0.82, "grad_norm": 6.632493557936843, "learning_rate": 8.22478951626825e-07, "loss": 0.9919, "step": 6596 }, { "epoch": 0.82, "grad_norm": 7.504955864665973, "learning_rate": 8.213724331216899e-07, "loss": 0.9574, "step": 6597 }, { "epoch": 0.82, "grad_norm": 6.934665458301703, "learning_rate": 8.202665928308551e-07, "loss": 0.8306, "step": 6598 }, { "epoch": 0.82, "grad_norm": 7.779103889102562, "learning_rate": 8.191614309338048e-07, "loss": 1.2288, "step": 6599 }, { "epoch": 0.82, "grad_norm": 7.210525084187654, "learning_rate": 8.180569476099143e-07, "loss": 1.0928, "step": 6600 }, { "epoch": 0.82, "grad_norm": 8.03807790941782, "learning_rate": 8.169531430384442e-07, "loss": 0.7375, "step": 6601 }, { "epoch": 0.82, "grad_norm": 7.8476610351840455, "learning_rate": 8.158500173985501e-07, "loss": 1.1622, "step": 6602 }, { "epoch": 0.82, "grad_norm": 7.88932666823519, "learning_rate": 8.147475708692749e-07, "loss": 0.9933, "step": 6603 }, { "epoch": 0.82, "grad_norm": 8.10377445122356, "learning_rate": 8.136458036295524e-07, "loss": 1.1856, "step": 6604 }, { "epoch": 0.82, "grad_norm": 7.167726973950561, "learning_rate": 8.125447158582045e-07, "loss": 1.1848, "step": 6605 }, { "epoch": 0.82, "grad_norm": 7.91638962978106, "learning_rate": 8.11444307733944e-07, "loss": 0.9562, "step": 6606 }, { "epoch": 0.82, "grad_norm": 8.744200546652063, "learning_rate": 8.103445794353737e-07, "loss": 1.1113, "step": 6607 }, { "epoch": 0.82, "grad_norm": 9.106277653941559, "learning_rate": 8.092455311409847e-07, "loss": 0.8531, "step": 6608 }, { "epoch": 0.82, "grad_norm": 17.860400208348985, "learning_rate": 8.081471630291609e-07, "loss": 0.9517, "step": 6609 }, { "epoch": 0.82, "grad_norm": 8.141562236430756, "learning_rate": 8.07049475278171e-07, "loss": 1.4728, "step": 6610 }, { "epoch": 0.82, "grad_norm": 7.780310707449039, "learning_rate": 8.059524680661762e-07, "loss": 1.2159, "step": 6611 }, { "epoch": 0.82, "grad_norm": 6.3263083900536445, "learning_rate": 8.048561415712269e-07, "loss": 0.4851, "step": 6612 }, { "epoch": 0.82, "grad_norm": 8.532163836479588, "learning_rate": 8.037604959712652e-07, "loss": 0.9612, "step": 6613 }, { "epoch": 0.82, "grad_norm": 7.811468133175818, "learning_rate": 8.026655314441167e-07, "loss": 0.8775, "step": 6614 }, { "epoch": 0.82, "grad_norm": 5.800404945229206, "learning_rate": 8.015712481675025e-07, "loss": 0.7044, "step": 6615 }, { "epoch": 0.82, "grad_norm": 7.686750241640823, "learning_rate": 8.004776463190312e-07, "loss": 0.9885, "step": 6616 }, { "epoch": 0.82, "grad_norm": 8.583024466508494, "learning_rate": 7.993847260761972e-07, "loss": 1.0316, "step": 6617 }, { "epoch": 0.82, "grad_norm": 6.773154911109567, "learning_rate": 7.982924876163917e-07, "loss": 0.9847, "step": 6618 }, { "epoch": 0.82, "grad_norm": 6.838672989495781, "learning_rate": 7.972009311168883e-07, "loss": 0.902, "step": 6619 }, { "epoch": 0.82, "grad_norm": 7.807861987026742, "learning_rate": 7.961100567548536e-07, "loss": 0.9081, "step": 6620 }, { "epoch": 0.82, "grad_norm": 8.629464699359003, "learning_rate": 7.95019864707342e-07, "loss": 1.2061, "step": 6621 }, { "epoch": 0.82, "grad_norm": 11.974604317250936, "learning_rate": 7.939303551512995e-07, "loss": 1.1273, "step": 6622 }, { "epoch": 0.82, "grad_norm": 7.9800677101086555, "learning_rate": 7.928415282635571e-07, "loss": 1.2548, "step": 6623 }, { "epoch": 0.82, "grad_norm": 8.758148873218564, "learning_rate": 7.91753384220838e-07, "loss": 0.9489, "step": 6624 }, { "epoch": 0.82, "grad_norm": 15.149212977334685, "learning_rate": 7.906659231997559e-07, "loss": 0.9349, "step": 6625 }, { "epoch": 0.82, "grad_norm": 6.798041173464474, "learning_rate": 7.895791453768076e-07, "loss": 0.6291, "step": 6626 }, { "epoch": 0.82, "grad_norm": 7.4590091081207355, "learning_rate": 7.884930509283877e-07, "loss": 0.945, "step": 6627 }, { "epoch": 0.82, "grad_norm": 9.330615168562753, "learning_rate": 7.874076400307729e-07, "loss": 1.0193, "step": 6628 }, { "epoch": 0.82, "grad_norm": 6.487611628308139, "learning_rate": 7.863229128601312e-07, "loss": 0.6955, "step": 6629 }, { "epoch": 0.82, "grad_norm": 7.398055937435638, "learning_rate": 7.852388695925206e-07, "loss": 0.7698, "step": 6630 }, { "epoch": 0.82, "grad_norm": 6.999966386039964, "learning_rate": 7.841555104038884e-07, "loss": 1.0853, "step": 6631 }, { "epoch": 0.82, "grad_norm": 6.901779674340763, "learning_rate": 7.830728354700673e-07, "loss": 0.7598, "step": 6632 }, { "epoch": 0.82, "grad_norm": 7.904972273028985, "learning_rate": 7.819908449667824e-07, "loss": 1.1841, "step": 6633 }, { "epoch": 0.83, "grad_norm": 6.603304704637652, "learning_rate": 7.80909539069648e-07, "loss": 1.0245, "step": 6634 }, { "epoch": 0.83, "grad_norm": 7.682215118879145, "learning_rate": 7.798289179541629e-07, "loss": 0.8214, "step": 6635 }, { "epoch": 0.83, "grad_norm": 7.534557518458134, "learning_rate": 7.787489817957222e-07, "loss": 1.1436, "step": 6636 }, { "epoch": 0.83, "grad_norm": 8.655471542025714, "learning_rate": 7.776697307696018e-07, "loss": 0.7237, "step": 6637 }, { "epoch": 0.83, "grad_norm": 7.925757065700981, "learning_rate": 7.765911650509716e-07, "loss": 0.8961, "step": 6638 }, { "epoch": 0.83, "grad_norm": 7.964415426107919, "learning_rate": 7.755132848148888e-07, "loss": 1.1473, "step": 6639 }, { "epoch": 0.83, "grad_norm": 8.098651060668052, "learning_rate": 7.744360902363002e-07, "loss": 1.2911, "step": 6640 }, { "epoch": 0.83, "grad_norm": 8.40623891818325, "learning_rate": 7.73359581490038e-07, "loss": 1.2657, "step": 6641 }, { "epoch": 0.83, "grad_norm": 8.73262490839661, "learning_rate": 7.722837587508275e-07, "loss": 0.9186, "step": 6642 }, { "epoch": 0.83, "grad_norm": 6.289646440971806, "learning_rate": 7.712086221932807e-07, "loss": 0.8931, "step": 6643 }, { "epoch": 0.83, "grad_norm": 7.1976644338450235, "learning_rate": 7.701341719918965e-07, "loss": 1.0844, "step": 6644 }, { "epoch": 0.83, "grad_norm": 8.675007616658291, "learning_rate": 7.690604083210651e-07, "loss": 0.8333, "step": 6645 }, { "epoch": 0.83, "grad_norm": 6.015574373031152, "learning_rate": 7.679873313550646e-07, "loss": 0.7599, "step": 6646 }, { "epoch": 0.83, "grad_norm": 7.421349273586168, "learning_rate": 7.669149412680604e-07, "loss": 0.7044, "step": 6647 }, { "epoch": 0.83, "grad_norm": 7.532350377143027, "learning_rate": 7.658432382341086e-07, "loss": 1.0185, "step": 6648 }, { "epoch": 0.83, "grad_norm": 9.988570491028085, "learning_rate": 7.647722224271526e-07, "loss": 1.2963, "step": 6649 }, { "epoch": 0.83, "grad_norm": 8.172800294240458, "learning_rate": 7.637018940210223e-07, "loss": 0.7276, "step": 6650 }, { "epoch": 0.83, "grad_norm": 6.669946374825938, "learning_rate": 7.626322531894393e-07, "loss": 0.9458, "step": 6651 }, { "epoch": 0.83, "grad_norm": 8.443581240615016, "learning_rate": 7.615633001060124e-07, "loss": 0.7013, "step": 6652 }, { "epoch": 0.83, "grad_norm": 6.280354314952523, "learning_rate": 7.604950349442374e-07, "loss": 0.6778, "step": 6653 }, { "epoch": 0.83, "grad_norm": 7.542540401290917, "learning_rate": 7.594274578775007e-07, "loss": 1.0226, "step": 6654 }, { "epoch": 0.83, "grad_norm": 8.530333128316169, "learning_rate": 7.583605690790752e-07, "loss": 0.8516, "step": 6655 }, { "epoch": 0.83, "grad_norm": 6.693370545462026, "learning_rate": 7.572943687221235e-07, "loss": 0.8879, "step": 6656 }, { "epoch": 0.83, "grad_norm": 6.477095957726365, "learning_rate": 7.562288569796966e-07, "loss": 0.7647, "step": 6657 }, { "epoch": 0.83, "grad_norm": 7.2367740248385335, "learning_rate": 7.551640340247301e-07, "loss": 0.7357, "step": 6658 }, { "epoch": 0.83, "grad_norm": 5.68881572929766, "learning_rate": 7.540999000300531e-07, "loss": 0.5026, "step": 6659 }, { "epoch": 0.83, "grad_norm": 7.230788861109058, "learning_rate": 7.530364551683794e-07, "loss": 0.9733, "step": 6660 }, { "epoch": 0.83, "grad_norm": 7.2625997172888805, "learning_rate": 7.519736996123139e-07, "loss": 0.6981, "step": 6661 }, { "epoch": 0.83, "grad_norm": 8.206984948648202, "learning_rate": 7.509116335343441e-07, "loss": 0.6908, "step": 6662 }, { "epoch": 0.83, "grad_norm": 8.306668459953405, "learning_rate": 7.498502571068517e-07, "loss": 1.0187, "step": 6663 }, { "epoch": 0.83, "grad_norm": 7.407952361697884, "learning_rate": 7.487895705021031e-07, "loss": 0.8971, "step": 6664 }, { "epoch": 0.83, "grad_norm": 8.287421995886321, "learning_rate": 7.477295738922541e-07, "loss": 1.2652, "step": 6665 }, { "epoch": 0.83, "grad_norm": 7.560395141867356, "learning_rate": 7.466702674493492e-07, "loss": 0.9106, "step": 6666 }, { "epoch": 0.83, "grad_norm": 21.268520487129802, "learning_rate": 7.456116513453165e-07, "loss": 1.0319, "step": 6667 }, { "epoch": 0.83, "grad_norm": 5.783874746547997, "learning_rate": 7.445537257519775e-07, "loss": 0.527, "step": 6668 }, { "epoch": 0.83, "grad_norm": 8.177198105186447, "learning_rate": 7.434964908410386e-07, "loss": 0.6667, "step": 6669 }, { "epoch": 0.83, "grad_norm": 7.285366449501971, "learning_rate": 7.424399467840959e-07, "loss": 0.658, "step": 6670 }, { "epoch": 0.83, "grad_norm": 9.0587466332393, "learning_rate": 7.413840937526312e-07, "loss": 0.7682, "step": 6671 }, { "epoch": 0.83, "grad_norm": 7.465194998001531, "learning_rate": 7.403289319180151e-07, "loss": 1.0517, "step": 6672 }, { "epoch": 0.83, "grad_norm": 8.336482928939423, "learning_rate": 7.392744614515079e-07, "loss": 1.0735, "step": 6673 }, { "epoch": 0.83, "grad_norm": 7.477670530521732, "learning_rate": 7.382206825242521e-07, "loss": 1.2241, "step": 6674 }, { "epoch": 0.83, "grad_norm": 7.936685971064261, "learning_rate": 7.371675953072871e-07, "loss": 1.0111, "step": 6675 }, { "epoch": 0.83, "grad_norm": 7.373564616926824, "learning_rate": 7.36115199971531e-07, "loss": 0.8205, "step": 6676 }, { "epoch": 0.83, "grad_norm": 6.0334383998133285, "learning_rate": 7.350634966877951e-07, "loss": 0.6421, "step": 6677 }, { "epoch": 0.83, "grad_norm": 7.656204151426966, "learning_rate": 7.340124856267755e-07, "loss": 0.8188, "step": 6678 }, { "epoch": 0.83, "grad_norm": 6.8823954188802245, "learning_rate": 7.329621669590587e-07, "loss": 0.6867, "step": 6679 }, { "epoch": 0.83, "grad_norm": 7.0880496419811285, "learning_rate": 7.319125408551148e-07, "loss": 0.9723, "step": 6680 }, { "epoch": 0.83, "grad_norm": 8.612064596616145, "learning_rate": 7.308636074853059e-07, "loss": 1.144, "step": 6681 }, { "epoch": 0.83, "grad_norm": 7.6943587720543425, "learning_rate": 7.298153670198799e-07, "loss": 1.0008, "step": 6682 }, { "epoch": 0.83, "grad_norm": 7.719193292582627, "learning_rate": 7.287678196289688e-07, "loss": 0.8066, "step": 6683 }, { "epoch": 0.83, "grad_norm": 7.8646348679863, "learning_rate": 7.277209654826001e-07, "loss": 1.0379, "step": 6684 }, { "epoch": 0.83, "grad_norm": 8.053080245281606, "learning_rate": 7.266748047506806e-07, "loss": 0.6888, "step": 6685 }, { "epoch": 0.83, "grad_norm": 8.197425490567444, "learning_rate": 7.256293376030083e-07, "loss": 1.0039, "step": 6686 }, { "epoch": 0.83, "grad_norm": 8.72221396872405, "learning_rate": 7.245845642092697e-07, "loss": 1.0116, "step": 6687 }, { "epoch": 0.83, "grad_norm": 7.366994373151584, "learning_rate": 7.23540484739037e-07, "loss": 1.0832, "step": 6688 }, { "epoch": 0.83, "grad_norm": 7.016372038052102, "learning_rate": 7.224970993617685e-07, "loss": 0.9704, "step": 6689 }, { "epoch": 0.83, "grad_norm": 8.31034932377976, "learning_rate": 7.214544082468128e-07, "loss": 0.8817, "step": 6690 }, { "epoch": 0.83, "grad_norm": 9.56271274402207, "learning_rate": 7.204124115634049e-07, "loss": 1.0018, "step": 6691 }, { "epoch": 0.83, "grad_norm": 7.044371741368022, "learning_rate": 7.193711094806638e-07, "loss": 0.8675, "step": 6692 }, { "epoch": 0.83, "grad_norm": 7.459722327314529, "learning_rate": 7.183305021676029e-07, "loss": 0.8672, "step": 6693 }, { "epoch": 0.83, "grad_norm": 6.399279379721464, "learning_rate": 7.172905897931148e-07, "loss": 0.5966, "step": 6694 }, { "epoch": 0.83, "grad_norm": 6.449086179176543, "learning_rate": 7.16251372525984e-07, "loss": 0.6967, "step": 6695 }, { "epoch": 0.83, "grad_norm": 7.675503775321406, "learning_rate": 7.152128505348821e-07, "loss": 0.781, "step": 6696 }, { "epoch": 0.83, "grad_norm": 6.602157649326028, "learning_rate": 7.141750239883672e-07, "loss": 0.7095, "step": 6697 }, { "epoch": 0.83, "grad_norm": 7.5979880827935, "learning_rate": 7.131378930548827e-07, "loss": 1.0277, "step": 6698 }, { "epoch": 0.83, "grad_norm": 7.581707540136637, "learning_rate": 7.121014579027613e-07, "loss": 0.871, "step": 6699 }, { "epoch": 0.83, "grad_norm": 8.15014128942873, "learning_rate": 7.110657187002234e-07, "loss": 0.909, "step": 6700 }, { "epoch": 0.83, "grad_norm": 8.87187515068465, "learning_rate": 7.10030675615373e-07, "loss": 1.1601, "step": 6701 }, { "epoch": 0.83, "grad_norm": 7.640457188946548, "learning_rate": 7.089963288162044e-07, "loss": 0.8704, "step": 6702 }, { "epoch": 0.83, "grad_norm": 8.32157714325027, "learning_rate": 7.079626784705978e-07, "loss": 1.049, "step": 6703 }, { "epoch": 0.83, "grad_norm": 6.7809389760242125, "learning_rate": 7.069297247463203e-07, "loss": 0.8212, "step": 6704 }, { "epoch": 0.83, "grad_norm": 8.284638903984092, "learning_rate": 7.058974678110259e-07, "loss": 1.1134, "step": 6705 }, { "epoch": 0.83, "grad_norm": 7.017458193512619, "learning_rate": 7.048659078322573e-07, "loss": 0.9787, "step": 6706 }, { "epoch": 0.83, "grad_norm": 7.117784212654921, "learning_rate": 7.03835044977439e-07, "loss": 0.8508, "step": 6707 }, { "epoch": 0.83, "grad_norm": 8.455570902887214, "learning_rate": 7.028048794138881e-07, "loss": 1.1929, "step": 6708 }, { "epoch": 0.83, "grad_norm": 6.8061054656456506, "learning_rate": 7.017754113088066e-07, "loss": 0.7795, "step": 6709 }, { "epoch": 0.83, "grad_norm": 6.3123732881997165, "learning_rate": 7.007466408292801e-07, "loss": 0.5885, "step": 6710 }, { "epoch": 0.83, "grad_norm": 6.4071013274204605, "learning_rate": 6.997185681422858e-07, "loss": 0.7136, "step": 6711 }, { "epoch": 0.83, "grad_norm": 6.567275170012353, "learning_rate": 6.986911934146856e-07, "loss": 0.8564, "step": 6712 }, { "epoch": 0.83, "grad_norm": 8.49671985465361, "learning_rate": 6.976645168132274e-07, "loss": 0.9909, "step": 6713 }, { "epoch": 0.84, "grad_norm": 8.219458409040403, "learning_rate": 6.966385385045476e-07, "loss": 0.926, "step": 6714 }, { "epoch": 0.84, "grad_norm": 8.015248443661916, "learning_rate": 6.956132586551662e-07, "loss": 0.9177, "step": 6715 }, { "epoch": 0.84, "grad_norm": 7.0616360219054615, "learning_rate": 6.945886774314931e-07, "loss": 0.956, "step": 6716 }, { "epoch": 0.84, "grad_norm": 8.650958247926441, "learning_rate": 6.935647949998231e-07, "loss": 0.8679, "step": 6717 }, { "epoch": 0.84, "grad_norm": 8.746136136520274, "learning_rate": 6.925416115263395e-07, "loss": 0.9882, "step": 6718 }, { "epoch": 0.84, "grad_norm": 7.091024984783389, "learning_rate": 6.915191271771082e-07, "loss": 0.9757, "step": 6719 }, { "epoch": 0.84, "grad_norm": 8.780291906595014, "learning_rate": 6.90497342118085e-07, "loss": 0.9085, "step": 6720 }, { "epoch": 0.84, "grad_norm": 7.573958846487399, "learning_rate": 6.894762565151109e-07, "loss": 0.94, "step": 6721 }, { "epoch": 0.84, "grad_norm": 7.808076529885257, "learning_rate": 6.884558705339145e-07, "loss": 0.8733, "step": 6722 }, { "epoch": 0.84, "grad_norm": 7.020549106405796, "learning_rate": 6.874361843401112e-07, "loss": 0.8416, "step": 6723 }, { "epoch": 0.84, "grad_norm": 7.910612720622373, "learning_rate": 6.864171980991985e-07, "loss": 0.8017, "step": 6724 }, { "epoch": 0.84, "grad_norm": 6.71027220061586, "learning_rate": 6.853989119765658e-07, "loss": 0.9987, "step": 6725 }, { "epoch": 0.84, "grad_norm": 7.078960294324181, "learning_rate": 6.843813261374854e-07, "loss": 0.8733, "step": 6726 }, { "epoch": 0.84, "grad_norm": 8.169078779142104, "learning_rate": 6.83364440747119e-07, "loss": 0.953, "step": 6727 }, { "epoch": 0.84, "grad_norm": 6.521770034601892, "learning_rate": 6.823482559705108e-07, "loss": 0.8788, "step": 6728 }, { "epoch": 0.84, "grad_norm": 9.300644373765943, "learning_rate": 6.813327719725932e-07, "loss": 0.9585, "step": 6729 }, { "epoch": 0.84, "grad_norm": 7.599838713503711, "learning_rate": 6.803179889181866e-07, "loss": 0.9038, "step": 6730 }, { "epoch": 0.84, "grad_norm": 6.1690689977305215, "learning_rate": 6.793039069719925e-07, "loss": 0.5014, "step": 6731 }, { "epoch": 0.84, "grad_norm": 8.050738479893196, "learning_rate": 6.782905262986067e-07, "loss": 0.8952, "step": 6732 }, { "epoch": 0.84, "grad_norm": 7.991789663548489, "learning_rate": 6.772778470625024e-07, "loss": 1.0077, "step": 6733 }, { "epoch": 0.84, "grad_norm": 7.057291690940514, "learning_rate": 6.76265869428045e-07, "loss": 0.7665, "step": 6734 }, { "epoch": 0.84, "grad_norm": 8.383937797868136, "learning_rate": 6.752545935594834e-07, "loss": 0.9883, "step": 6735 }, { "epoch": 0.84, "grad_norm": 8.972580833305926, "learning_rate": 6.742440196209548e-07, "loss": 1.4704, "step": 6736 }, { "epoch": 0.84, "grad_norm": 7.103259572732026, "learning_rate": 6.732341477764781e-07, "loss": 0.8768, "step": 6737 }, { "epoch": 0.84, "grad_norm": 8.31444983171907, "learning_rate": 6.722249781899631e-07, "loss": 1.3189, "step": 6738 }, { "epoch": 0.84, "grad_norm": 8.282453319704132, "learning_rate": 6.712165110252039e-07, "loss": 1.0156, "step": 6739 }, { "epoch": 0.84, "grad_norm": 5.575037132447968, "learning_rate": 6.702087464458778e-07, "loss": 0.7707, "step": 6740 }, { "epoch": 0.84, "grad_norm": 6.5047803964244, "learning_rate": 6.692016846155536e-07, "loss": 0.5728, "step": 6741 }, { "epoch": 0.84, "grad_norm": 6.734846912962003, "learning_rate": 6.681953256976814e-07, "loss": 0.8027, "step": 6742 }, { "epoch": 0.84, "grad_norm": 8.948827238094704, "learning_rate": 6.671896698555985e-07, "loss": 0.9885, "step": 6743 }, { "epoch": 0.84, "grad_norm": 6.620321520362557, "learning_rate": 6.661847172525288e-07, "loss": 0.8986, "step": 6744 }, { "epoch": 0.84, "grad_norm": 9.70831949852124, "learning_rate": 6.651804680515828e-07, "loss": 1.0986, "step": 6745 }, { "epoch": 0.84, "grad_norm": 8.79286489543572, "learning_rate": 6.64176922415754e-07, "loss": 1.156, "step": 6746 }, { "epoch": 0.84, "grad_norm": 7.730069362191007, "learning_rate": 6.631740805079235e-07, "loss": 0.8677, "step": 6747 }, { "epoch": 0.84, "grad_norm": 7.303979868496582, "learning_rate": 6.621719424908601e-07, "loss": 0.8389, "step": 6748 }, { "epoch": 0.84, "grad_norm": 7.252862718553777, "learning_rate": 6.611705085272124e-07, "loss": 0.7008, "step": 6749 }, { "epoch": 0.84, "grad_norm": 6.4913583017181224, "learning_rate": 6.601697787795236e-07, "loss": 0.5734, "step": 6750 }, { "epoch": 0.84, "grad_norm": 6.8436888930153, "learning_rate": 6.591697534102138e-07, "loss": 0.8696, "step": 6751 }, { "epoch": 0.84, "grad_norm": 6.808338152538735, "learning_rate": 6.581704325815941e-07, "loss": 0.7109, "step": 6752 }, { "epoch": 0.84, "grad_norm": 6.773833430912757, "learning_rate": 6.571718164558593e-07, "loss": 1.0136, "step": 6753 }, { "epoch": 0.84, "grad_norm": 8.880400995419372, "learning_rate": 6.561739051950922e-07, "loss": 1.2517, "step": 6754 }, { "epoch": 0.84, "grad_norm": 6.173768369792934, "learning_rate": 6.551766989612562e-07, "loss": 0.7754, "step": 6755 }, { "epoch": 0.84, "grad_norm": 7.588548479474189, "learning_rate": 6.541801979162054e-07, "loss": 0.6544, "step": 6756 }, { "epoch": 0.84, "grad_norm": 8.394591262319008, "learning_rate": 6.531844022216776e-07, "loss": 0.797, "step": 6757 }, { "epoch": 0.84, "grad_norm": 7.885008125329612, "learning_rate": 6.521893120392936e-07, "loss": 1.0279, "step": 6758 }, { "epoch": 0.84, "grad_norm": 8.042622660861252, "learning_rate": 6.511949275305657e-07, "loss": 1.1003, "step": 6759 }, { "epoch": 0.84, "grad_norm": 6.99961186139674, "learning_rate": 6.502012488568848e-07, "loss": 0.7965, "step": 6760 }, { "epoch": 0.84, "grad_norm": 7.673062757501115, "learning_rate": 6.492082761795315e-07, "loss": 1.0686, "step": 6761 }, { "epoch": 0.84, "grad_norm": 8.441147368456914, "learning_rate": 6.482160096596712e-07, "loss": 1.0016, "step": 6762 }, { "epoch": 0.84, "grad_norm": 7.432037324836279, "learning_rate": 6.472244494583546e-07, "loss": 0.8733, "step": 6763 }, { "epoch": 0.84, "grad_norm": 6.000320722058551, "learning_rate": 6.462335957365163e-07, "loss": 0.9222, "step": 6764 }, { "epoch": 0.84, "grad_norm": 7.641558954701704, "learning_rate": 6.45243448654977e-07, "loss": 0.8176, "step": 6765 }, { "epoch": 0.84, "grad_norm": 6.8581784742499465, "learning_rate": 6.442540083744453e-07, "loss": 0.7634, "step": 6766 }, { "epoch": 0.84, "grad_norm": 7.60604332552879, "learning_rate": 6.432652750555096e-07, "loss": 1.0569, "step": 6767 }, { "epoch": 0.84, "grad_norm": 7.602850505624342, "learning_rate": 6.422772488586488e-07, "loss": 0.6496, "step": 6768 }, { "epoch": 0.84, "grad_norm": 7.046394292946042, "learning_rate": 6.412899299442249e-07, "loss": 0.957, "step": 6769 }, { "epoch": 0.84, "grad_norm": 7.285742147385358, "learning_rate": 6.403033184724844e-07, "loss": 0.8341, "step": 6770 }, { "epoch": 0.84, "grad_norm": 8.689265355603986, "learning_rate": 6.393174146035619e-07, "loss": 1.1227, "step": 6771 }, { "epoch": 0.84, "grad_norm": 8.068991342886639, "learning_rate": 6.383322184974716e-07, "loss": 0.983, "step": 6772 }, { "epoch": 0.84, "grad_norm": 8.327620668511676, "learning_rate": 6.37347730314119e-07, "loss": 1.1915, "step": 6773 }, { "epoch": 0.84, "grad_norm": 7.0801330472099, "learning_rate": 6.363639502132907e-07, "loss": 0.9503, "step": 6774 }, { "epoch": 0.84, "grad_norm": 6.181031318803977, "learning_rate": 6.353808783546611e-07, "loss": 0.5848, "step": 6775 }, { "epoch": 0.84, "grad_norm": 8.189369289664434, "learning_rate": 6.34398514897786e-07, "loss": 0.9404, "step": 6776 }, { "epoch": 0.84, "grad_norm": 7.7002225593617215, "learning_rate": 6.334168600021095e-07, "loss": 1.2211, "step": 6777 }, { "epoch": 0.84, "grad_norm": 8.094007856299147, "learning_rate": 6.324359138269598e-07, "loss": 0.8438, "step": 6778 }, { "epoch": 0.84, "grad_norm": 6.322185638715561, "learning_rate": 6.314556765315493e-07, "loss": 0.8699, "step": 6779 }, { "epoch": 0.84, "grad_norm": 7.905846641954826, "learning_rate": 6.304761482749777e-07, "loss": 0.9191, "step": 6780 }, { "epoch": 0.84, "grad_norm": 11.738217552407141, "learning_rate": 6.294973292162249e-07, "loss": 0.8992, "step": 6781 }, { "epoch": 0.84, "grad_norm": 7.977831192298976, "learning_rate": 6.285192195141609e-07, "loss": 1.3557, "step": 6782 }, { "epoch": 0.84, "grad_norm": 5.869286018097771, "learning_rate": 6.275418193275367e-07, "loss": 0.4898, "step": 6783 }, { "epoch": 0.84, "grad_norm": 8.28919982273714, "learning_rate": 6.265651288149926e-07, "loss": 1.037, "step": 6784 }, { "epoch": 0.84, "grad_norm": 7.468845100909648, "learning_rate": 6.255891481350473e-07, "loss": 0.9776, "step": 6785 }, { "epoch": 0.84, "grad_norm": 6.429655170098135, "learning_rate": 6.246138774461097e-07, "loss": 0.8117, "step": 6786 }, { "epoch": 0.84, "grad_norm": 7.085281098578509, "learning_rate": 6.23639316906472e-07, "loss": 0.9088, "step": 6787 }, { "epoch": 0.84, "grad_norm": 12.160775539257948, "learning_rate": 6.226654666743082e-07, "loss": 1.1137, "step": 6788 }, { "epoch": 0.84, "grad_norm": 9.102885936345448, "learning_rate": 6.216923269076836e-07, "loss": 1.3017, "step": 6789 }, { "epoch": 0.84, "grad_norm": 7.837103676445881, "learning_rate": 6.207198977645406e-07, "loss": 0.8006, "step": 6790 }, { "epoch": 0.84, "grad_norm": 8.508075701992002, "learning_rate": 6.197481794027116e-07, "loss": 0.991, "step": 6791 }, { "epoch": 0.84, "grad_norm": 8.869964961890842, "learning_rate": 6.187771719799112e-07, "loss": 0.985, "step": 6792 }, { "epoch": 0.84, "grad_norm": 6.719764020309271, "learning_rate": 6.178068756537409e-07, "loss": 0.7264, "step": 6793 }, { "epoch": 0.85, "grad_norm": 5.68793998952767, "learning_rate": 6.168372905816822e-07, "loss": 0.7212, "step": 6794 }, { "epoch": 0.85, "grad_norm": 7.8244480881200005, "learning_rate": 6.15868416921106e-07, "loss": 0.9608, "step": 6795 }, { "epoch": 0.85, "grad_norm": 6.981378422617563, "learning_rate": 6.149002548292665e-07, "loss": 0.7464, "step": 6796 }, { "epoch": 0.85, "grad_norm": 7.6960120435514, "learning_rate": 6.139328044632986e-07, "loss": 0.7193, "step": 6797 }, { "epoch": 0.85, "grad_norm": 7.733605795901136, "learning_rate": 6.129660659802294e-07, "loss": 0.7265, "step": 6798 }, { "epoch": 0.85, "grad_norm": 6.786468690072861, "learning_rate": 6.120000395369624e-07, "loss": 0.8059, "step": 6799 }, { "epoch": 0.85, "grad_norm": 8.109302772548306, "learning_rate": 6.110347252902899e-07, "loss": 0.824, "step": 6800 }, { "epoch": 0.85, "grad_norm": 7.014108258930261, "learning_rate": 6.100701233968876e-07, "loss": 1.036, "step": 6801 }, { "epoch": 0.85, "grad_norm": 7.460318009800887, "learning_rate": 6.091062340133175e-07, "loss": 0.7522, "step": 6802 }, { "epoch": 0.85, "grad_norm": 7.371935808621093, "learning_rate": 6.081430572960211e-07, "loss": 1.006, "step": 6803 }, { "epoch": 0.85, "grad_norm": 8.42466292608873, "learning_rate": 6.071805934013297e-07, "loss": 1.3977, "step": 6804 }, { "epoch": 0.85, "grad_norm": 7.606808623442708, "learning_rate": 6.062188424854565e-07, "loss": 0.9645, "step": 6805 }, { "epoch": 0.85, "grad_norm": 6.003602080280321, "learning_rate": 6.052578047044955e-07, "loss": 0.5821, "step": 6806 }, { "epoch": 0.85, "grad_norm": 7.157278928213894, "learning_rate": 6.042974802144335e-07, "loss": 0.7001, "step": 6807 }, { "epoch": 0.85, "grad_norm": 8.52240779286377, "learning_rate": 6.033378691711333e-07, "loss": 1.1853, "step": 6808 }, { "epoch": 0.85, "grad_norm": 8.123446516388844, "learning_rate": 6.023789717303458e-07, "loss": 0.8967, "step": 6809 }, { "epoch": 0.85, "grad_norm": 8.302046350039426, "learning_rate": 6.014207880477047e-07, "loss": 0.9648, "step": 6810 }, { "epoch": 0.85, "grad_norm": 6.134489164932955, "learning_rate": 6.004633182787306e-07, "loss": 0.6848, "step": 6811 }, { "epoch": 0.85, "grad_norm": 8.504518308585899, "learning_rate": 5.995065625788238e-07, "loss": 1.249, "step": 6812 }, { "epoch": 0.85, "grad_norm": 8.304148062766556, "learning_rate": 5.985505211032716e-07, "loss": 1.1797, "step": 6813 }, { "epoch": 0.85, "grad_norm": 7.040845275566652, "learning_rate": 5.975951940072466e-07, "loss": 0.9774, "step": 6814 }, { "epoch": 0.85, "grad_norm": 7.956354684935347, "learning_rate": 5.966405814457999e-07, "loss": 0.8831, "step": 6815 }, { "epoch": 0.85, "grad_norm": 7.477968870528765, "learning_rate": 5.956866835738745e-07, "loss": 0.6845, "step": 6816 }, { "epoch": 0.85, "grad_norm": 7.166358156796287, "learning_rate": 5.947335005462907e-07, "loss": 0.8145, "step": 6817 }, { "epoch": 0.85, "grad_norm": 8.104391943678523, "learning_rate": 5.937810325177556e-07, "loss": 0.8464, "step": 6818 }, { "epoch": 0.85, "grad_norm": 7.899995656837404, "learning_rate": 5.928292796428608e-07, "loss": 1.0316, "step": 6819 }, { "epoch": 0.85, "grad_norm": 7.322987034417743, "learning_rate": 5.918782420760821e-07, "loss": 0.9429, "step": 6820 }, { "epoch": 0.85, "grad_norm": 7.217458390185986, "learning_rate": 5.909279199717755e-07, "loss": 0.8327, "step": 6821 }, { "epoch": 0.85, "grad_norm": 7.838211131669868, "learning_rate": 5.899783134841846e-07, "loss": 0.8657, "step": 6822 }, { "epoch": 0.85, "grad_norm": 9.297721686987051, "learning_rate": 5.890294227674376e-07, "loss": 1.0414, "step": 6823 }, { "epoch": 0.85, "grad_norm": 5.754001192688713, "learning_rate": 5.880812479755416e-07, "loss": 0.7096, "step": 6824 }, { "epoch": 0.85, "grad_norm": 8.310450664438727, "learning_rate": 5.871337892623919e-07, "loss": 0.868, "step": 6825 }, { "epoch": 0.85, "grad_norm": 6.879355983555224, "learning_rate": 5.861870467817671e-07, "loss": 0.8074, "step": 6826 }, { "epoch": 0.85, "grad_norm": 7.3852905174135985, "learning_rate": 5.852410206873277e-07, "loss": 1.0675, "step": 6827 }, { "epoch": 0.85, "grad_norm": 6.667168571180055, "learning_rate": 5.842957111326203e-07, "loss": 0.7723, "step": 6828 }, { "epoch": 0.85, "grad_norm": 6.172474230172058, "learning_rate": 5.833511182710716e-07, "loss": 1.1151, "step": 6829 }, { "epoch": 0.85, "grad_norm": 6.94024983130489, "learning_rate": 5.824072422559962e-07, "loss": 0.7268, "step": 6830 }, { "epoch": 0.85, "grad_norm": 7.803888055440208, "learning_rate": 5.814640832405893e-07, "loss": 1.0397, "step": 6831 }, { "epoch": 0.85, "grad_norm": 8.836751063211729, "learning_rate": 5.805216413779318e-07, "loss": 1.2197, "step": 6832 }, { "epoch": 0.85, "grad_norm": 7.13729141026771, "learning_rate": 5.795799168209865e-07, "loss": 0.9546, "step": 6833 }, { "epoch": 0.85, "grad_norm": 8.728166555283, "learning_rate": 5.786389097226003e-07, "loss": 0.9838, "step": 6834 }, { "epoch": 0.85, "grad_norm": 7.530772615736291, "learning_rate": 5.776986202355039e-07, "loss": 0.9249, "step": 6835 }, { "epoch": 0.85, "grad_norm": 8.579901001804489, "learning_rate": 5.76759048512312e-07, "loss": 0.9989, "step": 6836 }, { "epoch": 0.85, "grad_norm": 7.844846222968754, "learning_rate": 5.758201947055225e-07, "loss": 0.7077, "step": 6837 }, { "epoch": 0.85, "grad_norm": 6.997835381543465, "learning_rate": 5.748820589675158e-07, "loss": 0.6318, "step": 6838 }, { "epoch": 0.85, "grad_norm": 8.973348778553921, "learning_rate": 5.739446414505562e-07, "loss": 1.0909, "step": 6839 }, { "epoch": 0.85, "grad_norm": 7.029794826797016, "learning_rate": 5.730079423067924e-07, "loss": 0.718, "step": 6840 }, { "epoch": 0.85, "grad_norm": 9.253176254346764, "learning_rate": 5.72071961688257e-07, "loss": 0.9989, "step": 6841 }, { "epoch": 0.85, "grad_norm": 8.802668750005882, "learning_rate": 5.71136699746862e-07, "loss": 0.8377, "step": 6842 }, { "epoch": 0.85, "grad_norm": 7.358801010415235, "learning_rate": 5.702021566344079e-07, "loss": 0.6868, "step": 6843 }, { "epoch": 0.85, "grad_norm": 7.374738614719034, "learning_rate": 5.692683325025756e-07, "loss": 0.8158, "step": 6844 }, { "epoch": 0.85, "grad_norm": 6.570330094723927, "learning_rate": 5.68335227502928e-07, "loss": 0.6734, "step": 6845 }, { "epoch": 0.85, "grad_norm": 7.459239499204963, "learning_rate": 5.674028417869171e-07, "loss": 0.6495, "step": 6846 }, { "epoch": 0.85, "grad_norm": 8.441426747140419, "learning_rate": 5.664711755058711e-07, "loss": 0.9773, "step": 6847 }, { "epoch": 0.85, "grad_norm": 8.330834087387474, "learning_rate": 5.655402288110051e-07, "loss": 1.0135, "step": 6848 }, { "epoch": 0.85, "grad_norm": 6.930389800876494, "learning_rate": 5.646100018534178e-07, "loss": 1.0195, "step": 6849 }, { "epoch": 0.85, "grad_norm": 7.0492417278687824, "learning_rate": 5.636804947840907e-07, "loss": 0.7005, "step": 6850 }, { "epoch": 0.85, "grad_norm": 7.599299053794649, "learning_rate": 5.627517077538852e-07, "loss": 1.0429, "step": 6851 }, { "epoch": 0.85, "grad_norm": 6.612882578411582, "learning_rate": 5.618236409135508e-07, "loss": 0.8066, "step": 6852 }, { "epoch": 0.85, "grad_norm": 7.3718372318132035, "learning_rate": 5.608962944137186e-07, "loss": 0.7887, "step": 6853 }, { "epoch": 0.85, "grad_norm": 9.80185680394337, "learning_rate": 5.599696684048983e-07, "loss": 1.1551, "step": 6854 }, { "epoch": 0.85, "grad_norm": 7.0422929388259154, "learning_rate": 5.590437630374906e-07, "loss": 0.8935, "step": 6855 }, { "epoch": 0.85, "grad_norm": 4.982641140598578, "learning_rate": 5.581185784617732e-07, "loss": 0.443, "step": 6856 }, { "epoch": 0.85, "grad_norm": 8.204777459070018, "learning_rate": 5.571941148279081e-07, "loss": 0.7577, "step": 6857 }, { "epoch": 0.85, "grad_norm": 10.604357784248222, "learning_rate": 5.56270372285942e-07, "loss": 1.1006, "step": 6858 }, { "epoch": 0.85, "grad_norm": 7.63339604093781, "learning_rate": 5.553473509858038e-07, "loss": 0.8685, "step": 6859 }, { "epoch": 0.85, "grad_norm": 8.083862622262858, "learning_rate": 5.54425051077303e-07, "loss": 1.0561, "step": 6860 }, { "epoch": 0.85, "grad_norm": 7.915765618647807, "learning_rate": 5.535034727101346e-07, "loss": 0.8654, "step": 6861 }, { "epoch": 0.85, "grad_norm": 8.142884157355116, "learning_rate": 5.525826160338782e-07, "loss": 0.7825, "step": 6862 }, { "epoch": 0.85, "grad_norm": 6.849975912360907, "learning_rate": 5.516624811979893e-07, "loss": 0.8788, "step": 6863 }, { "epoch": 0.85, "grad_norm": 6.665543974653707, "learning_rate": 5.507430683518161e-07, "loss": 0.6293, "step": 6864 }, { "epoch": 0.85, "grad_norm": 7.21495841506633, "learning_rate": 5.498243776445806e-07, "loss": 0.8886, "step": 6865 }, { "epoch": 0.85, "grad_norm": 7.57976359296963, "learning_rate": 5.489064092253926e-07, "loss": 1.1055, "step": 6866 }, { "epoch": 0.85, "grad_norm": 8.786777425989191, "learning_rate": 5.479891632432438e-07, "loss": 0.9695, "step": 6867 }, { "epoch": 0.85, "grad_norm": 7.445253129574161, "learning_rate": 5.470726398470089e-07, "loss": 0.9665, "step": 6868 }, { "epoch": 0.85, "grad_norm": 7.494657067215812, "learning_rate": 5.461568391854422e-07, "loss": 0.9434, "step": 6869 }, { "epoch": 0.85, "grad_norm": 6.7177926607301055, "learning_rate": 5.452417614071853e-07, "loss": 0.9077, "step": 6870 }, { "epoch": 0.85, "grad_norm": 8.903418172914215, "learning_rate": 5.443274066607607e-07, "loss": 0.8335, "step": 6871 }, { "epoch": 0.85, "grad_norm": 8.235062460882, "learning_rate": 5.4341377509457e-07, "loss": 1.0472, "step": 6872 }, { "epoch": 0.85, "grad_norm": 8.205088249799145, "learning_rate": 5.425008668569054e-07, "loss": 0.9568, "step": 6873 }, { "epoch": 0.85, "grad_norm": 7.164135792381971, "learning_rate": 5.415886820959337e-07, "loss": 0.9551, "step": 6874 }, { "epoch": 0.86, "grad_norm": 9.599004459769809, "learning_rate": 5.406772209597077e-07, "loss": 1.1414, "step": 6875 }, { "epoch": 0.86, "grad_norm": 7.102349430438272, "learning_rate": 5.397664835961647e-07, "loss": 0.6339, "step": 6876 }, { "epoch": 0.86, "grad_norm": 10.220584086199246, "learning_rate": 5.388564701531202e-07, "loss": 0.9996, "step": 6877 }, { "epoch": 0.86, "grad_norm": 8.465801067286382, "learning_rate": 5.379471807782743e-07, "loss": 0.8171, "step": 6878 }, { "epoch": 0.86, "grad_norm": 7.1354241696247795, "learning_rate": 5.370386156192108e-07, "loss": 1.0171, "step": 6879 }, { "epoch": 0.86, "grad_norm": 7.811911445357547, "learning_rate": 5.361307748233957e-07, "loss": 0.7885, "step": 6880 }, { "epoch": 0.86, "grad_norm": 9.256396922511797, "learning_rate": 5.352236585381731e-07, "loss": 1.2013, "step": 6881 }, { "epoch": 0.86, "grad_norm": 8.79602288761628, "learning_rate": 5.343172669107776e-07, "loss": 1.1864, "step": 6882 }, { "epoch": 0.86, "grad_norm": 7.870501821151754, "learning_rate": 5.334116000883183e-07, "loss": 0.921, "step": 6883 }, { "epoch": 0.86, "grad_norm": 7.819106016184039, "learning_rate": 5.325066582177907e-07, "loss": 0.8637, "step": 6884 }, { "epoch": 0.86, "grad_norm": 8.128642907003423, "learning_rate": 5.316024414460729e-07, "loss": 1.1179, "step": 6885 }, { "epoch": 0.86, "grad_norm": 6.40187851125162, "learning_rate": 5.306989499199222e-07, "loss": 0.761, "step": 6886 }, { "epoch": 0.86, "grad_norm": 6.940232746324415, "learning_rate": 5.297961837859816e-07, "loss": 0.8835, "step": 6887 }, { "epoch": 0.86, "grad_norm": 6.696806190993629, "learning_rate": 5.288941431907752e-07, "loss": 0.6921, "step": 6888 }, { "epoch": 0.86, "grad_norm": 6.296225240390322, "learning_rate": 5.279928282807096e-07, "loss": 0.5893, "step": 6889 }, { "epoch": 0.86, "grad_norm": 7.011143088951439, "learning_rate": 5.27092239202071e-07, "loss": 0.7449, "step": 6890 }, { "epoch": 0.86, "grad_norm": 6.843520174993093, "learning_rate": 5.261923761010318e-07, "loss": 0.8939, "step": 6891 }, { "epoch": 0.86, "grad_norm": 7.512005941867069, "learning_rate": 5.252932391236443e-07, "loss": 0.9228, "step": 6892 }, { "epoch": 0.86, "grad_norm": 7.712706443396023, "learning_rate": 5.243948284158434e-07, "loss": 0.8714, "step": 6893 }, { "epoch": 0.86, "grad_norm": 7.012214944147975, "learning_rate": 5.234971441234471e-07, "loss": 0.68, "step": 6894 }, { "epoch": 0.86, "grad_norm": 12.201841795793271, "learning_rate": 5.226001863921526e-07, "loss": 1.2767, "step": 6895 }, { "epoch": 0.86, "grad_norm": 8.141308023653403, "learning_rate": 5.217039553675419e-07, "loss": 1.2734, "step": 6896 }, { "epoch": 0.86, "grad_norm": 8.08806940585837, "learning_rate": 5.208084511950784e-07, "loss": 0.7172, "step": 6897 }, { "epoch": 0.86, "grad_norm": 8.797253023855856, "learning_rate": 5.199136740201083e-07, "loss": 1.0662, "step": 6898 }, { "epoch": 0.86, "grad_norm": 7.655242295632965, "learning_rate": 5.19019623987857e-07, "loss": 0.9506, "step": 6899 }, { "epoch": 0.86, "grad_norm": 9.362438475743271, "learning_rate": 5.181263012434346e-07, "loss": 1.1453, "step": 6900 }, { "epoch": 0.86, "grad_norm": 8.678222189890572, "learning_rate": 5.172337059318333e-07, "loss": 0.9986, "step": 6901 }, { "epoch": 0.86, "grad_norm": 8.648304923984101, "learning_rate": 5.163418381979235e-07, "loss": 0.8298, "step": 6902 }, { "epoch": 0.86, "grad_norm": 8.154224958386456, "learning_rate": 5.154506981864638e-07, "loss": 1.2186, "step": 6903 }, { "epoch": 0.86, "grad_norm": 8.798855833636292, "learning_rate": 5.145602860420884e-07, "loss": 1.0296, "step": 6904 }, { "epoch": 0.86, "grad_norm": 7.137991574595758, "learning_rate": 5.136706019093174e-07, "loss": 0.6653, "step": 6905 }, { "epoch": 0.86, "grad_norm": 7.491041106553058, "learning_rate": 5.127816459325508e-07, "loss": 0.7947, "step": 6906 }, { "epoch": 0.86, "grad_norm": 9.218312346149212, "learning_rate": 5.118934182560725e-07, "loss": 1.0862, "step": 6907 }, { "epoch": 0.86, "grad_norm": 7.3042083829751325, "learning_rate": 5.110059190240447e-07, "loss": 0.8643, "step": 6908 }, { "epoch": 0.86, "grad_norm": 7.398334228373771, "learning_rate": 5.10119148380514e-07, "loss": 0.8967, "step": 6909 }, { "epoch": 0.86, "grad_norm": 7.954125648733162, "learning_rate": 5.0923310646941e-07, "loss": 0.7969, "step": 6910 }, { "epoch": 0.86, "grad_norm": 7.065202569041579, "learning_rate": 5.083477934345382e-07, "loss": 0.7718, "step": 6911 }, { "epoch": 0.86, "grad_norm": 7.147001385072407, "learning_rate": 5.074632094195947e-07, "loss": 0.6415, "step": 6912 }, { "epoch": 0.86, "grad_norm": 8.627191486660777, "learning_rate": 5.065793545681491e-07, "loss": 1.1107, "step": 6913 }, { "epoch": 0.86, "grad_norm": 6.834817425101269, "learning_rate": 5.056962290236566e-07, "loss": 0.7965, "step": 6914 }, { "epoch": 0.86, "grad_norm": 7.998685352779382, "learning_rate": 5.048138329294533e-07, "loss": 0.8783, "step": 6915 }, { "epoch": 0.86, "grad_norm": 8.804610925268406, "learning_rate": 5.039321664287588e-07, "loss": 1.0874, "step": 6916 }, { "epoch": 0.86, "grad_norm": 7.647201190811569, "learning_rate": 5.030512296646695e-07, "loss": 0.9885, "step": 6917 }, { "epoch": 0.86, "grad_norm": 8.443130197796185, "learning_rate": 5.021710227801674e-07, "loss": 0.9777, "step": 6918 }, { "epoch": 0.86, "grad_norm": 6.541037863353053, "learning_rate": 5.012915459181162e-07, "loss": 0.679, "step": 6919 }, { "epoch": 0.86, "grad_norm": 6.979444956637863, "learning_rate": 5.00412799221257e-07, "loss": 1.1456, "step": 6920 }, { "epoch": 0.86, "grad_norm": 6.3695300000928805, "learning_rate": 4.995347828322189e-07, "loss": 0.9202, "step": 6921 }, { "epoch": 0.86, "grad_norm": 7.669978951646171, "learning_rate": 4.986574968935059e-07, "loss": 0.8333, "step": 6922 }, { "epoch": 0.86, "grad_norm": 7.3523464601535515, "learning_rate": 4.977809415475076e-07, "loss": 0.7346, "step": 6923 }, { "epoch": 0.86, "grad_norm": 7.404952751103893, "learning_rate": 4.969051169364936e-07, "loss": 0.7285, "step": 6924 }, { "epoch": 0.86, "grad_norm": 7.8521749601688295, "learning_rate": 4.960300232026155e-07, "loss": 0.9033, "step": 6925 }, { "epoch": 0.86, "grad_norm": 7.652878152204011, "learning_rate": 4.951556604879049e-07, "loss": 0.9787, "step": 6926 }, { "epoch": 0.86, "grad_norm": 8.166391058820334, "learning_rate": 4.942820289342759e-07, "loss": 1.1548, "step": 6927 }, { "epoch": 0.86, "grad_norm": 6.217246144923634, "learning_rate": 4.934091286835247e-07, "loss": 0.9518, "step": 6928 }, { "epoch": 0.86, "grad_norm": 7.97864397107118, "learning_rate": 4.925369598773256e-07, "loss": 1.005, "step": 6929 }, { "epoch": 0.86, "grad_norm": 7.196204098922791, "learning_rate": 4.916655226572392e-07, "loss": 0.9461, "step": 6930 }, { "epoch": 0.86, "grad_norm": 7.95409966104526, "learning_rate": 4.907948171647031e-07, "loss": 0.7998, "step": 6931 }, { "epoch": 0.86, "grad_norm": 6.3106980831050015, "learning_rate": 4.89924843541037e-07, "loss": 0.561, "step": 6932 }, { "epoch": 0.86, "grad_norm": 7.7019833200347785, "learning_rate": 4.890556019274445e-07, "loss": 1.0247, "step": 6933 }, { "epoch": 0.86, "grad_norm": 8.355639783849885, "learning_rate": 4.881870924650062e-07, "loss": 0.7077, "step": 6934 }, { "epoch": 0.86, "grad_norm": 7.301141836278764, "learning_rate": 4.873193152946864e-07, "loss": 0.7056, "step": 6935 }, { "epoch": 0.86, "grad_norm": 8.536548992623795, "learning_rate": 4.86452270557331e-07, "loss": 1.2515, "step": 6936 }, { "epoch": 0.86, "grad_norm": 6.601701860366143, "learning_rate": 4.855859583936661e-07, "loss": 0.5904, "step": 6937 }, { "epoch": 0.86, "grad_norm": 6.995232317962636, "learning_rate": 4.847203789442967e-07, "loss": 0.8299, "step": 6938 }, { "epoch": 0.86, "grad_norm": 9.166505337085518, "learning_rate": 4.838555323497151e-07, "loss": 0.8849, "step": 6939 }, { "epoch": 0.86, "grad_norm": 7.237982195888312, "learning_rate": 4.829914187502871e-07, "loss": 0.8323, "step": 6940 }, { "epoch": 0.86, "grad_norm": 7.319848814292077, "learning_rate": 4.821280382862647e-07, "loss": 0.9645, "step": 6941 }, { "epoch": 0.86, "grad_norm": 7.971551713068836, "learning_rate": 4.812653910977799e-07, "loss": 0.8533, "step": 6942 }, { "epoch": 0.86, "grad_norm": 8.176064274120366, "learning_rate": 4.804034773248434e-07, "loss": 1.1424, "step": 6943 }, { "epoch": 0.86, "grad_norm": 6.98336187290051, "learning_rate": 4.795422971073494e-07, "loss": 0.9833, "step": 6944 }, { "epoch": 0.86, "grad_norm": 7.161969752635508, "learning_rate": 4.786818505850721e-07, "loss": 1.0631, "step": 6945 }, { "epoch": 0.86, "grad_norm": 10.555902283156467, "learning_rate": 4.778221378976678e-07, "loss": 0.9065, "step": 6946 }, { "epoch": 0.86, "grad_norm": 6.795358576458968, "learning_rate": 4.769631591846707e-07, "loss": 0.7461, "step": 6947 }, { "epoch": 0.86, "grad_norm": 7.160800365772563, "learning_rate": 4.76104914585499e-07, "loss": 0.9187, "step": 6948 }, { "epoch": 0.86, "grad_norm": 6.7956653625495855, "learning_rate": 4.7524740423945005e-07, "loss": 0.9778, "step": 6949 }, { "epoch": 0.86, "grad_norm": 5.382824094092828, "learning_rate": 4.7439062828570305e-07, "loss": 0.4738, "step": 6950 }, { "epoch": 0.86, "grad_norm": 7.68203106952714, "learning_rate": 4.735345868633179e-07, "loss": 1.2813, "step": 6951 }, { "epoch": 0.86, "grad_norm": 7.487463652373114, "learning_rate": 4.726792801112329e-07, "loss": 0.6731, "step": 6952 }, { "epoch": 0.86, "grad_norm": 7.931222826184431, "learning_rate": 4.718247081682703e-07, "loss": 0.723, "step": 6953 }, { "epoch": 0.86, "grad_norm": 6.821478190604997, "learning_rate": 4.70970871173132e-07, "loss": 0.5902, "step": 6954 }, { "epoch": 0.87, "grad_norm": 7.582392657083679, "learning_rate": 4.70117769264401e-07, "loss": 0.6565, "step": 6955 }, { "epoch": 0.87, "grad_norm": 8.583785006193109, "learning_rate": 4.6926540258053877e-07, "loss": 0.8396, "step": 6956 }, { "epoch": 0.87, "grad_norm": 7.468074544009861, "learning_rate": 4.6841377125989016e-07, "loss": 0.9489, "step": 6957 }, { "epoch": 0.87, "grad_norm": 6.817501510931237, "learning_rate": 4.6756287544068026e-07, "loss": 0.8408, "step": 6958 }, { "epoch": 0.87, "grad_norm": 6.109203416551377, "learning_rate": 4.667127152610118e-07, "loss": 0.6708, "step": 6959 }, { "epoch": 0.87, "grad_norm": 7.636181934991497, "learning_rate": 4.658632908588739e-07, "loss": 0.8182, "step": 6960 }, { "epoch": 0.87, "grad_norm": 8.989196845888747, "learning_rate": 4.650146023721297e-07, "loss": 1.1948, "step": 6961 }, { "epoch": 0.87, "grad_norm": 7.961815763672733, "learning_rate": 4.641666499385278e-07, "loss": 0.7898, "step": 6962 }, { "epoch": 0.87, "grad_norm": 8.040942814149002, "learning_rate": 4.6331943369569474e-07, "loss": 0.8718, "step": 6963 }, { "epoch": 0.87, "grad_norm": 8.006935803046115, "learning_rate": 4.6247295378113956e-07, "loss": 0.8645, "step": 6964 }, { "epoch": 0.87, "grad_norm": 7.074720756281137, "learning_rate": 4.6162721033224886e-07, "loss": 0.9124, "step": 6965 }, { "epoch": 0.87, "grad_norm": 7.595592874555939, "learning_rate": 4.6078220348629297e-07, "loss": 0.9343, "step": 6966 }, { "epoch": 0.87, "grad_norm": 8.349531896778094, "learning_rate": 4.59937933380421e-07, "loss": 1.1242, "step": 6967 }, { "epoch": 0.87, "grad_norm": 7.09385036810813, "learning_rate": 4.590944001516601e-07, "loss": 0.6091, "step": 6968 }, { "epoch": 0.87, "grad_norm": 7.237181486931412, "learning_rate": 4.582516039369245e-07, "loss": 0.909, "step": 6969 }, { "epoch": 0.87, "grad_norm": 9.813599884072312, "learning_rate": 4.5740954487300203e-07, "loss": 1.0706, "step": 6970 }, { "epoch": 0.87, "grad_norm": 9.387261212657835, "learning_rate": 4.56568223096564e-07, "loss": 1.22, "step": 6971 }, { "epoch": 0.87, "grad_norm": 8.245635705280728, "learning_rate": 4.557276387441617e-07, "loss": 1.0616, "step": 6972 }, { "epoch": 0.87, "grad_norm": 7.960432606435044, "learning_rate": 4.5488779195222766e-07, "loss": 0.8422, "step": 6973 }, { "epoch": 0.87, "grad_norm": 5.9462572971391845, "learning_rate": 4.540486828570717e-07, "loss": 0.5686, "step": 6974 }, { "epoch": 0.87, "grad_norm": 8.106943520205691, "learning_rate": 4.532103115948866e-07, "loss": 0.8436, "step": 6975 }, { "epoch": 0.87, "grad_norm": 7.206091026509109, "learning_rate": 4.523726783017457e-07, "loss": 0.8563, "step": 6976 }, { "epoch": 0.87, "grad_norm": 6.131522474486438, "learning_rate": 4.515357831135986e-07, "loss": 0.6183, "step": 6977 }, { "epoch": 0.87, "grad_norm": 7.2429117013802315, "learning_rate": 4.5069962616628215e-07, "loss": 0.8038, "step": 6978 }, { "epoch": 0.87, "grad_norm": 8.327774963630093, "learning_rate": 4.498642075955062e-07, "loss": 0.6794, "step": 6979 }, { "epoch": 0.87, "grad_norm": 8.685463081609493, "learning_rate": 4.490295275368645e-07, "loss": 0.722, "step": 6980 }, { "epoch": 0.87, "grad_norm": 5.870791672236776, "learning_rate": 4.481955861258308e-07, "loss": 0.8667, "step": 6981 }, { "epoch": 0.87, "grad_norm": 8.77354428978134, "learning_rate": 4.473623834977586e-07, "loss": 1.1691, "step": 6982 }, { "epoch": 0.87, "grad_norm": 7.191351894618201, "learning_rate": 4.4652991978787975e-07, "loss": 0.7589, "step": 6983 }, { "epoch": 0.87, "grad_norm": 6.902790531747042, "learning_rate": 4.4569819513130884e-07, "loss": 0.7424, "step": 6984 }, { "epoch": 0.87, "grad_norm": 8.353708995713493, "learning_rate": 4.448672096630402e-07, "loss": 0.7304, "step": 6985 }, { "epoch": 0.87, "grad_norm": 9.215127068001868, "learning_rate": 4.4403696351794423e-07, "loss": 0.9375, "step": 6986 }, { "epoch": 0.87, "grad_norm": 6.963384771738652, "learning_rate": 4.4320745683077817e-07, "loss": 0.5699, "step": 6987 }, { "epoch": 0.87, "grad_norm": 7.489798270013057, "learning_rate": 4.4237868973617315e-07, "loss": 0.9818, "step": 6988 }, { "epoch": 0.87, "grad_norm": 8.510319280240344, "learning_rate": 4.415506623686439e-07, "loss": 1.1689, "step": 6989 }, { "epoch": 0.87, "grad_norm": 9.223240403600013, "learning_rate": 4.407233748625839e-07, "loss": 1.207, "step": 6990 }, { "epoch": 0.87, "grad_norm": 8.36561931496917, "learning_rate": 4.398968273522647e-07, "loss": 0.8965, "step": 6991 }, { "epoch": 0.87, "grad_norm": 7.968316162356268, "learning_rate": 4.390710199718412e-07, "loss": 1.1731, "step": 6992 }, { "epoch": 0.87, "grad_norm": 7.706850879652968, "learning_rate": 4.3824595285534565e-07, "loss": 0.6671, "step": 6993 }, { "epoch": 0.87, "grad_norm": 7.358597455777135, "learning_rate": 4.3742162613669257e-07, "loss": 1.0172, "step": 6994 }, { "epoch": 0.87, "grad_norm": 6.479754987538606, "learning_rate": 4.3659803994967165e-07, "loss": 0.9383, "step": 6995 }, { "epoch": 0.87, "grad_norm": 7.2899012599177855, "learning_rate": 4.3577519442795867e-07, "loss": 0.7321, "step": 6996 }, { "epoch": 0.87, "grad_norm": 7.299141321504667, "learning_rate": 4.3495308970510463e-07, "loss": 1.1491, "step": 6997 }, { "epoch": 0.87, "grad_norm": 5.8769249680040065, "learning_rate": 4.341317259145411e-07, "loss": 0.693, "step": 6998 }, { "epoch": 0.87, "grad_norm": 8.030163053685625, "learning_rate": 4.3331110318958147e-07, "loss": 1.0413, "step": 6999 }, { "epoch": 0.87, "grad_norm": 6.252802979729506, "learning_rate": 4.3249122166341586e-07, "loss": 0.7426, "step": 7000 }, { "epoch": 0.87, "grad_norm": 6.955102563292019, "learning_rate": 4.316720814691161e-07, "loss": 0.9898, "step": 7001 }, { "epoch": 0.87, "grad_norm": 9.62458452619625, "learning_rate": 4.308536827396326e-07, "loss": 1.0371, "step": 7002 }, { "epoch": 0.87, "grad_norm": 7.512831598603425, "learning_rate": 4.300360256077979e-07, "loss": 0.7504, "step": 7003 }, { "epoch": 0.87, "grad_norm": 7.306008478462293, "learning_rate": 4.2921911020631926e-07, "loss": 0.9457, "step": 7004 }, { "epoch": 0.87, "grad_norm": 7.643190600546775, "learning_rate": 4.2840293666779e-07, "loss": 0.987, "step": 7005 }, { "epoch": 0.87, "grad_norm": 8.362223853277582, "learning_rate": 4.2758750512467693e-07, "loss": 1.0442, "step": 7006 }, { "epoch": 0.87, "grad_norm": 8.036879655946967, "learning_rate": 4.2677281570932984e-07, "loss": 1.0969, "step": 7007 }, { "epoch": 0.87, "grad_norm": 8.051845004553721, "learning_rate": 4.259588685539784e-07, "loss": 1.0287, "step": 7008 }, { "epoch": 0.87, "grad_norm": 6.169627344605556, "learning_rate": 4.251456637907286e-07, "loss": 0.4007, "step": 7009 }, { "epoch": 0.87, "grad_norm": 8.53714588244733, "learning_rate": 4.2433320155156944e-07, "loss": 1.1522, "step": 7010 }, { "epoch": 0.87, "grad_norm": 6.869224452202533, "learning_rate": 4.235214819683681e-07, "loss": 0.7819, "step": 7011 }, { "epoch": 0.87, "grad_norm": 7.554986317342389, "learning_rate": 4.2271050517287136e-07, "loss": 1.0905, "step": 7012 }, { "epoch": 0.87, "grad_norm": 6.208421252548046, "learning_rate": 4.2190027129670406e-07, "loss": 0.7064, "step": 7013 }, { "epoch": 0.87, "grad_norm": 9.010712324982752, "learning_rate": 4.210907804713721e-07, "loss": 0.9706, "step": 7014 }, { "epoch": 0.87, "grad_norm": 9.228640656842892, "learning_rate": 4.202820328282614e-07, "loss": 0.9925, "step": 7015 }, { "epoch": 0.87, "grad_norm": 8.154884554620317, "learning_rate": 4.194740284986337e-07, "loss": 0.8266, "step": 7016 }, { "epoch": 0.87, "grad_norm": 8.411409722004947, "learning_rate": 4.1866676761363577e-07, "loss": 1.1989, "step": 7017 }, { "epoch": 0.87, "grad_norm": 9.287136081211269, "learning_rate": 4.1786025030428776e-07, "loss": 0.8751, "step": 7018 }, { "epoch": 0.87, "grad_norm": 8.948947808770601, "learning_rate": 4.1705447670149334e-07, "loss": 1.3897, "step": 7019 }, { "epoch": 0.87, "grad_norm": 7.959931262557137, "learning_rate": 4.1624944693603344e-07, "loss": 0.6772, "step": 7020 }, { "epoch": 0.87, "grad_norm": 8.171278972413566, "learning_rate": 4.1544516113857014e-07, "loss": 0.951, "step": 7021 }, { "epoch": 0.87, "grad_norm": 8.058295457756568, "learning_rate": 4.1464161943964134e-07, "loss": 0.7368, "step": 7022 }, { "epoch": 0.87, "grad_norm": 7.528829886952169, "learning_rate": 4.138388219696671e-07, "loss": 0.7277, "step": 7023 }, { "epoch": 0.87, "grad_norm": 9.330225392697686, "learning_rate": 4.130367688589471e-07, "loss": 1.0553, "step": 7024 }, { "epoch": 0.87, "grad_norm": 7.4685127514703185, "learning_rate": 4.1223546023765604e-07, "loss": 0.7751, "step": 7025 }, { "epoch": 0.87, "grad_norm": 7.432815663702991, "learning_rate": 4.11434896235855e-07, "loss": 0.8927, "step": 7026 }, { "epoch": 0.87, "grad_norm": 7.148707669055754, "learning_rate": 4.1063507698347604e-07, "loss": 0.8964, "step": 7027 }, { "epoch": 0.87, "grad_norm": 8.958161009419706, "learning_rate": 4.0983600261033597e-07, "loss": 0.9437, "step": 7028 }, { "epoch": 0.87, "grad_norm": 6.982324520924774, "learning_rate": 4.0903767324612876e-07, "loss": 0.6048, "step": 7029 }, { "epoch": 0.87, "grad_norm": 7.734468298357775, "learning_rate": 4.082400890204291e-07, "loss": 0.9249, "step": 7030 }, { "epoch": 0.87, "grad_norm": 8.132062223556987, "learning_rate": 4.0744325006268625e-07, "loss": 0.8178, "step": 7031 }, { "epoch": 0.87, "grad_norm": 7.251993524621417, "learning_rate": 4.0664715650223343e-07, "loss": 0.9658, "step": 7032 }, { "epoch": 0.87, "grad_norm": 6.0952368180135785, "learning_rate": 4.058518084682822e-07, "loss": 0.6414, "step": 7033 }, { "epoch": 0.87, "grad_norm": 7.260478677496843, "learning_rate": 4.050572060899183e-07, "loss": 0.8386, "step": 7034 }, { "epoch": 0.88, "grad_norm": 7.896224058708331, "learning_rate": 4.04263349496114e-07, "loss": 0.804, "step": 7035 }, { "epoch": 0.88, "grad_norm": 6.392505913350584, "learning_rate": 4.0347023881571413e-07, "loss": 0.6717, "step": 7036 }, { "epoch": 0.88, "grad_norm": 7.466242979901708, "learning_rate": 4.0267787417744564e-07, "loss": 0.9928, "step": 7037 }, { "epoch": 0.88, "grad_norm": 8.723606376030245, "learning_rate": 4.01886255709914e-07, "loss": 0.9524, "step": 7038 }, { "epoch": 0.88, "grad_norm": 8.878606594295887, "learning_rate": 4.010953835416037e-07, "loss": 1.247, "step": 7039 }, { "epoch": 0.88, "grad_norm": 8.535627879921025, "learning_rate": 4.003052578008759e-07, "loss": 1.0445, "step": 7040 }, { "epoch": 0.88, "grad_norm": 9.699716894376296, "learning_rate": 3.9951587861597363e-07, "loss": 1.465, "step": 7041 }, { "epoch": 0.88, "grad_norm": 8.075824759167855, "learning_rate": 3.987272461150182e-07, "loss": 1.0622, "step": 7042 }, { "epoch": 0.88, "grad_norm": 7.440019644159243, "learning_rate": 3.979393604260062e-07, "loss": 0.7725, "step": 7043 }, { "epoch": 0.88, "grad_norm": 7.359503046782645, "learning_rate": 3.9715222167681976e-07, "loss": 0.6345, "step": 7044 }, { "epoch": 0.88, "grad_norm": 7.13609821383068, "learning_rate": 3.963658299952128e-07, "loss": 0.7157, "step": 7045 }, { "epoch": 0.88, "grad_norm": 8.191177694138386, "learning_rate": 3.9558018550882204e-07, "loss": 1.1223, "step": 7046 }, { "epoch": 0.88, "grad_norm": 7.297114615738758, "learning_rate": 3.9479528834516224e-07, "loss": 0.8042, "step": 7047 }, { "epoch": 0.88, "grad_norm": 9.056663659617756, "learning_rate": 3.9401113863162587e-07, "loss": 1.1306, "step": 7048 }, { "epoch": 0.88, "grad_norm": 8.008955544822706, "learning_rate": 3.932277364954851e-07, "loss": 0.9497, "step": 7049 }, { "epoch": 0.88, "grad_norm": 7.788929060852642, "learning_rate": 3.924450820638903e-07, "loss": 1.1389, "step": 7050 }, { "epoch": 0.88, "grad_norm": 7.3471581291157255, "learning_rate": 3.916631754638717e-07, "loss": 0.8458, "step": 7051 }, { "epoch": 0.88, "grad_norm": 6.540219275301476, "learning_rate": 3.9088201682233376e-07, "loss": 0.6513, "step": 7052 }, { "epoch": 0.88, "grad_norm": 8.10436859945616, "learning_rate": 3.901016062660673e-07, "loss": 1.1596, "step": 7053 }, { "epoch": 0.88, "grad_norm": 6.69722152335066, "learning_rate": 3.8932194392173437e-07, "loss": 0.7863, "step": 7054 }, { "epoch": 0.88, "grad_norm": 8.820362197901735, "learning_rate": 3.8854302991587877e-07, "loss": 0.9498, "step": 7055 }, { "epoch": 0.88, "grad_norm": 8.652450662654564, "learning_rate": 3.8776486437492435e-07, "loss": 0.9369, "step": 7056 }, { "epoch": 0.88, "grad_norm": 8.76490706869432, "learning_rate": 3.8698744742516893e-07, "loss": 1.0909, "step": 7057 }, { "epoch": 0.88, "grad_norm": 6.27888658165102, "learning_rate": 3.8621077919279326e-07, "loss": 0.5974, "step": 7058 }, { "epoch": 0.88, "grad_norm": 7.2949333833062475, "learning_rate": 3.854348598038543e-07, "loss": 0.9637, "step": 7059 }, { "epoch": 0.88, "grad_norm": 7.5735112457397, "learning_rate": 3.846596893842891e-07, "loss": 0.6726, "step": 7060 }, { "epoch": 0.88, "grad_norm": 7.368158450810568, "learning_rate": 3.8388526805990976e-07, "loss": 0.8141, "step": 7061 }, { "epoch": 0.88, "grad_norm": 7.04186297786081, "learning_rate": 3.831115959564119e-07, "loss": 0.629, "step": 7062 }, { "epoch": 0.88, "grad_norm": 6.992833443197661, "learning_rate": 3.82338673199365e-07, "loss": 0.5859, "step": 7063 }, { "epoch": 0.88, "grad_norm": 6.65435036688121, "learning_rate": 3.8156649991421934e-07, "loss": 0.688, "step": 7064 }, { "epoch": 0.88, "grad_norm": 9.366303430483471, "learning_rate": 3.8079507622630354e-07, "loss": 1.0037, "step": 7065 }, { "epoch": 0.88, "grad_norm": 7.361405685480732, "learning_rate": 3.8002440226082237e-07, "loss": 0.5579, "step": 7066 }, { "epoch": 0.88, "grad_norm": 7.163540362054024, "learning_rate": 3.792544781428609e-07, "loss": 1.051, "step": 7067 }, { "epoch": 0.88, "grad_norm": 7.360586387698955, "learning_rate": 3.7848530399738236e-07, "loss": 0.8713, "step": 7068 }, { "epoch": 0.88, "grad_norm": 7.0659282472040905, "learning_rate": 3.7771687994922925e-07, "loss": 0.7672, "step": 7069 }, { "epoch": 0.88, "grad_norm": 8.276701573588891, "learning_rate": 3.769492061231189e-07, "loss": 0.7512, "step": 7070 }, { "epoch": 0.88, "grad_norm": 7.869210150366549, "learning_rate": 3.7618228264364953e-07, "loss": 1.0141, "step": 7071 }, { "epoch": 0.88, "grad_norm": 7.656729207095141, "learning_rate": 3.754161096352982e-07, "loss": 0.926, "step": 7072 }, { "epoch": 0.88, "grad_norm": 7.060088023901653, "learning_rate": 3.74650687222417e-07, "loss": 0.5925, "step": 7073 }, { "epoch": 0.88, "grad_norm": 9.823381925018188, "learning_rate": 3.7388601552924066e-07, "loss": 1.2569, "step": 7074 }, { "epoch": 0.88, "grad_norm": 8.958351648223505, "learning_rate": 3.7312209467987806e-07, "loss": 1.1087, "step": 7075 }, { "epoch": 0.88, "grad_norm": 7.735725595151983, "learning_rate": 3.7235892479831727e-07, "loss": 1.0915, "step": 7076 }, { "epoch": 0.88, "grad_norm": 7.374797397087289, "learning_rate": 3.7159650600842644e-07, "loss": 0.9573, "step": 7077 }, { "epoch": 0.88, "grad_norm": 7.504343429927272, "learning_rate": 3.708348384339505e-07, "loss": 0.7708, "step": 7078 }, { "epoch": 0.88, "grad_norm": 7.162601473478175, "learning_rate": 3.7007392219851055e-07, "loss": 0.7268, "step": 7079 }, { "epoch": 0.88, "grad_norm": 7.591365051256664, "learning_rate": 3.6931375742560837e-07, "loss": 0.8693, "step": 7080 }, { "epoch": 0.88, "grad_norm": 7.337709551004614, "learning_rate": 3.6855434423862356e-07, "loss": 0.8243, "step": 7081 }, { "epoch": 0.88, "grad_norm": 5.785072392135027, "learning_rate": 3.6779568276081093e-07, "loss": 0.7212, "step": 7082 }, { "epoch": 0.88, "grad_norm": 7.01181528877018, "learning_rate": 3.670377731153091e-07, "loss": 0.6079, "step": 7083 }, { "epoch": 0.88, "grad_norm": 7.579374268763982, "learning_rate": 3.662806154251275e-07, "loss": 1.1312, "step": 7084 }, { "epoch": 0.88, "grad_norm": 8.117017518883879, "learning_rate": 3.655242098131584e-07, "loss": 0.5892, "step": 7085 }, { "epoch": 0.88, "grad_norm": 8.713506422248322, "learning_rate": 3.6476855640217077e-07, "loss": 0.9514, "step": 7086 }, { "epoch": 0.88, "grad_norm": 7.435361306287497, "learning_rate": 3.640136553148116e-07, "loss": 1.0941, "step": 7087 }, { "epoch": 0.88, "grad_norm": 7.239916634953293, "learning_rate": 3.6325950667360443e-07, "loss": 0.9019, "step": 7088 }, { "epoch": 0.88, "grad_norm": 7.387321265839811, "learning_rate": 3.62506110600952e-07, "loss": 0.5268, "step": 7089 }, { "epoch": 0.88, "grad_norm": 7.615971003207588, "learning_rate": 3.6175346721913584e-07, "loss": 0.8003, "step": 7090 }, { "epoch": 0.88, "grad_norm": 9.13111371093359, "learning_rate": 3.610015766503117e-07, "loss": 1.1604, "step": 7091 }, { "epoch": 0.88, "grad_norm": 7.254040516209428, "learning_rate": 3.60250439016519e-07, "loss": 0.7742, "step": 7092 }, { "epoch": 0.88, "grad_norm": 6.694022797824501, "learning_rate": 3.5950005443966875e-07, "loss": 0.704, "step": 7093 }, { "epoch": 0.88, "grad_norm": 8.967611004559847, "learning_rate": 3.5875042304155285e-07, "loss": 0.9208, "step": 7094 }, { "epoch": 0.88, "grad_norm": 6.528123070290391, "learning_rate": 3.5800154494384176e-07, "loss": 0.7544, "step": 7095 }, { "epoch": 0.88, "grad_norm": 10.881035182818481, "learning_rate": 3.572534202680827e-07, "loss": 0.8073, "step": 7096 }, { "epoch": 0.88, "grad_norm": 7.161029870219596, "learning_rate": 3.565060491356986e-07, "loss": 0.863, "step": 7097 }, { "epoch": 0.88, "grad_norm": 7.143219165957113, "learning_rate": 3.55759431667993e-07, "loss": 0.9332, "step": 7098 }, { "epoch": 0.88, "grad_norm": 6.538694209021403, "learning_rate": 3.550135679861466e-07, "loss": 0.799, "step": 7099 }, { "epoch": 0.88, "grad_norm": 7.547946440970506, "learning_rate": 3.542684582112149e-07, "loss": 0.6375, "step": 7100 }, { "epoch": 0.88, "grad_norm": 8.34816158907605, "learning_rate": 3.5352410246413727e-07, "loss": 0.8515, "step": 7101 }, { "epoch": 0.88, "grad_norm": 7.780677103614165, "learning_rate": 3.5278050086572313e-07, "loss": 0.6947, "step": 7102 }, { "epoch": 0.88, "grad_norm": 7.891770743187363, "learning_rate": 3.5203765353666486e-07, "loss": 0.8821, "step": 7103 }, { "epoch": 0.88, "grad_norm": 6.688550080174804, "learning_rate": 3.5129556059753046e-07, "loss": 0.7469, "step": 7104 }, { "epoch": 0.88, "grad_norm": 8.660048788062253, "learning_rate": 3.505542221687652e-07, "loss": 1.137, "step": 7105 }, { "epoch": 0.88, "grad_norm": 6.8377851413601585, "learning_rate": 3.4981363837069295e-07, "loss": 0.681, "step": 7106 }, { "epoch": 0.88, "grad_norm": 8.15864883759353, "learning_rate": 3.490738093235141e-07, "loss": 0.899, "step": 7107 }, { "epoch": 0.88, "grad_norm": 8.342761122102262, "learning_rate": 3.483347351473076e-07, "loss": 1.1106, "step": 7108 }, { "epoch": 0.88, "grad_norm": 7.918133106701256, "learning_rate": 3.4759641596202766e-07, "loss": 0.8012, "step": 7109 }, { "epoch": 0.88, "grad_norm": 6.84648518361171, "learning_rate": 3.4685885188751047e-07, "loss": 0.673, "step": 7110 }, { "epoch": 0.88, "grad_norm": 6.363898050945236, "learning_rate": 3.461220430434642e-07, "loss": 0.69, "step": 7111 }, { "epoch": 0.88, "grad_norm": 8.53911505144908, "learning_rate": 3.4538598954947764e-07, "loss": 1.0393, "step": 7112 }, { "epoch": 0.88, "grad_norm": 9.517224064778397, "learning_rate": 3.4465069152501804e-07, "loss": 1.4277, "step": 7113 }, { "epoch": 0.88, "grad_norm": 7.560097650277457, "learning_rate": 3.439161490894255e-07, "loss": 1.0753, "step": 7114 }, { "epoch": 0.88, "grad_norm": 10.06979306632631, "learning_rate": 3.4318236236192127e-07, "loss": 1.2258, "step": 7115 }, { "epoch": 0.89, "grad_norm": 7.839142487967073, "learning_rate": 3.4244933146160395e-07, "loss": 0.8838, "step": 7116 }, { "epoch": 0.89, "grad_norm": 7.634731416956453, "learning_rate": 3.4171705650744847e-07, "loss": 0.9465, "step": 7117 }, { "epoch": 0.89, "grad_norm": 8.771811969912862, "learning_rate": 3.4098553761830466e-07, "loss": 0.9816, "step": 7118 }, { "epoch": 0.89, "grad_norm": 8.274550627988011, "learning_rate": 3.402547749129059e-07, "loss": 1.15, "step": 7119 }, { "epoch": 0.89, "grad_norm": 9.782850482375553, "learning_rate": 3.395247685098563e-07, "loss": 1.2351, "step": 7120 }, { "epoch": 0.89, "grad_norm": 6.468126087265541, "learning_rate": 3.3879551852764036e-07, "loss": 0.5661, "step": 7121 }, { "epoch": 0.89, "grad_norm": 6.673651116165978, "learning_rate": 3.380670250846207e-07, "loss": 0.6419, "step": 7122 }, { "epoch": 0.89, "grad_norm": 8.622943680864148, "learning_rate": 3.3733928829903396e-07, "loss": 0.8913, "step": 7123 }, { "epoch": 0.89, "grad_norm": 7.8124475541386555, "learning_rate": 3.3661230828899605e-07, "loss": 0.8301, "step": 7124 }, { "epoch": 0.89, "grad_norm": 7.276807107864476, "learning_rate": 3.3588608517250097e-07, "loss": 0.7736, "step": 7125 }, { "epoch": 0.89, "grad_norm": 6.58616512104241, "learning_rate": 3.351606190674195e-07, "loss": 0.5458, "step": 7126 }, { "epoch": 0.89, "grad_norm": 9.325943053188759, "learning_rate": 3.3443591009149624e-07, "loss": 0.733, "step": 7127 }, { "epoch": 0.89, "grad_norm": 8.611053126108484, "learning_rate": 3.337119583623566e-07, "loss": 0.9193, "step": 7128 }, { "epoch": 0.89, "grad_norm": 7.104984213304769, "learning_rate": 3.3298876399750325e-07, "loss": 0.8118, "step": 7129 }, { "epoch": 0.89, "grad_norm": 7.826169007620514, "learning_rate": 3.322663271143112e-07, "loss": 1.1632, "step": 7130 }, { "epoch": 0.89, "grad_norm": 5.979974338079011, "learning_rate": 3.315446478300405e-07, "loss": 0.6756, "step": 7131 }, { "epoch": 0.89, "grad_norm": 9.260886512086776, "learning_rate": 3.308237262618202e-07, "loss": 1.111, "step": 7132 }, { "epoch": 0.89, "grad_norm": 6.83572687507472, "learning_rate": 3.3010356252666076e-07, "loss": 0.8064, "step": 7133 }, { "epoch": 0.89, "grad_norm": 6.957977220341724, "learning_rate": 3.2938415674144965e-07, "loss": 0.6689, "step": 7134 }, { "epoch": 0.89, "grad_norm": 7.542434611399396, "learning_rate": 3.2866550902295015e-07, "loss": 0.8734, "step": 7135 }, { "epoch": 0.89, "grad_norm": 9.199031601249485, "learning_rate": 3.279476194878012e-07, "loss": 0.8078, "step": 7136 }, { "epoch": 0.89, "grad_norm": 8.508447029553235, "learning_rate": 3.2723048825252177e-07, "loss": 0.6336, "step": 7137 }, { "epoch": 0.89, "grad_norm": 7.8654350668015045, "learning_rate": 3.265141154335061e-07, "loss": 0.915, "step": 7138 }, { "epoch": 0.89, "grad_norm": 6.852509628820114, "learning_rate": 3.2579850114702436e-07, "loss": 0.9418, "step": 7139 }, { "epoch": 0.89, "grad_norm": 6.901122843801517, "learning_rate": 3.2508364550922656e-07, "loss": 0.8767, "step": 7140 }, { "epoch": 0.89, "grad_norm": 7.231795864167868, "learning_rate": 3.243695486361359e-07, "loss": 0.9636, "step": 7141 }, { "epoch": 0.89, "grad_norm": 7.1132350839084095, "learning_rate": 3.236562106436547e-07, "loss": 0.6763, "step": 7142 }, { "epoch": 0.89, "grad_norm": 6.384986554774711, "learning_rate": 3.2294363164756203e-07, "loss": 0.8663, "step": 7143 }, { "epoch": 0.89, "grad_norm": 7.19022233404815, "learning_rate": 3.222318117635143e-07, "loss": 0.8381, "step": 7144 }, { "epoch": 0.89, "grad_norm": 6.735874507094254, "learning_rate": 3.2152075110704226e-07, "loss": 0.5595, "step": 7145 }, { "epoch": 0.89, "grad_norm": 7.873860685278922, "learning_rate": 3.208104497935549e-07, "loss": 1.0765, "step": 7146 }, { "epoch": 0.89, "grad_norm": 8.134353517975203, "learning_rate": 3.2010090793833994e-07, "loss": 0.9476, "step": 7147 }, { "epoch": 0.89, "grad_norm": 4.8113221951563885, "learning_rate": 3.1939212565655687e-07, "loss": 0.5332, "step": 7148 }, { "epoch": 0.89, "grad_norm": 8.193065287666654, "learning_rate": 3.1868410306324816e-07, "loss": 0.8553, "step": 7149 }, { "epoch": 0.89, "grad_norm": 10.623575404127852, "learning_rate": 3.1797684027332744e-07, "loss": 1.3458, "step": 7150 }, { "epoch": 0.89, "grad_norm": 7.713533577414935, "learning_rate": 3.172703374015884e-07, "loss": 0.765, "step": 7151 }, { "epoch": 0.89, "grad_norm": 7.01107690857461, "learning_rate": 3.165645945627005e-07, "loss": 0.78, "step": 7152 }, { "epoch": 0.89, "grad_norm": 9.260696216486135, "learning_rate": 3.1585961187120985e-07, "loss": 0.9859, "step": 7153 }, { "epoch": 0.89, "grad_norm": 8.640394252375348, "learning_rate": 3.1515538944153824e-07, "loss": 0.966, "step": 7154 }, { "epoch": 0.89, "grad_norm": 7.74908195361762, "learning_rate": 3.144519273879848e-07, "loss": 0.7663, "step": 7155 }, { "epoch": 0.89, "grad_norm": 7.3925890714363724, "learning_rate": 3.1374922582472655e-07, "loss": 0.9931, "step": 7156 }, { "epoch": 0.89, "grad_norm": 6.430768884978323, "learning_rate": 3.1304728486581395e-07, "loss": 0.6842, "step": 7157 }, { "epoch": 0.89, "grad_norm": 7.71214999599147, "learning_rate": 3.12346104625178e-07, "loss": 0.8829, "step": 7158 }, { "epoch": 0.89, "grad_norm": 8.880448710881591, "learning_rate": 3.116456852166222e-07, "loss": 0.9924, "step": 7159 }, { "epoch": 0.89, "grad_norm": 6.955293727492427, "learning_rate": 3.1094602675382946e-07, "loss": 0.7475, "step": 7160 }, { "epoch": 0.89, "grad_norm": 8.2481543761564, "learning_rate": 3.1024712935035896e-07, "loss": 1.2036, "step": 7161 }, { "epoch": 0.89, "grad_norm": 9.199292231673757, "learning_rate": 3.095489931196438e-07, "loss": 1.0315, "step": 7162 }, { "epoch": 0.89, "grad_norm": 7.5226789562707035, "learning_rate": 3.0885161817499675e-07, "loss": 0.7723, "step": 7163 }, { "epoch": 0.89, "grad_norm": 8.423295043768091, "learning_rate": 3.0815500462960447e-07, "loss": 0.9034, "step": 7164 }, { "epoch": 0.89, "grad_norm": 9.61057628545546, "learning_rate": 3.0745915259653314e-07, "loss": 1.1707, "step": 7165 }, { "epoch": 0.89, "grad_norm": 9.011448310799615, "learning_rate": 3.0676406218872026e-07, "loss": 1.1472, "step": 7166 }, { "epoch": 0.89, "grad_norm": 8.159062166371104, "learning_rate": 3.0606973351898606e-07, "loss": 1.1499, "step": 7167 }, { "epoch": 0.89, "grad_norm": 9.396342279956166, "learning_rate": 3.0537616670002213e-07, "loss": 1.0566, "step": 7168 }, { "epoch": 0.89, "grad_norm": 8.330442980964985, "learning_rate": 3.0468336184439837e-07, "loss": 1.411, "step": 7169 }, { "epoch": 0.89, "grad_norm": 4.921303913408231, "learning_rate": 3.0399131906456157e-07, "loss": 0.4095, "step": 7170 }, { "epoch": 0.89, "grad_norm": 8.017529735479789, "learning_rate": 3.0330003847283296e-07, "loss": 0.7668, "step": 7171 }, { "epoch": 0.89, "grad_norm": 6.651888666070096, "learning_rate": 3.026095201814122e-07, "loss": 0.893, "step": 7172 }, { "epoch": 0.89, "grad_norm": 8.338219580103903, "learning_rate": 3.019197643023736e-07, "loss": 0.9346, "step": 7173 }, { "epoch": 0.89, "grad_norm": 7.845682261807978, "learning_rate": 3.012307709476692e-07, "loss": 0.9017, "step": 7174 }, { "epoch": 0.89, "grad_norm": 8.118250336412837, "learning_rate": 3.0054254022912397e-07, "loss": 0.8679, "step": 7175 }, { "epoch": 0.89, "grad_norm": 8.21098093005113, "learning_rate": 2.998550722584448e-07, "loss": 0.9658, "step": 7176 }, { "epoch": 0.89, "grad_norm": 8.436812599067606, "learning_rate": 2.991683671472101e-07, "loss": 1.0026, "step": 7177 }, { "epoch": 0.89, "grad_norm": 8.746427763752898, "learning_rate": 2.9848242500687517e-07, "loss": 1.081, "step": 7178 }, { "epoch": 0.89, "grad_norm": 7.502680281109211, "learning_rate": 2.9779724594877377e-07, "loss": 0.7666, "step": 7179 }, { "epoch": 0.89, "grad_norm": 8.256898920086657, "learning_rate": 2.9711283008411306e-07, "loss": 1.1996, "step": 7180 }, { "epoch": 0.89, "grad_norm": 7.219941313803401, "learning_rate": 2.964291775239775e-07, "loss": 1.0217, "step": 7181 }, { "epoch": 0.89, "grad_norm": 7.875719192813909, "learning_rate": 2.9574628837932774e-07, "loss": 0.7997, "step": 7182 }, { "epoch": 0.89, "grad_norm": 8.332526538441137, "learning_rate": 2.9506416276100183e-07, "loss": 1.111, "step": 7183 }, { "epoch": 0.89, "grad_norm": 8.319009704665842, "learning_rate": 2.9438280077971004e-07, "loss": 1.003, "step": 7184 }, { "epoch": 0.89, "grad_norm": 8.731366491513231, "learning_rate": 2.937022025460434e-07, "loss": 1.1895, "step": 7185 }, { "epoch": 0.89, "grad_norm": 6.528177227803355, "learning_rate": 2.9302236817046636e-07, "loss": 0.9789, "step": 7186 }, { "epoch": 0.89, "grad_norm": 7.729740061630762, "learning_rate": 2.923432977633178e-07, "loss": 0.9571, "step": 7187 }, { "epoch": 0.89, "grad_norm": 9.07082397746325, "learning_rate": 2.9166499143481796e-07, "loss": 1.4074, "step": 7188 }, { "epoch": 0.89, "grad_norm": 6.857826676000053, "learning_rate": 2.909874492950565e-07, "loss": 0.5587, "step": 7189 }, { "epoch": 0.89, "grad_norm": 7.003634788761226, "learning_rate": 2.9031067145400385e-07, "loss": 0.8715, "step": 7190 }, { "epoch": 0.89, "grad_norm": 6.663885125094709, "learning_rate": 2.8963465802150483e-07, "loss": 1.0175, "step": 7191 }, { "epoch": 0.89, "grad_norm": 4.865303590044084, "learning_rate": 2.8895940910728005e-07, "loss": 0.3944, "step": 7192 }, { "epoch": 0.89, "grad_norm": 8.541840230126041, "learning_rate": 2.8828492482092576e-07, "loss": 1.0554, "step": 7193 }, { "epoch": 0.89, "grad_norm": 7.788874020673683, "learning_rate": 2.8761120527191435e-07, "loss": 1.2035, "step": 7194 }, { "epoch": 0.89, "grad_norm": 9.266012509976974, "learning_rate": 2.86938250569595e-07, "loss": 0.7253, "step": 7195 }, { "epoch": 0.9, "grad_norm": 6.758738299348889, "learning_rate": 2.8626606082319044e-07, "loss": 0.7891, "step": 7196 }, { "epoch": 0.9, "grad_norm": 6.730320824028278, "learning_rate": 2.8559463614180326e-07, "loss": 0.7475, "step": 7197 }, { "epoch": 0.9, "grad_norm": 8.57184906508328, "learning_rate": 2.84923976634407e-07, "loss": 0.9806, "step": 7198 }, { "epoch": 0.9, "grad_norm": 7.664117405778475, "learning_rate": 2.8425408240985495e-07, "loss": 1.076, "step": 7199 }, { "epoch": 0.9, "grad_norm": 6.140544984634771, "learning_rate": 2.8358495357687366e-07, "loss": 0.6462, "step": 7200 }, { "epoch": 0.9, "grad_norm": 9.000060647898747, "learning_rate": 2.8291659024406726e-07, "loss": 0.9934, "step": 7201 }, { "epoch": 0.9, "grad_norm": 7.1297282799480675, "learning_rate": 2.82248992519914e-07, "loss": 0.7248, "step": 7202 }, { "epoch": 0.9, "grad_norm": 8.001793295627568, "learning_rate": 2.815821605127689e-07, "loss": 1.0787, "step": 7203 }, { "epoch": 0.9, "grad_norm": 7.430332045964232, "learning_rate": 2.809160943308631e-07, "loss": 0.7359, "step": 7204 }, { "epoch": 0.9, "grad_norm": 7.195245075771524, "learning_rate": 2.802507940823013e-07, "loss": 1.0154, "step": 7205 }, { "epoch": 0.9, "grad_norm": 8.43557588544141, "learning_rate": 2.795862598750676e-07, "loss": 1.007, "step": 7206 }, { "epoch": 0.9, "grad_norm": 8.107983770278807, "learning_rate": 2.78922491817018e-07, "loss": 0.6234, "step": 7207 }, { "epoch": 0.9, "grad_norm": 7.704457349511037, "learning_rate": 2.782594900158858e-07, "loss": 1.0443, "step": 7208 }, { "epoch": 0.9, "grad_norm": 7.428888395151358, "learning_rate": 2.775972545792804e-07, "loss": 0.7455, "step": 7209 }, { "epoch": 0.9, "grad_norm": 8.83873823791114, "learning_rate": 2.7693578561468646e-07, "loss": 0.7404, "step": 7210 }, { "epoch": 0.9, "grad_norm": 6.104782665907905, "learning_rate": 2.7627508322946307e-07, "loss": 0.6736, "step": 7211 }, { "epoch": 0.9, "grad_norm": 6.976941132835931, "learning_rate": 2.756151475308461e-07, "loss": 0.8445, "step": 7212 }, { "epoch": 0.9, "grad_norm": 8.330751030492344, "learning_rate": 2.7495597862594813e-07, "loss": 0.9538, "step": 7213 }, { "epoch": 0.9, "grad_norm": 5.131416914091947, "learning_rate": 2.7429757662175316e-07, "loss": 0.6803, "step": 7214 }, { "epoch": 0.9, "grad_norm": 12.695678227025118, "learning_rate": 2.7363994162512673e-07, "loss": 0.9246, "step": 7215 }, { "epoch": 0.9, "grad_norm": 8.405609924517057, "learning_rate": 2.729830737428041e-07, "loss": 1.144, "step": 7216 }, { "epoch": 0.9, "grad_norm": 7.645247654702252, "learning_rate": 2.723269730813999e-07, "loss": 0.8817, "step": 7217 }, { "epoch": 0.9, "grad_norm": 9.113102807433608, "learning_rate": 2.7167163974740283e-07, "loss": 1.2081, "step": 7218 }, { "epoch": 0.9, "grad_norm": 8.486570006609995, "learning_rate": 2.710170738471762e-07, "loss": 0.9452, "step": 7219 }, { "epoch": 0.9, "grad_norm": 11.967375237334577, "learning_rate": 2.7036327548696005e-07, "loss": 0.8544, "step": 7220 }, { "epoch": 0.9, "grad_norm": 7.1683761813021425, "learning_rate": 2.6971024477287e-07, "loss": 0.8406, "step": 7221 }, { "epoch": 0.9, "grad_norm": 6.359689714565043, "learning_rate": 2.690579818108963e-07, "loss": 0.7718, "step": 7222 }, { "epoch": 0.9, "grad_norm": 6.96047913206667, "learning_rate": 2.684064867069036e-07, "loss": 0.6146, "step": 7223 }, { "epoch": 0.9, "grad_norm": 8.496786257810065, "learning_rate": 2.677557595666358e-07, "loss": 0.8102, "step": 7224 }, { "epoch": 0.9, "grad_norm": 8.43818470637994, "learning_rate": 2.671058004957067e-07, "loss": 1.0303, "step": 7225 }, { "epoch": 0.9, "grad_norm": 6.606348855461394, "learning_rate": 2.664566095996102e-07, "loss": 0.7671, "step": 7226 }, { "epoch": 0.9, "grad_norm": 7.513116534999467, "learning_rate": 2.6580818698371315e-07, "loss": 0.7813, "step": 7227 }, { "epoch": 0.9, "grad_norm": 7.889914598715503, "learning_rate": 2.651605327532569e-07, "loss": 0.88, "step": 7228 }, { "epoch": 0.9, "grad_norm": 7.9006637433787335, "learning_rate": 2.645136470133602e-07, "loss": 0.8418, "step": 7229 }, { "epoch": 0.9, "grad_norm": 7.813194579885173, "learning_rate": 2.6386752986901687e-07, "loss": 0.9705, "step": 7230 }, { "epoch": 0.9, "grad_norm": 8.347036417511475, "learning_rate": 2.6322218142509457e-07, "loss": 0.9014, "step": 7231 }, { "epoch": 0.9, "grad_norm": 7.261917430944761, "learning_rate": 2.625776017863357e-07, "loss": 1.0035, "step": 7232 }, { "epoch": 0.9, "grad_norm": 7.1149093775755246, "learning_rate": 2.619337910573622e-07, "loss": 0.6847, "step": 7233 }, { "epoch": 0.9, "grad_norm": 7.140284087497325, "learning_rate": 2.6129074934266527e-07, "loss": 0.8265, "step": 7234 }, { "epoch": 0.9, "grad_norm": 9.095433008407635, "learning_rate": 2.60648476746615e-07, "loss": 1.245, "step": 7235 }, { "epoch": 0.9, "grad_norm": 8.362976143183678, "learning_rate": 2.600069733734567e-07, "loss": 0.7633, "step": 7236 }, { "epoch": 0.9, "grad_norm": 6.87467730676106, "learning_rate": 2.593662393273083e-07, "loss": 0.7816, "step": 7237 }, { "epoch": 0.9, "grad_norm": 8.59979106055169, "learning_rate": 2.5872627471216494e-07, "loss": 0.8398, "step": 7238 }, { "epoch": 0.9, "grad_norm": 10.911268057033569, "learning_rate": 2.580870796318974e-07, "loss": 0.8652, "step": 7239 }, { "epoch": 0.9, "grad_norm": 8.663455744998833, "learning_rate": 2.574486541902499e-07, "loss": 0.929, "step": 7240 }, { "epoch": 0.9, "grad_norm": 7.856500941374419, "learning_rate": 2.5681099849084125e-07, "loss": 0.9562, "step": 7241 }, { "epoch": 0.9, "grad_norm": 7.547356215515631, "learning_rate": 2.561741126371692e-07, "loss": 0.9731, "step": 7242 }, { "epoch": 0.9, "grad_norm": 8.396601802370183, "learning_rate": 2.5553799673260214e-07, "loss": 0.8787, "step": 7243 }, { "epoch": 0.9, "grad_norm": 7.546037474090702, "learning_rate": 2.5490265088038367e-07, "loss": 0.6268, "step": 7244 }, { "epoch": 0.9, "grad_norm": 8.175491134607316, "learning_rate": 2.542680751836374e-07, "loss": 0.8445, "step": 7245 }, { "epoch": 0.9, "grad_norm": 7.212426274078948, "learning_rate": 2.5363426974535533e-07, "loss": 0.9106, "step": 7246 }, { "epoch": 0.9, "grad_norm": 6.701696351868566, "learning_rate": 2.530012346684091e-07, "loss": 0.8305, "step": 7247 }, { "epoch": 0.9, "grad_norm": 6.047045949302142, "learning_rate": 2.523689700555437e-07, "loss": 0.7284, "step": 7248 }, { "epoch": 0.9, "grad_norm": 7.192375982873553, "learning_rate": 2.5173747600937994e-07, "loss": 0.8569, "step": 7249 }, { "epoch": 0.9, "grad_norm": 7.754157675955853, "learning_rate": 2.5110675263241126e-07, "loss": 0.7705, "step": 7250 }, { "epoch": 0.9, "grad_norm": 7.6150154085776744, "learning_rate": 2.504768000270075e-07, "loss": 1.0836, "step": 7251 }, { "epoch": 0.9, "grad_norm": 6.328868528178668, "learning_rate": 2.4984761829541515e-07, "loss": 0.5246, "step": 7252 }, { "epoch": 0.9, "grad_norm": 7.061292023029832, "learning_rate": 2.492192075397515e-07, "loss": 0.8419, "step": 7253 }, { "epoch": 0.9, "grad_norm": 6.911103227330953, "learning_rate": 2.485915678620143e-07, "loss": 0.6823, "step": 7254 }, { "epoch": 0.9, "grad_norm": 7.34216335231783, "learning_rate": 2.4796469936406943e-07, "loss": 1.2595, "step": 7255 }, { "epoch": 0.9, "grad_norm": 6.993883452850493, "learning_rate": 2.4733860214766315e-07, "loss": 0.8639, "step": 7256 }, { "epoch": 0.9, "grad_norm": 13.418509952235954, "learning_rate": 2.467132763144142e-07, "loss": 1.2901, "step": 7257 }, { "epoch": 0.9, "grad_norm": 8.026126216221627, "learning_rate": 2.46088721965817e-07, "loss": 1.0003, "step": 7258 }, { "epoch": 0.9, "grad_norm": 6.644809077073467, "learning_rate": 2.454649392032382e-07, "loss": 0.6682, "step": 7259 }, { "epoch": 0.9, "grad_norm": 9.188077296566355, "learning_rate": 2.4484192812792296e-07, "loss": 1.5279, "step": 7260 }, { "epoch": 0.9, "grad_norm": 8.868709247593904, "learning_rate": 2.4421968884098977e-07, "loss": 0.8989, "step": 7261 }, { "epoch": 0.9, "grad_norm": 6.69107828256129, "learning_rate": 2.4359822144342846e-07, "loss": 1.1048, "step": 7262 }, { "epoch": 0.9, "grad_norm": 7.247066227068927, "learning_rate": 2.429775260361106e-07, "loss": 0.9136, "step": 7263 }, { "epoch": 0.9, "grad_norm": 8.039037342319691, "learning_rate": 2.4235760271977603e-07, "loss": 0.9822, "step": 7264 }, { "epoch": 0.9, "grad_norm": 7.58979223383401, "learning_rate": 2.4173845159504217e-07, "loss": 0.7489, "step": 7265 }, { "epoch": 0.9, "grad_norm": 7.64400528301824, "learning_rate": 2.411200727624008e-07, "loss": 1.045, "step": 7266 }, { "epoch": 0.9, "grad_norm": 6.4579508908821595, "learning_rate": 2.4050246632221887e-07, "loss": 0.6937, "step": 7267 }, { "epoch": 0.9, "grad_norm": 4.459699128198873, "learning_rate": 2.3988563237473616e-07, "loss": 0.3465, "step": 7268 }, { "epoch": 0.9, "grad_norm": 8.664780020316316, "learning_rate": 2.392695710200688e-07, "loss": 0.9981, "step": 7269 }, { "epoch": 0.9, "grad_norm": 8.163585780591086, "learning_rate": 2.3865428235820775e-07, "loss": 0.9663, "step": 7270 }, { "epoch": 0.9, "grad_norm": 8.020807407290404, "learning_rate": 2.3803976648901494e-07, "loss": 0.8366, "step": 7271 }, { "epoch": 0.9, "grad_norm": 6.7079486296152835, "learning_rate": 2.374260235122333e-07, "loss": 0.8717, "step": 7272 }, { "epoch": 0.9, "grad_norm": 7.509444269049483, "learning_rate": 2.368130535274743e-07, "loss": 0.7831, "step": 7273 }, { "epoch": 0.9, "grad_norm": 7.658782847820283, "learning_rate": 2.3620085663422721e-07, "loss": 0.77, "step": 7274 }, { "epoch": 0.9, "grad_norm": 7.858429619390651, "learning_rate": 2.3558943293185531e-07, "loss": 0.9245, "step": 7275 }, { "epoch": 0.9, "grad_norm": 6.821333150663698, "learning_rate": 2.3497878251959537e-07, "loss": 0.8251, "step": 7276 }, { "epoch": 0.91, "grad_norm": 7.768172349577662, "learning_rate": 2.3436890549655922e-07, "loss": 0.9788, "step": 7277 }, { "epoch": 0.91, "grad_norm": 6.956014854053639, "learning_rate": 2.337598019617332e-07, "loss": 0.8772, "step": 7278 }, { "epoch": 0.91, "grad_norm": 7.181702234268578, "learning_rate": 2.3315147201397992e-07, "loss": 0.658, "step": 7279 }, { "epoch": 0.91, "grad_norm": 8.473507042540716, "learning_rate": 2.325439157520315e-07, "loss": 1.0043, "step": 7280 }, { "epoch": 0.91, "grad_norm": 7.01835235584827, "learning_rate": 2.3193713327450075e-07, "loss": 0.9791, "step": 7281 }, { "epoch": 0.91, "grad_norm": 8.082165117100951, "learning_rate": 2.3133112467986996e-07, "loss": 0.9275, "step": 7282 }, { "epoch": 0.91, "grad_norm": 6.828409587733953, "learning_rate": 2.3072589006649825e-07, "loss": 0.9017, "step": 7283 }, { "epoch": 0.91, "grad_norm": 8.217005088105827, "learning_rate": 2.301214295326193e-07, "loss": 0.9857, "step": 7284 }, { "epoch": 0.91, "grad_norm": 10.345278559911483, "learning_rate": 2.2951774317633902e-07, "loss": 1.1383, "step": 7285 }, { "epoch": 0.91, "grad_norm": 9.996368314542046, "learning_rate": 2.289148310956396e-07, "loss": 1.3518, "step": 7286 }, { "epoch": 0.91, "grad_norm": 6.332641298191856, "learning_rate": 2.283126933883767e-07, "loss": 0.4444, "step": 7287 }, { "epoch": 0.91, "grad_norm": 8.881907824157071, "learning_rate": 2.277113301522821e-07, "loss": 1.0603, "step": 7288 }, { "epoch": 0.91, "grad_norm": 8.446364708050558, "learning_rate": 2.2711074148495716e-07, "loss": 1.0106, "step": 7289 }, { "epoch": 0.91, "grad_norm": 12.007022856155144, "learning_rate": 2.2651092748388447e-07, "loss": 0.8847, "step": 7290 }, { "epoch": 0.91, "grad_norm": 6.94172892983464, "learning_rate": 2.2591188824641508e-07, "loss": 0.9231, "step": 7291 }, { "epoch": 0.91, "grad_norm": 7.779967695883658, "learning_rate": 2.253136238697762e-07, "loss": 0.902, "step": 7292 }, { "epoch": 0.91, "grad_norm": 7.455273200129915, "learning_rate": 2.2471613445107122e-07, "loss": 0.708, "step": 7293 }, { "epoch": 0.91, "grad_norm": 7.6881035480582405, "learning_rate": 2.241194200872737e-07, "loss": 0.8865, "step": 7294 }, { "epoch": 0.91, "grad_norm": 7.364675422269602, "learning_rate": 2.2352348087523457e-07, "loss": 0.9896, "step": 7295 }, { "epoch": 0.91, "grad_norm": 8.381739768507211, "learning_rate": 2.2292831691167806e-07, "loss": 0.9518, "step": 7296 }, { "epoch": 0.91, "grad_norm": 7.24126857498209, "learning_rate": 2.2233392829320354e-07, "loss": 0.7969, "step": 7297 }, { "epoch": 0.91, "grad_norm": 8.339454650103749, "learning_rate": 2.217403151162817e-07, "loss": 1.077, "step": 7298 }, { "epoch": 0.91, "grad_norm": 7.18489598752973, "learning_rate": 2.2114747747726095e-07, "loss": 1.1527, "step": 7299 }, { "epoch": 0.91, "grad_norm": 9.144853190795613, "learning_rate": 2.2055541547236158e-07, "loss": 1.0653, "step": 7300 }, { "epoch": 0.91, "grad_norm": 6.688175861289011, "learning_rate": 2.1996412919767728e-07, "loss": 0.8422, "step": 7301 }, { "epoch": 0.91, "grad_norm": 8.41908149833699, "learning_rate": 2.1937361874917905e-07, "loss": 1.0258, "step": 7302 }, { "epoch": 0.91, "grad_norm": 8.921580704176767, "learning_rate": 2.1878388422270858e-07, "loss": 1.176, "step": 7303 }, { "epoch": 0.91, "grad_norm": 7.070807627519029, "learning_rate": 2.181949257139837e-07, "loss": 0.9326, "step": 7304 }, { "epoch": 0.91, "grad_norm": 6.915694188163397, "learning_rate": 2.176067433185952e-07, "loss": 0.7444, "step": 7305 }, { "epoch": 0.91, "grad_norm": 7.39262501122352, "learning_rate": 2.1701933713200894e-07, "loss": 0.6587, "step": 7306 }, { "epoch": 0.91, "grad_norm": 7.754365940870297, "learning_rate": 2.164327072495631e-07, "loss": 0.7138, "step": 7307 }, { "epoch": 0.91, "grad_norm": 7.692141151847097, "learning_rate": 2.1584685376647263e-07, "loss": 0.8439, "step": 7308 }, { "epoch": 0.91, "grad_norm": 7.5833167762852485, "learning_rate": 2.1526177677782368e-07, "loss": 1.0057, "step": 7309 }, { "epoch": 0.91, "grad_norm": 5.906394542809562, "learning_rate": 2.146774763785764e-07, "loss": 0.6757, "step": 7310 }, { "epoch": 0.91, "grad_norm": 7.374100821724986, "learning_rate": 2.140939526635688e-07, "loss": 0.7804, "step": 7311 }, { "epoch": 0.91, "grad_norm": 15.080610390347216, "learning_rate": 2.1351120572750737e-07, "loss": 1.029, "step": 7312 }, { "epoch": 0.91, "grad_norm": 6.018757782297615, "learning_rate": 2.1292923566497591e-07, "loss": 0.7147, "step": 7313 }, { "epoch": 0.91, "grad_norm": 7.299619292155804, "learning_rate": 2.1234804257043218e-07, "loss": 0.7971, "step": 7314 }, { "epoch": 0.91, "grad_norm": 7.637588468170472, "learning_rate": 2.1176762653820626e-07, "loss": 1.0092, "step": 7315 }, { "epoch": 0.91, "grad_norm": 7.412068905664633, "learning_rate": 2.1118798766250336e-07, "loss": 0.7427, "step": 7316 }, { "epoch": 0.91, "grad_norm": 8.74305676177376, "learning_rate": 2.1060912603740101e-07, "loss": 1.1908, "step": 7317 }, { "epoch": 0.91, "grad_norm": 7.363734140147743, "learning_rate": 2.100310417568535e-07, "loss": 0.7301, "step": 7318 }, { "epoch": 0.91, "grad_norm": 6.453688483153167, "learning_rate": 2.0945373491468468e-07, "loss": 0.5276, "step": 7319 }, { "epoch": 0.91, "grad_norm": 7.730392565185901, "learning_rate": 2.0887720560459735e-07, "loss": 0.9055, "step": 7320 }, { "epoch": 0.91, "grad_norm": 9.667717889731433, "learning_rate": 2.0830145392016276e-07, "loss": 0.9771, "step": 7321 }, { "epoch": 0.91, "grad_norm": 7.398852844370825, "learning_rate": 2.0772647995483063e-07, "loss": 0.8784, "step": 7322 }, { "epoch": 0.91, "grad_norm": 8.398952629312058, "learning_rate": 2.0715228380192076e-07, "loss": 1.2305, "step": 7323 }, { "epoch": 0.91, "grad_norm": 7.195742493980471, "learning_rate": 2.0657886555463024e-07, "loss": 0.9403, "step": 7324 }, { "epoch": 0.91, "grad_norm": 7.41319099613542, "learning_rate": 2.0600622530602633e-07, "loss": 0.9575, "step": 7325 }, { "epoch": 0.91, "grad_norm": 8.524995248339426, "learning_rate": 2.0543436314905242e-07, "loss": 1.0432, "step": 7326 }, { "epoch": 0.91, "grad_norm": 7.772793369877108, "learning_rate": 2.0486327917652538e-07, "loss": 0.7414, "step": 7327 }, { "epoch": 0.91, "grad_norm": 7.613684906783761, "learning_rate": 2.042929734811333e-07, "loss": 0.7505, "step": 7328 }, { "epoch": 0.91, "grad_norm": 7.270407911005924, "learning_rate": 2.0372344615544326e-07, "loss": 0.6882, "step": 7329 }, { "epoch": 0.91, "grad_norm": 7.488751405983528, "learning_rate": 2.0315469729188963e-07, "loss": 1.0581, "step": 7330 }, { "epoch": 0.91, "grad_norm": 7.332955932426946, "learning_rate": 2.025867269827847e-07, "loss": 0.8315, "step": 7331 }, { "epoch": 0.91, "grad_norm": 7.4116975972155315, "learning_rate": 2.020195353203136e-07, "loss": 0.7671, "step": 7332 }, { "epoch": 0.91, "grad_norm": 6.927197523738928, "learning_rate": 2.0145312239653325e-07, "loss": 0.5271, "step": 7333 }, { "epoch": 0.91, "grad_norm": 6.384655339762162, "learning_rate": 2.0088748830337733e-07, "loss": 0.8041, "step": 7334 }, { "epoch": 0.91, "grad_norm": 6.182820371782788, "learning_rate": 2.003226331326502e-07, "loss": 0.6133, "step": 7335 }, { "epoch": 0.91, "grad_norm": 6.533758969584244, "learning_rate": 1.9975855697603187e-07, "loss": 0.7905, "step": 7336 }, { "epoch": 0.91, "grad_norm": 6.21789211745679, "learning_rate": 1.991952599250735e-07, "loss": 0.506, "step": 7337 }, { "epoch": 0.91, "grad_norm": 6.838944702786815, "learning_rate": 1.986327420712031e-07, "loss": 0.5449, "step": 7338 }, { "epoch": 0.91, "grad_norm": 7.736926759702, "learning_rate": 1.9807100350571929e-07, "loss": 0.8509, "step": 7339 }, { "epoch": 0.91, "grad_norm": 6.763096329438361, "learning_rate": 1.975100443197958e-07, "loss": 0.9494, "step": 7340 }, { "epoch": 0.91, "grad_norm": 7.3897921985851855, "learning_rate": 1.969498646044793e-07, "loss": 0.7075, "step": 7341 }, { "epoch": 0.91, "grad_norm": 7.468533777730475, "learning_rate": 1.9639046445068976e-07, "loss": 0.6658, "step": 7342 }, { "epoch": 0.91, "grad_norm": 7.916182444147458, "learning_rate": 1.9583184394922127e-07, "loss": 1.0847, "step": 7343 }, { "epoch": 0.91, "grad_norm": 8.7183958447708, "learning_rate": 1.9527400319074075e-07, "loss": 1.2834, "step": 7344 }, { "epoch": 0.91, "grad_norm": 8.608529167836751, "learning_rate": 1.9471694226578963e-07, "loss": 0.8134, "step": 7345 }, { "epoch": 0.91, "grad_norm": 6.3785697021806405, "learning_rate": 1.9416066126478062e-07, "loss": 0.7117, "step": 7346 }, { "epoch": 0.91, "grad_norm": 8.31544241993256, "learning_rate": 1.9360516027800258e-07, "loss": 0.6482, "step": 7347 }, { "epoch": 0.91, "grad_norm": 6.498594939666952, "learning_rate": 1.9305043939561508e-07, "loss": 0.7507, "step": 7348 }, { "epoch": 0.91, "grad_norm": 6.470441697609525, "learning_rate": 1.9249649870765273e-07, "loss": 0.6914, "step": 7349 }, { "epoch": 0.91, "grad_norm": 7.862789075236983, "learning_rate": 1.919433383040248e-07, "loss": 1.0218, "step": 7350 }, { "epoch": 0.91, "grad_norm": 7.26253323157417, "learning_rate": 1.9139095827450993e-07, "loss": 0.8209, "step": 7351 }, { "epoch": 0.91, "grad_norm": 7.700796249443189, "learning_rate": 1.9083935870876314e-07, "loss": 0.8086, "step": 7352 }, { "epoch": 0.91, "grad_norm": 7.94532494194477, "learning_rate": 1.902885396963128e-07, "loss": 0.8981, "step": 7353 }, { "epoch": 0.91, "grad_norm": 6.820211003197498, "learning_rate": 1.8973850132655957e-07, "loss": 0.7029, "step": 7354 }, { "epoch": 0.91, "grad_norm": 7.653838677124961, "learning_rate": 1.8918924368877656e-07, "loss": 1.17, "step": 7355 }, { "epoch": 0.91, "grad_norm": 8.68336363207336, "learning_rate": 1.886407668721135e-07, "loss": 0.6857, "step": 7356 }, { "epoch": 0.92, "grad_norm": 9.20839047704955, "learning_rate": 1.8809307096559038e-07, "loss": 1.2936, "step": 7357 }, { "epoch": 0.92, "grad_norm": 6.735226150800264, "learning_rate": 1.8754615605809933e-07, "loss": 0.6292, "step": 7358 }, { "epoch": 0.92, "grad_norm": 7.480787408454847, "learning_rate": 1.8700002223841052e-07, "loss": 0.715, "step": 7359 }, { "epoch": 0.92, "grad_norm": 7.1047739510979895, "learning_rate": 1.8645466959516302e-07, "loss": 0.8577, "step": 7360 }, { "epoch": 0.92, "grad_norm": 7.277365948150145, "learning_rate": 1.8591009821687044e-07, "loss": 0.6776, "step": 7361 }, { "epoch": 0.92, "grad_norm": 8.203152043613883, "learning_rate": 1.8536630819191993e-07, "loss": 0.8913, "step": 7362 }, { "epoch": 0.92, "grad_norm": 6.939477839128616, "learning_rate": 1.8482329960857249e-07, "loss": 0.9101, "step": 7363 }, { "epoch": 0.92, "grad_norm": 7.2673360004055585, "learning_rate": 1.8428107255495932e-07, "loss": 0.7014, "step": 7364 }, { "epoch": 0.92, "grad_norm": 7.2624565150147715, "learning_rate": 1.8373962711908944e-07, "loss": 0.7835, "step": 7365 }, { "epoch": 0.92, "grad_norm": 9.343474947319246, "learning_rate": 1.8319896338884146e-07, "loss": 1.2049, "step": 7366 }, { "epoch": 0.92, "grad_norm": 7.201956922999122, "learning_rate": 1.8265908145196577e-07, "loss": 1.0863, "step": 7367 }, { "epoch": 0.92, "grad_norm": 9.07965576593602, "learning_rate": 1.8211998139609222e-07, "loss": 0.9159, "step": 7368 }, { "epoch": 0.92, "grad_norm": 8.766132619225752, "learning_rate": 1.8158166330871697e-07, "loss": 0.8038, "step": 7369 }, { "epoch": 0.92, "grad_norm": 7.144426656363007, "learning_rate": 1.810441272772123e-07, "loss": 0.8625, "step": 7370 }, { "epoch": 0.92, "grad_norm": 7.104964231916531, "learning_rate": 1.8050737338882406e-07, "loss": 0.8713, "step": 7371 }, { "epoch": 0.92, "grad_norm": 7.775398069433064, "learning_rate": 1.7997140173067086e-07, "loss": 1.0013, "step": 7372 }, { "epoch": 0.92, "grad_norm": 7.486931631654516, "learning_rate": 1.7943621238974196e-07, "loss": 0.8156, "step": 7373 }, { "epoch": 0.92, "grad_norm": 10.033280666524538, "learning_rate": 1.789018054529029e-07, "loss": 0.8526, "step": 7374 }, { "epoch": 0.92, "grad_norm": 7.788575141896489, "learning_rate": 1.78368181006891e-07, "loss": 1.052, "step": 7375 }, { "epoch": 0.92, "grad_norm": 7.047097860775753, "learning_rate": 1.778353391383153e-07, "loss": 1.0402, "step": 7376 }, { "epoch": 0.92, "grad_norm": 7.148223757460166, "learning_rate": 1.7730327993366048e-07, "loss": 0.691, "step": 7377 }, { "epoch": 0.92, "grad_norm": 8.06047960678098, "learning_rate": 1.7677200347928136e-07, "loss": 0.968, "step": 7378 }, { "epoch": 0.92, "grad_norm": 7.494362334359723, "learning_rate": 1.7624150986140732e-07, "loss": 1.0591, "step": 7379 }, { "epoch": 0.92, "grad_norm": 6.763261385539501, "learning_rate": 1.757117991661411e-07, "loss": 0.586, "step": 7380 }, { "epoch": 0.92, "grad_norm": 8.503212622707844, "learning_rate": 1.751828714794579e-07, "loss": 1.1221, "step": 7381 }, { "epoch": 0.92, "grad_norm": 8.875779806628422, "learning_rate": 1.7465472688720397e-07, "loss": 1.0899, "step": 7382 }, { "epoch": 0.92, "grad_norm": 7.414931193768686, "learning_rate": 1.7412736547510134e-07, "loss": 0.6496, "step": 7383 }, { "epoch": 0.92, "grad_norm": 7.055819177902979, "learning_rate": 1.7360078732874375e-07, "loss": 1.0021, "step": 7384 }, { "epoch": 0.92, "grad_norm": 5.747711342150171, "learning_rate": 1.7307499253359672e-07, "loss": 0.4939, "step": 7385 }, { "epoch": 0.92, "grad_norm": 8.623221855281917, "learning_rate": 1.7254998117500088e-07, "loss": 0.9677, "step": 7386 }, { "epoch": 0.92, "grad_norm": 7.539910143652002, "learning_rate": 1.7202575333816751e-07, "loss": 0.7931, "step": 7387 }, { "epoch": 0.92, "grad_norm": 8.615527198994734, "learning_rate": 1.7150230910818244e-07, "loss": 1.2726, "step": 7388 }, { "epoch": 0.92, "grad_norm": 7.664036551068616, "learning_rate": 1.7097964857000326e-07, "loss": 0.6451, "step": 7389 }, { "epoch": 0.92, "grad_norm": 6.297447691420457, "learning_rate": 1.7045777180846046e-07, "loss": 0.7377, "step": 7390 }, { "epoch": 0.92, "grad_norm": 9.162591906304847, "learning_rate": 1.6993667890825738e-07, "loss": 1.337, "step": 7391 }, { "epoch": 0.92, "grad_norm": 7.0548921534324105, "learning_rate": 1.6941636995397025e-07, "loss": 0.7862, "step": 7392 }, { "epoch": 0.92, "grad_norm": 6.994894207489797, "learning_rate": 1.6889684503004931e-07, "loss": 0.691, "step": 7393 }, { "epoch": 0.92, "grad_norm": 6.902472229608513, "learning_rate": 1.6837810422081435e-07, "loss": 0.8923, "step": 7394 }, { "epoch": 0.92, "grad_norm": 8.762422637496988, "learning_rate": 1.6786014761046133e-07, "loss": 1.1659, "step": 7395 }, { "epoch": 0.92, "grad_norm": 7.269717787624337, "learning_rate": 1.6734297528305687e-07, "loss": 0.798, "step": 7396 }, { "epoch": 0.92, "grad_norm": 8.476647805841232, "learning_rate": 1.6682658732254165e-07, "loss": 0.9702, "step": 7397 }, { "epoch": 0.92, "grad_norm": 6.181201010942212, "learning_rate": 1.6631098381272748e-07, "loss": 0.7146, "step": 7398 }, { "epoch": 0.92, "grad_norm": 8.721176578129047, "learning_rate": 1.6579616483729966e-07, "loss": 0.9121, "step": 7399 }, { "epoch": 0.92, "grad_norm": 8.242525178307734, "learning_rate": 1.652821304798158e-07, "loss": 0.9283, "step": 7400 }, { "epoch": 0.92, "grad_norm": 6.9988703688811515, "learning_rate": 1.6476888082370746e-07, "loss": 0.8012, "step": 7401 }, { "epoch": 0.92, "grad_norm": 6.79035037173692, "learning_rate": 1.6425641595227804e-07, "loss": 0.7235, "step": 7402 }, { "epoch": 0.92, "grad_norm": 9.472170811890283, "learning_rate": 1.6374473594870155e-07, "loss": 1.4359, "step": 7403 }, { "epoch": 0.92, "grad_norm": 8.157733622647218, "learning_rate": 1.6323384089602935e-07, "loss": 0.8277, "step": 7404 }, { "epoch": 0.92, "grad_norm": 6.644070187285957, "learning_rate": 1.6272373087718006e-07, "loss": 0.7996, "step": 7405 }, { "epoch": 0.92, "grad_norm": 8.170228550254462, "learning_rate": 1.622144059749481e-07, "loss": 1.2461, "step": 7406 }, { "epoch": 0.92, "grad_norm": 6.483273845767408, "learning_rate": 1.6170586627200057e-07, "loss": 0.7947, "step": 7407 }, { "epoch": 0.92, "grad_norm": 7.151542908320634, "learning_rate": 1.6119811185087542e-07, "loss": 0.7791, "step": 7408 }, { "epoch": 0.92, "grad_norm": 7.539146502955731, "learning_rate": 1.6069114279398336e-07, "loss": 0.9996, "step": 7409 }, { "epoch": 0.92, "grad_norm": 8.515157877163876, "learning_rate": 1.6018495918360965e-07, "loss": 0.7761, "step": 7410 }, { "epoch": 0.92, "grad_norm": 7.63488837343887, "learning_rate": 1.596795611019103e-07, "loss": 1.0028, "step": 7411 }, { "epoch": 0.92, "grad_norm": 8.881669825955228, "learning_rate": 1.5917494863091298e-07, "loss": 0.78, "step": 7412 }, { "epoch": 0.92, "grad_norm": 9.776934325246614, "learning_rate": 1.5867112185252108e-07, "loss": 1.3787, "step": 7413 }, { "epoch": 0.92, "grad_norm": 7.526534702939226, "learning_rate": 1.5816808084850754e-07, "loss": 1.0383, "step": 7414 }, { "epoch": 0.92, "grad_norm": 7.009800755613519, "learning_rate": 1.5766582570051759e-07, "loss": 0.911, "step": 7415 }, { "epoch": 0.92, "grad_norm": 8.8903021552267, "learning_rate": 1.5716435649007213e-07, "loss": 0.9002, "step": 7416 }, { "epoch": 0.92, "grad_norm": 7.647147188822341, "learning_rate": 1.5666367329856046e-07, "loss": 0.7379, "step": 7417 }, { "epoch": 0.92, "grad_norm": 7.4567551317094365, "learning_rate": 1.561637762072471e-07, "loss": 0.8068, "step": 7418 }, { "epoch": 0.92, "grad_norm": 7.236492375064186, "learning_rate": 1.556646652972682e-07, "loss": 0.9695, "step": 7419 }, { "epoch": 0.92, "grad_norm": 5.383454274252351, "learning_rate": 1.551663406496323e-07, "loss": 0.6376, "step": 7420 }, { "epoch": 0.92, "grad_norm": 7.237949558182746, "learning_rate": 1.546688023452192e-07, "loss": 0.5194, "step": 7421 }, { "epoch": 0.92, "grad_norm": 6.040901055164687, "learning_rate": 1.5417205046478422e-07, "loss": 0.4384, "step": 7422 }, { "epoch": 0.92, "grad_norm": 8.10563293350915, "learning_rate": 1.5367608508895127e-07, "loss": 1.1033, "step": 7423 }, { "epoch": 0.92, "grad_norm": 6.591722621321597, "learning_rate": 1.5318090629821757e-07, "loss": 0.5797, "step": 7424 }, { "epoch": 0.92, "grad_norm": 7.207735547451325, "learning_rate": 1.5268651417295556e-07, "loss": 0.8188, "step": 7425 }, { "epoch": 0.92, "grad_norm": 7.170930713089653, "learning_rate": 1.5219290879340597e-07, "loss": 0.8457, "step": 7426 }, { "epoch": 0.92, "grad_norm": 7.4461152934568, "learning_rate": 1.5170009023968425e-07, "loss": 0.7285, "step": 7427 }, { "epoch": 0.92, "grad_norm": 6.484985297089132, "learning_rate": 1.5120805859177802e-07, "loss": 0.812, "step": 7428 }, { "epoch": 0.92, "grad_norm": 8.713243905264882, "learning_rate": 1.5071681392954674e-07, "loss": 1.0749, "step": 7429 }, { "epoch": 0.92, "grad_norm": 9.539477377056159, "learning_rate": 1.5022635633272109e-07, "loss": 1.0579, "step": 7430 }, { "epoch": 0.92, "grad_norm": 8.054716686810417, "learning_rate": 1.4973668588090572e-07, "loss": 0.9751, "step": 7431 }, { "epoch": 0.92, "grad_norm": 7.007659692016419, "learning_rate": 1.4924780265357762e-07, "loss": 0.7825, "step": 7432 }, { "epoch": 0.92, "grad_norm": 9.415145028609134, "learning_rate": 1.4875970673008333e-07, "loss": 1.3099, "step": 7433 }, { "epoch": 0.92, "grad_norm": 6.971595984855112, "learning_rate": 1.482723981896461e-07, "loss": 0.9802, "step": 7434 }, { "epoch": 0.92, "grad_norm": 8.914250587041034, "learning_rate": 1.4778587711135662e-07, "loss": 0.9807, "step": 7435 }, { "epoch": 0.92, "grad_norm": 8.417813193663033, "learning_rate": 1.4730014357418e-07, "loss": 1.0817, "step": 7436 }, { "epoch": 0.93, "grad_norm": 7.14445399467085, "learning_rate": 1.4681519765695484e-07, "loss": 0.8942, "step": 7437 }, { "epoch": 0.93, "grad_norm": 5.544375695868685, "learning_rate": 1.4633103943839045e-07, "loss": 0.6237, "step": 7438 }, { "epoch": 0.93, "grad_norm": 7.479138347595917, "learning_rate": 1.458476689970667e-07, "loss": 1.0324, "step": 7439 }, { "epoch": 0.93, "grad_norm": 8.207508897537549, "learning_rate": 1.4536508641143866e-07, "loss": 0.9452, "step": 7440 }, { "epoch": 0.93, "grad_norm": 8.919677430789138, "learning_rate": 1.448832917598325e-07, "loss": 1.26, "step": 7441 }, { "epoch": 0.93, "grad_norm": 8.393859836044191, "learning_rate": 1.4440228512044406e-07, "loss": 1.1496, "step": 7442 }, { "epoch": 0.93, "grad_norm": 5.65980491641431, "learning_rate": 1.439220665713459e-07, "loss": 0.6685, "step": 7443 }, { "epoch": 0.93, "grad_norm": 9.279870374351866, "learning_rate": 1.4344263619047894e-07, "loss": 0.853, "step": 7444 }, { "epoch": 0.93, "grad_norm": 5.945759499320623, "learning_rate": 1.429639940556571e-07, "loss": 0.5992, "step": 7445 }, { "epoch": 0.93, "grad_norm": 7.602359391683833, "learning_rate": 1.4248614024456764e-07, "loss": 0.9996, "step": 7446 }, { "epoch": 0.93, "grad_norm": 8.741979162392058, "learning_rate": 1.4200907483476745e-07, "loss": 0.8674, "step": 7447 }, { "epoch": 0.93, "grad_norm": 9.234312708960525, "learning_rate": 1.415327979036879e-07, "loss": 1.248, "step": 7448 }, { "epoch": 0.93, "grad_norm": 7.013724797485963, "learning_rate": 1.4105730952863107e-07, "loss": 0.7183, "step": 7449 }, { "epoch": 0.93, "grad_norm": 9.366419332766702, "learning_rate": 1.4058260978677184e-07, "loss": 0.5206, "step": 7450 }, { "epoch": 0.93, "grad_norm": 7.920760021208802, "learning_rate": 1.4010869875515533e-07, "loss": 0.9851, "step": 7451 }, { "epoch": 0.93, "grad_norm": 6.697396150081536, "learning_rate": 1.396355765107016e-07, "loss": 0.6854, "step": 7452 }, { "epoch": 0.93, "grad_norm": 7.390811746261521, "learning_rate": 1.391632431301998e-07, "loss": 0.5898, "step": 7453 }, { "epoch": 0.93, "grad_norm": 6.731148541173342, "learning_rate": 1.3869169869031253e-07, "loss": 0.6103, "step": 7454 }, { "epoch": 0.93, "grad_norm": 7.109757081813576, "learning_rate": 1.3822094326757462e-07, "loss": 1.1667, "step": 7455 }, { "epoch": 0.93, "grad_norm": 7.79054673558052, "learning_rate": 1.3775097693839113e-07, "loss": 0.7974, "step": 7456 }, { "epoch": 0.93, "grad_norm": 6.332157232034352, "learning_rate": 1.37281799779041e-07, "loss": 0.8254, "step": 7457 }, { "epoch": 0.93, "grad_norm": 7.747528296552942, "learning_rate": 1.3681341186567387e-07, "loss": 1.1344, "step": 7458 }, { "epoch": 0.93, "grad_norm": 6.638344762563241, "learning_rate": 1.363458132743123e-07, "loss": 0.772, "step": 7459 }, { "epoch": 0.93, "grad_norm": 8.231770971755697, "learning_rate": 1.3587900408084885e-07, "loss": 0.9775, "step": 7460 }, { "epoch": 0.93, "grad_norm": 8.812838306671294, "learning_rate": 1.3541298436105133e-07, "loss": 0.9737, "step": 7461 }, { "epoch": 0.93, "grad_norm": 5.8643522424070005, "learning_rate": 1.349477541905553e-07, "loss": 0.6255, "step": 7462 }, { "epoch": 0.93, "grad_norm": 6.931142153973184, "learning_rate": 1.3448331364487088e-07, "loss": 0.6621, "step": 7463 }, { "epoch": 0.93, "grad_norm": 7.151585928081264, "learning_rate": 1.3401966279938006e-07, "loss": 0.8293, "step": 7464 }, { "epoch": 0.93, "grad_norm": 12.236354897031536, "learning_rate": 1.3355680172933484e-07, "loss": 1.2185, "step": 7465 }, { "epoch": 0.93, "grad_norm": 7.095857052635766, "learning_rate": 1.3309473050986067e-07, "loss": 0.5878, "step": 7466 }, { "epoch": 0.93, "grad_norm": 9.939703444839171, "learning_rate": 1.3263344921595368e-07, "loss": 0.7855, "step": 7467 }, { "epoch": 0.93, "grad_norm": 6.669196336888405, "learning_rate": 1.3217295792248342e-07, "loss": 0.9566, "step": 7468 }, { "epoch": 0.93, "grad_norm": 8.1748633013586, "learning_rate": 1.31713256704189e-07, "loss": 1.038, "step": 7469 }, { "epoch": 0.93, "grad_norm": 9.65807943961284, "learning_rate": 1.3125434563568406e-07, "loss": 1.229, "step": 7470 }, { "epoch": 0.93, "grad_norm": 7.9504340268793365, "learning_rate": 1.3079622479145072e-07, "loss": 0.7876, "step": 7471 }, { "epoch": 0.93, "grad_norm": 8.264126647595603, "learning_rate": 1.30338894245845e-07, "loss": 0.8388, "step": 7472 }, { "epoch": 0.93, "grad_norm": 9.352075847821554, "learning_rate": 1.298823540730948e-07, "loss": 1.1574, "step": 7473 }, { "epoch": 0.93, "grad_norm": 9.603278661705746, "learning_rate": 1.294266043472986e-07, "loss": 1.2986, "step": 7474 }, { "epoch": 0.93, "grad_norm": 6.928905652858834, "learning_rate": 1.2897164514242667e-07, "loss": 0.8816, "step": 7475 }, { "epoch": 0.93, "grad_norm": 8.94063750321989, "learning_rate": 1.2851747653232216e-07, "loss": 1.2246, "step": 7476 }, { "epoch": 0.93, "grad_norm": 8.135116867101406, "learning_rate": 1.2806409859069946e-07, "loss": 0.7633, "step": 7477 }, { "epoch": 0.93, "grad_norm": 27.09285311602173, "learning_rate": 1.2761151139114247e-07, "loss": 0.7343, "step": 7478 }, { "epoch": 0.93, "grad_norm": 8.493570930796354, "learning_rate": 1.2715971500711077e-07, "loss": 1.1706, "step": 7479 }, { "epoch": 0.93, "grad_norm": 7.854321716288516, "learning_rate": 1.2670870951193293e-07, "loss": 0.8587, "step": 7480 }, { "epoch": 0.93, "grad_norm": 7.234168541293522, "learning_rate": 1.262584949788076e-07, "loss": 0.7599, "step": 7481 }, { "epoch": 0.93, "grad_norm": 8.395719620573566, "learning_rate": 1.2580907148081024e-07, "loss": 0.983, "step": 7482 }, { "epoch": 0.93, "grad_norm": 7.2602462194517035, "learning_rate": 1.253604390908819e-07, "loss": 0.6957, "step": 7483 }, { "epoch": 0.93, "grad_norm": 6.879678054304324, "learning_rate": 1.2491259788183996e-07, "loss": 0.6141, "step": 7484 }, { "epoch": 0.93, "grad_norm": 7.102948292696861, "learning_rate": 1.2446554792637065e-07, "loss": 0.8465, "step": 7485 }, { "epoch": 0.93, "grad_norm": 9.803000234267147, "learning_rate": 1.2401928929703376e-07, "loss": 1.3014, "step": 7486 }, { "epoch": 0.93, "grad_norm": 9.179979977601693, "learning_rate": 1.2357382206625802e-07, "loss": 1.1278, "step": 7487 }, { "epoch": 0.93, "grad_norm": 7.912273507279923, "learning_rate": 1.2312914630634555e-07, "loss": 0.7591, "step": 7488 }, { "epoch": 0.93, "grad_norm": 6.908292359400575, "learning_rate": 1.2268526208947086e-07, "loss": 0.9441, "step": 7489 }, { "epoch": 0.93, "grad_norm": 6.71520719697927, "learning_rate": 1.2224216948767686e-07, "loss": 0.6358, "step": 7490 }, { "epoch": 0.93, "grad_norm": 6.785813067239039, "learning_rate": 1.2179986857288217e-07, "loss": 0.6484, "step": 7491 }, { "epoch": 0.93, "grad_norm": 8.907063449421631, "learning_rate": 1.213583594168727e-07, "loss": 1.1673, "step": 7492 }, { "epoch": 0.93, "grad_norm": 8.314726581700995, "learning_rate": 1.2091764209130885e-07, "loss": 0.8837, "step": 7493 }, { "epoch": 0.93, "grad_norm": 7.657297618894905, "learning_rate": 1.2047771666772124e-07, "loss": 0.8492, "step": 7494 }, { "epoch": 0.93, "grad_norm": 8.202177003312178, "learning_rate": 1.2003858321751272e-07, "loss": 1.0089, "step": 7495 }, { "epoch": 0.93, "grad_norm": 8.83140203293537, "learning_rate": 1.196002418119563e-07, "loss": 0.9448, "step": 7496 }, { "epoch": 0.93, "grad_norm": 6.865850706930471, "learning_rate": 1.1916269252219726e-07, "loss": 0.6332, "step": 7497 }, { "epoch": 0.93, "grad_norm": 6.266310257809961, "learning_rate": 1.187259354192527e-07, "loss": 0.507, "step": 7498 }, { "epoch": 0.93, "grad_norm": 8.11822812920706, "learning_rate": 1.1828997057400982e-07, "loss": 0.8893, "step": 7499 }, { "epoch": 0.93, "grad_norm": 8.259621988044968, "learning_rate": 1.1785479805722977e-07, "loss": 0.8355, "step": 7500 }, { "epoch": 0.93, "grad_norm": 8.058133610875709, "learning_rate": 1.1742041793954162e-07, "loss": 0.8537, "step": 7501 }, { "epoch": 0.93, "grad_norm": 6.432320385243552, "learning_rate": 1.1698683029144897e-07, "loss": 0.6809, "step": 7502 }, { "epoch": 0.93, "grad_norm": 6.875624432880838, "learning_rate": 1.1655403518332498e-07, "loss": 0.6407, "step": 7503 }, { "epoch": 0.93, "grad_norm": 7.715299442139905, "learning_rate": 1.1612203268541456e-07, "loss": 0.7896, "step": 7504 }, { "epoch": 0.93, "grad_norm": 8.200507251942438, "learning_rate": 1.156908228678344e-07, "loss": 0.9112, "step": 7505 }, { "epoch": 0.93, "grad_norm": 7.679036891868252, "learning_rate": 1.1526040580057185e-07, "loss": 0.9734, "step": 7506 }, { "epoch": 0.93, "grad_norm": 8.866397717379481, "learning_rate": 1.1483078155348659e-07, "loss": 1.1702, "step": 7507 }, { "epoch": 0.93, "grad_norm": 6.640562510833453, "learning_rate": 1.1440195019630784e-07, "loss": 0.7294, "step": 7508 }, { "epoch": 0.93, "grad_norm": 8.634007923015245, "learning_rate": 1.1397391179863881e-07, "loss": 1.1198, "step": 7509 }, { "epoch": 0.93, "grad_norm": 8.011511543383069, "learning_rate": 1.1354666642995171e-07, "loss": 0.7936, "step": 7510 }, { "epoch": 0.93, "grad_norm": 7.826491077738472, "learning_rate": 1.1312021415959052e-07, "loss": 0.7266, "step": 7511 }, { "epoch": 0.93, "grad_norm": 7.517716348807571, "learning_rate": 1.1269455505677207e-07, "loss": 0.8607, "step": 7512 }, { "epoch": 0.93, "grad_norm": 6.587083936269144, "learning_rate": 1.1226968919058112e-07, "loss": 0.8362, "step": 7513 }, { "epoch": 0.93, "grad_norm": 8.307002741096083, "learning_rate": 1.118456166299775e-07, "loss": 0.9005, "step": 7514 }, { "epoch": 0.93, "grad_norm": 7.644654138469584, "learning_rate": 1.114223374437895e-07, "loss": 0.6199, "step": 7515 }, { "epoch": 0.93, "grad_norm": 8.209090997818487, "learning_rate": 1.1099985170071881e-07, "loss": 0.8154, "step": 7516 }, { "epoch": 0.93, "grad_norm": 7.445602457367411, "learning_rate": 1.1057815946933559e-07, "loss": 1.1346, "step": 7517 }, { "epoch": 0.94, "grad_norm": 7.779978298487194, "learning_rate": 1.1015726081808398e-07, "loss": 0.6837, "step": 7518 }, { "epoch": 0.94, "grad_norm": 8.022243511013542, "learning_rate": 1.0973715581527822e-07, "loss": 1.1408, "step": 7519 }, { "epoch": 0.94, "grad_norm": 7.537605383043544, "learning_rate": 1.0931784452910265e-07, "loss": 0.8509, "step": 7520 }, { "epoch": 0.94, "grad_norm": 7.916205987994681, "learning_rate": 1.0889932702761507e-07, "loss": 0.831, "step": 7521 }, { "epoch": 0.94, "grad_norm": 6.566800936759555, "learning_rate": 1.0848160337874225e-07, "loss": 0.7635, "step": 7522 }, { "epoch": 0.94, "grad_norm": 5.90265649583431, "learning_rate": 1.0806467365028327e-07, "loss": 0.6377, "step": 7523 }, { "epoch": 0.94, "grad_norm": 6.357938816567507, "learning_rate": 1.0764853790990848e-07, "loss": 0.8986, "step": 7524 }, { "epoch": 0.94, "grad_norm": 7.954336705584543, "learning_rate": 1.0723319622515882e-07, "loss": 1.0353, "step": 7525 }, { "epoch": 0.94, "grad_norm": 7.758717635510581, "learning_rate": 1.0681864866344593e-07, "loss": 0.9647, "step": 7526 }, { "epoch": 0.94, "grad_norm": 8.2508843276052, "learning_rate": 1.0640489529205433e-07, "loss": 1.2427, "step": 7527 }, { "epoch": 0.94, "grad_norm": 7.986914921469731, "learning_rate": 1.0599193617813808e-07, "loss": 0.8022, "step": 7528 }, { "epoch": 0.94, "grad_norm": 8.417633558312529, "learning_rate": 1.0557977138872133e-07, "loss": 1.0605, "step": 7529 }, { "epoch": 0.94, "grad_norm": 6.40534366940111, "learning_rate": 1.051684009907028e-07, "loss": 0.6973, "step": 7530 }, { "epoch": 0.94, "grad_norm": 9.383196593448048, "learning_rate": 1.0475782505084853e-07, "loss": 1.0898, "step": 7531 }, { "epoch": 0.94, "grad_norm": 7.965956633248974, "learning_rate": 1.0434804363579853e-07, "loss": 0.9592, "step": 7532 }, { "epoch": 0.94, "grad_norm": 7.505427975830201, "learning_rate": 1.0393905681206185e-07, "loss": 1.2698, "step": 7533 }, { "epoch": 0.94, "grad_norm": 8.194223388944389, "learning_rate": 1.035308646460198e-07, "loss": 1.2425, "step": 7534 }, { "epoch": 0.94, "grad_norm": 9.293988241104682, "learning_rate": 1.0312346720392275e-07, "loss": 1.1279, "step": 7535 }, { "epoch": 0.94, "grad_norm": 7.869974198165912, "learning_rate": 1.0271686455189556e-07, "loss": 0.9745, "step": 7536 }, { "epoch": 0.94, "grad_norm": 7.450384097659198, "learning_rate": 1.0231105675593156e-07, "loss": 0.9918, "step": 7537 }, { "epoch": 0.94, "grad_norm": 9.141066283066765, "learning_rate": 1.0190604388189417e-07, "loss": 1.3233, "step": 7538 }, { "epoch": 0.94, "grad_norm": 6.987755033003991, "learning_rate": 1.0150182599552139e-07, "loss": 1.0116, "step": 7539 }, { "epoch": 0.94, "grad_norm": 7.568268790466792, "learning_rate": 1.0109840316241792e-07, "loss": 0.7175, "step": 7540 }, { "epoch": 0.94, "grad_norm": 8.592356977583545, "learning_rate": 1.0069577544806309e-07, "loss": 1.2094, "step": 7541 }, { "epoch": 0.94, "grad_norm": 8.277946008662814, "learning_rate": 1.0029394291780459e-07, "loss": 1.1383, "step": 7542 }, { "epoch": 0.94, "grad_norm": 8.146031950997477, "learning_rate": 9.989290563686305e-08, "loss": 0.7345, "step": 7543 }, { "epoch": 0.94, "grad_norm": 8.724076835443777, "learning_rate": 9.949266367032806e-08, "loss": 0.9945, "step": 7544 }, { "epoch": 0.94, "grad_norm": 7.205844740890516, "learning_rate": 9.909321708316155e-08, "loss": 0.743, "step": 7545 }, { "epoch": 0.94, "grad_norm": 7.704252325269433, "learning_rate": 9.869456594019611e-08, "loss": 0.9347, "step": 7546 }, { "epoch": 0.94, "grad_norm": 8.418994952719375, "learning_rate": 9.829671030613441e-08, "loss": 0.8971, "step": 7547 }, { "epoch": 0.94, "grad_norm": 7.152423237801951, "learning_rate": 9.789965024555149e-08, "loss": 0.722, "step": 7548 }, { "epoch": 0.94, "grad_norm": 8.415390757305477, "learning_rate": 9.750338582289132e-08, "loss": 1.0509, "step": 7549 }, { "epoch": 0.94, "grad_norm": 7.818666680622717, "learning_rate": 9.710791710247025e-08, "loss": 0.5575, "step": 7550 }, { "epoch": 0.94, "grad_norm": 9.595994174559946, "learning_rate": 9.671324414847527e-08, "loss": 0.6156, "step": 7551 }, { "epoch": 0.94, "grad_norm": 6.950521303657905, "learning_rate": 9.631936702496403e-08, "loss": 0.9711, "step": 7552 }, { "epoch": 0.94, "grad_norm": 8.936340240604379, "learning_rate": 9.592628579586482e-08, "loss": 1.0364, "step": 7553 }, { "epoch": 0.94, "grad_norm": 8.368558270746233, "learning_rate": 9.553400052497608e-08, "loss": 1.0832, "step": 7554 }, { "epoch": 0.94, "grad_norm": 10.010480755591644, "learning_rate": 9.514251127596963e-08, "loss": 1.1891, "step": 7555 }, { "epoch": 0.94, "grad_norm": 8.734976920446762, "learning_rate": 9.475181811238465e-08, "loss": 1.2808, "step": 7556 }, { "epoch": 0.94, "grad_norm": 5.436115488027169, "learning_rate": 9.436192109763376e-08, "loss": 0.5784, "step": 7557 }, { "epoch": 0.94, "grad_norm": 6.881027466242789, "learning_rate": 9.39728202949991e-08, "loss": 0.9255, "step": 7558 }, { "epoch": 0.94, "grad_norm": 5.956674677833924, "learning_rate": 9.35845157676335e-08, "loss": 0.7544, "step": 7559 }, { "epoch": 0.94, "grad_norm": 8.394588105931495, "learning_rate": 9.319700757856209e-08, "loss": 0.9611, "step": 7560 }, { "epoch": 0.94, "grad_norm": 7.124076975128214, "learning_rate": 9.281029579067846e-08, "loss": 0.7201, "step": 7561 }, { "epoch": 0.94, "grad_norm": 7.08624347944439, "learning_rate": 9.242438046674796e-08, "loss": 0.7396, "step": 7562 }, { "epoch": 0.94, "grad_norm": 6.457803592574859, "learning_rate": 9.203926166940769e-08, "loss": 0.7206, "step": 7563 }, { "epoch": 0.94, "grad_norm": 6.7797926949788785, "learning_rate": 9.165493946116432e-08, "loss": 0.643, "step": 7564 }, { "epoch": 0.94, "grad_norm": 7.745217034798061, "learning_rate": 9.127141390439464e-08, "loss": 1.125, "step": 7565 }, { "epoch": 0.94, "grad_norm": 7.3358677525942895, "learning_rate": 9.088868506134773e-08, "loss": 1.1404, "step": 7566 }, { "epoch": 0.94, "grad_norm": 8.442112146928213, "learning_rate": 9.05067529941428e-08, "loss": 0.9362, "step": 7567 }, { "epoch": 0.94, "grad_norm": 5.814427183663609, "learning_rate": 9.012561776476857e-08, "loss": 0.4823, "step": 7568 }, { "epoch": 0.94, "grad_norm": 7.041892466060148, "learning_rate": 8.974527943508671e-08, "loss": 0.9057, "step": 7569 }, { "epoch": 0.94, "grad_norm": 6.689625036778064, "learning_rate": 8.936573806682669e-08, "loss": 0.6491, "step": 7570 }, { "epoch": 0.94, "grad_norm": 9.278455478840629, "learning_rate": 8.898699372159147e-08, "loss": 1.3469, "step": 7571 }, { "epoch": 0.94, "grad_norm": 6.6338163466804785, "learning_rate": 8.860904646085245e-08, "loss": 0.8952, "step": 7572 }, { "epoch": 0.94, "grad_norm": 6.908472029276421, "learning_rate": 8.823189634595386e-08, "loss": 0.6568, "step": 7573 }, { "epoch": 0.94, "grad_norm": 9.002298293175329, "learning_rate": 8.785554343810732e-08, "loss": 0.7958, "step": 7574 }, { "epoch": 0.94, "grad_norm": 8.936838678721335, "learning_rate": 8.747998779839839e-08, "loss": 1.2896, "step": 7575 }, { "epoch": 0.94, "grad_norm": 5.789126312336505, "learning_rate": 8.710522948778167e-08, "loss": 0.6976, "step": 7576 }, { "epoch": 0.94, "grad_norm": 7.4769001520068, "learning_rate": 8.673126856708181e-08, "loss": 0.8963, "step": 7577 }, { "epoch": 0.94, "grad_norm": 8.42784463182779, "learning_rate": 8.635810509699583e-08, "loss": 0.8425, "step": 7578 }, { "epoch": 0.94, "grad_norm": 6.839304741063822, "learning_rate": 8.598573913808861e-08, "loss": 0.95, "step": 7579 }, { "epoch": 0.94, "grad_norm": 6.6481723695888935, "learning_rate": 8.561417075079903e-08, "loss": 0.6293, "step": 7580 }, { "epoch": 0.94, "grad_norm": 8.178716701382285, "learning_rate": 8.52433999954333e-08, "loss": 1.0995, "step": 7581 }, { "epoch": 0.94, "grad_norm": 7.295412229742778, "learning_rate": 8.487342693217048e-08, "loss": 0.8246, "step": 7582 }, { "epoch": 0.94, "grad_norm": 8.70003882418465, "learning_rate": 8.450425162105813e-08, "loss": 0.8613, "step": 7583 }, { "epoch": 0.94, "grad_norm": 7.590078118658278, "learning_rate": 8.413587412201663e-08, "loss": 0.8933, "step": 7584 }, { "epoch": 0.94, "grad_norm": 7.579466482391574, "learning_rate": 8.376829449483537e-08, "loss": 0.7743, "step": 7585 }, { "epoch": 0.94, "grad_norm": 8.44835778328406, "learning_rate": 8.340151279917385e-08, "loss": 0.9015, "step": 7586 }, { "epoch": 0.94, "grad_norm": 8.566233376934504, "learning_rate": 8.303552909456391e-08, "loss": 1.1644, "step": 7587 }, { "epoch": 0.94, "grad_norm": 7.036382554142634, "learning_rate": 8.26703434404058e-08, "loss": 0.8916, "step": 7588 }, { "epoch": 0.94, "grad_norm": 7.767064272663496, "learning_rate": 8.230595589597156e-08, "loss": 0.8252, "step": 7589 }, { "epoch": 0.94, "grad_norm": 8.548050831358237, "learning_rate": 8.194236652040277e-08, "loss": 1.0998, "step": 7590 }, { "epoch": 0.94, "grad_norm": 9.576848037152697, "learning_rate": 8.157957537271333e-08, "loss": 1.06, "step": 7591 }, { "epoch": 0.94, "grad_norm": 8.119937016821416, "learning_rate": 8.121758251178391e-08, "loss": 0.9627, "step": 7592 }, { "epoch": 0.94, "grad_norm": 7.72064539456001, "learning_rate": 8.085638799637086e-08, "loss": 1.1121, "step": 7593 }, { "epoch": 0.94, "grad_norm": 6.9988607494676796, "learning_rate": 8.049599188509616e-08, "loss": 0.8045, "step": 7594 }, { "epoch": 0.94, "grad_norm": 8.65981098782014, "learning_rate": 8.013639423645359e-08, "loss": 0.9736, "step": 7595 }, { "epoch": 0.94, "grad_norm": 8.68682356489522, "learning_rate": 7.977759510880977e-08, "loss": 0.948, "step": 7596 }, { "epoch": 0.94, "grad_norm": 7.567817786179287, "learning_rate": 7.941959456039816e-08, "loss": 1.0975, "step": 7597 }, { "epoch": 0.95, "grad_norm": 7.117295564935177, "learning_rate": 7.906239264932503e-08, "loss": 0.8888, "step": 7598 }, { "epoch": 0.95, "grad_norm": 7.114885915854222, "learning_rate": 7.870598943356622e-08, "loss": 0.8359, "step": 7599 }, { "epoch": 0.95, "grad_norm": 7.661004986345881, "learning_rate": 7.83503849709677e-08, "loss": 0.9282, "step": 7600 }, { "epoch": 0.95, "grad_norm": 6.316622959155446, "learning_rate": 7.79955793192455e-08, "loss": 0.7008, "step": 7601 }, { "epoch": 0.95, "grad_norm": 8.091503455839973, "learning_rate": 7.764157253598748e-08, "loss": 1.2337, "step": 7602 }, { "epoch": 0.95, "grad_norm": 8.335019268462958, "learning_rate": 7.728836467865041e-08, "loss": 1.0631, "step": 7603 }, { "epoch": 0.95, "grad_norm": 7.666982577771678, "learning_rate": 7.693595580456126e-08, "loss": 0.7531, "step": 7604 }, { "epoch": 0.95, "grad_norm": 8.09398610002219, "learning_rate": 7.658434597091979e-08, "loss": 1.1233, "step": 7605 }, { "epoch": 0.95, "grad_norm": 7.583282760052106, "learning_rate": 7.62335352347926e-08, "loss": 0.6873, "step": 7606 }, { "epoch": 0.95, "grad_norm": 9.390661587790623, "learning_rate": 7.588352365311857e-08, "loss": 0.9358, "step": 7607 }, { "epoch": 0.95, "grad_norm": 8.28672030040155, "learning_rate": 7.553431128270616e-08, "loss": 1.1047, "step": 7608 }, { "epoch": 0.95, "grad_norm": 8.246010263664976, "learning_rate": 7.518589818023614e-08, "loss": 1.227, "step": 7609 }, { "epoch": 0.95, "grad_norm": 7.304915554453067, "learning_rate": 7.483828440225549e-08, "loss": 0.8108, "step": 7610 }, { "epoch": 0.95, "grad_norm": 9.060968985517528, "learning_rate": 7.449147000518519e-08, "loss": 1.1715, "step": 7611 }, { "epoch": 0.95, "grad_norm": 7.568616884036889, "learning_rate": 7.414545504531579e-08, "loss": 0.9949, "step": 7612 }, { "epoch": 0.95, "grad_norm": 6.29845040337696, "learning_rate": 7.380023957880511e-08, "loss": 0.6829, "step": 7613 }, { "epoch": 0.95, "grad_norm": 13.89011659314627, "learning_rate": 7.345582366168614e-08, "loss": 1.2202, "step": 7614 }, { "epoch": 0.95, "grad_norm": 7.400136569336456, "learning_rate": 7.311220734985802e-08, "loss": 0.8599, "step": 7615 }, { "epoch": 0.95, "grad_norm": 11.856323967391793, "learning_rate": 7.276939069909173e-08, "loss": 1.1862, "step": 7616 }, { "epoch": 0.95, "grad_norm": 7.950296577060376, "learning_rate": 7.242737376502884e-08, "loss": 1.0204, "step": 7617 }, { "epoch": 0.95, "grad_norm": 9.836314777358604, "learning_rate": 7.208615660317941e-08, "loss": 1.3326, "step": 7618 }, { "epoch": 0.95, "grad_norm": 6.908763062991864, "learning_rate": 7.174573926892636e-08, "loss": 0.9007, "step": 7619 }, { "epoch": 0.95, "grad_norm": 7.168080817603906, "learning_rate": 7.140612181752049e-08, "loss": 0.9033, "step": 7620 }, { "epoch": 0.95, "grad_norm": 6.488488122460141, "learning_rate": 7.106730430408382e-08, "loss": 0.4938, "step": 7621 }, { "epoch": 0.95, "grad_norm": 8.736999435082017, "learning_rate": 7.072928678360735e-08, "loss": 1.0801, "step": 7622 }, { "epoch": 0.95, "grad_norm": 6.562735362143175, "learning_rate": 7.039206931095499e-08, "loss": 0.6763, "step": 7623 }, { "epoch": 0.95, "grad_norm": 7.659499870453641, "learning_rate": 7.005565194085739e-08, "loss": 0.8354, "step": 7624 }, { "epoch": 0.95, "grad_norm": 7.86272855393821, "learning_rate": 6.972003472791755e-08, "loss": 1.0888, "step": 7625 }, { "epoch": 0.95, "grad_norm": 8.939891407795798, "learning_rate": 6.938521772660855e-08, "loss": 1.0006, "step": 7626 }, { "epoch": 0.95, "grad_norm": 7.766421618101794, "learning_rate": 6.905120099127249e-08, "loss": 0.8784, "step": 7627 }, { "epoch": 0.95, "grad_norm": 7.89311348205487, "learning_rate": 6.871798457612156e-08, "loss": 0.7634, "step": 7628 }, { "epoch": 0.95, "grad_norm": 8.735539204402823, "learning_rate": 6.838556853523915e-08, "loss": 1.141, "step": 7629 }, { "epoch": 0.95, "grad_norm": 7.035693067669505, "learning_rate": 6.805395292257932e-08, "loss": 0.4454, "step": 7630 }, { "epoch": 0.95, "grad_norm": 6.968342080719336, "learning_rate": 6.772313779196294e-08, "loss": 0.6497, "step": 7631 }, { "epoch": 0.95, "grad_norm": 6.7041775036259645, "learning_rate": 6.73931231970848e-08, "loss": 0.8074, "step": 7632 }, { "epoch": 0.95, "grad_norm": 7.813427834478068, "learning_rate": 6.706390919150707e-08, "loss": 0.7648, "step": 7633 }, { "epoch": 0.95, "grad_norm": 8.093155914728548, "learning_rate": 6.673549582866368e-08, "loss": 1.0321, "step": 7634 }, { "epoch": 0.95, "grad_norm": 7.485963071196619, "learning_rate": 6.640788316185809e-08, "loss": 0.8757, "step": 7635 }, { "epoch": 0.95, "grad_norm": 6.933224511267588, "learning_rate": 6.608107124426333e-08, "loss": 0.9047, "step": 7636 }, { "epoch": 0.95, "grad_norm": 5.668530637609777, "learning_rate": 6.575506012892196e-08, "loss": 0.5404, "step": 7637 }, { "epoch": 0.95, "grad_norm": 7.174725034037603, "learning_rate": 6.542984986874889e-08, "loss": 0.792, "step": 7638 }, { "epoch": 0.95, "grad_norm": 5.039911430815488, "learning_rate": 6.510544051652634e-08, "loss": 0.4197, "step": 7639 }, { "epoch": 0.95, "grad_norm": 7.064607916642504, "learning_rate": 6.47818321249083e-08, "loss": 0.9605, "step": 7640 }, { "epoch": 0.95, "grad_norm": 6.8791991251193485, "learning_rate": 6.44590247464183e-08, "loss": 0.8451, "step": 7641 }, { "epoch": 0.95, "grad_norm": 8.1423232887583, "learning_rate": 6.413701843344944e-08, "loss": 0.9222, "step": 7642 }, { "epoch": 0.95, "grad_norm": 7.630816921868362, "learning_rate": 6.38158132382649e-08, "loss": 1.0993, "step": 7643 }, { "epoch": 0.95, "grad_norm": 8.302017718346619, "learning_rate": 6.349540921299857e-08, "loss": 1.0791, "step": 7644 }, { "epoch": 0.95, "grad_norm": 9.711421677473457, "learning_rate": 6.317580640965382e-08, "loss": 1.2154, "step": 7645 }, { "epoch": 0.95, "grad_norm": 6.881993661145732, "learning_rate": 6.28570048801036e-08, "loss": 0.8205, "step": 7646 }, { "epoch": 0.95, "grad_norm": 7.306252361978706, "learning_rate": 6.253900467609098e-08, "loss": 1.0368, "step": 7647 }, { "epoch": 0.95, "grad_norm": 7.306204878245108, "learning_rate": 6.222180584923021e-08, "loss": 0.7959, "step": 7648 }, { "epoch": 0.95, "grad_norm": 7.661807224125571, "learning_rate": 6.190540845100234e-08, "loss": 0.981, "step": 7649 }, { "epoch": 0.95, "grad_norm": 8.143578500526298, "learning_rate": 6.158981253276298e-08, "loss": 0.852, "step": 7650 }, { "epoch": 0.95, "grad_norm": 7.602320180871014, "learning_rate": 6.127501814573334e-08, "loss": 0.7488, "step": 7651 }, { "epoch": 0.95, "grad_norm": 6.546790304920784, "learning_rate": 6.096102534100645e-08, "loss": 0.7922, "step": 7652 }, { "epoch": 0.95, "grad_norm": 6.694959865762456, "learning_rate": 6.0647834169546e-08, "loss": 0.6944, "step": 7653 }, { "epoch": 0.95, "grad_norm": 7.307723755377983, "learning_rate": 6.033544468218355e-08, "loss": 0.7121, "step": 7654 }, { "epoch": 0.95, "grad_norm": 7.467003352663644, "learning_rate": 6.002385692962242e-08, "loss": 0.9541, "step": 7655 }, { "epoch": 0.95, "grad_norm": 7.294124218215528, "learning_rate": 5.971307096243496e-08, "loss": 0.8596, "step": 7656 }, { "epoch": 0.95, "grad_norm": 7.070895996507408, "learning_rate": 5.9403086831062995e-08, "loss": 0.5452, "step": 7657 }, { "epoch": 0.95, "grad_norm": 8.77149004757592, "learning_rate": 5.909390458581854e-08, "loss": 0.9016, "step": 7658 }, { "epoch": 0.95, "grad_norm": 7.933656699462912, "learning_rate": 5.878552427688422e-08, "loss": 0.6993, "step": 7659 }, { "epoch": 0.95, "grad_norm": 9.423623012556861, "learning_rate": 5.8477945954311663e-08, "loss": 1.141, "step": 7660 }, { "epoch": 0.95, "grad_norm": 7.628997270054208, "learning_rate": 5.817116966802205e-08, "loss": 1.1578, "step": 7661 }, { "epoch": 0.95, "grad_norm": 8.605391609472308, "learning_rate": 5.7865195467807775e-08, "loss": 0.8115, "step": 7662 }, { "epoch": 0.95, "grad_norm": 7.382398749909514, "learning_rate": 5.7560023403329116e-08, "loss": 1.0377, "step": 7663 }, { "epoch": 0.95, "grad_norm": 8.965442523774016, "learning_rate": 5.725565352411755e-08, "loss": 1.0481, "step": 7664 }, { "epoch": 0.95, "grad_norm": 7.264682991169731, "learning_rate": 5.695208587957468e-08, "loss": 0.8661, "step": 7665 }, { "epoch": 0.95, "grad_norm": 7.324558696169589, "learning_rate": 5.664932051897054e-08, "loss": 0.7305, "step": 7666 }, { "epoch": 0.95, "grad_norm": 7.152773051686971, "learning_rate": 5.6347357491445266e-08, "loss": 0.8011, "step": 7667 }, { "epoch": 0.95, "grad_norm": 8.268438652823903, "learning_rate": 5.6046196846009646e-08, "loss": 0.8995, "step": 7668 }, { "epoch": 0.95, "grad_norm": 6.630571808630594, "learning_rate": 5.5745838631544036e-08, "loss": 0.7051, "step": 7669 }, { "epoch": 0.95, "grad_norm": 7.716042419425135, "learning_rate": 5.544628289679721e-08, "loss": 0.9765, "step": 7670 }, { "epoch": 0.95, "grad_norm": 8.42586669900808, "learning_rate": 5.5147529690390275e-08, "loss": 0.8311, "step": 7671 }, { "epoch": 0.95, "grad_norm": 7.11128068904658, "learning_rate": 5.4849579060810566e-08, "loss": 0.6665, "step": 7672 }, { "epoch": 0.95, "grad_norm": 6.3888894934063, "learning_rate": 5.455243105641883e-08, "loss": 0.6435, "step": 7673 }, { "epoch": 0.95, "grad_norm": 6.5447326537857355, "learning_rate": 5.4256085725443165e-08, "loss": 0.6882, "step": 7674 }, { "epoch": 0.95, "grad_norm": 7.588955499024193, "learning_rate": 5.396054311598176e-08, "loss": 0.8416, "step": 7675 }, { "epoch": 0.95, "grad_norm": 7.180236732476331, "learning_rate": 5.3665803276002906e-08, "loss": 0.7581, "step": 7676 }, { "epoch": 0.95, "grad_norm": 7.656629597923214, "learning_rate": 5.337186625334445e-08, "loss": 0.7944, "step": 7677 }, { "epoch": 0.95, "grad_norm": 8.018691119889127, "learning_rate": 5.3078732095714904e-08, "loss": 0.797, "step": 7678 }, { "epoch": 0.96, "grad_norm": 7.9278743873688455, "learning_rate": 5.278640085069009e-08, "loss": 0.8686, "step": 7679 }, { "epoch": 0.96, "grad_norm": 8.756770363940605, "learning_rate": 5.249487256571817e-08, "loss": 0.6401, "step": 7680 }, { "epoch": 0.96, "grad_norm": 6.681027454900606, "learning_rate": 5.2204147288115735e-08, "loss": 0.6335, "step": 7681 }, { "epoch": 0.96, "grad_norm": 7.8105640583535445, "learning_rate": 5.191422506506838e-08, "loss": 0.9287, "step": 7682 }, { "epoch": 0.96, "grad_norm": 7.780460479791868, "learning_rate": 5.162510594363235e-08, "loss": 0.9208, "step": 7683 }, { "epoch": 0.96, "grad_norm": 8.456347244917222, "learning_rate": 5.1336789970733434e-08, "loss": 1.1301, "step": 7684 }, { "epoch": 0.96, "grad_norm": 7.422404209688432, "learning_rate": 5.104927719316699e-08, "loss": 0.8292, "step": 7685 }, { "epoch": 0.96, "grad_norm": 9.424281980232443, "learning_rate": 5.076256765759735e-08, "loss": 0.9372, "step": 7686 }, { "epoch": 0.96, "grad_norm": 7.9947573417812, "learning_rate": 5.047666141056007e-08, "loss": 0.9103, "step": 7687 }, { "epoch": 0.96, "grad_norm": 8.26990644751523, "learning_rate": 5.0191558498458026e-08, "loss": 0.9345, "step": 7688 }, { "epoch": 0.96, "grad_norm": 6.214255005271796, "learning_rate": 4.990725896756643e-08, "loss": 0.7331, "step": 7689 }, { "epoch": 0.96, "grad_norm": 6.405276412740413, "learning_rate": 4.9623762864027815e-08, "loss": 0.9718, "step": 7690 }, { "epoch": 0.96, "grad_norm": 7.712613545307503, "learning_rate": 4.934107023385481e-08, "loss": 0.8453, "step": 7691 }, { "epoch": 0.96, "grad_norm": 7.777256833332432, "learning_rate": 4.905918112293073e-08, "loss": 0.7233, "step": 7692 }, { "epoch": 0.96, "grad_norm": 6.812906971977406, "learning_rate": 4.87780955770073e-08, "loss": 0.783, "step": 7693 }, { "epoch": 0.96, "grad_norm": 6.733289785882246, "learning_rate": 4.849781364170636e-08, "loss": 0.7442, "step": 7694 }, { "epoch": 0.96, "grad_norm": 7.107487329897954, "learning_rate": 4.8218335362519295e-08, "loss": 0.7756, "step": 7695 }, { "epoch": 0.96, "grad_norm": 7.321360660623493, "learning_rate": 4.793966078480705e-08, "loss": 0.8739, "step": 7696 }, { "epoch": 0.96, "grad_norm": 8.00250896370838, "learning_rate": 4.7661789953799553e-08, "loss": 1.066, "step": 7697 }, { "epoch": 0.96, "grad_norm": 6.74030629727173, "learning_rate": 4.738472291459795e-08, "loss": 0.9204, "step": 7698 }, { "epoch": 0.96, "grad_norm": 8.774516751725356, "learning_rate": 4.710845971217071e-08, "loss": 1.1389, "step": 7699 }, { "epoch": 0.96, "grad_norm": 7.322363371942157, "learning_rate": 4.6833000391356406e-08, "loss": 1.0184, "step": 7700 }, { "epoch": 0.96, "grad_norm": 7.260185700518305, "learning_rate": 4.6558344996864846e-08, "loss": 1.1415, "step": 7701 }, { "epoch": 0.96, "grad_norm": 8.460107868367794, "learning_rate": 4.628449357327369e-08, "loss": 0.9466, "step": 7702 }, { "epoch": 0.96, "grad_norm": 8.841477248070909, "learning_rate": 4.601144616503017e-08, "loss": 0.7828, "step": 7703 }, { "epoch": 0.96, "grad_norm": 7.346075479326063, "learning_rate": 4.573920281645161e-08, "loss": 0.8445, "step": 7704 }, { "epoch": 0.96, "grad_norm": 5.724159227356523, "learning_rate": 4.546776357172489e-08, "loss": 0.5041, "step": 7705 }, { "epoch": 0.96, "grad_norm": 7.346876089565947, "learning_rate": 4.519712847490532e-08, "loss": 0.9517, "step": 7706 }, { "epoch": 0.96, "grad_norm": 6.916983111224325, "learning_rate": 4.492729756991942e-08, "loss": 0.677, "step": 7707 }, { "epoch": 0.96, "grad_norm": 6.965966515843737, "learning_rate": 4.4658270900561626e-08, "loss": 0.7638, "step": 7708 }, { "epoch": 0.96, "grad_norm": 7.398216483448165, "learning_rate": 4.439004851049644e-08, "loss": 1.0379, "step": 7709 }, { "epoch": 0.96, "grad_norm": 7.241151634157984, "learning_rate": 4.412263044325849e-08, "loss": 0.9666, "step": 7710 }, { "epoch": 0.96, "grad_norm": 6.721952654693089, "learning_rate": 4.385601674225082e-08, "loss": 0.5939, "step": 7711 }, { "epoch": 0.96, "grad_norm": 6.123359596215796, "learning_rate": 4.359020745074605e-08, "loss": 0.7324, "step": 7712 }, { "epoch": 0.96, "grad_norm": 7.778528380633437, "learning_rate": 4.332520261188633e-08, "loss": 0.8777, "step": 7713 }, { "epoch": 0.96, "grad_norm": 7.686830089318002, "learning_rate": 4.3061002268683925e-08, "loss": 0.9456, "step": 7714 }, { "epoch": 0.96, "grad_norm": 10.488098695187796, "learning_rate": 4.279760646402009e-08, "loss": 0.9556, "step": 7715 }, { "epoch": 0.96, "grad_norm": 7.0928258485336775, "learning_rate": 4.253501524064507e-08, "loss": 0.762, "step": 7716 }, { "epoch": 0.96, "grad_norm": 7.279419793463579, "learning_rate": 4.2273228641178666e-08, "loss": 0.8194, "step": 7717 }, { "epoch": 0.96, "grad_norm": 7.026166339186611, "learning_rate": 4.2012246708110774e-08, "loss": 1.0827, "step": 7718 }, { "epoch": 0.96, "grad_norm": 7.345030372758911, "learning_rate": 4.175206948379973e-08, "loss": 0.7253, "step": 7719 }, { "epoch": 0.96, "grad_norm": 7.086669141029298, "learning_rate": 4.1492697010474534e-08, "loss": 0.8479, "step": 7720 }, { "epoch": 0.96, "grad_norm": 7.127420770411622, "learning_rate": 4.123412933023152e-08, "loss": 1.1372, "step": 7721 }, { "epoch": 0.96, "grad_norm": 7.581930746193265, "learning_rate": 4.097636648503878e-08, "loss": 0.839, "step": 7722 }, { "epoch": 0.96, "grad_norm": 8.219674637972748, "learning_rate": 4.0719408516732284e-08, "loss": 0.8716, "step": 7723 }, { "epoch": 0.96, "grad_norm": 8.560729214572167, "learning_rate": 4.046325546701757e-08, "loss": 0.8629, "step": 7724 }, { "epoch": 0.96, "grad_norm": 9.719688718153687, "learning_rate": 4.020790737746971e-08, "loss": 0.7054, "step": 7725 }, { "epoch": 0.96, "grad_norm": 6.8498916742690295, "learning_rate": 3.995336428953389e-08, "loss": 1.0707, "step": 7726 }, { "epoch": 0.96, "grad_norm": 7.5868644456613765, "learning_rate": 3.969962624452206e-08, "loss": 0.8443, "step": 7727 }, { "epoch": 0.96, "grad_norm": 8.87291447761529, "learning_rate": 3.944669328361905e-08, "loss": 1.004, "step": 7728 }, { "epoch": 0.96, "grad_norm": 7.4231022099057675, "learning_rate": 3.919456544787648e-08, "loss": 0.783, "step": 7729 }, { "epoch": 0.96, "grad_norm": 7.185949470864072, "learning_rate": 3.8943242778216594e-08, "loss": 0.7644, "step": 7730 }, { "epoch": 0.96, "grad_norm": 7.438636206699861, "learning_rate": 3.869272531542956e-08, "loss": 0.7219, "step": 7731 }, { "epoch": 0.96, "grad_norm": 8.592255864106543, "learning_rate": 3.844301310017673e-08, "loss": 0.8638, "step": 7732 }, { "epoch": 0.96, "grad_norm": 7.499376512725564, "learning_rate": 3.81941061729868e-08, "loss": 0.8168, "step": 7733 }, { "epoch": 0.96, "grad_norm": 7.5018753852925, "learning_rate": 3.7946004574259674e-08, "loss": 1.0185, "step": 7734 }, { "epoch": 0.96, "grad_norm": 9.388416391414868, "learning_rate": 3.7698708344263146e-08, "loss": 0.8501, "step": 7735 }, { "epoch": 0.96, "grad_norm": 7.696069651129747, "learning_rate": 3.7452217523134e-08, "loss": 0.9588, "step": 7736 }, { "epoch": 0.96, "grad_norm": 8.411179831286136, "learning_rate": 3.720653215088077e-08, "loss": 0.9194, "step": 7737 }, { "epoch": 0.96, "grad_norm": 7.047517143290421, "learning_rate": 3.69616522673788e-08, "loss": 0.7073, "step": 7738 }, { "epoch": 0.96, "grad_norm": 7.121591370587705, "learning_rate": 3.6717577912372406e-08, "loss": 0.8823, "step": 7739 }, { "epoch": 0.96, "grad_norm": 8.183684234181658, "learning_rate": 3.647430912547767e-08, "loss": 1.0165, "step": 7740 }, { "epoch": 0.96, "grad_norm": 7.7699514220513, "learning_rate": 3.623184594617801e-08, "loss": 0.8278, "step": 7741 }, { "epoch": 0.96, "grad_norm": 12.51130222441533, "learning_rate": 3.599018841382584e-08, "loss": 0.9309, "step": 7742 }, { "epoch": 0.96, "grad_norm": 7.674935510950294, "learning_rate": 3.574933656764423e-08, "loss": 0.675, "step": 7743 }, { "epoch": 0.96, "grad_norm": 7.939083220014419, "learning_rate": 3.550929044672524e-08, "loss": 1.0133, "step": 7744 }, { "epoch": 0.96, "grad_norm": 8.851391853405504, "learning_rate": 3.527005009002826e-08, "loss": 0.8824, "step": 7745 }, { "epoch": 0.96, "grad_norm": 8.985265404717694, "learning_rate": 3.503161553638445e-08, "loss": 1.0921, "step": 7746 }, { "epoch": 0.96, "grad_norm": 5.9189732660330625, "learning_rate": 3.479398682449287e-08, "loss": 0.7374, "step": 7747 }, { "epoch": 0.96, "grad_norm": 7.826076073172072, "learning_rate": 3.455716399292097e-08, "loss": 0.9494, "step": 7748 }, { "epoch": 0.96, "grad_norm": 5.967757711881632, "learning_rate": 3.432114708010803e-08, "loss": 0.3979, "step": 7749 }, { "epoch": 0.96, "grad_norm": 7.569303001729152, "learning_rate": 3.408593612435951e-08, "loss": 0.7362, "step": 7750 }, { "epoch": 0.96, "grad_norm": 7.567876229534706, "learning_rate": 3.3851531163852644e-08, "loss": 0.894, "step": 7751 }, { "epoch": 0.96, "grad_norm": 7.35737924293276, "learning_rate": 3.3617932236631454e-08, "loss": 0.8808, "step": 7752 }, { "epoch": 0.96, "grad_norm": 8.763441791428924, "learning_rate": 3.338513938061172e-08, "loss": 0.7857, "step": 7753 }, { "epoch": 0.96, "grad_norm": 8.810882943211748, "learning_rate": 3.315315263357544e-08, "loss": 1.0214, "step": 7754 }, { "epoch": 0.96, "grad_norm": 8.605355452591267, "learning_rate": 3.2921972033176374e-08, "loss": 0.8844, "step": 7755 }, { "epoch": 0.96, "grad_norm": 6.483773459986639, "learning_rate": 3.269159761693619e-08, "loss": 0.8932, "step": 7756 }, { "epoch": 0.96, "grad_norm": 8.808371073573381, "learning_rate": 3.246202942224552e-08, "loss": 1.0861, "step": 7757 }, { "epoch": 0.96, "grad_norm": 7.534672940763783, "learning_rate": 3.223326748636568e-08, "loss": 0.7001, "step": 7758 }, { "epoch": 0.97, "grad_norm": 9.1470471764977, "learning_rate": 3.200531184642419e-08, "loss": 1.1394, "step": 7759 }, { "epoch": 0.97, "grad_norm": 8.738870615384087, "learning_rate": 3.177816253942145e-08, "loss": 1.0786, "step": 7760 }, { "epoch": 0.97, "grad_norm": 7.740364923140811, "learning_rate": 3.155181960222353e-08, "loss": 0.7499, "step": 7761 }, { "epoch": 0.97, "grad_norm": 7.115107311891776, "learning_rate": 3.132628307156826e-08, "loss": 0.7645, "step": 7762 }, { "epoch": 0.97, "grad_norm": 9.264938047284017, "learning_rate": 3.110155298406026e-08, "loss": 0.8667, "step": 7763 }, { "epoch": 0.97, "grad_norm": 7.8898018566575585, "learning_rate": 3.0877629376175886e-08, "loss": 0.9344, "step": 7764 }, { "epoch": 0.97, "grad_norm": 8.1720589309431, "learning_rate": 3.065451228425831e-08, "loss": 0.7562, "step": 7765 }, { "epoch": 0.97, "grad_norm": 6.912271580957708, "learning_rate": 3.043220174452022e-08, "loss": 0.764, "step": 7766 }, { "epoch": 0.97, "grad_norm": 8.327569675558978, "learning_rate": 3.021069779304498e-08, "loss": 1.1381, "step": 7767 }, { "epoch": 0.97, "grad_norm": 7.02148268587384, "learning_rate": 2.999000046578271e-08, "loss": 0.8412, "step": 7768 }, { "epoch": 0.97, "grad_norm": 8.584798696397906, "learning_rate": 2.977010979855477e-08, "loss": 1.1157, "step": 7769 }, { "epoch": 0.97, "grad_norm": 8.643609663895592, "learning_rate": 2.9551025827050384e-08, "loss": 0.9189, "step": 7770 }, { "epoch": 0.97, "grad_norm": 7.768948970055532, "learning_rate": 2.933274858682833e-08, "loss": 0.9664, "step": 7771 }, { "epoch": 0.97, "grad_norm": 7.582860161411707, "learning_rate": 2.911527811331527e-08, "loss": 1.0238, "step": 7772 }, { "epoch": 0.97, "grad_norm": 7.081873794542918, "learning_rate": 2.8898614441808526e-08, "loss": 0.6685, "step": 7773 }, { "epoch": 0.97, "grad_norm": 7.376964790761141, "learning_rate": 2.868275760747441e-08, "loss": 0.8346, "step": 7774 }, { "epoch": 0.97, "grad_norm": 9.65626877031707, "learning_rate": 2.846770764534601e-08, "loss": 0.8795, "step": 7775 }, { "epoch": 0.97, "grad_norm": 8.57127065729317, "learning_rate": 2.8253464590328737e-08, "loss": 0.9651, "step": 7776 }, { "epoch": 0.97, "grad_norm": 7.877882104741077, "learning_rate": 2.8040028477194782e-08, "loss": 0.858, "step": 7777 }, { "epoch": 0.97, "grad_norm": 7.209330330312522, "learning_rate": 2.782739934058587e-08, "loss": 1.0831, "step": 7778 }, { "epoch": 0.97, "grad_norm": 8.203545598459213, "learning_rate": 2.7615577215012734e-08, "loss": 1.2273, "step": 7779 }, { "epoch": 0.97, "grad_norm": 8.21425939366833, "learning_rate": 2.7404562134856204e-08, "loss": 0.9145, "step": 7780 }, { "epoch": 0.97, "grad_norm": 9.973996803013234, "learning_rate": 2.7194354134363886e-08, "loss": 1.1918, "step": 7781 }, { "epoch": 0.97, "grad_norm": 8.773938040571313, "learning_rate": 2.6984953247655154e-08, "loss": 0.8026, "step": 7782 }, { "epoch": 0.97, "grad_norm": 7.955318880828329, "learning_rate": 2.67763595087156e-08, "loss": 0.9972, "step": 7783 }, { "epoch": 0.97, "grad_norm": 7.851316460299667, "learning_rate": 2.6568572951401472e-08, "loss": 0.849, "step": 7784 }, { "epoch": 0.97, "grad_norm": 9.74755776804241, "learning_rate": 2.6361593609438575e-08, "loss": 0.9558, "step": 7785 }, { "epoch": 0.97, "grad_norm": 7.069859332669022, "learning_rate": 2.6155421516420033e-08, "loss": 0.577, "step": 7786 }, { "epoch": 0.97, "grad_norm": 7.12031845499092, "learning_rate": 2.5950056705808522e-08, "loss": 0.843, "step": 7787 }, { "epoch": 0.97, "grad_norm": 7.162879425744935, "learning_rate": 2.5745499210936274e-08, "loss": 0.6781, "step": 7788 }, { "epoch": 0.97, "grad_norm": 7.9551104618054085, "learning_rate": 2.5541749065003397e-08, "loss": 0.7888, "step": 7789 }, { "epoch": 0.97, "grad_norm": 8.210495105466238, "learning_rate": 2.533880630108121e-08, "loss": 0.8325, "step": 7790 }, { "epoch": 0.97, "grad_norm": 6.937145877804261, "learning_rate": 2.5136670952106702e-08, "loss": 0.9046, "step": 7791 }, { "epoch": 0.97, "grad_norm": 7.6854354112329055, "learning_rate": 2.4935343050888626e-08, "loss": 0.7863, "step": 7792 }, { "epoch": 0.97, "grad_norm": 9.17834003636535, "learning_rate": 2.4734822630103074e-08, "loss": 1.0751, "step": 7793 }, { "epoch": 0.97, "grad_norm": 7.565052056348379, "learning_rate": 2.4535109722296224e-08, "loss": 0.8811, "step": 7794 }, { "epoch": 0.97, "grad_norm": 7.665837433074809, "learning_rate": 2.4336204359882153e-08, "loss": 0.8623, "step": 7795 }, { "epoch": 0.97, "grad_norm": 9.30549128994072, "learning_rate": 2.413810657514448e-08, "loss": 0.8642, "step": 7796 }, { "epoch": 0.97, "grad_norm": 8.04766412011426, "learning_rate": 2.3940816400235267e-08, "loss": 1.1137, "step": 7797 }, { "epoch": 0.97, "grad_norm": 6.414238367688872, "learning_rate": 2.3744333867176116e-08, "loss": 0.648, "step": 7798 }, { "epoch": 0.97, "grad_norm": 6.8892292471408645, "learning_rate": 2.3548659007856524e-08, "loss": 0.7989, "step": 7799 }, { "epoch": 0.97, "grad_norm": 6.656002410732119, "learning_rate": 2.3353791854036634e-08, "loss": 0.6172, "step": 7800 }, { "epoch": 0.97, "grad_norm": 7.938987636323357, "learning_rate": 2.315973243734393e-08, "loss": 0.7559, "step": 7801 }, { "epoch": 0.97, "grad_norm": 10.350358692102105, "learning_rate": 2.2966480789275438e-08, "loss": 1.0935, "step": 7802 }, { "epoch": 0.97, "grad_norm": 7.533778776978607, "learning_rate": 2.2774036941197174e-08, "loss": 0.7658, "step": 7803 }, { "epoch": 0.97, "grad_norm": 8.913986851566179, "learning_rate": 2.2582400924343052e-08, "loss": 0.7847, "step": 7804 }, { "epoch": 0.97, "grad_norm": 6.725562721560802, "learning_rate": 2.2391572769817072e-08, "loss": 0.8478, "step": 7805 }, { "epoch": 0.97, "grad_norm": 8.090781084840199, "learning_rate": 2.220155250859224e-08, "loss": 0.8681, "step": 7806 }, { "epoch": 0.97, "grad_norm": 7.943321451227303, "learning_rate": 2.201234017150944e-08, "loss": 0.9782, "step": 7807 }, { "epoch": 0.97, "grad_norm": 7.822550158103134, "learning_rate": 2.1823935789279104e-08, "loss": 1.0045, "step": 7808 }, { "epoch": 0.97, "grad_norm": 8.235486240077943, "learning_rate": 2.1636339392479553e-08, "loss": 0.8471, "step": 7809 }, { "epoch": 0.97, "grad_norm": 7.341137912928694, "learning_rate": 2.1449551011559766e-08, "loss": 0.6729, "step": 7810 }, { "epoch": 0.97, "grad_norm": 8.310292988147475, "learning_rate": 2.1263570676835487e-08, "loss": 0.9177, "step": 7811 }, { "epoch": 0.97, "grad_norm": 7.734021044826549, "learning_rate": 2.1078398418493128e-08, "loss": 0.7107, "step": 7812 }, { "epoch": 0.97, "grad_norm": 7.313323734189752, "learning_rate": 2.0894034266586983e-08, "loss": 0.6817, "step": 7813 }, { "epoch": 0.97, "grad_norm": 8.37800492030316, "learning_rate": 2.0710478251040334e-08, "loss": 1.3706, "step": 7814 }, { "epoch": 0.97, "grad_norm": 8.164573487954852, "learning_rate": 2.052773040164491e-08, "loss": 1.1004, "step": 7815 }, { "epoch": 0.97, "grad_norm": 6.047749462301328, "learning_rate": 2.0345790748062532e-08, "loss": 0.6126, "step": 7816 }, { "epoch": 0.97, "grad_norm": 8.210676140767251, "learning_rate": 2.0164659319822365e-08, "loss": 0.9491, "step": 7817 }, { "epoch": 0.97, "grad_norm": 8.329992828043409, "learning_rate": 1.9984336146323112e-08, "loss": 0.7173, "step": 7818 }, { "epoch": 0.97, "grad_norm": 7.45217187337908, "learning_rate": 1.980482125683303e-08, "loss": 0.6789, "step": 7819 }, { "epoch": 0.97, "grad_norm": 8.331417603775625, "learning_rate": 1.9626114680487142e-08, "loss": 0.8102, "step": 7820 }, { "epoch": 0.97, "grad_norm": 6.830111197139681, "learning_rate": 1.9448216446291134e-08, "loss": 0.7965, "step": 7821 }, { "epoch": 0.97, "grad_norm": 6.921862638945309, "learning_rate": 1.9271126583119138e-08, "loss": 0.979, "step": 7822 }, { "epoch": 0.97, "grad_norm": 9.119254740141725, "learning_rate": 1.9094845119712603e-08, "loss": 0.8563, "step": 7823 }, { "epoch": 0.97, "grad_norm": 7.7468057456440444, "learning_rate": 1.891937208468475e-08, "loss": 1.0978, "step": 7824 }, { "epoch": 0.97, "grad_norm": 7.876771220164228, "learning_rate": 1.874470750651447e-08, "loss": 0.6591, "step": 7825 }, { "epoch": 0.97, "grad_norm": 7.569582075873105, "learning_rate": 1.8570851413551307e-08, "loss": 1.0191, "step": 7826 }, { "epoch": 0.97, "grad_norm": 7.598223106822167, "learning_rate": 1.8397803834012685e-08, "loss": 0.819, "step": 7827 }, { "epoch": 0.97, "grad_norm": 6.392396540344027, "learning_rate": 1.822556479598614e-08, "loss": 0.8265, "step": 7828 }, { "epoch": 0.97, "grad_norm": 7.368176101401322, "learning_rate": 1.8054134327425976e-08, "loss": 1.1461, "step": 7829 }, { "epoch": 0.97, "grad_norm": 7.8106095313561985, "learning_rate": 1.788351245615716e-08, "loss": 1.1318, "step": 7830 }, { "epoch": 0.97, "grad_norm": 7.96124246401117, "learning_rate": 1.7713699209871983e-08, "loss": 0.812, "step": 7831 }, { "epoch": 0.97, "grad_norm": 8.062823564928923, "learning_rate": 1.754469461613173e-08, "loss": 1.2548, "step": 7832 }, { "epoch": 0.97, "grad_norm": 8.080628017079423, "learning_rate": 1.7376498702367796e-08, "loss": 0.8178, "step": 7833 }, { "epoch": 0.97, "grad_norm": 38.56434793746689, "learning_rate": 1.720911149587834e-08, "loss": 0.8577, "step": 7834 }, { "epoch": 0.97, "grad_norm": 7.327708708905715, "learning_rate": 1.7042533023831632e-08, "loss": 0.8678, "step": 7835 }, { "epoch": 0.97, "grad_norm": 8.574069982947263, "learning_rate": 1.6876763313264376e-08, "loss": 1.1949, "step": 7836 }, { "epoch": 0.97, "grad_norm": 8.00762264643311, "learning_rate": 1.6711802391081723e-08, "loss": 1.0141, "step": 7837 }, { "epoch": 0.97, "grad_norm": 8.386760703845852, "learning_rate": 1.654765028405836e-08, "loss": 1.005, "step": 7838 }, { "epoch": 0.97, "grad_norm": 6.565572154964139, "learning_rate": 1.6384307018835754e-08, "loss": 0.6811, "step": 7839 }, { "epoch": 0.98, "grad_norm": 7.678284914518995, "learning_rate": 1.6221772621927147e-08, "loss": 0.8755, "step": 7840 }, { "epoch": 0.98, "grad_norm": 6.41154783194352, "learning_rate": 1.6060047119710877e-08, "loss": 0.5978, "step": 7841 }, { "epoch": 0.98, "grad_norm": 7.599849255910175, "learning_rate": 1.5899130538437613e-08, "loss": 0.9989, "step": 7842 }, { "epoch": 0.98, "grad_norm": 7.797125314307297, "learning_rate": 1.573902290422369e-08, "loss": 0.8193, "step": 7843 }, { "epoch": 0.98, "grad_norm": 7.286925515478193, "learning_rate": 1.557972424305665e-08, "loss": 0.6709, "step": 7844 }, { "epoch": 0.98, "grad_norm": 7.444610812904821, "learning_rate": 1.5421234580790257e-08, "loss": 0.8572, "step": 7845 }, { "epoch": 0.98, "grad_norm": 8.39829589511171, "learning_rate": 1.5263553943149488e-08, "loss": 0.7734, "step": 7846 }, { "epoch": 0.98, "grad_norm": 7.480125484546686, "learning_rate": 1.5106682355725544e-08, "loss": 0.9394, "step": 7847 }, { "epoch": 0.98, "grad_norm": 7.655952231067908, "learning_rate": 1.495061984398083e-08, "loss": 1.148, "step": 7848 }, { "epoch": 0.98, "grad_norm": 6.773757557846866, "learning_rate": 1.4795366433243974e-08, "loss": 0.6407, "step": 7849 }, { "epoch": 0.98, "grad_norm": 5.489170425727021, "learning_rate": 1.4640922148714265e-08, "loss": 0.6806, "step": 7850 }, { "epoch": 0.98, "grad_norm": 7.160931319577928, "learning_rate": 1.4487287015458872e-08, "loss": 1.1394, "step": 7851 }, { "epoch": 0.98, "grad_norm": 7.058321797333538, "learning_rate": 1.4334461058412852e-08, "loss": 1.1198, "step": 7852 }, { "epoch": 0.98, "grad_norm": 6.694943420996506, "learning_rate": 1.4182444302381359e-08, "loss": 0.9979, "step": 7853 }, { "epoch": 0.98, "grad_norm": 7.503476409216847, "learning_rate": 1.4031236772037993e-08, "loss": 0.933, "step": 7854 }, { "epoch": 0.98, "grad_norm": 7.628382394639165, "learning_rate": 1.388083849192312e-08, "loss": 0.8196, "step": 7855 }, { "epoch": 0.98, "grad_norm": 9.581126245586939, "learning_rate": 1.3731249486448883e-08, "loss": 1.4193, "step": 7856 }, { "epoch": 0.98, "grad_norm": 8.19611144496669, "learning_rate": 1.3582469779893082e-08, "loss": 1.1281, "step": 7857 }, { "epoch": 0.98, "grad_norm": 7.49731750890965, "learning_rate": 1.3434499396404176e-08, "loss": 0.9452, "step": 7858 }, { "epoch": 0.98, "grad_norm": 8.144430101569924, "learning_rate": 1.328733835999796e-08, "loss": 0.8634, "step": 7859 }, { "epoch": 0.98, "grad_norm": 4.408277089681653, "learning_rate": 1.314098669456032e-08, "loss": 0.4005, "step": 7860 }, { "epoch": 0.98, "grad_norm": 9.47790989883341, "learning_rate": 1.2995444423844483e-08, "loss": 0.9051, "step": 7861 }, { "epoch": 0.98, "grad_norm": 7.920161105884525, "learning_rate": 1.2850711571472663e-08, "loss": 0.8855, "step": 7862 }, { "epoch": 0.98, "grad_norm": 8.33368080710497, "learning_rate": 1.2706788160936068e-08, "loss": 0.9475, "step": 7863 }, { "epoch": 0.98, "grad_norm": 5.997162331815678, "learning_rate": 1.2563674215594346e-08, "loss": 0.4558, "step": 7864 }, { "epoch": 0.98, "grad_norm": 7.722408848877355, "learning_rate": 1.2421369758675027e-08, "loss": 0.9337, "step": 7865 }, { "epoch": 0.98, "grad_norm": 6.628531960570929, "learning_rate": 1.2279874813275749e-08, "loss": 0.8687, "step": 7866 }, { "epoch": 0.98, "grad_norm": 9.86220360003119, "learning_rate": 1.2139189402361473e-08, "loss": 0.8369, "step": 7867 }, { "epoch": 0.98, "grad_norm": 7.671034728144625, "learning_rate": 1.19993135487656e-08, "loss": 1.1942, "step": 7868 }, { "epoch": 0.98, "grad_norm": 7.426505818126136, "learning_rate": 1.1860247275192194e-08, "loss": 0.9859, "step": 7869 }, { "epoch": 0.98, "grad_norm": 7.403115325110788, "learning_rate": 1.172199060421153e-08, "loss": 1.0507, "step": 7870 }, { "epoch": 0.98, "grad_norm": 7.931487294414725, "learning_rate": 1.1584543558263438e-08, "loss": 0.9167, "step": 7871 }, { "epoch": 0.98, "grad_norm": 7.0207078675391505, "learning_rate": 1.1447906159656741e-08, "loss": 0.6645, "step": 7872 }, { "epoch": 0.98, "grad_norm": 8.8201912471756, "learning_rate": 1.1312078430568141e-08, "loss": 0.7507, "step": 7873 }, { "epoch": 0.98, "grad_norm": 8.633348835799064, "learning_rate": 1.1177060393043337e-08, "loss": 0.7906, "step": 7874 }, { "epoch": 0.98, "grad_norm": 9.682123918776051, "learning_rate": 1.1042852068996467e-08, "loss": 1.019, "step": 7875 }, { "epoch": 0.98, "grad_norm": 9.878039418968768, "learning_rate": 1.0909453480210108e-08, "loss": 1.1031, "step": 7876 }, { "epoch": 0.98, "grad_norm": 9.482277763936555, "learning_rate": 1.0776864648335827e-08, "loss": 1.2268, "step": 7877 }, { "epoch": 0.98, "grad_norm": 6.448779759446906, "learning_rate": 1.0645085594893633e-08, "loss": 0.5802, "step": 7878 }, { "epoch": 0.98, "grad_norm": 6.731714468036248, "learning_rate": 1.0514116341271419e-08, "loss": 0.7357, "step": 7879 }, { "epoch": 0.98, "grad_norm": 7.773898359485032, "learning_rate": 1.0383956908726623e-08, "loss": 1.1696, "step": 7880 }, { "epoch": 0.98, "grad_norm": 8.03737985872843, "learning_rate": 1.0254607318385123e-08, "loss": 0.7345, "step": 7881 }, { "epoch": 0.98, "grad_norm": 7.474436113631884, "learning_rate": 1.0126067591240684e-08, "loss": 0.7014, "step": 7882 }, { "epoch": 0.98, "grad_norm": 11.09075794025759, "learning_rate": 9.998337748156061e-09, "loss": 1.1754, "step": 7883 }, { "epoch": 0.98, "grad_norm": 6.989083683457874, "learning_rate": 9.871417809862448e-09, "loss": 1.0098, "step": 7884 }, { "epoch": 0.98, "grad_norm": 7.593580929860139, "learning_rate": 9.745307796960035e-09, "loss": 0.8099, "step": 7885 }, { "epoch": 0.98, "grad_norm": 7.545368283999246, "learning_rate": 9.620007729916337e-09, "loss": 1.0151, "step": 7886 }, { "epoch": 0.98, "grad_norm": 7.212647339615244, "learning_rate": 9.495517629069528e-09, "loss": 0.8589, "step": 7887 }, { "epoch": 0.98, "grad_norm": 8.954707466915812, "learning_rate": 9.371837514624005e-09, "loss": 0.9961, "step": 7888 }, { "epoch": 0.98, "grad_norm": 9.189438120424121, "learning_rate": 9.248967406653709e-09, "loss": 0.9065, "step": 7889 }, { "epoch": 0.98, "grad_norm": 9.706615012545901, "learning_rate": 9.126907325101575e-09, "loss": 0.9082, "step": 7890 }, { "epoch": 0.98, "grad_norm": 7.347723504408924, "learning_rate": 9.005657289778425e-09, "loss": 0.9535, "step": 7891 }, { "epoch": 0.98, "grad_norm": 9.07211968843927, "learning_rate": 8.88521732036407e-09, "loss": 1.0784, "step": 7892 }, { "epoch": 0.98, "grad_norm": 7.539369020112782, "learning_rate": 8.765587436406765e-09, "loss": 0.8726, "step": 7893 }, { "epoch": 0.98, "grad_norm": 9.122169760839498, "learning_rate": 8.646767657322086e-09, "loss": 0.8587, "step": 7894 }, { "epoch": 0.98, "grad_norm": 8.46147603069403, "learning_rate": 8.528758002396276e-09, "loss": 1.1547, "step": 7895 }, { "epoch": 0.98, "grad_norm": 7.877501796462211, "learning_rate": 8.411558490782345e-09, "loss": 0.7166, "step": 7896 }, { "epoch": 0.98, "grad_norm": 7.115514803758634, "learning_rate": 8.295169141502302e-09, "loss": 0.7484, "step": 7897 }, { "epoch": 0.98, "grad_norm": 6.406951502201524, "learning_rate": 8.179589973447144e-09, "loss": 0.6404, "step": 7898 }, { "epoch": 0.98, "grad_norm": 8.510454467561328, "learning_rate": 8.06482100537631e-09, "loss": 1.1741, "step": 7899 }, { "epoch": 0.98, "grad_norm": 8.646261199479628, "learning_rate": 7.95086225591657e-09, "loss": 1.1395, "step": 7900 }, { "epoch": 0.98, "grad_norm": 7.130364137221212, "learning_rate": 7.83771374356479e-09, "loss": 0.9292, "step": 7901 }, { "epoch": 0.98, "grad_norm": 7.001493230741504, "learning_rate": 7.725375486685172e-09, "loss": 0.912, "step": 7902 }, { "epoch": 0.98, "grad_norm": 7.791014168713506, "learning_rate": 7.613847503510907e-09, "loss": 0.6583, "step": 7903 }, { "epoch": 0.98, "grad_norm": 7.861725511261248, "learning_rate": 7.503129812143628e-09, "loss": 0.7789, "step": 7904 }, { "epoch": 0.98, "grad_norm": 7.478351665238959, "learning_rate": 7.393222430553404e-09, "loss": 0.9301, "step": 7905 }, { "epoch": 0.98, "grad_norm": 8.249012230984702, "learning_rate": 7.284125376579299e-09, "loss": 0.9147, "step": 7906 }, { "epoch": 0.98, "grad_norm": 7.729522138675472, "learning_rate": 7.175838667927149e-09, "loss": 1.0838, "step": 7907 }, { "epoch": 0.98, "grad_norm": 7.77091814927739, "learning_rate": 7.068362322174005e-09, "loss": 0.8401, "step": 7908 }, { "epoch": 0.98, "grad_norm": 6.018285456288051, "learning_rate": 6.961696356762582e-09, "loss": 0.8272, "step": 7909 }, { "epoch": 0.98, "grad_norm": 9.04844211325169, "learning_rate": 6.855840789006252e-09, "loss": 1.0208, "step": 7910 }, { "epoch": 0.98, "grad_norm": 8.177805005891285, "learning_rate": 6.750795636085716e-09, "loss": 0.7576, "step": 7911 }, { "epoch": 0.98, "grad_norm": 8.513369796573096, "learning_rate": 6.646560915050115e-09, "loss": 1.1896, "step": 7912 }, { "epoch": 0.98, "grad_norm": 8.04440684318755, "learning_rate": 6.5431366428175824e-09, "loss": 0.9633, "step": 7913 }, { "epoch": 0.98, "grad_norm": 6.913953502553834, "learning_rate": 6.440522836174135e-09, "loss": 0.6712, "step": 7914 }, { "epoch": 0.98, "grad_norm": 8.507779260998145, "learning_rate": 6.3387195117753375e-09, "loss": 0.8543, "step": 7915 }, { "epoch": 0.98, "grad_norm": 8.952008820288984, "learning_rate": 6.237726686142975e-09, "loss": 0.8984, "step": 7916 }, { "epoch": 0.98, "grad_norm": 6.750527338424795, "learning_rate": 6.137544375670601e-09, "loss": 0.5618, "step": 7917 }, { "epoch": 0.98, "grad_norm": 8.317290817970287, "learning_rate": 6.038172596616876e-09, "loss": 1.1764, "step": 7918 }, { "epoch": 0.98, "grad_norm": 8.969815130313371, "learning_rate": 5.939611365110565e-09, "loss": 0.9578, "step": 7919 }, { "epoch": 0.99, "grad_norm": 6.792949591064012, "learning_rate": 5.841860697149981e-09, "loss": 0.8297, "step": 7920 }, { "epoch": 0.99, "grad_norm": 9.90508051356545, "learning_rate": 5.744920608598547e-09, "loss": 1.2847, "step": 7921 }, { "epoch": 0.99, "grad_norm": 8.862152570777372, "learning_rate": 5.648791115192009e-09, "loss": 0.8236, "step": 7922 }, { "epoch": 0.99, "grad_norm": 7.62477332995533, "learning_rate": 5.553472232531776e-09, "loss": 0.9605, "step": 7923 }, { "epoch": 0.99, "grad_norm": 7.942864442131368, "learning_rate": 5.4589639760888046e-09, "loss": 1.1925, "step": 7924 }, { "epoch": 0.99, "grad_norm": 9.583320026555944, "learning_rate": 5.365266361202492e-09, "loss": 1.0064, "step": 7925 }, { "epoch": 0.99, "grad_norm": 7.044570457931815, "learning_rate": 5.272379403080119e-09, "loss": 0.6509, "step": 7926 }, { "epoch": 0.99, "grad_norm": 8.776811234309301, "learning_rate": 5.180303116798513e-09, "loss": 1.1611, "step": 7927 }, { "epoch": 0.99, "grad_norm": 8.458632378495489, "learning_rate": 5.089037517300721e-09, "loss": 0.9889, "step": 7928 }, { "epoch": 0.99, "grad_norm": 8.69252678035171, "learning_rate": 4.998582619401004e-09, "loss": 0.9074, "step": 7929 }, { "epoch": 0.99, "grad_norm": 6.789370120453084, "learning_rate": 4.908938437780397e-09, "loss": 0.5449, "step": 7930 }, { "epoch": 0.99, "grad_norm": 12.004783714683324, "learning_rate": 4.820104986988372e-09, "loss": 0.8143, "step": 7931 }, { "epoch": 0.99, "grad_norm": 8.160081261416916, "learning_rate": 4.73208228144284e-09, "loss": 1.0253, "step": 7932 }, { "epoch": 0.99, "grad_norm": 8.233885330065522, "learning_rate": 4.644870335431262e-09, "loss": 1.0486, "step": 7933 }, { "epoch": 0.99, "grad_norm": 6.840442432133487, "learning_rate": 4.558469163107315e-09, "loss": 1.0578, "step": 7934 }, { "epoch": 0.99, "grad_norm": 8.146984926987699, "learning_rate": 4.472878778495892e-09, "loss": 0.7832, "step": 7935 }, { "epoch": 0.99, "grad_norm": 7.051928749048424, "learning_rate": 4.388099195488105e-09, "loss": 0.9981, "step": 7936 }, { "epoch": 0.99, "grad_norm": 6.692943140090004, "learning_rate": 4.3041304278435e-09, "loss": 0.961, "step": 7937 }, { "epoch": 0.99, "grad_norm": 7.6638446919122245, "learning_rate": 4.220972489191733e-09, "loss": 0.8317, "step": 7938 }, { "epoch": 0.99, "grad_norm": 7.107825135492562, "learning_rate": 4.138625393028673e-09, "loss": 0.7121, "step": 7939 }, { "epoch": 0.99, "grad_norm": 7.479891304238664, "learning_rate": 4.05708915272085e-09, "loss": 0.9263, "step": 7940 }, { "epoch": 0.99, "grad_norm": 6.43935571612004, "learning_rate": 3.9763637815015686e-09, "loss": 0.692, "step": 7941 }, { "epoch": 0.99, "grad_norm": 7.331559320551143, "learning_rate": 3.896449292473125e-09, "loss": 1.1027, "step": 7942 }, { "epoch": 0.99, "grad_norm": 8.29719776162686, "learning_rate": 3.817345698605701e-09, "loss": 0.9722, "step": 7943 }, { "epoch": 0.99, "grad_norm": 7.212108061763276, "learning_rate": 3.73905301273847e-09, "loss": 0.9076, "step": 7944 }, { "epoch": 0.99, "grad_norm": 7.613334064610977, "learning_rate": 3.661571247579043e-09, "loss": 0.8719, "step": 7945 }, { "epoch": 0.99, "grad_norm": 7.4715628734369846, "learning_rate": 3.584900415702364e-09, "loss": 0.7022, "step": 7946 }, { "epoch": 0.99, "grad_norm": 8.892021833956004, "learning_rate": 3.5090405295534757e-09, "loss": 1.1256, "step": 7947 }, { "epoch": 0.99, "grad_norm": 7.895691477757845, "learning_rate": 3.4339916014441975e-09, "loss": 1.0822, "step": 7948 }, { "epoch": 0.99, "grad_norm": 6.671124914222953, "learning_rate": 3.359753643555341e-09, "loss": 0.94, "step": 7949 }, { "epoch": 0.99, "grad_norm": 8.20008411869384, "learning_rate": 3.286326667936712e-09, "loss": 0.7835, "step": 7950 }, { "epoch": 0.99, "grad_norm": 6.5275340319616575, "learning_rate": 3.213710686506e-09, "loss": 0.7549, "step": 7951 }, { "epoch": 0.99, "grad_norm": 7.83734064646481, "learning_rate": 3.141905711048221e-09, "loss": 1.1859, "step": 7952 }, { "epoch": 0.99, "grad_norm": 5.786791391383109, "learning_rate": 3.070911753218497e-09, "loss": 0.5997, "step": 7953 }, { "epoch": 0.99, "grad_norm": 6.852451817370056, "learning_rate": 3.0007288245392785e-09, "loss": 0.9623, "step": 7954 }, { "epoch": 0.99, "grad_norm": 7.040692144939703, "learning_rate": 2.9313569364020076e-09, "loss": 0.5946, "step": 7955 }, { "epoch": 0.99, "grad_norm": 7.108213772168279, "learning_rate": 2.862796100065457e-09, "loss": 1.046, "step": 7956 }, { "epoch": 0.99, "grad_norm": 8.110111348597114, "learning_rate": 2.7950463266579465e-09, "loss": 0.9421, "step": 7957 }, { "epoch": 0.99, "grad_norm": 6.834492437129687, "learning_rate": 2.728107627175125e-09, "loss": 0.8051, "step": 7958 }, { "epoch": 0.99, "grad_norm": 7.134864201429312, "learning_rate": 2.6619800124821905e-09, "loss": 0.6372, "step": 7959 }, { "epoch": 0.99, "grad_norm": 15.497828088714853, "learning_rate": 2.5966634933116685e-09, "loss": 1.0175, "step": 7960 }, { "epoch": 0.99, "grad_norm": 10.750783846847142, "learning_rate": 2.532158080264524e-09, "loss": 0.9981, "step": 7961 }, { "epoch": 0.99, "grad_norm": 9.399464654541989, "learning_rate": 2.468463783810715e-09, "loss": 0.8243, "step": 7962 }, { "epoch": 0.99, "grad_norm": 7.587266930527627, "learning_rate": 2.4055806142880835e-09, "loss": 0.8389, "step": 7963 }, { "epoch": 0.99, "grad_norm": 7.965255543527575, "learning_rate": 2.3435085819034644e-09, "loss": 0.7608, "step": 7964 }, { "epoch": 0.99, "grad_norm": 7.468040092913483, "learning_rate": 2.2822476967304662e-09, "loss": 0.7562, "step": 7965 }, { "epoch": 0.99, "grad_norm": 6.591622460981013, "learning_rate": 2.221797968712247e-09, "loss": 0.7958, "step": 7966 }, { "epoch": 0.99, "grad_norm": 6.026766573675073, "learning_rate": 2.1621594076609574e-09, "loss": 0.7824, "step": 7967 }, { "epoch": 0.99, "grad_norm": 6.569612605012618, "learning_rate": 2.1033320232555222e-09, "loss": 0.6453, "step": 7968 }, { "epoch": 0.99, "grad_norm": 8.263396053303715, "learning_rate": 2.045315825044414e-09, "loss": 1.0309, "step": 7969 }, { "epoch": 0.99, "grad_norm": 7.943031035181001, "learning_rate": 1.988110822443434e-09, "loss": 0.9608, "step": 7970 }, { "epoch": 0.99, "grad_norm": 7.92801928536323, "learning_rate": 1.9317170247373783e-09, "loss": 0.8246, "step": 7971 }, { "epoch": 0.99, "grad_norm": 7.566681156333081, "learning_rate": 1.8761344410800354e-09, "loss": 0.9454, "step": 7972 }, { "epoch": 0.99, "grad_norm": 7.493292339508073, "learning_rate": 1.8213630804919668e-09, "loss": 0.9687, "step": 7973 }, { "epoch": 0.99, "grad_norm": 7.451097467095879, "learning_rate": 1.7674029518627289e-09, "loss": 1.0055, "step": 7974 }, { "epoch": 0.99, "grad_norm": 8.246317460590191, "learning_rate": 1.7142540639514261e-09, "loss": 1.2277, "step": 7975 }, { "epoch": 0.99, "grad_norm": 7.712085819866291, "learning_rate": 1.6619164253828257e-09, "loss": 0.7494, "step": 7976 }, { "epoch": 0.99, "grad_norm": 6.548928833339295, "learning_rate": 1.6103900446534648e-09, "loss": 0.7253, "step": 7977 }, { "epoch": 0.99, "grad_norm": 7.657153446126681, "learning_rate": 1.5596749301244329e-09, "loss": 0.778, "step": 7978 }, { "epoch": 0.99, "grad_norm": 7.461523832116296, "learning_rate": 1.509771090028589e-09, "loss": 0.73, "step": 7979 }, { "epoch": 0.99, "grad_norm": 7.323765786612594, "learning_rate": 1.4606785324650096e-09, "loss": 0.6646, "step": 7980 }, { "epoch": 0.99, "grad_norm": 6.766120604820479, "learning_rate": 1.4123972654017659e-09, "loss": 0.513, "step": 7981 }, { "epoch": 0.99, "grad_norm": 7.39418307846664, "learning_rate": 1.3649272966748116e-09, "loss": 0.9402, "step": 7982 }, { "epoch": 0.99, "grad_norm": 6.546603479501607, "learning_rate": 1.318268633989095e-09, "loss": 0.7737, "step": 7983 }, { "epoch": 0.99, "grad_norm": 8.448864383656414, "learning_rate": 1.2724212849180019e-09, "loss": 0.8169, "step": 7984 }, { "epoch": 0.99, "grad_norm": 6.515207052665901, "learning_rate": 1.227385256901692e-09, "loss": 0.7858, "step": 7985 }, { "epoch": 0.99, "grad_norm": 7.766047356797592, "learning_rate": 1.183160557250984e-09, "loss": 0.9161, "step": 7986 }, { "epoch": 0.99, "grad_norm": 9.23530773391374, "learning_rate": 1.13974719314347e-09, "loss": 1.3378, "step": 7987 }, { "epoch": 0.99, "grad_norm": 7.378112052360138, "learning_rate": 1.097145171624625e-09, "loss": 1.0522, "step": 7988 }, { "epoch": 0.99, "grad_norm": 7.796270577165907, "learning_rate": 1.0553544996100285e-09, "loss": 0.7785, "step": 7989 }, { "epoch": 0.99, "grad_norm": 7.622132502808943, "learning_rate": 1.0143751838814774e-09, "loss": 0.925, "step": 7990 }, { "epoch": 0.99, "grad_norm": 7.9387796112946845, "learning_rate": 9.742072310908735e-10, "loss": 0.6988, "step": 7991 }, { "epoch": 0.99, "grad_norm": 6.621135051040071, "learning_rate": 9.348506477580011e-10, "loss": 0.8318, "step": 7992 }, { "epoch": 0.99, "grad_norm": 8.359157408055125, "learning_rate": 8.963054402699734e-10, "loss": 1.0054, "step": 7993 }, { "epoch": 0.99, "grad_norm": 8.091209784436439, "learning_rate": 8.585716148828971e-10, "loss": 0.9762, "step": 7994 }, { "epoch": 0.99, "grad_norm": 9.768836000348779, "learning_rate": 8.216491777213176e-10, "loss": 0.8564, "step": 7995 }, { "epoch": 0.99, "grad_norm": 7.42482652138197, "learning_rate": 7.855381347782187e-10, "loss": 0.9484, "step": 7996 }, { "epoch": 0.99, "grad_norm": 7.886853274440447, "learning_rate": 7.502384919144678e-10, "loss": 0.9689, "step": 7997 }, { "epoch": 0.99, "grad_norm": 8.056371632528457, "learning_rate": 7.157502548588158e-10, "loss": 0.8778, "step": 7998 }, { "epoch": 0.99, "grad_norm": 8.274029747113348, "learning_rate": 6.820734292101173e-10, "loss": 0.9692, "step": 7999 }, { "epoch": 1.0, "grad_norm": 8.991141828486768, "learning_rate": 6.49208020433445e-10, "loss": 1.1164, "step": 8000 }, { "epoch": 1.0, "grad_norm": 9.417357181679622, "learning_rate": 6.171540338628657e-10, "loss": 0.8732, "step": 8001 }, { "epoch": 1.0, "grad_norm": 8.869388209156906, "learning_rate": 5.859114747014393e-10, "loss": 0.9343, "step": 8002 }, { "epoch": 1.0, "grad_norm": 7.570137457628905, "learning_rate": 5.554803480195547e-10, "loss": 1.0646, "step": 8003 }, { "epoch": 1.0, "grad_norm": 10.1128886252961, "learning_rate": 5.258606587571491e-10, "loss": 1.3266, "step": 8004 }, { "epoch": 1.0, "grad_norm": 8.62421521696499, "learning_rate": 4.97052411720933e-10, "loss": 1.0691, "step": 8005 }, { "epoch": 1.0, "grad_norm": 6.232100554518818, "learning_rate": 4.690556115866107e-10, "loss": 0.7169, "step": 8006 }, { "epoch": 1.0, "grad_norm": 8.78300045357847, "learning_rate": 4.418702628988802e-10, "loss": 0.7987, "step": 8007 }, { "epoch": 1.0, "grad_norm": 7.5821052775037385, "learning_rate": 4.154963700692127e-10, "loss": 0.7267, "step": 8008 }, { "epoch": 1.0, "grad_norm": 6.4480638924275215, "learning_rate": 3.899339373786282e-10, "loss": 0.6646, "step": 8009 }, { "epoch": 1.0, "grad_norm": 8.116198642898599, "learning_rate": 3.6518296897658513e-10, "loss": 1.0037, "step": 8010 }, { "epoch": 1.0, "grad_norm": 6.928440849916577, "learning_rate": 3.412434688793154e-10, "loss": 0.7459, "step": 8011 }, { "epoch": 1.0, "grad_norm": 7.4650593830840855, "learning_rate": 3.181154409725995e-10, "loss": 1.0445, "step": 8012 }, { "epoch": 1.0, "grad_norm": 8.035361276012576, "learning_rate": 2.957988890112118e-10, "loss": 1.296, "step": 8013 }, { "epoch": 1.0, "grad_norm": 8.009131472754003, "learning_rate": 2.7429381661614463e-10, "loss": 0.8206, "step": 8014 }, { "epoch": 1.0, "grad_norm": 8.142398919372303, "learning_rate": 2.536002272779392e-10, "loss": 0.8849, "step": 8015 }, { "epoch": 1.0, "grad_norm": 7.228878345351815, "learning_rate": 2.337181243555753e-10, "loss": 0.8034, "step": 8016 }, { "epoch": 1.0, "grad_norm": 7.425503158480752, "learning_rate": 2.1464751107591607e-10, "loss": 1.1974, "step": 8017 }, { "epoch": 1.0, "grad_norm": 7.379459866771303, "learning_rate": 1.963883905348185e-10, "loss": 0.7411, "step": 8018 }, { "epoch": 1.0, "grad_norm": 9.050861526752746, "learning_rate": 1.7894076569435759e-10, "loss": 1.0883, "step": 8019 }, { "epoch": 1.0, "grad_norm": 7.351651663979731, "learning_rate": 1.6230463938782249e-10, "loss": 0.9714, "step": 8020 }, { "epoch": 1.0, "grad_norm": 7.542351276911601, "learning_rate": 1.4648001431472047e-10, "loss": 0.7226, "step": 8021 }, { "epoch": 1.0, "grad_norm": 7.470926194860488, "learning_rate": 1.3146689304355253e-10, "loss": 0.9741, "step": 8022 }, { "epoch": 1.0, "grad_norm": 8.042324617732408, "learning_rate": 1.172652780112582e-10, "loss": 0.7866, "step": 8023 }, { "epoch": 1.0, "grad_norm": 5.9094003212291994, "learning_rate": 1.0387517152266047e-10, "loss": 0.5722, "step": 8024 }, { "epoch": 1.0, "grad_norm": 7.393579277610118, "learning_rate": 9.129657575046579e-11, "loss": 0.8448, "step": 8025 }, { "epoch": 1.0, "grad_norm": 8.609871125936719, "learning_rate": 7.952949273748456e-11, "loss": 1.3252, "step": 8026 }, { "epoch": 1.0, "grad_norm": 6.970565773879236, "learning_rate": 6.857392439219013e-11, "loss": 0.7082, "step": 8027 }, { "epoch": 1.0, "grad_norm": 7.982369606165441, "learning_rate": 5.84298724937149e-11, "loss": 0.9025, "step": 8028 }, { "epoch": 1.0, "grad_norm": 7.163926149936263, "learning_rate": 4.909733868851963e-11, "loss": 0.5445, "step": 8029 }, { "epoch": 1.0, "grad_norm": 7.885410797094182, "learning_rate": 4.0576324490948503e-11, "loss": 0.9212, "step": 8030 }, { "epoch": 1.0, "grad_norm": 7.441281659481612, "learning_rate": 3.2866831283784315e-11, "loss": 0.8178, "step": 8031 }, { "epoch": 1.0, "grad_norm": 7.24504045360237, "learning_rate": 2.5968860318803524e-11, "loss": 0.8285, "step": 8032 }, { "epoch": 1.0, "grad_norm": 6.8948079403878, "learning_rate": 1.9882412715110932e-11, "loss": 0.9815, "step": 8033 }, { "epoch": 1.0, "grad_norm": 7.541926874471061, "learning_rate": 1.4607489461360148e-11, "loss": 0.9142, "step": 8034 }, { "epoch": 1.0, "grad_norm": 6.40506033597055, "learning_rate": 1.0144091412978008e-11, "loss": 0.8493, "step": 8035 }, { "epoch": 1.0, "grad_norm": 7.1768709021450094, "learning_rate": 6.492219294385038e-12, "loss": 1.1351, "step": 8036 }, { "epoch": 1.0, "grad_norm": 7.705150214554825, "learning_rate": 3.651873698995445e-12, "loss": 0.8686, "step": 8037 }, { "epoch": 1.0, "grad_norm": 6.438295129920793, "learning_rate": 1.623055086996672e-12, "loss": 0.8538, "step": 8038 }, { "epoch": 1.0, "grad_norm": 7.866432528186994, "learning_rate": 4.0576378812495767e-13, "loss": 0.8329, "step": 8039 }, { "epoch": 1.0, "grad_norm": 6.685790454967885, "learning_rate": 0.0, "loss": 0.8562, "step": 8040 }, { "epoch": 1.0, "step": 8040, "total_flos": 82042607924224.0, "train_loss": 1.2590281576251805, "train_runtime": 31812.5773, "train_samples_per_second": 4.044, "train_steps_per_second": 0.253 } ], "logging_steps": 1.0, "max_steps": 8040, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 82042607924224.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }