{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998902425639337, "global_step": 4555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4598540145985402e-07, "loss": 3.527, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.9197080291970804e-07, "loss": 3.1821, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.379562043795621e-07, "loss": 2.9231, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.839416058394161e-07, "loss": 3.4403, "step": 4 }, { "epoch": 0.0, "learning_rate": 7.299270072992701e-07, "loss": 3.326, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.759124087591242e-07, "loss": 3.553, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.0218978102189781e-06, "loss": 3.2228, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.1678832116788322e-06, "loss": 3.2485, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.3138686131386864e-06, "loss": 3.0549, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.4598540145985402e-06, "loss": 2.7973, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.6058394160583942e-06, "loss": 2.8106, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.7518248175182485e-06, "loss": 2.5989, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.8978102189781023e-06, "loss": 2.3181, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.0437956204379563e-06, "loss": 2.3596, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.1897810218978103e-06, "loss": 2.3599, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.3357664233576643e-06, "loss": 2.1762, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.4817518248175183e-06, "loss": 2.0018, "step": 17 }, { "epoch": 0.0, "learning_rate": 2.627737226277373e-06, "loss": 1.89, "step": 18 }, { "epoch": 0.0, "learning_rate": 2.7737226277372264e-06, "loss": 1.7508, "step": 19 }, { "epoch": 0.0, "learning_rate": 2.9197080291970804e-06, "loss": 1.7807, "step": 20 }, { "epoch": 0.0, "learning_rate": 3.065693430656935e-06, "loss": 1.8425, "step": 21 }, { "epoch": 0.0, "learning_rate": 3.2116788321167884e-06, "loss": 1.8062, "step": 22 }, { "epoch": 0.01, "learning_rate": 3.3576642335766425e-06, "loss": 0.2849, "step": 23 }, { "epoch": 0.01, "learning_rate": 3.503649635036497e-06, "loss": 1.7105, "step": 24 }, { "epoch": 0.01, "learning_rate": 3.6496350364963505e-06, "loss": 1.6817, "step": 25 }, { "epoch": 0.01, "learning_rate": 3.7956204379562045e-06, "loss": 1.623, "step": 26 }, { "epoch": 0.01, "learning_rate": 3.9416058394160585e-06, "loss": 1.5836, "step": 27 }, { "epoch": 0.01, "learning_rate": 4.0875912408759126e-06, "loss": 1.6326, "step": 28 }, { "epoch": 0.01, "learning_rate": 4.233576642335767e-06, "loss": 1.6064, "step": 29 }, { "epoch": 0.01, "learning_rate": 4.379562043795621e-06, "loss": 1.512, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.525547445255475e-06, "loss": 1.6379, "step": 31 }, { "epoch": 0.01, "learning_rate": 4.671532846715329e-06, "loss": 1.5565, "step": 32 }, { "epoch": 0.01, "learning_rate": 4.8175182481751835e-06, "loss": 1.4751, "step": 33 }, { "epoch": 0.01, "learning_rate": 4.963503649635037e-06, "loss": 1.4703, "step": 34 }, { "epoch": 0.01, "learning_rate": 5.1094890510948916e-06, "loss": 1.4388, "step": 35 }, { "epoch": 0.01, "learning_rate": 5.255474452554746e-06, "loss": 1.3996, "step": 36 }, { "epoch": 0.01, "learning_rate": 5.401459854014599e-06, "loss": 1.4086, "step": 37 }, { "epoch": 0.01, "learning_rate": 5.547445255474453e-06, "loss": 1.4157, "step": 38 }, { "epoch": 0.01, "learning_rate": 5.693430656934307e-06, "loss": 1.4059, "step": 39 }, { "epoch": 0.01, "learning_rate": 5.839416058394161e-06, "loss": 1.3917, "step": 40 }, { "epoch": 0.01, "learning_rate": 5.985401459854016e-06, "loss": 1.4446, "step": 41 }, { "epoch": 0.01, "learning_rate": 6.13138686131387e-06, "loss": 1.4208, "step": 42 }, { "epoch": 0.01, "learning_rate": 6.277372262773723e-06, "loss": 1.4023, "step": 43 }, { "epoch": 0.01, "learning_rate": 6.423357664233577e-06, "loss": 1.3612, "step": 44 }, { "epoch": 0.01, "learning_rate": 6.569343065693431e-06, "loss": 1.3908, "step": 45 }, { "epoch": 0.01, "learning_rate": 6.715328467153285e-06, "loss": 1.3779, "step": 46 }, { "epoch": 0.01, "learning_rate": 6.86131386861314e-06, "loss": 1.2828, "step": 47 }, { "epoch": 0.01, "learning_rate": 7.007299270072994e-06, "loss": 1.343, "step": 48 }, { "epoch": 0.01, "learning_rate": 7.153284671532848e-06, "loss": 1.3918, "step": 49 }, { "epoch": 0.01, "learning_rate": 7.299270072992701e-06, "loss": 1.3523, "step": 50 }, { "epoch": 0.01, "learning_rate": 7.445255474452555e-06, "loss": 1.2573, "step": 51 }, { "epoch": 0.01, "learning_rate": 7.591240875912409e-06, "loss": 1.3133, "step": 52 }, { "epoch": 0.01, "learning_rate": 7.737226277372264e-06, "loss": 1.2508, "step": 53 }, { "epoch": 0.01, "learning_rate": 7.883211678832117e-06, "loss": 1.2361, "step": 54 }, { "epoch": 0.01, "learning_rate": 8.029197080291972e-06, "loss": 1.2235, "step": 55 }, { "epoch": 0.01, "learning_rate": 8.175182481751825e-06, "loss": 0.2657, "step": 56 }, { "epoch": 0.01, "learning_rate": 8.32116788321168e-06, "loss": 1.2605, "step": 57 }, { "epoch": 0.01, "learning_rate": 8.467153284671533e-06, "loss": 1.2317, "step": 58 }, { "epoch": 0.01, "learning_rate": 8.613138686131386e-06, "loss": 1.3131, "step": 59 }, { "epoch": 0.01, "learning_rate": 8.759124087591241e-06, "loss": 1.3003, "step": 60 }, { "epoch": 0.01, "learning_rate": 8.905109489051096e-06, "loss": 1.2979, "step": 61 }, { "epoch": 0.01, "learning_rate": 9.05109489051095e-06, "loss": 1.3094, "step": 62 }, { "epoch": 0.01, "learning_rate": 9.197080291970804e-06, "loss": 1.2967, "step": 63 }, { "epoch": 0.01, "learning_rate": 9.343065693430657e-06, "loss": 1.266, "step": 64 }, { "epoch": 0.01, "learning_rate": 9.48905109489051e-06, "loss": 1.3344, "step": 65 }, { "epoch": 0.01, "learning_rate": 9.635036496350367e-06, "loss": 1.2549, "step": 66 }, { "epoch": 0.01, "learning_rate": 9.78102189781022e-06, "loss": 1.2457, "step": 67 }, { "epoch": 0.01, "learning_rate": 9.927007299270073e-06, "loss": 1.2768, "step": 68 }, { "epoch": 0.02, "learning_rate": 1.0072992700729928e-05, "loss": 0.276, "step": 69 }, { "epoch": 0.02, "learning_rate": 1.0218978102189783e-05, "loss": 1.2222, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.0364963503649636e-05, "loss": 1.1944, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.0510948905109491e-05, "loss": 1.2766, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.0656934306569344e-05, "loss": 1.1917, "step": 73 }, { "epoch": 0.02, "learning_rate": 1.0802919708029198e-05, "loss": 1.1633, "step": 74 }, { "epoch": 0.02, "learning_rate": 1.0948905109489052e-05, "loss": 1.2255, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.1094890510948906e-05, "loss": 1.2138, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.124087591240876e-05, "loss": 1.1892, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.1386861313868614e-05, "loss": 1.2782, "step": 78 }, { "epoch": 0.02, "learning_rate": 1.1532846715328467e-05, "loss": 1.1869, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.1678832116788322e-05, "loss": 1.2229, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.1824817518248176e-05, "loss": 1.1838, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.1970802919708031e-05, "loss": 1.1869, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.2116788321167885e-05, "loss": 1.1643, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.226277372262774e-05, "loss": 1.2443, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.2408759124087593e-05, "loss": 1.1478, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.2554744525547446e-05, "loss": 1.2128, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.27007299270073e-05, "loss": 1.1949, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.2846715328467154e-05, "loss": 1.2589, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.2992700729927009e-05, "loss": 1.2663, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.3138686131386862e-05, "loss": 1.2252, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.3284671532846715e-05, "loss": 1.247, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.343065693430657e-05, "loss": 1.1861, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.3576642335766423e-05, "loss": 1.2861, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.372262773722628e-05, "loss": 1.1827, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.3868613138686133e-05, "loss": 1.1442, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.4014598540145988e-05, "loss": 1.2476, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.416058394160584e-05, "loss": 1.1608, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.4306569343065696e-05, "loss": 1.237, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.4452554744525549e-05, "loss": 1.2111, "step": 99 }, { "epoch": 0.02, "learning_rate": 1.4598540145985402e-05, "loss": 1.176, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.4744525547445257e-05, "loss": 1.1465, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.489051094890511e-05, "loss": 1.164, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.5036496350364965e-05, "loss": 1.1402, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.5182481751824818e-05, "loss": 1.2192, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.5328467153284673e-05, "loss": 1.2461, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.5474452554744528e-05, "loss": 1.1456, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.5620437956204383e-05, "loss": 1.2491, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.5766423357664234e-05, "loss": 1.145, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.591240875912409e-05, "loss": 1.2144, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.6058394160583944e-05, "loss": 1.152, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.62043795620438e-05, "loss": 1.1618, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.635036496350365e-05, "loss": 1.1279, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.6496350364963505e-05, "loss": 1.1635, "step": 113 }, { "epoch": 0.03, "learning_rate": 1.664233576642336e-05, "loss": 1.1635, "step": 114 }, { "epoch": 0.03, "learning_rate": 1.678832116788321e-05, "loss": 1.1651, "step": 115 }, { "epoch": 0.03, "learning_rate": 1.6934306569343066e-05, "loss": 1.1533, "step": 116 }, { "epoch": 0.03, "learning_rate": 1.708029197080292e-05, "loss": 1.1982, "step": 117 }, { "epoch": 0.03, "learning_rate": 1.7226277372262773e-05, "loss": 1.15, "step": 118 }, { "epoch": 0.03, "learning_rate": 1.737226277372263e-05, "loss": 1.1811, "step": 119 }, { "epoch": 0.03, "learning_rate": 1.7518248175182482e-05, "loss": 1.1165, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.7664233576642337e-05, "loss": 1.1333, "step": 121 }, { "epoch": 0.03, "learning_rate": 1.7810218978102192e-05, "loss": 1.1959, "step": 122 }, { "epoch": 0.03, "learning_rate": 1.7956204379562047e-05, "loss": 1.1223, "step": 123 }, { "epoch": 0.03, "learning_rate": 1.81021897810219e-05, "loss": 1.1939, "step": 124 }, { "epoch": 0.03, "learning_rate": 1.8248175182481753e-05, "loss": 1.1051, "step": 125 }, { "epoch": 0.03, "learning_rate": 1.8394160583941608e-05, "loss": 1.1949, "step": 126 }, { "epoch": 0.03, "learning_rate": 1.854014598540146e-05, "loss": 1.1537, "step": 127 }, { "epoch": 0.03, "learning_rate": 1.8686131386861315e-05, "loss": 1.1982, "step": 128 }, { "epoch": 0.03, "learning_rate": 1.883211678832117e-05, "loss": 1.1166, "step": 129 }, { "epoch": 0.03, "learning_rate": 1.897810218978102e-05, "loss": 1.1302, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.912408759124088e-05, "loss": 1.1159, "step": 131 }, { "epoch": 0.03, "learning_rate": 1.9270072992700734e-05, "loss": 1.1468, "step": 132 }, { "epoch": 0.03, "learning_rate": 1.9416058394160586e-05, "loss": 1.175, "step": 133 }, { "epoch": 0.03, "learning_rate": 1.956204379562044e-05, "loss": 1.1648, "step": 134 }, { "epoch": 0.03, "learning_rate": 1.9708029197080295e-05, "loss": 1.1064, "step": 135 }, { "epoch": 0.03, "learning_rate": 1.9854014598540147e-05, "loss": 1.1597, "step": 136 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 1.0936, "step": 137 }, { "epoch": 0.03, "learning_rate": 1.9999997471759943e-05, "loss": 1.1613, "step": 138 }, { "epoch": 0.03, "learning_rate": 1.9999989887041042e-05, "loss": 1.1657, "step": 139 }, { "epoch": 0.03, "learning_rate": 1.9999977245847137e-05, "loss": 1.1125, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.999995954818462e-05, "loss": 1.1693, "step": 141 }, { "epoch": 0.03, "learning_rate": 1.9999936794062434e-05, "loss": 1.1671, "step": 142 }, { "epoch": 0.03, "learning_rate": 1.9999908983492093e-05, "loss": 1.168, "step": 143 }, { "epoch": 0.03, "learning_rate": 1.9999876116487656e-05, "loss": 1.1481, "step": 144 }, { "epoch": 0.03, "learning_rate": 1.999983819306574e-05, "loss": 1.1611, "step": 145 }, { "epoch": 0.03, "learning_rate": 1.9999795213245522e-05, "loss": 1.1567, "step": 146 }, { "epoch": 0.03, "learning_rate": 1.9999747177048734e-05, "loss": 1.1586, "step": 147 }, { "epoch": 0.03, "learning_rate": 1.999969408449967e-05, "loss": 1.1482, "step": 148 }, { "epoch": 0.03, "learning_rate": 1.999963593562517e-05, "loss": 1.1826, "step": 149 }, { "epoch": 0.03, "learning_rate": 1.9999572730454638e-05, "loss": 1.1319, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.999950446902004e-05, "loss": 1.1501, "step": 151 }, { "epoch": 0.03, "learning_rate": 1.9999431151355882e-05, "loss": 1.1395, "step": 152 }, { "epoch": 0.03, "learning_rate": 1.999935277749925e-05, "loss": 1.1254, "step": 153 }, { "epoch": 0.03, "learning_rate": 1.999926934748976e-05, "loss": 1.1432, "step": 154 }, { "epoch": 0.03, "learning_rate": 1.9999180861369605e-05, "loss": 1.1911, "step": 155 }, { "epoch": 0.03, "learning_rate": 1.9999087319183527e-05, "loss": 1.202, "step": 156 }, { "epoch": 0.03, "learning_rate": 1.9998988720978825e-05, "loss": 1.1306, "step": 157 }, { "epoch": 0.03, "learning_rate": 1.9998885066805355e-05, "loss": 1.1608, "step": 158 }, { "epoch": 0.03, "learning_rate": 1.999877635671553e-05, "loss": 1.1989, "step": 159 }, { "epoch": 0.04, "learning_rate": 1.999866259076432e-05, "loss": 1.0448, "step": 160 }, { "epoch": 0.04, "learning_rate": 1.999854376900925e-05, "loss": 1.1614, "step": 161 }, { "epoch": 0.04, "learning_rate": 1.99984198915104e-05, "loss": 1.1215, "step": 162 }, { "epoch": 0.04, "learning_rate": 1.9998290958330407e-05, "loss": 1.1224, "step": 163 }, { "epoch": 0.04, "learning_rate": 1.9998156969534475e-05, "loss": 1.0919, "step": 164 }, { "epoch": 0.04, "learning_rate": 1.9998017925190345e-05, "loss": 1.1702, "step": 165 }, { "epoch": 0.04, "learning_rate": 1.999787382536833e-05, "loss": 1.129, "step": 166 }, { "epoch": 0.04, "learning_rate": 1.999772467014129e-05, "loss": 1.0744, "step": 167 }, { "epoch": 0.04, "learning_rate": 1.999757045958465e-05, "loss": 1.0937, "step": 168 }, { "epoch": 0.04, "learning_rate": 1.999741119377638e-05, "loss": 1.0974, "step": 169 }, { "epoch": 0.04, "learning_rate": 1.9997246872797018e-05, "loss": 1.1513, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.9997077496729647e-05, "loss": 1.1525, "step": 171 }, { "epoch": 0.04, "learning_rate": 1.999690306565992e-05, "loss": 1.0911, "step": 172 }, { "epoch": 0.04, "learning_rate": 1.999672357967603e-05, "loss": 1.1167, "step": 173 }, { "epoch": 0.04, "learning_rate": 1.9996539038868737e-05, "loss": 1.1037, "step": 174 }, { "epoch": 0.04, "learning_rate": 1.9996349443331354e-05, "loss": 1.2418, "step": 175 }, { "epoch": 0.04, "learning_rate": 1.999615479315975e-05, "loss": 1.0991, "step": 176 }, { "epoch": 0.04, "learning_rate": 1.9995955088452344e-05, "loss": 1.1701, "step": 177 }, { "epoch": 0.04, "learning_rate": 1.9995750329310126e-05, "loss": 1.1492, "step": 178 }, { "epoch": 0.04, "learning_rate": 1.9995540515836622e-05, "loss": 1.1316, "step": 179 }, { "epoch": 0.04, "learning_rate": 1.999532564813793e-05, "loss": 0.3031, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.9995105726322693e-05, "loss": 0.3027, "step": 181 }, { "epoch": 0.04, "learning_rate": 1.999488075050212e-05, "loss": 1.1584, "step": 182 }, { "epoch": 0.04, "learning_rate": 1.9994650720789968e-05, "loss": 1.0437, "step": 183 }, { "epoch": 0.04, "learning_rate": 1.9994415637302545e-05, "loss": 1.1089, "step": 184 }, { "epoch": 0.04, "learning_rate": 1.999417550015873e-05, "loss": 0.3111, "step": 185 }, { "epoch": 0.04, "learning_rate": 1.999393030947994e-05, "loss": 1.0763, "step": 186 }, { "epoch": 0.04, "learning_rate": 1.999368006539016e-05, "loss": 1.0706, "step": 187 }, { "epoch": 0.04, "learning_rate": 1.999342476801592e-05, "loss": 1.1456, "step": 188 }, { "epoch": 0.04, "learning_rate": 1.999316441748632e-05, "loss": 1.1615, "step": 189 }, { "epoch": 0.04, "learning_rate": 1.9992899013932994e-05, "loss": 1.0669, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.9992628557490153e-05, "loss": 1.1548, "step": 191 }, { "epoch": 0.04, "learning_rate": 1.9992353048294542e-05, "loss": 1.1484, "step": 192 }, { "epoch": 0.04, "learning_rate": 1.9992072486485484e-05, "loss": 1.1554, "step": 193 }, { "epoch": 0.04, "learning_rate": 1.9991786872204838e-05, "loss": 1.1058, "step": 194 }, { "epoch": 0.04, "learning_rate": 1.9991496205597023e-05, "loss": 1.1429, "step": 195 }, { "epoch": 0.04, "learning_rate": 1.9991200486809018e-05, "loss": 1.1305, "step": 196 }, { "epoch": 0.04, "learning_rate": 1.999089971599035e-05, "loss": 1.04, "step": 197 }, { "epoch": 0.04, "learning_rate": 1.9990593893293098e-05, "loss": 1.1467, "step": 198 }, { "epoch": 0.04, "learning_rate": 1.9990283018871912e-05, "loss": 1.1116, "step": 199 }, { "epoch": 0.04, "learning_rate": 1.998996709288398e-05, "loss": 1.1327, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.9989646115489044e-05, "loss": 1.1138, "step": 201 }, { "epoch": 0.04, "learning_rate": 1.9989320086849412e-05, "loss": 1.1301, "step": 202 }, { "epoch": 0.04, "learning_rate": 1.998898900712994e-05, "loss": 1.0173, "step": 203 }, { "epoch": 0.04, "learning_rate": 1.9988652876498035e-05, "loss": 1.1788, "step": 204 }, { "epoch": 0.05, "learning_rate": 1.998831169512366e-05, "loss": 0.2933, "step": 205 }, { "epoch": 0.05, "learning_rate": 1.9987965463179337e-05, "loss": 1.0884, "step": 206 }, { "epoch": 0.05, "learning_rate": 1.9987614180840135e-05, "loss": 1.1241, "step": 207 }, { "epoch": 0.05, "learning_rate": 1.9987257848283673e-05, "loss": 1.0846, "step": 208 }, { "epoch": 0.05, "learning_rate": 1.998689646569014e-05, "loss": 1.0995, "step": 209 }, { "epoch": 0.05, "learning_rate": 1.9986530033242263e-05, "loss": 1.1427, "step": 210 }, { "epoch": 0.05, "learning_rate": 1.9986158551125335e-05, "loss": 1.1368, "step": 211 }, { "epoch": 0.05, "learning_rate": 1.998578201952718e-05, "loss": 1.1763, "step": 212 }, { "epoch": 0.05, "learning_rate": 1.9985400438638204e-05, "loss": 1.1003, "step": 213 }, { "epoch": 0.05, "learning_rate": 1.998501380865135e-05, "loss": 1.1115, "step": 214 }, { "epoch": 0.05, "learning_rate": 1.9984622129762116e-05, "loss": 1.1056, "step": 215 }, { "epoch": 0.05, "learning_rate": 1.9984225402168546e-05, "loss": 1.1365, "step": 216 }, { "epoch": 0.05, "learning_rate": 1.9983823626071257e-05, "loss": 1.1542, "step": 217 }, { "epoch": 0.05, "learning_rate": 1.9983416801673397e-05, "loss": 1.1314, "step": 218 }, { "epoch": 0.05, "learning_rate": 1.9983004929180682e-05, "loss": 1.1717, "step": 219 }, { "epoch": 0.05, "learning_rate": 1.9982588008801368e-05, "loss": 1.1072, "step": 220 }, { "epoch": 0.05, "learning_rate": 1.998216604074628e-05, "loss": 1.0872, "step": 221 }, { "epoch": 0.05, "learning_rate": 1.9981739025228773e-05, "loss": 1.0642, "step": 222 }, { "epoch": 0.05, "learning_rate": 1.9981306962464776e-05, "loss": 1.1351, "step": 223 }, { "epoch": 0.05, "learning_rate": 1.9980869852672756e-05, "loss": 1.1486, "step": 224 }, { "epoch": 0.05, "learning_rate": 1.998042769607374e-05, "loss": 1.0689, "step": 225 }, { "epoch": 0.05, "learning_rate": 1.99799804928913e-05, "loss": 1.0786, "step": 226 }, { "epoch": 0.05, "learning_rate": 1.9979528243351564e-05, "loss": 1.0887, "step": 227 }, { "epoch": 0.05, "learning_rate": 1.9979070947683214e-05, "loss": 1.1289, "step": 228 }, { "epoch": 0.05, "learning_rate": 1.997860860611748e-05, "loss": 1.1026, "step": 229 }, { "epoch": 0.05, "learning_rate": 1.9978141218888143e-05, "loss": 1.1366, "step": 230 }, { "epoch": 0.05, "learning_rate": 1.9977668786231536e-05, "loss": 1.0886, "step": 231 }, { "epoch": 0.05, "learning_rate": 1.997719130838654e-05, "loss": 1.1037, "step": 232 }, { "epoch": 0.05, "learning_rate": 1.9976708785594598e-05, "loss": 1.1414, "step": 233 }, { "epoch": 0.05, "learning_rate": 1.9976221218099692e-05, "loss": 1.1368, "step": 234 }, { "epoch": 0.05, "learning_rate": 1.997572860614836e-05, "loss": 1.0947, "step": 235 }, { "epoch": 0.05, "learning_rate": 1.9975230949989694e-05, "loss": 1.1058, "step": 236 }, { "epoch": 0.05, "learning_rate": 1.997472824987533e-05, "loss": 1.1313, "step": 237 }, { "epoch": 0.05, "learning_rate": 1.9974220506059456e-05, "loss": 1.0279, "step": 238 }, { "epoch": 0.05, "learning_rate": 1.997370771879881e-05, "loss": 0.3225, "step": 239 }, { "epoch": 0.05, "learning_rate": 1.997318988835269e-05, "loss": 1.1441, "step": 240 }, { "epoch": 0.05, "learning_rate": 1.9972667014982928e-05, "loss": 1.1018, "step": 241 }, { "epoch": 0.05, "learning_rate": 1.997213909895392e-05, "loss": 1.0824, "step": 242 }, { "epoch": 0.05, "learning_rate": 1.99716061405326e-05, "loss": 1.1531, "step": 243 }, { "epoch": 0.05, "learning_rate": 1.997106813998846e-05, "loss": 1.0889, "step": 244 }, { "epoch": 0.05, "learning_rate": 1.9970525097593537e-05, "loss": 1.118, "step": 245 }, { "epoch": 0.05, "learning_rate": 1.996997701362242e-05, "loss": 1.0724, "step": 246 }, { "epoch": 0.05, "learning_rate": 1.996942388835225e-05, "loss": 1.121, "step": 247 }, { "epoch": 0.05, "learning_rate": 1.9968865722062708e-05, "loss": 1.1149, "step": 248 }, { "epoch": 0.05, "learning_rate": 1.9968302515036038e-05, "loss": 1.0758, "step": 249 }, { "epoch": 0.05, "learning_rate": 1.996773426755702e-05, "loss": 1.1028, "step": 250 }, { "epoch": 0.06, "learning_rate": 1.996716097991298e-05, "loss": 0.3188, "step": 251 }, { "epoch": 0.06, "learning_rate": 1.996658265239381e-05, "loss": 1.0265, "step": 252 }, { "epoch": 0.06, "learning_rate": 1.9965999285291934e-05, "loss": 1.1773, "step": 253 }, { "epoch": 0.06, "learning_rate": 1.9965410878902336e-05, "loss": 1.0768, "step": 254 }, { "epoch": 0.06, "learning_rate": 1.9964817433522537e-05, "loss": 1.0695, "step": 255 }, { "epoch": 0.06, "learning_rate": 1.9964218949452617e-05, "loss": 1.0647, "step": 256 }, { "epoch": 0.06, "learning_rate": 1.9963615426995195e-05, "loss": 1.1462, "step": 257 }, { "epoch": 0.06, "learning_rate": 1.9963006866455438e-05, "loss": 1.0829, "step": 258 }, { "epoch": 0.06, "learning_rate": 1.9962393268141068e-05, "loss": 1.1139, "step": 259 }, { "epoch": 0.06, "learning_rate": 1.996177463236235e-05, "loss": 1.0832, "step": 260 }, { "epoch": 0.06, "learning_rate": 1.9961150959432093e-05, "loss": 1.0456, "step": 261 }, { "epoch": 0.06, "learning_rate": 1.9960522249665658e-05, "loss": 1.0792, "step": 262 }, { "epoch": 0.06, "learning_rate": 1.9959888503380946e-05, "loss": 1.0864, "step": 263 }, { "epoch": 0.06, "learning_rate": 1.9959249720898416e-05, "loss": 1.104, "step": 264 }, { "epoch": 0.06, "learning_rate": 1.9958605902541065e-05, "loss": 1.0387, "step": 265 }, { "epoch": 0.06, "learning_rate": 1.995795704863444e-05, "loss": 1.1811, "step": 266 }, { "epoch": 0.06, "learning_rate": 1.9957303159506632e-05, "loss": 1.0671, "step": 267 }, { "epoch": 0.06, "learning_rate": 1.9956644235488273e-05, "loss": 1.0841, "step": 268 }, { "epoch": 0.06, "learning_rate": 1.9955980276912555e-05, "loss": 1.157, "step": 269 }, { "epoch": 0.06, "learning_rate": 1.9955311284115198e-05, "loss": 1.0741, "step": 270 }, { "epoch": 0.06, "learning_rate": 1.9954637257434487e-05, "loss": 1.1352, "step": 271 }, { "epoch": 0.06, "learning_rate": 1.995395819721124e-05, "loss": 1.1873, "step": 272 }, { "epoch": 0.06, "learning_rate": 1.9953274103788817e-05, "loss": 1.0912, "step": 273 }, { "epoch": 0.06, "learning_rate": 1.995258497751313e-05, "loss": 1.1232, "step": 274 }, { "epoch": 0.06, "learning_rate": 1.995189081873264e-05, "loss": 1.0579, "step": 275 }, { "epoch": 0.06, "learning_rate": 1.9951191627798342e-05, "loss": 1.1013, "step": 276 }, { "epoch": 0.06, "learning_rate": 1.995048740506378e-05, "loss": 0.298, "step": 277 }, { "epoch": 0.06, "learning_rate": 1.994977815088504e-05, "loss": 1.0575, "step": 278 }, { "epoch": 0.06, "learning_rate": 1.9949063865620767e-05, "loss": 1.0567, "step": 279 }, { "epoch": 0.06, "learning_rate": 1.9948344549632124e-05, "loss": 1.1138, "step": 280 }, { "epoch": 0.06, "learning_rate": 1.9947620203282842e-05, "loss": 1.1163, "step": 281 }, { "epoch": 0.06, "learning_rate": 1.9946890826939184e-05, "loss": 1.025, "step": 282 }, { "epoch": 0.06, "learning_rate": 1.994615642096995e-05, "loss": 1.0613, "step": 283 }, { "epoch": 0.06, "learning_rate": 1.9945416985746496e-05, "loss": 1.0354, "step": 284 }, { "epoch": 0.06, "learning_rate": 1.9944672521642715e-05, "loss": 1.1048, "step": 285 }, { "epoch": 0.06, "learning_rate": 1.9943923029035046e-05, "loss": 0.3083, "step": 286 }, { "epoch": 0.06, "learning_rate": 1.9943168508302467e-05, "loss": 1.0482, "step": 287 }, { "epoch": 0.06, "learning_rate": 1.99424089598265e-05, "loss": 1.1149, "step": 288 }, { "epoch": 0.06, "learning_rate": 1.994164438399121e-05, "loss": 1.1013, "step": 289 }, { "epoch": 0.06, "learning_rate": 1.9940874781183203e-05, "loss": 1.0904, "step": 290 }, { "epoch": 0.06, "learning_rate": 1.9940100151791624e-05, "loss": 1.0322, "step": 291 }, { "epoch": 0.06, "learning_rate": 1.9939320496208166e-05, "loss": 1.0267, "step": 292 }, { "epoch": 0.06, "learning_rate": 1.993853581482706e-05, "loss": 1.0536, "step": 293 }, { "epoch": 0.06, "learning_rate": 1.9937746108045076e-05, "loss": 0.2843, "step": 294 }, { "epoch": 0.06, "learning_rate": 1.9936951376261534e-05, "loss": 1.0364, "step": 295 }, { "epoch": 0.06, "learning_rate": 1.993615161987828e-05, "loss": 1.0771, "step": 296 }, { "epoch": 0.07, "learning_rate": 1.9935346839299717e-05, "loss": 1.0747, "step": 297 }, { "epoch": 0.07, "learning_rate": 1.9934537034932775e-05, "loss": 0.3095, "step": 298 }, { "epoch": 0.07, "learning_rate": 1.993372220718693e-05, "loss": 1.089, "step": 299 }, { "epoch": 0.07, "learning_rate": 1.9932902356474208e-05, "loss": 1.0113, "step": 300 }, { "epoch": 0.07, "learning_rate": 1.993207748320915e-05, "loss": 1.0828, "step": 301 }, { "epoch": 0.07, "learning_rate": 1.9931247587808863e-05, "loss": 1.0825, "step": 302 }, { "epoch": 0.07, "learning_rate": 1.9930412670692974e-05, "loss": 1.0805, "step": 303 }, { "epoch": 0.07, "learning_rate": 1.9929572732283666e-05, "loss": 1.052, "step": 304 }, { "epoch": 0.07, "learning_rate": 1.9928727773005644e-05, "loss": 1.0618, "step": 305 }, { "epoch": 0.07, "learning_rate": 1.992787779328616e-05, "loss": 1.1116, "step": 306 }, { "epoch": 0.07, "learning_rate": 1.992702279355501e-05, "loss": 1.1207, "step": 307 }, { "epoch": 0.07, "learning_rate": 1.992616277424452e-05, "loss": 1.0763, "step": 308 }, { "epoch": 0.07, "learning_rate": 1.9925297735789558e-05, "loss": 1.0535, "step": 309 }, { "epoch": 0.07, "learning_rate": 1.9924427678627533e-05, "loss": 1.0598, "step": 310 }, { "epoch": 0.07, "learning_rate": 1.9923552603198378e-05, "loss": 1.1096, "step": 311 }, { "epoch": 0.07, "learning_rate": 1.992267250994458e-05, "loss": 1.1542, "step": 312 }, { "epoch": 0.07, "learning_rate": 1.9921787399311153e-05, "loss": 1.0768, "step": 313 }, { "epoch": 0.07, "learning_rate": 1.992089727174565e-05, "loss": 1.0344, "step": 314 }, { "epoch": 0.07, "learning_rate": 1.992000212769817e-05, "loss": 1.0672, "step": 315 }, { "epoch": 0.07, "learning_rate": 1.9919101967621338e-05, "loss": 1.06, "step": 316 }, { "epoch": 0.07, "learning_rate": 1.991819679197031e-05, "loss": 1.0911, "step": 317 }, { "epoch": 0.07, "learning_rate": 1.99172866012028e-05, "loss": 1.1039, "step": 318 }, { "epoch": 0.07, "learning_rate": 1.9916371395779033e-05, "loss": 1.0866, "step": 319 }, { "epoch": 0.07, "learning_rate": 1.9915451176161788e-05, "loss": 1.0263, "step": 320 }, { "epoch": 0.07, "learning_rate": 1.9914525942816364e-05, "loss": 1.138, "step": 321 }, { "epoch": 0.07, "learning_rate": 1.9913595696210608e-05, "loss": 1.1066, "step": 322 }, { "epoch": 0.07, "learning_rate": 1.99126604368149e-05, "loss": 1.0603, "step": 323 }, { "epoch": 0.07, "learning_rate": 1.991172016510215e-05, "loss": 1.0468, "step": 324 }, { "epoch": 0.07, "learning_rate": 1.9910774881547803e-05, "loss": 1.117, "step": 325 }, { "epoch": 0.07, "learning_rate": 1.990982458662984e-05, "loss": 1.0749, "step": 326 }, { "epoch": 0.07, "learning_rate": 1.990886928082878e-05, "loss": 1.1678, "step": 327 }, { "epoch": 0.07, "learning_rate": 1.9907908964627664e-05, "loss": 1.1062, "step": 328 }, { "epoch": 0.07, "learning_rate": 1.990694363851208e-05, "loss": 1.0169, "step": 329 }, { "epoch": 0.07, "learning_rate": 1.990597330297014e-05, "loss": 1.0786, "step": 330 }, { "epoch": 0.07, "learning_rate": 1.9904997958492497e-05, "loss": 1.1466, "step": 331 }, { "epoch": 0.07, "learning_rate": 1.9904017605572322e-05, "loss": 1.1198, "step": 332 }, { "epoch": 0.07, "learning_rate": 1.990303224470534e-05, "loss": 1.0842, "step": 333 }, { "epoch": 0.07, "learning_rate": 1.9902041876389794e-05, "loss": 1.1105, "step": 334 }, { "epoch": 0.07, "learning_rate": 1.9901046501126454e-05, "loss": 1.0637, "step": 335 }, { "epoch": 0.07, "learning_rate": 1.990004611941864e-05, "loss": 1.072, "step": 336 }, { "epoch": 0.07, "learning_rate": 1.9899040731772183e-05, "loss": 1.0713, "step": 337 }, { "epoch": 0.07, "learning_rate": 1.9898030338695465e-05, "loss": 1.097, "step": 338 }, { "epoch": 0.07, "learning_rate": 1.9897014940699383e-05, "loss": 1.0489, "step": 339 }, { "epoch": 0.07, "learning_rate": 1.989599453829737e-05, "loss": 1.0838, "step": 340 }, { "epoch": 0.07, "learning_rate": 1.9894969132005396e-05, "loss": 1.1083, "step": 341 }, { "epoch": 0.08, "learning_rate": 1.9893938722341956e-05, "loss": 1.136, "step": 342 }, { "epoch": 0.08, "learning_rate": 1.9892903309828066e-05, "loss": 1.0752, "step": 343 }, { "epoch": 0.08, "learning_rate": 1.9891862894987285e-05, "loss": 1.0668, "step": 344 }, { "epoch": 0.08, "learning_rate": 1.98908174783457e-05, "loss": 0.3037, "step": 345 }, { "epoch": 0.08, "learning_rate": 1.988976706043192e-05, "loss": 1.0331, "step": 346 }, { "epoch": 0.08, "learning_rate": 1.988871164177709e-05, "loss": 1.0655, "step": 347 }, { "epoch": 0.08, "learning_rate": 1.9887651222914876e-05, "loss": 1.0393, "step": 348 }, { "epoch": 0.08, "learning_rate": 1.9886585804381477e-05, "loss": 1.0548, "step": 349 }, { "epoch": 0.08, "learning_rate": 1.9885515386715625e-05, "loss": 1.0949, "step": 350 }, { "epoch": 0.08, "learning_rate": 1.988443997045857e-05, "loss": 1.0969, "step": 351 }, { "epoch": 0.08, "learning_rate": 1.9883359556154093e-05, "loss": 1.0757, "step": 352 }, { "epoch": 0.08, "learning_rate": 1.988227414434851e-05, "loss": 1.1138, "step": 353 }, { "epoch": 0.08, "learning_rate": 1.988118373559065e-05, "loss": 1.094, "step": 354 }, { "epoch": 0.08, "learning_rate": 1.9880088330431883e-05, "loss": 1.0652, "step": 355 }, { "epoch": 0.08, "learning_rate": 1.987898792942609e-05, "loss": 1.0302, "step": 356 }, { "epoch": 0.08, "learning_rate": 1.987788253312969e-05, "loss": 1.0746, "step": 357 }, { "epoch": 0.08, "learning_rate": 1.987677214210163e-05, "loss": 1.0836, "step": 358 }, { "epoch": 0.08, "learning_rate": 1.987565675690337e-05, "loss": 1.1057, "step": 359 }, { "epoch": 0.08, "learning_rate": 1.9874536378098905e-05, "loss": 1.0684, "step": 360 }, { "epoch": 0.08, "learning_rate": 1.9873411006254754e-05, "loss": 1.0555, "step": 361 }, { "epoch": 0.08, "learning_rate": 1.9872280641939953e-05, "loss": 1.1055, "step": 362 }, { "epoch": 0.08, "learning_rate": 1.9871145285726077e-05, "loss": 1.06, "step": 363 }, { "epoch": 0.08, "learning_rate": 1.987000493818721e-05, "loss": 1.0683, "step": 364 }, { "epoch": 0.08, "learning_rate": 1.986885959989997e-05, "loss": 1.0454, "step": 365 }, { "epoch": 0.08, "learning_rate": 1.986770927144349e-05, "loss": 1.0695, "step": 366 }, { "epoch": 0.08, "learning_rate": 1.986655395339944e-05, "loss": 1.0955, "step": 367 }, { "epoch": 0.08, "learning_rate": 1.9865393646351996e-05, "loss": 1.0658, "step": 368 }, { "epoch": 0.08, "learning_rate": 1.986422835088787e-05, "loss": 1.0589, "step": 369 }, { "epoch": 0.08, "learning_rate": 1.9863058067596287e-05, "loss": 1.0214, "step": 370 }, { "epoch": 0.08, "learning_rate": 1.9861882797069002e-05, "loss": 1.0304, "step": 371 }, { "epoch": 0.08, "learning_rate": 1.9860702539900288e-05, "loss": 1.0764, "step": 372 }, { "epoch": 0.08, "learning_rate": 1.9859517296686935e-05, "loss": 1.0772, "step": 373 }, { "epoch": 0.08, "learning_rate": 1.985832706802827e-05, "loss": 1.065, "step": 374 }, { "epoch": 0.08, "learning_rate": 1.9857131854526117e-05, "loss": 1.1109, "step": 375 }, { "epoch": 0.08, "learning_rate": 1.985593165678484e-05, "loss": 1.0751, "step": 376 }, { "epoch": 0.08, "learning_rate": 1.9854726475411315e-05, "loss": 1.0847, "step": 377 }, { "epoch": 0.08, "learning_rate": 1.9853516311014943e-05, "loss": 1.0339, "step": 378 }, { "epoch": 0.08, "learning_rate": 1.9852301164207636e-05, "loss": 1.0751, "step": 379 }, { "epoch": 0.08, "learning_rate": 1.9851081035603836e-05, "loss": 1.0204, "step": 380 }, { "epoch": 0.08, "learning_rate": 1.984985592582049e-05, "loss": 1.0427, "step": 381 }, { "epoch": 0.08, "learning_rate": 1.984862583547708e-05, "loss": 1.0948, "step": 382 }, { "epoch": 0.08, "learning_rate": 1.9847390765195597e-05, "loss": 1.0341, "step": 383 }, { "epoch": 0.08, "learning_rate": 1.9846150715600552e-05, "loss": 1.0422, "step": 384 }, { "epoch": 0.08, "learning_rate": 1.984490568731897e-05, "loss": 1.073, "step": 385 }, { "epoch": 0.08, "learning_rate": 1.9843655680980403e-05, "loss": 1.0455, "step": 386 }, { "epoch": 0.08, "learning_rate": 1.984240069721691e-05, "loss": 0.337, "step": 387 }, { "epoch": 0.09, "learning_rate": 1.9841140736663072e-05, "loss": 1.1187, "step": 388 }, { "epoch": 0.09, "learning_rate": 1.9839875799955985e-05, "loss": 1.0936, "step": 389 }, { "epoch": 0.09, "learning_rate": 1.9838605887735266e-05, "loss": 1.076, "step": 390 }, { "epoch": 0.09, "learning_rate": 1.9837331000643036e-05, "loss": 1.0794, "step": 391 }, { "epoch": 0.09, "learning_rate": 1.9836051139323947e-05, "loss": 1.0932, "step": 392 }, { "epoch": 0.09, "learning_rate": 1.983476630442515e-05, "loss": 1.0718, "step": 393 }, { "epoch": 0.09, "learning_rate": 1.983347649659633e-05, "loss": 0.2956, "step": 394 }, { "epoch": 0.09, "learning_rate": 1.9832181716489664e-05, "loss": 1.0138, "step": 395 }, { "epoch": 0.09, "learning_rate": 1.983088196475986e-05, "loss": 1.0287, "step": 396 }, { "epoch": 0.09, "learning_rate": 1.9829577242064138e-05, "loss": 1.0611, "step": 397 }, { "epoch": 0.09, "learning_rate": 1.9828267549062224e-05, "loss": 1.0648, "step": 398 }, { "epoch": 0.09, "learning_rate": 1.9826952886416365e-05, "loss": 1.0236, "step": 399 }, { "epoch": 0.09, "learning_rate": 1.9825633254791318e-05, "loss": 1.095, "step": 400 }, { "epoch": 0.09, "learning_rate": 1.9824308654854346e-05, "loss": 1.0291, "step": 401 }, { "epoch": 0.09, "learning_rate": 1.9822979087275238e-05, "loss": 1.0358, "step": 402 }, { "epoch": 0.09, "learning_rate": 1.982164455272628e-05, "loss": 1.0706, "step": 403 }, { "epoch": 0.09, "learning_rate": 1.9820305051882287e-05, "loss": 1.0837, "step": 404 }, { "epoch": 0.09, "learning_rate": 1.9818960585420562e-05, "loss": 1.0933, "step": 405 }, { "epoch": 0.09, "learning_rate": 1.9817611154020942e-05, "loss": 1.1178, "step": 406 }, { "epoch": 0.09, "learning_rate": 1.981625675836576e-05, "loss": 1.031, "step": 407 }, { "epoch": 0.09, "learning_rate": 1.9814897399139862e-05, "loss": 1.1489, "step": 408 }, { "epoch": 0.09, "learning_rate": 1.9813533077030606e-05, "loss": 1.114, "step": 409 }, { "epoch": 0.09, "learning_rate": 1.9812163792727864e-05, "loss": 1.0353, "step": 410 }, { "epoch": 0.09, "learning_rate": 1.9810789546924004e-05, "loss": 0.3164, "step": 411 }, { "epoch": 0.09, "learning_rate": 1.9809410340313916e-05, "loss": 1.0405, "step": 412 }, { "epoch": 0.09, "learning_rate": 1.9808026173594993e-05, "loss": 1.1109, "step": 413 }, { "epoch": 0.09, "learning_rate": 1.9806637047467136e-05, "loss": 1.0372, "step": 414 }, { "epoch": 0.09, "learning_rate": 1.9805242962632747e-05, "loss": 1.0674, "step": 415 }, { "epoch": 0.09, "learning_rate": 1.9803843919796753e-05, "loss": 1.0559, "step": 416 }, { "epoch": 0.09, "learning_rate": 1.980243991966657e-05, "loss": 1.0397, "step": 417 }, { "epoch": 0.09, "learning_rate": 1.980103096295213e-05, "loss": 0.2897, "step": 418 }, { "epoch": 0.09, "learning_rate": 1.979961705036587e-05, "loss": 1.008, "step": 419 }, { "epoch": 0.09, "learning_rate": 1.9798198182622734e-05, "loss": 1.0094, "step": 420 }, { "epoch": 0.09, "learning_rate": 1.9796774360440158e-05, "loss": 0.2852, "step": 421 }, { "epoch": 0.09, "learning_rate": 1.979534558453811e-05, "loss": 0.9975, "step": 422 }, { "epoch": 0.09, "learning_rate": 1.979391185563904e-05, "loss": 1.0133, "step": 423 }, { "epoch": 0.09, "learning_rate": 1.9792473174467913e-05, "loss": 1.0057, "step": 424 }, { "epoch": 0.09, "learning_rate": 1.9791029541752197e-05, "loss": 1.0647, "step": 425 }, { "epoch": 0.09, "learning_rate": 1.9789580958221854e-05, "loss": 1.0462, "step": 426 }, { "epoch": 0.09, "learning_rate": 1.978812742460936e-05, "loss": 0.3215, "step": 427 }, { "epoch": 0.09, "learning_rate": 1.97866689416497e-05, "loss": 1.0488, "step": 428 }, { "epoch": 0.09, "learning_rate": 1.9785205510080343e-05, "loss": 1.0517, "step": 429 }, { "epoch": 0.09, "learning_rate": 1.9783737130641272e-05, "loss": 1.0619, "step": 430 }, { "epoch": 0.09, "learning_rate": 1.9782263804074974e-05, "loss": 0.9915, "step": 431 }, { "epoch": 0.09, "learning_rate": 1.9780785531126434e-05, "loss": 1.0355, "step": 432 }, { "epoch": 0.1, "learning_rate": 1.9779302312543134e-05, "loss": 1.086, "step": 433 }, { "epoch": 0.1, "learning_rate": 1.977781414907506e-05, "loss": 0.9993, "step": 434 }, { "epoch": 0.1, "learning_rate": 1.97763210414747e-05, "loss": 1.0382, "step": 435 }, { "epoch": 0.1, "learning_rate": 1.9774822990497048e-05, "loss": 1.0617, "step": 436 }, { "epoch": 0.1, "learning_rate": 1.977331999689958e-05, "loss": 1.0945, "step": 437 }, { "epoch": 0.1, "learning_rate": 1.9771812061442288e-05, "loss": 1.0596, "step": 438 }, { "epoch": 0.1, "learning_rate": 1.977029918488765e-05, "loss": 1.0493, "step": 439 }, { "epoch": 0.1, "learning_rate": 1.9768781368000658e-05, "loss": 0.9827, "step": 440 }, { "epoch": 0.1, "learning_rate": 1.9767258611548787e-05, "loss": 0.9686, "step": 441 }, { "epoch": 0.1, "learning_rate": 1.976573091630202e-05, "loss": 1.0065, "step": 442 }, { "epoch": 0.1, "learning_rate": 1.9764198283032828e-05, "loss": 1.0622, "step": 443 }, { "epoch": 0.1, "learning_rate": 1.9762660712516187e-05, "loss": 1.0197, "step": 444 }, { "epoch": 0.1, "learning_rate": 1.9761118205529565e-05, "loss": 1.0517, "step": 445 }, { "epoch": 0.1, "learning_rate": 1.975957076285293e-05, "loss": 1.0913, "step": 446 }, { "epoch": 0.1, "learning_rate": 1.975801838526874e-05, "loss": 0.9918, "step": 447 }, { "epoch": 0.1, "learning_rate": 1.9756461073561955e-05, "loss": 1.1022, "step": 448 }, { "epoch": 0.1, "learning_rate": 1.9754898828520026e-05, "loss": 1.029, "step": 449 }, { "epoch": 0.1, "learning_rate": 1.9753331650932898e-05, "loss": 1.047, "step": 450 }, { "epoch": 0.1, "learning_rate": 1.975175954159301e-05, "loss": 1.018, "step": 451 }, { "epoch": 0.1, "learning_rate": 1.9750182501295298e-05, "loss": 1.0862, "step": 452 }, { "epoch": 0.1, "learning_rate": 1.9748600530837187e-05, "loss": 1.0692, "step": 453 }, { "epoch": 0.1, "learning_rate": 1.97470136310186e-05, "loss": 1.0798, "step": 454 }, { "epoch": 0.1, "learning_rate": 1.974542180264195e-05, "loss": 1.0415, "step": 455 }, { "epoch": 0.1, "learning_rate": 1.974382504651214e-05, "loss": 1.0347, "step": 456 }, { "epoch": 0.1, "learning_rate": 1.9742223363436567e-05, "loss": 1.084, "step": 457 }, { "epoch": 0.1, "learning_rate": 1.974061675422512e-05, "loss": 1.0166, "step": 458 }, { "epoch": 0.1, "learning_rate": 1.9739005219690174e-05, "loss": 0.9442, "step": 459 }, { "epoch": 0.1, "learning_rate": 1.97373887606466e-05, "loss": 1.0565, "step": 460 }, { "epoch": 0.1, "learning_rate": 1.973576737791176e-05, "loss": 1.1041, "step": 461 }, { "epoch": 0.1, "learning_rate": 1.9734141072305504e-05, "loss": 0.9701, "step": 462 }, { "epoch": 0.1, "learning_rate": 1.973250984465016e-05, "loss": 0.9738, "step": 463 }, { "epoch": 0.1, "learning_rate": 1.9730873695770573e-05, "loss": 1.0682, "step": 464 }, { "epoch": 0.1, "learning_rate": 1.972923262649404e-05, "loss": 1.0584, "step": 465 }, { "epoch": 0.1, "learning_rate": 1.9727586637650373e-05, "loss": 1.0966, "step": 466 }, { "epoch": 0.1, "learning_rate": 1.9725935730071862e-05, "loss": 1.0482, "step": 467 }, { "epoch": 0.1, "learning_rate": 1.9724279904593287e-05, "loss": 1.0455, "step": 468 }, { "epoch": 0.1, "learning_rate": 1.972261916205191e-05, "loss": 1.065, "step": 469 }, { "epoch": 0.1, "learning_rate": 1.9720953503287487e-05, "loss": 1.0353, "step": 470 }, { "epoch": 0.1, "learning_rate": 1.9719282929142248e-05, "loss": 1.0558, "step": 471 }, { "epoch": 0.1, "learning_rate": 1.971760744046092e-05, "loss": 1.0295, "step": 472 }, { "epoch": 0.1, "learning_rate": 1.971592703809071e-05, "loss": 1.046, "step": 473 }, { "epoch": 0.1, "learning_rate": 1.9714241722881306e-05, "loss": 1.0676, "step": 474 }, { "epoch": 0.1, "learning_rate": 1.971255149568489e-05, "loss": 1.089, "step": 475 }, { "epoch": 0.1, "learning_rate": 1.9710856357356117e-05, "loss": 1.0004, "step": 476 }, { "epoch": 0.1, "learning_rate": 1.970915630875214e-05, "loss": 1.1026, "step": 477 }, { "epoch": 0.1, "learning_rate": 1.9707451350732572e-05, "loss": 1.06, "step": 478 }, { "epoch": 0.11, "learning_rate": 1.9705741484159532e-05, "loss": 1.0265, "step": 479 }, { "epoch": 0.11, "learning_rate": 1.9704026709897606e-05, "loss": 1.0696, "step": 480 }, { "epoch": 0.11, "learning_rate": 1.9702307028813864e-05, "loss": 0.9586, "step": 481 }, { "epoch": 0.11, "learning_rate": 1.9700582441777863e-05, "loss": 1.0562, "step": 482 }, { "epoch": 0.11, "learning_rate": 1.969885294966164e-05, "loss": 1.0513, "step": 483 }, { "epoch": 0.11, "learning_rate": 1.9697118553339703e-05, "loss": 1.1106, "step": 484 }, { "epoch": 0.11, "learning_rate": 1.9695379253689048e-05, "loss": 1.0317, "step": 485 }, { "epoch": 0.11, "learning_rate": 1.9693635051589148e-05, "loss": 0.3049, "step": 486 }, { "epoch": 0.11, "learning_rate": 1.9691885947921957e-05, "loss": 1.0449, "step": 487 }, { "epoch": 0.11, "learning_rate": 1.9690131943571906e-05, "loss": 1.08, "step": 488 }, { "epoch": 0.11, "learning_rate": 1.96883730394259e-05, "loss": 1.1018, "step": 489 }, { "epoch": 0.11, "learning_rate": 1.9686609236373333e-05, "loss": 1.0452, "step": 490 }, { "epoch": 0.11, "learning_rate": 1.968484053530606e-05, "loss": 1.0214, "step": 491 }, { "epoch": 0.11, "learning_rate": 1.9683066937118423e-05, "loss": 1.0708, "step": 492 }, { "epoch": 0.11, "learning_rate": 1.9681288442707246e-05, "loss": 1.0314, "step": 493 }, { "epoch": 0.11, "learning_rate": 1.967950505297181e-05, "loss": 1.0641, "step": 494 }, { "epoch": 0.11, "learning_rate": 1.9677716768813893e-05, "loss": 1.1007, "step": 495 }, { "epoch": 0.11, "learning_rate": 1.967592359113773e-05, "loss": 1.0864, "step": 496 }, { "epoch": 0.11, "learning_rate": 1.9674125520850036e-05, "loss": 1.0977, "step": 497 }, { "epoch": 0.11, "learning_rate": 1.967232255886001e-05, "loss": 1.0218, "step": 498 }, { "epoch": 0.11, "learning_rate": 1.967051470607931e-05, "loss": 1.035, "step": 499 }, { "epoch": 0.11, "learning_rate": 1.9668701963422077e-05, "loss": 1.0527, "step": 500 }, { "epoch": 0.11, "learning_rate": 1.9666884331804916e-05, "loss": 1.0297, "step": 501 }, { "epoch": 0.11, "learning_rate": 1.9665061812146912e-05, "loss": 1.0639, "step": 502 }, { "epoch": 0.11, "learning_rate": 1.966323440536962e-05, "loss": 1.0975, "step": 503 }, { "epoch": 0.11, "learning_rate": 1.966140211239706e-05, "loss": 1.0331, "step": 504 }, { "epoch": 0.11, "learning_rate": 1.9659564934155733e-05, "loss": 1.0938, "step": 505 }, { "epoch": 0.11, "learning_rate": 1.9657722871574602e-05, "loss": 1.003, "step": 506 }, { "epoch": 0.11, "learning_rate": 1.9655875925585096e-05, "loss": 0.9953, "step": 507 }, { "epoch": 0.11, "learning_rate": 1.965402409712113e-05, "loss": 1.0828, "step": 508 }, { "epoch": 0.11, "learning_rate": 1.965216738711907e-05, "loss": 1.06, "step": 509 }, { "epoch": 0.11, "learning_rate": 1.965030579651776e-05, "loss": 0.2977, "step": 510 }, { "epoch": 0.11, "learning_rate": 1.964843932625851e-05, "loss": 1.0799, "step": 511 }, { "epoch": 0.11, "learning_rate": 1.96465679772851e-05, "loss": 1.057, "step": 512 }, { "epoch": 0.11, "learning_rate": 1.964469175054377e-05, "loss": 1.0721, "step": 513 }, { "epoch": 0.11, "learning_rate": 1.9642810646983225e-05, "loss": 1.0947, "step": 514 }, { "epoch": 0.11, "learning_rate": 1.9640924667554654e-05, "loss": 1.0634, "step": 515 }, { "epoch": 0.11, "learning_rate": 1.963903381321169e-05, "loss": 0.3042, "step": 516 }, { "epoch": 0.11, "learning_rate": 1.963713808491044e-05, "loss": 1.0654, "step": 517 }, { "epoch": 0.11, "learning_rate": 1.963523748360948e-05, "loss": 0.305, "step": 518 }, { "epoch": 0.11, "learning_rate": 1.9633332010269842e-05, "loss": 1.0107, "step": 519 }, { "epoch": 0.11, "learning_rate": 1.9631421665855023e-05, "loss": 1.0842, "step": 520 }, { "epoch": 0.11, "learning_rate": 1.9629506451330988e-05, "loss": 1.0025, "step": 521 }, { "epoch": 0.11, "learning_rate": 1.962758636766616e-05, "loss": 0.9728, "step": 522 }, { "epoch": 0.11, "learning_rate": 1.962566141583143e-05, "loss": 1.008, "step": 523 }, { "epoch": 0.12, "learning_rate": 1.9623731596800137e-05, "loss": 1.0791, "step": 524 }, { "epoch": 0.12, "learning_rate": 1.9621796911548097e-05, "loss": 1.0668, "step": 525 }, { "epoch": 0.12, "learning_rate": 1.9619857361053575e-05, "loss": 1.0362, "step": 526 }, { "epoch": 0.12, "learning_rate": 1.9617912946297308e-05, "loss": 1.1061, "step": 527 }, { "epoch": 0.12, "learning_rate": 1.961596366826248e-05, "loss": 0.3209, "step": 528 }, { "epoch": 0.12, "learning_rate": 1.9614009527934738e-05, "loss": 1.0326, "step": 529 }, { "epoch": 0.12, "learning_rate": 1.9612050526302195e-05, "loss": 1.0438, "step": 530 }, { "epoch": 0.12, "learning_rate": 1.9610086664355414e-05, "loss": 0.9815, "step": 531 }, { "epoch": 0.12, "learning_rate": 1.9608117943087415e-05, "loss": 0.9787, "step": 532 }, { "epoch": 0.12, "learning_rate": 1.9606144363493678e-05, "loss": 1.095, "step": 533 }, { "epoch": 0.12, "learning_rate": 1.9604165926572145e-05, "loss": 1.0183, "step": 534 }, { "epoch": 0.12, "learning_rate": 1.9602182633323205e-05, "loss": 1.0237, "step": 535 }, { "epoch": 0.12, "learning_rate": 1.9600194484749704e-05, "loss": 1.1186, "step": 536 }, { "epoch": 0.12, "learning_rate": 1.9598201481856953e-05, "loss": 0.9495, "step": 537 }, { "epoch": 0.12, "learning_rate": 1.95962036256527e-05, "loss": 0.9796, "step": 538 }, { "epoch": 0.12, "learning_rate": 1.9594200917147166e-05, "loss": 1.0421, "step": 539 }, { "epoch": 0.12, "learning_rate": 1.9592193357353012e-05, "loss": 1.031, "step": 540 }, { "epoch": 0.12, "learning_rate": 1.9590180947285354e-05, "loss": 1.0364, "step": 541 }, { "epoch": 0.12, "learning_rate": 1.958816368796177e-05, "loss": 1.0167, "step": 542 }, { "epoch": 0.12, "learning_rate": 1.9586141580402277e-05, "loss": 1.0828, "step": 543 }, { "epoch": 0.12, "learning_rate": 1.9584114625629353e-05, "loss": 1.0222, "step": 544 }, { "epoch": 0.12, "learning_rate": 1.9582082824667924e-05, "loss": 1.1111, "step": 545 }, { "epoch": 0.12, "learning_rate": 1.9580046178545365e-05, "loss": 1.0622, "step": 546 }, { "epoch": 0.12, "learning_rate": 1.95780046882915e-05, "loss": 1.0043, "step": 547 }, { "epoch": 0.12, "learning_rate": 1.9575958354938608e-05, "loss": 1.0179, "step": 548 }, { "epoch": 0.12, "learning_rate": 1.957390717952141e-05, "loss": 1.0333, "step": 549 }, { "epoch": 0.12, "learning_rate": 1.9571851163077082e-05, "loss": 1.0787, "step": 550 }, { "epoch": 0.12, "learning_rate": 1.9569790306645246e-05, "loss": 0.9926, "step": 551 }, { "epoch": 0.12, "learning_rate": 1.9567724611267962e-05, "loss": 1.0076, "step": 552 }, { "epoch": 0.12, "learning_rate": 1.956565407798975e-05, "loss": 1.0394, "step": 553 }, { "epoch": 0.12, "learning_rate": 1.9563578707857577e-05, "loss": 0.9987, "step": 554 }, { "epoch": 0.12, "learning_rate": 1.956149850192084e-05, "loss": 1.0548, "step": 555 }, { "epoch": 0.12, "learning_rate": 1.9559413461231395e-05, "loss": 1.0765, "step": 556 }, { "epoch": 0.12, "learning_rate": 1.9557323586843538e-05, "loss": 1.0582, "step": 557 }, { "epoch": 0.12, "learning_rate": 1.9555228879814012e-05, "loss": 0.9955, "step": 558 }, { "epoch": 0.12, "learning_rate": 1.9553129341202e-05, "loss": 0.9694, "step": 559 }, { "epoch": 0.12, "learning_rate": 1.9551024972069127e-05, "loss": 1.0284, "step": 560 }, { "epoch": 0.12, "learning_rate": 1.9548915773479465e-05, "loss": 1.0218, "step": 561 }, { "epoch": 0.12, "learning_rate": 1.954680174649953e-05, "loss": 1.0322, "step": 562 }, { "epoch": 0.12, "learning_rate": 1.954468289219827e-05, "loss": 1.0012, "step": 563 }, { "epoch": 0.12, "learning_rate": 1.9542559211647082e-05, "loss": 1.0185, "step": 564 }, { "epoch": 0.12, "learning_rate": 1.9540430705919798e-05, "loss": 1.0197, "step": 565 }, { "epoch": 0.12, "learning_rate": 1.9538297376092695e-05, "loss": 1.0054, "step": 566 }, { "epoch": 0.12, "learning_rate": 1.953615922324449e-05, "loss": 1.0318, "step": 567 }, { "epoch": 0.12, "learning_rate": 1.9534016248456332e-05, "loss": 1.0177, "step": 568 }, { "epoch": 0.12, "learning_rate": 1.953186845281181e-05, "loss": 1.0809, "step": 569 }, { "epoch": 0.13, "learning_rate": 1.9529715837396956e-05, "loss": 1.0299, "step": 570 }, { "epoch": 0.13, "learning_rate": 1.9527558403300238e-05, "loss": 1.0745, "step": 571 }, { "epoch": 0.13, "learning_rate": 1.9525396151612552e-05, "loss": 1.0785, "step": 572 }, { "epoch": 0.13, "learning_rate": 1.9523229083427242e-05, "loss": 1.0395, "step": 573 }, { "epoch": 0.13, "learning_rate": 1.9521057199840078e-05, "loss": 0.9943, "step": 574 }, { "epoch": 0.13, "learning_rate": 1.9518880501949267e-05, "loss": 1.039, "step": 575 }, { "epoch": 0.13, "learning_rate": 1.951669899085546e-05, "loss": 1.0478, "step": 576 }, { "epoch": 0.13, "learning_rate": 1.9514512667661727e-05, "loss": 1.0366, "step": 577 }, { "epoch": 0.13, "learning_rate": 1.9512321533473578e-05, "loss": 1.0456, "step": 578 }, { "epoch": 0.13, "learning_rate": 1.951012558939896e-05, "loss": 1.0024, "step": 579 }, { "epoch": 0.13, "learning_rate": 1.9507924836548244e-05, "loss": 0.9935, "step": 580 }, { "epoch": 0.13, "learning_rate": 1.950571927603423e-05, "loss": 0.298, "step": 581 }, { "epoch": 0.13, "learning_rate": 1.9503508908972173e-05, "loss": 1.0718, "step": 582 }, { "epoch": 0.13, "learning_rate": 1.950129373647972e-05, "loss": 1.0485, "step": 583 }, { "epoch": 0.13, "learning_rate": 1.949907375967699e-05, "loss": 1.0459, "step": 584 }, { "epoch": 0.13, "learning_rate": 1.9496848979686493e-05, "loss": 1.0337, "step": 585 }, { "epoch": 0.13, "learning_rate": 1.9494619397633186e-05, "loss": 1.0486, "step": 586 }, { "epoch": 0.13, "learning_rate": 1.9492385014644463e-05, "loss": 1.0624, "step": 587 }, { "epoch": 0.13, "learning_rate": 1.949014583185013e-05, "loss": 1.0013, "step": 588 }, { "epoch": 0.13, "learning_rate": 1.948790185038242e-05, "loss": 1.0538, "step": 589 }, { "epoch": 0.13, "learning_rate": 1.9485653071376004e-05, "loss": 1.075, "step": 590 }, { "epoch": 0.13, "learning_rate": 1.9483399495967973e-05, "loss": 1.0061, "step": 591 }, { "epoch": 0.13, "learning_rate": 1.9481141125297838e-05, "loss": 1.0333, "step": 592 }, { "epoch": 0.13, "learning_rate": 1.9478877960507543e-05, "loss": 1.0377, "step": 593 }, { "epoch": 0.13, "learning_rate": 1.9476610002741452e-05, "loss": 1.0657, "step": 594 }, { "epoch": 0.13, "learning_rate": 1.947433725314636e-05, "loss": 1.0037, "step": 595 }, { "epoch": 0.13, "learning_rate": 1.9472059712871464e-05, "loss": 1.0093, "step": 596 }, { "epoch": 0.13, "learning_rate": 1.9469777383068412e-05, "loss": 1.0496, "step": 597 }, { "epoch": 0.13, "learning_rate": 1.946749026489125e-05, "loss": 1.0564, "step": 598 }, { "epoch": 0.13, "learning_rate": 1.9465198359496455e-05, "loss": 1.0428, "step": 599 }, { "epoch": 0.13, "learning_rate": 1.946290166804293e-05, "loss": 1.0492, "step": 600 }, { "epoch": 0.13, "learning_rate": 1.946060019169199e-05, "loss": 1.0683, "step": 601 }, { "epoch": 0.13, "learning_rate": 1.945829393160737e-05, "loss": 0.9506, "step": 602 }, { "epoch": 0.13, "learning_rate": 1.9455982888955232e-05, "loss": 1.0738, "step": 603 }, { "epoch": 0.13, "learning_rate": 1.9453667064904143e-05, "loss": 1.0218, "step": 604 }, { "epoch": 0.13, "learning_rate": 1.94513464606251e-05, "loss": 0.9988, "step": 605 }, { "epoch": 0.13, "learning_rate": 1.9449021077291506e-05, "loss": 1.0229, "step": 606 }, { "epoch": 0.13, "learning_rate": 1.944669091607919e-05, "loss": 1.0141, "step": 607 }, { "epoch": 0.13, "learning_rate": 1.9444355978166394e-05, "loss": 1.005, "step": 608 }, { "epoch": 0.13, "learning_rate": 1.9442016264733773e-05, "loss": 1.0734, "step": 609 }, { "epoch": 0.13, "learning_rate": 1.94396717769644e-05, "loss": 1.0198, "step": 610 }, { "epoch": 0.13, "learning_rate": 1.9437322516043766e-05, "loss": 1.0408, "step": 611 }, { "epoch": 0.13, "learning_rate": 1.943496848315976e-05, "loss": 1.0728, "step": 612 }, { "epoch": 0.13, "learning_rate": 1.9432609679502694e-05, "loss": 1.0284, "step": 613 }, { "epoch": 0.13, "learning_rate": 1.9430246106265303e-05, "loss": 1.0415, "step": 614 }, { "epoch": 0.14, "learning_rate": 1.9427877764642714e-05, "loss": 1.0409, "step": 615 }, { "epoch": 0.14, "learning_rate": 1.9425504655832474e-05, "loss": 0.9957, "step": 616 }, { "epoch": 0.14, "learning_rate": 1.9423126781034547e-05, "loss": 1.0346, "step": 617 }, { "epoch": 0.14, "learning_rate": 1.9420744141451296e-05, "loss": 1.0253, "step": 618 }, { "epoch": 0.14, "learning_rate": 1.9418356738287503e-05, "loss": 1.0375, "step": 619 }, { "epoch": 0.14, "learning_rate": 1.9415964572750347e-05, "loss": 1.0244, "step": 620 }, { "epoch": 0.14, "learning_rate": 1.9413567646049427e-05, "loss": 1.0369, "step": 621 }, { "epoch": 0.14, "learning_rate": 1.941116595939674e-05, "loss": 0.9589, "step": 622 }, { "epoch": 0.14, "learning_rate": 1.9408759514006695e-05, "loss": 1.0175, "step": 623 }, { "epoch": 0.14, "learning_rate": 1.9406348311096108e-05, "loss": 1.0407, "step": 624 }, { "epoch": 0.14, "learning_rate": 1.94039323518842e-05, "loss": 1.0754, "step": 625 }, { "epoch": 0.14, "learning_rate": 1.9401511637592593e-05, "loss": 1.0474, "step": 626 }, { "epoch": 0.14, "learning_rate": 1.939908616944532e-05, "loss": 0.9622, "step": 627 }, { "epoch": 0.14, "learning_rate": 1.939665594866881e-05, "loss": 1.026, "step": 628 }, { "epoch": 0.14, "learning_rate": 1.93942209764919e-05, "loss": 1.0217, "step": 629 }, { "epoch": 0.14, "learning_rate": 1.9391781254145833e-05, "loss": 1.0197, "step": 630 }, { "epoch": 0.14, "learning_rate": 1.9389336782864244e-05, "loss": 0.9566, "step": 631 }, { "epoch": 0.14, "learning_rate": 1.938688756388318e-05, "loss": 1.0064, "step": 632 }, { "epoch": 0.14, "learning_rate": 1.938443359844108e-05, "loss": 1.0315, "step": 633 }, { "epoch": 0.14, "learning_rate": 1.9381974887778787e-05, "loss": 1.0365, "step": 634 }, { "epoch": 0.14, "learning_rate": 1.9379511433139547e-05, "loss": 1.0366, "step": 635 }, { "epoch": 0.14, "learning_rate": 1.9377043235768997e-05, "loss": 0.9872, "step": 636 }, { "epoch": 0.14, "learning_rate": 1.9374570296915176e-05, "loss": 1.0167, "step": 637 }, { "epoch": 0.14, "learning_rate": 1.9372092617828524e-05, "loss": 0.9837, "step": 638 }, { "epoch": 0.14, "learning_rate": 1.936961019976187e-05, "loss": 1.0305, "step": 639 }, { "epoch": 0.14, "learning_rate": 1.9367123043970452e-05, "loss": 1.0285, "step": 640 }, { "epoch": 0.14, "learning_rate": 1.9364631151711887e-05, "loss": 1.0415, "step": 641 }, { "epoch": 0.14, "learning_rate": 1.9362134524246196e-05, "loss": 1.0433, "step": 642 }, { "epoch": 0.14, "learning_rate": 1.9359633162835796e-05, "loss": 0.9551, "step": 643 }, { "epoch": 0.14, "learning_rate": 1.9357127068745497e-05, "loss": 0.9815, "step": 644 }, { "epoch": 0.14, "learning_rate": 1.93546162432425e-05, "loss": 1.0802, "step": 645 }, { "epoch": 0.14, "learning_rate": 1.9352100687596394e-05, "loss": 0.9978, "step": 646 }, { "epoch": 0.14, "learning_rate": 1.934958040307917e-05, "loss": 1.049, "step": 647 }, { "epoch": 0.14, "learning_rate": 1.93470553909652e-05, "loss": 1.0424, "step": 648 }, { "epoch": 0.14, "learning_rate": 1.934452565253126e-05, "loss": 1.0935, "step": 649 }, { "epoch": 0.14, "learning_rate": 1.9341991189056498e-05, "loss": 1.0377, "step": 650 }, { "epoch": 0.14, "learning_rate": 1.9339452001822468e-05, "loss": 1.086, "step": 651 }, { "epoch": 0.14, "learning_rate": 1.9336908092113098e-05, "loss": 1.0621, "step": 652 }, { "epoch": 0.14, "learning_rate": 1.9334359461214712e-05, "loss": 0.3167, "step": 653 }, { "epoch": 0.14, "learning_rate": 1.9331806110416027e-05, "loss": 1.0268, "step": 654 }, { "epoch": 0.14, "learning_rate": 1.9329248041008134e-05, "loss": 0.9938, "step": 655 }, { "epoch": 0.14, "learning_rate": 1.9326685254284517e-05, "loss": 1.019, "step": 656 }, { "epoch": 0.14, "learning_rate": 1.932411775154104e-05, "loss": 0.9745, "step": 657 }, { "epoch": 0.14, "learning_rate": 1.9321545534075962e-05, "loss": 0.9864, "step": 658 }, { "epoch": 0.14, "learning_rate": 1.9318968603189917e-05, "loss": 1.0687, "step": 659 }, { "epoch": 0.14, "learning_rate": 1.9316386960185922e-05, "loss": 1.0561, "step": 660 }, { "epoch": 0.15, "learning_rate": 1.9313800606369387e-05, "loss": 1.0266, "step": 661 }, { "epoch": 0.15, "learning_rate": 1.931120954304809e-05, "loss": 0.9644, "step": 662 }, { "epoch": 0.15, "learning_rate": 1.93086137715322e-05, "loss": 1.0517, "step": 663 }, { "epoch": 0.15, "learning_rate": 1.9306013293134264e-05, "loss": 1.0173, "step": 664 }, { "epoch": 0.15, "learning_rate": 1.9303408109169205e-05, "loss": 1.0045, "step": 665 }, { "epoch": 0.15, "learning_rate": 1.9300798220954332e-05, "loss": 1.0421, "step": 666 }, { "epoch": 0.15, "learning_rate": 1.929818362980933e-05, "loss": 1.0289, "step": 667 }, { "epoch": 0.15, "learning_rate": 1.929556433705626e-05, "loss": 1.0164, "step": 668 }, { "epoch": 0.15, "learning_rate": 1.9292940344019568e-05, "loss": 1.0083, "step": 669 }, { "epoch": 0.15, "learning_rate": 1.9290311652026065e-05, "loss": 1.0099, "step": 670 }, { "epoch": 0.15, "learning_rate": 1.9287678262404944e-05, "loss": 0.3124, "step": 671 }, { "epoch": 0.15, "learning_rate": 1.9285040176487773e-05, "loss": 0.2896, "step": 672 }, { "epoch": 0.15, "learning_rate": 1.9282397395608502e-05, "loss": 1.0257, "step": 673 }, { "epoch": 0.15, "learning_rate": 1.9279749921103436e-05, "loss": 1.053, "step": 674 }, { "epoch": 0.15, "learning_rate": 1.9277097754311277e-05, "loss": 1.0036, "step": 675 }, { "epoch": 0.15, "learning_rate": 1.9274440896573078e-05, "loss": 1.094, "step": 676 }, { "epoch": 0.15, "learning_rate": 1.9271779349232277e-05, "loss": 1.0155, "step": 677 }, { "epoch": 0.15, "learning_rate": 1.9269113113634685e-05, "loss": 1.0658, "step": 678 }, { "epoch": 0.15, "learning_rate": 1.9266442191128476e-05, "loss": 1.011, "step": 679 }, { "epoch": 0.15, "learning_rate": 1.9263766583064193e-05, "loss": 1.0656, "step": 680 }, { "epoch": 0.15, "learning_rate": 1.926108629079476e-05, "loss": 1.0107, "step": 681 }, { "epoch": 0.15, "learning_rate": 1.925840131567545e-05, "loss": 1.0322, "step": 682 }, { "epoch": 0.15, "learning_rate": 1.925571165906392e-05, "loss": 1.0148, "step": 683 }, { "epoch": 0.15, "learning_rate": 1.9253017322320197e-05, "loss": 0.9981, "step": 684 }, { "epoch": 0.15, "learning_rate": 1.925031830680666e-05, "loss": 1.0232, "step": 685 }, { "epoch": 0.15, "learning_rate": 1.924761461388806e-05, "loss": 1.0342, "step": 686 }, { "epoch": 0.15, "learning_rate": 1.9244906244931516e-05, "loss": 0.9837, "step": 687 }, { "epoch": 0.15, "learning_rate": 1.924219320130651e-05, "loss": 1.0344, "step": 688 }, { "epoch": 0.15, "learning_rate": 1.9239475484384884e-05, "loss": 1.014, "step": 689 }, { "epoch": 0.15, "learning_rate": 1.923675309554085e-05, "loss": 1.0164, "step": 690 }, { "epoch": 0.15, "learning_rate": 1.9234026036150977e-05, "loss": 0.2866, "step": 691 }, { "epoch": 0.15, "learning_rate": 1.9231294307594198e-05, "loss": 1.0156, "step": 692 }, { "epoch": 0.15, "learning_rate": 1.9228557911251804e-05, "loss": 1.0387, "step": 693 }, { "epoch": 0.15, "learning_rate": 1.9225816848507447e-05, "loss": 1.0144, "step": 694 }, { "epoch": 0.15, "learning_rate": 1.9223071120747145e-05, "loss": 1.02, "step": 695 }, { "epoch": 0.15, "learning_rate": 1.922032072935927e-05, "loss": 1.0263, "step": 696 }, { "epoch": 0.15, "learning_rate": 1.9217565675734545e-05, "loss": 1.0125, "step": 697 }, { "epoch": 0.15, "learning_rate": 1.921480596126606e-05, "loss": 0.319, "step": 698 }, { "epoch": 0.15, "learning_rate": 1.921204158734927e-05, "loss": 1.0172, "step": 699 }, { "epoch": 0.15, "learning_rate": 1.920927255538196e-05, "loss": 1.0301, "step": 700 }, { "epoch": 0.15, "learning_rate": 1.920649886676429e-05, "loss": 1.0383, "step": 701 }, { "epoch": 0.15, "learning_rate": 1.9203720522898773e-05, "loss": 0.9591, "step": 702 }, { "epoch": 0.15, "learning_rate": 1.9200937525190272e-05, "loss": 0.9658, "step": 703 }, { "epoch": 0.15, "learning_rate": 1.9198149875046003e-05, "loss": 1.0505, "step": 704 }, { "epoch": 0.15, "learning_rate": 1.9195357573875537e-05, "loss": 1.0338, "step": 705 }, { "epoch": 0.15, "learning_rate": 1.9192560623090796e-05, "loss": 0.9873, "step": 706 }, { "epoch": 0.16, "learning_rate": 1.9189759024106048e-05, "loss": 1.0528, "step": 707 }, { "epoch": 0.16, "learning_rate": 1.9186952778337925e-05, "loss": 1.0452, "step": 708 }, { "epoch": 0.16, "learning_rate": 1.9184141887205393e-05, "loss": 0.3381, "step": 709 }, { "epoch": 0.16, "learning_rate": 1.9181326352129773e-05, "loss": 1.0436, "step": 710 }, { "epoch": 0.16, "learning_rate": 1.9178506174534736e-05, "loss": 1.0251, "step": 711 }, { "epoch": 0.16, "learning_rate": 1.91756813558463e-05, "loss": 1.0881, "step": 712 }, { "epoch": 0.16, "learning_rate": 1.917285189749283e-05, "loss": 0.2917, "step": 713 }, { "epoch": 0.16, "learning_rate": 1.9170017800905035e-05, "loss": 1.052, "step": 714 }, { "epoch": 0.16, "learning_rate": 1.916717906751597e-05, "loss": 1.0037, "step": 715 }, { "epoch": 0.16, "learning_rate": 1.9164335698761034e-05, "loss": 1.051, "step": 716 }, { "epoch": 0.16, "learning_rate": 1.9161487696077975e-05, "loss": 1.0109, "step": 717 }, { "epoch": 0.16, "learning_rate": 1.9158635060906872e-05, "loss": 1.0416, "step": 718 }, { "epoch": 0.16, "learning_rate": 1.915577779469016e-05, "loss": 1.0048, "step": 719 }, { "epoch": 0.16, "learning_rate": 1.915291589887261e-05, "loss": 1.0663, "step": 720 }, { "epoch": 0.16, "learning_rate": 1.9150049374901328e-05, "loss": 1.0488, "step": 721 }, { "epoch": 0.16, "learning_rate": 1.9147178224225778e-05, "loss": 1.0037, "step": 722 }, { "epoch": 0.16, "learning_rate": 1.914430244829774e-05, "loss": 1.105, "step": 723 }, { "epoch": 0.16, "learning_rate": 1.9141422048571352e-05, "loss": 1.0236, "step": 724 }, { "epoch": 0.16, "learning_rate": 1.9138537026503076e-05, "loss": 0.3273, "step": 725 }, { "epoch": 0.16, "learning_rate": 1.9135647383551723e-05, "loss": 1.0314, "step": 726 }, { "epoch": 0.16, "learning_rate": 1.9132753121178433e-05, "loss": 1.0065, "step": 727 }, { "epoch": 0.16, "learning_rate": 1.9129854240846685e-05, "loss": 0.9936, "step": 728 }, { "epoch": 0.16, "learning_rate": 1.9126950744022287e-05, "loss": 1.0496, "step": 729 }, { "epoch": 0.16, "learning_rate": 1.9124042632173398e-05, "loss": 0.977, "step": 730 }, { "epoch": 0.16, "learning_rate": 1.9121129906770486e-05, "loss": 0.947, "step": 731 }, { "epoch": 0.16, "learning_rate": 1.911821256928637e-05, "loss": 0.9737, "step": 732 }, { "epoch": 0.16, "learning_rate": 1.9115290621196202e-05, "loss": 1.0637, "step": 733 }, { "epoch": 0.16, "learning_rate": 1.9112364063977447e-05, "loss": 1.0443, "step": 734 }, { "epoch": 0.16, "learning_rate": 1.9109432899109923e-05, "loss": 1.0637, "step": 735 }, { "epoch": 0.16, "learning_rate": 1.9106497128075765e-05, "loss": 1.003, "step": 736 }, { "epoch": 0.16, "learning_rate": 1.910355675235944e-05, "loss": 1.0682, "step": 737 }, { "epoch": 0.16, "learning_rate": 1.9100611773447734e-05, "loss": 1.0195, "step": 738 }, { "epoch": 0.16, "learning_rate": 1.9097662192829786e-05, "loss": 0.9776, "step": 739 }, { "epoch": 0.16, "learning_rate": 1.9094708011997033e-05, "loss": 1.051, "step": 740 }, { "epoch": 0.16, "learning_rate": 1.909174923244326e-05, "loss": 0.9778, "step": 741 }, { "epoch": 0.16, "learning_rate": 1.9088785855664557e-05, "loss": 0.9625, "step": 742 }, { "epoch": 0.16, "learning_rate": 1.908581788315936e-05, "loss": 0.9577, "step": 743 }, { "epoch": 0.16, "learning_rate": 1.908284531642841e-05, "loss": 0.9894, "step": 744 }, { "epoch": 0.16, "learning_rate": 1.9079868156974788e-05, "loss": 0.9838, "step": 745 }, { "epoch": 0.16, "learning_rate": 1.9076886406303882e-05, "loss": 0.9838, "step": 746 }, { "epoch": 0.16, "learning_rate": 1.907390006592341e-05, "loss": 0.9211, "step": 747 }, { "epoch": 0.16, "learning_rate": 1.907090913734341e-05, "loss": 0.9844, "step": 748 }, { "epoch": 0.16, "learning_rate": 1.9067913622076236e-05, "loss": 1.0217, "step": 749 }, { "epoch": 0.16, "learning_rate": 1.9064913521636574e-05, "loss": 1.0214, "step": 750 }, { "epoch": 0.16, "learning_rate": 1.9061908837541404e-05, "loss": 0.9885, "step": 751 }, { "epoch": 0.17, "learning_rate": 1.905889957131005e-05, "loss": 0.9772, "step": 752 }, { "epoch": 0.17, "learning_rate": 1.905588572446414e-05, "loss": 1.0337, "step": 753 }, { "epoch": 0.17, "learning_rate": 1.9052867298527612e-05, "loss": 1.07, "step": 754 }, { "epoch": 0.17, "learning_rate": 1.9049844295026738e-05, "loss": 0.9983, "step": 755 }, { "epoch": 0.17, "learning_rate": 1.9046816715490085e-05, "loss": 0.9776, "step": 756 }, { "epoch": 0.17, "learning_rate": 1.9043784561448547e-05, "loss": 0.9996, "step": 757 }, { "epoch": 0.17, "learning_rate": 1.904074783443533e-05, "loss": 0.9985, "step": 758 }, { "epoch": 0.17, "learning_rate": 1.9037706535985937e-05, "loss": 1.0015, "step": 759 }, { "epoch": 0.17, "learning_rate": 1.9034660667638206e-05, "loss": 0.3108, "step": 760 }, { "epoch": 0.17, "learning_rate": 1.903161023093227e-05, "loss": 1.0031, "step": 761 }, { "epoch": 0.17, "learning_rate": 1.9028555227410575e-05, "loss": 0.9542, "step": 762 }, { "epoch": 0.17, "learning_rate": 1.902549565861788e-05, "loss": 0.9988, "step": 763 }, { "epoch": 0.17, "learning_rate": 1.902243152610125e-05, "loss": 1.0254, "step": 764 }, { "epoch": 0.17, "learning_rate": 1.9019362831410057e-05, "loss": 1.0584, "step": 765 }, { "epoch": 0.17, "learning_rate": 1.9016289576095977e-05, "loss": 1.0201, "step": 766 }, { "epoch": 0.17, "learning_rate": 1.9013211761713e-05, "loss": 1.0018, "step": 767 }, { "epoch": 0.17, "learning_rate": 1.9010129389817412e-05, "loss": 0.9796, "step": 768 }, { "epoch": 0.17, "learning_rate": 1.9007042461967817e-05, "loss": 1.0833, "step": 769 }, { "epoch": 0.17, "learning_rate": 1.9003950979725103e-05, "loss": 1.0193, "step": 770 }, { "epoch": 0.17, "learning_rate": 1.900085494465248e-05, "loss": 0.9912, "step": 771 }, { "epoch": 0.17, "learning_rate": 1.899775435831545e-05, "loss": 0.9703, "step": 772 }, { "epoch": 0.17, "learning_rate": 1.899464922228181e-05, "loss": 1.0641, "step": 773 }, { "epoch": 0.17, "learning_rate": 1.899153953812168e-05, "loss": 1.0806, "step": 774 }, { "epoch": 0.17, "learning_rate": 1.8988425307407458e-05, "loss": 1.0012, "step": 775 }, { "epoch": 0.17, "learning_rate": 1.8985306531713846e-05, "loss": 0.3103, "step": 776 }, { "epoch": 0.17, "learning_rate": 1.8982183212617853e-05, "loss": 1.0133, "step": 777 }, { "epoch": 0.17, "learning_rate": 1.8979055351698776e-05, "loss": 1.0724, "step": 778 }, { "epoch": 0.17, "learning_rate": 1.897592295053821e-05, "loss": 0.9838, "step": 779 }, { "epoch": 0.17, "learning_rate": 1.8972786010720046e-05, "loss": 1.0206, "step": 780 }, { "epoch": 0.17, "learning_rate": 1.8969644533830483e-05, "loss": 1.0037, "step": 781 }, { "epoch": 0.17, "learning_rate": 1.896649852145799e-05, "loss": 1.0066, "step": 782 }, { "epoch": 0.17, "learning_rate": 1.8963347975193346e-05, "loss": 0.9784, "step": 783 }, { "epoch": 0.17, "learning_rate": 1.8960192896629616e-05, "loss": 1.0275, "step": 784 }, { "epoch": 0.17, "learning_rate": 1.8957033287362167e-05, "loss": 0.9781, "step": 785 }, { "epoch": 0.17, "learning_rate": 1.895386914898864e-05, "loss": 0.9937, "step": 786 }, { "epoch": 0.17, "learning_rate": 1.895070048310898e-05, "loss": 1.0024, "step": 787 }, { "epoch": 0.17, "learning_rate": 1.8947527291325417e-05, "loss": 1.1119, "step": 788 }, { "epoch": 0.17, "learning_rate": 1.8944349575242465e-05, "loss": 1.0344, "step": 789 }, { "epoch": 0.17, "learning_rate": 1.8941167336466932e-05, "loss": 1.0243, "step": 790 }, { "epoch": 0.17, "learning_rate": 1.8937980576607913e-05, "loss": 1.072, "step": 791 }, { "epoch": 0.17, "learning_rate": 1.8934789297276787e-05, "loss": 1.0617, "step": 792 }, { "epoch": 0.17, "learning_rate": 1.8931593500087216e-05, "loss": 1.033, "step": 793 }, { "epoch": 0.17, "learning_rate": 1.892839318665515e-05, "loss": 1.0455, "step": 794 }, { "epoch": 0.17, "learning_rate": 1.8925188358598815e-05, "loss": 0.9564, "step": 795 }, { "epoch": 0.17, "learning_rate": 1.892197901753873e-05, "loss": 0.3033, "step": 796 }, { "epoch": 0.17, "learning_rate": 1.8918765165097696e-05, "loss": 0.3314, "step": 797 }, { "epoch": 0.18, "learning_rate": 1.8915546802900787e-05, "loss": 1.0233, "step": 798 }, { "epoch": 0.18, "learning_rate": 1.8912323932575365e-05, "loss": 1.0876, "step": 799 }, { "epoch": 0.18, "learning_rate": 1.890909655575106e-05, "loss": 1.0293, "step": 800 }, { "epoch": 0.18, "learning_rate": 1.89058646740598e-05, "loss": 1.0192, "step": 801 }, { "epoch": 0.18, "learning_rate": 1.8902628289135768e-05, "loss": 1.0861, "step": 802 }, { "epoch": 0.18, "learning_rate": 1.8899387402615446e-05, "loss": 0.3331, "step": 803 }, { "epoch": 0.18, "learning_rate": 1.8896142016137572e-05, "loss": 1.019, "step": 804 }, { "epoch": 0.18, "learning_rate": 1.8892892131343177e-05, "loss": 0.9624, "step": 805 }, { "epoch": 0.18, "learning_rate": 1.8889637749875555e-05, "loss": 1.0239, "step": 806 }, { "epoch": 0.18, "learning_rate": 1.888637887338028e-05, "loss": 1.0109, "step": 807 }, { "epoch": 0.18, "learning_rate": 1.888311550350519e-05, "loss": 1.0018, "step": 808 }, { "epoch": 0.18, "learning_rate": 1.887984764190041e-05, "loss": 1.077, "step": 809 }, { "epoch": 0.18, "learning_rate": 1.8876575290218323e-05, "loss": 0.915, "step": 810 }, { "epoch": 0.18, "learning_rate": 1.8873298450113585e-05, "loss": 0.9939, "step": 811 }, { "epoch": 0.18, "learning_rate": 1.8870017123243135e-05, "loss": 0.9995, "step": 812 }, { "epoch": 0.18, "learning_rate": 1.8866731311266155e-05, "loss": 1.0224, "step": 813 }, { "epoch": 0.18, "learning_rate": 1.8863441015844116e-05, "loss": 1.0028, "step": 814 }, { "epoch": 0.18, "learning_rate": 1.886014623864075e-05, "loss": 1.0307, "step": 815 }, { "epoch": 0.18, "learning_rate": 1.8856846981322053e-05, "loss": 1.0157, "step": 816 }, { "epoch": 0.18, "learning_rate": 1.885354324555629e-05, "loss": 1.0654, "step": 817 }, { "epoch": 0.18, "learning_rate": 1.8850235033013984e-05, "loss": 1.0291, "step": 818 }, { "epoch": 0.18, "learning_rate": 1.884692234536793e-05, "loss": 1.0296, "step": 819 }, { "epoch": 0.18, "learning_rate": 1.8843605184293177e-05, "loss": 1.0341, "step": 820 }, { "epoch": 0.18, "learning_rate": 1.884028355146705e-05, "loss": 1.0532, "step": 821 }, { "epoch": 0.18, "learning_rate": 1.883695744856912e-05, "loss": 0.9574, "step": 822 }, { "epoch": 0.18, "learning_rate": 1.8833626877281225e-05, "loss": 1.0695, "step": 823 }, { "epoch": 0.18, "learning_rate": 1.883029183928746e-05, "loss": 1.0577, "step": 824 }, { "epoch": 0.18, "learning_rate": 1.8826952336274184e-05, "loss": 1.0509, "step": 825 }, { "epoch": 0.18, "learning_rate": 1.8823608369930007e-05, "loss": 0.9877, "step": 826 }, { "epoch": 0.18, "learning_rate": 1.88202599419458e-05, "loss": 1.0019, "step": 827 }, { "epoch": 0.18, "learning_rate": 1.8816907054014686e-05, "loss": 0.9883, "step": 828 }, { "epoch": 0.18, "learning_rate": 1.8813549707832054e-05, "loss": 1.0143, "step": 829 }, { "epoch": 0.18, "learning_rate": 1.881018790509553e-05, "loss": 0.3187, "step": 830 }, { "epoch": 0.18, "learning_rate": 1.880682164750501e-05, "loss": 0.3289, "step": 831 }, { "epoch": 0.18, "learning_rate": 1.8803450936762634e-05, "loss": 1.03, "step": 832 }, { "epoch": 0.18, "learning_rate": 1.8800075774572794e-05, "loss": 1.017, "step": 833 }, { "epoch": 0.18, "learning_rate": 1.8796696162642135e-05, "loss": 1.0813, "step": 834 }, { "epoch": 0.18, "learning_rate": 1.8793312102679548e-05, "loss": 0.3061, "step": 835 }, { "epoch": 0.18, "learning_rate": 1.8789923596396177e-05, "loss": 1.0015, "step": 836 }, { "epoch": 0.18, "learning_rate": 1.8786530645505417e-05, "loss": 0.9926, "step": 837 }, { "epoch": 0.18, "learning_rate": 1.87831332517229e-05, "loss": 1.0437, "step": 838 }, { "epoch": 0.18, "learning_rate": 1.8779731416766523e-05, "loss": 0.9913, "step": 839 }, { "epoch": 0.18, "learning_rate": 1.8776325142356406e-05, "loss": 0.9902, "step": 840 }, { "epoch": 0.18, "learning_rate": 1.877291443021493e-05, "loss": 0.9769, "step": 841 }, { "epoch": 0.18, "learning_rate": 1.8769499282066716e-05, "loss": 1.0365, "step": 842 }, { "epoch": 0.19, "learning_rate": 1.8766079699638626e-05, "loss": 1.0042, "step": 843 }, { "epoch": 0.19, "learning_rate": 1.876265568465976e-05, "loss": 1.0324, "step": 844 }, { "epoch": 0.19, "learning_rate": 1.8759227238861467e-05, "loss": 0.9712, "step": 845 }, { "epoch": 0.19, "learning_rate": 1.8755794363977336e-05, "loss": 1.0011, "step": 846 }, { "epoch": 0.19, "learning_rate": 1.8752357061743195e-05, "loss": 0.3054, "step": 847 }, { "epoch": 0.19, "learning_rate": 1.8748915333897105e-05, "loss": 1.007, "step": 848 }, { "epoch": 0.19, "learning_rate": 1.874546918217937e-05, "loss": 0.2997, "step": 849 }, { "epoch": 0.19, "learning_rate": 1.874201860833253e-05, "loss": 0.9926, "step": 850 }, { "epoch": 0.19, "learning_rate": 1.8738563614101363e-05, "loss": 0.2912, "step": 851 }, { "epoch": 0.19, "learning_rate": 1.8735104201232874e-05, "loss": 1.0386, "step": 852 }, { "epoch": 0.19, "learning_rate": 1.8731640371476312e-05, "loss": 1.0602, "step": 853 }, { "epoch": 0.19, "learning_rate": 1.872817212658316e-05, "loss": 0.3005, "step": 854 }, { "epoch": 0.19, "learning_rate": 1.8724699468307123e-05, "loss": 1.0724, "step": 855 }, { "epoch": 0.19, "learning_rate": 1.8721222398404145e-05, "loss": 1.0076, "step": 856 }, { "epoch": 0.19, "learning_rate": 1.8717740918632396e-05, "loss": 0.9687, "step": 857 }, { "epoch": 0.19, "learning_rate": 1.871425503075229e-05, "loss": 1.0366, "step": 858 }, { "epoch": 0.19, "learning_rate": 1.871076473652645e-05, "loss": 1.0043, "step": 859 }, { "epoch": 0.19, "learning_rate": 1.8707270037719737e-05, "loss": 0.9917, "step": 860 }, { "epoch": 0.19, "learning_rate": 1.870377093609924e-05, "loss": 0.9962, "step": 861 }, { "epoch": 0.19, "learning_rate": 1.8700267433434275e-05, "loss": 0.9823, "step": 862 }, { "epoch": 0.19, "learning_rate": 1.869675953149638e-05, "loss": 1.0079, "step": 863 }, { "epoch": 0.19, "learning_rate": 1.869324723205932e-05, "loss": 1.0511, "step": 864 }, { "epoch": 0.19, "learning_rate": 1.8689730536899078e-05, "loss": 1.0418, "step": 865 }, { "epoch": 0.19, "learning_rate": 1.8686209447793863e-05, "loss": 1.011, "step": 866 }, { "epoch": 0.19, "learning_rate": 1.8682683966524112e-05, "loss": 0.9624, "step": 867 }, { "epoch": 0.19, "learning_rate": 1.8679154094872477e-05, "loss": 1.045, "step": 868 }, { "epoch": 0.19, "learning_rate": 1.867561983462383e-05, "loss": 1.081, "step": 869 }, { "epoch": 0.19, "learning_rate": 1.867208118756526e-05, "loss": 0.9708, "step": 870 }, { "epoch": 0.19, "learning_rate": 1.8668538155486077e-05, "loss": 1.0052, "step": 871 }, { "epoch": 0.19, "learning_rate": 1.866499074017781e-05, "loss": 1.0219, "step": 872 }, { "epoch": 0.19, "learning_rate": 1.8661438943434205e-05, "loss": 0.9611, "step": 873 }, { "epoch": 0.19, "learning_rate": 1.8657882767051218e-05, "loss": 1.0275, "step": 874 }, { "epoch": 0.19, "learning_rate": 1.8654322212827022e-05, "loss": 1.0233, "step": 875 }, { "epoch": 0.19, "learning_rate": 1.8650757282562006e-05, "loss": 1.0408, "step": 876 }, { "epoch": 0.19, "learning_rate": 1.8647187978058764e-05, "loss": 1.0466, "step": 877 }, { "epoch": 0.19, "learning_rate": 1.8643614301122114e-05, "loss": 1.0284, "step": 878 }, { "epoch": 0.19, "learning_rate": 1.864003625355908e-05, "loss": 0.9763, "step": 879 }, { "epoch": 0.19, "learning_rate": 1.8636453837178893e-05, "loss": 0.9775, "step": 880 }, { "epoch": 0.19, "learning_rate": 1.8632867053792988e-05, "loss": 0.9832, "step": 881 }, { "epoch": 0.19, "learning_rate": 1.8629275905215023e-05, "loss": 0.9883, "step": 882 }, { "epoch": 0.19, "learning_rate": 1.8625680393260852e-05, "loss": 1.0485, "step": 883 }, { "epoch": 0.19, "learning_rate": 1.862208051974854e-05, "loss": 1.0216, "step": 884 }, { "epoch": 0.19, "learning_rate": 1.8618476286498356e-05, "loss": 0.9971, "step": 885 }, { "epoch": 0.19, "learning_rate": 1.861486769533277e-05, "loss": 1.0259, "step": 886 }, { "epoch": 0.19, "learning_rate": 1.8611254748076463e-05, "loss": 1.0142, "step": 887 }, { "epoch": 0.19, "learning_rate": 1.860763744655631e-05, "loss": 0.982, "step": 888 }, { "epoch": 0.2, "learning_rate": 1.8604015792601395e-05, "loss": 1.0093, "step": 889 }, { "epoch": 0.2, "learning_rate": 1.8600389788043003e-05, "loss": 0.9952, "step": 890 }, { "epoch": 0.2, "learning_rate": 1.859675943471461e-05, "loss": 0.9672, "step": 891 }, { "epoch": 0.2, "learning_rate": 1.8593124734451903e-05, "loss": 0.9769, "step": 892 }, { "epoch": 0.2, "learning_rate": 1.8589485689092756e-05, "loss": 1.0278, "step": 893 }, { "epoch": 0.2, "learning_rate": 1.858584230047725e-05, "loss": 1.026, "step": 894 }, { "epoch": 0.2, "learning_rate": 1.8582194570447654e-05, "loss": 0.969, "step": 895 }, { "epoch": 0.2, "learning_rate": 1.8578542500848434e-05, "loss": 1.0291, "step": 896 }, { "epoch": 0.2, "learning_rate": 1.8574886093526253e-05, "loss": 1.0235, "step": 897 }, { "epoch": 0.2, "learning_rate": 1.8571225350329965e-05, "loss": 0.951, "step": 898 }, { "epoch": 0.2, "learning_rate": 1.8567560273110624e-05, "loss": 1.0033, "step": 899 }, { "epoch": 0.2, "learning_rate": 1.856389086372146e-05, "loss": 1.0021, "step": 900 }, { "epoch": 0.2, "learning_rate": 1.856021712401791e-05, "loss": 0.9782, "step": 901 }, { "epoch": 0.2, "learning_rate": 1.8556539055857592e-05, "loss": 1.0146, "step": 902 }, { "epoch": 0.2, "learning_rate": 1.8552856661100307e-05, "loss": 0.9671, "step": 903 }, { "epoch": 0.2, "learning_rate": 1.8549169941608056e-05, "loss": 1.0278, "step": 904 }, { "epoch": 0.2, "learning_rate": 1.854547889924502e-05, "loss": 1.0223, "step": 905 }, { "epoch": 0.2, "learning_rate": 1.8541783535877572e-05, "loss": 1.0151, "step": 906 }, { "epoch": 0.2, "learning_rate": 1.8538083853374257e-05, "loss": 0.9531, "step": 907 }, { "epoch": 0.2, "learning_rate": 1.8534379853605817e-05, "loss": 0.9963, "step": 908 }, { "epoch": 0.2, "learning_rate": 1.8530671538445176e-05, "loss": 0.9945, "step": 909 }, { "epoch": 0.2, "learning_rate": 1.8526958909767425e-05, "loss": 1.0038, "step": 910 }, { "epoch": 0.2, "learning_rate": 1.8523241969449857e-05, "loss": 0.9765, "step": 911 }, { "epoch": 0.2, "learning_rate": 1.8519520719371932e-05, "loss": 0.9398, "step": 912 }, { "epoch": 0.2, "learning_rate": 1.8515795161415294e-05, "loss": 1.0398, "step": 913 }, { "epoch": 0.2, "learning_rate": 1.851206529746376e-05, "loss": 1.0039, "step": 914 }, { "epoch": 0.2, "learning_rate": 1.8508331129403333e-05, "loss": 1.0055, "step": 915 }, { "epoch": 0.2, "learning_rate": 1.8504592659122185e-05, "loss": 1.0378, "step": 916 }, { "epoch": 0.2, "learning_rate": 1.8500849888510668e-05, "loss": 0.9935, "step": 917 }, { "epoch": 0.2, "learning_rate": 1.8497102819461305e-05, "loss": 1.0665, "step": 918 }, { "epoch": 0.2, "learning_rate": 1.8493351453868796e-05, "loss": 0.9998, "step": 919 }, { "epoch": 0.2, "learning_rate": 1.848959579363001e-05, "loss": 1.009, "step": 920 }, { "epoch": 0.2, "learning_rate": 1.8485835840643985e-05, "loss": 0.3059, "step": 921 }, { "epoch": 0.2, "learning_rate": 1.848207159681194e-05, "loss": 0.999, "step": 922 }, { "epoch": 0.2, "learning_rate": 1.847830306403726e-05, "loss": 1.0101, "step": 923 }, { "epoch": 0.2, "learning_rate": 1.8474530244225487e-05, "loss": 1.0248, "step": 924 }, { "epoch": 0.2, "learning_rate": 1.8470753139284344e-05, "loss": 0.9856, "step": 925 }, { "epoch": 0.2, "learning_rate": 1.846697175112372e-05, "loss": 0.9987, "step": 926 }, { "epoch": 0.2, "learning_rate": 1.846318608165566e-05, "loss": 1.039, "step": 927 }, { "epoch": 0.2, "learning_rate": 1.8459396132794387e-05, "loss": 1.0017, "step": 928 }, { "epoch": 0.2, "learning_rate": 1.8455601906456275e-05, "loss": 1.0463, "step": 929 }, { "epoch": 0.2, "learning_rate": 1.8451803404559873e-05, "loss": 0.2832, "step": 930 }, { "epoch": 0.2, "learning_rate": 1.8448000629025878e-05, "loss": 0.9652, "step": 931 }, { "epoch": 0.2, "learning_rate": 1.8444193581777163e-05, "loss": 0.9528, "step": 932 }, { "epoch": 0.2, "learning_rate": 1.8440382264738753e-05, "loss": 0.9961, "step": 933 }, { "epoch": 0.21, "learning_rate": 1.8436566679837825e-05, "loss": 0.9491, "step": 934 }, { "epoch": 0.21, "learning_rate": 1.8432746829003732e-05, "loss": 1.0328, "step": 935 }, { "epoch": 0.21, "learning_rate": 1.842892271416797e-05, "loss": 1.0087, "step": 936 }, { "epoch": 0.21, "learning_rate": 1.84250943372642e-05, "loss": 0.996, "step": 937 }, { "epoch": 0.21, "learning_rate": 1.8421261700228222e-05, "loss": 0.9852, "step": 938 }, { "epoch": 0.21, "learning_rate": 1.841742480499801e-05, "loss": 1.0588, "step": 939 }, { "epoch": 0.21, "learning_rate": 1.841358365351368e-05, "loss": 0.9784, "step": 940 }, { "epoch": 0.21, "learning_rate": 1.84097382477175e-05, "loss": 1.0337, "step": 941 }, { "epoch": 0.21, "learning_rate": 1.84058885895539e-05, "loss": 0.2974, "step": 942 }, { "epoch": 0.21, "learning_rate": 1.840203468096944e-05, "loss": 0.9422, "step": 943 }, { "epoch": 0.21, "learning_rate": 1.8398176523912857e-05, "loss": 0.9621, "step": 944 }, { "epoch": 0.21, "learning_rate": 1.8394314120335002e-05, "loss": 1.0414, "step": 945 }, { "epoch": 0.21, "learning_rate": 1.8390447472188906e-05, "loss": 0.9578, "step": 946 }, { "epoch": 0.21, "learning_rate": 1.8386576581429726e-05, "loss": 1.0355, "step": 947 }, { "epoch": 0.21, "learning_rate": 1.838270145001477e-05, "loss": 0.9975, "step": 948 }, { "epoch": 0.21, "learning_rate": 1.8378822079903492e-05, "loss": 0.9904, "step": 949 }, { "epoch": 0.21, "learning_rate": 1.8374938473057486e-05, "loss": 1.081, "step": 950 }, { "epoch": 0.21, "learning_rate": 1.8371050631440495e-05, "loss": 0.9578, "step": 951 }, { "epoch": 0.21, "learning_rate": 1.836715855701839e-05, "loss": 1.036, "step": 952 }, { "epoch": 0.21, "learning_rate": 1.83632622517592e-05, "loss": 0.9622, "step": 953 }, { "epoch": 0.21, "learning_rate": 1.8359361717633075e-05, "loss": 1.0181, "step": 954 }, { "epoch": 0.21, "learning_rate": 1.835545695661232e-05, "loss": 1.0287, "step": 955 }, { "epoch": 0.21, "learning_rate": 1.8351547970671366e-05, "loss": 1.0373, "step": 956 }, { "epoch": 0.21, "learning_rate": 1.8347634761786786e-05, "loss": 0.8784, "step": 957 }, { "epoch": 0.21, "learning_rate": 1.8343717331937282e-05, "loss": 0.9649, "step": 958 }, { "epoch": 0.21, "learning_rate": 1.83397956831037e-05, "loss": 1.0334, "step": 959 }, { "epoch": 0.21, "learning_rate": 1.8335869817269006e-05, "loss": 0.946, "step": 960 }, { "epoch": 0.21, "learning_rate": 1.8331939736418315e-05, "loss": 1.0504, "step": 961 }, { "epoch": 0.21, "learning_rate": 1.8328005442538863e-05, "loss": 1.0317, "step": 962 }, { "epoch": 0.21, "learning_rate": 1.8324066937620014e-05, "loss": 1.0287, "step": 963 }, { "epoch": 0.21, "learning_rate": 1.8320124223653266e-05, "loss": 1.037, "step": 964 }, { "epoch": 0.21, "learning_rate": 1.8316177302632248e-05, "loss": 1.0158, "step": 965 }, { "epoch": 0.21, "learning_rate": 1.831222617655271e-05, "loss": 0.995, "step": 966 }, { "epoch": 0.21, "learning_rate": 1.830827084741253e-05, "loss": 1.0351, "step": 967 }, { "epoch": 0.21, "learning_rate": 1.8304311317211715e-05, "loss": 0.9861, "step": 968 }, { "epoch": 0.21, "learning_rate": 1.8300347587952393e-05, "loss": 0.9832, "step": 969 }, { "epoch": 0.21, "learning_rate": 1.829637966163881e-05, "loss": 1.0096, "step": 970 }, { "epoch": 0.21, "learning_rate": 1.829240754027735e-05, "loss": 1.019, "step": 971 }, { "epoch": 0.21, "learning_rate": 1.8288431225876505e-05, "loss": 0.9725, "step": 972 }, { "epoch": 0.21, "learning_rate": 1.8284450720446886e-05, "loss": 0.288, "step": 973 }, { "epoch": 0.21, "learning_rate": 1.828046602600123e-05, "loss": 0.9325, "step": 974 }, { "epoch": 0.21, "learning_rate": 1.8276477144554393e-05, "loss": 1.004, "step": 975 }, { "epoch": 0.21, "learning_rate": 1.827248407812334e-05, "loss": 1.0201, "step": 976 }, { "epoch": 0.21, "learning_rate": 1.826848682872716e-05, "loss": 0.995, "step": 977 }, { "epoch": 0.21, "learning_rate": 1.8264485398387056e-05, "loss": 0.9887, "step": 978 }, { "epoch": 0.21, "learning_rate": 1.8260479789126338e-05, "loss": 1.035, "step": 979 }, { "epoch": 0.22, "learning_rate": 1.8256470002970438e-05, "loss": 0.9739, "step": 980 }, { "epoch": 0.22, "learning_rate": 1.82524560419469e-05, "loss": 0.9429, "step": 981 }, { "epoch": 0.22, "learning_rate": 1.8248437908085365e-05, "loss": 1.0042, "step": 982 }, { "epoch": 0.22, "learning_rate": 1.8244415603417603e-05, "loss": 0.9774, "step": 983 }, { "epoch": 0.22, "learning_rate": 1.8240389129977486e-05, "loss": 1.0278, "step": 984 }, { "epoch": 0.22, "learning_rate": 1.823635848980098e-05, "loss": 1.0112, "step": 985 }, { "epoch": 0.22, "learning_rate": 1.8232323684926184e-05, "loss": 1.0465, "step": 986 }, { "epoch": 0.22, "learning_rate": 1.8228284717393287e-05, "loss": 0.8921, "step": 987 }, { "epoch": 0.22, "learning_rate": 1.8224241589244575e-05, "loss": 0.9604, "step": 988 }, { "epoch": 0.22, "learning_rate": 1.8220194302524454e-05, "loss": 0.9698, "step": 989 }, { "epoch": 0.22, "learning_rate": 1.8216142859279432e-05, "loss": 1.0183, "step": 990 }, { "epoch": 0.22, "learning_rate": 1.82120872615581e-05, "loss": 1.0436, "step": 991 }, { "epoch": 0.22, "learning_rate": 1.8208027511411178e-05, "loss": 0.9518, "step": 992 }, { "epoch": 0.22, "learning_rate": 1.820396361089146e-05, "loss": 1.0485, "step": 993 }, { "epoch": 0.22, "learning_rate": 1.819989556205385e-05, "loss": 0.9502, "step": 994 }, { "epoch": 0.22, "learning_rate": 1.8195823366955356e-05, "loss": 1.0247, "step": 995 }, { "epoch": 0.22, "learning_rate": 1.819174702765507e-05, "loss": 0.9573, "step": 996 }, { "epoch": 0.22, "learning_rate": 1.8187666546214182e-05, "loss": 0.9797, "step": 997 }, { "epoch": 0.22, "learning_rate": 1.8183581924695987e-05, "loss": 1.0689, "step": 998 }, { "epoch": 0.22, "learning_rate": 1.817949316516586e-05, "loss": 0.9923, "step": 999 }, { "epoch": 0.22, "learning_rate": 1.8175400269691278e-05, "loss": 0.9822, "step": 1000 }, { "epoch": 0.22, "learning_rate": 1.81713032403418e-05, "loss": 1.0166, "step": 1001 }, { "epoch": 0.22, "learning_rate": 1.8167202079189086e-05, "loss": 0.9994, "step": 1002 }, { "epoch": 0.22, "learning_rate": 1.8163096788306877e-05, "loss": 1.0422, "step": 1003 }, { "epoch": 0.22, "learning_rate": 1.8158987369771008e-05, "loss": 0.9833, "step": 1004 }, { "epoch": 0.22, "learning_rate": 1.8154873825659393e-05, "loss": 1.0002, "step": 1005 }, { "epoch": 0.22, "learning_rate": 1.815075615805204e-05, "loss": 1.0141, "step": 1006 }, { "epoch": 0.22, "learning_rate": 1.8146634369031045e-05, "loss": 0.9383, "step": 1007 }, { "epoch": 0.22, "learning_rate": 1.8142508460680576e-05, "loss": 1.0479, "step": 1008 }, { "epoch": 0.22, "learning_rate": 1.8138378435086888e-05, "loss": 0.3134, "step": 1009 }, { "epoch": 0.22, "learning_rate": 1.813424429433833e-05, "loss": 0.9652, "step": 1010 }, { "epoch": 0.22, "learning_rate": 1.8130106040525316e-05, "loss": 1.0357, "step": 1011 }, { "epoch": 0.22, "learning_rate": 1.8125963675740342e-05, "loss": 1.0048, "step": 1012 }, { "epoch": 0.22, "learning_rate": 1.8121817202077995e-05, "loss": 0.9945, "step": 1013 }, { "epoch": 0.22, "learning_rate": 1.8117666621634923e-05, "loss": 1.031, "step": 1014 }, { "epoch": 0.22, "learning_rate": 1.8113511936509864e-05, "loss": 0.9851, "step": 1015 }, { "epoch": 0.22, "learning_rate": 1.8109353148803626e-05, "loss": 0.9928, "step": 1016 }, { "epoch": 0.22, "learning_rate": 1.810519026061909e-05, "loss": 0.9684, "step": 1017 }, { "epoch": 0.22, "learning_rate": 1.810102327406121e-05, "loss": 1.0037, "step": 1018 }, { "epoch": 0.22, "learning_rate": 1.8096852191237022e-05, "loss": 1.0095, "step": 1019 }, { "epoch": 0.22, "learning_rate": 1.809267701425562e-05, "loss": 1.0019, "step": 1020 }, { "epoch": 0.22, "learning_rate": 1.8088497745228172e-05, "loss": 1.0036, "step": 1021 }, { "epoch": 0.22, "learning_rate": 1.8084314386267923e-05, "loss": 0.9822, "step": 1022 }, { "epoch": 0.22, "learning_rate": 1.8080126939490176e-05, "loss": 1.0293, "step": 1023 }, { "epoch": 0.22, "learning_rate": 1.8075935407012304e-05, "loss": 1.0098, "step": 1024 }, { "epoch": 0.23, "learning_rate": 1.8071739790953754e-05, "loss": 1.0195, "step": 1025 }, { "epoch": 0.23, "learning_rate": 1.8067540093436024e-05, "loss": 1.0176, "step": 1026 }, { "epoch": 0.23, "learning_rate": 1.8063336316582683e-05, "loss": 0.9795, "step": 1027 }, { "epoch": 0.23, "learning_rate": 1.8059128462519364e-05, "loss": 0.9483, "step": 1028 }, { "epoch": 0.23, "learning_rate": 1.8054916533373763e-05, "loss": 0.9925, "step": 1029 }, { "epoch": 0.23, "learning_rate": 1.8050700531275632e-05, "loss": 0.9871, "step": 1030 }, { "epoch": 0.23, "learning_rate": 1.8046480458356778e-05, "loss": 0.988, "step": 1031 }, { "epoch": 0.23, "learning_rate": 1.8042256316751082e-05, "loss": 0.9679, "step": 1032 }, { "epoch": 0.23, "learning_rate": 1.8038028108594468e-05, "loss": 0.9523, "step": 1033 }, { "epoch": 0.23, "learning_rate": 1.803379583602492e-05, "loss": 1.0146, "step": 1034 }, { "epoch": 0.23, "learning_rate": 1.8029559501182482e-05, "loss": 0.9571, "step": 1035 }, { "epoch": 0.23, "learning_rate": 1.802531910620924e-05, "loss": 0.9931, "step": 1036 }, { "epoch": 0.23, "learning_rate": 1.8021074653249354e-05, "loss": 0.9554, "step": 1037 }, { "epoch": 0.23, "learning_rate": 1.8016826144449012e-05, "loss": 1.0284, "step": 1038 }, { "epoch": 0.23, "learning_rate": 1.801257358195647e-05, "loss": 0.9559, "step": 1039 }, { "epoch": 0.23, "learning_rate": 1.8008316967922027e-05, "loss": 1.018, "step": 1040 }, { "epoch": 0.23, "learning_rate": 1.8004056304498027e-05, "loss": 0.9903, "step": 1041 }, { "epoch": 0.23, "learning_rate": 1.7999791593838874e-05, "loss": 1.0402, "step": 1042 }, { "epoch": 0.23, "learning_rate": 1.7995522838101003e-05, "loss": 1.0859, "step": 1043 }, { "epoch": 0.23, "learning_rate": 1.799125003944291e-05, "loss": 0.9198, "step": 1044 }, { "epoch": 0.23, "learning_rate": 1.7986973200025115e-05, "loss": 0.9353, "step": 1045 }, { "epoch": 0.23, "learning_rate": 1.7982692322010205e-05, "loss": 0.9485, "step": 1046 }, { "epoch": 0.23, "learning_rate": 1.797840740756279e-05, "loss": 0.9994, "step": 1047 }, { "epoch": 0.23, "learning_rate": 1.7974118458849532e-05, "loss": 1.006, "step": 1048 }, { "epoch": 0.23, "learning_rate": 1.796982547803913e-05, "loss": 0.9465, "step": 1049 }, { "epoch": 0.23, "learning_rate": 1.796552846730232e-05, "loss": 1.0369, "step": 1050 }, { "epoch": 0.23, "learning_rate": 1.7961227428811877e-05, "loss": 0.9878, "step": 1051 }, { "epoch": 0.23, "learning_rate": 1.795692236474261e-05, "loss": 0.975, "step": 1052 }, { "epoch": 0.23, "learning_rate": 1.795261327727137e-05, "loss": 0.9919, "step": 1053 }, { "epoch": 0.23, "learning_rate": 1.7948300168577038e-05, "loss": 0.3126, "step": 1054 }, { "epoch": 0.23, "learning_rate": 1.7943983040840527e-05, "loss": 0.9893, "step": 1055 }, { "epoch": 0.23, "learning_rate": 1.7939661896244786e-05, "loss": 0.9833, "step": 1056 }, { "epoch": 0.23, "learning_rate": 1.793533673697479e-05, "loss": 1.0217, "step": 1057 }, { "epoch": 0.23, "learning_rate": 1.793100756521755e-05, "loss": 1.0416, "step": 1058 }, { "epoch": 0.23, "learning_rate": 1.7926674383162104e-05, "loss": 0.9584, "step": 1059 }, { "epoch": 0.23, "learning_rate": 1.7922337192999514e-05, "loss": 0.9928, "step": 1060 }, { "epoch": 0.23, "learning_rate": 1.791799599692287e-05, "loss": 1.0564, "step": 1061 }, { "epoch": 0.23, "learning_rate": 1.7913650797127298e-05, "loss": 0.9758, "step": 1062 }, { "epoch": 0.23, "learning_rate": 1.790930159580993e-05, "loss": 0.9573, "step": 1063 }, { "epoch": 0.23, "learning_rate": 1.7904948395169933e-05, "loss": 0.9643, "step": 1064 }, { "epoch": 0.23, "learning_rate": 1.79005911974085e-05, "loss": 0.99, "step": 1065 }, { "epoch": 0.23, "learning_rate": 1.7896230004728834e-05, "loss": 0.9943, "step": 1066 }, { "epoch": 0.23, "learning_rate": 1.789186481933616e-05, "loss": 1.0438, "step": 1067 }, { "epoch": 0.23, "learning_rate": 1.7887495643437737e-05, "loss": 0.9895, "step": 1068 }, { "epoch": 0.23, "learning_rate": 1.788312247924282e-05, "loss": 0.9778, "step": 1069 }, { "epoch": 0.23, "learning_rate": 1.7878745328962696e-05, "loss": 1.0369, "step": 1070 }, { "epoch": 0.24, "learning_rate": 1.7874364194810657e-05, "loss": 1.0303, "step": 1071 }, { "epoch": 0.24, "learning_rate": 1.7869979079002017e-05, "loss": 1.0598, "step": 1072 }, { "epoch": 0.24, "learning_rate": 1.7865589983754104e-05, "loss": 0.9463, "step": 1073 }, { "epoch": 0.24, "learning_rate": 1.7861196911286254e-05, "loss": 0.9757, "step": 1074 }, { "epoch": 0.24, "learning_rate": 1.7856799863819814e-05, "loss": 1.0109, "step": 1075 }, { "epoch": 0.24, "learning_rate": 1.7852398843578144e-05, "loss": 0.9544, "step": 1076 }, { "epoch": 0.24, "learning_rate": 1.7847993852786612e-05, "loss": 1.0642, "step": 1077 }, { "epoch": 0.24, "learning_rate": 1.784358489367259e-05, "loss": 0.9978, "step": 1078 }, { "epoch": 0.24, "learning_rate": 1.7839171968465455e-05, "loss": 0.9606, "step": 1079 }, { "epoch": 0.24, "learning_rate": 1.7834755079396604e-05, "loss": 1.0166, "step": 1080 }, { "epoch": 0.24, "learning_rate": 1.7830334228699425e-05, "loss": 0.9961, "step": 1081 }, { "epoch": 0.24, "learning_rate": 1.7825909418609306e-05, "loss": 0.9696, "step": 1082 }, { "epoch": 0.24, "learning_rate": 1.782148065136365e-05, "loss": 1.0012, "step": 1083 }, { "epoch": 0.24, "learning_rate": 1.7817047929201856e-05, "loss": 0.317, "step": 1084 }, { "epoch": 0.24, "learning_rate": 1.781261125436532e-05, "loss": 0.9777, "step": 1085 }, { "epoch": 0.24, "learning_rate": 1.780817062909743e-05, "loss": 0.9902, "step": 1086 }, { "epoch": 0.24, "learning_rate": 1.780372605564358e-05, "loss": 1.0331, "step": 1087 }, { "epoch": 0.24, "learning_rate": 1.7799277536251173e-05, "loss": 0.9871, "step": 1088 }, { "epoch": 0.24, "learning_rate": 1.7794825073169585e-05, "loss": 0.2959, "step": 1089 }, { "epoch": 0.24, "learning_rate": 1.779036866865019e-05, "loss": 0.3104, "step": 1090 }, { "epoch": 0.24, "learning_rate": 1.7785908324946373e-05, "loss": 1.0777, "step": 1091 }, { "epoch": 0.24, "learning_rate": 1.7781444044313487e-05, "loss": 0.9837, "step": 1092 }, { "epoch": 0.24, "learning_rate": 1.777697582900889e-05, "loss": 1.0129, "step": 1093 }, { "epoch": 0.24, "learning_rate": 1.7772503681291925e-05, "loss": 0.9911, "step": 1094 }, { "epoch": 0.24, "learning_rate": 1.776802760342393e-05, "loss": 1.0119, "step": 1095 }, { "epoch": 0.24, "learning_rate": 1.7763547597668222e-05, "loss": 1.0273, "step": 1096 }, { "epoch": 0.24, "learning_rate": 1.7759063666290102e-05, "loss": 1.0014, "step": 1097 }, { "epoch": 0.24, "learning_rate": 1.7754575811556866e-05, "loss": 0.8646, "step": 1098 }, { "epoch": 0.24, "learning_rate": 1.7750084035737788e-05, "loss": 0.34, "step": 1099 }, { "epoch": 0.24, "learning_rate": 1.7745588341104127e-05, "loss": 0.2907, "step": 1100 }, { "epoch": 0.24, "learning_rate": 1.774108872992912e-05, "loss": 0.9577, "step": 1101 }, { "epoch": 0.24, "learning_rate": 1.7736585204487985e-05, "loss": 0.9998, "step": 1102 }, { "epoch": 0.24, "learning_rate": 1.7732077767057926e-05, "loss": 0.9828, "step": 1103 }, { "epoch": 0.24, "learning_rate": 1.772756641991811e-05, "loss": 0.9956, "step": 1104 }, { "epoch": 0.24, "learning_rate": 1.77230511653497e-05, "loss": 1.0056, "step": 1105 }, { "epoch": 0.24, "learning_rate": 1.7718532005635826e-05, "loss": 1.0469, "step": 1106 }, { "epoch": 0.24, "learning_rate": 1.7714008943061586e-05, "loss": 0.3173, "step": 1107 }, { "epoch": 0.24, "learning_rate": 1.7709481979914058e-05, "loss": 0.9844, "step": 1108 }, { "epoch": 0.24, "learning_rate": 1.7704951118482298e-05, "loss": 0.9966, "step": 1109 }, { "epoch": 0.24, "learning_rate": 1.7700416361057322e-05, "loss": 0.9935, "step": 1110 }, { "epoch": 0.24, "learning_rate": 1.769587770993212e-05, "loss": 1.0073, "step": 1111 }, { "epoch": 0.24, "learning_rate": 1.769133516740166e-05, "loss": 0.9873, "step": 1112 }, { "epoch": 0.24, "learning_rate": 1.7686788735762856e-05, "loss": 0.9705, "step": 1113 }, { "epoch": 0.24, "learning_rate": 1.7682238417314616e-05, "loss": 1.065, "step": 1114 }, { "epoch": 0.24, "learning_rate": 1.7677684214357793e-05, "loss": 0.9732, "step": 1115 }, { "epoch": 0.24, "learning_rate": 1.7673126129195208e-05, "loss": 0.9896, "step": 1116 }, { "epoch": 0.25, "learning_rate": 1.766856416413165e-05, "loss": 1.0034, "step": 1117 }, { "epoch": 0.25, "learning_rate": 1.7663998321473873e-05, "loss": 1.0475, "step": 1118 }, { "epoch": 0.25, "learning_rate": 1.765942860353058e-05, "loss": 0.9648, "step": 1119 }, { "epoch": 0.25, "learning_rate": 1.7654855012612442e-05, "loss": 1.0092, "step": 1120 }, { "epoch": 0.25, "learning_rate": 1.7650277551032083e-05, "loss": 1.0253, "step": 1121 }, { "epoch": 0.25, "learning_rate": 1.764569622110409e-05, "loss": 0.9641, "step": 1122 }, { "epoch": 0.25, "learning_rate": 1.7641111025145006e-05, "loss": 1.016, "step": 1123 }, { "epoch": 0.25, "learning_rate": 1.7636521965473324e-05, "loss": 1.0305, "step": 1124 }, { "epoch": 0.25, "learning_rate": 1.763192904440949e-05, "loss": 1.0552, "step": 1125 }, { "epoch": 0.25, "learning_rate": 1.7627332264275907e-05, "loss": 0.984, "step": 1126 }, { "epoch": 0.25, "learning_rate": 1.762273162739693e-05, "loss": 0.9987, "step": 1127 }, { "epoch": 0.25, "learning_rate": 1.761812713609886e-05, "loss": 0.951, "step": 1128 }, { "epoch": 0.25, "learning_rate": 1.761351879270995e-05, "loss": 0.9634, "step": 1129 }, { "epoch": 0.25, "learning_rate": 1.76089065995604e-05, "loss": 0.9743, "step": 1130 }, { "epoch": 0.25, "learning_rate": 1.7604290558982354e-05, "loss": 0.9855, "step": 1131 }, { "epoch": 0.25, "learning_rate": 1.7599670673309903e-05, "loss": 0.9809, "step": 1132 }, { "epoch": 0.25, "learning_rate": 1.759504694487909e-05, "loss": 0.9685, "step": 1133 }, { "epoch": 0.25, "learning_rate": 1.7590419376027882e-05, "loss": 1.0141, "step": 1134 }, { "epoch": 0.25, "learning_rate": 1.758578796909621e-05, "loss": 0.9081, "step": 1135 }, { "epoch": 0.25, "learning_rate": 1.7581152726425937e-05, "loss": 1.0278, "step": 1136 }, { "epoch": 0.25, "learning_rate": 1.7576513650360854e-05, "loss": 0.9542, "step": 1137 }, { "epoch": 0.25, "learning_rate": 1.757187074324671e-05, "loss": 0.981, "step": 1138 }, { "epoch": 0.25, "learning_rate": 1.756722400743118e-05, "loss": 0.9769, "step": 1139 }, { "epoch": 0.25, "learning_rate": 1.756257344526387e-05, "loss": 0.9991, "step": 1140 }, { "epoch": 0.25, "learning_rate": 1.755791905909634e-05, "loss": 0.9742, "step": 1141 }, { "epoch": 0.25, "learning_rate": 1.7553260851282058e-05, "loss": 1.0357, "step": 1142 }, { "epoch": 0.25, "learning_rate": 1.7548598824176445e-05, "loss": 1.0123, "step": 1143 }, { "epoch": 0.25, "learning_rate": 1.7543932980136844e-05, "loss": 0.965, "step": 1144 }, { "epoch": 0.25, "learning_rate": 1.7539263321522528e-05, "loss": 0.9754, "step": 1145 }, { "epoch": 0.25, "learning_rate": 1.7534589850694704e-05, "loss": 1.0194, "step": 1146 }, { "epoch": 0.25, "learning_rate": 1.7529912570016503e-05, "loss": 0.9444, "step": 1147 }, { "epoch": 0.25, "learning_rate": 1.752523148185298e-05, "loss": 1.0306, "step": 1148 }, { "epoch": 0.25, "learning_rate": 1.7520546588571118e-05, "loss": 1.0456, "step": 1149 }, { "epoch": 0.25, "learning_rate": 1.7515857892539828e-05, "loss": 0.3309, "step": 1150 }, { "epoch": 0.25, "learning_rate": 1.7511165396129934e-05, "loss": 0.9697, "step": 1151 }, { "epoch": 0.25, "learning_rate": 1.750646910171419e-05, "loss": 1.0097, "step": 1152 }, { "epoch": 0.25, "learning_rate": 1.7501769011667273e-05, "loss": 1.0094, "step": 1153 }, { "epoch": 0.25, "learning_rate": 1.7497065128365766e-05, "loss": 1.0111, "step": 1154 }, { "epoch": 0.25, "learning_rate": 1.749235745418818e-05, "loss": 0.891, "step": 1155 }, { "epoch": 0.25, "learning_rate": 1.7487645991514947e-05, "loss": 1.0346, "step": 1156 }, { "epoch": 0.25, "learning_rate": 1.7482930742728402e-05, "loss": 0.9738, "step": 1157 }, { "epoch": 0.25, "learning_rate": 1.7478211710212808e-05, "loss": 1.0506, "step": 1158 }, { "epoch": 0.25, "learning_rate": 1.7473488896354326e-05, "loss": 0.9716, "step": 1159 }, { "epoch": 0.25, "learning_rate": 1.7468762303541044e-05, "loss": 0.3028, "step": 1160 }, { "epoch": 0.25, "learning_rate": 1.746403193416295e-05, "loss": 0.9927, "step": 1161 }, { "epoch": 0.26, "learning_rate": 1.745929779061195e-05, "loss": 0.9837, "step": 1162 }, { "epoch": 0.26, "learning_rate": 1.745455987528185e-05, "loss": 1.0208, "step": 1163 }, { "epoch": 0.26, "learning_rate": 1.7449818190568372e-05, "loss": 1.0048, "step": 1164 }, { "epoch": 0.26, "learning_rate": 1.7445072738869134e-05, "loss": 1.0015, "step": 1165 }, { "epoch": 0.26, "learning_rate": 1.7440323522583668e-05, "loss": 1.0239, "step": 1166 }, { "epoch": 0.26, "learning_rate": 1.7435570544113404e-05, "loss": 0.9153, "step": 1167 }, { "epoch": 0.26, "learning_rate": 1.743081380586168e-05, "loss": 1.0005, "step": 1168 }, { "epoch": 0.26, "learning_rate": 1.7426053310233725e-05, "loss": 0.9644, "step": 1169 }, { "epoch": 0.26, "learning_rate": 1.742128905963668e-05, "loss": 0.9484, "step": 1170 }, { "epoch": 0.26, "learning_rate": 1.7416521056479577e-05, "loss": 0.9888, "step": 1171 }, { "epoch": 0.26, "learning_rate": 1.7411749303173345e-05, "loss": 0.9378, "step": 1172 }, { "epoch": 0.26, "learning_rate": 1.7406973802130817e-05, "loss": 0.9675, "step": 1173 }, { "epoch": 0.26, "learning_rate": 1.7402194555766704e-05, "loss": 0.9763, "step": 1174 }, { "epoch": 0.26, "learning_rate": 1.7397411566497638e-05, "loss": 0.9829, "step": 1175 }, { "epoch": 0.26, "learning_rate": 1.7392624836742116e-05, "loss": 0.9724, "step": 1176 }, { "epoch": 0.26, "learning_rate": 1.7387834368920544e-05, "loss": 0.9893, "step": 1177 }, { "epoch": 0.26, "learning_rate": 1.738304016545521e-05, "loss": 1.0331, "step": 1178 }, { "epoch": 0.26, "learning_rate": 1.7378242228770298e-05, "loss": 0.9731, "step": 1179 }, { "epoch": 0.26, "learning_rate": 1.737344056129187e-05, "loss": 1.0073, "step": 1180 }, { "epoch": 0.26, "learning_rate": 1.7368635165447877e-05, "loss": 1.0253, "step": 1181 }, { "epoch": 0.26, "learning_rate": 1.7363826043668166e-05, "loss": 0.9143, "step": 1182 }, { "epoch": 0.26, "learning_rate": 1.7359013198384457e-05, "loss": 0.9271, "step": 1183 }, { "epoch": 0.26, "learning_rate": 1.7354196632030357e-05, "loss": 0.9777, "step": 1184 }, { "epoch": 0.26, "learning_rate": 1.7349376347041346e-05, "loss": 1.0135, "step": 1185 }, { "epoch": 0.26, "learning_rate": 1.7344552345854796e-05, "loss": 0.9666, "step": 1186 }, { "epoch": 0.26, "learning_rate": 1.7339724630909958e-05, "loss": 1.0397, "step": 1187 }, { "epoch": 0.26, "learning_rate": 1.7334893204647947e-05, "loss": 0.9809, "step": 1188 }, { "epoch": 0.26, "learning_rate": 1.733005806951178e-05, "loss": 0.9531, "step": 1189 }, { "epoch": 0.26, "learning_rate": 1.7325219227946314e-05, "loss": 1.0399, "step": 1190 }, { "epoch": 0.26, "learning_rate": 1.7320376682398313e-05, "loss": 0.9797, "step": 1191 }, { "epoch": 0.26, "learning_rate": 1.731553043531639e-05, "loss": 0.3237, "step": 1192 }, { "epoch": 0.26, "learning_rate": 1.7310680489151056e-05, "loss": 0.9474, "step": 1193 }, { "epoch": 0.26, "learning_rate": 1.7305826846354664e-05, "loss": 0.9515, "step": 1194 }, { "epoch": 0.26, "learning_rate": 1.7300969509381448e-05, "loss": 0.939, "step": 1195 }, { "epoch": 0.26, "learning_rate": 1.7296108480687515e-05, "loss": 1.0033, "step": 1196 }, { "epoch": 0.26, "learning_rate": 1.7291243762730837e-05, "loss": 0.9762, "step": 1197 }, { "epoch": 0.26, "learning_rate": 1.7286375357971244e-05, "loss": 0.9812, "step": 1198 }, { "epoch": 0.26, "learning_rate": 1.728150326887044e-05, "loss": 0.9802, "step": 1199 }, { "epoch": 0.26, "learning_rate": 1.7276627497891984e-05, "loss": 1.0376, "step": 1200 }, { "epoch": 0.26, "learning_rate": 1.72717480475013e-05, "loss": 0.3128, "step": 1201 }, { "epoch": 0.26, "learning_rate": 1.726686492016567e-05, "loss": 0.8732, "step": 1202 }, { "epoch": 0.26, "learning_rate": 1.7261978118354245e-05, "loss": 0.9639, "step": 1203 }, { "epoch": 0.26, "learning_rate": 1.725708764453802e-05, "loss": 0.9665, "step": 1204 }, { "epoch": 0.26, "learning_rate": 1.7252193501189857e-05, "loss": 1.0523, "step": 1205 }, { "epoch": 0.26, "learning_rate": 1.7247295690784465e-05, "loss": 0.9516, "step": 1206 }, { "epoch": 0.26, "learning_rate": 1.7242394215798418e-05, "loss": 0.944, "step": 1207 }, { "epoch": 0.27, "learning_rate": 1.7237489078710136e-05, "loss": 1.0214, "step": 1208 }, { "epoch": 0.27, "learning_rate": 1.7232580281999888e-05, "loss": 1.0134, "step": 1209 }, { "epoch": 0.27, "learning_rate": 1.7227667828149795e-05, "loss": 0.9521, "step": 1210 }, { "epoch": 0.27, "learning_rate": 1.7222751719643844e-05, "loss": 0.9325, "step": 1211 }, { "epoch": 0.27, "learning_rate": 1.7217831958967837e-05, "loss": 1.0073, "step": 1212 }, { "epoch": 0.27, "learning_rate": 1.7212908548609455e-05, "loss": 0.9499, "step": 1213 }, { "epoch": 0.27, "learning_rate": 1.72079814910582e-05, "loss": 0.99, "step": 1214 }, { "epoch": 0.27, "learning_rate": 1.720305078880544e-05, "loss": 0.9864, "step": 1215 }, { "epoch": 0.27, "learning_rate": 1.7198116444344368e-05, "loss": 0.9769, "step": 1216 }, { "epoch": 0.27, "learning_rate": 1.7193178460170025e-05, "loss": 0.9731, "step": 1217 }, { "epoch": 0.27, "learning_rate": 1.7188236838779297e-05, "loss": 1.014, "step": 1218 }, { "epoch": 0.27, "learning_rate": 1.71832915826709e-05, "loss": 0.9971, "step": 1219 }, { "epoch": 0.27, "learning_rate": 1.7178342694345395e-05, "loss": 1.0072, "step": 1220 }, { "epoch": 0.27, "learning_rate": 1.7173390176305178e-05, "loss": 0.9973, "step": 1221 }, { "epoch": 0.27, "learning_rate": 1.716843403105448e-05, "loss": 0.9561, "step": 1222 }, { "epoch": 0.27, "learning_rate": 1.7163474261099368e-05, "loss": 0.3274, "step": 1223 }, { "epoch": 0.27, "learning_rate": 1.7158510868947735e-05, "loss": 0.9537, "step": 1224 }, { "epoch": 0.27, "learning_rate": 1.7153543857109314e-05, "loss": 0.9418, "step": 1225 }, { "epoch": 0.27, "learning_rate": 1.7148573228095663e-05, "loss": 0.9675, "step": 1226 }, { "epoch": 0.27, "learning_rate": 1.7143598984420174e-05, "loss": 0.9731, "step": 1227 }, { "epoch": 0.27, "learning_rate": 1.713862112859806e-05, "loss": 1.0016, "step": 1228 }, { "epoch": 0.27, "learning_rate": 1.713363966314636e-05, "loss": 0.9366, "step": 1229 }, { "epoch": 0.27, "learning_rate": 1.7128654590583953e-05, "loss": 0.2915, "step": 1230 }, { "epoch": 0.27, "learning_rate": 1.7123665913431522e-05, "loss": 1.0085, "step": 1231 }, { "epoch": 0.27, "learning_rate": 1.7118673634211585e-05, "loss": 1.0012, "step": 1232 }, { "epoch": 0.27, "learning_rate": 1.7113677755448478e-05, "loss": 0.9974, "step": 1233 }, { "epoch": 0.27, "learning_rate": 1.7108678279668355e-05, "loss": 0.9977, "step": 1234 }, { "epoch": 0.27, "learning_rate": 1.7103675209399194e-05, "loss": 1.0226, "step": 1235 }, { "epoch": 0.27, "learning_rate": 1.7098668547170787e-05, "loss": 0.97, "step": 1236 }, { "epoch": 0.27, "learning_rate": 1.7093658295514742e-05, "loss": 0.9666, "step": 1237 }, { "epoch": 0.27, "learning_rate": 1.708864445696448e-05, "loss": 1.0113, "step": 1238 }, { "epoch": 0.27, "learning_rate": 1.7083627034055245e-05, "loss": 0.9582, "step": 1239 }, { "epoch": 0.27, "learning_rate": 1.707860602932408e-05, "loss": 0.9588, "step": 1240 }, { "epoch": 0.27, "learning_rate": 1.7073581445309852e-05, "loss": 0.9484, "step": 1241 }, { "epoch": 0.27, "learning_rate": 1.706855328455323e-05, "loss": 1.0246, "step": 1242 }, { "epoch": 0.27, "learning_rate": 1.706352154959669e-05, "loss": 1.0231, "step": 1243 }, { "epoch": 0.27, "learning_rate": 1.705848624298452e-05, "loss": 0.9853, "step": 1244 }, { "epoch": 0.27, "learning_rate": 1.7053447367262817e-05, "loss": 1.008, "step": 1245 }, { "epoch": 0.27, "learning_rate": 1.704840492497948e-05, "loss": 0.9635, "step": 1246 }, { "epoch": 0.27, "learning_rate": 1.7043358918684198e-05, "loss": 0.9359, "step": 1247 }, { "epoch": 0.27, "learning_rate": 1.7038309350928485e-05, "loss": 1.0141, "step": 1248 }, { "epoch": 0.27, "learning_rate": 1.7033256224265642e-05, "loss": 0.9423, "step": 1249 }, { "epoch": 0.27, "learning_rate": 1.702819954125077e-05, "loss": 0.9845, "step": 1250 }, { "epoch": 0.27, "learning_rate": 1.7023139304440778e-05, "loss": 0.9956, "step": 1251 }, { "epoch": 0.27, "learning_rate": 1.7018075516394355e-05, "loss": 1.002, "step": 1252 }, { "epoch": 0.28, "learning_rate": 1.7013008179672002e-05, "loss": 1.06, "step": 1253 }, { "epoch": 0.28, "learning_rate": 1.7007937296836003e-05, "loss": 1.0305, "step": 1254 }, { "epoch": 0.28, "learning_rate": 1.7002862870450446e-05, "loss": 1.0163, "step": 1255 }, { "epoch": 0.28, "learning_rate": 1.6997784903081197e-05, "loss": 0.9604, "step": 1256 }, { "epoch": 0.28, "learning_rate": 1.6992703397295927e-05, "loss": 1.029, "step": 1257 }, { "epoch": 0.28, "learning_rate": 1.6987618355664085e-05, "loss": 0.9393, "step": 1258 }, { "epoch": 0.28, "learning_rate": 1.6982529780756913e-05, "loss": 1.0091, "step": 1259 }, { "epoch": 0.28, "learning_rate": 1.697743767514744e-05, "loss": 1.0071, "step": 1260 }, { "epoch": 0.28, "learning_rate": 1.697234204141048e-05, "loss": 0.9985, "step": 1261 }, { "epoch": 0.28, "learning_rate": 1.6967242882122626e-05, "loss": 1.0152, "step": 1262 }, { "epoch": 0.28, "learning_rate": 1.6962140199862263e-05, "loss": 0.9746, "step": 1263 }, { "epoch": 0.28, "learning_rate": 1.695703399720955e-05, "loss": 0.965, "step": 1264 }, { "epoch": 0.28, "learning_rate": 1.6951924276746425e-05, "loss": 0.9978, "step": 1265 }, { "epoch": 0.28, "learning_rate": 1.6946811041056614e-05, "loss": 1.0178, "step": 1266 }, { "epoch": 0.28, "learning_rate": 1.6941694292725606e-05, "loss": 0.934, "step": 1267 }, { "epoch": 0.28, "learning_rate": 1.6936574034340683e-05, "loss": 0.955, "step": 1268 }, { "epoch": 0.28, "learning_rate": 1.6931450268490886e-05, "loss": 0.9889, "step": 1269 }, { "epoch": 0.28, "learning_rate": 1.6926322997767045e-05, "loss": 0.9842, "step": 1270 }, { "epoch": 0.28, "learning_rate": 1.6921192224761752e-05, "loss": 0.9643, "step": 1271 }, { "epoch": 0.28, "learning_rate": 1.6916057952069367e-05, "loss": 0.9634, "step": 1272 }, { "epoch": 0.28, "learning_rate": 1.6910920182286036e-05, "loss": 0.9667, "step": 1273 }, { "epoch": 0.28, "learning_rate": 1.6905778918009646e-05, "loss": 1.0034, "step": 1274 }, { "epoch": 0.28, "learning_rate": 1.690063416183988e-05, "loss": 0.9281, "step": 1275 }, { "epoch": 0.28, "learning_rate": 1.6895485916378166e-05, "loss": 0.9574, "step": 1276 }, { "epoch": 0.28, "learning_rate": 1.689033418422771e-05, "loss": 0.9356, "step": 1277 }, { "epoch": 0.28, "learning_rate": 1.6885178967993472e-05, "loss": 1.0336, "step": 1278 }, { "epoch": 0.28, "learning_rate": 1.6880020270282175e-05, "loss": 1.0074, "step": 1279 }, { "epoch": 0.28, "learning_rate": 1.687485809370231e-05, "loss": 0.9787, "step": 1280 }, { "epoch": 0.28, "learning_rate": 1.6869692440864115e-05, "loss": 0.9872, "step": 1281 }, { "epoch": 0.28, "learning_rate": 1.68645233143796e-05, "loss": 0.9571, "step": 1282 }, { "epoch": 0.28, "learning_rate": 1.6859350716862516e-05, "loss": 0.9359, "step": 1283 }, { "epoch": 0.28, "learning_rate": 1.6854174650928378e-05, "loss": 1.0372, "step": 1284 }, { "epoch": 0.28, "learning_rate": 1.6848995119194453e-05, "loss": 0.9729, "step": 1285 }, { "epoch": 0.28, "learning_rate": 1.6843812124279763e-05, "loss": 0.9508, "step": 1286 }, { "epoch": 0.28, "learning_rate": 1.683862566880508e-05, "loss": 0.9432, "step": 1287 }, { "epoch": 0.28, "learning_rate": 1.6833435755392923e-05, "loss": 0.9547, "step": 1288 }, { "epoch": 0.28, "learning_rate": 1.6828242386667563e-05, "loss": 0.9963, "step": 1289 }, { "epoch": 0.28, "learning_rate": 1.6823045565255018e-05, "loss": 0.9743, "step": 1290 }, { "epoch": 0.28, "learning_rate": 1.6817845293783044e-05, "loss": 0.9667, "step": 1291 }, { "epoch": 0.28, "learning_rate": 1.681264157488115e-05, "loss": 1.0085, "step": 1292 }, { "epoch": 0.28, "learning_rate": 1.680743441118059e-05, "loss": 1.0141, "step": 1293 }, { "epoch": 0.28, "learning_rate": 1.6802223805314355e-05, "loss": 0.3338, "step": 1294 }, { "epoch": 0.28, "learning_rate": 1.6797009759917177e-05, "loss": 0.9704, "step": 1295 }, { "epoch": 0.28, "learning_rate": 1.6791792277625523e-05, "loss": 1.0036, "step": 1296 }, { "epoch": 0.28, "learning_rate": 1.6786571361077613e-05, "loss": 1.0438, "step": 1297 }, { "epoch": 0.28, "learning_rate": 1.6781347012913383e-05, "loss": 0.9865, "step": 1298 }, { "epoch": 0.29, "learning_rate": 1.6776119235774517e-05, "loss": 0.9981, "step": 1299 }, { "epoch": 0.29, "learning_rate": 1.6770888032304437e-05, "loss": 0.9535, "step": 1300 }, { "epoch": 0.29, "learning_rate": 1.6765653405148275e-05, "loss": 1.0374, "step": 1301 }, { "epoch": 0.29, "learning_rate": 1.676041535695293e-05, "loss": 1.0056, "step": 1302 }, { "epoch": 0.29, "learning_rate": 1.675517389036699e-05, "loss": 1.0208, "step": 1303 }, { "epoch": 0.29, "learning_rate": 1.674992900804081e-05, "loss": 1.0125, "step": 1304 }, { "epoch": 0.29, "learning_rate": 1.674468071262644e-05, "loss": 0.99, "step": 1305 }, { "epoch": 0.29, "learning_rate": 1.6739429006777684e-05, "loss": 1.0476, "step": 1306 }, { "epoch": 0.29, "learning_rate": 1.6734173893150045e-05, "loss": 1.0116, "step": 1307 }, { "epoch": 0.29, "learning_rate": 1.672891537440077e-05, "loss": 0.9492, "step": 1308 }, { "epoch": 0.29, "learning_rate": 1.672365345318881e-05, "loss": 0.9937, "step": 1309 }, { "epoch": 0.29, "learning_rate": 1.671838813217485e-05, "loss": 0.9673, "step": 1310 }, { "epoch": 0.29, "learning_rate": 1.6713119414021285e-05, "loss": 0.9966, "step": 1311 }, { "epoch": 0.29, "learning_rate": 1.6707847301392237e-05, "loss": 0.9463, "step": 1312 }, { "epoch": 0.29, "learning_rate": 1.6702571796953536e-05, "loss": 0.9884, "step": 1313 }, { "epoch": 0.29, "learning_rate": 1.6697292903372732e-05, "loss": 0.974, "step": 1314 }, { "epoch": 0.29, "learning_rate": 1.6692010623319087e-05, "loss": 0.3189, "step": 1315 }, { "epoch": 0.29, "learning_rate": 1.6686724959463574e-05, "loss": 0.9745, "step": 1316 }, { "epoch": 0.29, "learning_rate": 1.668143591447888e-05, "loss": 0.9514, "step": 1317 }, { "epoch": 0.29, "learning_rate": 1.6676143491039398e-05, "loss": 1.0278, "step": 1318 }, { "epoch": 0.29, "learning_rate": 1.6670847691821232e-05, "loss": 0.9385, "step": 1319 }, { "epoch": 0.29, "learning_rate": 1.6665548519502196e-05, "loss": 0.9668, "step": 1320 }, { "epoch": 0.29, "learning_rate": 1.66602459767618e-05, "loss": 0.988, "step": 1321 }, { "epoch": 0.29, "learning_rate": 1.6654940066281268e-05, "loss": 0.9821, "step": 1322 }, { "epoch": 0.29, "learning_rate": 1.664963079074352e-05, "loss": 0.9909, "step": 1323 }, { "epoch": 0.29, "learning_rate": 1.664431815283319e-05, "loss": 0.9392, "step": 1324 }, { "epoch": 0.29, "learning_rate": 1.663900215523659e-05, "loss": 0.9435, "step": 1325 }, { "epoch": 0.29, "learning_rate": 1.663368280064175e-05, "loss": 0.9432, "step": 1326 }, { "epoch": 0.29, "learning_rate": 1.662836009173839e-05, "loss": 0.3095, "step": 1327 }, { "epoch": 0.29, "learning_rate": 1.662303403121793e-05, "loss": 0.9997, "step": 1328 }, { "epoch": 0.29, "learning_rate": 1.661770462177348e-05, "loss": 1.0031, "step": 1329 }, { "epoch": 0.29, "learning_rate": 1.661237186609984e-05, "loss": 0.9243, "step": 1330 }, { "epoch": 0.29, "learning_rate": 1.6607035766893515e-05, "loss": 1.0295, "step": 1331 }, { "epoch": 0.29, "learning_rate": 1.660169632685269e-05, "loss": 0.9335, "step": 1332 }, { "epoch": 0.29, "learning_rate": 1.659635354867724e-05, "loss": 0.9822, "step": 1333 }, { "epoch": 0.29, "learning_rate": 1.6591007435068736e-05, "loss": 0.9939, "step": 1334 }, { "epoch": 0.29, "learning_rate": 1.6585657988730424e-05, "loss": 0.9296, "step": 1335 }, { "epoch": 0.29, "learning_rate": 1.658030521236724e-05, "loss": 0.928, "step": 1336 }, { "epoch": 0.29, "learning_rate": 1.6574949108685813e-05, "loss": 1.0141, "step": 1337 }, { "epoch": 0.29, "learning_rate": 1.656958968039444e-05, "loss": 1.0006, "step": 1338 }, { "epoch": 0.29, "learning_rate": 1.6564226930203106e-05, "loss": 0.9329, "step": 1339 }, { "epoch": 0.29, "learning_rate": 1.6558860860823473e-05, "loss": 1.012, "step": 1340 }, { "epoch": 0.29, "learning_rate": 1.655349147496889e-05, "loss": 0.9798, "step": 1341 }, { "epoch": 0.29, "learning_rate": 1.6548118775354364e-05, "loss": 0.9704, "step": 1342 }, { "epoch": 0.29, "learning_rate": 1.6542742764696603e-05, "loss": 0.995, "step": 1343 }, { "epoch": 0.3, "learning_rate": 1.6537363445713972e-05, "loss": 1.0386, "step": 1344 }, { "epoch": 0.3, "learning_rate": 1.6531980821126508e-05, "loss": 1.0635, "step": 1345 }, { "epoch": 0.3, "learning_rate": 1.652659489365593e-05, "loss": 1.0172, "step": 1346 }, { "epoch": 0.3, "learning_rate": 1.652120566602562e-05, "loss": 1.0068, "step": 1347 }, { "epoch": 0.3, "learning_rate": 1.651581314096063e-05, "loss": 0.96, "step": 1348 }, { "epoch": 0.3, "learning_rate": 1.6510417321187677e-05, "loss": 1.0455, "step": 1349 }, { "epoch": 0.3, "learning_rate": 1.6505018209435152e-05, "loss": 0.9589, "step": 1350 }, { "epoch": 0.3, "learning_rate": 1.6499615808433098e-05, "loss": 0.9926, "step": 1351 }, { "epoch": 0.3, "learning_rate": 1.6494210120913238e-05, "loss": 1.0241, "step": 1352 }, { "epoch": 0.3, "learning_rate": 1.648880114960894e-05, "loss": 0.9017, "step": 1353 }, { "epoch": 0.3, "learning_rate": 1.6483388897255238e-05, "loss": 0.9902, "step": 1354 }, { "epoch": 0.3, "learning_rate": 1.6477973366588833e-05, "loss": 0.9453, "step": 1355 }, { "epoch": 0.3, "learning_rate": 1.647255456034807e-05, "loss": 0.9777, "step": 1356 }, { "epoch": 0.3, "learning_rate": 1.6467132481272965e-05, "loss": 0.9822, "step": 1357 }, { "epoch": 0.3, "learning_rate": 1.6461707132105177e-05, "loss": 0.9928, "step": 1358 }, { "epoch": 0.3, "learning_rate": 1.6456278515588023e-05, "loss": 0.9386, "step": 1359 }, { "epoch": 0.3, "learning_rate": 1.6450846634466476e-05, "loss": 0.9973, "step": 1360 }, { "epoch": 0.3, "learning_rate": 1.6445411491487154e-05, "loss": 0.3193, "step": 1361 }, { "epoch": 0.3, "learning_rate": 1.6439973089398325e-05, "loss": 0.9918, "step": 1362 }, { "epoch": 0.3, "learning_rate": 1.6434531430949907e-05, "loss": 0.9687, "step": 1363 }, { "epoch": 0.3, "learning_rate": 1.642908651889346e-05, "loss": 0.9633, "step": 1364 }, { "epoch": 0.3, "learning_rate": 1.6423638355982202e-05, "loss": 1.014, "step": 1365 }, { "epoch": 0.3, "learning_rate": 1.6418186944970976e-05, "loss": 0.2959, "step": 1366 }, { "epoch": 0.3, "learning_rate": 1.641273228861628e-05, "loss": 1.0123, "step": 1367 }, { "epoch": 0.3, "learning_rate": 1.6407274389676254e-05, "loss": 0.9722, "step": 1368 }, { "epoch": 0.3, "learning_rate": 1.6401813250910673e-05, "loss": 0.9847, "step": 1369 }, { "epoch": 0.3, "learning_rate": 1.6396348875080945e-05, "loss": 0.9557, "step": 1370 }, { "epoch": 0.3, "learning_rate": 1.6390881264950128e-05, "loss": 1.0514, "step": 1371 }, { "epoch": 0.3, "learning_rate": 1.6385410423282907e-05, "loss": 0.9698, "step": 1372 }, { "epoch": 0.3, "learning_rate": 1.6379936352845595e-05, "loss": 0.9639, "step": 1373 }, { "epoch": 0.3, "learning_rate": 1.6374459056406155e-05, "loss": 0.9788, "step": 1374 }, { "epoch": 0.3, "learning_rate": 1.6368978536734162e-05, "loss": 0.9877, "step": 1375 }, { "epoch": 0.3, "learning_rate": 1.636349479660084e-05, "loss": 0.8985, "step": 1376 }, { "epoch": 0.3, "learning_rate": 1.635800783877902e-05, "loss": 0.9772, "step": 1377 }, { "epoch": 0.3, "learning_rate": 1.635251766604318e-05, "loss": 0.9967, "step": 1378 }, { "epoch": 0.3, "learning_rate": 1.6347024281169406e-05, "loss": 0.9672, "step": 1379 }, { "epoch": 0.3, "learning_rate": 1.634152768693543e-05, "loss": 0.9367, "step": 1380 }, { "epoch": 0.3, "learning_rate": 1.633602788612058e-05, "loss": 0.9028, "step": 1381 }, { "epoch": 0.3, "learning_rate": 1.6330524881505834e-05, "loss": 1.0053, "step": 1382 }, { "epoch": 0.3, "learning_rate": 1.632501867587376e-05, "loss": 0.9442, "step": 1383 }, { "epoch": 0.3, "learning_rate": 1.6319509272008574e-05, "loss": 0.9129, "step": 1384 }, { "epoch": 0.3, "learning_rate": 1.6313996672696083e-05, "loss": 0.9411, "step": 1385 }, { "epoch": 0.3, "learning_rate": 1.630848088072373e-05, "loss": 0.9394, "step": 1386 }, { "epoch": 0.3, "learning_rate": 1.6302961898880562e-05, "loss": 0.9398, "step": 1387 }, { "epoch": 0.3, "learning_rate": 1.629743972995724e-05, "loss": 1.0131, "step": 1388 }, { "epoch": 0.3, "learning_rate": 1.629191437674604e-05, "loss": 0.9967, "step": 1389 }, { "epoch": 0.31, "learning_rate": 1.6286385842040843e-05, "loss": 0.9758, "step": 1390 }, { "epoch": 0.31, "learning_rate": 1.628085412863714e-05, "loss": 1.0035, "step": 1391 }, { "epoch": 0.31, "learning_rate": 1.6275319239332038e-05, "loss": 0.9947, "step": 1392 }, { "epoch": 0.31, "learning_rate": 1.6269781176924237e-05, "loss": 0.9631, "step": 1393 }, { "epoch": 0.31, "learning_rate": 1.626423994421405e-05, "loss": 0.9538, "step": 1394 }, { "epoch": 0.31, "learning_rate": 1.625869554400339e-05, "loss": 0.9963, "step": 1395 }, { "epoch": 0.31, "learning_rate": 1.6253147979095765e-05, "loss": 0.961, "step": 1396 }, { "epoch": 0.31, "learning_rate": 1.6247597252296304e-05, "loss": 0.945, "step": 1397 }, { "epoch": 0.31, "learning_rate": 1.6242043366411714e-05, "loss": 1.0047, "step": 1398 }, { "epoch": 0.31, "learning_rate": 1.62364863242503e-05, "loss": 1.0318, "step": 1399 }, { "epoch": 0.31, "learning_rate": 1.623092612862198e-05, "loss": 0.9542, "step": 1400 }, { "epoch": 0.31, "learning_rate": 1.6225362782338243e-05, "loss": 0.9713, "step": 1401 }, { "epoch": 0.31, "learning_rate": 1.62197962882122e-05, "loss": 0.9771, "step": 1402 }, { "epoch": 0.31, "learning_rate": 1.6214226649058524e-05, "loss": 1.0207, "step": 1403 }, { "epoch": 0.31, "learning_rate": 1.62086538676935e-05, "loss": 0.9667, "step": 1404 }, { "epoch": 0.31, "learning_rate": 1.6203077946934992e-05, "loss": 0.9866, "step": 1405 }, { "epoch": 0.31, "learning_rate": 1.619749888960245e-05, "loss": 0.9869, "step": 1406 }, { "epoch": 0.31, "learning_rate": 1.6191916698516918e-05, "loss": 0.309, "step": 1407 }, { "epoch": 0.31, "learning_rate": 1.6186331376501018e-05, "loss": 0.9798, "step": 1408 }, { "epoch": 0.31, "learning_rate": 1.6180742926378952e-05, "loss": 0.9825, "step": 1409 }, { "epoch": 0.31, "learning_rate": 1.6175151350976518e-05, "loss": 0.9416, "step": 1410 }, { "epoch": 0.31, "learning_rate": 1.6169556653121083e-05, "loss": 0.9482, "step": 1411 }, { "epoch": 0.31, "learning_rate": 1.6163958835641586e-05, "loss": 0.9888, "step": 1412 }, { "epoch": 0.31, "learning_rate": 1.615835790136856e-05, "loss": 1.0557, "step": 1413 }, { "epoch": 0.31, "learning_rate": 1.6152753853134106e-05, "loss": 0.2932, "step": 1414 }, { "epoch": 0.31, "learning_rate": 1.6147146693771896e-05, "loss": 0.9704, "step": 1415 }, { "epoch": 0.31, "learning_rate": 1.6141536426117186e-05, "loss": 0.9461, "step": 1416 }, { "epoch": 0.31, "learning_rate": 1.613592305300679e-05, "loss": 0.9573, "step": 1417 }, { "epoch": 0.31, "learning_rate": 1.6130306577279097e-05, "loss": 0.907, "step": 1418 }, { "epoch": 0.31, "learning_rate": 1.6124687001774076e-05, "loss": 0.9201, "step": 1419 }, { "epoch": 0.31, "learning_rate": 1.6119064329333248e-05, "loss": 0.3135, "step": 1420 }, { "epoch": 0.31, "learning_rate": 1.6113438562799706e-05, "loss": 0.9642, "step": 1421 }, { "epoch": 0.31, "learning_rate": 1.610780970501811e-05, "loss": 0.9985, "step": 1422 }, { "epoch": 0.31, "learning_rate": 1.610217775883468e-05, "loss": 0.9638, "step": 1423 }, { "epoch": 0.31, "learning_rate": 1.6096542727097198e-05, "loss": 0.9983, "step": 1424 }, { "epoch": 0.31, "learning_rate": 1.6090904612655007e-05, "loss": 0.9744, "step": 1425 }, { "epoch": 0.31, "learning_rate": 1.6085263418359004e-05, "loss": 0.9581, "step": 1426 }, { "epoch": 0.31, "learning_rate": 1.6079619147061657e-05, "loss": 1.0174, "step": 1427 }, { "epoch": 0.31, "learning_rate": 1.6073971801616975e-05, "loss": 0.9764, "step": 1428 }, { "epoch": 0.31, "learning_rate": 1.6068321384880523e-05, "loss": 0.9731, "step": 1429 }, { "epoch": 0.31, "learning_rate": 1.606266789970943e-05, "loss": 0.9911, "step": 1430 }, { "epoch": 0.31, "learning_rate": 1.6057011348962367e-05, "loss": 1.002, "step": 1431 }, { "epoch": 0.31, "learning_rate": 1.6051351735499555e-05, "loss": 0.9324, "step": 1432 }, { "epoch": 0.31, "learning_rate": 1.604568906218277e-05, "loss": 0.9521, "step": 1433 }, { "epoch": 0.31, "learning_rate": 1.604002333187533e-05, "loss": 0.9794, "step": 1434 }, { "epoch": 0.32, "learning_rate": 1.6034354547442104e-05, "loss": 0.9547, "step": 1435 }, { "epoch": 0.32, "learning_rate": 1.6028682711749495e-05, "loss": 1.0557, "step": 1436 }, { "epoch": 0.32, "learning_rate": 1.602300782766546e-05, "loss": 0.9795, "step": 1437 }, { "epoch": 0.32, "learning_rate": 1.601732989805949e-05, "loss": 0.9683, "step": 1438 }, { "epoch": 0.32, "learning_rate": 1.601164892580262e-05, "loss": 0.9525, "step": 1439 }, { "epoch": 0.32, "learning_rate": 1.600596491376742e-05, "loss": 0.9783, "step": 1440 }, { "epoch": 0.32, "learning_rate": 1.6000277864828005e-05, "loss": 0.3177, "step": 1441 }, { "epoch": 0.32, "learning_rate": 1.599458778186002e-05, "loss": 0.9528, "step": 1442 }, { "epoch": 0.32, "learning_rate": 1.5988894667740635e-05, "loss": 0.9879, "step": 1443 }, { "epoch": 0.32, "learning_rate": 1.598319852534857e-05, "loss": 0.3307, "step": 1444 }, { "epoch": 0.32, "learning_rate": 1.5977499357564067e-05, "loss": 0.314, "step": 1445 }, { "epoch": 0.32, "learning_rate": 1.5971797167268895e-05, "loss": 0.9802, "step": 1446 }, { "epoch": 0.32, "learning_rate": 1.5966091957346363e-05, "loss": 1.0075, "step": 1447 }, { "epoch": 0.32, "learning_rate": 1.596038373068129e-05, "loss": 1.0062, "step": 1448 }, { "epoch": 0.32, "learning_rate": 1.595467249016003e-05, "loss": 0.942, "step": 1449 }, { "epoch": 0.32, "learning_rate": 1.594895823867047e-05, "loss": 1.0027, "step": 1450 }, { "epoch": 0.32, "learning_rate": 1.5943240979102005e-05, "loss": 1.0232, "step": 1451 }, { "epoch": 0.32, "learning_rate": 1.593752071434555e-05, "loss": 0.9758, "step": 1452 }, { "epoch": 0.32, "learning_rate": 1.5931797447293553e-05, "loss": 0.9799, "step": 1453 }, { "epoch": 0.32, "learning_rate": 1.592607118083997e-05, "loss": 0.9594, "step": 1454 }, { "epoch": 0.32, "learning_rate": 1.5920341917880277e-05, "loss": 0.887, "step": 1455 }, { "epoch": 0.32, "learning_rate": 1.591460966131146e-05, "loss": 0.9942, "step": 1456 }, { "epoch": 0.32, "learning_rate": 1.5908874414032027e-05, "loss": 0.9646, "step": 1457 }, { "epoch": 0.32, "learning_rate": 1.5903136178941997e-05, "loss": 0.9774, "step": 1458 }, { "epoch": 0.32, "learning_rate": 1.589739495894289e-05, "loss": 1.0045, "step": 1459 }, { "epoch": 0.32, "learning_rate": 1.5891650756937755e-05, "loss": 0.982, "step": 1460 }, { "epoch": 0.32, "learning_rate": 1.588590357583112e-05, "loss": 0.9646, "step": 1461 }, { "epoch": 0.32, "learning_rate": 1.5880153418529044e-05, "loss": 0.9209, "step": 1462 }, { "epoch": 0.32, "learning_rate": 1.5874400287939084e-05, "loss": 0.9556, "step": 1463 }, { "epoch": 0.32, "learning_rate": 1.5868644186970298e-05, "loss": 0.9747, "step": 1464 }, { "epoch": 0.32, "learning_rate": 1.5862885118533244e-05, "loss": 1.0376, "step": 1465 }, { "epoch": 0.32, "learning_rate": 1.5857123085539988e-05, "loss": 0.9127, "step": 1466 }, { "epoch": 0.32, "learning_rate": 1.585135809090409e-05, "loss": 0.9546, "step": 1467 }, { "epoch": 0.32, "learning_rate": 1.5845590137540602e-05, "loss": 0.9614, "step": 1468 }, { "epoch": 0.32, "learning_rate": 1.583981922836609e-05, "loss": 0.9066, "step": 1469 }, { "epoch": 0.32, "learning_rate": 1.5834045366298593e-05, "loss": 0.9342, "step": 1470 }, { "epoch": 0.32, "learning_rate": 1.582826855425765e-05, "loss": 0.9657, "step": 1471 }, { "epoch": 0.32, "learning_rate": 1.5822488795164302e-05, "loss": 0.9929, "step": 1472 }, { "epoch": 0.32, "learning_rate": 1.5816706091941073e-05, "loss": 0.9449, "step": 1473 }, { "epoch": 0.32, "learning_rate": 1.581092044751197e-05, "loss": 0.9746, "step": 1474 }, { "epoch": 0.32, "learning_rate": 1.5805131864802496e-05, "loss": 0.9874, "step": 1475 }, { "epoch": 0.32, "learning_rate": 1.5799340346739634e-05, "loss": 0.9253, "step": 1476 }, { "epoch": 0.32, "learning_rate": 1.579354589625186e-05, "loss": 1.0119, "step": 1477 }, { "epoch": 0.32, "learning_rate": 1.5787748516269122e-05, "loss": 0.9591, "step": 1478 }, { "epoch": 0.32, "learning_rate": 1.578194820972285e-05, "loss": 0.9898, "step": 1479 }, { "epoch": 0.32, "learning_rate": 1.5776144979545963e-05, "loss": 0.9537, "step": 1480 }, { "epoch": 0.33, "learning_rate": 1.577033882867285e-05, "loss": 0.9653, "step": 1481 }, { "epoch": 0.33, "learning_rate": 1.576452976003938e-05, "loss": 1.0247, "step": 1482 }, { "epoch": 0.33, "learning_rate": 1.5758717776582898e-05, "loss": 0.9694, "step": 1483 }, { "epoch": 0.33, "learning_rate": 1.5752902881242218e-05, "loss": 0.9969, "step": 1484 }, { "epoch": 0.33, "learning_rate": 1.5747085076957635e-05, "loss": 0.9865, "step": 1485 }, { "epoch": 0.33, "learning_rate": 1.5741264366670913e-05, "loss": 0.9159, "step": 1486 }, { "epoch": 0.33, "learning_rate": 1.5735440753325273e-05, "loss": 0.9382, "step": 1487 }, { "epoch": 0.33, "learning_rate": 1.5729614239865424e-05, "loss": 0.317, "step": 1488 }, { "epoch": 0.33, "learning_rate": 1.572378482923752e-05, "loss": 0.9275, "step": 1489 }, { "epoch": 0.33, "learning_rate": 1.57179525243892e-05, "loss": 1.0289, "step": 1490 }, { "epoch": 0.33, "learning_rate": 1.571211732826955e-05, "loss": 0.9638, "step": 1491 }, { "epoch": 0.33, "learning_rate": 1.5706279243829135e-05, "loss": 1.0114, "step": 1492 }, { "epoch": 0.33, "learning_rate": 1.5700438274019963e-05, "loss": 1.0165, "step": 1493 }, { "epoch": 0.33, "learning_rate": 1.569459442179551e-05, "loss": 0.9698, "step": 1494 }, { "epoch": 0.33, "learning_rate": 1.5688747690110708e-05, "loss": 1.0418, "step": 1495 }, { "epoch": 0.33, "learning_rate": 1.568289808192195e-05, "loss": 1.0054, "step": 1496 }, { "epoch": 0.33, "learning_rate": 1.5677045600187074e-05, "loss": 0.9552, "step": 1497 }, { "epoch": 0.33, "learning_rate": 1.5671190247865374e-05, "loss": 1.008, "step": 1498 }, { "epoch": 0.33, "learning_rate": 1.56653320279176e-05, "loss": 0.9916, "step": 1499 }, { "epoch": 0.33, "learning_rate": 1.5659470943305956e-05, "loss": 0.9243, "step": 1500 }, { "epoch": 0.33, "learning_rate": 1.5653606996994074e-05, "loss": 0.9926, "step": 1501 }, { "epoch": 0.33, "learning_rate": 1.5647740191947056e-05, "loss": 0.9632, "step": 1502 }, { "epoch": 0.33, "learning_rate": 1.564187053113144e-05, "loss": 0.9886, "step": 1503 }, { "epoch": 0.33, "learning_rate": 1.5635998017515203e-05, "loss": 0.9695, "step": 1504 }, { "epoch": 0.33, "learning_rate": 1.5630122654067778e-05, "loss": 0.9263, "step": 1505 }, { "epoch": 0.33, "learning_rate": 1.5624244443760025e-05, "loss": 0.8945, "step": 1506 }, { "epoch": 0.33, "learning_rate": 1.5618363389564247e-05, "loss": 0.8908, "step": 1507 }, { "epoch": 0.33, "learning_rate": 1.5612479494454195e-05, "loss": 0.9926, "step": 1508 }, { "epoch": 0.33, "learning_rate": 1.5606592761405044e-05, "loss": 0.978, "step": 1509 }, { "epoch": 0.33, "learning_rate": 1.560070319339341e-05, "loss": 0.9542, "step": 1510 }, { "epoch": 0.33, "learning_rate": 1.559481079339734e-05, "loss": 0.9603, "step": 1511 }, { "epoch": 0.33, "learning_rate": 1.5588915564396315e-05, "loss": 0.3381, "step": 1512 }, { "epoch": 0.33, "learning_rate": 1.5583017509371248e-05, "loss": 0.9834, "step": 1513 }, { "epoch": 0.33, "learning_rate": 1.5577116631304475e-05, "loss": 1.0126, "step": 1514 }, { "epoch": 0.33, "learning_rate": 1.5571212933179766e-05, "loss": 0.9369, "step": 1515 }, { "epoch": 0.33, "learning_rate": 1.5565306417982314e-05, "loss": 0.3106, "step": 1516 }, { "epoch": 0.33, "learning_rate": 1.5559397088698736e-05, "loss": 0.9926, "step": 1517 }, { "epoch": 0.33, "learning_rate": 1.5553484948317074e-05, "loss": 0.939, "step": 1518 }, { "epoch": 0.33, "learning_rate": 1.554756999982679e-05, "loss": 0.9063, "step": 1519 }, { "epoch": 0.33, "learning_rate": 1.554165224621876e-05, "loss": 0.9526, "step": 1520 }, { "epoch": 0.33, "learning_rate": 1.5535731690485293e-05, "loss": 0.9907, "step": 1521 }, { "epoch": 0.33, "learning_rate": 1.55298083356201e-05, "loss": 0.9635, "step": 1522 }, { "epoch": 0.33, "learning_rate": 1.5523882184618315e-05, "loss": 0.9385, "step": 1523 }, { "epoch": 0.33, "learning_rate": 1.551795324047649e-05, "loss": 0.9589, "step": 1524 }, { "epoch": 0.33, "learning_rate": 1.5512021506192575e-05, "loss": 0.9587, "step": 1525 }, { "epoch": 0.33, "learning_rate": 1.5506086984765945e-05, "loss": 0.9054, "step": 1526 }, { "epoch": 0.34, "learning_rate": 1.550014967919738e-05, "loss": 1.0129, "step": 1527 }, { "epoch": 0.34, "learning_rate": 1.5494209592489063e-05, "loss": 0.8955, "step": 1528 }, { "epoch": 0.34, "learning_rate": 1.5488266727644588e-05, "loss": 0.964, "step": 1529 }, { "epoch": 0.34, "learning_rate": 1.5482321087668954e-05, "loss": 1.0204, "step": 1530 }, { "epoch": 0.34, "learning_rate": 1.5476372675568562e-05, "loss": 0.9864, "step": 1531 }, { "epoch": 0.34, "learning_rate": 1.5470421494351214e-05, "loss": 0.3028, "step": 1532 }, { "epoch": 0.34, "learning_rate": 1.5464467547026112e-05, "loss": 1.0122, "step": 1533 }, { "epoch": 0.34, "learning_rate": 1.5458510836603863e-05, "loss": 1.0282, "step": 1534 }, { "epoch": 0.34, "learning_rate": 1.5452551366096457e-05, "loss": 0.993, "step": 1535 }, { "epoch": 0.34, "learning_rate": 1.5446589138517292e-05, "loss": 0.9976, "step": 1536 }, { "epoch": 0.34, "learning_rate": 1.5440624156881162e-05, "loss": 0.9318, "step": 1537 }, { "epoch": 0.34, "learning_rate": 1.543465642420424e-05, "loss": 0.9537, "step": 1538 }, { "epoch": 0.34, "learning_rate": 1.5428685943504104e-05, "loss": 1.0181, "step": 1539 }, { "epoch": 0.34, "learning_rate": 1.5422712717799714e-05, "loss": 0.9801, "step": 1540 }, { "epoch": 0.34, "learning_rate": 1.5416736750111415e-05, "loss": 0.9731, "step": 1541 }, { "epoch": 0.34, "learning_rate": 1.5410758043460952e-05, "loss": 0.3093, "step": 1542 }, { "epoch": 0.34, "learning_rate": 1.540477660087144e-05, "loss": 0.9898, "step": 1543 }, { "epoch": 0.34, "learning_rate": 1.5398792425367387e-05, "loss": 0.9867, "step": 1544 }, { "epoch": 0.34, "learning_rate": 1.5392805519974678e-05, "loss": 1.0034, "step": 1545 }, { "epoch": 0.34, "learning_rate": 1.5386815887720576e-05, "loss": 0.997, "step": 1546 }, { "epoch": 0.34, "learning_rate": 1.5380823531633727e-05, "loss": 0.9633, "step": 1547 }, { "epoch": 0.34, "learning_rate": 1.5374828454744163e-05, "loss": 0.9564, "step": 1548 }, { "epoch": 0.34, "learning_rate": 1.5368830660083275e-05, "loss": 0.9427, "step": 1549 }, { "epoch": 0.34, "learning_rate": 1.5362830150683838e-05, "loss": 1.0122, "step": 1550 }, { "epoch": 0.34, "learning_rate": 1.5356826929579994e-05, "loss": 0.9829, "step": 1551 }, { "epoch": 0.34, "learning_rate": 1.5350820999807266e-05, "loss": 0.8865, "step": 1552 }, { "epoch": 0.34, "learning_rate": 1.5344812364402538e-05, "loss": 0.9943, "step": 1553 }, { "epoch": 0.34, "learning_rate": 1.533880102640406e-05, "loss": 0.9516, "step": 1554 }, { "epoch": 0.34, "learning_rate": 1.5332786988851462e-05, "loss": 0.9955, "step": 1555 }, { "epoch": 0.34, "learning_rate": 1.532677025478572e-05, "loss": 1.0376, "step": 1556 }, { "epoch": 0.34, "learning_rate": 1.5320750827249192e-05, "loss": 0.9903, "step": 1557 }, { "epoch": 0.34, "learning_rate": 1.5314728709285587e-05, "loss": 0.9701, "step": 1558 }, { "epoch": 0.34, "learning_rate": 1.5308703903939974e-05, "loss": 0.971, "step": 1559 }, { "epoch": 0.34, "learning_rate": 1.530267641425879e-05, "loss": 1.0336, "step": 1560 }, { "epoch": 0.34, "learning_rate": 1.529664624328982e-05, "loss": 0.9622, "step": 1561 }, { "epoch": 0.34, "learning_rate": 1.5290613394082202e-05, "loss": 0.9614, "step": 1562 }, { "epoch": 0.34, "learning_rate": 1.5284577869686447e-05, "loss": 0.9811, "step": 1563 }, { "epoch": 0.34, "learning_rate": 1.5278539673154393e-05, "loss": 0.9229, "step": 1564 }, { "epoch": 0.34, "learning_rate": 1.5272498807539248e-05, "loss": 0.9745, "step": 1565 }, { "epoch": 0.34, "learning_rate": 1.5266455275895562e-05, "loss": 0.9636, "step": 1566 }, { "epoch": 0.34, "learning_rate": 1.5260409081279237e-05, "loss": 0.9764, "step": 1567 }, { "epoch": 0.34, "learning_rate": 1.525436022674752e-05, "loss": 0.9817, "step": 1568 }, { "epoch": 0.34, "learning_rate": 1.5248308715358999e-05, "loss": 0.984, "step": 1569 }, { "epoch": 0.34, "learning_rate": 1.5242254550173612e-05, "loss": 0.9822, "step": 1570 }, { "epoch": 0.34, "learning_rate": 1.5236197734252636e-05, "loss": 0.9465, "step": 1571 }, { "epoch": 0.35, "learning_rate": 1.5230138270658681e-05, "loss": 0.9697, "step": 1572 }, { "epoch": 0.35, "learning_rate": 1.522407616245571e-05, "loss": 0.9827, "step": 1573 }, { "epoch": 0.35, "learning_rate": 1.5218011412709013e-05, "loss": 0.9774, "step": 1574 }, { "epoch": 0.35, "learning_rate": 1.5211944024485216e-05, "loss": 0.9624, "step": 1575 }, { "epoch": 0.35, "learning_rate": 1.520587400085229e-05, "loss": 0.3137, "step": 1576 }, { "epoch": 0.35, "learning_rate": 1.5199801344879523e-05, "loss": 1.0248, "step": 1577 }, { "epoch": 0.35, "learning_rate": 1.5193726059637543e-05, "loss": 0.9611, "step": 1578 }, { "epoch": 0.35, "learning_rate": 1.5187648148198309e-05, "loss": 0.9161, "step": 1579 }, { "epoch": 0.35, "learning_rate": 1.5181567613635099e-05, "loss": 0.9503, "step": 1580 }, { "epoch": 0.35, "learning_rate": 1.5175484459022531e-05, "loss": 0.947, "step": 1581 }, { "epoch": 0.35, "learning_rate": 1.5169398687436535e-05, "loss": 0.981, "step": 1582 }, { "epoch": 0.35, "learning_rate": 1.5163310301954367e-05, "loss": 0.9141, "step": 1583 }, { "epoch": 0.35, "learning_rate": 1.5157219305654614e-05, "loss": 0.9623, "step": 1584 }, { "epoch": 0.35, "learning_rate": 1.5151125701617168e-05, "loss": 0.9278, "step": 1585 }, { "epoch": 0.35, "learning_rate": 1.5145029492923255e-05, "loss": 0.9269, "step": 1586 }, { "epoch": 0.35, "learning_rate": 1.5138930682655405e-05, "loss": 1.0079, "step": 1587 }, { "epoch": 0.35, "learning_rate": 1.5132829273897473e-05, "loss": 0.9387, "step": 1588 }, { "epoch": 0.35, "learning_rate": 1.5126725269734625e-05, "loss": 1.0341, "step": 1589 }, { "epoch": 0.35, "learning_rate": 1.5120618673253335e-05, "loss": 0.9055, "step": 1590 }, { "epoch": 0.35, "learning_rate": 1.5114509487541394e-05, "loss": 0.9805, "step": 1591 }, { "epoch": 0.35, "learning_rate": 1.51083977156879e-05, "loss": 0.9376, "step": 1592 }, { "epoch": 0.35, "learning_rate": 1.5102283360783258e-05, "loss": 0.9826, "step": 1593 }, { "epoch": 0.35, "learning_rate": 1.5096166425919176e-05, "loss": 0.9479, "step": 1594 }, { "epoch": 0.35, "learning_rate": 1.5090046914188672e-05, "loss": 0.9361, "step": 1595 }, { "epoch": 0.35, "learning_rate": 1.5083924828686067e-05, "loss": 1.0087, "step": 1596 }, { "epoch": 0.35, "learning_rate": 1.5077800172506977e-05, "loss": 0.9779, "step": 1597 }, { "epoch": 0.35, "learning_rate": 1.507167294874833e-05, "loss": 0.9362, "step": 1598 }, { "epoch": 0.35, "learning_rate": 1.5065543160508337e-05, "loss": 0.9616, "step": 1599 }, { "epoch": 0.35, "learning_rate": 1.5059410810886515e-05, "loss": 0.9669, "step": 1600 }, { "epoch": 0.35, "learning_rate": 1.5053275902983675e-05, "loss": 0.9756, "step": 1601 }, { "epoch": 0.35, "learning_rate": 1.5047138439901922e-05, "loss": 0.9736, "step": 1602 }, { "epoch": 0.35, "learning_rate": 1.5040998424744653e-05, "loss": 0.9646, "step": 1603 }, { "epoch": 0.35, "learning_rate": 1.5034855860616548e-05, "loss": 0.9653, "step": 1604 }, { "epoch": 0.35, "learning_rate": 1.5028710750623592e-05, "loss": 1.0221, "step": 1605 }, { "epoch": 0.35, "learning_rate": 1.502256309787304e-05, "loss": 0.8817, "step": 1606 }, { "epoch": 0.35, "learning_rate": 1.5016412905473444e-05, "loss": 0.9847, "step": 1607 }, { "epoch": 0.35, "learning_rate": 1.5010260176534632e-05, "loss": 1.0506, "step": 1608 }, { "epoch": 0.35, "learning_rate": 1.5004104914167725e-05, "loss": 0.9142, "step": 1609 }, { "epoch": 0.35, "learning_rate": 1.4997947121485119e-05, "loss": 0.9433, "step": 1610 }, { "epoch": 0.35, "learning_rate": 1.4991786801600485e-05, "loss": 0.9829, "step": 1611 }, { "epoch": 0.35, "learning_rate": 1.4985623957628781e-05, "loss": 0.9796, "step": 1612 }, { "epoch": 0.35, "learning_rate": 1.4979458592686234e-05, "loss": 0.9859, "step": 1613 }, { "epoch": 0.35, "learning_rate": 1.4973290709890346e-05, "loss": 0.917, "step": 1614 }, { "epoch": 0.35, "learning_rate": 1.4967120312359902e-05, "loss": 0.9607, "step": 1615 }, { "epoch": 0.35, "learning_rate": 1.4960947403214947e-05, "loss": 0.9682, "step": 1616 }, { "epoch": 0.35, "learning_rate": 1.4954771985576799e-05, "loss": 0.9578, "step": 1617 }, { "epoch": 0.36, "learning_rate": 1.4948594062568047e-05, "loss": 0.9706, "step": 1618 }, { "epoch": 0.36, "learning_rate": 1.4942413637312548e-05, "loss": 0.9633, "step": 1619 }, { "epoch": 0.36, "learning_rate": 1.4936230712935416e-05, "loss": 0.9527, "step": 1620 }, { "epoch": 0.36, "learning_rate": 1.493004529256304e-05, "loss": 1.0312, "step": 1621 }, { "epoch": 0.36, "learning_rate": 1.4923857379323063e-05, "loss": 0.9366, "step": 1622 }, { "epoch": 0.36, "learning_rate": 1.4917666976344392e-05, "loss": 0.926, "step": 1623 }, { "epoch": 0.36, "learning_rate": 1.4911474086757189e-05, "loss": 0.9887, "step": 1624 }, { "epoch": 0.36, "learning_rate": 1.490527871369288e-05, "loss": 0.9234, "step": 1625 }, { "epoch": 0.36, "learning_rate": 1.489908086028414e-05, "loss": 1.02, "step": 1626 }, { "epoch": 0.36, "learning_rate": 1.4892880529664906e-05, "loss": 1.0311, "step": 1627 }, { "epoch": 0.36, "learning_rate": 1.4886677724970358e-05, "loss": 0.9085, "step": 1628 }, { "epoch": 0.36, "learning_rate": 1.4880472449336936e-05, "loss": 0.3275, "step": 1629 }, { "epoch": 0.36, "learning_rate": 1.4874264705902319e-05, "loss": 0.9642, "step": 1630 }, { "epoch": 0.36, "learning_rate": 1.4868054497805446e-05, "loss": 1.0071, "step": 1631 }, { "epoch": 0.36, "learning_rate": 1.4861841828186494e-05, "loss": 0.9603, "step": 1632 }, { "epoch": 0.36, "learning_rate": 1.4855626700186887e-05, "loss": 0.935, "step": 1633 }, { "epoch": 0.36, "learning_rate": 1.4849409116949294e-05, "loss": 0.9719, "step": 1634 }, { "epoch": 0.36, "learning_rate": 1.4843189081617622e-05, "loss": 0.9488, "step": 1635 }, { "epoch": 0.36, "learning_rate": 1.4836966597337017e-05, "loss": 1.0092, "step": 1636 }, { "epoch": 0.36, "learning_rate": 1.4830741667253873e-05, "loss": 0.9851, "step": 1637 }, { "epoch": 0.36, "learning_rate": 1.4824514294515805e-05, "loss": 0.9843, "step": 1638 }, { "epoch": 0.36, "learning_rate": 1.481828448227168e-05, "loss": 0.9226, "step": 1639 }, { "epoch": 0.36, "learning_rate": 1.4812052233671581e-05, "loss": 0.9914, "step": 1640 }, { "epoch": 0.36, "learning_rate": 1.4805817551866839e-05, "loss": 0.9475, "step": 1641 }, { "epoch": 0.36, "learning_rate": 1.4799580440010006e-05, "loss": 0.984, "step": 1642 }, { "epoch": 0.36, "learning_rate": 1.4793340901254867e-05, "loss": 0.9568, "step": 1643 }, { "epoch": 0.36, "learning_rate": 1.4787098938756428e-05, "loss": 0.9242, "step": 1644 }, { "epoch": 0.36, "learning_rate": 1.4780854555670932e-05, "loss": 0.9575, "step": 1645 }, { "epoch": 0.36, "learning_rate": 1.4774607755155832e-05, "loss": 0.9322, "step": 1646 }, { "epoch": 0.36, "learning_rate": 1.4768358540369816e-05, "loss": 1.0139, "step": 1647 }, { "epoch": 0.36, "learning_rate": 1.476210691447278e-05, "loss": 0.9522, "step": 1648 }, { "epoch": 0.36, "learning_rate": 1.4755852880625851e-05, "loss": 0.9605, "step": 1649 }, { "epoch": 0.36, "learning_rate": 1.474959644199137e-05, "loss": 0.954, "step": 1650 }, { "epoch": 0.36, "learning_rate": 1.474333760173289e-05, "loss": 0.9395, "step": 1651 }, { "epoch": 0.36, "learning_rate": 1.4737076363015181e-05, "loss": 0.9527, "step": 1652 }, { "epoch": 0.36, "learning_rate": 1.473081272900423e-05, "loss": 0.9743, "step": 1653 }, { "epoch": 0.36, "learning_rate": 1.4724546702867223e-05, "loss": 0.9753, "step": 1654 }, { "epoch": 0.36, "learning_rate": 1.4718278287772574e-05, "loss": 0.8909, "step": 1655 }, { "epoch": 0.36, "learning_rate": 1.4712007486889888e-05, "loss": 0.9409, "step": 1656 }, { "epoch": 0.36, "learning_rate": 1.4705734303389982e-05, "loss": 0.9852, "step": 1657 }, { "epoch": 0.36, "learning_rate": 1.4699458740444881e-05, "loss": 0.9968, "step": 1658 }, { "epoch": 0.36, "learning_rate": 1.4693180801227813e-05, "loss": 0.9319, "step": 1659 }, { "epoch": 0.36, "learning_rate": 1.46869004889132e-05, "loss": 0.9296, "step": 1660 }, { "epoch": 0.36, "learning_rate": 1.4680617806676675e-05, "loss": 0.9741, "step": 1661 }, { "epoch": 0.36, "learning_rate": 1.4674332757695063e-05, "loss": 0.9671, "step": 1662 }, { "epoch": 0.37, "learning_rate": 1.4668045345146384e-05, "loss": 0.911, "step": 1663 }, { "epoch": 0.37, "learning_rate": 1.4661755572209858e-05, "loss": 1.0028, "step": 1664 }, { "epoch": 0.37, "learning_rate": 1.4655463442065893e-05, "loss": 0.9994, "step": 1665 }, { "epoch": 0.37, "learning_rate": 1.4649168957896097e-05, "loss": 0.9238, "step": 1666 }, { "epoch": 0.37, "learning_rate": 1.4642872122883256e-05, "loss": 1.0074, "step": 1667 }, { "epoch": 0.37, "learning_rate": 1.463657294021136e-05, "loss": 0.9791, "step": 1668 }, { "epoch": 0.37, "learning_rate": 1.4630271413065573e-05, "loss": 1.0181, "step": 1669 }, { "epoch": 0.37, "learning_rate": 1.4623967544632252e-05, "loss": 0.9658, "step": 1670 }, { "epoch": 0.37, "learning_rate": 1.4617661338098934e-05, "loss": 1.0484, "step": 1671 }, { "epoch": 0.37, "learning_rate": 1.4611352796654344e-05, "loss": 0.9941, "step": 1672 }, { "epoch": 0.37, "learning_rate": 1.4605041923488376e-05, "loss": 0.951, "step": 1673 }, { "epoch": 0.37, "learning_rate": 1.4598728721792119e-05, "loss": 0.9266, "step": 1674 }, { "epoch": 0.37, "learning_rate": 1.4592413194757826e-05, "loss": 1.0149, "step": 1675 }, { "epoch": 0.37, "learning_rate": 1.458609534557893e-05, "loss": 0.9628, "step": 1676 }, { "epoch": 0.37, "learning_rate": 1.457977517745004e-05, "loss": 1.0325, "step": 1677 }, { "epoch": 0.37, "learning_rate": 1.4573452693566936e-05, "loss": 0.9875, "step": 1678 }, { "epoch": 0.37, "learning_rate": 1.4567127897126571e-05, "loss": 0.9713, "step": 1679 }, { "epoch": 0.37, "learning_rate": 1.4560800791327063e-05, "loss": 0.936, "step": 1680 }, { "epoch": 0.37, "learning_rate": 1.4554471379367705e-05, "loss": 0.9381, "step": 1681 }, { "epoch": 0.37, "learning_rate": 1.4548139664448948e-05, "loss": 1.0069, "step": 1682 }, { "epoch": 0.37, "learning_rate": 1.4541805649772412e-05, "loss": 0.9338, "step": 1683 }, { "epoch": 0.37, "learning_rate": 1.453546933854088e-05, "loss": 0.94, "step": 1684 }, { "epoch": 0.37, "learning_rate": 1.4529130733958292e-05, "loss": 0.9935, "step": 1685 }, { "epoch": 0.37, "learning_rate": 1.4522789839229752e-05, "loss": 0.9889, "step": 1686 }, { "epoch": 0.37, "learning_rate": 1.4516446657561522e-05, "loss": 0.9367, "step": 1687 }, { "epoch": 0.37, "learning_rate": 1.451010119216102e-05, "loss": 0.9892, "step": 1688 }, { "epoch": 0.37, "learning_rate": 1.4503753446236814e-05, "loss": 0.9065, "step": 1689 }, { "epoch": 0.37, "learning_rate": 1.4497403422998634e-05, "loss": 1.023, "step": 1690 }, { "epoch": 0.37, "learning_rate": 1.4491051125657357e-05, "loss": 0.9099, "step": 1691 }, { "epoch": 0.37, "learning_rate": 1.4484696557425006e-05, "loss": 1.003, "step": 1692 }, { "epoch": 0.37, "learning_rate": 1.4478339721514757e-05, "loss": 0.9553, "step": 1693 }, { "epoch": 0.37, "learning_rate": 1.4471980621140929e-05, "loss": 0.2914, "step": 1694 }, { "epoch": 0.37, "learning_rate": 1.446561925951899e-05, "loss": 0.9922, "step": 1695 }, { "epoch": 0.37, "learning_rate": 1.4459255639865552e-05, "loss": 0.9001, "step": 1696 }, { "epoch": 0.37, "learning_rate": 1.4452889765398363e-05, "loss": 0.9764, "step": 1697 }, { "epoch": 0.37, "learning_rate": 1.4446521639336322e-05, "loss": 0.9959, "step": 1698 }, { "epoch": 0.37, "learning_rate": 1.4440151264899451e-05, "loss": 0.3428, "step": 1699 }, { "epoch": 0.37, "learning_rate": 1.443377864530892e-05, "loss": 0.9576, "step": 1700 }, { "epoch": 0.37, "learning_rate": 1.4427403783787033e-05, "loss": 0.9404, "step": 1701 }, { "epoch": 0.37, "learning_rate": 1.4421026683557227e-05, "loss": 0.9057, "step": 1702 }, { "epoch": 0.37, "learning_rate": 1.4414647347844064e-05, "loss": 0.9866, "step": 1703 }, { "epoch": 0.37, "learning_rate": 1.4408265779873248e-05, "loss": 1.0105, "step": 1704 }, { "epoch": 0.37, "learning_rate": 1.4401881982871604e-05, "loss": 0.9065, "step": 1705 }, { "epoch": 0.37, "learning_rate": 1.4395495960067088e-05, "loss": 0.9804, "step": 1706 }, { "epoch": 0.37, "learning_rate": 1.4389107714688777e-05, "loss": 0.9709, "step": 1707 }, { "epoch": 0.37, "learning_rate": 1.4382717249966875e-05, "loss": 0.9595, "step": 1708 }, { "epoch": 0.38, "learning_rate": 1.4376324569132712e-05, "loss": 0.9821, "step": 1709 }, { "epoch": 0.38, "learning_rate": 1.436992967541873e-05, "loss": 0.9669, "step": 1710 }, { "epoch": 0.38, "learning_rate": 1.4363532572058497e-05, "loss": 0.9957, "step": 1711 }, { "epoch": 0.38, "learning_rate": 1.4357133262286693e-05, "loss": 0.9396, "step": 1712 }, { "epoch": 0.38, "learning_rate": 1.4350731749339116e-05, "loss": 0.9646, "step": 1713 }, { "epoch": 0.38, "learning_rate": 1.4344328036452679e-05, "loss": 0.9511, "step": 1714 }, { "epoch": 0.38, "learning_rate": 1.433792212686541e-05, "loss": 0.9627, "step": 1715 }, { "epoch": 0.38, "learning_rate": 1.4331514023816442e-05, "loss": 0.9488, "step": 1716 }, { "epoch": 0.38, "learning_rate": 1.4325103730546018e-05, "loss": 0.9388, "step": 1717 }, { "epoch": 0.38, "learning_rate": 1.4318691250295491e-05, "loss": 0.9929, "step": 1718 }, { "epoch": 0.38, "learning_rate": 1.431227658630732e-05, "loss": 0.9825, "step": 1719 }, { "epoch": 0.38, "learning_rate": 1.4305859741825068e-05, "loss": 1.0717, "step": 1720 }, { "epoch": 0.38, "learning_rate": 1.4299440720093397e-05, "loss": 0.9867, "step": 1721 }, { "epoch": 0.38, "learning_rate": 1.429301952435807e-05, "loss": 0.9943, "step": 1722 }, { "epoch": 0.38, "learning_rate": 1.4286596157865963e-05, "loss": 0.9532, "step": 1723 }, { "epoch": 0.38, "learning_rate": 1.4280170623865025e-05, "loss": 0.9276, "step": 1724 }, { "epoch": 0.38, "learning_rate": 1.4273742925604322e-05, "loss": 0.9428, "step": 1725 }, { "epoch": 0.38, "learning_rate": 1.4267313066334005e-05, "loss": 0.9259, "step": 1726 }, { "epoch": 0.38, "learning_rate": 1.4260881049305323e-05, "loss": 0.9571, "step": 1727 }, { "epoch": 0.38, "learning_rate": 1.4254446877770605e-05, "loss": 0.9649, "step": 1728 }, { "epoch": 0.38, "learning_rate": 1.4248010554983286e-05, "loss": 1.0231, "step": 1729 }, { "epoch": 0.38, "learning_rate": 1.424157208419787e-05, "loss": 0.9347, "step": 1730 }, { "epoch": 0.38, "learning_rate": 1.4235131468669967e-05, "loss": 0.9265, "step": 1731 }, { "epoch": 0.38, "learning_rate": 1.4228688711656254e-05, "loss": 0.9687, "step": 1732 }, { "epoch": 0.38, "learning_rate": 1.42222438164145e-05, "loss": 0.8897, "step": 1733 }, { "epoch": 0.38, "learning_rate": 1.4215796786203557e-05, "loss": 1.0162, "step": 1734 }, { "epoch": 0.38, "learning_rate": 1.4209347624283352e-05, "loss": 0.9407, "step": 1735 }, { "epoch": 0.38, "learning_rate": 1.4202896333914889e-05, "loss": 0.907, "step": 1736 }, { "epoch": 0.38, "learning_rate": 1.4196442918360247e-05, "loss": 0.9977, "step": 1737 }, { "epoch": 0.38, "learning_rate": 1.4189987380882589e-05, "loss": 0.9642, "step": 1738 }, { "epoch": 0.38, "learning_rate": 1.4183529724746142e-05, "loss": 1.0372, "step": 1739 }, { "epoch": 0.38, "learning_rate": 1.4177069953216207e-05, "loss": 0.9945, "step": 1740 }, { "epoch": 0.38, "learning_rate": 1.4170608069559153e-05, "loss": 0.9462, "step": 1741 }, { "epoch": 0.38, "learning_rate": 1.4164144077042421e-05, "loss": 0.3119, "step": 1742 }, { "epoch": 0.38, "learning_rate": 1.4157677978934516e-05, "loss": 0.9792, "step": 1743 }, { "epoch": 0.38, "learning_rate": 1.4151209778505006e-05, "loss": 1.0246, "step": 1744 }, { "epoch": 0.38, "learning_rate": 1.4144739479024527e-05, "loss": 0.9922, "step": 1745 }, { "epoch": 0.38, "learning_rate": 1.413826708376477e-05, "loss": 0.9709, "step": 1746 }, { "epoch": 0.38, "learning_rate": 1.4131792595998486e-05, "loss": 0.9797, "step": 1747 }, { "epoch": 0.38, "learning_rate": 1.4125316018999493e-05, "loss": 0.2837, "step": 1748 }, { "epoch": 0.38, "learning_rate": 1.4118837356042656e-05, "loss": 0.9602, "step": 1749 }, { "epoch": 0.38, "learning_rate": 1.4112356610403897e-05, "loss": 0.9817, "step": 1750 }, { "epoch": 0.38, "learning_rate": 1.4105873785360197e-05, "loss": 0.9293, "step": 1751 }, { "epoch": 0.38, "learning_rate": 1.4099388884189577e-05, "loss": 0.9786, "step": 1752 }, { "epoch": 0.38, "learning_rate": 1.409290191017112e-05, "loss": 0.9362, "step": 1753 }, { "epoch": 0.39, "learning_rate": 1.408641286658495e-05, "loss": 0.9698, "step": 1754 }, { "epoch": 0.39, "learning_rate": 1.4079921756712238e-05, "loss": 0.9266, "step": 1755 }, { "epoch": 0.39, "learning_rate": 1.4073428583835199e-05, "loss": 0.316, "step": 1756 }, { "epoch": 0.39, "learning_rate": 1.4066933351237095e-05, "loss": 0.9448, "step": 1757 }, { "epoch": 0.39, "learning_rate": 1.4060436062202229e-05, "loss": 0.9214, "step": 1758 }, { "epoch": 0.39, "learning_rate": 1.4053936720015942e-05, "loss": 1.0222, "step": 1759 }, { "epoch": 0.39, "learning_rate": 1.4047435327964609e-05, "loss": 0.9847, "step": 1760 }, { "epoch": 0.39, "learning_rate": 1.4040931889335649e-05, "loss": 0.9443, "step": 1761 }, { "epoch": 0.39, "learning_rate": 1.4034426407417517e-05, "loss": 0.9947, "step": 1762 }, { "epoch": 0.39, "learning_rate": 1.4027918885499692e-05, "loss": 0.9125, "step": 1763 }, { "epoch": 0.39, "learning_rate": 1.4021409326872688e-05, "loss": 0.9431, "step": 1764 }, { "epoch": 0.39, "learning_rate": 1.4014897734828055e-05, "loss": 0.9601, "step": 1765 }, { "epoch": 0.39, "learning_rate": 1.4008384112658363e-05, "loss": 0.9193, "step": 1766 }, { "epoch": 0.39, "learning_rate": 1.4001868463657214e-05, "loss": 0.9505, "step": 1767 }, { "epoch": 0.39, "learning_rate": 1.3995350791119232e-05, "loss": 0.985, "step": 1768 }, { "epoch": 0.39, "learning_rate": 1.3988831098340065e-05, "loss": 0.9604, "step": 1769 }, { "epoch": 0.39, "learning_rate": 1.3982309388616385e-05, "loss": 1.0054, "step": 1770 }, { "epoch": 0.39, "learning_rate": 1.397578566524588e-05, "loss": 0.9885, "step": 1771 }, { "epoch": 0.39, "learning_rate": 1.3969259931527257e-05, "loss": 0.9341, "step": 1772 }, { "epoch": 0.39, "learning_rate": 1.3962732190760243e-05, "loss": 0.9894, "step": 1773 }, { "epoch": 0.39, "learning_rate": 1.3956202446245572e-05, "loss": 0.9956, "step": 1774 }, { "epoch": 0.39, "learning_rate": 1.3949670701285e-05, "loss": 0.9684, "step": 1775 }, { "epoch": 0.39, "learning_rate": 1.394313695918129e-05, "loss": 0.9624, "step": 1776 }, { "epoch": 0.39, "learning_rate": 1.3936601223238217e-05, "loss": 0.9873, "step": 1777 }, { "epoch": 0.39, "learning_rate": 1.3930063496760562e-05, "loss": 0.947, "step": 1778 }, { "epoch": 0.39, "learning_rate": 1.3923523783054114e-05, "loss": 0.9435, "step": 1779 }, { "epoch": 0.39, "learning_rate": 1.3916982085425663e-05, "loss": 0.9982, "step": 1780 }, { "epoch": 0.39, "learning_rate": 1.3910438407183009e-05, "loss": 1.0088, "step": 1781 }, { "epoch": 0.39, "learning_rate": 1.3903892751634949e-05, "loss": 0.9616, "step": 1782 }, { "epoch": 0.39, "learning_rate": 1.3897345122091283e-05, "loss": 0.9481, "step": 1783 }, { "epoch": 0.39, "learning_rate": 1.3890795521862802e-05, "loss": 1.0083, "step": 1784 }, { "epoch": 0.39, "learning_rate": 1.38842439542613e-05, "loss": 1.0449, "step": 1785 }, { "epoch": 0.39, "learning_rate": 1.3877690422599566e-05, "loss": 0.9823, "step": 1786 }, { "epoch": 0.39, "learning_rate": 1.3871134930191379e-05, "loss": 1.0098, "step": 1787 }, { "epoch": 0.39, "learning_rate": 1.3864577480351508e-05, "loss": 0.9574, "step": 1788 }, { "epoch": 0.39, "learning_rate": 1.385801807639572e-05, "loss": 0.9923, "step": 1789 }, { "epoch": 0.39, "learning_rate": 1.3851456721640762e-05, "loss": 1.0241, "step": 1790 }, { "epoch": 0.39, "learning_rate": 1.3844893419404369e-05, "loss": 0.9777, "step": 1791 }, { "epoch": 0.39, "learning_rate": 1.3838328173005264e-05, "loss": 0.9512, "step": 1792 }, { "epoch": 0.39, "learning_rate": 1.3831760985763147e-05, "loss": 0.9192, "step": 1793 }, { "epoch": 0.39, "learning_rate": 1.382519186099871e-05, "loss": 0.9647, "step": 1794 }, { "epoch": 0.39, "learning_rate": 1.3818620802033609e-05, "loss": 0.9474, "step": 1795 }, { "epoch": 0.39, "learning_rate": 1.3812047812190492e-05, "loss": 0.9452, "step": 1796 }, { "epoch": 0.39, "learning_rate": 1.380547289479298e-05, "loss": 0.9048, "step": 1797 }, { "epoch": 0.39, "learning_rate": 1.379889605316566e-05, "loss": 0.917, "step": 1798 }, { "epoch": 0.39, "learning_rate": 1.3792317290634106e-05, "loss": 0.9118, "step": 1799 }, { "epoch": 0.4, "learning_rate": 1.3785736610524854e-05, "loss": 0.9483, "step": 1800 }, { "epoch": 0.4, "learning_rate": 1.377915401616541e-05, "loss": 1.0189, "step": 1801 }, { "epoch": 0.4, "learning_rate": 1.377256951088425e-05, "loss": 0.9455, "step": 1802 }, { "epoch": 0.4, "learning_rate": 1.376598309801082e-05, "loss": 0.9159, "step": 1803 }, { "epoch": 0.4, "learning_rate": 1.3759394780875523e-05, "loss": 0.9532, "step": 1804 }, { "epoch": 0.4, "learning_rate": 1.3752804562809731e-05, "loss": 1.0023, "step": 1805 }, { "epoch": 0.4, "learning_rate": 1.374621244714577e-05, "loss": 0.966, "step": 1806 }, { "epoch": 0.4, "learning_rate": 1.3739618437216932e-05, "loss": 0.8997, "step": 1807 }, { "epoch": 0.4, "learning_rate": 1.373302253635747e-05, "loss": 0.9645, "step": 1808 }, { "epoch": 0.4, "learning_rate": 1.3726424747902581e-05, "loss": 0.9562, "step": 1809 }, { "epoch": 0.4, "learning_rate": 1.3719825075188427e-05, "loss": 0.9201, "step": 1810 }, { "epoch": 0.4, "learning_rate": 1.3713223521552118e-05, "loss": 0.2926, "step": 1811 }, { "epoch": 0.4, "learning_rate": 1.3706620090331718e-05, "loss": 0.9051, "step": 1812 }, { "epoch": 0.4, "learning_rate": 1.370001478486624e-05, "loss": 0.979, "step": 1813 }, { "epoch": 0.4, "learning_rate": 1.3693407608495643e-05, "loss": 1.0011, "step": 1814 }, { "epoch": 0.4, "learning_rate": 1.3686798564560831e-05, "loss": 0.9372, "step": 1815 }, { "epoch": 0.4, "learning_rate": 1.3680187656403657e-05, "loss": 0.968, "step": 1816 }, { "epoch": 0.4, "learning_rate": 1.3673574887366907e-05, "loss": 0.9497, "step": 1817 }, { "epoch": 0.4, "learning_rate": 1.366696026079432e-05, "loss": 0.9617, "step": 1818 }, { "epoch": 0.4, "learning_rate": 1.3660343780030567e-05, "loss": 0.9399, "step": 1819 }, { "epoch": 0.4, "learning_rate": 1.3653725448421258e-05, "loss": 0.8923, "step": 1820 }, { "epoch": 0.4, "learning_rate": 1.3647105269312942e-05, "loss": 0.9387, "step": 1821 }, { "epoch": 0.4, "learning_rate": 1.3640483246053096e-05, "loss": 0.2955, "step": 1822 }, { "epoch": 0.4, "learning_rate": 1.3633859381990136e-05, "loss": 1.0093, "step": 1823 }, { "epoch": 0.4, "learning_rate": 1.3627233680473402e-05, "loss": 0.9721, "step": 1824 }, { "epoch": 0.4, "learning_rate": 1.362060614485317e-05, "loss": 0.9603, "step": 1825 }, { "epoch": 0.4, "learning_rate": 1.361397677848064e-05, "loss": 0.9128, "step": 1826 }, { "epoch": 0.4, "learning_rate": 1.3607345584707937e-05, "loss": 0.9373, "step": 1827 }, { "epoch": 0.4, "learning_rate": 1.3600712566888107e-05, "loss": 1.0008, "step": 1828 }, { "epoch": 0.4, "learning_rate": 1.3594077728375129e-05, "loss": 0.9975, "step": 1829 }, { "epoch": 0.4, "learning_rate": 1.3587441072523893e-05, "loss": 0.9578, "step": 1830 }, { "epoch": 0.4, "learning_rate": 1.3580802602690211e-05, "loss": 1.0205, "step": 1831 }, { "epoch": 0.4, "learning_rate": 1.357416232223081e-05, "loss": 0.9974, "step": 1832 }, { "epoch": 0.4, "learning_rate": 1.356752023450334e-05, "loss": 0.9499, "step": 1833 }, { "epoch": 0.4, "learning_rate": 1.3560876342866355e-05, "loss": 0.9448, "step": 1834 }, { "epoch": 0.4, "learning_rate": 1.3554230650679326e-05, "loss": 1.0069, "step": 1835 }, { "epoch": 0.4, "learning_rate": 1.3547583161302635e-05, "loss": 1.034, "step": 1836 }, { "epoch": 0.4, "learning_rate": 1.354093387809757e-05, "loss": 0.9787, "step": 1837 }, { "epoch": 0.4, "learning_rate": 1.353428280442633e-05, "loss": 0.9632, "step": 1838 }, { "epoch": 0.4, "learning_rate": 1.3527629943652019e-05, "loss": 0.9356, "step": 1839 }, { "epoch": 0.4, "learning_rate": 1.3520975299138637e-05, "loss": 0.9784, "step": 1840 }, { "epoch": 0.4, "learning_rate": 1.3514318874251097e-05, "loss": 0.9262, "step": 1841 }, { "epoch": 0.4, "learning_rate": 1.3507660672355206e-05, "loss": 0.98, "step": 1842 }, { "epoch": 0.4, "learning_rate": 1.3501000696817668e-05, "loss": 0.9438, "step": 1843 }, { "epoch": 0.4, "learning_rate": 1.3494338951006087e-05, "loss": 0.9916, "step": 1844 }, { "epoch": 0.41, "learning_rate": 1.3487675438288965e-05, "loss": 0.9508, "step": 1845 }, { "epoch": 0.41, "learning_rate": 1.3481010162035691e-05, "loss": 0.9758, "step": 1846 }, { "epoch": 0.41, "learning_rate": 1.3474343125616546e-05, "loss": 0.9513, "step": 1847 }, { "epoch": 0.41, "learning_rate": 1.3467674332402709e-05, "loss": 0.9277, "step": 1848 }, { "epoch": 0.41, "learning_rate": 1.3461003785766243e-05, "loss": 0.9695, "step": 1849 }, { "epoch": 0.41, "learning_rate": 1.345433148908009e-05, "loss": 0.9733, "step": 1850 }, { "epoch": 0.41, "learning_rate": 1.3447657445718087e-05, "loss": 1.009, "step": 1851 }, { "epoch": 0.41, "learning_rate": 1.3440981659054954e-05, "loss": 0.9471, "step": 1852 }, { "epoch": 0.41, "learning_rate": 1.3434304132466282e-05, "loss": 0.3009, "step": 1853 }, { "epoch": 0.41, "learning_rate": 1.3427624869328558e-05, "loss": 1.0029, "step": 1854 }, { "epoch": 0.41, "learning_rate": 1.3420943873019128e-05, "loss": 1.0009, "step": 1855 }, { "epoch": 0.41, "learning_rate": 1.3414261146916231e-05, "loss": 0.9835, "step": 1856 }, { "epoch": 0.41, "learning_rate": 1.3407576694398977e-05, "loss": 0.9365, "step": 1857 }, { "epoch": 0.41, "learning_rate": 1.3400890518847338e-05, "loss": 0.9272, "step": 1858 }, { "epoch": 0.41, "learning_rate": 1.3394202623642172e-05, "loss": 0.9443, "step": 1859 }, { "epoch": 0.41, "learning_rate": 1.3387513012165196e-05, "loss": 0.9165, "step": 1860 }, { "epoch": 0.41, "learning_rate": 1.3380821687799001e-05, "loss": 0.9972, "step": 1861 }, { "epoch": 0.41, "learning_rate": 1.3374128653927041e-05, "loss": 0.9608, "step": 1862 }, { "epoch": 0.41, "learning_rate": 1.3367433913933636e-05, "loss": 1.0296, "step": 1863 }, { "epoch": 0.41, "learning_rate": 1.3360737471203966e-05, "loss": 0.8874, "step": 1864 }, { "epoch": 0.41, "learning_rate": 1.3354039329124074e-05, "loss": 1.0045, "step": 1865 }, { "epoch": 0.41, "learning_rate": 1.3347339491080865e-05, "loss": 0.9932, "step": 1866 }, { "epoch": 0.41, "learning_rate": 1.3340637960462097e-05, "loss": 0.9561, "step": 1867 }, { "epoch": 0.41, "learning_rate": 1.3333934740656384e-05, "loss": 0.9509, "step": 1868 }, { "epoch": 0.41, "learning_rate": 1.3327229835053202e-05, "loss": 0.9777, "step": 1869 }, { "epoch": 0.41, "learning_rate": 1.3320523247042867e-05, "loss": 0.9453, "step": 1870 }, { "epoch": 0.41, "learning_rate": 1.3313814980016554e-05, "loss": 0.9177, "step": 1871 }, { "epoch": 0.41, "learning_rate": 1.3307105037366281e-05, "loss": 0.9587, "step": 1872 }, { "epoch": 0.41, "learning_rate": 1.3300393422484924e-05, "loss": 0.9666, "step": 1873 }, { "epoch": 0.41, "learning_rate": 1.3293680138766194e-05, "loss": 0.9529, "step": 1874 }, { "epoch": 0.41, "learning_rate": 1.3286965189604648e-05, "loss": 0.9177, "step": 1875 }, { "epoch": 0.41, "learning_rate": 1.328024857839569e-05, "loss": 0.9676, "step": 1876 }, { "epoch": 0.41, "learning_rate": 1.3273530308535562e-05, "loss": 0.9319, "step": 1877 }, { "epoch": 0.41, "learning_rate": 1.326681038342134e-05, "loss": 0.933, "step": 1878 }, { "epoch": 0.41, "learning_rate": 1.3260088806450944e-05, "loss": 0.9305, "step": 1879 }, { "epoch": 0.41, "learning_rate": 1.3253365581023123e-05, "loss": 0.9816, "step": 1880 }, { "epoch": 0.41, "learning_rate": 1.3246640710537464e-05, "loss": 0.9591, "step": 1881 }, { "epoch": 0.41, "learning_rate": 1.3239914198394383e-05, "loss": 0.9577, "step": 1882 }, { "epoch": 0.41, "learning_rate": 1.323318604799513e-05, "loss": 0.951, "step": 1883 }, { "epoch": 0.41, "learning_rate": 1.3226456262741781e-05, "loss": 0.9533, "step": 1884 }, { "epoch": 0.41, "learning_rate": 1.3219724846037237e-05, "loss": 0.2979, "step": 1885 }, { "epoch": 0.41, "learning_rate": 1.3212991801285222e-05, "loss": 0.9584, "step": 1886 }, { "epoch": 0.41, "learning_rate": 1.3206257131890294e-05, "loss": 0.9146, "step": 1887 }, { "epoch": 0.41, "learning_rate": 1.3199520841257822e-05, "loss": 0.9336, "step": 1888 }, { "epoch": 0.41, "learning_rate": 1.3192782932793997e-05, "loss": 0.9342, "step": 1889 }, { "epoch": 0.41, "learning_rate": 1.3186043409905831e-05, "loss": 0.9758, "step": 1890 }, { "epoch": 0.42, "learning_rate": 1.3179302276001144e-05, "loss": 0.9081, "step": 1891 }, { "epoch": 0.42, "learning_rate": 1.3172559534488587e-05, "loss": 0.9489, "step": 1892 }, { "epoch": 0.42, "learning_rate": 1.3165815188777606e-05, "loss": 0.2908, "step": 1893 }, { "epoch": 0.42, "learning_rate": 1.315906924227847e-05, "loss": 0.9635, "step": 1894 }, { "epoch": 0.42, "learning_rate": 1.3152321698402253e-05, "loss": 0.9653, "step": 1895 }, { "epoch": 0.42, "learning_rate": 1.3145572560560836e-05, "loss": 1.0276, "step": 1896 }, { "epoch": 0.42, "learning_rate": 1.3138821832166909e-05, "loss": 1.0367, "step": 1897 }, { "epoch": 0.42, "learning_rate": 1.313206951663396e-05, "loss": 0.9554, "step": 1898 }, { "epoch": 0.42, "learning_rate": 1.3125315617376286e-05, "loss": 0.9139, "step": 1899 }, { "epoch": 0.42, "learning_rate": 1.3118560137808985e-05, "loss": 0.9242, "step": 1900 }, { "epoch": 0.42, "learning_rate": 1.3111803081347952e-05, "loss": 0.944, "step": 1901 }, { "epoch": 0.42, "learning_rate": 1.3105044451409876e-05, "loss": 0.3023, "step": 1902 }, { "epoch": 0.42, "learning_rate": 1.3098284251412246e-05, "loss": 0.9828, "step": 1903 }, { "epoch": 0.42, "learning_rate": 1.3091522484773344e-05, "loss": 1.0173, "step": 1904 }, { "epoch": 0.42, "learning_rate": 1.3084759154912243e-05, "loss": 0.9374, "step": 1905 }, { "epoch": 0.42, "learning_rate": 1.3077994265248812e-05, "loss": 0.8769, "step": 1906 }, { "epoch": 0.42, "learning_rate": 1.3071227819203698e-05, "loss": 0.9534, "step": 1907 }, { "epoch": 0.42, "learning_rate": 1.3064459820198342e-05, "loss": 1.0229, "step": 1908 }, { "epoch": 0.42, "learning_rate": 1.305769027165497e-05, "loss": 0.3104, "step": 1909 }, { "epoch": 0.42, "learning_rate": 1.3050919176996592e-05, "loss": 0.9821, "step": 1910 }, { "epoch": 0.42, "learning_rate": 1.3044146539646998e-05, "loss": 0.9181, "step": 1911 }, { "epoch": 0.42, "learning_rate": 1.3037372363030759e-05, "loss": 0.966, "step": 1912 }, { "epoch": 0.42, "learning_rate": 1.3030596650573222e-05, "loss": 0.9622, "step": 1913 }, { "epoch": 0.42, "learning_rate": 1.3023819405700517e-05, "loss": 0.9002, "step": 1914 }, { "epoch": 0.42, "learning_rate": 1.3017040631839536e-05, "loss": 0.9599, "step": 1915 }, { "epoch": 0.42, "learning_rate": 1.3010260332417964e-05, "loss": 0.9981, "step": 1916 }, { "epoch": 0.42, "learning_rate": 1.3003478510864234e-05, "loss": 0.9234, "step": 1917 }, { "epoch": 0.42, "learning_rate": 1.2996695170607567e-05, "loss": 0.9258, "step": 1918 }, { "epoch": 0.42, "learning_rate": 1.2989910315077947e-05, "loss": 0.8833, "step": 1919 }, { "epoch": 0.42, "learning_rate": 1.298312394770612e-05, "loss": 1.0125, "step": 1920 }, { "epoch": 0.42, "learning_rate": 1.2976336071923599e-05, "loss": 0.9311, "step": 1921 }, { "epoch": 0.42, "learning_rate": 1.296954669116266e-05, "loss": 0.9233, "step": 1922 }, { "epoch": 0.42, "learning_rate": 1.2962755808856341e-05, "loss": 0.984, "step": 1923 }, { "epoch": 0.42, "learning_rate": 1.2955963428438438e-05, "loss": 0.8889, "step": 1924 }, { "epoch": 0.42, "learning_rate": 1.2949169553343504e-05, "loss": 1.0338, "step": 1925 }, { "epoch": 0.42, "learning_rate": 1.2942374187006847e-05, "loss": 0.9641, "step": 1926 }, { "epoch": 0.42, "learning_rate": 1.2935577332864535e-05, "loss": 0.9781, "step": 1927 }, { "epoch": 0.42, "learning_rate": 1.292877899435338e-05, "loss": 0.942, "step": 1928 }, { "epoch": 0.42, "learning_rate": 1.2921979174910949e-05, "loss": 0.965, "step": 1929 }, { "epoch": 0.42, "learning_rate": 1.2915177877975556e-05, "loss": 0.9441, "step": 1930 }, { "epoch": 0.42, "learning_rate": 1.2908375106986263e-05, "loss": 0.9755, "step": 1931 }, { "epoch": 0.42, "learning_rate": 1.2901570865382886e-05, "loss": 0.9256, "step": 1932 }, { "epoch": 0.42, "learning_rate": 1.2894765156605967e-05, "loss": 0.9071, "step": 1933 }, { "epoch": 0.42, "learning_rate": 1.2887957984096797e-05, "loss": 0.9122, "step": 1934 }, { "epoch": 0.42, "learning_rate": 1.288114935129742e-05, "loss": 0.9372, "step": 1935 }, { "epoch": 0.42, "learning_rate": 1.2874339261650596e-05, "loss": 0.9453, "step": 1936 }, { "epoch": 0.43, "learning_rate": 1.286752771859984e-05, "loss": 0.9175, "step": 1937 }, { "epoch": 0.43, "learning_rate": 1.2860714725589399e-05, "loss": 0.9688, "step": 1938 }, { "epoch": 0.43, "learning_rate": 1.285390028606424e-05, "loss": 0.945, "step": 1939 }, { "epoch": 0.43, "learning_rate": 1.2847084403470076e-05, "loss": 0.9205, "step": 1940 }, { "epoch": 0.43, "learning_rate": 1.2840267081253346e-05, "loss": 0.9778, "step": 1941 }, { "epoch": 0.43, "learning_rate": 1.283344832286121e-05, "loss": 0.9929, "step": 1942 }, { "epoch": 0.43, "learning_rate": 1.2826628131741564e-05, "loss": 0.9501, "step": 1943 }, { "epoch": 0.43, "learning_rate": 1.2819806511343023e-05, "loss": 0.9226, "step": 1944 }, { "epoch": 0.43, "learning_rate": 1.2812983465114925e-05, "loss": 0.9522, "step": 1945 }, { "epoch": 0.43, "learning_rate": 1.2806158996507331e-05, "loss": 0.9722, "step": 1946 }, { "epoch": 0.43, "learning_rate": 1.2799333108971017e-05, "loss": 0.9779, "step": 1947 }, { "epoch": 0.43, "learning_rate": 1.2792505805957484e-05, "loss": 0.9556, "step": 1948 }, { "epoch": 0.43, "learning_rate": 1.2785677090918943e-05, "loss": 0.9753, "step": 1949 }, { "epoch": 0.43, "learning_rate": 1.277884696730832e-05, "loss": 0.9667, "step": 1950 }, { "epoch": 0.43, "learning_rate": 1.277201543857925e-05, "loss": 0.9246, "step": 1951 }, { "epoch": 0.43, "learning_rate": 1.2765182508186085e-05, "loss": 0.9523, "step": 1952 }, { "epoch": 0.43, "learning_rate": 1.2758348179583884e-05, "loss": 0.9773, "step": 1953 }, { "epoch": 0.43, "learning_rate": 1.2751512456228408e-05, "loss": 0.9391, "step": 1954 }, { "epoch": 0.43, "learning_rate": 1.274467534157613e-05, "loss": 0.9539, "step": 1955 }, { "epoch": 0.43, "learning_rate": 1.273783683908422e-05, "loss": 0.9553, "step": 1956 }, { "epoch": 0.43, "learning_rate": 1.2730996952210558e-05, "loss": 0.9202, "step": 1957 }, { "epoch": 0.43, "learning_rate": 1.2724155684413715e-05, "loss": 0.9314, "step": 1958 }, { "epoch": 0.43, "learning_rate": 1.2717313039152968e-05, "loss": 0.9652, "step": 1959 }, { "epoch": 0.43, "learning_rate": 1.2710469019888283e-05, "loss": 0.9997, "step": 1960 }, { "epoch": 0.43, "learning_rate": 1.2703623630080328e-05, "loss": 0.3353, "step": 1961 }, { "epoch": 0.43, "learning_rate": 1.269677687319046e-05, "loss": 0.9407, "step": 1962 }, { "epoch": 0.43, "learning_rate": 1.2689928752680728e-05, "loss": 0.9549, "step": 1963 }, { "epoch": 0.43, "learning_rate": 1.2683079272013867e-05, "loss": 0.9473, "step": 1964 }, { "epoch": 0.43, "learning_rate": 1.2676228434653307e-05, "loss": 0.9775, "step": 1965 }, { "epoch": 0.43, "learning_rate": 1.266937624406316e-05, "loss": 0.9374, "step": 1966 }, { "epoch": 0.43, "learning_rate": 1.2662522703708225e-05, "loss": 0.9794, "step": 1967 }, { "epoch": 0.43, "learning_rate": 1.2655667817053977e-05, "loss": 0.3171, "step": 1968 }, { "epoch": 0.43, "learning_rate": 1.2648811587566576e-05, "loss": 0.9507, "step": 1969 }, { "epoch": 0.43, "learning_rate": 1.2641954018712863e-05, "loss": 0.9569, "step": 1970 }, { "epoch": 0.43, "learning_rate": 1.2635095113960351e-05, "loss": 0.9768, "step": 1971 }, { "epoch": 0.43, "learning_rate": 1.2628234876777234e-05, "loss": 0.8888, "step": 1972 }, { "epoch": 0.43, "learning_rate": 1.262137331063238e-05, "loss": 1.0051, "step": 1973 }, { "epoch": 0.43, "learning_rate": 1.2614510418995318e-05, "loss": 1.0002, "step": 1974 }, { "epoch": 0.43, "learning_rate": 1.2607646205336264e-05, "loss": 0.9222, "step": 1975 }, { "epoch": 0.43, "learning_rate": 1.260078067312609e-05, "loss": 0.9792, "step": 1976 }, { "epoch": 0.43, "learning_rate": 1.2593913825836341e-05, "loss": 1.0207, "step": 1977 }, { "epoch": 0.43, "learning_rate": 1.2587045666939219e-05, "loss": 1.0168, "step": 1978 }, { "epoch": 0.43, "learning_rate": 1.2580176199907602e-05, "loss": 0.9772, "step": 1979 }, { "epoch": 0.43, "learning_rate": 1.2573305428215017e-05, "loss": 0.9274, "step": 1980 }, { "epoch": 0.43, "learning_rate": 1.2566433355335658e-05, "loss": 0.9807, "step": 1981 }, { "epoch": 0.44, "learning_rate": 1.2559559984744376e-05, "loss": 0.9595, "step": 1982 }, { "epoch": 0.44, "learning_rate": 1.2552685319916678e-05, "loss": 0.9246, "step": 1983 }, { "epoch": 0.44, "learning_rate": 1.254580936432872e-05, "loss": 0.9267, "step": 1984 }, { "epoch": 0.44, "learning_rate": 1.2538932121457322e-05, "loss": 0.9806, "step": 1985 }, { "epoch": 0.44, "learning_rate": 1.2532053594779945e-05, "loss": 0.9961, "step": 1986 }, { "epoch": 0.44, "learning_rate": 1.2525173787774698e-05, "loss": 0.9675, "step": 1987 }, { "epoch": 0.44, "learning_rate": 1.2518292703920347e-05, "loss": 0.9411, "step": 1988 }, { "epoch": 0.44, "learning_rate": 1.2511410346696295e-05, "loss": 0.3236, "step": 1989 }, { "epoch": 0.44, "learning_rate": 1.2504526719582596e-05, "loss": 1.0055, "step": 1990 }, { "epoch": 0.44, "learning_rate": 1.2497641826059941e-05, "loss": 1.0083, "step": 1991 }, { "epoch": 0.44, "learning_rate": 1.2490755669609663e-05, "loss": 0.9927, "step": 1992 }, { "epoch": 0.44, "learning_rate": 1.2483868253713732e-05, "loss": 0.9342, "step": 1993 }, { "epoch": 0.44, "learning_rate": 1.2476979581854753e-05, "loss": 0.9236, "step": 1994 }, { "epoch": 0.44, "learning_rate": 1.247008965751598e-05, "loss": 0.9436, "step": 1995 }, { "epoch": 0.44, "learning_rate": 1.2463198484181277e-05, "loss": 0.9626, "step": 1996 }, { "epoch": 0.44, "learning_rate": 1.2456306065335159e-05, "loss": 0.9644, "step": 1997 }, { "epoch": 0.44, "learning_rate": 1.2449412404462761e-05, "loss": 0.9264, "step": 1998 }, { "epoch": 0.44, "learning_rate": 1.2442517505049854e-05, "loss": 0.9836, "step": 1999 }, { "epoch": 0.44, "learning_rate": 1.2435621370582824e-05, "loss": 0.9088, "step": 2000 }, { "epoch": 0.44, "learning_rate": 1.242872400454869e-05, "loss": 0.9878, "step": 2001 }, { "epoch": 0.44, "learning_rate": 1.2421825410435094e-05, "loss": 0.9786, "step": 2002 }, { "epoch": 0.44, "learning_rate": 1.2414925591730289e-05, "loss": 0.9539, "step": 2003 }, { "epoch": 0.44, "learning_rate": 1.2408024551923163e-05, "loss": 0.9244, "step": 2004 }, { "epoch": 0.44, "learning_rate": 1.2401122294503212e-05, "loss": 0.9585, "step": 2005 }, { "epoch": 0.44, "learning_rate": 1.2394218822960547e-05, "loss": 0.9889, "step": 2006 }, { "epoch": 0.44, "learning_rate": 1.2387314140785889e-05, "loss": 0.3077, "step": 2007 }, { "epoch": 0.44, "learning_rate": 1.2380408251470587e-05, "loss": 0.9615, "step": 2008 }, { "epoch": 0.44, "learning_rate": 1.237350115850658e-05, "loss": 0.9286, "step": 2009 }, { "epoch": 0.44, "learning_rate": 1.2366592865386433e-05, "loss": 0.9496, "step": 2010 }, { "epoch": 0.44, "learning_rate": 1.2359683375603311e-05, "loss": 0.9365, "step": 2011 }, { "epoch": 0.44, "learning_rate": 1.2352772692650979e-05, "loss": 0.3187, "step": 2012 }, { "epoch": 0.44, "learning_rate": 1.2345860820023811e-05, "loss": 0.976, "step": 2013 }, { "epoch": 0.44, "learning_rate": 1.2338947761216787e-05, "loss": 1.0312, "step": 2014 }, { "epoch": 0.44, "learning_rate": 1.2332033519725474e-05, "loss": 1.0674, "step": 2015 }, { "epoch": 0.44, "learning_rate": 1.2325118099046052e-05, "loss": 0.9761, "step": 2016 }, { "epoch": 0.44, "learning_rate": 1.2318201502675285e-05, "loss": 0.9772, "step": 2017 }, { "epoch": 0.44, "learning_rate": 1.2311283734110535e-05, "loss": 0.9049, "step": 2018 }, { "epoch": 0.44, "learning_rate": 1.2304364796849762e-05, "loss": 0.9514, "step": 2019 }, { "epoch": 0.44, "learning_rate": 1.2297444694391509e-05, "loss": 0.9903, "step": 2020 }, { "epoch": 0.44, "learning_rate": 1.2290523430234915e-05, "loss": 0.9647, "step": 2021 }, { "epoch": 0.44, "learning_rate": 1.2283601007879702e-05, "loss": 0.9872, "step": 2022 }, { "epoch": 0.44, "learning_rate": 1.2276677430826178e-05, "loss": 0.9526, "step": 2023 }, { "epoch": 0.44, "learning_rate": 1.226975270257524e-05, "loss": 0.9225, "step": 2024 }, { "epoch": 0.44, "learning_rate": 1.2262826826628357e-05, "loss": 0.9749, "step": 2025 }, { "epoch": 0.44, "learning_rate": 1.225589980648759e-05, "loss": 0.9577, "step": 2026 }, { "epoch": 0.44, "learning_rate": 1.2248971645655572e-05, "loss": 0.8349, "step": 2027 }, { "epoch": 0.45, "learning_rate": 1.2242042347635512e-05, "loss": 0.9597, "step": 2028 }, { "epoch": 0.45, "learning_rate": 1.2235111915931198e-05, "loss": 0.2788, "step": 2029 }, { "epoch": 0.45, "learning_rate": 1.2228180354046983e-05, "loss": 0.9288, "step": 2030 }, { "epoch": 0.45, "learning_rate": 1.2221247665487802e-05, "loss": 0.9703, "step": 2031 }, { "epoch": 0.45, "learning_rate": 1.221431385375916e-05, "loss": 1.0161, "step": 2032 }, { "epoch": 0.45, "learning_rate": 1.2207378922367115e-05, "loss": 1.0128, "step": 2033 }, { "epoch": 0.45, "learning_rate": 1.220044287481831e-05, "loss": 0.9589, "step": 2034 }, { "epoch": 0.45, "learning_rate": 1.2193505714619937e-05, "loss": 0.9731, "step": 2035 }, { "epoch": 0.45, "learning_rate": 1.2186567445279763e-05, "loss": 0.9222, "step": 2036 }, { "epoch": 0.45, "learning_rate": 1.2179628070306107e-05, "loss": 0.8945, "step": 2037 }, { "epoch": 0.45, "learning_rate": 1.2172687593207848e-05, "loss": 1.0203, "step": 2038 }, { "epoch": 0.45, "learning_rate": 1.216574601749443e-05, "loss": 0.9608, "step": 2039 }, { "epoch": 0.45, "learning_rate": 1.2158803346675845e-05, "loss": 0.9244, "step": 2040 }, { "epoch": 0.45, "learning_rate": 1.2151859584262638e-05, "loss": 1.0043, "step": 2041 }, { "epoch": 0.45, "learning_rate": 1.2144914733765909e-05, "loss": 0.9612, "step": 2042 }, { "epoch": 0.45, "learning_rate": 1.213796879869731e-05, "loss": 0.927, "step": 2043 }, { "epoch": 0.45, "learning_rate": 1.213102178256904e-05, "loss": 0.9511, "step": 2044 }, { "epoch": 0.45, "learning_rate": 1.2124073688893838e-05, "loss": 0.9774, "step": 2045 }, { "epoch": 0.45, "learning_rate": 1.2117124521185002e-05, "loss": 0.9605, "step": 2046 }, { "epoch": 0.45, "learning_rate": 1.2110174282956359e-05, "loss": 0.9258, "step": 2047 }, { "epoch": 0.45, "learning_rate": 1.2103222977722289e-05, "loss": 0.9304, "step": 2048 }, { "epoch": 0.45, "learning_rate": 1.20962706089977e-05, "loss": 0.9754, "step": 2049 }, { "epoch": 0.45, "learning_rate": 1.2089317180298043e-05, "loss": 0.9373, "step": 2050 }, { "epoch": 0.45, "learning_rate": 1.2082362695139309e-05, "loss": 0.9581, "step": 2051 }, { "epoch": 0.45, "learning_rate": 1.2075407157038017e-05, "loss": 0.9587, "step": 2052 }, { "epoch": 0.45, "learning_rate": 1.2068450569511223e-05, "loss": 0.9745, "step": 2053 }, { "epoch": 0.45, "learning_rate": 1.2061492936076511e-05, "loss": 0.942, "step": 2054 }, { "epoch": 0.45, "learning_rate": 1.2054534260251995e-05, "loss": 1.024, "step": 2055 }, { "epoch": 0.45, "learning_rate": 1.2047574545556313e-05, "loss": 1.0044, "step": 2056 }, { "epoch": 0.45, "learning_rate": 1.2040613795508636e-05, "loss": 0.9122, "step": 2057 }, { "epoch": 0.45, "learning_rate": 1.203365201362865e-05, "loss": 0.9506, "step": 2058 }, { "epoch": 0.45, "learning_rate": 1.2026689203436563e-05, "loss": 1.0382, "step": 2059 }, { "epoch": 0.45, "learning_rate": 1.2019725368453111e-05, "loss": 0.9399, "step": 2060 }, { "epoch": 0.45, "learning_rate": 1.2012760512199541e-05, "loss": 0.9293, "step": 2061 }, { "epoch": 0.45, "learning_rate": 1.2005794638197621e-05, "loss": 0.9679, "step": 2062 }, { "epoch": 0.45, "learning_rate": 1.199882774996963e-05, "loss": 0.9192, "step": 2063 }, { "epoch": 0.45, "learning_rate": 1.1991859851038362e-05, "loss": 0.9767, "step": 2064 }, { "epoch": 0.45, "learning_rate": 1.1984890944927119e-05, "loss": 0.9714, "step": 2065 }, { "epoch": 0.45, "learning_rate": 1.1977921035159716e-05, "loss": 0.8911, "step": 2066 }, { "epoch": 0.45, "learning_rate": 1.1970950125260475e-05, "loss": 0.9838, "step": 2067 }, { "epoch": 0.45, "learning_rate": 1.196397821875422e-05, "loss": 0.8909, "step": 2068 }, { "epoch": 0.45, "learning_rate": 1.1957005319166283e-05, "loss": 0.9235, "step": 2069 }, { "epoch": 0.45, "learning_rate": 1.1950031430022499e-05, "loss": 0.8973, "step": 2070 }, { "epoch": 0.45, "learning_rate": 1.1943056554849197e-05, "loss": 0.936, "step": 2071 }, { "epoch": 0.45, "learning_rate": 1.1936080697173213e-05, "loss": 0.94, "step": 2072 }, { "epoch": 0.46, "learning_rate": 1.1929103860521873e-05, "loss": 0.9334, "step": 2073 }, { "epoch": 0.46, "learning_rate": 1.1922126048423002e-05, "loss": 0.9783, "step": 2074 }, { "epoch": 0.46, "learning_rate": 1.1915147264404916e-05, "loss": 0.9431, "step": 2075 }, { "epoch": 0.46, "learning_rate": 1.1908167511996423e-05, "loss": 0.9617, "step": 2076 }, { "epoch": 0.46, "learning_rate": 1.190118679472682e-05, "loss": 0.9633, "step": 2077 }, { "epoch": 0.46, "learning_rate": 1.1894205116125894e-05, "loss": 0.916, "step": 2078 }, { "epoch": 0.46, "learning_rate": 1.1887222479723919e-05, "loss": 0.9783, "step": 2079 }, { "epoch": 0.46, "learning_rate": 1.1880238889051647e-05, "loss": 0.9681, "step": 2080 }, { "epoch": 0.46, "learning_rate": 1.1873254347640319e-05, "loss": 1.0116, "step": 2081 }, { "epoch": 0.46, "learning_rate": 1.1866268859021654e-05, "loss": 0.8967, "step": 2082 }, { "epoch": 0.46, "learning_rate": 1.1859282426727851e-05, "loss": 0.9693, "step": 2083 }, { "epoch": 0.46, "learning_rate": 1.185229505429159e-05, "loss": 1.0055, "step": 2084 }, { "epoch": 0.46, "learning_rate": 1.1845306745246012e-05, "loss": 0.8944, "step": 2085 }, { "epoch": 0.46, "learning_rate": 1.183831750312475e-05, "loss": 0.994, "step": 2086 }, { "epoch": 0.46, "learning_rate": 1.1831327331461895e-05, "loss": 0.9816, "step": 2087 }, { "epoch": 0.46, "learning_rate": 1.1824336233792018e-05, "loss": 0.9138, "step": 2088 }, { "epoch": 0.46, "learning_rate": 1.1817344213650147e-05, "loss": 0.2968, "step": 2089 }, { "epoch": 0.46, "learning_rate": 1.1810351274571792e-05, "loss": 0.9564, "step": 2090 }, { "epoch": 0.46, "learning_rate": 1.180335742009291e-05, "loss": 0.9616, "step": 2091 }, { "epoch": 0.46, "learning_rate": 1.1796362653749938e-05, "loss": 0.9661, "step": 2092 }, { "epoch": 0.46, "learning_rate": 1.178936697907976e-05, "loss": 1.0038, "step": 2093 }, { "epoch": 0.46, "learning_rate": 1.1782370399619727e-05, "loss": 0.9334, "step": 2094 }, { "epoch": 0.46, "learning_rate": 1.1775372918907646e-05, "loss": 0.9035, "step": 2095 }, { "epoch": 0.46, "learning_rate": 1.1768374540481776e-05, "loss": 0.8882, "step": 2096 }, { "epoch": 0.46, "learning_rate": 1.1761375267880835e-05, "loss": 0.9818, "step": 2097 }, { "epoch": 0.46, "learning_rate": 1.175437510464399e-05, "loss": 0.9831, "step": 2098 }, { "epoch": 0.46, "learning_rate": 1.1747374054310864e-05, "loss": 0.9876, "step": 2099 }, { "epoch": 0.46, "learning_rate": 1.174037212042152e-05, "loss": 0.9263, "step": 2100 }, { "epoch": 0.46, "learning_rate": 1.1733369306516477e-05, "loss": 0.9414, "step": 2101 }, { "epoch": 0.46, "learning_rate": 1.172636561613669e-05, "loss": 0.9565, "step": 2102 }, { "epoch": 0.46, "learning_rate": 1.171936105282356e-05, "loss": 0.9641, "step": 2103 }, { "epoch": 0.46, "learning_rate": 1.171235562011893e-05, "loss": 0.9643, "step": 2104 }, { "epoch": 0.46, "learning_rate": 1.1705349321565085e-05, "loss": 0.9642, "step": 2105 }, { "epoch": 0.46, "learning_rate": 1.1698342160704748e-05, "loss": 0.2812, "step": 2106 }, { "epoch": 0.46, "learning_rate": 1.1691334141081071e-05, "loss": 0.9462, "step": 2107 }, { "epoch": 0.46, "learning_rate": 1.168432526623765e-05, "loss": 0.962, "step": 2108 }, { "epoch": 0.46, "learning_rate": 1.1677315539718508e-05, "loss": 0.9774, "step": 2109 }, { "epoch": 0.46, "learning_rate": 1.1670304965068098e-05, "loss": 0.9683, "step": 2110 }, { "epoch": 0.46, "learning_rate": 1.1663293545831302e-05, "loss": 0.9543, "step": 2111 }, { "epoch": 0.46, "learning_rate": 1.1656281285553433e-05, "loss": 0.9418, "step": 2112 }, { "epoch": 0.46, "learning_rate": 1.1649268187780223e-05, "loss": 0.9423, "step": 2113 }, { "epoch": 0.46, "learning_rate": 1.1642254256057832e-05, "loss": 0.9284, "step": 2114 }, { "epoch": 0.46, "learning_rate": 1.1635239493932842e-05, "loss": 0.9645, "step": 2115 }, { "epoch": 0.46, "learning_rate": 1.1628223904952253e-05, "loss": 0.8988, "step": 2116 }, { "epoch": 0.46, "learning_rate": 1.1621207492663484e-05, "loss": 0.9656, "step": 2117 }, { "epoch": 0.46, "learning_rate": 1.161419026061437e-05, "loss": 0.9466, "step": 2118 }, { "epoch": 0.47, "learning_rate": 1.1607172212353159e-05, "loss": 0.9455, "step": 2119 }, { "epoch": 0.47, "learning_rate": 1.1600153351428516e-05, "loss": 0.9747, "step": 2120 }, { "epoch": 0.47, "learning_rate": 1.159313368138951e-05, "loss": 0.9002, "step": 2121 }, { "epoch": 0.47, "learning_rate": 1.1586113205785626e-05, "loss": 0.9525, "step": 2122 }, { "epoch": 0.47, "learning_rate": 1.1579091928166753e-05, "loss": 0.9628, "step": 2123 }, { "epoch": 0.47, "learning_rate": 1.1572069852083185e-05, "loss": 0.9667, "step": 2124 }, { "epoch": 0.47, "learning_rate": 1.1565046981085621e-05, "loss": 0.9489, "step": 2125 }, { "epoch": 0.47, "learning_rate": 1.1558023318725164e-05, "loss": 0.9262, "step": 2126 }, { "epoch": 0.47, "learning_rate": 1.1550998868553313e-05, "loss": 0.9439, "step": 2127 }, { "epoch": 0.47, "learning_rate": 1.1543973634121969e-05, "loss": 1.0117, "step": 2128 }, { "epoch": 0.47, "learning_rate": 1.1536947618983427e-05, "loss": 0.9433, "step": 2129 }, { "epoch": 0.47, "learning_rate": 1.1529920826690375e-05, "loss": 0.8622, "step": 2130 }, { "epoch": 0.47, "learning_rate": 1.15228932607959e-05, "loss": 0.9304, "step": 2131 }, { "epoch": 0.47, "learning_rate": 1.1515864924853473e-05, "loss": 0.9478, "step": 2132 }, { "epoch": 0.47, "learning_rate": 1.1508835822416962e-05, "loss": 0.9621, "step": 2133 }, { "epoch": 0.47, "learning_rate": 1.1501805957040617e-05, "loss": 1.0169, "step": 2134 }, { "epoch": 0.47, "learning_rate": 1.1494775332279076e-05, "loss": 0.9842, "step": 2135 }, { "epoch": 0.47, "learning_rate": 1.148774395168736e-05, "loss": 0.9077, "step": 2136 }, { "epoch": 0.47, "learning_rate": 1.1480711818820874e-05, "loss": 0.8906, "step": 2137 }, { "epoch": 0.47, "learning_rate": 1.1473678937235398e-05, "loss": 0.9263, "step": 2138 }, { "epoch": 0.47, "learning_rate": 1.1466645310487099e-05, "loss": 0.9419, "step": 2139 }, { "epoch": 0.47, "learning_rate": 1.1459610942132513e-05, "loss": 0.9495, "step": 2140 }, { "epoch": 0.47, "learning_rate": 1.1452575835728557e-05, "loss": 0.97, "step": 2141 }, { "epoch": 0.47, "learning_rate": 1.1445539994832518e-05, "loss": 0.9223, "step": 2142 }, { "epoch": 0.47, "learning_rate": 1.1438503423002052e-05, "loss": 0.9502, "step": 2143 }, { "epoch": 0.47, "learning_rate": 1.1431466123795193e-05, "loss": 0.8582, "step": 2144 }, { "epoch": 0.47, "learning_rate": 1.1424428100770333e-05, "loss": 0.9312, "step": 2145 }, { "epoch": 0.47, "learning_rate": 1.1417389357486237e-05, "loss": 0.9806, "step": 2146 }, { "epoch": 0.47, "learning_rate": 1.141034989750203e-05, "loss": 0.9519, "step": 2147 }, { "epoch": 0.47, "learning_rate": 1.14033097243772e-05, "loss": 0.9419, "step": 2148 }, { "epoch": 0.47, "learning_rate": 1.1396268841671598e-05, "loss": 0.9798, "step": 2149 }, { "epoch": 0.47, "learning_rate": 1.1389227252945434e-05, "loss": 0.9885, "step": 2150 }, { "epoch": 0.47, "learning_rate": 1.1382184961759268e-05, "loss": 0.9522, "step": 2151 }, { "epoch": 0.47, "learning_rate": 1.1375141971674026e-05, "loss": 0.9386, "step": 2152 }, { "epoch": 0.47, "learning_rate": 1.1368098286250981e-05, "loss": 0.965, "step": 2153 }, { "epoch": 0.47, "learning_rate": 1.1361053909051756e-05, "loss": 0.8943, "step": 2154 }, { "epoch": 0.47, "learning_rate": 1.1354008843638331e-05, "loss": 1.0267, "step": 2155 }, { "epoch": 0.47, "learning_rate": 1.1346963093573023e-05, "loss": 1.0297, "step": 2156 }, { "epoch": 0.47, "learning_rate": 1.1339916662418505e-05, "loss": 0.9173, "step": 2157 }, { "epoch": 0.47, "learning_rate": 1.133286955373779e-05, "loss": 0.9275, "step": 2158 }, { "epoch": 0.47, "learning_rate": 1.1325821771094236e-05, "loss": 0.9439, "step": 2159 }, { "epoch": 0.47, "learning_rate": 1.131877331805154e-05, "loss": 0.8775, "step": 2160 }, { "epoch": 0.47, "learning_rate": 1.1311724198173735e-05, "loss": 0.9325, "step": 2161 }, { "epoch": 0.47, "learning_rate": 1.1304674415025198e-05, "loss": 0.9541, "step": 2162 }, { "epoch": 0.47, "learning_rate": 1.129762397217064e-05, "loss": 0.8894, "step": 2163 }, { "epoch": 0.48, "learning_rate": 1.1290572873175098e-05, "loss": 1.0305, "step": 2164 }, { "epoch": 0.48, "learning_rate": 1.1283521121603948e-05, "loss": 0.9217, "step": 2165 }, { "epoch": 0.48, "learning_rate": 1.1276468721022895e-05, "loss": 0.9304, "step": 2166 }, { "epoch": 0.48, "learning_rate": 1.1269415674997972e-05, "loss": 0.9345, "step": 2167 }, { "epoch": 0.48, "learning_rate": 1.1262361987095533e-05, "loss": 0.3083, "step": 2168 }, { "epoch": 0.48, "learning_rate": 1.125530766088227e-05, "loss": 0.8998, "step": 2169 }, { "epoch": 0.48, "learning_rate": 1.124825269992518e-05, "loss": 0.9215, "step": 2170 }, { "epoch": 0.48, "learning_rate": 1.1241197107791595e-05, "loss": 0.9397, "step": 2171 }, { "epoch": 0.48, "learning_rate": 1.1234140888049162e-05, "loss": 0.8855, "step": 2172 }, { "epoch": 0.48, "learning_rate": 1.1227084044265843e-05, "loss": 0.9211, "step": 2173 }, { "epoch": 0.48, "learning_rate": 1.1220026580009916e-05, "loss": 0.9435, "step": 2174 }, { "epoch": 0.48, "learning_rate": 1.1212968498849973e-05, "loss": 0.9615, "step": 2175 }, { "epoch": 0.48, "learning_rate": 1.1205909804354918e-05, "loss": 0.9246, "step": 2176 }, { "epoch": 0.48, "learning_rate": 1.119885050009397e-05, "loss": 0.8943, "step": 2177 }, { "epoch": 0.48, "learning_rate": 1.1191790589636651e-05, "loss": 0.951, "step": 2178 }, { "epoch": 0.48, "learning_rate": 1.1184730076552788e-05, "loss": 0.3421, "step": 2179 }, { "epoch": 0.48, "learning_rate": 1.1177668964412519e-05, "loss": 0.8881, "step": 2180 }, { "epoch": 0.48, "learning_rate": 1.1170607256786276e-05, "loss": 0.9824, "step": 2181 }, { "epoch": 0.48, "learning_rate": 1.1163544957244808e-05, "loss": 0.9229, "step": 2182 }, { "epoch": 0.48, "learning_rate": 1.115648206935914e-05, "loss": 0.9188, "step": 2183 }, { "epoch": 0.48, "learning_rate": 1.1149418596700614e-05, "loss": 0.9522, "step": 2184 }, { "epoch": 0.48, "learning_rate": 1.1142354542840859e-05, "loss": 0.9393, "step": 2185 }, { "epoch": 0.48, "learning_rate": 1.1135289911351801e-05, "loss": 0.9137, "step": 2186 }, { "epoch": 0.48, "learning_rate": 1.1128224705805657e-05, "loss": 0.9481, "step": 2187 }, { "epoch": 0.48, "learning_rate": 1.1121158929774933e-05, "loss": 0.9908, "step": 2188 }, { "epoch": 0.48, "learning_rate": 1.1114092586832426e-05, "loss": 0.8895, "step": 2189 }, { "epoch": 0.48, "learning_rate": 1.1107025680551216e-05, "loss": 0.9294, "step": 2190 }, { "epoch": 0.48, "learning_rate": 1.1099958214504674e-05, "loss": 0.938, "step": 2191 }, { "epoch": 0.48, "learning_rate": 1.1092890192266446e-05, "loss": 0.8838, "step": 2192 }, { "epoch": 0.48, "learning_rate": 1.1085821617410464e-05, "loss": 1.0105, "step": 2193 }, { "epoch": 0.48, "learning_rate": 1.107875249351094e-05, "loss": 0.9919, "step": 2194 }, { "epoch": 0.48, "learning_rate": 1.1071682824142365e-05, "loss": 0.9321, "step": 2195 }, { "epoch": 0.48, "learning_rate": 1.1064612612879497e-05, "loss": 0.9041, "step": 2196 }, { "epoch": 0.48, "learning_rate": 1.105754186329738e-05, "loss": 0.9341, "step": 2197 }, { "epoch": 0.48, "learning_rate": 1.1050470578971322e-05, "loss": 0.9517, "step": 2198 }, { "epoch": 0.48, "learning_rate": 1.1043398763476904e-05, "loss": 0.9693, "step": 2199 }, { "epoch": 0.48, "learning_rate": 1.1036326420389978e-05, "loss": 0.9429, "step": 2200 }, { "epoch": 0.48, "learning_rate": 1.1029253553286657e-05, "loss": 0.9148, "step": 2201 }, { "epoch": 0.48, "learning_rate": 1.102218016574332e-05, "loss": 0.9855, "step": 2202 }, { "epoch": 0.48, "learning_rate": 1.1015106261336617e-05, "loss": 0.9506, "step": 2203 }, { "epoch": 0.48, "learning_rate": 1.100803184364345e-05, "loss": 0.9466, "step": 2204 }, { "epoch": 0.48, "learning_rate": 1.1000956916240985e-05, "loss": 0.9285, "step": 2205 }, { "epoch": 0.48, "learning_rate": 1.0993881482706646e-05, "loss": 0.9587, "step": 2206 }, { "epoch": 0.48, "learning_rate": 1.0986805546618111e-05, "loss": 0.9532, "step": 2207 }, { "epoch": 0.48, "learning_rate": 1.0979729111553316e-05, "loss": 0.9364, "step": 2208 }, { "epoch": 0.48, "learning_rate": 1.097265218109044e-05, "loss": 0.9255, "step": 2209 }, { "epoch": 0.49, "learning_rate": 1.0965574758807924e-05, "loss": 0.3322, "step": 2210 }, { "epoch": 0.49, "learning_rate": 1.095849684828445e-05, "loss": 0.9009, "step": 2211 }, { "epoch": 0.49, "learning_rate": 1.095141845309895e-05, "loss": 0.9361, "step": 2212 }, { "epoch": 0.49, "learning_rate": 1.09443395768306e-05, "loss": 1.0082, "step": 2213 }, { "epoch": 0.49, "learning_rate": 1.0937260223058824e-05, "loss": 0.9319, "step": 2214 }, { "epoch": 0.49, "learning_rate": 1.0930180395363275e-05, "loss": 1.0008, "step": 2215 }, { "epoch": 0.49, "learning_rate": 1.0923100097323859e-05, "loss": 0.992, "step": 2216 }, { "epoch": 0.49, "learning_rate": 1.0916019332520717e-05, "loss": 0.9226, "step": 2217 }, { "epoch": 0.49, "learning_rate": 1.0908938104534219e-05, "loss": 0.9324, "step": 2218 }, { "epoch": 0.49, "learning_rate": 1.0901856416944978e-05, "loss": 0.9822, "step": 2219 }, { "epoch": 0.49, "learning_rate": 1.089477427333383e-05, "loss": 0.9371, "step": 2220 }, { "epoch": 0.49, "learning_rate": 1.088769167728185e-05, "loss": 0.9487, "step": 2221 }, { "epoch": 0.49, "learning_rate": 1.088060863237034e-05, "loss": 0.976, "step": 2222 }, { "epoch": 0.49, "learning_rate": 1.0873525142180823e-05, "loss": 0.9769, "step": 2223 }, { "epoch": 0.49, "learning_rate": 1.0866441210295055e-05, "loss": 0.9541, "step": 2224 }, { "epoch": 0.49, "learning_rate": 1.0859356840295013e-05, "loss": 0.9817, "step": 2225 }, { "epoch": 0.49, "learning_rate": 1.0852272035762893e-05, "loss": 0.944, "step": 2226 }, { "epoch": 0.49, "learning_rate": 1.0845186800281112e-05, "loss": 0.9543, "step": 2227 }, { "epoch": 0.49, "learning_rate": 1.0838101137432303e-05, "loss": 0.9178, "step": 2228 }, { "epoch": 0.49, "learning_rate": 1.0831015050799325e-05, "loss": 0.9649, "step": 2229 }, { "epoch": 0.49, "learning_rate": 1.0823928543965236e-05, "loss": 0.9375, "step": 2230 }, { "epoch": 0.49, "learning_rate": 1.0816841620513318e-05, "loss": 0.9452, "step": 2231 }, { "epoch": 0.49, "learning_rate": 1.080975428402706e-05, "loss": 0.9894, "step": 2232 }, { "epoch": 0.49, "learning_rate": 1.0802666538090154e-05, "loss": 0.9173, "step": 2233 }, { "epoch": 0.49, "learning_rate": 1.0795578386286511e-05, "loss": 0.9295, "step": 2234 }, { "epoch": 0.49, "learning_rate": 1.0788489832200237e-05, "loss": 0.9905, "step": 2235 }, { "epoch": 0.49, "learning_rate": 1.0781400879415649e-05, "loss": 0.9444, "step": 2236 }, { "epoch": 0.49, "learning_rate": 1.0774311531517258e-05, "loss": 0.9699, "step": 2237 }, { "epoch": 0.49, "learning_rate": 1.0767221792089784e-05, "loss": 0.9311, "step": 2238 }, { "epoch": 0.49, "learning_rate": 1.0760131664718133e-05, "loss": 0.9433, "step": 2239 }, { "epoch": 0.49, "learning_rate": 1.075304115298742e-05, "loss": 0.3135, "step": 2240 }, { "epoch": 0.49, "learning_rate": 1.0745950260482943e-05, "loss": 0.9664, "step": 2241 }, { "epoch": 0.49, "learning_rate": 1.07388589907902e-05, "loss": 0.9161, "step": 2242 }, { "epoch": 0.49, "learning_rate": 1.0731767347494876e-05, "loss": 0.9316, "step": 2243 }, { "epoch": 0.49, "learning_rate": 1.072467533418285e-05, "loss": 0.9279, "step": 2244 }, { "epoch": 0.49, "learning_rate": 1.071758295444018e-05, "loss": 0.937, "step": 2245 }, { "epoch": 0.49, "learning_rate": 1.0710490211853118e-05, "loss": 0.9602, "step": 2246 }, { "epoch": 0.49, "learning_rate": 1.0703397110008089e-05, "loss": 0.9526, "step": 2247 }, { "epoch": 0.49, "learning_rate": 1.0696303652491713e-05, "loss": 0.9906, "step": 2248 }, { "epoch": 0.49, "learning_rate": 1.0689209842890777e-05, "loss": 0.8983, "step": 2249 }, { "epoch": 0.49, "learning_rate": 1.0682115684792256e-05, "loss": 0.9214, "step": 2250 }, { "epoch": 0.49, "learning_rate": 1.0675021181783295e-05, "loss": 0.9714, "step": 2251 }, { "epoch": 0.49, "learning_rate": 1.0667926337451217e-05, "loss": 0.9916, "step": 2252 }, { "epoch": 0.49, "learning_rate": 1.0660831155383513e-05, "loss": 0.9246, "step": 2253 }, { "epoch": 0.49, "learning_rate": 1.0653735639167848e-05, "loss": 0.8693, "step": 2254 }, { "epoch": 0.5, "learning_rate": 1.0646639792392057e-05, "loss": 0.9516, "step": 2255 }, { "epoch": 0.5, "learning_rate": 1.0639543618644141e-05, "loss": 0.8905, "step": 2256 }, { "epoch": 0.5, "learning_rate": 1.0632447121512265e-05, "loss": 0.9675, "step": 2257 }, { "epoch": 0.5, "learning_rate": 1.062535030458476e-05, "loss": 0.9297, "step": 2258 }, { "epoch": 0.5, "learning_rate": 1.0618253171450117e-05, "loss": 0.9632, "step": 2259 }, { "epoch": 0.5, "learning_rate": 1.0611155725696988e-05, "loss": 0.8856, "step": 2260 }, { "epoch": 0.5, "learning_rate": 1.0604057970914179e-05, "loss": 0.9468, "step": 2261 }, { "epoch": 0.5, "learning_rate": 1.0596959910690663e-05, "loss": 0.9748, "step": 2262 }, { "epoch": 0.5, "learning_rate": 1.058986154861555e-05, "loss": 0.3073, "step": 2263 }, { "epoch": 0.5, "learning_rate": 1.058276288827812e-05, "loss": 0.9288, "step": 2264 }, { "epoch": 0.5, "learning_rate": 1.0575663933267793e-05, "loss": 0.9357, "step": 2265 }, { "epoch": 0.5, "learning_rate": 1.0568564687174141e-05, "loss": 1.0096, "step": 2266 }, { "epoch": 0.5, "learning_rate": 1.0561465153586887e-05, "loss": 0.9242, "step": 2267 }, { "epoch": 0.5, "learning_rate": 1.0554365336095893e-05, "loss": 0.9559, "step": 2268 }, { "epoch": 0.5, "learning_rate": 1.0547265238291172e-05, "loss": 0.9922, "step": 2269 }, { "epoch": 0.5, "learning_rate": 1.0540164863762867e-05, "loss": 0.9002, "step": 2270 }, { "epoch": 0.5, "learning_rate": 1.0533064216101279e-05, "loss": 0.9802, "step": 2271 }, { "epoch": 0.5, "learning_rate": 1.0525963298896827e-05, "loss": 0.9637, "step": 2272 }, { "epoch": 0.5, "learning_rate": 1.0518862115740076e-05, "loss": 0.9069, "step": 2273 }, { "epoch": 0.5, "learning_rate": 1.0511760670221728e-05, "loss": 0.9592, "step": 2274 }, { "epoch": 0.5, "learning_rate": 1.0504658965932617e-05, "loss": 0.9447, "step": 2275 }, { "epoch": 0.5, "learning_rate": 1.0497557006463702e-05, "loss": 0.9047, "step": 2276 }, { "epoch": 0.5, "learning_rate": 1.0490454795406075e-05, "loss": 0.9845, "step": 2277 }, { "epoch": 0.5, "learning_rate": 1.0483352336350956e-05, "loss": 0.3185, "step": 2278 }, { "epoch": 0.5, "learning_rate": 1.047624963288969e-05, "loss": 0.9059, "step": 2279 }, { "epoch": 0.5, "learning_rate": 1.0469146688613744e-05, "loss": 0.9033, "step": 2280 }, { "epoch": 0.5, "learning_rate": 1.0462043507114709e-05, "loss": 0.924, "step": 2281 }, { "epoch": 0.5, "learning_rate": 1.0454940091984292e-05, "loss": 0.9766, "step": 2282 }, { "epoch": 0.5, "learning_rate": 1.0447836446814322e-05, "loss": 0.9563, "step": 2283 }, { "epoch": 0.5, "learning_rate": 1.0440732575196741e-05, "loss": 0.9683, "step": 2284 }, { "epoch": 0.5, "learning_rate": 1.043362848072361e-05, "loss": 0.9285, "step": 2285 }, { "epoch": 0.5, "learning_rate": 1.0426524166987102e-05, "loss": 0.919, "step": 2286 }, { "epoch": 0.5, "learning_rate": 1.0419419637579495e-05, "loss": 0.9054, "step": 2287 }, { "epoch": 0.5, "learning_rate": 1.0412314896093184e-05, "loss": 0.9101, "step": 2288 }, { "epoch": 0.5, "learning_rate": 1.0405209946120667e-05, "loss": 0.9407, "step": 2289 }, { "epoch": 0.5, "learning_rate": 1.0398104791254542e-05, "loss": 0.9137, "step": 2290 }, { "epoch": 0.5, "learning_rate": 1.0390999435087523e-05, "loss": 0.3336, "step": 2291 }, { "epoch": 0.5, "learning_rate": 1.0383893881212417e-05, "loss": 0.954, "step": 2292 }, { "epoch": 0.5, "learning_rate": 1.0376788133222134e-05, "loss": 0.9396, "step": 2293 }, { "epoch": 0.5, "learning_rate": 1.0369682194709679e-05, "loss": 0.9689, "step": 2294 }, { "epoch": 0.5, "learning_rate": 1.0362576069268156e-05, "loss": 0.9408, "step": 2295 }, { "epoch": 0.5, "learning_rate": 1.0355469760490766e-05, "loss": 0.9702, "step": 2296 }, { "epoch": 0.5, "learning_rate": 1.0348363271970798e-05, "loss": 0.8984, "step": 2297 }, { "epoch": 0.5, "learning_rate": 1.0341256607301635e-05, "loss": 0.9556, "step": 2298 }, { "epoch": 0.5, "learning_rate": 1.0334149770076747e-05, "loss": 0.9662, "step": 2299 }, { "epoch": 0.5, "learning_rate": 1.0327042763889692e-05, "loss": 0.9347, "step": 2300 }, { "epoch": 0.51, "learning_rate": 1.0319935592334111e-05, "loss": 0.8872, "step": 2301 }, { "epoch": 0.51, "learning_rate": 1.0312828259003738e-05, "loss": 0.947, "step": 2302 }, { "epoch": 0.51, "learning_rate": 1.0305720767492376e-05, "loss": 0.9221, "step": 2303 }, { "epoch": 0.51, "learning_rate": 1.0298613121393915e-05, "loss": 0.3113, "step": 2304 }, { "epoch": 0.51, "learning_rate": 1.0291505324302322e-05, "loss": 0.9723, "step": 2305 }, { "epoch": 0.51, "learning_rate": 1.0284397379811643e-05, "loss": 0.9467, "step": 2306 }, { "epoch": 0.51, "learning_rate": 1.0277289291515995e-05, "loss": 0.3246, "step": 2307 }, { "epoch": 0.51, "learning_rate": 1.0270181063009568e-05, "loss": 0.9222, "step": 2308 }, { "epoch": 0.51, "learning_rate": 1.0263072697886622e-05, "loss": 0.9591, "step": 2309 }, { "epoch": 0.51, "learning_rate": 1.0255964199741488e-05, "loss": 0.978, "step": 2310 }, { "epoch": 0.51, "learning_rate": 1.0248855572168568e-05, "loss": 0.9237, "step": 2311 }, { "epoch": 0.51, "learning_rate": 1.0241746818762321e-05, "loss": 0.9483, "step": 2312 }, { "epoch": 0.51, "learning_rate": 1.0234637943117278e-05, "loss": 0.3132, "step": 2313 }, { "epoch": 0.51, "learning_rate": 1.0227528948828023e-05, "loss": 0.995, "step": 2314 }, { "epoch": 0.51, "learning_rate": 1.022041983948921e-05, "loss": 0.926, "step": 2315 }, { "epoch": 0.51, "learning_rate": 1.0213310618695541e-05, "loss": 0.9712, "step": 2316 }, { "epoch": 0.51, "learning_rate": 1.0206201290041784e-05, "loss": 0.9471, "step": 2317 }, { "epoch": 0.51, "learning_rate": 1.0199091857122754e-05, "loss": 0.9524, "step": 2318 }, { "epoch": 0.51, "learning_rate": 1.019198232353332e-05, "loss": 0.8538, "step": 2319 }, { "epoch": 0.51, "learning_rate": 1.0184872692868409e-05, "loss": 0.9754, "step": 2320 }, { "epoch": 0.51, "learning_rate": 1.0177762968722985e-05, "loss": 0.9487, "step": 2321 }, { "epoch": 0.51, "learning_rate": 1.0170653154692072e-05, "loss": 0.9289, "step": 2322 }, { "epoch": 0.51, "learning_rate": 1.0163543254370729e-05, "loss": 0.9646, "step": 2323 }, { "epoch": 0.51, "learning_rate": 1.0156433271354067e-05, "loss": 0.3158, "step": 2324 }, { "epoch": 0.51, "learning_rate": 1.014932320923723e-05, "loss": 0.9812, "step": 2325 }, { "epoch": 0.51, "learning_rate": 1.014221307161541e-05, "loss": 0.9405, "step": 2326 }, { "epoch": 0.51, "learning_rate": 1.0135102862083833e-05, "loss": 0.9384, "step": 2327 }, { "epoch": 0.51, "learning_rate": 1.012799258423776e-05, "loss": 0.934, "step": 2328 }, { "epoch": 0.51, "learning_rate": 1.0120882241672494e-05, "loss": 0.9202, "step": 2329 }, { "epoch": 0.51, "learning_rate": 1.0113771837983361e-05, "loss": 0.9058, "step": 2330 }, { "epoch": 0.51, "learning_rate": 1.0106661376765724e-05, "loss": 0.9091, "step": 2331 }, { "epoch": 0.51, "learning_rate": 1.0099550861614977e-05, "loss": 0.952, "step": 2332 }, { "epoch": 0.51, "learning_rate": 1.0092440296126536e-05, "loss": 0.9271, "step": 2333 }, { "epoch": 0.51, "learning_rate": 1.0085329683895839e-05, "loss": 0.9296, "step": 2334 }, { "epoch": 0.51, "learning_rate": 1.0078219028518359e-05, "loss": 0.9399, "step": 2335 }, { "epoch": 0.51, "learning_rate": 1.007110833358958e-05, "loss": 0.9223, "step": 2336 }, { "epoch": 0.51, "learning_rate": 1.0063997602705015e-05, "loss": 0.9894, "step": 2337 }, { "epoch": 0.51, "learning_rate": 1.0056886839460192e-05, "loss": 0.9642, "step": 2338 }, { "epoch": 0.51, "learning_rate": 1.0049776047450648e-05, "loss": 0.9291, "step": 2339 }, { "epoch": 0.51, "learning_rate": 1.0042665230271947e-05, "loss": 0.9079, "step": 2340 }, { "epoch": 0.51, "learning_rate": 1.0035554391519655e-05, "loss": 0.9653, "step": 2341 }, { "epoch": 0.51, "learning_rate": 1.0028443534789359e-05, "loss": 0.955, "step": 2342 }, { "epoch": 0.51, "learning_rate": 1.0021332663676644e-05, "loss": 0.9632, "step": 2343 }, { "epoch": 0.51, "learning_rate": 1.001422178177711e-05, "loss": 0.93, "step": 2344 }, { "epoch": 0.51, "learning_rate": 1.000711089268636e-05, "loss": 0.9664, "step": 2345 }, { "epoch": 0.51, "learning_rate": 1e-05, "loss": 0.9509, "step": 2346 }, { "epoch": 0.52, "learning_rate": 9.992889107313642e-06, "loss": 0.9469, "step": 2347 }, { "epoch": 0.52, "learning_rate": 9.985778218222894e-06, "loss": 0.9228, "step": 2348 }, { "epoch": 0.52, "learning_rate": 9.978667336323359e-06, "loss": 0.9426, "step": 2349 }, { "epoch": 0.52, "learning_rate": 9.971556465210643e-06, "loss": 0.9774, "step": 2350 }, { "epoch": 0.52, "learning_rate": 9.964445608480345e-06, "loss": 0.9165, "step": 2351 }, { "epoch": 0.52, "learning_rate": 9.957334769728055e-06, "loss": 1.0039, "step": 2352 }, { "epoch": 0.52, "learning_rate": 9.950223952549352e-06, "loss": 0.9346, "step": 2353 }, { "epoch": 0.52, "learning_rate": 9.94311316053981e-06, "loss": 0.9151, "step": 2354 }, { "epoch": 0.52, "learning_rate": 9.93600239729499e-06, "loss": 0.9869, "step": 2355 }, { "epoch": 0.52, "learning_rate": 9.928891666410425e-06, "loss": 0.9641, "step": 2356 }, { "epoch": 0.52, "learning_rate": 9.921780971481648e-06, "loss": 0.9631, "step": 2357 }, { "epoch": 0.52, "learning_rate": 9.914670316104166e-06, "loss": 0.9621, "step": 2358 }, { "epoch": 0.52, "learning_rate": 9.907559703873469e-06, "loss": 0.9555, "step": 2359 }, { "epoch": 0.52, "learning_rate": 9.900449138385026e-06, "loss": 0.9859, "step": 2360 }, { "epoch": 0.52, "learning_rate": 9.893338623234277e-06, "loss": 0.9814, "step": 2361 }, { "epoch": 0.52, "learning_rate": 9.88622816201664e-06, "loss": 0.9413, "step": 2362 }, { "epoch": 0.52, "learning_rate": 9.879117758327508e-06, "loss": 0.9311, "step": 2363 }, { "epoch": 0.52, "learning_rate": 9.872007415762243e-06, "loss": 0.8555, "step": 2364 }, { "epoch": 0.52, "learning_rate": 9.86489713791617e-06, "loss": 0.9335, "step": 2365 }, { "epoch": 0.52, "learning_rate": 9.857786928384594e-06, "loss": 0.8569, "step": 2366 }, { "epoch": 0.52, "learning_rate": 9.850676790762773e-06, "loss": 0.9398, "step": 2367 }, { "epoch": 0.52, "learning_rate": 9.843566728645938e-06, "loss": 0.912, "step": 2368 }, { "epoch": 0.52, "learning_rate": 9.836456745629273e-06, "loss": 0.9606, "step": 2369 }, { "epoch": 0.52, "learning_rate": 9.829346845307929e-06, "loss": 0.9554, "step": 2370 }, { "epoch": 0.52, "learning_rate": 9.822237031277015e-06, "loss": 0.9914, "step": 2371 }, { "epoch": 0.52, "learning_rate": 9.815127307131593e-06, "loss": 0.9248, "step": 2372 }, { "epoch": 0.52, "learning_rate": 9.808017676466685e-06, "loss": 0.9872, "step": 2373 }, { "epoch": 0.52, "learning_rate": 9.800908142877253e-06, "loss": 0.3058, "step": 2374 }, { "epoch": 0.52, "learning_rate": 9.793798709958221e-06, "loss": 0.8957, "step": 2375 }, { "epoch": 0.52, "learning_rate": 9.786689381304464e-06, "loss": 0.8803, "step": 2376 }, { "epoch": 0.52, "learning_rate": 9.779580160510794e-06, "loss": 0.2937, "step": 2377 }, { "epoch": 0.52, "learning_rate": 9.77247105117198e-06, "loss": 0.9423, "step": 2378 }, { "epoch": 0.52, "learning_rate": 9.765362056882725e-06, "loss": 0.956, "step": 2379 }, { "epoch": 0.52, "learning_rate": 9.75825318123768e-06, "loss": 0.9503, "step": 2380 }, { "epoch": 0.52, "learning_rate": 9.751144427831435e-06, "loss": 0.9574, "step": 2381 }, { "epoch": 0.52, "learning_rate": 9.744035800258513e-06, "loss": 0.8932, "step": 2382 }, { "epoch": 0.52, "learning_rate": 9.736927302113381e-06, "loss": 0.9404, "step": 2383 }, { "epoch": 0.52, "learning_rate": 9.729818936990435e-06, "loss": 0.9674, "step": 2384 }, { "epoch": 0.52, "learning_rate": 9.722710708484009e-06, "loss": 0.9577, "step": 2385 }, { "epoch": 0.52, "learning_rate": 9.715602620188358e-06, "loss": 0.8726, "step": 2386 }, { "epoch": 0.52, "learning_rate": 9.708494675697678e-06, "loss": 0.9541, "step": 2387 }, { "epoch": 0.52, "learning_rate": 9.701386878606087e-06, "loss": 0.9704, "step": 2388 }, { "epoch": 0.52, "learning_rate": 9.694279232507626e-06, "loss": 0.974, "step": 2389 }, { "epoch": 0.52, "learning_rate": 9.687171740996262e-06, "loss": 0.9702, "step": 2390 }, { "epoch": 0.52, "learning_rate": 9.680064407665892e-06, "loss": 0.9235, "step": 2391 }, { "epoch": 0.53, "learning_rate": 9.672957236110313e-06, "loss": 0.9105, "step": 2392 }, { "epoch": 0.53, "learning_rate": 9.665850229923258e-06, "loss": 0.9603, "step": 2393 }, { "epoch": 0.53, "learning_rate": 9.658743392698368e-06, "loss": 0.3099, "step": 2394 }, { "epoch": 0.53, "learning_rate": 9.651636728029205e-06, "loss": 0.9794, "step": 2395 }, { "epoch": 0.53, "learning_rate": 9.644530239509238e-06, "loss": 0.9508, "step": 2396 }, { "epoch": 0.53, "learning_rate": 9.637423930731847e-06, "loss": 1.0073, "step": 2397 }, { "epoch": 0.53, "learning_rate": 9.630317805290327e-06, "loss": 0.9722, "step": 2398 }, { "epoch": 0.53, "learning_rate": 9.623211866777871e-06, "loss": 0.9428, "step": 2399 }, { "epoch": 0.53, "learning_rate": 9.616106118787586e-06, "loss": 0.9831, "step": 2400 }, { "epoch": 0.53, "learning_rate": 9.60900056491248e-06, "loss": 1.0149, "step": 2401 }, { "epoch": 0.53, "learning_rate": 9.60189520874546e-06, "loss": 1.0012, "step": 2402 }, { "epoch": 0.53, "learning_rate": 9.594790053879336e-06, "loss": 0.3238, "step": 2403 }, { "epoch": 0.53, "learning_rate": 9.587685103906818e-06, "loss": 0.917, "step": 2404 }, { "epoch": 0.53, "learning_rate": 9.580580362420505e-06, "loss": 0.9576, "step": 2405 }, { "epoch": 0.53, "learning_rate": 9.5734758330129e-06, "loss": 1.0104, "step": 2406 }, { "epoch": 0.53, "learning_rate": 9.566371519276389e-06, "loss": 0.9856, "step": 2407 }, { "epoch": 0.53, "learning_rate": 9.559267424803259e-06, "loss": 0.9201, "step": 2408 }, { "epoch": 0.53, "learning_rate": 9.552163553185685e-06, "loss": 0.8367, "step": 2409 }, { "epoch": 0.53, "learning_rate": 9.545059908015713e-06, "loss": 0.9271, "step": 2410 }, { "epoch": 0.53, "learning_rate": 9.537956492885296e-06, "loss": 0.9318, "step": 2411 }, { "epoch": 0.53, "learning_rate": 9.530853311386259e-06, "loss": 0.9045, "step": 2412 }, { "epoch": 0.53, "learning_rate": 9.523750367110312e-06, "loss": 1.0136, "step": 2413 }, { "epoch": 0.53, "learning_rate": 9.516647663649047e-06, "loss": 0.951, "step": 2414 }, { "epoch": 0.53, "learning_rate": 9.509545204593928e-06, "loss": 0.9673, "step": 2415 }, { "epoch": 0.53, "learning_rate": 9.502442993536302e-06, "loss": 0.9603, "step": 2416 }, { "epoch": 0.53, "learning_rate": 9.495341034067386e-06, "loss": 0.9088, "step": 2417 }, { "epoch": 0.53, "learning_rate": 9.488239329778273e-06, "loss": 0.8893, "step": 2418 }, { "epoch": 0.53, "learning_rate": 9.481137884259928e-06, "loss": 0.9421, "step": 2419 }, { "epoch": 0.53, "learning_rate": 9.474036701103178e-06, "loss": 0.9456, "step": 2420 }, { "epoch": 0.53, "learning_rate": 9.466935783898725e-06, "loss": 0.9784, "step": 2421 }, { "epoch": 0.53, "learning_rate": 9.459835136237133e-06, "loss": 0.9183, "step": 2422 }, { "epoch": 0.53, "learning_rate": 9.45273476170883e-06, "loss": 0.9595, "step": 2423 }, { "epoch": 0.53, "learning_rate": 9.445634663904107e-06, "loss": 0.9677, "step": 2424 }, { "epoch": 0.53, "learning_rate": 9.438534846413115e-06, "loss": 0.9039, "step": 2425 }, { "epoch": 0.53, "learning_rate": 9.431435312825859e-06, "loss": 0.9008, "step": 2426 }, { "epoch": 0.53, "learning_rate": 9.42433606673221e-06, "loss": 0.3433, "step": 2427 }, { "epoch": 0.53, "learning_rate": 9.417237111721884e-06, "loss": 0.956, "step": 2428 }, { "epoch": 0.53, "learning_rate": 9.410138451384453e-06, "loss": 0.9625, "step": 2429 }, { "epoch": 0.53, "learning_rate": 9.40304008930934e-06, "loss": 0.9691, "step": 2430 }, { "epoch": 0.53, "learning_rate": 9.395942029085823e-06, "loss": 0.3293, "step": 2431 }, { "epoch": 0.53, "learning_rate": 9.388844274303015e-06, "loss": 1.0197, "step": 2432 }, { "epoch": 0.53, "learning_rate": 9.381746828549885e-06, "loss": 0.9558, "step": 2433 }, { "epoch": 0.53, "learning_rate": 9.374649695415241e-06, "loss": 0.9354, "step": 2434 }, { "epoch": 0.53, "learning_rate": 9.367552878487736e-06, "loss": 0.9477, "step": 2435 }, { "epoch": 0.53, "learning_rate": 9.360456381355862e-06, "loss": 0.9574, "step": 2436 }, { "epoch": 0.53, "learning_rate": 9.353360207607946e-06, "loss": 0.9094, "step": 2437 }, { "epoch": 0.54, "learning_rate": 9.346264360832155e-06, "loss": 0.9221, "step": 2438 }, { "epoch": 0.54, "learning_rate": 9.33916884461649e-06, "loss": 0.9489, "step": 2439 }, { "epoch": 0.54, "learning_rate": 9.332073662548785e-06, "loss": 0.9565, "step": 2440 }, { "epoch": 0.54, "learning_rate": 9.324978818216706e-06, "loss": 0.9095, "step": 2441 }, { "epoch": 0.54, "learning_rate": 9.317884315207745e-06, "loss": 0.9249, "step": 2442 }, { "epoch": 0.54, "learning_rate": 9.310790157109223e-06, "loss": 0.9021, "step": 2443 }, { "epoch": 0.54, "learning_rate": 9.303696347508292e-06, "loss": 0.9434, "step": 2444 }, { "epoch": 0.54, "learning_rate": 9.296602889991914e-06, "loss": 0.9248, "step": 2445 }, { "epoch": 0.54, "learning_rate": 9.289509788146886e-06, "loss": 0.9154, "step": 2446 }, { "epoch": 0.54, "learning_rate": 9.282417045559823e-06, "loss": 0.9079, "step": 2447 }, { "epoch": 0.54, "learning_rate": 9.275324665817153e-06, "loss": 0.9228, "step": 2448 }, { "epoch": 0.54, "learning_rate": 9.268232652505125e-06, "loss": 0.9883, "step": 2449 }, { "epoch": 0.54, "learning_rate": 9.261141009209803e-06, "loss": 0.9401, "step": 2450 }, { "epoch": 0.54, "learning_rate": 9.254049739517059e-06, "loss": 0.9125, "step": 2451 }, { "epoch": 0.54, "learning_rate": 9.246958847012583e-06, "loss": 0.96, "step": 2452 }, { "epoch": 0.54, "learning_rate": 9.23986833528187e-06, "loss": 0.3225, "step": 2453 }, { "epoch": 0.54, "learning_rate": 9.23277820791022e-06, "loss": 0.9565, "step": 2454 }, { "epoch": 0.54, "learning_rate": 9.225688468482743e-06, "loss": 0.9187, "step": 2455 }, { "epoch": 0.54, "learning_rate": 9.218599120584353e-06, "loss": 0.921, "step": 2456 }, { "epoch": 0.54, "learning_rate": 9.211510167799765e-06, "loss": 0.9503, "step": 2457 }, { "epoch": 0.54, "learning_rate": 9.204421613713492e-06, "loss": 0.9603, "step": 2458 }, { "epoch": 0.54, "learning_rate": 9.197333461909849e-06, "loss": 0.9227, "step": 2459 }, { "epoch": 0.54, "learning_rate": 9.190245715972946e-06, "loss": 0.9334, "step": 2460 }, { "epoch": 0.54, "learning_rate": 9.183158379486683e-06, "loss": 0.9048, "step": 2461 }, { "epoch": 0.54, "learning_rate": 9.176071456034767e-06, "loss": 0.8759, "step": 2462 }, { "epoch": 0.54, "learning_rate": 9.168984949200678e-06, "loss": 0.9639, "step": 2463 }, { "epoch": 0.54, "learning_rate": 9.161898862567698e-06, "loss": 0.9384, "step": 2464 }, { "epoch": 0.54, "learning_rate": 9.154813199718893e-06, "loss": 0.9621, "step": 2465 }, { "epoch": 0.54, "learning_rate": 9.147727964237112e-06, "loss": 1.0333, "step": 2466 }, { "epoch": 0.54, "learning_rate": 9.14064315970499e-06, "loss": 1.0284, "step": 2467 }, { "epoch": 0.54, "learning_rate": 9.133558789704948e-06, "loss": 0.9662, "step": 2468 }, { "epoch": 0.54, "learning_rate": 9.12647485781918e-06, "loss": 0.9465, "step": 2469 }, { "epoch": 0.54, "learning_rate": 9.119391367629665e-06, "loss": 1.0023, "step": 2470 }, { "epoch": 0.54, "learning_rate": 9.112308322718151e-06, "loss": 0.9028, "step": 2471 }, { "epoch": 0.54, "learning_rate": 9.105225726666173e-06, "loss": 0.9676, "step": 2472 }, { "epoch": 0.54, "learning_rate": 9.098143583055025e-06, "loss": 0.9722, "step": 2473 }, { "epoch": 0.54, "learning_rate": 9.091061895465783e-06, "loss": 0.8917, "step": 2474 }, { "epoch": 0.54, "learning_rate": 9.083980667479286e-06, "loss": 0.9345, "step": 2475 }, { "epoch": 0.54, "learning_rate": 9.076899902676143e-06, "loss": 0.8884, "step": 2476 }, { "epoch": 0.54, "learning_rate": 9.069819604636727e-06, "loss": 0.9043, "step": 2477 }, { "epoch": 0.54, "learning_rate": 9.062739776941181e-06, "loss": 0.8834, "step": 2478 }, { "epoch": 0.54, "learning_rate": 9.055660423169403e-06, "loss": 0.9319, "step": 2479 }, { "epoch": 0.54, "learning_rate": 9.048581546901056e-06, "loss": 0.9048, "step": 2480 }, { "epoch": 0.54, "learning_rate": 9.041503151715555e-06, "loss": 0.9598, "step": 2481 }, { "epoch": 0.54, "learning_rate": 9.034425241192082e-06, "loss": 0.9576, "step": 2482 }, { "epoch": 0.55, "learning_rate": 9.027347818909564e-06, "loss": 0.9113, "step": 2483 }, { "epoch": 0.55, "learning_rate": 9.020270888446689e-06, "loss": 0.9873, "step": 2484 }, { "epoch": 0.55, "learning_rate": 9.013194453381892e-06, "loss": 0.9441, "step": 2485 }, { "epoch": 0.55, "learning_rate": 9.006118517293356e-06, "loss": 0.9557, "step": 2486 }, { "epoch": 0.55, "learning_rate": 8.999043083759016e-06, "loss": 0.9345, "step": 2487 }, { "epoch": 0.55, "learning_rate": 8.991968156356553e-06, "loss": 0.943, "step": 2488 }, { "epoch": 0.55, "learning_rate": 8.984893738663386e-06, "loss": 0.9222, "step": 2489 }, { "epoch": 0.55, "learning_rate": 8.977819834256683e-06, "loss": 0.9199, "step": 2490 }, { "epoch": 0.55, "learning_rate": 8.970746446713346e-06, "loss": 0.9247, "step": 2491 }, { "epoch": 0.55, "learning_rate": 8.963673579610024e-06, "loss": 0.9553, "step": 2492 }, { "epoch": 0.55, "learning_rate": 8.956601236523096e-06, "loss": 0.8598, "step": 2493 }, { "epoch": 0.55, "learning_rate": 8.949529421028677e-06, "loss": 0.985, "step": 2494 }, { "epoch": 0.55, "learning_rate": 8.94245813670262e-06, "loss": 0.9453, "step": 2495 }, { "epoch": 0.55, "learning_rate": 8.935387387120505e-06, "loss": 0.9148, "step": 2496 }, { "epoch": 0.55, "learning_rate": 8.928317175857639e-06, "loss": 0.3217, "step": 2497 }, { "epoch": 0.55, "learning_rate": 8.921247506489064e-06, "loss": 0.9626, "step": 2498 }, { "epoch": 0.55, "learning_rate": 8.91417838258954e-06, "loss": 0.9339, "step": 2499 }, { "epoch": 0.55, "learning_rate": 8.907109807733559e-06, "loss": 0.9833, "step": 2500 }, { "epoch": 0.55, "learning_rate": 8.900041785495331e-06, "loss": 0.959, "step": 2501 }, { "epoch": 0.55, "learning_rate": 8.892974319448788e-06, "loss": 0.9646, "step": 2502 }, { "epoch": 0.55, "learning_rate": 8.885907413167577e-06, "loss": 0.9059, "step": 2503 }, { "epoch": 0.55, "learning_rate": 8.87884107022507e-06, "loss": 0.9017, "step": 2504 }, { "epoch": 0.55, "learning_rate": 8.871775294194346e-06, "loss": 1.004, "step": 2505 }, { "epoch": 0.55, "learning_rate": 8.864710088648202e-06, "loss": 0.9948, "step": 2506 }, { "epoch": 0.55, "learning_rate": 8.857645457159145e-06, "loss": 0.9207, "step": 2507 }, { "epoch": 0.55, "learning_rate": 8.85058140329939e-06, "loss": 0.9701, "step": 2508 }, { "epoch": 0.55, "learning_rate": 8.843517930640863e-06, "loss": 0.9435, "step": 2509 }, { "epoch": 0.55, "learning_rate": 8.836455042755197e-06, "loss": 0.9377, "step": 2510 }, { "epoch": 0.55, "learning_rate": 8.829392743213724e-06, "loss": 0.9456, "step": 2511 }, { "epoch": 0.55, "learning_rate": 8.822331035587483e-06, "loss": 0.9279, "step": 2512 }, { "epoch": 0.55, "learning_rate": 8.815269923447213e-06, "loss": 0.9112, "step": 2513 }, { "epoch": 0.55, "learning_rate": 8.80820941036335e-06, "loss": 0.9197, "step": 2514 }, { "epoch": 0.55, "learning_rate": 8.801149499906032e-06, "loss": 0.9096, "step": 2515 }, { "epoch": 0.55, "learning_rate": 8.794090195645087e-06, "loss": 0.9009, "step": 2516 }, { "epoch": 0.55, "learning_rate": 8.787031501150034e-06, "loss": 0.968, "step": 2517 }, { "epoch": 0.55, "learning_rate": 8.779973419990091e-06, "loss": 0.9795, "step": 2518 }, { "epoch": 0.55, "learning_rate": 8.772915955734162e-06, "loss": 0.9975, "step": 2519 }, { "epoch": 0.55, "learning_rate": 8.765859111950842e-06, "loss": 0.97, "step": 2520 }, { "epoch": 0.55, "learning_rate": 8.758802892208406e-06, "loss": 0.8983, "step": 2521 }, { "epoch": 0.55, "learning_rate": 8.751747300074821e-06, "loss": 0.9441, "step": 2522 }, { "epoch": 0.55, "learning_rate": 8.744692339117734e-06, "loss": 0.9129, "step": 2523 }, { "epoch": 0.55, "learning_rate": 8.737638012904469e-06, "loss": 0.947, "step": 2524 }, { "epoch": 0.55, "learning_rate": 8.730584325002031e-06, "loss": 0.9223, "step": 2525 }, { "epoch": 0.55, "learning_rate": 8.723531278977108e-06, "loss": 0.9422, "step": 2526 }, { "epoch": 0.55, "learning_rate": 8.716478878396053e-06, "loss": 0.9457, "step": 2527 }, { "epoch": 0.55, "learning_rate": 8.709427126824904e-06, "loss": 0.8893, "step": 2528 }, { "epoch": 0.56, "learning_rate": 8.702376027829362e-06, "loss": 0.9232, "step": 2529 }, { "epoch": 0.56, "learning_rate": 8.695325584974802e-06, "loss": 0.8934, "step": 2530 }, { "epoch": 0.56, "learning_rate": 8.688275801826265e-06, "loss": 0.9721, "step": 2531 }, { "epoch": 0.56, "learning_rate": 8.68122668194846e-06, "loss": 0.9011, "step": 2532 }, { "epoch": 0.56, "learning_rate": 8.674178228905765e-06, "loss": 0.9509, "step": 2533 }, { "epoch": 0.56, "learning_rate": 8.667130446262214e-06, "loss": 0.8957, "step": 2534 }, { "epoch": 0.56, "learning_rate": 8.6600833375815e-06, "loss": 0.3325, "step": 2535 }, { "epoch": 0.56, "learning_rate": 8.65303690642698e-06, "loss": 0.9516, "step": 2536 }, { "epoch": 0.56, "learning_rate": 8.645991156361674e-06, "loss": 0.9663, "step": 2537 }, { "epoch": 0.56, "learning_rate": 8.638946090948247e-06, "loss": 0.9296, "step": 2538 }, { "epoch": 0.56, "learning_rate": 8.631901713749022e-06, "loss": 0.9243, "step": 2539 }, { "epoch": 0.56, "learning_rate": 8.624858028325976e-06, "loss": 0.96, "step": 2540 }, { "epoch": 0.56, "learning_rate": 8.617815038240734e-06, "loss": 0.9761, "step": 2541 }, { "epoch": 0.56, "learning_rate": 8.61077274705457e-06, "loss": 0.9764, "step": 2542 }, { "epoch": 0.56, "learning_rate": 8.603731158328405e-06, "loss": 0.8966, "step": 2543 }, { "epoch": 0.56, "learning_rate": 8.596690275622803e-06, "loss": 0.9594, "step": 2544 }, { "epoch": 0.56, "learning_rate": 8.589650102497973e-06, "loss": 0.9021, "step": 2545 }, { "epoch": 0.56, "learning_rate": 8.582610642513765e-06, "loss": 0.9697, "step": 2546 }, { "epoch": 0.56, "learning_rate": 8.575571899229667e-06, "loss": 0.9436, "step": 2547 }, { "epoch": 0.56, "learning_rate": 8.568533876204807e-06, "loss": 0.9452, "step": 2548 }, { "epoch": 0.56, "learning_rate": 8.561496576997948e-06, "loss": 0.9331, "step": 2549 }, { "epoch": 0.56, "learning_rate": 8.554460005167483e-06, "loss": 0.9129, "step": 2550 }, { "epoch": 0.56, "learning_rate": 8.547424164271448e-06, "loss": 0.9539, "step": 2551 }, { "epoch": 0.56, "learning_rate": 8.540389057867492e-06, "loss": 0.3138, "step": 2552 }, { "epoch": 0.56, "learning_rate": 8.533354689512906e-06, "loss": 0.9746, "step": 2553 }, { "epoch": 0.56, "learning_rate": 8.526321062764607e-06, "loss": 0.9283, "step": 2554 }, { "epoch": 0.56, "learning_rate": 8.51928818117913e-06, "loss": 0.8994, "step": 2555 }, { "epoch": 0.56, "learning_rate": 8.512256048312645e-06, "loss": 0.2895, "step": 2556 }, { "epoch": 0.56, "learning_rate": 8.505224667720928e-06, "loss": 0.9892, "step": 2557 }, { "epoch": 0.56, "learning_rate": 8.498194042959386e-06, "loss": 0.9413, "step": 2558 }, { "epoch": 0.56, "learning_rate": 8.49116417758304e-06, "loss": 0.9311, "step": 2559 }, { "epoch": 0.56, "learning_rate": 8.484135075146528e-06, "loss": 0.9101, "step": 2560 }, { "epoch": 0.56, "learning_rate": 8.477106739204103e-06, "loss": 0.9191, "step": 2561 }, { "epoch": 0.56, "learning_rate": 8.470079173309627e-06, "loss": 0.9636, "step": 2562 }, { "epoch": 0.56, "learning_rate": 8.463052381016576e-06, "loss": 0.9856, "step": 2563 }, { "epoch": 0.56, "learning_rate": 8.456026365878032e-06, "loss": 0.9003, "step": 2564 }, { "epoch": 0.56, "learning_rate": 8.449001131446687e-06, "loss": 0.9025, "step": 2565 }, { "epoch": 0.56, "learning_rate": 8.441976681274836e-06, "loss": 0.9258, "step": 2566 }, { "epoch": 0.56, "learning_rate": 8.434953018914379e-06, "loss": 0.9788, "step": 2567 }, { "epoch": 0.56, "learning_rate": 8.427930147916816e-06, "loss": 0.8697, "step": 2568 }, { "epoch": 0.56, "learning_rate": 8.420908071833252e-06, "loss": 0.8856, "step": 2569 }, { "epoch": 0.56, "learning_rate": 8.413886794214379e-06, "loss": 0.9047, "step": 2570 }, { "epoch": 0.56, "learning_rate": 8.406866318610496e-06, "loss": 0.923, "step": 2571 }, { "epoch": 0.56, "learning_rate": 8.39984664857149e-06, "loss": 0.9028, "step": 2572 }, { "epoch": 0.56, "learning_rate": 8.392827787646845e-06, "loss": 0.9487, "step": 2573 }, { "epoch": 0.57, "learning_rate": 8.385809739385632e-06, "loss": 0.2968, "step": 2574 }, { "epoch": 0.57, "learning_rate": 8.37879250733652e-06, "loss": 0.928, "step": 2575 }, { "epoch": 0.57, "learning_rate": 8.37177609504775e-06, "loss": 0.9306, "step": 2576 }, { "epoch": 0.57, "learning_rate": 8.364760506067162e-06, "loss": 0.8708, "step": 2577 }, { "epoch": 0.57, "learning_rate": 8.357745743942171e-06, "loss": 0.9692, "step": 2578 }, { "epoch": 0.57, "learning_rate": 8.35073181221978e-06, "loss": 0.9495, "step": 2579 }, { "epoch": 0.57, "learning_rate": 8.343718714446572e-06, "loss": 0.9422, "step": 2580 }, { "epoch": 0.57, "learning_rate": 8.336706454168701e-06, "loss": 0.9555, "step": 2581 }, { "epoch": 0.57, "learning_rate": 8.329695034931904e-06, "loss": 0.9964, "step": 2582 }, { "epoch": 0.57, "learning_rate": 8.322684460281494e-06, "loss": 0.9634, "step": 2583 }, { "epoch": 0.57, "learning_rate": 8.31567473376235e-06, "loss": 0.9847, "step": 2584 }, { "epoch": 0.57, "learning_rate": 8.308665858918928e-06, "loss": 0.2914, "step": 2585 }, { "epoch": 0.57, "learning_rate": 8.301657839295253e-06, "loss": 0.9534, "step": 2586 }, { "epoch": 0.57, "learning_rate": 8.294650678434919e-06, "loss": 0.9178, "step": 2587 }, { "epoch": 0.57, "learning_rate": 8.287644379881075e-06, "loss": 0.9264, "step": 2588 }, { "epoch": 0.57, "learning_rate": 8.280638947176447e-06, "loss": 0.9225, "step": 2589 }, { "epoch": 0.57, "learning_rate": 8.273634383863315e-06, "loss": 0.9527, "step": 2590 }, { "epoch": 0.57, "learning_rate": 8.266630693483526e-06, "loss": 0.954, "step": 2591 }, { "epoch": 0.57, "learning_rate": 8.25962787957848e-06, "loss": 0.9311, "step": 2592 }, { "epoch": 0.57, "learning_rate": 8.252625945689138e-06, "loss": 0.9467, "step": 2593 }, { "epoch": 0.57, "learning_rate": 8.245624895356011e-06, "loss": 0.9252, "step": 2594 }, { "epoch": 0.57, "learning_rate": 8.238624732119169e-06, "loss": 0.9614, "step": 2595 }, { "epoch": 0.57, "learning_rate": 8.231625459518228e-06, "loss": 0.9629, "step": 2596 }, { "epoch": 0.57, "learning_rate": 8.224627081092357e-06, "loss": 0.8928, "step": 2597 }, { "epoch": 0.57, "learning_rate": 8.217629600380275e-06, "loss": 0.916, "step": 2598 }, { "epoch": 0.57, "learning_rate": 8.210633020920241e-06, "loss": 0.882, "step": 2599 }, { "epoch": 0.57, "learning_rate": 8.203637346250062e-06, "loss": 1.0196, "step": 2600 }, { "epoch": 0.57, "learning_rate": 8.19664257990709e-06, "loss": 0.9623, "step": 2601 }, { "epoch": 0.57, "learning_rate": 8.18964872542821e-06, "loss": 0.911, "step": 2602 }, { "epoch": 0.57, "learning_rate": 8.182655786349852e-06, "loss": 0.8786, "step": 2603 }, { "epoch": 0.57, "learning_rate": 8.175663766207983e-06, "loss": 0.93, "step": 2604 }, { "epoch": 0.57, "learning_rate": 8.16867266853811e-06, "loss": 0.9064, "step": 2605 }, { "epoch": 0.57, "learning_rate": 8.161682496875255e-06, "loss": 0.9201, "step": 2606 }, { "epoch": 0.57, "learning_rate": 8.154693254753991e-06, "loss": 0.9539, "step": 2607 }, { "epoch": 0.57, "learning_rate": 8.147704945708414e-06, "loss": 0.9036, "step": 2608 }, { "epoch": 0.57, "learning_rate": 8.14071757327215e-06, "loss": 0.9348, "step": 2609 }, { "epoch": 0.57, "learning_rate": 8.133731140978347e-06, "loss": 0.8909, "step": 2610 }, { "epoch": 0.57, "learning_rate": 8.126745652359683e-06, "loss": 0.9376, "step": 2611 }, { "epoch": 0.57, "learning_rate": 8.119761110948356e-06, "loss": 0.9405, "step": 2612 }, { "epoch": 0.57, "learning_rate": 8.112777520276085e-06, "loss": 0.9545, "step": 2613 }, { "epoch": 0.57, "learning_rate": 8.105794883874108e-06, "loss": 0.9401, "step": 2614 }, { "epoch": 0.57, "learning_rate": 8.098813205273183e-06, "loss": 0.9333, "step": 2615 }, { "epoch": 0.57, "learning_rate": 8.09183248800358e-06, "loss": 0.9447, "step": 2616 }, { "epoch": 0.57, "learning_rate": 8.084852735595085e-06, "loss": 0.9666, "step": 2617 }, { "epoch": 0.57, "learning_rate": 8.077873951577e-06, "loss": 0.9101, "step": 2618 }, { "epoch": 0.57, "learning_rate": 8.070896139478127e-06, "loss": 0.3242, "step": 2619 }, { "epoch": 0.58, "learning_rate": 8.063919302826787e-06, "loss": 0.9742, "step": 2620 }, { "epoch": 0.58, "learning_rate": 8.056943445150801e-06, "loss": 0.8685, "step": 2621 }, { "epoch": 0.58, "learning_rate": 8.049968569977503e-06, "loss": 0.9087, "step": 2622 }, { "epoch": 0.58, "learning_rate": 8.04299468083372e-06, "loss": 0.8764, "step": 2623 }, { "epoch": 0.58, "learning_rate": 8.036021781245783e-06, "loss": 0.9356, "step": 2624 }, { "epoch": 0.58, "learning_rate": 8.02904987473953e-06, "loss": 0.9422, "step": 2625 }, { "epoch": 0.58, "learning_rate": 8.022078964840287e-06, "loss": 0.9381, "step": 2626 }, { "epoch": 0.58, "learning_rate": 8.015109055072885e-06, "loss": 0.9491, "step": 2627 }, { "epoch": 0.58, "learning_rate": 8.008140148961642e-06, "loss": 0.879, "step": 2628 }, { "epoch": 0.58, "learning_rate": 8.001172250030373e-06, "loss": 0.9156, "step": 2629 }, { "epoch": 0.58, "learning_rate": 7.99420536180238e-06, "loss": 0.963, "step": 2630 }, { "epoch": 0.58, "learning_rate": 7.98723948780046e-06, "loss": 0.9577, "step": 2631 }, { "epoch": 0.58, "learning_rate": 7.98027463154689e-06, "loss": 0.9101, "step": 2632 }, { "epoch": 0.58, "learning_rate": 7.97331079656344e-06, "loss": 0.9582, "step": 2633 }, { "epoch": 0.58, "learning_rate": 7.966347986371355e-06, "loss": 0.9562, "step": 2634 }, { "epoch": 0.58, "learning_rate": 7.959386204491365e-06, "loss": 0.9472, "step": 2635 }, { "epoch": 0.58, "learning_rate": 7.952425454443688e-06, "loss": 0.2932, "step": 2636 }, { "epoch": 0.58, "learning_rate": 7.945465739748006e-06, "loss": 0.2907, "step": 2637 }, { "epoch": 0.58, "learning_rate": 7.938507063923489e-06, "loss": 0.9276, "step": 2638 }, { "epoch": 0.58, "learning_rate": 7.931549430488777e-06, "loss": 0.9327, "step": 2639 }, { "epoch": 0.58, "learning_rate": 7.924592842961985e-06, "loss": 0.9178, "step": 2640 }, { "epoch": 0.58, "learning_rate": 7.917637304860695e-06, "loss": 0.9721, "step": 2641 }, { "epoch": 0.58, "learning_rate": 7.91068281970196e-06, "loss": 0.9347, "step": 2642 }, { "epoch": 0.58, "learning_rate": 7.903729391002305e-06, "loss": 0.9309, "step": 2643 }, { "epoch": 0.58, "learning_rate": 7.896777022277714e-06, "loss": 0.2924, "step": 2644 }, { "epoch": 0.58, "learning_rate": 7.889825717043643e-06, "loss": 0.9289, "step": 2645 }, { "epoch": 0.58, "learning_rate": 7.882875478815001e-06, "loss": 0.9446, "step": 2646 }, { "epoch": 0.58, "learning_rate": 7.875926311106163e-06, "loss": 0.9198, "step": 2647 }, { "epoch": 0.58, "learning_rate": 7.868978217430964e-06, "loss": 0.9548, "step": 2648 }, { "epoch": 0.58, "learning_rate": 7.862031201302693e-06, "loss": 0.9801, "step": 2649 }, { "epoch": 0.58, "learning_rate": 7.855085266234093e-06, "loss": 0.9498, "step": 2650 }, { "epoch": 0.58, "learning_rate": 7.848140415737367e-06, "loss": 1.002, "step": 2651 }, { "epoch": 0.58, "learning_rate": 7.841196653324159e-06, "loss": 0.9327, "step": 2652 }, { "epoch": 0.58, "learning_rate": 7.834253982505571e-06, "loss": 0.9212, "step": 2653 }, { "epoch": 0.58, "learning_rate": 7.827312406792152e-06, "loss": 0.9109, "step": 2654 }, { "epoch": 0.58, "learning_rate": 7.820371929693894e-06, "loss": 0.9422, "step": 2655 }, { "epoch": 0.58, "learning_rate": 7.813432554720239e-06, "loss": 0.8252, "step": 2656 }, { "epoch": 0.58, "learning_rate": 7.806494285380063e-06, "loss": 0.913, "step": 2657 }, { "epoch": 0.58, "learning_rate": 7.799557125181695e-06, "loss": 0.905, "step": 2658 }, { "epoch": 0.58, "learning_rate": 7.792621077632887e-06, "loss": 1.0319, "step": 2659 }, { "epoch": 0.58, "learning_rate": 7.785686146240844e-06, "loss": 1.0166, "step": 2660 }, { "epoch": 0.58, "learning_rate": 7.7787523345122e-06, "loss": 0.9192, "step": 2661 }, { "epoch": 0.58, "learning_rate": 7.771819645953019e-06, "loss": 0.9143, "step": 2662 }, { "epoch": 0.58, "learning_rate": 7.764888084068807e-06, "loss": 0.9202, "step": 2663 }, { "epoch": 0.58, "learning_rate": 7.75795765236449e-06, "loss": 0.8955, "step": 2664 }, { "epoch": 0.59, "learning_rate": 7.751028354344432e-06, "loss": 0.9524, "step": 2665 }, { "epoch": 0.59, "learning_rate": 7.744100193512411e-06, "loss": 0.9198, "step": 2666 }, { "epoch": 0.59, "learning_rate": 7.737173173371644e-06, "loss": 0.8857, "step": 2667 }, { "epoch": 0.59, "learning_rate": 7.730247297424765e-06, "loss": 0.9374, "step": 2668 }, { "epoch": 0.59, "learning_rate": 7.723322569173824e-06, "loss": 0.9327, "step": 2669 }, { "epoch": 0.59, "learning_rate": 7.716398992120302e-06, "loss": 0.9374, "step": 2670 }, { "epoch": 0.59, "learning_rate": 7.709476569765088e-06, "loss": 0.9492, "step": 2671 }, { "epoch": 0.59, "learning_rate": 7.702555305608493e-06, "loss": 0.9126, "step": 2672 }, { "epoch": 0.59, "learning_rate": 7.695635203150241e-06, "loss": 0.97, "step": 2673 }, { "epoch": 0.59, "learning_rate": 7.688716265889467e-06, "loss": 0.9203, "step": 2674 }, { "epoch": 0.59, "learning_rate": 7.681798497324717e-06, "loss": 0.9521, "step": 2675 }, { "epoch": 0.59, "learning_rate": 7.674881900953951e-06, "loss": 0.9603, "step": 2676 }, { "epoch": 0.59, "learning_rate": 7.667966480274527e-06, "loss": 0.9649, "step": 2677 }, { "epoch": 0.59, "learning_rate": 7.661052238783217e-06, "loss": 0.9331, "step": 2678 }, { "epoch": 0.59, "learning_rate": 7.65413917997619e-06, "loss": 0.8745, "step": 2679 }, { "epoch": 0.59, "learning_rate": 7.647227307349024e-06, "loss": 0.9573, "step": 2680 }, { "epoch": 0.59, "learning_rate": 7.640316624396692e-06, "loss": 0.9315, "step": 2681 }, { "epoch": 0.59, "learning_rate": 7.633407134613569e-06, "loss": 0.8994, "step": 2682 }, { "epoch": 0.59, "learning_rate": 7.626498841493423e-06, "loss": 0.912, "step": 2683 }, { "epoch": 0.59, "learning_rate": 7.619591748529418e-06, "loss": 0.9194, "step": 2684 }, { "epoch": 0.59, "learning_rate": 7.612685859214113e-06, "loss": 0.9212, "step": 2685 }, { "epoch": 0.59, "learning_rate": 7.605781177039457e-06, "loss": 0.3134, "step": 2686 }, { "epoch": 0.59, "learning_rate": 7.5988777054967926e-06, "loss": 0.9789, "step": 2687 }, { "epoch": 0.59, "learning_rate": 7.591975448076838e-06, "loss": 0.3029, "step": 2688 }, { "epoch": 0.59, "learning_rate": 7.585074408269712e-06, "loss": 0.8633, "step": 2689 }, { "epoch": 0.59, "learning_rate": 7.578174589564911e-06, "loss": 0.9702, "step": 2690 }, { "epoch": 0.59, "learning_rate": 7.571275995451313e-06, "loss": 0.9299, "step": 2691 }, { "epoch": 0.59, "learning_rate": 7.564378629417179e-06, "loss": 0.894, "step": 2692 }, { "epoch": 0.59, "learning_rate": 7.55748249495015e-06, "loss": 0.9601, "step": 2693 }, { "epoch": 0.59, "learning_rate": 7.550587595537242e-06, "loss": 0.9232, "step": 2694 }, { "epoch": 0.59, "learning_rate": 7.543693934664846e-06, "loss": 0.9321, "step": 2695 }, { "epoch": 0.59, "learning_rate": 7.536801515818728e-06, "loss": 0.9265, "step": 2696 }, { "epoch": 0.59, "learning_rate": 7.529910342484026e-06, "loss": 0.9025, "step": 2697 }, { "epoch": 0.59, "learning_rate": 7.523020418145248e-06, "loss": 0.9399, "step": 2698 }, { "epoch": 0.59, "learning_rate": 7.516131746286272e-06, "loss": 0.9133, "step": 2699 }, { "epoch": 0.59, "learning_rate": 7.5092443303903404e-06, "loss": 0.9535, "step": 2700 }, { "epoch": 0.59, "learning_rate": 7.502358173940061e-06, "loss": 0.9235, "step": 2701 }, { "epoch": 0.59, "learning_rate": 7.495473280417404e-06, "loss": 0.9214, "step": 2702 }, { "epoch": 0.59, "learning_rate": 7.4885896533037064e-06, "loss": 0.2849, "step": 2703 }, { "epoch": 0.59, "learning_rate": 7.481707296079657e-06, "loss": 0.9173, "step": 2704 }, { "epoch": 0.59, "learning_rate": 7.474826212225305e-06, "loss": 0.9093, "step": 2705 }, { "epoch": 0.59, "learning_rate": 7.467946405220059e-06, "loss": 0.9025, "step": 2706 }, { "epoch": 0.59, "learning_rate": 7.461067878542679e-06, "loss": 0.3472, "step": 2707 }, { "epoch": 0.59, "learning_rate": 7.454190635671279e-06, "loss": 0.9459, "step": 2708 }, { "epoch": 0.59, "learning_rate": 7.447314680083324e-06, "loss": 0.9146, "step": 2709 }, { "epoch": 0.59, "learning_rate": 7.440440015255625e-06, "loss": 0.8897, "step": 2710 }, { "epoch": 0.6, "learning_rate": 7.433566644664343e-06, "loss": 0.947, "step": 2711 }, { "epoch": 0.6, "learning_rate": 7.426694571784987e-06, "loss": 0.9554, "step": 2712 }, { "epoch": 0.6, "learning_rate": 7.4198238000924024e-06, "loss": 0.9391, "step": 2713 }, { "epoch": 0.6, "learning_rate": 7.412954333060784e-06, "loss": 0.93, "step": 2714 }, { "epoch": 0.6, "learning_rate": 7.406086174163665e-06, "loss": 0.3213, "step": 2715 }, { "epoch": 0.6, "learning_rate": 7.399219326873915e-06, "loss": 0.9293, "step": 2716 }, { "epoch": 0.6, "learning_rate": 7.392353794663739e-06, "loss": 0.94, "step": 2717 }, { "epoch": 0.6, "learning_rate": 7.385489581004683e-06, "loss": 0.9188, "step": 2718 }, { "epoch": 0.6, "learning_rate": 7.378626689367625e-06, "loss": 0.908, "step": 2719 }, { "epoch": 0.6, "learning_rate": 7.371765123222767e-06, "loss": 0.3203, "step": 2720 }, { "epoch": 0.6, "learning_rate": 7.364904886039651e-06, "loss": 0.8931, "step": 2721 }, { "epoch": 0.6, "learning_rate": 7.358045981287141e-06, "loss": 0.92, "step": 2722 }, { "epoch": 0.6, "learning_rate": 7.351188412433425e-06, "loss": 0.9542, "step": 2723 }, { "epoch": 0.6, "learning_rate": 7.344332182946026e-06, "loss": 0.9627, "step": 2724 }, { "epoch": 0.6, "learning_rate": 7.337477296291778e-06, "loss": 0.8836, "step": 2725 }, { "epoch": 0.6, "learning_rate": 7.330623755936838e-06, "loss": 0.3044, "step": 2726 }, { "epoch": 0.6, "learning_rate": 7.323771565346693e-06, "loss": 0.9406, "step": 2727 }, { "epoch": 0.6, "learning_rate": 7.316920727986134e-06, "loss": 0.9364, "step": 2728 }, { "epoch": 0.6, "learning_rate": 7.310071247319275e-06, "loss": 0.9821, "step": 2729 }, { "epoch": 0.6, "learning_rate": 7.303223126809546e-06, "loss": 0.9273, "step": 2730 }, { "epoch": 0.6, "learning_rate": 7.2963763699196765e-06, "loss": 0.9465, "step": 2731 }, { "epoch": 0.6, "learning_rate": 7.289530980111721e-06, "loss": 0.9475, "step": 2732 }, { "epoch": 0.6, "learning_rate": 7.282686960847036e-06, "loss": 0.9408, "step": 2733 }, { "epoch": 0.6, "learning_rate": 7.275844315586289e-06, "loss": 0.9728, "step": 2734 }, { "epoch": 0.6, "learning_rate": 7.269003047789446e-06, "loss": 0.9302, "step": 2735 }, { "epoch": 0.6, "learning_rate": 7.262163160915783e-06, "loss": 0.9367, "step": 2736 }, { "epoch": 0.6, "learning_rate": 7.255324658423875e-06, "loss": 0.3389, "step": 2737 }, { "epoch": 0.6, "learning_rate": 7.248487543771595e-06, "loss": 0.9021, "step": 2738 }, { "epoch": 0.6, "learning_rate": 7.24165182041612e-06, "loss": 0.9623, "step": 2739 }, { "epoch": 0.6, "learning_rate": 7.234817491813917e-06, "loss": 0.9604, "step": 2740 }, { "epoch": 0.6, "learning_rate": 7.227984561420752e-06, "loss": 0.8874, "step": 2741 }, { "epoch": 0.6, "learning_rate": 7.221153032691683e-06, "loss": 0.9635, "step": 2742 }, { "epoch": 0.6, "learning_rate": 7.214322909081057e-06, "loss": 0.9621, "step": 2743 }, { "epoch": 0.6, "learning_rate": 7.207494194042515e-06, "loss": 0.8952, "step": 2744 }, { "epoch": 0.6, "learning_rate": 7.200666891028983e-06, "loss": 0.908, "step": 2745 }, { "epoch": 0.6, "learning_rate": 7.19384100349267e-06, "loss": 0.9261, "step": 2746 }, { "epoch": 0.6, "learning_rate": 7.187016534885081e-06, "loss": 0.9557, "step": 2747 }, { "epoch": 0.6, "learning_rate": 7.180193488656982e-06, "loss": 0.939, "step": 2748 }, { "epoch": 0.6, "learning_rate": 7.1733718682584406e-06, "loss": 0.9394, "step": 2749 }, { "epoch": 0.6, "learning_rate": 7.166551677138794e-06, "loss": 0.9964, "step": 2750 }, { "epoch": 0.6, "learning_rate": 7.159732918746659e-06, "loss": 0.9668, "step": 2751 }, { "epoch": 0.6, "learning_rate": 7.152915596529928e-06, "loss": 0.8549, "step": 2752 }, { "epoch": 0.6, "learning_rate": 7.146099713935763e-06, "loss": 0.9179, "step": 2753 }, { "epoch": 0.6, "learning_rate": 7.139285274410605e-06, "loss": 0.8881, "step": 2754 }, { "epoch": 0.6, "learning_rate": 7.13247228140016e-06, "loss": 0.9417, "step": 2755 }, { "epoch": 0.6, "learning_rate": 7.125660738349406e-06, "loss": 0.9048, "step": 2756 }, { "epoch": 0.61, "learning_rate": 7.1188506487025845e-06, "loss": 0.9478, "step": 2757 }, { "epoch": 0.61, "learning_rate": 7.112042015903204e-06, "loss": 0.8901, "step": 2758 }, { "epoch": 0.61, "learning_rate": 7.105234843394038e-06, "loss": 0.912, "step": 2759 }, { "epoch": 0.61, "learning_rate": 7.098429134617117e-06, "loss": 0.8517, "step": 2760 }, { "epoch": 0.61, "learning_rate": 7.091624893013735e-06, "loss": 0.9345, "step": 2761 }, { "epoch": 0.61, "learning_rate": 7.084822122024446e-06, "loss": 0.9613, "step": 2762 }, { "epoch": 0.61, "learning_rate": 7.078020825089054e-06, "loss": 0.9441, "step": 2763 }, { "epoch": 0.61, "learning_rate": 7.0712210056466224e-06, "loss": 0.966, "step": 2764 }, { "epoch": 0.61, "learning_rate": 7.06442266713547e-06, "loss": 0.9348, "step": 2765 }, { "epoch": 0.61, "learning_rate": 7.057625812993156e-06, "loss": 0.926, "step": 2766 }, { "epoch": 0.61, "learning_rate": 7.0508304466565005e-06, "loss": 0.9977, "step": 2767 }, { "epoch": 0.61, "learning_rate": 7.0440365715615655e-06, "loss": 0.9083, "step": 2768 }, { "epoch": 0.61, "learning_rate": 7.037244191143662e-06, "loss": 0.963, "step": 2769 }, { "epoch": 0.61, "learning_rate": 7.030453308837344e-06, "loss": 0.9302, "step": 2770 }, { "epoch": 0.61, "learning_rate": 7.0236639280764055e-06, "loss": 0.8872, "step": 2771 }, { "epoch": 0.61, "learning_rate": 7.016876052293884e-06, "loss": 0.9758, "step": 2772 }, { "epoch": 0.61, "learning_rate": 7.010089684922056e-06, "loss": 0.9254, "step": 2773 }, { "epoch": 0.61, "learning_rate": 7.003304829392435e-06, "loss": 0.902, "step": 2774 }, { "epoch": 0.61, "learning_rate": 6.996521489135768e-06, "loss": 0.9203, "step": 2775 }, { "epoch": 0.61, "learning_rate": 6.9897396675820406e-06, "loss": 0.3194, "step": 2776 }, { "epoch": 0.61, "learning_rate": 6.982959368160464e-06, "loss": 0.8981, "step": 2777 }, { "epoch": 0.61, "learning_rate": 6.976180594299486e-06, "loss": 1.0115, "step": 2778 }, { "epoch": 0.61, "learning_rate": 6.9694033494267774e-06, "loss": 0.9146, "step": 2779 }, { "epoch": 0.61, "learning_rate": 6.962627636969241e-06, "loss": 0.9581, "step": 2780 }, { "epoch": 0.61, "learning_rate": 6.955853460353002e-06, "loss": 0.9747, "step": 2781 }, { "epoch": 0.61, "learning_rate": 6.949080823003408e-06, "loss": 0.9158, "step": 2782 }, { "epoch": 0.61, "learning_rate": 6.942309728345034e-06, "loss": 0.9336, "step": 2783 }, { "epoch": 0.61, "learning_rate": 6.935540179801663e-06, "loss": 0.9797, "step": 2784 }, { "epoch": 0.61, "learning_rate": 6.928772180796308e-06, "loss": 0.8842, "step": 2785 }, { "epoch": 0.61, "learning_rate": 6.9220057347511935e-06, "loss": 0.9138, "step": 2786 }, { "epoch": 0.61, "learning_rate": 6.9152408450877595e-06, "loss": 0.9309, "step": 2787 }, { "epoch": 0.61, "learning_rate": 6.908477515226659e-06, "loss": 0.9374, "step": 2788 }, { "epoch": 0.61, "learning_rate": 6.901715748587758e-06, "loss": 0.9158, "step": 2789 }, { "epoch": 0.61, "learning_rate": 6.894955548590128e-06, "loss": 0.9241, "step": 2790 }, { "epoch": 0.61, "learning_rate": 6.888196918652052e-06, "loss": 0.9349, "step": 2791 }, { "epoch": 0.61, "learning_rate": 6.881439862191018e-06, "loss": 0.965, "step": 2792 }, { "epoch": 0.61, "learning_rate": 6.874684382623716e-06, "loss": 0.9576, "step": 2793 }, { "epoch": 0.61, "learning_rate": 6.867930483366043e-06, "loss": 0.9114, "step": 2794 }, { "epoch": 0.61, "learning_rate": 6.861178167833096e-06, "loss": 0.9129, "step": 2795 }, { "epoch": 0.61, "learning_rate": 6.854427439439166e-06, "loss": 0.9321, "step": 2796 }, { "epoch": 0.61, "learning_rate": 6.847678301597749e-06, "loss": 0.8809, "step": 2797 }, { "epoch": 0.61, "learning_rate": 6.84093075772153e-06, "loss": 0.9259, "step": 2798 }, { "epoch": 0.61, "learning_rate": 6.834184811222394e-06, "loss": 0.9528, "step": 2799 }, { "epoch": 0.61, "learning_rate": 6.827440465511414e-06, "loss": 0.9265, "step": 2800 }, { "epoch": 0.61, "learning_rate": 6.8206977239988594e-06, "loss": 0.9217, "step": 2801 }, { "epoch": 0.62, "learning_rate": 6.8139565900941754e-06, "loss": 0.2975, "step": 2802 }, { "epoch": 0.62, "learning_rate": 6.807217067206007e-06, "loss": 0.9056, "step": 2803 }, { "epoch": 0.62, "learning_rate": 6.800479158742182e-06, "loss": 0.9321, "step": 2804 }, { "epoch": 0.62, "learning_rate": 6.793742868109709e-06, "loss": 0.9109, "step": 2805 }, { "epoch": 0.62, "learning_rate": 6.78700819871478e-06, "loss": 0.8815, "step": 2806 }, { "epoch": 0.62, "learning_rate": 6.780275153962767e-06, "loss": 0.8954, "step": 2807 }, { "epoch": 0.62, "learning_rate": 6.773543737258222e-06, "loss": 0.9171, "step": 2808 }, { "epoch": 0.62, "learning_rate": 6.766813952004873e-06, "loss": 0.9153, "step": 2809 }, { "epoch": 0.62, "learning_rate": 6.76008580160562e-06, "loss": 0.9907, "step": 2810 }, { "epoch": 0.62, "learning_rate": 6.75335928946254e-06, "loss": 0.8861, "step": 2811 }, { "epoch": 0.62, "learning_rate": 6.746634418976881e-06, "loss": 0.8919, "step": 2812 }, { "epoch": 0.62, "learning_rate": 6.739911193549058e-06, "loss": 0.9271, "step": 2813 }, { "epoch": 0.62, "learning_rate": 6.733189616578661e-06, "loss": 0.8832, "step": 2814 }, { "epoch": 0.62, "learning_rate": 6.726469691464439e-06, "loss": 0.993, "step": 2815 }, { "epoch": 0.62, "learning_rate": 6.719751421604309e-06, "loss": 0.9281, "step": 2816 }, { "epoch": 0.62, "learning_rate": 6.7130348103953516e-06, "loss": 0.9611, "step": 2817 }, { "epoch": 0.62, "learning_rate": 6.706319861233808e-06, "loss": 0.9045, "step": 2818 }, { "epoch": 0.62, "learning_rate": 6.6996065775150805e-06, "loss": 0.9275, "step": 2819 }, { "epoch": 0.62, "learning_rate": 6.692894962633722e-06, "loss": 0.9426, "step": 2820 }, { "epoch": 0.62, "learning_rate": 6.686185019983452e-06, "loss": 0.9256, "step": 2821 }, { "epoch": 0.62, "learning_rate": 6.6794767529571385e-06, "loss": 0.8766, "step": 2822 }, { "epoch": 0.62, "learning_rate": 6.672770164946802e-06, "loss": 0.9385, "step": 2823 }, { "epoch": 0.62, "learning_rate": 6.6660652593436174e-06, "loss": 0.9441, "step": 2824 }, { "epoch": 0.62, "learning_rate": 6.659362039537907e-06, "loss": 0.3076, "step": 2825 }, { "epoch": 0.62, "learning_rate": 6.652660508919138e-06, "loss": 0.3071, "step": 2826 }, { "epoch": 0.62, "learning_rate": 6.645960670875928e-06, "loss": 0.8976, "step": 2827 }, { "epoch": 0.62, "learning_rate": 6.639262528796038e-06, "loss": 1.0019, "step": 2828 }, { "epoch": 0.62, "learning_rate": 6.632566086066367e-06, "loss": 0.9358, "step": 2829 }, { "epoch": 0.62, "learning_rate": 6.6258713460729604e-06, "loss": 0.8814, "step": 2830 }, { "epoch": 0.62, "learning_rate": 6.619178312201e-06, "loss": 0.9318, "step": 2831 }, { "epoch": 0.62, "learning_rate": 6.612486987834805e-06, "loss": 0.9071, "step": 2832 }, { "epoch": 0.62, "learning_rate": 6.60579737635783e-06, "loss": 0.9088, "step": 2833 }, { "epoch": 0.62, "learning_rate": 6.599109481152662e-06, "loss": 0.9359, "step": 2834 }, { "epoch": 0.62, "learning_rate": 6.592423305601025e-06, "loss": 0.9085, "step": 2835 }, { "epoch": 0.62, "learning_rate": 6.585738853083772e-06, "loss": 0.9466, "step": 2836 }, { "epoch": 0.62, "learning_rate": 6.5790561269808766e-06, "loss": 0.936, "step": 2837 }, { "epoch": 0.62, "learning_rate": 6.572375130671449e-06, "loss": 0.9698, "step": 2838 }, { "epoch": 0.62, "learning_rate": 6.565695867533721e-06, "loss": 0.9349, "step": 2839 }, { "epoch": 0.62, "learning_rate": 6.559018340945051e-06, "loss": 0.903, "step": 2840 }, { "epoch": 0.62, "learning_rate": 6.552342554281916e-06, "loss": 0.9282, "step": 2841 }, { "epoch": 0.62, "learning_rate": 6.5456685109199135e-06, "loss": 0.8773, "step": 2842 }, { "epoch": 0.62, "learning_rate": 6.538996214233761e-06, "loss": 0.9676, "step": 2843 }, { "epoch": 0.62, "learning_rate": 6.532325667597292e-06, "loss": 0.3053, "step": 2844 }, { "epoch": 0.62, "learning_rate": 6.525656874383456e-06, "loss": 0.9217, "step": 2845 }, { "epoch": 0.62, "learning_rate": 6.518989837964313e-06, "loss": 0.8713, "step": 2846 }, { "epoch": 0.62, "learning_rate": 6.512324561711038e-06, "loss": 0.9047, "step": 2847 }, { "epoch": 0.63, "learning_rate": 6.505661048993914e-06, "loss": 0.9281, "step": 2848 }, { "epoch": 0.63, "learning_rate": 6.498999303182334e-06, "loss": 0.9241, "step": 2849 }, { "epoch": 0.63, "learning_rate": 6.492339327644797e-06, "loss": 0.9846, "step": 2850 }, { "epoch": 0.63, "learning_rate": 6.485681125748904e-06, "loss": 0.9203, "step": 2851 }, { "epoch": 0.63, "learning_rate": 6.479024700861363e-06, "loss": 0.3331, "step": 2852 }, { "epoch": 0.63, "learning_rate": 6.472370056347983e-06, "loss": 0.9461, "step": 2853 }, { "epoch": 0.63, "learning_rate": 6.4657171955736704e-06, "loss": 0.9613, "step": 2854 }, { "epoch": 0.63, "learning_rate": 6.459066121902433e-06, "loss": 0.9698, "step": 2855 }, { "epoch": 0.63, "learning_rate": 6.4524168386973696e-06, "loss": 0.9555, "step": 2856 }, { "epoch": 0.63, "learning_rate": 6.445769349320679e-06, "loss": 0.9171, "step": 2857 }, { "epoch": 0.63, "learning_rate": 6.439123657133649e-06, "loss": 0.9335, "step": 2858 }, { "epoch": 0.63, "learning_rate": 6.4324797654966646e-06, "loss": 0.9847, "step": 2859 }, { "epoch": 0.63, "learning_rate": 6.425837677769191e-06, "loss": 0.9006, "step": 2860 }, { "epoch": 0.63, "learning_rate": 6.419197397309792e-06, "loss": 0.9826, "step": 2861 }, { "epoch": 0.63, "learning_rate": 6.412558927476109e-06, "loss": 0.9305, "step": 2862 }, { "epoch": 0.63, "learning_rate": 6.405922271624874e-06, "loss": 0.9667, "step": 2863 }, { "epoch": 0.63, "learning_rate": 6.399287433111895e-06, "loss": 0.9506, "step": 2864 }, { "epoch": 0.63, "learning_rate": 6.392654415292068e-06, "loss": 0.8737, "step": 2865 }, { "epoch": 0.63, "learning_rate": 6.386023221519363e-06, "loss": 0.9553, "step": 2866 }, { "epoch": 0.63, "learning_rate": 6.379393855146831e-06, "loss": 0.8585, "step": 2867 }, { "epoch": 0.63, "learning_rate": 6.3727663195266e-06, "loss": 0.9297, "step": 2868 }, { "epoch": 0.63, "learning_rate": 6.366140618009866e-06, "loss": 0.977, "step": 2869 }, { "epoch": 0.63, "learning_rate": 6.359516753946905e-06, "loss": 0.9784, "step": 2870 }, { "epoch": 0.63, "learning_rate": 6.352894730687059e-06, "loss": 0.9308, "step": 2871 }, { "epoch": 0.63, "learning_rate": 6.346274551578744e-06, "loss": 0.9257, "step": 2872 }, { "epoch": 0.63, "learning_rate": 6.339656219969437e-06, "loss": 0.9632, "step": 2873 }, { "epoch": 0.63, "learning_rate": 6.333039739205684e-06, "loss": 0.9011, "step": 2874 }, { "epoch": 0.63, "learning_rate": 6.326425112633097e-06, "loss": 0.8772, "step": 2875 }, { "epoch": 0.63, "learning_rate": 6.319812343596348e-06, "loss": 0.8897, "step": 2876 }, { "epoch": 0.63, "learning_rate": 6.313201435439172e-06, "loss": 0.8946, "step": 2877 }, { "epoch": 0.63, "learning_rate": 6.306592391504361e-06, "loss": 0.9462, "step": 2878 }, { "epoch": 0.63, "learning_rate": 6.299985215133762e-06, "loss": 0.9077, "step": 2879 }, { "epoch": 0.63, "learning_rate": 6.293379909668282e-06, "loss": 0.903, "step": 2880 }, { "epoch": 0.63, "learning_rate": 6.286776478447885e-06, "loss": 0.8923, "step": 2881 }, { "epoch": 0.63, "learning_rate": 6.280174924811576e-06, "loss": 0.8781, "step": 2882 }, { "epoch": 0.63, "learning_rate": 6.273575252097422e-06, "loss": 0.9331, "step": 2883 }, { "epoch": 0.63, "learning_rate": 6.266977463642534e-06, "loss": 0.9099, "step": 2884 }, { "epoch": 0.63, "learning_rate": 6.2603815627830685e-06, "loss": 0.9127, "step": 2885 }, { "epoch": 0.63, "learning_rate": 6.2537875528542335e-06, "loss": 0.8748, "step": 2886 }, { "epoch": 0.63, "learning_rate": 6.247195437190272e-06, "loss": 0.9118, "step": 2887 }, { "epoch": 0.63, "learning_rate": 6.240605219124478e-06, "loss": 0.9105, "step": 2888 }, { "epoch": 0.63, "learning_rate": 6.2340169019891805e-06, "loss": 0.9523, "step": 2889 }, { "epoch": 0.63, "learning_rate": 6.227430489115751e-06, "loss": 0.8942, "step": 2890 }, { "epoch": 0.63, "learning_rate": 6.220845983834594e-06, "loss": 0.9248, "step": 2891 }, { "epoch": 0.63, "learning_rate": 6.214263389475149e-06, "loss": 0.2911, "step": 2892 }, { "epoch": 0.64, "learning_rate": 6.207682709365897e-06, "loss": 0.9431, "step": 2893 }, { "epoch": 0.64, "learning_rate": 6.201103946834341e-06, "loss": 0.9492, "step": 2894 }, { "epoch": 0.64, "learning_rate": 6.194527105207024e-06, "loss": 0.9459, "step": 2895 }, { "epoch": 0.64, "learning_rate": 6.18795218780951e-06, "loss": 0.9068, "step": 2896 }, { "epoch": 0.64, "learning_rate": 6.181379197966393e-06, "loss": 0.874, "step": 2897 }, { "epoch": 0.64, "learning_rate": 6.174808139001294e-06, "loss": 0.9339, "step": 2898 }, { "epoch": 0.64, "learning_rate": 6.168239014236855e-06, "loss": 0.9078, "step": 2899 }, { "epoch": 0.64, "learning_rate": 6.161671826994739e-06, "loss": 0.8778, "step": 2900 }, { "epoch": 0.64, "learning_rate": 6.155106580595633e-06, "loss": 0.9106, "step": 2901 }, { "epoch": 0.64, "learning_rate": 6.14854327835924e-06, "loss": 0.886, "step": 2902 }, { "epoch": 0.64, "learning_rate": 6.141981923604281e-06, "loss": 0.9019, "step": 2903 }, { "epoch": 0.64, "learning_rate": 6.135422519648493e-06, "loss": 0.9277, "step": 2904 }, { "epoch": 0.64, "learning_rate": 6.128865069808625e-06, "loss": 0.8848, "step": 2905 }, { "epoch": 0.64, "learning_rate": 6.122309577400437e-06, "loss": 0.935, "step": 2906 }, { "epoch": 0.64, "learning_rate": 6.115756045738703e-06, "loss": 0.9213, "step": 2907 }, { "epoch": 0.64, "learning_rate": 6.109204478137203e-06, "loss": 0.3004, "step": 2908 }, { "epoch": 0.64, "learning_rate": 6.102654877908722e-06, "loss": 0.9461, "step": 2909 }, { "epoch": 0.64, "learning_rate": 6.0961072483650526e-06, "loss": 0.3062, "step": 2910 }, { "epoch": 0.64, "learning_rate": 6.0895615928169935e-06, "loss": 1.01, "step": 2911 }, { "epoch": 0.64, "learning_rate": 6.08301791457434e-06, "loss": 0.9125, "step": 2912 }, { "epoch": 0.64, "learning_rate": 6.0764762169458905e-06, "loss": 0.9601, "step": 2913 }, { "epoch": 0.64, "learning_rate": 6.069936503239441e-06, "loss": 0.9514, "step": 2914 }, { "epoch": 0.64, "learning_rate": 6.063398776761785e-06, "loss": 0.9479, "step": 2915 }, { "epoch": 0.64, "learning_rate": 6.0568630408187126e-06, "loss": 0.9265, "step": 2916 }, { "epoch": 0.64, "learning_rate": 6.050329298715003e-06, "loss": 0.9569, "step": 2917 }, { "epoch": 0.64, "learning_rate": 6.0437975537544315e-06, "loss": 0.862, "step": 2918 }, { "epoch": 0.64, "learning_rate": 6.037267809239762e-06, "loss": 0.3092, "step": 2919 }, { "epoch": 0.64, "learning_rate": 6.030740068472745e-06, "loss": 0.9425, "step": 2920 }, { "epoch": 0.64, "learning_rate": 6.024214334754124e-06, "loss": 0.9271, "step": 2921 }, { "epoch": 0.64, "learning_rate": 6.017690611383618e-06, "loss": 0.3144, "step": 2922 }, { "epoch": 0.64, "learning_rate": 6.011168901659938e-06, "loss": 0.9561, "step": 2923 }, { "epoch": 0.64, "learning_rate": 6.004649208880771e-06, "loss": 0.9003, "step": 2924 }, { "epoch": 0.64, "learning_rate": 5.998131536342792e-06, "loss": 0.9694, "step": 2925 }, { "epoch": 0.64, "learning_rate": 5.9916158873416415e-06, "loss": 0.8955, "step": 2926 }, { "epoch": 0.64, "learning_rate": 5.985102265171952e-06, "loss": 0.9252, "step": 2927 }, { "epoch": 0.64, "learning_rate": 5.978590673127316e-06, "loss": 0.8532, "step": 2928 }, { "epoch": 0.64, "learning_rate": 5.972081114500313e-06, "loss": 0.8756, "step": 2929 }, { "epoch": 0.64, "learning_rate": 5.965573592582488e-06, "loss": 0.9197, "step": 2930 }, { "epoch": 0.64, "learning_rate": 5.9590681106643525e-06, "loss": 0.9431, "step": 2931 }, { "epoch": 0.64, "learning_rate": 5.952564672035394e-06, "loss": 0.9836, "step": 2932 }, { "epoch": 0.64, "learning_rate": 5.9460632799840626e-06, "loss": 0.8927, "step": 2933 }, { "epoch": 0.64, "learning_rate": 5.9395639377977735e-06, "loss": 0.9365, "step": 2934 }, { "epoch": 0.64, "learning_rate": 5.933066648762907e-06, "loss": 0.9191, "step": 2935 }, { "epoch": 0.64, "learning_rate": 5.926571416164804e-06, "loss": 0.9366, "step": 2936 }, { "epoch": 0.64, "learning_rate": 5.920078243287765e-06, "loss": 0.8786, "step": 2937 }, { "epoch": 0.64, "learning_rate": 5.913587133415053e-06, "loss": 0.8996, "step": 2938 }, { "epoch": 0.65, "learning_rate": 5.907098089828881e-06, "loss": 0.9093, "step": 2939 }, { "epoch": 0.65, "learning_rate": 5.900611115810423e-06, "loss": 0.9123, "step": 2940 }, { "epoch": 0.65, "learning_rate": 5.894126214639805e-06, "loss": 0.8978, "step": 2941 }, { "epoch": 0.65, "learning_rate": 5.887643389596103e-06, "loss": 0.9258, "step": 2942 }, { "epoch": 0.65, "learning_rate": 5.88116264395735e-06, "loss": 0.8933, "step": 2943 }, { "epoch": 0.65, "learning_rate": 5.874683981000513e-06, "loss": 0.9503, "step": 2944 }, { "epoch": 0.65, "learning_rate": 5.868207404001518e-06, "loss": 0.9235, "step": 2945 }, { "epoch": 0.65, "learning_rate": 5.861732916235237e-06, "loss": 0.9621, "step": 2946 }, { "epoch": 0.65, "learning_rate": 5.8552605209754775e-06, "loss": 0.9116, "step": 2947 }, { "epoch": 0.65, "learning_rate": 5.8487902214949965e-06, "loss": 0.9803, "step": 2948 }, { "epoch": 0.65, "learning_rate": 5.842322021065488e-06, "loss": 0.9683, "step": 2949 }, { "epoch": 0.65, "learning_rate": 5.835855922957583e-06, "loss": 0.8981, "step": 2950 }, { "epoch": 0.65, "learning_rate": 5.829391930440851e-06, "loss": 0.9443, "step": 2951 }, { "epoch": 0.65, "learning_rate": 5.822930046783799e-06, "loss": 0.9675, "step": 2952 }, { "epoch": 0.65, "learning_rate": 5.8164702752538625e-06, "loss": 0.8846, "step": 2953 }, { "epoch": 0.65, "learning_rate": 5.810012619117415e-06, "loss": 0.9118, "step": 2954 }, { "epoch": 0.65, "learning_rate": 5.803557081639757e-06, "loss": 0.9268, "step": 2955 }, { "epoch": 0.65, "learning_rate": 5.797103666085116e-06, "loss": 0.8916, "step": 2956 }, { "epoch": 0.65, "learning_rate": 5.790652375716653e-06, "loss": 0.8165, "step": 2957 }, { "epoch": 0.65, "learning_rate": 5.784203213796444e-06, "loss": 0.9062, "step": 2958 }, { "epoch": 0.65, "learning_rate": 5.777756183585501e-06, "loss": 0.8927, "step": 2959 }, { "epoch": 0.65, "learning_rate": 5.771311288343748e-06, "loss": 0.9558, "step": 2960 }, { "epoch": 0.65, "learning_rate": 5.764868531330041e-06, "loss": 0.9166, "step": 2961 }, { "epoch": 0.65, "learning_rate": 5.758427915802136e-06, "loss": 0.9557, "step": 2962 }, { "epoch": 0.65, "learning_rate": 5.751989445016724e-06, "loss": 0.8983, "step": 2963 }, { "epoch": 0.65, "learning_rate": 5.745553122229402e-06, "loss": 0.8809, "step": 2964 }, { "epoch": 0.65, "learning_rate": 5.739118950694684e-06, "loss": 0.3225, "step": 2965 }, { "epoch": 0.65, "learning_rate": 5.732686933666001e-06, "loss": 0.9275, "step": 2966 }, { "epoch": 0.65, "learning_rate": 5.7262570743956835e-06, "loss": 0.8638, "step": 2967 }, { "epoch": 0.65, "learning_rate": 5.71982937613498e-06, "loss": 0.3435, "step": 2968 }, { "epoch": 0.65, "learning_rate": 5.713403842134044e-06, "loss": 0.9156, "step": 2969 }, { "epoch": 0.65, "learning_rate": 5.7069804756419326e-06, "loss": 0.9573, "step": 2970 }, { "epoch": 0.65, "learning_rate": 5.700559279906608e-06, "loss": 0.9327, "step": 2971 }, { "epoch": 0.65, "learning_rate": 5.694140258174936e-06, "loss": 0.9037, "step": 2972 }, { "epoch": 0.65, "learning_rate": 5.687723413692683e-06, "loss": 0.9571, "step": 2973 }, { "epoch": 0.65, "learning_rate": 5.6813087497045125e-06, "loss": 0.8873, "step": 2974 }, { "epoch": 0.65, "learning_rate": 5.6748962694539855e-06, "loss": 0.9042, "step": 2975 }, { "epoch": 0.65, "learning_rate": 5.668485976183562e-06, "loss": 0.8855, "step": 2976 }, { "epoch": 0.65, "learning_rate": 5.662077873134593e-06, "loss": 0.9569, "step": 2977 }, { "epoch": 0.65, "learning_rate": 5.655671963547322e-06, "loss": 0.8951, "step": 2978 }, { "epoch": 0.65, "learning_rate": 5.6492682506608865e-06, "loss": 0.9419, "step": 2979 }, { "epoch": 0.65, "learning_rate": 5.642866737713311e-06, "loss": 0.9228, "step": 2980 }, { "epoch": 0.65, "learning_rate": 5.636467427941506e-06, "loss": 0.9039, "step": 2981 }, { "epoch": 0.65, "learning_rate": 5.630070324581271e-06, "loss": 0.8958, "step": 2982 }, { "epoch": 0.65, "learning_rate": 5.623675430867292e-06, "loss": 0.9533, "step": 2983 }, { "epoch": 0.66, "learning_rate": 5.617282750033128e-06, "loss": 0.318, "step": 2984 }, { "epoch": 0.66, "learning_rate": 5.610892285311229e-06, "loss": 0.9262, "step": 2985 }, { "epoch": 0.66, "learning_rate": 5.604504039932919e-06, "loss": 0.8798, "step": 2986 }, { "epoch": 0.66, "learning_rate": 5.598118017128401e-06, "loss": 0.9183, "step": 2987 }, { "epoch": 0.66, "learning_rate": 5.591734220126757e-06, "loss": 0.9248, "step": 2988 }, { "epoch": 0.66, "learning_rate": 5.585352652155941e-06, "loss": 0.8854, "step": 2989 }, { "epoch": 0.66, "learning_rate": 5.578973316442779e-06, "loss": 0.9396, "step": 2990 }, { "epoch": 0.66, "learning_rate": 5.572596216212971e-06, "loss": 0.9383, "step": 2991 }, { "epoch": 0.66, "learning_rate": 5.5662213546910835e-06, "loss": 0.9939, "step": 2992 }, { "epoch": 0.66, "learning_rate": 5.559848735100553e-06, "loss": 0.8444, "step": 2993 }, { "epoch": 0.66, "learning_rate": 5.553478360663682e-06, "loss": 0.8948, "step": 2994 }, { "epoch": 0.66, "learning_rate": 5.5471102346016385e-06, "loss": 0.9463, "step": 2995 }, { "epoch": 0.66, "learning_rate": 5.540744360134451e-06, "loss": 0.9167, "step": 2996 }, { "epoch": 0.66, "learning_rate": 5.534380740481014e-06, "loss": 0.9319, "step": 2997 }, { "epoch": 0.66, "learning_rate": 5.528019378859075e-06, "loss": 0.9165, "step": 2998 }, { "epoch": 0.66, "learning_rate": 5.521660278485248e-06, "loss": 0.916, "step": 2999 }, { "epoch": 0.66, "learning_rate": 5.515303442574997e-06, "loss": 0.9171, "step": 3000 }, { "epoch": 0.66, "learning_rate": 5.508948874342644e-06, "loss": 0.8928, "step": 3001 }, { "epoch": 0.66, "learning_rate": 5.502596577001365e-06, "loss": 0.9017, "step": 3002 }, { "epoch": 0.66, "learning_rate": 5.496246553763185e-06, "loss": 0.9311, "step": 3003 }, { "epoch": 0.66, "learning_rate": 5.48989880783898e-06, "loss": 0.9189, "step": 3004 }, { "epoch": 0.66, "learning_rate": 5.4835533424384825e-06, "loss": 0.8953, "step": 3005 }, { "epoch": 0.66, "learning_rate": 5.477210160770252e-06, "loss": 0.9357, "step": 3006 }, { "epoch": 0.66, "learning_rate": 5.470869266041714e-06, "loss": 0.894, "step": 3007 }, { "epoch": 0.66, "learning_rate": 5.464530661459127e-06, "loss": 0.9813, "step": 3008 }, { "epoch": 0.66, "learning_rate": 5.458194350227592e-06, "loss": 0.9331, "step": 3009 }, { "epoch": 0.66, "learning_rate": 5.451860335551056e-06, "loss": 0.9558, "step": 3010 }, { "epoch": 0.66, "learning_rate": 5.445528620632298e-06, "loss": 0.8954, "step": 3011 }, { "epoch": 0.66, "learning_rate": 5.439199208672938e-06, "loss": 0.9166, "step": 3012 }, { "epoch": 0.66, "learning_rate": 5.432872102873432e-06, "loss": 0.9669, "step": 3013 }, { "epoch": 0.66, "learning_rate": 5.426547306433067e-06, "loss": 0.8678, "step": 3014 }, { "epoch": 0.66, "learning_rate": 5.420224822549963e-06, "loss": 0.9422, "step": 3015 }, { "epoch": 0.66, "learning_rate": 5.413904654421074e-06, "loss": 0.861, "step": 3016 }, { "epoch": 0.66, "learning_rate": 5.407586805242178e-06, "loss": 0.9513, "step": 3017 }, { "epoch": 0.66, "learning_rate": 5.401271278207883e-06, "loss": 0.92, "step": 3018 }, { "epoch": 0.66, "learning_rate": 5.394958076511623e-06, "loss": 0.9536, "step": 3019 }, { "epoch": 0.66, "learning_rate": 5.388647203345659e-06, "loss": 0.8914, "step": 3020 }, { "epoch": 0.66, "learning_rate": 5.382338661901066e-06, "loss": 0.941, "step": 3021 }, { "epoch": 0.66, "learning_rate": 5.376032455367748e-06, "loss": 0.9675, "step": 3022 }, { "epoch": 0.66, "learning_rate": 5.369728586934427e-06, "loss": 0.8974, "step": 3023 }, { "epoch": 0.66, "learning_rate": 5.363427059788642e-06, "loss": 0.8946, "step": 3024 }, { "epoch": 0.66, "learning_rate": 5.357127877116743e-06, "loss": 0.8923, "step": 3025 }, { "epoch": 0.66, "learning_rate": 5.350831042103906e-06, "loss": 0.9207, "step": 3026 }, { "epoch": 0.66, "learning_rate": 5.34453655793411e-06, "loss": 0.9571, "step": 3027 }, { "epoch": 0.66, "learning_rate": 5.3382444277901465e-06, "loss": 0.9056, "step": 3028 }, { "epoch": 0.66, "learning_rate": 5.3319546548536195e-06, "loss": 0.8727, "step": 3029 }, { "epoch": 0.67, "learning_rate": 5.3256672423049396e-06, "loss": 0.8733, "step": 3030 }, { "epoch": 0.67, "learning_rate": 5.3193821933233255e-06, "loss": 0.9438, "step": 3031 }, { "epoch": 0.67, "learning_rate": 5.313099511086801e-06, "loss": 0.9448, "step": 3032 }, { "epoch": 0.67, "learning_rate": 5.306819198772191e-06, "loss": 0.938, "step": 3033 }, { "epoch": 0.67, "learning_rate": 5.300541259555121e-06, "loss": 0.9567, "step": 3034 }, { "epoch": 0.67, "learning_rate": 5.294265696610022e-06, "loss": 0.848, "step": 3035 }, { "epoch": 0.67, "learning_rate": 5.287992513110116e-06, "loss": 0.9183, "step": 3036 }, { "epoch": 0.67, "learning_rate": 5.281721712227428e-06, "loss": 0.9081, "step": 3037 }, { "epoch": 0.67, "learning_rate": 5.275453297132777e-06, "loss": 0.8807, "step": 3038 }, { "epoch": 0.67, "learning_rate": 5.269187270995773e-06, "loss": 0.9359, "step": 3039 }, { "epoch": 0.67, "learning_rate": 5.262923636984818e-06, "loss": 0.935, "step": 3040 }, { "epoch": 0.67, "learning_rate": 5.25666239826711e-06, "loss": 0.9499, "step": 3041 }, { "epoch": 0.67, "learning_rate": 5.25040355800863e-06, "loss": 0.9512, "step": 3042 }, { "epoch": 0.67, "learning_rate": 5.244147119374148e-06, "loss": 0.9246, "step": 3043 }, { "epoch": 0.67, "learning_rate": 5.23789308552722e-06, "loss": 0.968, "step": 3044 }, { "epoch": 0.67, "learning_rate": 5.2316414596301855e-06, "loss": 0.9032, "step": 3045 }, { "epoch": 0.67, "learning_rate": 5.225392244844168e-06, "loss": 0.9353, "step": 3046 }, { "epoch": 0.67, "learning_rate": 5.219145444329068e-06, "loss": 0.941, "step": 3047 }, { "epoch": 0.67, "learning_rate": 5.2129010612435694e-06, "loss": 0.884, "step": 3048 }, { "epoch": 0.67, "learning_rate": 5.206659098745137e-06, "loss": 0.912, "step": 3049 }, { "epoch": 0.67, "learning_rate": 5.2004195599899966e-06, "loss": 0.31, "step": 3050 }, { "epoch": 0.67, "learning_rate": 5.194182448133163e-06, "loss": 0.9223, "step": 3051 }, { "epoch": 0.67, "learning_rate": 5.187947766328421e-06, "loss": 0.9201, "step": 3052 }, { "epoch": 0.67, "learning_rate": 5.181715517728324e-06, "loss": 0.9286, "step": 3053 }, { "epoch": 0.67, "learning_rate": 5.175485705484195e-06, "loss": 0.9668, "step": 3054 }, { "epoch": 0.67, "learning_rate": 5.16925833274613e-06, "loss": 0.928, "step": 3055 }, { "epoch": 0.67, "learning_rate": 5.163033402662982e-06, "loss": 0.933, "step": 3056 }, { "epoch": 0.67, "learning_rate": 5.15681091838238e-06, "loss": 0.893, "step": 3057 }, { "epoch": 0.67, "learning_rate": 5.150590883050708e-06, "loss": 0.9856, "step": 3058 }, { "epoch": 0.67, "learning_rate": 5.144373299813113e-06, "loss": 0.9414, "step": 3059 }, { "epoch": 0.67, "learning_rate": 5.138158171813507e-06, "loss": 0.9321, "step": 3060 }, { "epoch": 0.67, "learning_rate": 5.131945502194555e-06, "loss": 0.8873, "step": 3061 }, { "epoch": 0.67, "learning_rate": 5.125735294097681e-06, "loss": 0.9698, "step": 3062 }, { "epoch": 0.67, "learning_rate": 5.119527550663066e-06, "loss": 0.902, "step": 3063 }, { "epoch": 0.67, "learning_rate": 5.113322275029642e-06, "loss": 0.9094, "step": 3064 }, { "epoch": 0.67, "learning_rate": 5.107119470335093e-06, "loss": 0.8989, "step": 3065 }, { "epoch": 0.67, "learning_rate": 5.100919139715859e-06, "loss": 0.9204, "step": 3066 }, { "epoch": 0.67, "learning_rate": 5.09472128630712e-06, "loss": 0.9289, "step": 3067 }, { "epoch": 0.67, "learning_rate": 5.088525913242815e-06, "loss": 0.9688, "step": 3068 }, { "epoch": 0.67, "learning_rate": 5.0823330236556125e-06, "loss": 0.9679, "step": 3069 }, { "epoch": 0.67, "learning_rate": 5.076142620676941e-06, "loss": 0.9276, "step": 3070 }, { "epoch": 0.67, "learning_rate": 5.069954707436961e-06, "loss": 0.8797, "step": 3071 }, { "epoch": 0.67, "learning_rate": 5.063769287064586e-06, "loss": 0.9795, "step": 3072 }, { "epoch": 0.67, "learning_rate": 5.0575863626874544e-06, "loss": 0.9636, "step": 3073 }, { "epoch": 0.67, "learning_rate": 5.051405937431955e-06, "loss": 0.9453, "step": 3074 }, { "epoch": 0.68, "learning_rate": 5.045228014423203e-06, "loss": 0.9054, "step": 3075 }, { "epoch": 0.68, "learning_rate": 5.039052596785055e-06, "loss": 0.8895, "step": 3076 }, { "epoch": 0.68, "learning_rate": 5.032879687640099e-06, "loss": 0.9494, "step": 3077 }, { "epoch": 0.68, "learning_rate": 5.026709290109653e-06, "loss": 0.8423, "step": 3078 }, { "epoch": 0.68, "learning_rate": 5.0205414073137685e-06, "loss": 0.3215, "step": 3079 }, { "epoch": 0.68, "learning_rate": 5.014376042371221e-06, "loss": 0.9463, "step": 3080 }, { "epoch": 0.68, "learning_rate": 5.0082131983995145e-06, "loss": 0.9715, "step": 3081 }, { "epoch": 0.68, "learning_rate": 5.00205287851488e-06, "loss": 0.9046, "step": 3082 }, { "epoch": 0.68, "learning_rate": 4.995895085832273e-06, "loss": 0.9046, "step": 3083 }, { "epoch": 0.68, "learning_rate": 4.989739823465366e-06, "loss": 0.9432, "step": 3084 }, { "epoch": 0.68, "learning_rate": 4.983587094526556e-06, "loss": 0.9264, "step": 3085 }, { "epoch": 0.68, "learning_rate": 4.977436902126963e-06, "loss": 0.9284, "step": 3086 }, { "epoch": 0.68, "learning_rate": 4.971289249376411e-06, "loss": 0.8523, "step": 3087 }, { "epoch": 0.68, "learning_rate": 4.965144139383452e-06, "loss": 0.3115, "step": 3088 }, { "epoch": 0.68, "learning_rate": 4.95900157525535e-06, "loss": 0.9274, "step": 3089 }, { "epoch": 0.68, "learning_rate": 4.952861560098079e-06, "loss": 0.8882, "step": 3090 }, { "epoch": 0.68, "learning_rate": 4.946724097016326e-06, "loss": 0.8965, "step": 3091 }, { "epoch": 0.68, "learning_rate": 4.940589189113486e-06, "loss": 0.9248, "step": 3092 }, { "epoch": 0.68, "learning_rate": 4.934456839491665e-06, "loss": 0.8687, "step": 3093 }, { "epoch": 0.68, "learning_rate": 4.928327051251671e-06, "loss": 0.894, "step": 3094 }, { "epoch": 0.68, "learning_rate": 4.922199827493022e-06, "loss": 0.9161, "step": 3095 }, { "epoch": 0.68, "learning_rate": 4.916075171313933e-06, "loss": 0.8934, "step": 3096 }, { "epoch": 0.68, "learning_rate": 4.9099530858113275e-06, "loss": 0.8569, "step": 3097 }, { "epoch": 0.68, "learning_rate": 4.903833574080825e-06, "loss": 0.3306, "step": 3098 }, { "epoch": 0.68, "learning_rate": 4.897716639216743e-06, "loss": 0.9219, "step": 3099 }, { "epoch": 0.68, "learning_rate": 4.8916022843121e-06, "loss": 0.9252, "step": 3100 }, { "epoch": 0.68, "learning_rate": 4.8854905124586035e-06, "loss": 0.9731, "step": 3101 }, { "epoch": 0.68, "learning_rate": 4.8793813267466626e-06, "loss": 0.8901, "step": 3102 }, { "epoch": 0.68, "learning_rate": 4.873274730265374e-06, "loss": 0.9281, "step": 3103 }, { "epoch": 0.68, "learning_rate": 4.867170726102528e-06, "loss": 0.8948, "step": 3104 }, { "epoch": 0.68, "learning_rate": 4.861069317344598e-06, "loss": 0.9274, "step": 3105 }, { "epoch": 0.68, "learning_rate": 4.854970507076748e-06, "loss": 0.939, "step": 3106 }, { "epoch": 0.68, "learning_rate": 4.848874298382834e-06, "loss": 0.9478, "step": 3107 }, { "epoch": 0.68, "learning_rate": 4.8427806943453905e-06, "loss": 0.3385, "step": 3108 }, { "epoch": 0.68, "learning_rate": 4.836689698045634e-06, "loss": 0.9197, "step": 3109 }, { "epoch": 0.68, "learning_rate": 4.830601312563469e-06, "loss": 0.9461, "step": 3110 }, { "epoch": 0.68, "learning_rate": 4.82451554097747e-06, "loss": 0.8811, "step": 3111 }, { "epoch": 0.68, "learning_rate": 4.818432386364901e-06, "loss": 0.879, "step": 3112 }, { "epoch": 0.68, "learning_rate": 4.812351851801692e-06, "loss": 0.8352, "step": 3113 }, { "epoch": 0.68, "learning_rate": 4.806273940362457e-06, "loss": 0.3451, "step": 3114 }, { "epoch": 0.68, "learning_rate": 4.800198655120478e-06, "loss": 0.8986, "step": 3115 }, { "epoch": 0.68, "learning_rate": 4.794125999147711e-06, "loss": 0.9438, "step": 3116 }, { "epoch": 0.68, "learning_rate": 4.788055975514783e-06, "loss": 0.8829, "step": 3117 }, { "epoch": 0.68, "learning_rate": 4.781988587290989e-06, "loss": 0.9316, "step": 3118 }, { "epoch": 0.68, "learning_rate": 4.775923837544292e-06, "loss": 0.8991, "step": 3119 }, { "epoch": 0.68, "learning_rate": 4.76986172934132e-06, "loss": 0.8821, "step": 3120 }, { "epoch": 0.69, "learning_rate": 4.76380226574737e-06, "loss": 0.2922, "step": 3121 }, { "epoch": 0.69, "learning_rate": 4.757745449826391e-06, "loss": 0.9026, "step": 3122 }, { "epoch": 0.69, "learning_rate": 4.751691284641003e-06, "loss": 0.936, "step": 3123 }, { "epoch": 0.69, "learning_rate": 4.745639773252483e-06, "loss": 0.9123, "step": 3124 }, { "epoch": 0.69, "learning_rate": 4.739590918720765e-06, "loss": 0.8786, "step": 3125 }, { "epoch": 0.69, "learning_rate": 4.7335447241044405e-06, "loss": 0.9429, "step": 3126 }, { "epoch": 0.69, "learning_rate": 4.727501192460756e-06, "loss": 0.946, "step": 3127 }, { "epoch": 0.69, "learning_rate": 4.7214603268456115e-06, "loss": 0.8726, "step": 3128 }, { "epoch": 0.69, "learning_rate": 4.715422130313558e-06, "loss": 0.9232, "step": 3129 }, { "epoch": 0.69, "learning_rate": 4.709386605917798e-06, "loss": 0.9837, "step": 3130 }, { "epoch": 0.69, "learning_rate": 4.703353756710184e-06, "loss": 0.9759, "step": 3131 }, { "epoch": 0.69, "learning_rate": 4.697323585741212e-06, "loss": 0.9195, "step": 3132 }, { "epoch": 0.69, "learning_rate": 4.6912960960600255e-06, "loss": 0.9216, "step": 3133 }, { "epoch": 0.69, "learning_rate": 4.6852712907144135e-06, "loss": 0.9578, "step": 3134 }, { "epoch": 0.69, "learning_rate": 4.6792491727508076e-06, "loss": 0.9255, "step": 3135 }, { "epoch": 0.69, "learning_rate": 4.673229745214279e-06, "loss": 0.3123, "step": 3136 }, { "epoch": 0.69, "learning_rate": 4.667213011148541e-06, "loss": 0.9534, "step": 3137 }, { "epoch": 0.69, "learning_rate": 4.661198973595939e-06, "loss": 0.9322, "step": 3138 }, { "epoch": 0.69, "learning_rate": 4.655187635597468e-06, "loss": 0.9632, "step": 3139 }, { "epoch": 0.69, "learning_rate": 4.6491790001927385e-06, "loss": 0.8838, "step": 3140 }, { "epoch": 0.69, "learning_rate": 4.6431730704200096e-06, "loss": 0.924, "step": 3141 }, { "epoch": 0.69, "learning_rate": 4.637169849316167e-06, "loss": 0.9178, "step": 3142 }, { "epoch": 0.69, "learning_rate": 4.631169339916729e-06, "loss": 0.898, "step": 3143 }, { "epoch": 0.69, "learning_rate": 4.625171545255841e-06, "loss": 0.9211, "step": 3144 }, { "epoch": 0.69, "learning_rate": 4.619176468366274e-06, "loss": 0.9129, "step": 3145 }, { "epoch": 0.69, "learning_rate": 4.613184112279429e-06, "loss": 0.9321, "step": 3146 }, { "epoch": 0.69, "learning_rate": 4.607194480025328e-06, "loss": 0.8872, "step": 3147 }, { "epoch": 0.69, "learning_rate": 4.601207574632616e-06, "loss": 0.9484, "step": 3148 }, { "epoch": 0.69, "learning_rate": 4.595223399128561e-06, "loss": 0.9395, "step": 3149 }, { "epoch": 0.69, "learning_rate": 4.5892419565390486e-06, "loss": 0.9048, "step": 3150 }, { "epoch": 0.69, "learning_rate": 4.583263249888584e-06, "loss": 0.9157, "step": 3151 }, { "epoch": 0.69, "learning_rate": 4.577287282200288e-06, "loss": 0.9062, "step": 3152 }, { "epoch": 0.69, "learning_rate": 4.571314056495897e-06, "loss": 0.8983, "step": 3153 }, { "epoch": 0.69, "learning_rate": 4.56534357579576e-06, "loss": 0.939, "step": 3154 }, { "epoch": 0.69, "learning_rate": 4.559375843118839e-06, "loss": 0.9736, "step": 3155 }, { "epoch": 0.69, "learning_rate": 4.553410861482708e-06, "loss": 0.3224, "step": 3156 }, { "epoch": 0.69, "learning_rate": 4.547448633903548e-06, "loss": 0.9614, "step": 3157 }, { "epoch": 0.69, "learning_rate": 4.541489163396143e-06, "loss": 0.8975, "step": 3158 }, { "epoch": 0.69, "learning_rate": 4.535532452973891e-06, "loss": 0.9712, "step": 3159 }, { "epoch": 0.69, "learning_rate": 4.529578505648789e-06, "loss": 0.8628, "step": 3160 }, { "epoch": 0.69, "learning_rate": 4.523627324431442e-06, "loss": 0.9346, "step": 3161 }, { "epoch": 0.69, "learning_rate": 4.517678912331049e-06, "loss": 0.973, "step": 3162 }, { "epoch": 0.69, "learning_rate": 4.511733272355415e-06, "loss": 0.9047, "step": 3163 }, { "epoch": 0.69, "learning_rate": 4.50579040751094e-06, "loss": 0.8452, "step": 3164 }, { "epoch": 0.69, "learning_rate": 4.499850320802623e-06, "loss": 0.9781, "step": 3165 }, { "epoch": 0.69, "learning_rate": 4.493913015234056e-06, "loss": 0.9597, "step": 3166 }, { "epoch": 0.7, "learning_rate": 4.487978493807426e-06, "loss": 0.9401, "step": 3167 }, { "epoch": 0.7, "learning_rate": 4.4820467595235125e-06, "loss": 0.9213, "step": 3168 }, { "epoch": 0.7, "learning_rate": 4.476117815381685e-06, "loss": 0.9503, "step": 3169 }, { "epoch": 0.7, "learning_rate": 4.470191664379903e-06, "loss": 0.9149, "step": 3170 }, { "epoch": 0.7, "learning_rate": 4.46426830951471e-06, "loss": 0.8597, "step": 3171 }, { "epoch": 0.7, "learning_rate": 4.458347753781241e-06, "loss": 0.9485, "step": 3172 }, { "epoch": 0.7, "learning_rate": 4.4524300001732135e-06, "loss": 0.893, "step": 3173 }, { "epoch": 0.7, "learning_rate": 4.446515051682927e-06, "loss": 0.9169, "step": 3174 }, { "epoch": 0.7, "learning_rate": 4.440602911301267e-06, "loss": 0.9325, "step": 3175 }, { "epoch": 0.7, "learning_rate": 4.434693582017689e-06, "loss": 0.9694, "step": 3176 }, { "epoch": 0.7, "learning_rate": 4.428787066820237e-06, "loss": 0.948, "step": 3177 }, { "epoch": 0.7, "learning_rate": 4.422883368695529e-06, "loss": 0.8609, "step": 3178 }, { "epoch": 0.7, "learning_rate": 4.416982490628756e-06, "loss": 0.9311, "step": 3179 }, { "epoch": 0.7, "learning_rate": 4.411084435603688e-06, "loss": 0.9283, "step": 3180 }, { "epoch": 0.7, "learning_rate": 4.405189206602664e-06, "loss": 0.9066, "step": 3181 }, { "epoch": 0.7, "learning_rate": 4.399296806606594e-06, "loss": 0.2758, "step": 3182 }, { "epoch": 0.7, "learning_rate": 4.393407238594959e-06, "loss": 0.9109, "step": 3183 }, { "epoch": 0.7, "learning_rate": 4.387520505545807e-06, "loss": 0.9423, "step": 3184 }, { "epoch": 0.7, "learning_rate": 4.3816366104357545e-06, "loss": 0.9007, "step": 3185 }, { "epoch": 0.7, "learning_rate": 4.375755556239979e-06, "loss": 0.9061, "step": 3186 }, { "epoch": 0.7, "learning_rate": 4.369877345932224e-06, "loss": 0.9038, "step": 3187 }, { "epoch": 0.7, "learning_rate": 4.364001982484797e-06, "loss": 0.8449, "step": 3188 }, { "epoch": 0.7, "learning_rate": 4.3581294688685616e-06, "loss": 0.8848, "step": 3189 }, { "epoch": 0.7, "learning_rate": 4.352259808052944e-06, "loss": 0.8984, "step": 3190 }, { "epoch": 0.7, "learning_rate": 4.346393003005926e-06, "loss": 0.8944, "step": 3191 }, { "epoch": 0.7, "learning_rate": 4.3405290566940475e-06, "loss": 0.9031, "step": 3192 }, { "epoch": 0.7, "learning_rate": 4.334667972082402e-06, "loss": 0.2837, "step": 3193 }, { "epoch": 0.7, "learning_rate": 4.32880975213463e-06, "loss": 0.8969, "step": 3194 }, { "epoch": 0.7, "learning_rate": 4.322954399812932e-06, "loss": 0.9212, "step": 3195 }, { "epoch": 0.7, "learning_rate": 4.317101918078054e-06, "loss": 0.8843, "step": 3196 }, { "epoch": 0.7, "learning_rate": 4.311252309889294e-06, "loss": 0.9556, "step": 3197 }, { "epoch": 0.7, "learning_rate": 4.305405578204493e-06, "loss": 0.2795, "step": 3198 }, { "epoch": 0.7, "learning_rate": 4.29956172598004e-06, "loss": 0.301, "step": 3199 }, { "epoch": 0.7, "learning_rate": 4.293720756170868e-06, "loss": 0.9137, "step": 3200 }, { "epoch": 0.7, "learning_rate": 4.28788267173045e-06, "loss": 0.8628, "step": 3201 }, { "epoch": 0.7, "learning_rate": 4.282047475610803e-06, "loss": 0.9386, "step": 3202 }, { "epoch": 0.7, "learning_rate": 4.2762151707624825e-06, "loss": 0.9062, "step": 3203 }, { "epoch": 0.7, "learning_rate": 4.27038576013458e-06, "loss": 0.922, "step": 3204 }, { "epoch": 0.7, "learning_rate": 4.264559246674727e-06, "loss": 0.8756, "step": 3205 }, { "epoch": 0.7, "learning_rate": 4.25873563332909e-06, "loss": 0.9209, "step": 3206 }, { "epoch": 0.7, "learning_rate": 4.252914923042365e-06, "loss": 0.8763, "step": 3207 }, { "epoch": 0.7, "learning_rate": 4.247097118757782e-06, "loss": 0.8954, "step": 3208 }, { "epoch": 0.7, "learning_rate": 4.241282223417104e-06, "loss": 0.9076, "step": 3209 }, { "epoch": 0.7, "learning_rate": 4.235470239960623e-06, "loss": 0.8866, "step": 3210 }, { "epoch": 0.7, "learning_rate": 4.2296611713271565e-06, "loss": 0.9137, "step": 3211 }, { "epoch": 0.71, "learning_rate": 4.223855020454043e-06, "loss": 0.3072, "step": 3212 }, { "epoch": 0.71, "learning_rate": 4.218051790277154e-06, "loss": 0.9623, "step": 3213 }, { "epoch": 0.71, "learning_rate": 4.212251483730883e-06, "loss": 0.9423, "step": 3214 }, { "epoch": 0.71, "learning_rate": 4.206454103748142e-06, "loss": 0.9427, "step": 3215 }, { "epoch": 0.71, "learning_rate": 4.2006596532603664e-06, "loss": 0.9365, "step": 3216 }, { "epoch": 0.71, "learning_rate": 4.1948681351975065e-06, "loss": 0.9281, "step": 3217 }, { "epoch": 0.71, "learning_rate": 4.189079552488033e-06, "loss": 0.923, "step": 3218 }, { "epoch": 0.71, "learning_rate": 4.18329390805893e-06, "loss": 0.9769, "step": 3219 }, { "epoch": 0.71, "learning_rate": 4.1775112048357e-06, "loss": 0.9501, "step": 3220 }, { "epoch": 0.71, "learning_rate": 4.171731445742353e-06, "loss": 0.9195, "step": 3221 }, { "epoch": 0.71, "learning_rate": 4.165954633701412e-06, "loss": 0.913, "step": 3222 }, { "epoch": 0.71, "learning_rate": 4.160180771633914e-06, "loss": 0.9257, "step": 3223 }, { "epoch": 0.71, "learning_rate": 4.154409862459397e-06, "loss": 0.8967, "step": 3224 }, { "epoch": 0.71, "learning_rate": 4.148641909095911e-06, "loss": 0.9331, "step": 3225 }, { "epoch": 0.71, "learning_rate": 4.142876914460011e-06, "loss": 0.8998, "step": 3226 }, { "epoch": 0.71, "learning_rate": 4.137114881466755e-06, "loss": 0.9069, "step": 3227 }, { "epoch": 0.71, "learning_rate": 4.131355813029706e-06, "loss": 0.9426, "step": 3228 }, { "epoch": 0.71, "learning_rate": 4.125599712060919e-06, "loss": 0.9007, "step": 3229 }, { "epoch": 0.71, "learning_rate": 4.11984658147096e-06, "loss": 0.9246, "step": 3230 }, { "epoch": 0.71, "learning_rate": 4.1140964241688855e-06, "loss": 0.9302, "step": 3231 }, { "epoch": 0.71, "learning_rate": 4.108349243062253e-06, "loss": 0.9276, "step": 3232 }, { "epoch": 0.71, "learning_rate": 4.1026050410571115e-06, "loss": 0.9266, "step": 3233 }, { "epoch": 0.71, "learning_rate": 4.096863821058007e-06, "loss": 0.9125, "step": 3234 }, { "epoch": 0.71, "learning_rate": 4.091125585967975e-06, "loss": 0.9518, "step": 3235 }, { "epoch": 0.71, "learning_rate": 4.085390338688543e-06, "loss": 0.9223, "step": 3236 }, { "epoch": 0.71, "learning_rate": 4.0796580821197275e-06, "loss": 0.8967, "step": 3237 }, { "epoch": 0.71, "learning_rate": 4.073928819160032e-06, "loss": 0.9369, "step": 3238 }, { "epoch": 0.71, "learning_rate": 4.0682025527064486e-06, "loss": 0.9492, "step": 3239 }, { "epoch": 0.71, "learning_rate": 4.0624792856544505e-06, "loss": 0.9241, "step": 3240 }, { "epoch": 0.71, "learning_rate": 4.056759020897997e-06, "loss": 0.9596, "step": 3241 }, { "epoch": 0.71, "learning_rate": 4.05104176132953e-06, "loss": 0.8801, "step": 3242 }, { "epoch": 0.71, "learning_rate": 4.045327509839967e-06, "loss": 0.903, "step": 3243 }, { "epoch": 0.71, "learning_rate": 4.039616269318711e-06, "loss": 0.904, "step": 3244 }, { "epoch": 0.71, "learning_rate": 4.033908042653639e-06, "loss": 0.8957, "step": 3245 }, { "epoch": 0.71, "learning_rate": 4.028202832731109e-06, "loss": 0.987, "step": 3246 }, { "epoch": 0.71, "learning_rate": 4.022500642435938e-06, "loss": 0.9288, "step": 3247 }, { "epoch": 0.71, "learning_rate": 4.016801474651434e-06, "loss": 0.9224, "step": 3248 }, { "epoch": 0.71, "learning_rate": 4.011105332259368e-06, "loss": 0.9743, "step": 3249 }, { "epoch": 0.71, "learning_rate": 4.005412218139986e-06, "loss": 0.9077, "step": 3250 }, { "epoch": 0.71, "learning_rate": 3.999722135171997e-06, "loss": 0.3156, "step": 3251 }, { "epoch": 0.71, "learning_rate": 3.9940350862325815e-06, "loss": 0.929, "step": 3252 }, { "epoch": 0.71, "learning_rate": 3.988351074197384e-06, "loss": 0.9532, "step": 3253 }, { "epoch": 0.71, "learning_rate": 3.982670101940515e-06, "loss": 0.9054, "step": 3254 }, { "epoch": 0.71, "learning_rate": 3.976992172334544e-06, "loss": 0.8376, "step": 3255 }, { "epoch": 0.71, "learning_rate": 3.971317288250508e-06, "loss": 0.9487, "step": 3256 }, { "epoch": 0.71, "learning_rate": 3.965645452557899e-06, "loss": 0.938, "step": 3257 }, { "epoch": 0.72, "learning_rate": 3.9599766681246695e-06, "loss": 0.9387, "step": 3258 }, { "epoch": 0.72, "learning_rate": 3.954310937817231e-06, "loss": 0.9078, "step": 3259 }, { "epoch": 0.72, "learning_rate": 3.948648264500445e-06, "loss": 0.9083, "step": 3260 }, { "epoch": 0.72, "learning_rate": 3.942988651037634e-06, "loss": 0.8981, "step": 3261 }, { "epoch": 0.72, "learning_rate": 3.937332100290572e-06, "loss": 0.8805, "step": 3262 }, { "epoch": 0.72, "learning_rate": 3.931678615119477e-06, "loss": 0.8577, "step": 3263 }, { "epoch": 0.72, "learning_rate": 3.926028198383032e-06, "loss": 0.9062, "step": 3264 }, { "epoch": 0.72, "learning_rate": 3.920380852938348e-06, "loss": 0.8994, "step": 3265 }, { "epoch": 0.72, "learning_rate": 3.914736581640998e-06, "loss": 0.945, "step": 3266 }, { "epoch": 0.72, "learning_rate": 3.909095387344998e-06, "loss": 0.9012, "step": 3267 }, { "epoch": 0.72, "learning_rate": 3.903457272902807e-06, "loss": 0.88, "step": 3268 }, { "epoch": 0.72, "learning_rate": 3.897822241165323e-06, "loss": 0.9383, "step": 3269 }, { "epoch": 0.72, "learning_rate": 3.892190294981893e-06, "loss": 0.8877, "step": 3270 }, { "epoch": 0.72, "learning_rate": 3.886561437200297e-06, "loss": 0.89, "step": 3271 }, { "epoch": 0.72, "learning_rate": 3.880935670666756e-06, "loss": 0.9119, "step": 3272 }, { "epoch": 0.72, "learning_rate": 3.8753129982259265e-06, "loss": 0.9368, "step": 3273 }, { "epoch": 0.72, "learning_rate": 3.8696934227209034e-06, "loss": 0.9617, "step": 3274 }, { "epoch": 0.72, "learning_rate": 3.864076946993215e-06, "loss": 0.9135, "step": 3275 }, { "epoch": 0.72, "learning_rate": 3.858463573882818e-06, "loss": 0.9838, "step": 3276 }, { "epoch": 0.72, "learning_rate": 3.852853306228105e-06, "loss": 0.3307, "step": 3277 }, { "epoch": 0.72, "learning_rate": 3.847246146865896e-06, "loss": 0.9212, "step": 3278 }, { "epoch": 0.72, "learning_rate": 3.841642098631442e-06, "loss": 0.9075, "step": 3279 }, { "epoch": 0.72, "learning_rate": 3.836041164358416e-06, "loss": 0.9268, "step": 3280 }, { "epoch": 0.72, "learning_rate": 3.830443346878921e-06, "loss": 0.91, "step": 3281 }, { "epoch": 0.72, "learning_rate": 3.824848649023486e-06, "loss": 0.2965, "step": 3282 }, { "epoch": 0.72, "learning_rate": 3.81925707362105e-06, "loss": 0.8967, "step": 3283 }, { "epoch": 0.72, "learning_rate": 3.8136686234989873e-06, "loss": 0.8939, "step": 3284 }, { "epoch": 0.72, "learning_rate": 3.8080833014830865e-06, "loss": 0.9471, "step": 3285 }, { "epoch": 0.72, "learning_rate": 3.802501110397553e-06, "loss": 0.9291, "step": 3286 }, { "epoch": 0.72, "learning_rate": 3.796922053065013e-06, "loss": 0.9074, "step": 3287 }, { "epoch": 0.72, "learning_rate": 3.7913461323065036e-06, "loss": 0.8912, "step": 3288 }, { "epoch": 0.72, "learning_rate": 3.7857733509414796e-06, "loss": 0.9171, "step": 3289 }, { "epoch": 0.72, "learning_rate": 3.7802037117878053e-06, "loss": 0.9318, "step": 3290 }, { "epoch": 0.72, "learning_rate": 3.7746372176617585e-06, "loss": 0.9218, "step": 3291 }, { "epoch": 0.72, "learning_rate": 3.769073871378026e-06, "loss": 0.8953, "step": 3292 }, { "epoch": 0.72, "learning_rate": 3.7635136757497036e-06, "loss": 0.9111, "step": 3293 }, { "epoch": 0.72, "learning_rate": 3.757956633588291e-06, "loss": 0.9463, "step": 3294 }, { "epoch": 0.72, "learning_rate": 3.7524027477036974e-06, "loss": 0.8848, "step": 3295 }, { "epoch": 0.72, "learning_rate": 3.7468520209042336e-06, "loss": 0.9434, "step": 3296 }, { "epoch": 0.72, "learning_rate": 3.741304455996613e-06, "loss": 0.926, "step": 3297 }, { "epoch": 0.72, "learning_rate": 3.735760055785952e-06, "loss": 0.8596, "step": 3298 }, { "epoch": 0.72, "learning_rate": 3.730218823075764e-06, "loss": 0.9138, "step": 3299 }, { "epoch": 0.72, "learning_rate": 3.724680760667967e-06, "loss": 0.9284, "step": 3300 }, { "epoch": 0.72, "learning_rate": 3.7191458713628636e-06, "loss": 0.9085, "step": 3301 }, { "epoch": 0.72, "learning_rate": 3.7136141579591635e-06, "loss": 0.9077, "step": 3302 }, { "epoch": 0.73, "learning_rate": 3.7080856232539665e-06, "loss": 0.887, "step": 3303 }, { "epoch": 0.73, "learning_rate": 3.702560270042764e-06, "loss": 0.919, "step": 3304 }, { "epoch": 0.73, "learning_rate": 3.697038101119442e-06, "loss": 0.9286, "step": 3305 }, { "epoch": 0.73, "learning_rate": 3.6915191192762735e-06, "loss": 0.3345, "step": 3306 }, { "epoch": 0.73, "learning_rate": 3.68600332730392e-06, "loss": 0.8923, "step": 3307 }, { "epoch": 0.73, "learning_rate": 3.6804907279914314e-06, "loss": 0.9, "step": 3308 }, { "epoch": 0.73, "learning_rate": 3.674981324126241e-06, "loss": 0.9308, "step": 3309 }, { "epoch": 0.73, "learning_rate": 3.6694751184941712e-06, "loss": 0.9264, "step": 3310 }, { "epoch": 0.73, "learning_rate": 3.6639721138794205e-06, "loss": 0.9366, "step": 3311 }, { "epoch": 0.73, "learning_rate": 3.658472313064574e-06, "loss": 0.8979, "step": 3312 }, { "epoch": 0.73, "learning_rate": 3.652975718830595e-06, "loss": 0.9943, "step": 3313 }, { "epoch": 0.73, "learning_rate": 3.6474823339568245e-06, "loss": 0.949, "step": 3314 }, { "epoch": 0.73, "learning_rate": 3.641992161220983e-06, "loss": 0.9009, "step": 3315 }, { "epoch": 0.73, "learning_rate": 3.6365052033991644e-06, "loss": 0.8704, "step": 3316 }, { "epoch": 0.73, "learning_rate": 3.6310214632658426e-06, "loss": 0.9239, "step": 3317 }, { "epoch": 0.73, "learning_rate": 3.6255409435938517e-06, "loss": 0.945, "step": 3318 }, { "epoch": 0.73, "learning_rate": 3.6200636471544093e-06, "loss": 0.907, "step": 3319 }, { "epoch": 0.73, "learning_rate": 3.6145895767171e-06, "loss": 0.9411, "step": 3320 }, { "epoch": 0.73, "learning_rate": 3.6091187350498756e-06, "loss": 0.9067, "step": 3321 }, { "epoch": 0.73, "learning_rate": 3.6036511249190575e-06, "loss": 0.9181, "step": 3322 }, { "epoch": 0.73, "learning_rate": 3.5981867490893317e-06, "loss": 0.8955, "step": 3323 }, { "epoch": 0.73, "learning_rate": 3.5927256103237486e-06, "loss": 0.3476, "step": 3324 }, { "epoch": 0.73, "learning_rate": 3.5872677113837227e-06, "loss": 0.9232, "step": 3325 }, { "epoch": 0.73, "learning_rate": 3.581813055029029e-06, "loss": 0.9035, "step": 3326 }, { "epoch": 0.73, "learning_rate": 3.5763616440178037e-06, "loss": 0.9312, "step": 3327 }, { "epoch": 0.73, "learning_rate": 3.570913481106543e-06, "loss": 0.8833, "step": 3328 }, { "epoch": 0.73, "learning_rate": 3.565468569050098e-06, "loss": 0.9243, "step": 3329 }, { "epoch": 0.73, "learning_rate": 3.560026910601678e-06, "loss": 0.9432, "step": 3330 }, { "epoch": 0.73, "learning_rate": 3.5545885085128484e-06, "loss": 0.9155, "step": 3331 }, { "epoch": 0.73, "learning_rate": 3.549153365533524e-06, "loss": 0.9268, "step": 3332 }, { "epoch": 0.73, "learning_rate": 3.543721484411976e-06, "loss": 0.9123, "step": 3333 }, { "epoch": 0.73, "learning_rate": 3.5382928678948235e-06, "loss": 0.8093, "step": 3334 }, { "epoch": 0.73, "learning_rate": 3.53286751872704e-06, "loss": 0.9136, "step": 3335 }, { "epoch": 0.73, "learning_rate": 3.5274454396519342e-06, "loss": 0.9322, "step": 3336 }, { "epoch": 0.73, "learning_rate": 3.522026633411174e-06, "loss": 0.8946, "step": 3337 }, { "epoch": 0.73, "learning_rate": 3.516611102744767e-06, "loss": 0.9362, "step": 3338 }, { "epoch": 0.73, "learning_rate": 3.5111988503910664e-06, "loss": 0.9525, "step": 3339 }, { "epoch": 0.73, "learning_rate": 3.5057898790867673e-06, "loss": 0.9301, "step": 3340 }, { "epoch": 0.73, "learning_rate": 3.5003841915669047e-06, "loss": 0.9429, "step": 3341 }, { "epoch": 0.73, "learning_rate": 3.494981790564853e-06, "loss": 0.9718, "step": 3342 }, { "epoch": 0.73, "learning_rate": 3.4895826788123267e-06, "loss": 0.9389, "step": 3343 }, { "epoch": 0.73, "learning_rate": 3.4841868590393747e-06, "loss": 0.9275, "step": 3344 }, { "epoch": 0.73, "learning_rate": 3.4787943339743836e-06, "loss": 0.9645, "step": 3345 }, { "epoch": 0.73, "learning_rate": 3.473405106344072e-06, "loss": 0.9661, "step": 3346 }, { "epoch": 0.73, "learning_rate": 3.4680191788734942e-06, "loss": 0.8745, "step": 3347 }, { "epoch": 0.73, "learning_rate": 3.462636554286032e-06, "loss": 0.8906, "step": 3348 }, { "epoch": 0.74, "learning_rate": 3.457257235303398e-06, "loss": 0.9663, "step": 3349 }, { "epoch": 0.74, "learning_rate": 3.451881224645637e-06, "loss": 0.8832, "step": 3350 }, { "epoch": 0.74, "learning_rate": 3.4465085250311148e-06, "loss": 0.9732, "step": 3351 }, { "epoch": 0.74, "learning_rate": 3.441139139176528e-06, "loss": 0.9278, "step": 3352 }, { "epoch": 0.74, "learning_rate": 3.4357730697968994e-06, "loss": 0.9689, "step": 3353 }, { "epoch": 0.74, "learning_rate": 3.430410319605565e-06, "loss": 0.9148, "step": 3354 }, { "epoch": 0.74, "learning_rate": 3.425050891314191e-06, "loss": 0.8917, "step": 3355 }, { "epoch": 0.74, "learning_rate": 3.4196947876327614e-06, "loss": 0.9307, "step": 3356 }, { "epoch": 0.74, "learning_rate": 3.41434201126958e-06, "loss": 0.906, "step": 3357 }, { "epoch": 0.74, "learning_rate": 3.4089925649312683e-06, "loss": 0.9136, "step": 3358 }, { "epoch": 0.74, "learning_rate": 3.403646451322763e-06, "loss": 0.9565, "step": 3359 }, { "epoch": 0.74, "learning_rate": 3.398303673147314e-06, "loss": 0.8763, "step": 3360 }, { "epoch": 0.74, "learning_rate": 3.3929642331064883e-06, "loss": 0.9592, "step": 3361 }, { "epoch": 0.74, "learning_rate": 3.3876281339001625e-06, "loss": 0.8883, "step": 3362 }, { "epoch": 0.74, "learning_rate": 3.3822953782265256e-06, "loss": 0.9195, "step": 3363 }, { "epoch": 0.74, "learning_rate": 3.3769659687820734e-06, "loss": 0.8987, "step": 3364 }, { "epoch": 0.74, "learning_rate": 3.371639908261611e-06, "loss": 0.9962, "step": 3365 }, { "epoch": 0.74, "learning_rate": 3.3663171993582523e-06, "loss": 0.8708, "step": 3366 }, { "epoch": 0.74, "learning_rate": 3.360997844763413e-06, "loss": 0.9083, "step": 3367 }, { "epoch": 0.74, "learning_rate": 3.3556818471668128e-06, "loss": 0.85, "step": 3368 }, { "epoch": 0.74, "learning_rate": 3.3503692092564788e-06, "loss": 0.8888, "step": 3369 }, { "epoch": 0.74, "learning_rate": 3.3450599337187326e-06, "loss": 0.9284, "step": 3370 }, { "epoch": 0.74, "learning_rate": 3.339754023238205e-06, "loss": 0.9465, "step": 3371 }, { "epoch": 0.74, "learning_rate": 3.33445148049781e-06, "loss": 0.8574, "step": 3372 }, { "epoch": 0.74, "learning_rate": 3.3291523081787723e-06, "loss": 0.9413, "step": 3373 }, { "epoch": 0.74, "learning_rate": 3.3238565089606068e-06, "loss": 0.9142, "step": 3374 }, { "epoch": 0.74, "learning_rate": 3.3185640855211255e-06, "loss": 0.9949, "step": 3375 }, { "epoch": 0.74, "learning_rate": 3.3132750405364312e-06, "loss": 0.8688, "step": 3376 }, { "epoch": 0.74, "learning_rate": 3.307989376680918e-06, "loss": 0.96, "step": 3377 }, { "epoch": 0.74, "learning_rate": 3.3027070966272724e-06, "loss": 0.908, "step": 3378 }, { "epoch": 0.74, "learning_rate": 3.2974282030464677e-06, "loss": 0.9282, "step": 3379 }, { "epoch": 0.74, "learning_rate": 3.292152698607768e-06, "loss": 0.892, "step": 3380 }, { "epoch": 0.74, "learning_rate": 3.286880585978719e-06, "loss": 0.9397, "step": 3381 }, { "epoch": 0.74, "learning_rate": 3.2816118678251554e-06, "loss": 0.8775, "step": 3382 }, { "epoch": 0.74, "learning_rate": 3.2763465468111945e-06, "loss": 0.9406, "step": 3383 }, { "epoch": 0.74, "learning_rate": 3.2710846255992347e-06, "loss": 0.8968, "step": 3384 }, { "epoch": 0.74, "learning_rate": 3.2658261068499562e-06, "loss": 0.8403, "step": 3385 }, { "epoch": 0.74, "learning_rate": 3.2605709932223185e-06, "loss": 0.8743, "step": 3386 }, { "epoch": 0.74, "learning_rate": 3.2553192873735595e-06, "loss": 0.8921, "step": 3387 }, { "epoch": 0.74, "learning_rate": 3.250070991959193e-06, "loss": 0.9181, "step": 3388 }, { "epoch": 0.74, "learning_rate": 3.2448261096330135e-06, "loss": 0.8762, "step": 3389 }, { "epoch": 0.74, "learning_rate": 3.239584643047078e-06, "loss": 0.9194, "step": 3390 }, { "epoch": 0.74, "learning_rate": 3.2343465948517284e-06, "loss": 0.8996, "step": 3391 }, { "epoch": 0.74, "learning_rate": 3.2291119676955708e-06, "loss": 0.9389, "step": 3392 }, { "epoch": 0.74, "learning_rate": 3.2238807642254864e-06, "loss": 0.8952, "step": 3393 }, { "epoch": 0.75, "learning_rate": 3.2186529870866214e-06, "loss": 0.874, "step": 3394 }, { "epoch": 0.75, "learning_rate": 3.2134286389223913e-06, "loss": 0.3085, "step": 3395 }, { "epoch": 0.75, "learning_rate": 3.208207722374478e-06, "loss": 0.8798, "step": 3396 }, { "epoch": 0.75, "learning_rate": 3.2029902400828274e-06, "loss": 0.8936, "step": 3397 }, { "epoch": 0.75, "learning_rate": 3.1977761946856477e-06, "loss": 0.905, "step": 3398 }, { "epoch": 0.75, "learning_rate": 3.192565588819412e-06, "loss": 0.909, "step": 3399 }, { "epoch": 0.75, "learning_rate": 3.1873584251188527e-06, "loss": 0.9907, "step": 3400 }, { "epoch": 0.75, "learning_rate": 3.182154706216961e-06, "loss": 0.922, "step": 3401 }, { "epoch": 0.75, "learning_rate": 3.176954434744988e-06, "loss": 0.2984, "step": 3402 }, { "epoch": 0.75, "learning_rate": 3.171757613332439e-06, "loss": 0.9184, "step": 3403 }, { "epoch": 0.75, "learning_rate": 3.166564244607079e-06, "loss": 0.9287, "step": 3404 }, { "epoch": 0.75, "learning_rate": 3.161374331194922e-06, "loss": 0.9215, "step": 3405 }, { "epoch": 0.75, "learning_rate": 3.156187875720238e-06, "loss": 0.9188, "step": 3406 }, { "epoch": 0.75, "learning_rate": 3.151004880805553e-06, "loss": 0.964, "step": 3407 }, { "epoch": 0.75, "learning_rate": 3.1458253490716293e-06, "loss": 0.8655, "step": 3408 }, { "epoch": 0.75, "learning_rate": 3.1406492831374914e-06, "loss": 0.9478, "step": 3409 }, { "epoch": 0.75, "learning_rate": 3.1354766856204066e-06, "loss": 0.9156, "step": 3410 }, { "epoch": 0.75, "learning_rate": 3.1303075591358877e-06, "loss": 0.8943, "step": 3411 }, { "epoch": 0.75, "learning_rate": 3.125141906297694e-06, "loss": 0.9414, "step": 3412 }, { "epoch": 0.75, "learning_rate": 3.119979729717827e-06, "loss": 0.8809, "step": 3413 }, { "epoch": 0.75, "learning_rate": 3.1148210320065318e-06, "loss": 0.8987, "step": 3414 }, { "epoch": 0.75, "learning_rate": 3.1096658157722936e-06, "loss": 0.9276, "step": 3415 }, { "epoch": 0.75, "learning_rate": 3.104514083621837e-06, "loss": 0.9493, "step": 3416 }, { "epoch": 0.75, "learning_rate": 3.0993658381601243e-06, "loss": 0.893, "step": 3417 }, { "epoch": 0.75, "learning_rate": 3.094221081990356e-06, "loss": 0.9039, "step": 3418 }, { "epoch": 0.75, "learning_rate": 3.0890798177139693e-06, "loss": 0.8928, "step": 3419 }, { "epoch": 0.75, "learning_rate": 3.0839420479306325e-06, "loss": 0.9503, "step": 3420 }, { "epoch": 0.75, "learning_rate": 3.0788077752382496e-06, "loss": 0.9143, "step": 3421 }, { "epoch": 0.75, "learning_rate": 3.0736770022329554e-06, "loss": 0.3192, "step": 3422 }, { "epoch": 0.75, "learning_rate": 3.068549731509114e-06, "loss": 0.9883, "step": 3423 }, { "epoch": 0.75, "learning_rate": 3.06342596565932e-06, "loss": 0.87, "step": 3424 }, { "epoch": 0.75, "learning_rate": 3.058305707274396e-06, "loss": 0.9544, "step": 3425 }, { "epoch": 0.75, "learning_rate": 3.0531889589433916e-06, "loss": 0.8863, "step": 3426 }, { "epoch": 0.75, "learning_rate": 3.0480757232535773e-06, "loss": 0.9423, "step": 3427 }, { "epoch": 0.75, "learning_rate": 3.042966002790453e-06, "loss": 0.9163, "step": 3428 }, { "epoch": 0.75, "learning_rate": 3.037859800137741e-06, "loss": 0.9191, "step": 3429 }, { "epoch": 0.75, "learning_rate": 3.0327571178773772e-06, "loss": 0.9244, "step": 3430 }, { "epoch": 0.75, "learning_rate": 3.0276579585895248e-06, "loss": 0.8597, "step": 3431 }, { "epoch": 0.75, "learning_rate": 3.022562324852564e-06, "loss": 0.8695, "step": 3432 }, { "epoch": 0.75, "learning_rate": 3.0174702192430904e-06, "loss": 0.8848, "step": 3433 }, { "epoch": 0.75, "learning_rate": 3.0123816443359198e-06, "loss": 0.9683, "step": 3434 }, { "epoch": 0.75, "learning_rate": 3.0072966027040785e-06, "loss": 0.8752, "step": 3435 }, { "epoch": 0.75, "learning_rate": 3.0022150969188067e-06, "loss": 0.9116, "step": 3436 }, { "epoch": 0.75, "learning_rate": 2.99713712954956e-06, "loss": 0.8905, "step": 3437 }, { "epoch": 0.75, "learning_rate": 2.9920627031640004e-06, "loss": 0.8587, "step": 3438 }, { "epoch": 0.75, "learning_rate": 2.9869918203280024e-06, "loss": 0.8751, "step": 3439 }, { "epoch": 0.76, "learning_rate": 2.9819244836056482e-06, "loss": 0.9477, "step": 3440 }, { "epoch": 0.76, "learning_rate": 2.976860695559226e-06, "loss": 0.926, "step": 3441 }, { "epoch": 0.76, "learning_rate": 2.971800458749231e-06, "loss": 0.8881, "step": 3442 }, { "epoch": 0.76, "learning_rate": 2.9667437757343598e-06, "loss": 0.9472, "step": 3443 }, { "epoch": 0.76, "learning_rate": 2.961690649071517e-06, "loss": 0.9235, "step": 3444 }, { "epoch": 0.76, "learning_rate": 2.956641081315803e-06, "loss": 0.9499, "step": 3445 }, { "epoch": 0.76, "learning_rate": 2.9515950750205247e-06, "loss": 0.9362, "step": 3446 }, { "epoch": 0.76, "learning_rate": 2.946552632737183e-06, "loss": 0.9323, "step": 3447 }, { "epoch": 0.76, "learning_rate": 2.9415137570154795e-06, "loss": 0.901, "step": 3448 }, { "epoch": 0.76, "learning_rate": 2.9364784504033115e-06, "loss": 0.9174, "step": 3449 }, { "epoch": 0.76, "learning_rate": 2.9314467154467728e-06, "loss": 0.896, "step": 3450 }, { "epoch": 0.76, "learning_rate": 2.9264185546901525e-06, "loss": 0.9025, "step": 3451 }, { "epoch": 0.76, "learning_rate": 2.921393970675923e-06, "loss": 0.9643, "step": 3452 }, { "epoch": 0.76, "learning_rate": 2.916372965944759e-06, "loss": 0.9019, "step": 3453 }, { "epoch": 0.76, "learning_rate": 2.9113555430355224e-06, "loss": 0.9018, "step": 3454 }, { "epoch": 0.76, "learning_rate": 2.9063417044852627e-06, "loss": 0.9266, "step": 3455 }, { "epoch": 0.76, "learning_rate": 2.901331452829217e-06, "loss": 0.8878, "step": 3456 }, { "epoch": 0.76, "learning_rate": 2.8963247906008086e-06, "loss": 0.9327, "step": 3457 }, { "epoch": 0.76, "learning_rate": 2.8913217203316466e-06, "loss": 0.9296, "step": 3458 }, { "epoch": 0.76, "learning_rate": 2.8863222445515248e-06, "loss": 0.9349, "step": 3459 }, { "epoch": 0.76, "learning_rate": 2.881326365788417e-06, "loss": 0.9734, "step": 3460 }, { "epoch": 0.76, "learning_rate": 2.8763340865684795e-06, "loss": 0.9342, "step": 3461 }, { "epoch": 0.76, "learning_rate": 2.8713454094160497e-06, "loss": 0.8448, "step": 3462 }, { "epoch": 0.76, "learning_rate": 2.8663603368536397e-06, "loss": 0.3127, "step": 3463 }, { "epoch": 0.76, "learning_rate": 2.861378871401943e-06, "loss": 0.8455, "step": 3464 }, { "epoch": 0.76, "learning_rate": 2.856401015579828e-06, "loss": 0.9059, "step": 3465 }, { "epoch": 0.76, "learning_rate": 2.8514267719043374e-06, "loss": 0.9751, "step": 3466 }, { "epoch": 0.76, "learning_rate": 2.8464561428906866e-06, "loss": 0.9457, "step": 3467 }, { "epoch": 0.76, "learning_rate": 2.8414891310522665e-06, "loss": 0.9058, "step": 3468 }, { "epoch": 0.76, "learning_rate": 2.836525738900634e-06, "loss": 0.8587, "step": 3469 }, { "epoch": 0.76, "learning_rate": 2.83156596894552e-06, "loss": 0.9123, "step": 3470 }, { "epoch": 0.76, "learning_rate": 2.8266098236948223e-06, "loss": 0.9191, "step": 3471 }, { "epoch": 0.76, "learning_rate": 2.8216573056546057e-06, "loss": 0.9239, "step": 3472 }, { "epoch": 0.76, "learning_rate": 2.816708417329105e-06, "loss": 0.303, "step": 3473 }, { "epoch": 0.76, "learning_rate": 2.8117631612207084e-06, "loss": 0.9213, "step": 3474 }, { "epoch": 0.76, "learning_rate": 2.806821539829978e-06, "loss": 0.8765, "step": 3475 }, { "epoch": 0.76, "learning_rate": 2.8018835556556357e-06, "loss": 0.8893, "step": 3476 }, { "epoch": 0.76, "learning_rate": 2.796949211194562e-06, "loss": 0.9307, "step": 3477 }, { "epoch": 0.76, "learning_rate": 2.7920185089417993e-06, "loss": 0.8942, "step": 3478 }, { "epoch": 0.76, "learning_rate": 2.787091451390548e-06, "loss": 0.9006, "step": 3479 }, { "epoch": 0.76, "learning_rate": 2.7821680410321638e-06, "loss": 0.9006, "step": 3480 }, { "epoch": 0.76, "learning_rate": 2.7772482803561596e-06, "loss": 0.9179, "step": 3481 }, { "epoch": 0.76, "learning_rate": 2.772332171850204e-06, "loss": 0.9284, "step": 3482 }, { "epoch": 0.76, "learning_rate": 2.767419718000115e-06, "loss": 0.9281, "step": 3483 }, { "epoch": 0.76, "learning_rate": 2.762510921289867e-06, "loss": 0.9138, "step": 3484 }, { "epoch": 0.77, "learning_rate": 2.7576057842015823e-06, "loss": 0.8949, "step": 3485 }, { "epoch": 0.77, "learning_rate": 2.752704309215535e-06, "loss": 0.8748, "step": 3486 }, { "epoch": 0.77, "learning_rate": 2.7478064988101436e-06, "loss": 0.9053, "step": 3487 }, { "epoch": 0.77, "learning_rate": 2.7429123554619795e-06, "loss": 0.9687, "step": 3488 }, { "epoch": 0.77, "learning_rate": 2.738021881645755e-06, "loss": 0.9275, "step": 3489 }, { "epoch": 0.77, "learning_rate": 2.733135079834327e-06, "loss": 0.9077, "step": 3490 }, { "epoch": 0.77, "learning_rate": 2.7282519524986995e-06, "loss": 0.9094, "step": 3491 }, { "epoch": 0.77, "learning_rate": 2.723372502108016e-06, "loss": 0.9216, "step": 3492 }, { "epoch": 0.77, "learning_rate": 2.7184967311295583e-06, "loss": 0.9272, "step": 3493 }, { "epoch": 0.77, "learning_rate": 2.7136246420287538e-06, "loss": 0.8728, "step": 3494 }, { "epoch": 0.77, "learning_rate": 2.7087562372691644e-06, "loss": 0.8976, "step": 3495 }, { "epoch": 0.77, "learning_rate": 2.703891519312486e-06, "loss": 0.2799, "step": 3496 }, { "epoch": 0.77, "learning_rate": 2.699030490618555e-06, "loss": 0.9085, "step": 3497 }, { "epoch": 0.77, "learning_rate": 2.6941731536453407e-06, "loss": 0.8788, "step": 3498 }, { "epoch": 0.77, "learning_rate": 2.689319510848947e-06, "loss": 1.0167, "step": 3499 }, { "epoch": 0.77, "learning_rate": 2.684469564683608e-06, "loss": 0.9011, "step": 3500 }, { "epoch": 0.77, "learning_rate": 2.6796233176016893e-06, "loss": 0.8684, "step": 3501 }, { "epoch": 0.77, "learning_rate": 2.6747807720536866e-06, "loss": 0.8865, "step": 3502 }, { "epoch": 0.77, "learning_rate": 2.6699419304882233e-06, "loss": 0.9308, "step": 3503 }, { "epoch": 0.77, "learning_rate": 2.6651067953520505e-06, "loss": 0.898, "step": 3504 }, { "epoch": 0.77, "learning_rate": 2.660275369090043e-06, "loss": 0.9029, "step": 3505 }, { "epoch": 0.77, "learning_rate": 2.6554476541452033e-06, "loss": 0.8858, "step": 3506 }, { "epoch": 0.77, "learning_rate": 2.650623652958656e-06, "loss": 0.9317, "step": 3507 }, { "epoch": 0.77, "learning_rate": 2.645803367969647e-06, "loss": 0.9094, "step": 3508 }, { "epoch": 0.77, "learning_rate": 2.6409868016155436e-06, "loss": 0.9698, "step": 3509 }, { "epoch": 0.77, "learning_rate": 2.6361739563318334e-06, "loss": 0.8769, "step": 3510 }, { "epoch": 0.77, "learning_rate": 2.6313648345521224e-06, "loss": 0.9138, "step": 3511 }, { "epoch": 0.77, "learning_rate": 2.6265594387081327e-06, "loss": 0.8914, "step": 3512 }, { "epoch": 0.77, "learning_rate": 2.621757771229707e-06, "loss": 0.939, "step": 3513 }, { "epoch": 0.77, "learning_rate": 2.616959834544792e-06, "loss": 0.9223, "step": 3514 }, { "epoch": 0.77, "learning_rate": 2.61216563107946e-06, "loss": 0.874, "step": 3515 }, { "epoch": 0.77, "learning_rate": 2.607375163257887e-06, "loss": 0.8919, "step": 3516 }, { "epoch": 0.77, "learning_rate": 2.6025884335023657e-06, "loss": 0.8864, "step": 3517 }, { "epoch": 0.77, "learning_rate": 2.5978054442332968e-06, "loss": 0.8994, "step": 3518 }, { "epoch": 0.77, "learning_rate": 2.593026197869187e-06, "loss": 0.9305, "step": 3519 }, { "epoch": 0.77, "learning_rate": 2.5882506968266564e-06, "loss": 0.9229, "step": 3520 }, { "epoch": 0.77, "learning_rate": 2.5834789435204245e-06, "loss": 0.9181, "step": 3521 }, { "epoch": 0.77, "learning_rate": 2.5787109403633202e-06, "loss": 0.9208, "step": 3522 }, { "epoch": 0.77, "learning_rate": 2.5739466897662746e-06, "loss": 0.9089, "step": 3523 }, { "epoch": 0.77, "learning_rate": 2.5691861941383213e-06, "loss": 0.9086, "step": 3524 }, { "epoch": 0.77, "learning_rate": 2.5644294558865955e-06, "loss": 0.9763, "step": 3525 }, { "epoch": 0.77, "learning_rate": 2.559676477416333e-06, "loss": 0.8878, "step": 3526 }, { "epoch": 0.77, "learning_rate": 2.5549272611308674e-06, "loss": 0.9593, "step": 3527 }, { "epoch": 0.77, "learning_rate": 2.550181809431631e-06, "loss": 0.8877, "step": 3528 }, { "epoch": 0.77, "learning_rate": 2.54544012471815e-06, "loss": 0.9447, "step": 3529 }, { "epoch": 0.77, "learning_rate": 2.540702209388052e-06, "loss": 0.9595, "step": 3530 }, { "epoch": 0.78, "learning_rate": 2.5359680658370532e-06, "loss": 0.8936, "step": 3531 }, { "epoch": 0.78, "learning_rate": 2.5312376964589593e-06, "loss": 0.9141, "step": 3532 }, { "epoch": 0.78, "learning_rate": 2.5265111036456767e-06, "loss": 0.9461, "step": 3533 }, { "epoch": 0.78, "learning_rate": 2.521788289787196e-06, "loss": 0.9061, "step": 3534 }, { "epoch": 0.78, "learning_rate": 2.5170692572715983e-06, "loss": 0.8874, "step": 3535 }, { "epoch": 0.78, "learning_rate": 2.512354008485054e-06, "loss": 0.9221, "step": 3536 }, { "epoch": 0.78, "learning_rate": 2.5076425458118192e-06, "loss": 0.9415, "step": 3537 }, { "epoch": 0.78, "learning_rate": 2.502934871634236e-06, "loss": 0.9184, "step": 3538 }, { "epoch": 0.78, "learning_rate": 2.498230988332729e-06, "loss": 0.897, "step": 3539 }, { "epoch": 0.78, "learning_rate": 2.4935308982858097e-06, "loss": 0.8631, "step": 3540 }, { "epoch": 0.78, "learning_rate": 2.4888346038700672e-06, "loss": 0.8739, "step": 3541 }, { "epoch": 0.78, "learning_rate": 2.4841421074601735e-06, "loss": 0.9394, "step": 3542 }, { "epoch": 0.78, "learning_rate": 2.4794534114288827e-06, "loss": 0.8406, "step": 3543 }, { "epoch": 0.78, "learning_rate": 2.4747685181470203e-06, "loss": 0.9156, "step": 3544 }, { "epoch": 0.78, "learning_rate": 2.4700874299834975e-06, "loss": 0.9263, "step": 3545 }, { "epoch": 0.78, "learning_rate": 2.4654101493052952e-06, "loss": 0.888, "step": 3546 }, { "epoch": 0.78, "learning_rate": 2.460736678477471e-06, "loss": 0.9048, "step": 3547 }, { "epoch": 0.78, "learning_rate": 2.4560670198631566e-06, "loss": 1.0059, "step": 3548 }, { "epoch": 0.78, "learning_rate": 2.451401175823558e-06, "loss": 0.964, "step": 3549 }, { "epoch": 0.78, "learning_rate": 2.4467391487179446e-06, "loss": 0.9166, "step": 3550 }, { "epoch": 0.78, "learning_rate": 2.442080940903664e-06, "loss": 0.9837, "step": 3551 }, { "epoch": 0.78, "learning_rate": 2.4374265547361298e-06, "loss": 0.907, "step": 3552 }, { "epoch": 0.78, "learning_rate": 2.4327759925688233e-06, "loss": 0.9622, "step": 3553 }, { "epoch": 0.78, "learning_rate": 2.428129256753291e-06, "loss": 0.9333, "step": 3554 }, { "epoch": 0.78, "learning_rate": 2.4234863496391458e-06, "loss": 0.9079, "step": 3555 }, { "epoch": 0.78, "learning_rate": 2.4188472735740663e-06, "loss": 0.9516, "step": 3556 }, { "epoch": 0.78, "learning_rate": 2.4142120309037896e-06, "loss": 0.9054, "step": 3557 }, { "epoch": 0.78, "learning_rate": 2.409580623972119e-06, "loss": 0.3006, "step": 3558 }, { "epoch": 0.78, "learning_rate": 2.4049530551209143e-06, "loss": 0.953, "step": 3559 }, { "epoch": 0.78, "learning_rate": 2.4003293266900985e-06, "loss": 0.2822, "step": 3560 }, { "epoch": 0.78, "learning_rate": 2.3957094410176497e-06, "loss": 0.927, "step": 3561 }, { "epoch": 0.78, "learning_rate": 2.3910934004396034e-06, "loss": 0.9465, "step": 3562 }, { "epoch": 0.78, "learning_rate": 2.3864812072900512e-06, "loss": 0.8688, "step": 3563 }, { "epoch": 0.78, "learning_rate": 2.3818728639011413e-06, "loss": 0.9012, "step": 3564 }, { "epoch": 0.78, "learning_rate": 2.377268372603071e-06, "loss": 0.8998, "step": 3565 }, { "epoch": 0.78, "learning_rate": 2.3726677357240934e-06, "loss": 0.9209, "step": 3566 }, { "epoch": 0.78, "learning_rate": 2.3680709555905145e-06, "loss": 0.9358, "step": 3567 }, { "epoch": 0.78, "learning_rate": 2.3634780345266805e-06, "loss": 0.9258, "step": 3568 }, { "epoch": 0.78, "learning_rate": 2.358888974854997e-06, "loss": 0.9557, "step": 3569 }, { "epoch": 0.78, "learning_rate": 2.354303778895911e-06, "loss": 0.8964, "step": 3570 }, { "epoch": 0.78, "learning_rate": 2.349722448967919e-06, "loss": 0.9093, "step": 3571 }, { "epoch": 0.78, "learning_rate": 2.345144987387562e-06, "loss": 0.8782, "step": 3572 }, { "epoch": 0.78, "learning_rate": 2.340571396469422e-06, "loss": 0.8467, "step": 3573 }, { "epoch": 0.78, "learning_rate": 2.3360016785261287e-06, "loss": 0.9234, "step": 3574 }, { "epoch": 0.78, "learning_rate": 2.331435835868349e-06, "loss": 0.9129, "step": 3575 }, { "epoch": 0.78, "learning_rate": 2.3268738708047935e-06, "loss": 0.9198, "step": 3576 }, { "epoch": 0.79, "learning_rate": 2.3223157856422095e-06, "loss": 0.8991, "step": 3577 }, { "epoch": 0.79, "learning_rate": 2.317761582685385e-06, "loss": 0.8621, "step": 3578 }, { "epoch": 0.79, "learning_rate": 2.313211264237143e-06, "loss": 0.8818, "step": 3579 }, { "epoch": 0.79, "learning_rate": 2.308664832598343e-06, "loss": 0.9219, "step": 3580 }, { "epoch": 0.79, "learning_rate": 2.3041222900678785e-06, "loss": 0.8652, "step": 3581 }, { "epoch": 0.79, "learning_rate": 2.2995836389426783e-06, "loss": 0.8692, "step": 3582 }, { "epoch": 0.79, "learning_rate": 2.2950488815177017e-06, "loss": 0.9148, "step": 3583 }, { "epoch": 0.79, "learning_rate": 2.2905180200859413e-06, "loss": 0.9106, "step": 3584 }, { "epoch": 0.79, "learning_rate": 2.285991056938418e-06, "loss": 0.9052, "step": 3585 }, { "epoch": 0.79, "learning_rate": 2.281467994364177e-06, "loss": 0.9081, "step": 3586 }, { "epoch": 0.79, "learning_rate": 2.276948834650301e-06, "loss": 0.9125, "step": 3587 }, { "epoch": 0.79, "learning_rate": 2.272433580081892e-06, "loss": 0.9257, "step": 3588 }, { "epoch": 0.79, "learning_rate": 2.267922232942079e-06, "loss": 0.9132, "step": 3589 }, { "epoch": 0.79, "learning_rate": 2.2634147955120176e-06, "loss": 0.8198, "step": 3590 }, { "epoch": 0.79, "learning_rate": 2.2589112700708836e-06, "loss": 0.9008, "step": 3591 }, { "epoch": 0.79, "learning_rate": 2.2544116588958755e-06, "loss": 0.9292, "step": 3592 }, { "epoch": 0.79, "learning_rate": 2.2499159642622128e-06, "loss": 0.94, "step": 3593 }, { "epoch": 0.79, "learning_rate": 2.245424188443135e-06, "loss": 0.8481, "step": 3594 }, { "epoch": 0.79, "learning_rate": 2.2409363337099e-06, "loss": 0.9285, "step": 3595 }, { "epoch": 0.79, "learning_rate": 2.236452402331781e-06, "loss": 0.9298, "step": 3596 }, { "epoch": 0.79, "learning_rate": 2.2319723965760707e-06, "loss": 0.9273, "step": 3597 }, { "epoch": 0.79, "learning_rate": 2.2274963187080732e-06, "loss": 0.9484, "step": 3598 }, { "epoch": 0.79, "learning_rate": 2.2230241709911107e-06, "loss": 0.8964, "step": 3599 }, { "epoch": 0.79, "learning_rate": 2.2185559556865145e-06, "loss": 0.8359, "step": 3600 }, { "epoch": 0.79, "learning_rate": 2.214091675053629e-06, "loss": 0.9626, "step": 3601 }, { "epoch": 0.79, "learning_rate": 2.209631331349811e-06, "loss": 0.8407, "step": 3602 }, { "epoch": 0.79, "learning_rate": 2.20517492683042e-06, "loss": 0.9222, "step": 3603 }, { "epoch": 0.79, "learning_rate": 2.20072246374883e-06, "loss": 0.3157, "step": 3604 }, { "epoch": 0.79, "learning_rate": 2.1962739443564196e-06, "loss": 0.9317, "step": 3605 }, { "epoch": 0.79, "learning_rate": 2.191829370902575e-06, "loss": 0.9639, "step": 3606 }, { "epoch": 0.79, "learning_rate": 2.1873887456346866e-06, "loss": 0.879, "step": 3607 }, { "epoch": 0.79, "learning_rate": 2.1829520707981457e-06, "loss": 0.9569, "step": 3608 }, { "epoch": 0.79, "learning_rate": 2.1785193486363486e-06, "loss": 0.8605, "step": 3609 }, { "epoch": 0.79, "learning_rate": 2.1740905813906945e-06, "loss": 0.9096, "step": 3610 }, { "epoch": 0.79, "learning_rate": 2.1696657713005775e-06, "loss": 0.8407, "step": 3611 }, { "epoch": 0.79, "learning_rate": 2.1652449206033966e-06, "loss": 0.8539, "step": 3612 }, { "epoch": 0.79, "learning_rate": 2.160828031534544e-06, "loss": 0.8753, "step": 3613 }, { "epoch": 0.79, "learning_rate": 2.156415106327413e-06, "loss": 0.9278, "step": 3614 }, { "epoch": 0.79, "learning_rate": 2.1520061472133903e-06, "loss": 0.943, "step": 3615 }, { "epoch": 0.79, "learning_rate": 2.147601156421856e-06, "loss": 0.9418, "step": 3616 }, { "epoch": 0.79, "learning_rate": 2.143200136180185e-06, "loss": 0.9177, "step": 3617 }, { "epoch": 0.79, "learning_rate": 2.1388030887137455e-06, "loss": 0.9034, "step": 3618 }, { "epoch": 0.79, "learning_rate": 2.1344100162458947e-06, "loss": 0.8945, "step": 3619 }, { "epoch": 0.79, "learning_rate": 2.130020920997985e-06, "loss": 0.8856, "step": 3620 }, { "epoch": 0.79, "learning_rate": 2.125635805189347e-06, "loss": 0.8918, "step": 3621 }, { "epoch": 0.8, "learning_rate": 2.1212546710373096e-06, "loss": 0.8658, "step": 3622 }, { "epoch": 0.8, "learning_rate": 2.1168775207571833e-06, "loss": 0.9142, "step": 3623 }, { "epoch": 0.8, "learning_rate": 2.112504356562266e-06, "loss": 0.9292, "step": 3624 }, { "epoch": 0.8, "learning_rate": 2.1081351806638395e-06, "loss": 0.9564, "step": 3625 }, { "epoch": 0.8, "learning_rate": 2.1037699952711697e-06, "loss": 0.9133, "step": 3626 }, { "epoch": 0.8, "learning_rate": 2.0994088025915027e-06, "loss": 0.8901, "step": 3627 }, { "epoch": 0.8, "learning_rate": 2.095051604830067e-06, "loss": 0.9188, "step": 3628 }, { "epoch": 0.8, "learning_rate": 2.0906984041900724e-06, "loss": 0.8967, "step": 3629 }, { "epoch": 0.8, "learning_rate": 2.086349202872705e-06, "loss": 0.3367, "step": 3630 }, { "epoch": 0.8, "learning_rate": 2.0820040030771295e-06, "loss": 0.9113, "step": 3631 }, { "epoch": 0.8, "learning_rate": 2.0776628070004878e-06, "loss": 0.9103, "step": 3632 }, { "epoch": 0.8, "learning_rate": 2.073325616837898e-06, "loss": 0.9433, "step": 3633 }, { "epoch": 0.8, "learning_rate": 2.068992434782451e-06, "loss": 0.8942, "step": 3634 }, { "epoch": 0.8, "learning_rate": 2.0646632630252104e-06, "loss": 0.9218, "step": 3635 }, { "epoch": 0.8, "learning_rate": 2.060338103755216e-06, "loss": 0.8727, "step": 3636 }, { "epoch": 0.8, "learning_rate": 2.0560169591594735e-06, "loss": 0.946, "step": 3637 }, { "epoch": 0.8, "learning_rate": 2.051699831422965e-06, "loss": 0.9395, "step": 3638 }, { "epoch": 0.8, "learning_rate": 2.0473867227286326e-06, "loss": 0.9261, "step": 3639 }, { "epoch": 0.8, "learning_rate": 2.0430776352573924e-06, "loss": 0.8341, "step": 3640 }, { "epoch": 0.8, "learning_rate": 2.038772571188127e-06, "loss": 0.2985, "step": 3641 }, { "epoch": 0.8, "learning_rate": 2.0344715326976826e-06, "loss": 0.9115, "step": 3642 }, { "epoch": 0.8, "learning_rate": 2.0301745219608728e-06, "loss": 0.8816, "step": 3643 }, { "epoch": 0.8, "learning_rate": 2.0258815411504697e-06, "loss": 0.3083, "step": 3644 }, { "epoch": 0.8, "learning_rate": 2.0215925924372126e-06, "loss": 0.8818, "step": 3645 }, { "epoch": 0.8, "learning_rate": 2.017307677989798e-06, "loss": 0.9327, "step": 3646 }, { "epoch": 0.8, "learning_rate": 2.0130267999748866e-06, "loss": 0.8955, "step": 3647 }, { "epoch": 0.8, "learning_rate": 2.0087499605570947e-06, "loss": 0.8979, "step": 3648 }, { "epoch": 0.8, "learning_rate": 2.0044771618989977e-06, "loss": 0.9557, "step": 3649 }, { "epoch": 0.8, "learning_rate": 2.0002084061611282e-06, "loss": 0.8948, "step": 3650 }, { "epoch": 0.8, "learning_rate": 1.995943695501973e-06, "loss": 0.9442, "step": 3651 }, { "epoch": 0.8, "learning_rate": 1.9916830320779755e-06, "loss": 0.3041, "step": 3652 }, { "epoch": 0.8, "learning_rate": 1.987426418043531e-06, "loss": 0.8912, "step": 3653 }, { "epoch": 0.8, "learning_rate": 1.983173855550988e-06, "loss": 0.968, "step": 3654 }, { "epoch": 0.8, "learning_rate": 1.9789253467506474e-06, "loss": 0.9456, "step": 3655 }, { "epoch": 0.8, "learning_rate": 1.9746808937907615e-06, "loss": 0.8431, "step": 3656 }, { "epoch": 0.8, "learning_rate": 1.970440498817523e-06, "loss": 0.9147, "step": 3657 }, { "epoch": 0.8, "learning_rate": 1.966204163975083e-06, "loss": 0.9032, "step": 3658 }, { "epoch": 0.8, "learning_rate": 1.961971891405535e-06, "loss": 0.9413, "step": 3659 }, { "epoch": 0.8, "learning_rate": 1.9577436832489206e-06, "loss": 0.8256, "step": 3660 }, { "epoch": 0.8, "learning_rate": 1.953519541643223e-06, "loss": 0.3279, "step": 3661 }, { "epoch": 0.8, "learning_rate": 1.9492994687243715e-06, "loss": 0.9035, "step": 3662 }, { "epoch": 0.8, "learning_rate": 1.9450834666262387e-06, "loss": 0.91, "step": 3663 }, { "epoch": 0.8, "learning_rate": 1.9408715374806365e-06, "loss": 0.8784, "step": 3664 }, { "epoch": 0.8, "learning_rate": 1.9366636834173193e-06, "loss": 0.9489, "step": 3665 }, { "epoch": 0.8, "learning_rate": 1.9324599065639793e-06, "loss": 0.9637, "step": 3666 }, { "epoch": 0.8, "learning_rate": 1.9282602090462486e-06, "loss": 0.9399, "step": 3667 }, { "epoch": 0.81, "learning_rate": 1.9240645929876966e-06, "loss": 0.9034, "step": 3668 }, { "epoch": 0.81, "learning_rate": 1.919873060509826e-06, "loss": 0.8894, "step": 3669 }, { "epoch": 0.81, "learning_rate": 1.915685613732079e-06, "loss": 0.8999, "step": 3670 }, { "epoch": 0.81, "learning_rate": 1.911502254771829e-06, "loss": 0.2947, "step": 3671 }, { "epoch": 0.81, "learning_rate": 1.907322985744382e-06, "loss": 0.9088, "step": 3672 }, { "epoch": 0.81, "learning_rate": 1.9031478087629785e-06, "loss": 0.86, "step": 3673 }, { "epoch": 0.81, "learning_rate": 1.8989767259387904e-06, "loss": 0.3265, "step": 3674 }, { "epoch": 0.81, "learning_rate": 1.8948097393809127e-06, "loss": 0.3224, "step": 3675 }, { "epoch": 0.81, "learning_rate": 1.8906468511963771e-06, "loss": 0.8845, "step": 3676 }, { "epoch": 0.81, "learning_rate": 1.8864880634901384e-06, "loss": 0.3108, "step": 3677 }, { "epoch": 0.81, "learning_rate": 1.8823333783650798e-06, "loss": 0.9319, "step": 3678 }, { "epoch": 0.81, "learning_rate": 1.8781827979220092e-06, "loss": 0.9271, "step": 3679 }, { "epoch": 0.81, "learning_rate": 1.8740363242596605e-06, "loss": 0.2786, "step": 3680 }, { "epoch": 0.81, "learning_rate": 1.869893959474689e-06, "loss": 0.9418, "step": 3681 }, { "epoch": 0.81, "learning_rate": 1.8657557056616726e-06, "loss": 0.8794, "step": 3682 }, { "epoch": 0.81, "learning_rate": 1.8616215649131119e-06, "loss": 0.8771, "step": 3683 }, { "epoch": 0.81, "learning_rate": 1.857491539319428e-06, "loss": 0.8807, "step": 3684 }, { "epoch": 0.81, "learning_rate": 1.8533656309689584e-06, "loss": 0.8957, "step": 3685 }, { "epoch": 0.81, "learning_rate": 1.8492438419479609e-06, "loss": 0.8833, "step": 3686 }, { "epoch": 0.81, "learning_rate": 1.8451261743406102e-06, "loss": 0.9125, "step": 3687 }, { "epoch": 0.81, "learning_rate": 1.8410126302289965e-06, "loss": 0.9024, "step": 3688 }, { "epoch": 0.81, "learning_rate": 1.8369032116931263e-06, "loss": 0.8961, "step": 3689 }, { "epoch": 0.81, "learning_rate": 1.832797920810917e-06, "loss": 0.9611, "step": 3690 }, { "epoch": 0.81, "learning_rate": 1.8286967596582038e-06, "loss": 0.9206, "step": 3691 }, { "epoch": 0.81, "learning_rate": 1.8245997303087271e-06, "loss": 0.9376, "step": 3692 }, { "epoch": 0.81, "learning_rate": 1.8205068348341426e-06, "loss": 0.8532, "step": 3693 }, { "epoch": 0.81, "learning_rate": 1.8164180753040161e-06, "loss": 0.9269, "step": 3694 }, { "epoch": 0.81, "learning_rate": 1.8123334537858195e-06, "loss": 0.8501, "step": 3695 }, { "epoch": 0.81, "learning_rate": 1.8082529723449338e-06, "loss": 0.8866, "step": 3696 }, { "epoch": 0.81, "learning_rate": 1.8041766330446463e-06, "loss": 0.8727, "step": 3697 }, { "epoch": 0.81, "learning_rate": 1.8001044379461507e-06, "loss": 0.8665, "step": 3698 }, { "epoch": 0.81, "learning_rate": 1.796036389108543e-06, "loss": 0.904, "step": 3699 }, { "epoch": 0.81, "learning_rate": 1.7919724885888256e-06, "loss": 0.9154, "step": 3700 }, { "epoch": 0.81, "learning_rate": 1.7879127384419004e-06, "loss": 0.3059, "step": 3701 }, { "epoch": 0.81, "learning_rate": 1.783857140720573e-06, "loss": 0.9255, "step": 3702 }, { "epoch": 0.81, "learning_rate": 1.7798056974755473e-06, "loss": 0.8781, "step": 3703 }, { "epoch": 0.81, "learning_rate": 1.7757584107554282e-06, "loss": 0.9221, "step": 3704 }, { "epoch": 0.81, "learning_rate": 1.7717152826067175e-06, "loss": 1.0019, "step": 3705 }, { "epoch": 0.81, "learning_rate": 1.7676763150738164e-06, "loss": 0.9078, "step": 3706 }, { "epoch": 0.81, "learning_rate": 1.763641510199019e-06, "loss": 0.9344, "step": 3707 }, { "epoch": 0.81, "learning_rate": 1.7596108700225178e-06, "loss": 0.9098, "step": 3708 }, { "epoch": 0.81, "learning_rate": 1.7555843965823992e-06, "loss": 0.3089, "step": 3709 }, { "epoch": 0.81, "learning_rate": 1.751562091914637e-06, "loss": 0.898, "step": 3710 }, { "epoch": 0.81, "learning_rate": 1.7475439580531052e-06, "loss": 0.9017, "step": 3711 }, { "epoch": 0.81, "learning_rate": 1.7435299970295627e-06, "loss": 0.8974, "step": 3712 }, { "epoch": 0.82, "learning_rate": 1.7395202108736642e-06, "loss": 0.9427, "step": 3713 }, { "epoch": 0.82, "learning_rate": 1.7355146016129476e-06, "loss": 0.9009, "step": 3714 }, { "epoch": 0.82, "learning_rate": 1.7315131712728417e-06, "loss": 0.8421, "step": 3715 }, { "epoch": 0.82, "learning_rate": 1.7275159218766624e-06, "loss": 0.8779, "step": 3716 }, { "epoch": 0.82, "learning_rate": 1.7235228554456095e-06, "loss": 0.9459, "step": 3717 }, { "epoch": 0.82, "learning_rate": 1.7195339739987716e-06, "loss": 0.9121, "step": 3718 }, { "epoch": 0.82, "learning_rate": 1.715549279553117e-06, "loss": 0.8972, "step": 3719 }, { "epoch": 0.82, "learning_rate": 1.7115687741234987e-06, "loss": 0.9287, "step": 3720 }, { "epoch": 0.82, "learning_rate": 1.7075924597226513e-06, "loss": 0.9718, "step": 3721 }, { "epoch": 0.82, "learning_rate": 1.7036203383611905e-06, "loss": 0.9748, "step": 3722 }, { "epoch": 0.82, "learning_rate": 1.699652412047611e-06, "loss": 0.8981, "step": 3723 }, { "epoch": 0.82, "learning_rate": 1.695688682788288e-06, "loss": 0.9266, "step": 3724 }, { "epoch": 0.82, "learning_rate": 1.6917291525874723e-06, "loss": 0.8657, "step": 3725 }, { "epoch": 0.82, "learning_rate": 1.6877738234472929e-06, "loss": 0.8998, "step": 3726 }, { "epoch": 0.82, "learning_rate": 1.6838226973677562e-06, "loss": 0.869, "step": 3727 }, { "epoch": 0.82, "learning_rate": 1.6798757763467376e-06, "loss": 0.9312, "step": 3728 }, { "epoch": 0.82, "learning_rate": 1.6759330623799908e-06, "loss": 0.2988, "step": 3729 }, { "epoch": 0.82, "learning_rate": 1.6719945574611418e-06, "loss": 0.9015, "step": 3730 }, { "epoch": 0.82, "learning_rate": 1.6680602635816877e-06, "loss": 0.948, "step": 3731 }, { "epoch": 0.82, "learning_rate": 1.6641301827309963e-06, "loss": 0.9178, "step": 3732 }, { "epoch": 0.82, "learning_rate": 1.6602043168963055e-06, "loss": 0.3081, "step": 3733 }, { "epoch": 0.82, "learning_rate": 1.6562826680627219e-06, "loss": 0.315, "step": 3734 }, { "epoch": 0.82, "learning_rate": 1.6523652382132183e-06, "loss": 0.9319, "step": 3735 }, { "epoch": 0.82, "learning_rate": 1.6484520293286366e-06, "loss": 0.9307, "step": 3736 }, { "epoch": 0.82, "learning_rate": 1.644543043387682e-06, "loss": 0.8764, "step": 3737 }, { "epoch": 0.82, "learning_rate": 1.6406382823669264e-06, "loss": 0.905, "step": 3738 }, { "epoch": 0.82, "learning_rate": 1.636737748240803e-06, "loss": 0.2928, "step": 3739 }, { "epoch": 0.82, "learning_rate": 1.6328414429816109e-06, "loss": 0.9138, "step": 3740 }, { "epoch": 0.82, "learning_rate": 1.6289493685595082e-06, "loss": 0.839, "step": 3741 }, { "epoch": 0.82, "learning_rate": 1.6250615269425153e-06, "loss": 0.8927, "step": 3742 }, { "epoch": 0.82, "learning_rate": 1.6211779200965105e-06, "loss": 0.9323, "step": 3743 }, { "epoch": 0.82, "learning_rate": 1.6172985499852322e-06, "loss": 0.8822, "step": 3744 }, { "epoch": 0.82, "learning_rate": 1.613423418570279e-06, "loss": 0.952, "step": 3745 }, { "epoch": 0.82, "learning_rate": 1.6095525278110979e-06, "loss": 0.9268, "step": 3746 }, { "epoch": 0.82, "learning_rate": 1.605685879665e-06, "loss": 0.9195, "step": 3747 }, { "epoch": 0.82, "learning_rate": 1.6018234760871488e-06, "loss": 0.9543, "step": 3748 }, { "epoch": 0.82, "learning_rate": 1.59796531903056e-06, "loss": 0.9294, "step": 3749 }, { "epoch": 0.82, "learning_rate": 1.594111410446104e-06, "loss": 0.8893, "step": 3750 }, { "epoch": 0.82, "learning_rate": 1.5902617522825003e-06, "loss": 0.9525, "step": 3751 }, { "epoch": 0.82, "learning_rate": 1.5864163464863235e-06, "loss": 0.8901, "step": 3752 }, { "epoch": 0.82, "learning_rate": 1.5825751950019929e-06, "loss": 0.9106, "step": 3753 }, { "epoch": 0.82, "learning_rate": 1.5787382997717804e-06, "loss": 0.9183, "step": 3754 }, { "epoch": 0.82, "learning_rate": 1.574905662735805e-06, "loss": 0.9228, "step": 3755 }, { "epoch": 0.82, "learning_rate": 1.57107728583203e-06, "loss": 0.9082, "step": 3756 }, { "epoch": 0.82, "learning_rate": 1.5672531709962691e-06, "loss": 0.9483, "step": 3757 }, { "epoch": 0.82, "learning_rate": 1.5634333201621754e-06, "loss": 0.9339, "step": 3758 }, { "epoch": 0.83, "learning_rate": 1.5596177352612518e-06, "loss": 0.9259, "step": 3759 }, { "epoch": 0.83, "learning_rate": 1.5558064182228393e-06, "loss": 0.9251, "step": 3760 }, { "epoch": 0.83, "learning_rate": 1.5519993709741243e-06, "loss": 0.9047, "step": 3761 }, { "epoch": 0.83, "learning_rate": 1.5481965954401313e-06, "loss": 0.909, "step": 3762 }, { "epoch": 0.83, "learning_rate": 1.5443980935437286e-06, "loss": 0.8966, "step": 3763 }, { "epoch": 0.83, "learning_rate": 1.5406038672056167e-06, "loss": 0.9209, "step": 3764 }, { "epoch": 0.83, "learning_rate": 1.5368139183443421e-06, "loss": 0.9733, "step": 3765 }, { "epoch": 0.83, "learning_rate": 1.5330282488762837e-06, "loss": 0.8377, "step": 3766 }, { "epoch": 0.83, "learning_rate": 1.5292468607156573e-06, "loss": 0.9107, "step": 3767 }, { "epoch": 0.83, "learning_rate": 1.5254697557745156e-06, "loss": 0.9268, "step": 3768 }, { "epoch": 0.83, "learning_rate": 1.5216969359627431e-06, "loss": 0.8942, "step": 3769 }, { "epoch": 0.83, "learning_rate": 1.5179284031880603e-06, "loss": 0.927, "step": 3770 }, { "epoch": 0.83, "learning_rate": 1.5141641593560164e-06, "loss": 0.9416, "step": 3771 }, { "epoch": 0.83, "learning_rate": 1.5104042063699941e-06, "loss": 0.9036, "step": 3772 }, { "epoch": 0.83, "learning_rate": 1.5066485461312063e-06, "loss": 0.8611, "step": 3773 }, { "epoch": 0.83, "learning_rate": 1.502897180538697e-06, "loss": 0.9649, "step": 3774 }, { "epoch": 0.83, "learning_rate": 1.4991501114893336e-06, "loss": 0.9121, "step": 3775 }, { "epoch": 0.83, "learning_rate": 1.495407340877817e-06, "loss": 0.8408, "step": 3776 }, { "epoch": 0.83, "learning_rate": 1.4916688705966697e-06, "loss": 0.874, "step": 3777 }, { "epoch": 0.83, "learning_rate": 1.4879347025362422e-06, "loss": 0.9023, "step": 3778 }, { "epoch": 0.83, "learning_rate": 1.4842048385847097e-06, "loss": 0.9244, "step": 3779 }, { "epoch": 0.83, "learning_rate": 1.48047928062807e-06, "loss": 0.9352, "step": 3780 }, { "epoch": 0.83, "learning_rate": 1.4767580305501473e-06, "loss": 0.9515, "step": 3781 }, { "epoch": 0.83, "learning_rate": 1.4730410902325786e-06, "loss": 0.8627, "step": 3782 }, { "epoch": 0.83, "learning_rate": 1.4693284615548297e-06, "loss": 0.8743, "step": 3783 }, { "epoch": 0.83, "learning_rate": 1.4656201463941844e-06, "loss": 0.9528, "step": 3784 }, { "epoch": 0.83, "learning_rate": 1.4619161466257459e-06, "loss": 0.9086, "step": 3785 }, { "epoch": 0.83, "learning_rate": 1.4582164641224317e-06, "loss": 0.9225, "step": 3786 }, { "epoch": 0.83, "learning_rate": 1.454521100754981e-06, "loss": 0.8963, "step": 3787 }, { "epoch": 0.83, "learning_rate": 1.4508300583919466e-06, "loss": 0.9305, "step": 3788 }, { "epoch": 0.83, "learning_rate": 1.447143338899696e-06, "loss": 0.9351, "step": 3789 }, { "epoch": 0.83, "learning_rate": 1.443460944142413e-06, "loss": 0.9094, "step": 3790 }, { "epoch": 0.83, "learning_rate": 1.4397828759820909e-06, "loss": 0.8795, "step": 3791 }, { "epoch": 0.83, "learning_rate": 1.4361091362785396e-06, "loss": 0.9544, "step": 3792 }, { "epoch": 0.83, "learning_rate": 1.4324397268893775e-06, "loss": 0.9525, "step": 3793 }, { "epoch": 0.83, "learning_rate": 1.4287746496700338e-06, "loss": 0.9639, "step": 3794 }, { "epoch": 0.83, "learning_rate": 1.4251139064737485e-06, "loss": 0.9505, "step": 3795 }, { "epoch": 0.83, "learning_rate": 1.4214574991515695e-06, "loss": 0.9205, "step": 3796 }, { "epoch": 0.83, "learning_rate": 1.417805429552349e-06, "loss": 0.9199, "step": 3797 }, { "epoch": 0.83, "learning_rate": 1.4141576995227546e-06, "loss": 0.9063, "step": 3798 }, { "epoch": 0.83, "learning_rate": 1.410514310907246e-06, "loss": 0.9324, "step": 3799 }, { "epoch": 0.83, "learning_rate": 1.406875265548101e-06, "loss": 0.8912, "step": 3800 }, { "epoch": 0.83, "learning_rate": 1.4032405652853932e-06, "loss": 0.9073, "step": 3801 }, { "epoch": 0.83, "learning_rate": 1.399610211957001e-06, "loss": 0.9584, "step": 3802 }, { "epoch": 0.83, "learning_rate": 1.3959842073986085e-06, "loss": 0.9622, "step": 3803 }, { "epoch": 0.84, "learning_rate": 1.3923625534436947e-06, "loss": 0.9528, "step": 3804 }, { "epoch": 0.84, "learning_rate": 1.3887452519235434e-06, "loss": 0.928, "step": 3805 }, { "epoch": 0.84, "learning_rate": 1.385132304667235e-06, "loss": 0.974, "step": 3806 }, { "epoch": 0.84, "learning_rate": 1.381523713501649e-06, "loss": 0.3108, "step": 3807 }, { "epoch": 0.84, "learning_rate": 1.3779194802514628e-06, "loss": 0.8401, "step": 3808 }, { "epoch": 0.84, "learning_rate": 1.3743196067391507e-06, "loss": 0.9023, "step": 3809 }, { "epoch": 0.84, "learning_rate": 1.3707240947849797e-06, "loss": 0.9242, "step": 3810 }, { "epoch": 0.84, "learning_rate": 1.367132946207015e-06, "loss": 0.8994, "step": 3811 }, { "epoch": 0.84, "learning_rate": 1.3635461628211122e-06, "loss": 0.8487, "step": 3812 }, { "epoch": 0.84, "learning_rate": 1.359963746440922e-06, "loss": 0.9561, "step": 3813 }, { "epoch": 0.84, "learning_rate": 1.3563856988778868e-06, "loss": 0.9169, "step": 3814 }, { "epoch": 0.84, "learning_rate": 1.3528120219412377e-06, "loss": 0.9254, "step": 3815 }, { "epoch": 0.84, "learning_rate": 1.3492427174380007e-06, "loss": 0.315, "step": 3816 }, { "epoch": 0.84, "learning_rate": 1.345677787172983e-06, "loss": 0.9632, "step": 3817 }, { "epoch": 0.84, "learning_rate": 1.3421172329487865e-06, "loss": 0.9783, "step": 3818 }, { "epoch": 0.84, "learning_rate": 1.338561056565798e-06, "loss": 0.9653, "step": 3819 }, { "epoch": 0.84, "learning_rate": 1.335009259822191e-06, "loss": 0.9559, "step": 3820 }, { "epoch": 0.84, "learning_rate": 1.331461844513925e-06, "loss": 0.9843, "step": 3821 }, { "epoch": 0.84, "learning_rate": 1.3279188124347442e-06, "loss": 0.898, "step": 3822 }, { "epoch": 0.84, "learning_rate": 1.3243801653761734e-06, "loss": 0.9122, "step": 3823 }, { "epoch": 0.84, "learning_rate": 1.3208459051275247e-06, "loss": 0.3087, "step": 3824 }, { "epoch": 0.84, "learning_rate": 1.3173160334758895e-06, "loss": 0.941, "step": 3825 }, { "epoch": 0.84, "learning_rate": 1.3137905522061389e-06, "loss": 0.9708, "step": 3826 }, { "epoch": 0.84, "learning_rate": 1.3102694631009261e-06, "loss": 0.9166, "step": 3827 }, { "epoch": 0.84, "learning_rate": 1.3067527679406843e-06, "loss": 0.8814, "step": 3828 }, { "epoch": 0.84, "learning_rate": 1.3032404685036216e-06, "loss": 0.8499, "step": 3829 }, { "epoch": 0.84, "learning_rate": 1.2997325665657257e-06, "loss": 0.9417, "step": 3830 }, { "epoch": 0.84, "learning_rate": 1.2962290639007614e-06, "loss": 0.8659, "step": 3831 }, { "epoch": 0.84, "learning_rate": 1.2927299622802658e-06, "loss": 0.8877, "step": 3832 }, { "epoch": 0.84, "learning_rate": 1.2892352634735539e-06, "loss": 0.9504, "step": 3833 }, { "epoch": 0.84, "learning_rate": 1.2857449692477152e-06, "loss": 0.8834, "step": 3834 }, { "epoch": 0.84, "learning_rate": 1.282259081367606e-06, "loss": 0.9514, "step": 3835 }, { "epoch": 0.84, "learning_rate": 1.2787776015958609e-06, "loss": 0.9204, "step": 3836 }, { "epoch": 0.84, "learning_rate": 1.2753005316928823e-06, "loss": 0.8699, "step": 3837 }, { "epoch": 0.84, "learning_rate": 1.2718278734168442e-06, "loss": 0.9699, "step": 3838 }, { "epoch": 0.84, "learning_rate": 1.2683596285236899e-06, "loss": 0.8973, "step": 3839 }, { "epoch": 0.84, "learning_rate": 1.2648957987671295e-06, "loss": 0.903, "step": 3840 }, { "epoch": 0.84, "learning_rate": 1.2614363858986412e-06, "loss": 0.8943, "step": 3841 }, { "epoch": 0.84, "learning_rate": 1.2579813916674721e-06, "loss": 0.858, "step": 3842 }, { "epoch": 0.84, "learning_rate": 1.2545308178206327e-06, "loss": 0.8947, "step": 3843 }, { "epoch": 0.84, "learning_rate": 1.2510846661028974e-06, "loss": 0.9075, "step": 3844 }, { "epoch": 0.84, "learning_rate": 1.2476429382568067e-06, "loss": 0.8572, "step": 3845 }, { "epoch": 0.84, "learning_rate": 1.244205636022664e-06, "loss": 0.9007, "step": 3846 }, { "epoch": 0.84, "learning_rate": 1.2407727611385346e-06, "loss": 0.9184, "step": 3847 }, { "epoch": 0.84, "learning_rate": 1.237344315340243e-06, "loss": 0.9472, "step": 3848 }, { "epoch": 0.84, "learning_rate": 1.2339203003613787e-06, "loss": 0.9317, "step": 3849 }, { "epoch": 0.85, "learning_rate": 1.2305007179332851e-06, "loss": 0.9043, "step": 3850 }, { "epoch": 0.85, "learning_rate": 1.2270855697850692e-06, "loss": 0.9113, "step": 3851 }, { "epoch": 0.85, "learning_rate": 1.2236748576435943e-06, "loss": 0.9264, "step": 3852 }, { "epoch": 0.85, "learning_rate": 1.2202685832334805e-06, "loss": 0.8303, "step": 3853 }, { "epoch": 0.85, "learning_rate": 1.2168667482771e-06, "loss": 0.8873, "step": 3854 }, { "epoch": 0.85, "learning_rate": 1.2134693544945875e-06, "loss": 0.9165, "step": 3855 }, { "epoch": 0.85, "learning_rate": 1.210076403603827e-06, "loss": 0.8736, "step": 3856 }, { "epoch": 0.85, "learning_rate": 1.2066878973204576e-06, "loss": 0.8811, "step": 3857 }, { "epoch": 0.85, "learning_rate": 1.203303837357871e-06, "loss": 0.9395, "step": 3858 }, { "epoch": 0.85, "learning_rate": 1.1999242254272092e-06, "loss": 0.9114, "step": 3859 }, { "epoch": 0.85, "learning_rate": 1.1965490632373677e-06, "loss": 0.9102, "step": 3860 }, { "epoch": 0.85, "learning_rate": 1.1931783524949913e-06, "loss": 0.9263, "step": 3861 }, { "epoch": 0.85, "learning_rate": 1.1898120949044712e-06, "loss": 0.8678, "step": 3862 }, { "epoch": 0.85, "learning_rate": 1.1864502921679487e-06, "loss": 0.8981, "step": 3863 }, { "epoch": 0.85, "learning_rate": 1.1830929459853158e-06, "loss": 0.9313, "step": 3864 }, { "epoch": 0.85, "learning_rate": 1.179740058054204e-06, "loss": 0.9071, "step": 3865 }, { "epoch": 0.85, "learning_rate": 1.1763916300699973e-06, "loss": 0.8804, "step": 3866 }, { "epoch": 0.85, "learning_rate": 1.1730476637258192e-06, "loss": 0.8839, "step": 3867 }, { "epoch": 0.85, "learning_rate": 1.1697081607125427e-06, "loss": 0.2947, "step": 3868 }, { "epoch": 0.85, "learning_rate": 1.166373122718778e-06, "loss": 0.926, "step": 3869 }, { "epoch": 0.85, "learning_rate": 1.1630425514308819e-06, "loss": 0.8995, "step": 3870 }, { "epoch": 0.85, "learning_rate": 1.1597164485329504e-06, "loss": 0.8568, "step": 3871 }, { "epoch": 0.85, "learning_rate": 1.1563948157068217e-06, "loss": 0.9404, "step": 3872 }, { "epoch": 0.85, "learning_rate": 1.1530776546320721e-06, "loss": 0.872, "step": 3873 }, { "epoch": 0.85, "learning_rate": 1.1497649669860179e-06, "loss": 0.9273, "step": 3874 }, { "epoch": 0.85, "learning_rate": 1.1464567544437144e-06, "loss": 0.8787, "step": 3875 }, { "epoch": 0.85, "learning_rate": 1.1431530186779505e-06, "loss": 0.9439, "step": 3876 }, { "epoch": 0.85, "learning_rate": 1.1398537613592531e-06, "loss": 0.9352, "step": 3877 }, { "epoch": 0.85, "learning_rate": 1.1365589841558866e-06, "loss": 0.8671, "step": 3878 }, { "epoch": 0.85, "learning_rate": 1.1332686887338485e-06, "loss": 0.8861, "step": 3879 }, { "epoch": 0.85, "learning_rate": 1.12998287675687e-06, "loss": 0.8697, "step": 3880 }, { "epoch": 0.85, "learning_rate": 1.1267015498864153e-06, "loss": 0.9135, "step": 3881 }, { "epoch": 0.85, "learning_rate": 1.1234247097816798e-06, "loss": 0.9749, "step": 3882 }, { "epoch": 0.85, "learning_rate": 1.1201523580995931e-06, "loss": 0.9051, "step": 3883 }, { "epoch": 0.85, "learning_rate": 1.1168844964948122e-06, "loss": 0.9327, "step": 3884 }, { "epoch": 0.85, "learning_rate": 1.113621126619725e-06, "loss": 0.9364, "step": 3885 }, { "epoch": 0.85, "learning_rate": 1.1103622501244483e-06, "loss": 0.871, "step": 3886 }, { "epoch": 0.85, "learning_rate": 1.1071078686568259e-06, "loss": 0.9357, "step": 3887 }, { "epoch": 0.85, "learning_rate": 1.1038579838624298e-06, "loss": 0.9101, "step": 3888 }, { "epoch": 0.85, "learning_rate": 1.1006125973845571e-06, "loss": 0.8677, "step": 3889 }, { "epoch": 0.85, "learning_rate": 1.0973717108642323e-06, "loss": 0.8893, "step": 3890 }, { "epoch": 0.85, "learning_rate": 1.094135325940202e-06, "loss": 0.9158, "step": 3891 }, { "epoch": 0.85, "learning_rate": 1.0909034442489396e-06, "loss": 0.9326, "step": 3892 }, { "epoch": 0.85, "learning_rate": 1.0876760674246368e-06, "loss": 0.3231, "step": 3893 }, { "epoch": 0.85, "learning_rate": 1.0844531970992122e-06, "loss": 0.2962, "step": 3894 }, { "epoch": 0.86, "learning_rate": 1.0812348349023038e-06, "loss": 0.881, "step": 3895 }, { "epoch": 0.86, "learning_rate": 1.0780209824612698e-06, "loss": 0.9655, "step": 3896 }, { "epoch": 0.86, "learning_rate": 1.074811641401189e-06, "loss": 0.9348, "step": 3897 }, { "epoch": 0.86, "learning_rate": 1.0716068133448564e-06, "loss": 0.8444, "step": 3898 }, { "epoch": 0.86, "learning_rate": 1.0684064999127875e-06, "loss": 0.888, "step": 3899 }, { "epoch": 0.86, "learning_rate": 1.065210702723215e-06, "loss": 0.28, "step": 3900 }, { "epoch": 0.86, "learning_rate": 1.0620194233920877e-06, "loss": 0.8988, "step": 3901 }, { "epoch": 0.86, "learning_rate": 1.0588326635330693e-06, "loss": 0.981, "step": 3902 }, { "epoch": 0.86, "learning_rate": 1.0556504247575383e-06, "loss": 0.9224, "step": 3903 }, { "epoch": 0.86, "learning_rate": 1.0524727086745878e-06, "loss": 0.8511, "step": 3904 }, { "epoch": 0.86, "learning_rate": 1.0492995168910225e-06, "loss": 0.8817, "step": 3905 }, { "epoch": 0.86, "learning_rate": 1.0461308510113632e-06, "loss": 0.8893, "step": 3906 }, { "epoch": 0.86, "learning_rate": 1.0429667126378363e-06, "loss": 0.8844, "step": 3907 }, { "epoch": 0.86, "learning_rate": 1.0398071033703849e-06, "loss": 0.8935, "step": 3908 }, { "epoch": 0.86, "learning_rate": 1.036652024806657e-06, "loss": 0.9568, "step": 3909 }, { "epoch": 0.86, "learning_rate": 1.0335014785420128e-06, "loss": 0.9381, "step": 3910 }, { "epoch": 0.86, "learning_rate": 1.0303554661695192e-06, "loss": 0.8947, "step": 3911 }, { "epoch": 0.86, "learning_rate": 1.0272139892799515e-06, "loss": 0.9411, "step": 3912 }, { "epoch": 0.86, "learning_rate": 1.0240770494617923e-06, "loss": 0.9306, "step": 3913 }, { "epoch": 0.86, "learning_rate": 1.0209446483012263e-06, "loss": 0.9245, "step": 3914 }, { "epoch": 0.86, "learning_rate": 1.0178167873821487e-06, "loss": 0.9606, "step": 3915 }, { "epoch": 0.86, "learning_rate": 1.0146934682861553e-06, "loss": 0.8873, "step": 3916 }, { "epoch": 0.86, "learning_rate": 1.0115746925925452e-06, "loss": 0.9462, "step": 3917 }, { "epoch": 0.86, "learning_rate": 1.008460461878321e-06, "loss": 0.9184, "step": 3918 }, { "epoch": 0.86, "learning_rate": 1.0053507777181914e-06, "loss": 0.8823, "step": 3919 }, { "epoch": 0.86, "learning_rate": 1.0022456416845561e-06, "loss": 0.9563, "step": 3920 }, { "epoch": 0.86, "learning_rate": 9.991450553475234e-07, "loss": 0.843, "step": 3921 }, { "epoch": 0.86, "learning_rate": 9.960490202748995e-07, "loss": 0.9326, "step": 3922 }, { "epoch": 0.86, "learning_rate": 9.929575380321866e-07, "loss": 0.8673, "step": 3923 }, { "epoch": 0.86, "learning_rate": 9.898706101825883e-07, "loss": 0.8659, "step": 3924 }, { "epoch": 0.86, "learning_rate": 9.86788238287003e-07, "loss": 0.9139, "step": 3925 }, { "epoch": 0.86, "learning_rate": 9.837104239040251e-07, "loss": 0.9239, "step": 3926 }, { "epoch": 0.86, "learning_rate": 9.806371685899463e-07, "loss": 0.313, "step": 3927 }, { "epoch": 0.86, "learning_rate": 9.775684738987524e-07, "loss": 0.9729, "step": 3928 }, { "epoch": 0.86, "learning_rate": 9.745043413821208e-07, "loss": 0.8704, "step": 3929 }, { "epoch": 0.86, "learning_rate": 9.71444772589426e-07, "loss": 0.9118, "step": 3930 }, { "epoch": 0.86, "learning_rate": 9.683897690677323e-07, "loss": 0.8962, "step": 3931 }, { "epoch": 0.86, "learning_rate": 9.653393323617954e-07, "loss": 0.9311, "step": 3932 }, { "epoch": 0.86, "learning_rate": 9.622934640140636e-07, "loss": 0.9132, "step": 3933 }, { "epoch": 0.86, "learning_rate": 9.592521655646735e-07, "loss": 0.9323, "step": 3934 }, { "epoch": 0.86, "learning_rate": 9.56215438551452e-07, "loss": 0.8933, "step": 3935 }, { "epoch": 0.86, "learning_rate": 9.531832845099142e-07, "loss": 0.8728, "step": 3936 }, { "epoch": 0.86, "learning_rate": 9.501557049732624e-07, "loss": 0.8729, "step": 3937 }, { "epoch": 0.86, "learning_rate": 9.471327014723863e-07, "loss": 0.9674, "step": 3938 }, { "epoch": 0.86, "learning_rate": 9.441142755358624e-07, "loss": 0.8429, "step": 3939 }, { "epoch": 0.86, "learning_rate": 9.411004286899495e-07, "loss": 0.8992, "step": 3940 }, { "epoch": 0.87, "learning_rate": 9.380911624585976e-07, "loss": 0.926, "step": 3941 }, { "epoch": 0.87, "learning_rate": 9.350864783634306e-07, "loss": 0.9126, "step": 3942 }, { "epoch": 0.87, "learning_rate": 9.320863779237644e-07, "loss": 0.9198, "step": 3943 }, { "epoch": 0.87, "learning_rate": 9.290908626565931e-07, "loss": 0.9229, "step": 3944 }, { "epoch": 0.87, "learning_rate": 9.26099934076593e-07, "loss": 0.8825, "step": 3945 }, { "epoch": 0.87, "learning_rate": 9.231135936961211e-07, "loss": 0.886, "step": 3946 }, { "epoch": 0.87, "learning_rate": 9.201318430252149e-07, "loss": 0.2896, "step": 3947 }, { "epoch": 0.87, "learning_rate": 9.171546835715906e-07, "loss": 0.9332, "step": 3948 }, { "epoch": 0.87, "learning_rate": 9.141821168406428e-07, "loss": 0.9457, "step": 3949 }, { "epoch": 0.87, "learning_rate": 9.112141443354439e-07, "loss": 0.9548, "step": 3950 }, { "epoch": 0.87, "learning_rate": 9.082507675567431e-07, "loss": 0.9556, "step": 3951 }, { "epoch": 0.87, "learning_rate": 9.052919880029676e-07, "loss": 0.9168, "step": 3952 }, { "epoch": 0.87, "learning_rate": 9.02337807170216e-07, "loss": 0.908, "step": 3953 }, { "epoch": 0.87, "learning_rate": 8.993882265522646e-07, "loss": 0.9332, "step": 3954 }, { "epoch": 0.87, "learning_rate": 8.964432476405638e-07, "loss": 0.9074, "step": 3955 }, { "epoch": 0.87, "learning_rate": 8.935028719242367e-07, "loss": 0.9387, "step": 3956 }, { "epoch": 0.87, "learning_rate": 8.905671008900773e-07, "loss": 0.869, "step": 3957 }, { "epoch": 0.87, "learning_rate": 8.876359360225528e-07, "loss": 0.9063, "step": 3958 }, { "epoch": 0.87, "learning_rate": 8.847093788038019e-07, "loss": 0.9131, "step": 3959 }, { "epoch": 0.87, "learning_rate": 8.817874307136298e-07, "loss": 0.899, "step": 3960 }, { "epoch": 0.87, "learning_rate": 8.788700932295158e-07, "loss": 0.9413, "step": 3961 }, { "epoch": 0.87, "learning_rate": 8.759573678266054e-07, "loss": 0.2954, "step": 3962 }, { "epoch": 0.87, "learning_rate": 8.730492559777127e-07, "loss": 0.9012, "step": 3963 }, { "epoch": 0.87, "learning_rate": 8.70145759153318e-07, "loss": 0.8989, "step": 3964 }, { "epoch": 0.87, "learning_rate": 8.672468788215682e-07, "loss": 0.9087, "step": 3965 }, { "epoch": 0.87, "learning_rate": 8.643526164482785e-07, "loss": 0.9453, "step": 3966 }, { "epoch": 0.87, "learning_rate": 8.614629734969249e-07, "loss": 0.8867, "step": 3967 }, { "epoch": 0.87, "learning_rate": 8.585779514286497e-07, "loss": 0.8997, "step": 3968 }, { "epoch": 0.87, "learning_rate": 8.556975517022603e-07, "loss": 0.9157, "step": 3969 }, { "epoch": 0.87, "learning_rate": 8.52821775774223e-07, "loss": 0.9005, "step": 3970 }, { "epoch": 0.87, "learning_rate": 8.4995062509867e-07, "loss": 0.9106, "step": 3971 }, { "epoch": 0.87, "learning_rate": 8.470841011273922e-07, "loss": 0.9613, "step": 3972 }, { "epoch": 0.87, "learning_rate": 8.442222053098415e-07, "loss": 0.8906, "step": 3973 }, { "epoch": 0.87, "learning_rate": 8.413649390931289e-07, "loss": 0.9398, "step": 3974 }, { "epoch": 0.87, "learning_rate": 8.385123039220277e-07, "loss": 0.972, "step": 3975 }, { "epoch": 0.87, "learning_rate": 8.356643012389653e-07, "loss": 0.8904, "step": 3976 }, { "epoch": 0.87, "learning_rate": 8.328209324840319e-07, "loss": 0.8793, "step": 3977 }, { "epoch": 0.87, "learning_rate": 8.299821990949664e-07, "loss": 0.9037, "step": 3978 }, { "epoch": 0.87, "learning_rate": 8.271481025071704e-07, "loss": 0.9133, "step": 3979 }, { "epoch": 0.87, "learning_rate": 8.243186441536999e-07, "loss": 0.9076, "step": 3980 }, { "epoch": 0.87, "learning_rate": 8.214938254652649e-07, "loss": 0.9392, "step": 3981 }, { "epoch": 0.87, "learning_rate": 8.186736478702295e-07, "loss": 0.9129, "step": 3982 }, { "epoch": 0.87, "learning_rate": 8.158581127946108e-07, "loss": 0.9416, "step": 3983 }, { "epoch": 0.87, "learning_rate": 8.130472216620778e-07, "loss": 0.8762, "step": 3984 }, { "epoch": 0.87, "learning_rate": 8.102409758939522e-07, "loss": 0.8859, "step": 3985 }, { "epoch": 0.87, "learning_rate": 8.074393769092071e-07, "loss": 0.8707, "step": 3986 }, { "epoch": 0.88, "learning_rate": 8.046424261244646e-07, "loss": 0.9226, "step": 3987 }, { "epoch": 0.88, "learning_rate": 8.018501249539989e-07, "loss": 0.8847, "step": 3988 }, { "epoch": 0.88, "learning_rate": 7.990624748097308e-07, "loss": 0.9075, "step": 3989 }, { "epoch": 0.88, "learning_rate": 7.962794771012284e-07, "loss": 0.9522, "step": 3990 }, { "epoch": 0.88, "learning_rate": 7.935011332357113e-07, "loss": 0.9202, "step": 3991 }, { "epoch": 0.88, "learning_rate": 7.907274446180436e-07, "loss": 0.8815, "step": 3992 }, { "epoch": 0.88, "learning_rate": 7.879584126507334e-07, "loss": 0.9209, "step": 3993 }, { "epoch": 0.88, "learning_rate": 7.851940387339385e-07, "loss": 0.9416, "step": 3994 }, { "epoch": 0.88, "learning_rate": 7.824343242654564e-07, "loss": 0.9126, "step": 3995 }, { "epoch": 0.88, "learning_rate": 7.79679270640733e-07, "loss": 0.8986, "step": 3996 }, { "epoch": 0.88, "learning_rate": 7.769288792528562e-07, "loss": 0.8394, "step": 3997 }, { "epoch": 0.88, "learning_rate": 7.741831514925535e-07, "loss": 0.9093, "step": 3998 }, { "epoch": 0.88, "learning_rate": 7.714420887481999e-07, "loss": 0.8711, "step": 3999 }, { "epoch": 0.88, "learning_rate": 7.687056924058056e-07, "loss": 0.9262, "step": 4000 }, { "epoch": 0.88, "learning_rate": 7.659739638490249e-07, "loss": 0.3146, "step": 4001 }, { "epoch": 0.88, "learning_rate": 7.632469044591517e-07, "loss": 0.894, "step": 4002 }, { "epoch": 0.88, "learning_rate": 7.605245156151175e-07, "loss": 0.9293, "step": 4003 }, { "epoch": 0.88, "learning_rate": 7.578067986934934e-07, "loss": 0.2975, "step": 4004 }, { "epoch": 0.88, "learning_rate": 7.550937550684867e-07, "loss": 0.9105, "step": 4005 }, { "epoch": 0.88, "learning_rate": 7.523853861119423e-07, "loss": 0.8719, "step": 4006 }, { "epoch": 0.88, "learning_rate": 7.496816931933427e-07, "loss": 0.874, "step": 4007 }, { "epoch": 0.88, "learning_rate": 7.469826776798039e-07, "loss": 0.8713, "step": 4008 }, { "epoch": 0.88, "learning_rate": 7.442883409360779e-07, "loss": 0.9241, "step": 4009 }, { "epoch": 0.88, "learning_rate": 7.415986843245515e-07, "loss": 0.856, "step": 4010 }, { "epoch": 0.88, "learning_rate": 7.389137092052434e-07, "loss": 0.944, "step": 4011 }, { "epoch": 0.88, "learning_rate": 7.362334169358087e-07, "loss": 0.9123, "step": 4012 }, { "epoch": 0.88, "learning_rate": 7.335578088715267e-07, "loss": 0.889, "step": 4013 }, { "epoch": 0.88, "learning_rate": 7.308868863653162e-07, "loss": 0.8221, "step": 4014 }, { "epoch": 0.88, "learning_rate": 7.282206507677225e-07, "loss": 0.8985, "step": 4015 }, { "epoch": 0.88, "learning_rate": 7.255591034269249e-07, "loss": 0.9723, "step": 4016 }, { "epoch": 0.88, "learning_rate": 7.22902245688728e-07, "loss": 0.9377, "step": 4017 }, { "epoch": 0.88, "learning_rate": 7.20250078896566e-07, "loss": 0.9172, "step": 4018 }, { "epoch": 0.88, "learning_rate": 7.176026043915018e-07, "loss": 0.8816, "step": 4019 }, { "epoch": 0.88, "learning_rate": 7.149598235122279e-07, "loss": 0.8802, "step": 4020 }, { "epoch": 0.88, "learning_rate": 7.123217375950587e-07, "loss": 0.9167, "step": 4021 }, { "epoch": 0.88, "learning_rate": 7.09688347973938e-07, "loss": 0.862, "step": 4022 }, { "epoch": 0.88, "learning_rate": 7.070596559804333e-07, "loss": 0.9717, "step": 4023 }, { "epoch": 0.88, "learning_rate": 7.044356629437388e-07, "loss": 0.8776, "step": 4024 }, { "epoch": 0.88, "learning_rate": 7.01816370190671e-07, "loss": 0.9226, "step": 4025 }, { "epoch": 0.88, "learning_rate": 6.992017790456696e-07, "loss": 0.9819, "step": 4026 }, { "epoch": 0.88, "learning_rate": 6.965918908307978e-07, "loss": 0.8997, "step": 4027 }, { "epoch": 0.88, "learning_rate": 6.939867068657391e-07, "loss": 0.8612, "step": 4028 }, { "epoch": 0.88, "learning_rate": 6.913862284678019e-07, "loss": 0.8786, "step": 4029 }, { "epoch": 0.88, "learning_rate": 6.887904569519133e-07, "loss": 0.3256, "step": 4030 }, { "epoch": 0.88, "learning_rate": 6.861993936306166e-07, "loss": 0.8634, "step": 4031 }, { "epoch": 0.89, "learning_rate": 6.836130398140794e-07, "loss": 0.9352, "step": 4032 }, { "epoch": 0.89, "learning_rate": 6.810313968100868e-07, "loss": 0.8665, "step": 4033 }, { "epoch": 0.89, "learning_rate": 6.784544659240411e-07, "loss": 0.9121, "step": 4034 }, { "epoch": 0.89, "learning_rate": 6.758822484589622e-07, "loss": 0.9221, "step": 4035 }, { "epoch": 0.89, "learning_rate": 6.733147457154876e-07, "loss": 0.9143, "step": 4036 }, { "epoch": 0.89, "learning_rate": 6.707519589918687e-07, "loss": 0.9431, "step": 4037 }, { "epoch": 0.89, "learning_rate": 6.681938895839746e-07, "loss": 0.887, "step": 4038 }, { "epoch": 0.89, "learning_rate": 6.656405387852871e-07, "loss": 0.8396, "step": 4039 }, { "epoch": 0.89, "learning_rate": 6.630919078869036e-07, "loss": 0.9243, "step": 4040 }, { "epoch": 0.89, "learning_rate": 6.605479981775342e-07, "loss": 0.8749, "step": 4041 }, { "epoch": 0.89, "learning_rate": 6.580088109435023e-07, "loss": 0.8896, "step": 4042 }, { "epoch": 0.89, "learning_rate": 6.554743474687409e-07, "loss": 0.8938, "step": 4043 }, { "epoch": 0.89, "learning_rate": 6.52944609034799e-07, "loss": 0.9414, "step": 4044 }, { "epoch": 0.89, "learning_rate": 6.504195969208315e-07, "loss": 0.8903, "step": 4045 }, { "epoch": 0.89, "learning_rate": 6.478993124036081e-07, "loss": 0.3042, "step": 4046 }, { "epoch": 0.89, "learning_rate": 6.453837567575028e-07, "loss": 0.9355, "step": 4047 }, { "epoch": 0.89, "learning_rate": 6.428729312545056e-07, "loss": 0.2739, "step": 4048 }, { "epoch": 0.89, "learning_rate": 6.403668371642058e-07, "loss": 0.8768, "step": 4049 }, { "epoch": 0.89, "learning_rate": 6.378654757538072e-07, "loss": 0.9105, "step": 4050 }, { "epoch": 0.89, "learning_rate": 6.35368848288117e-07, "loss": 0.9488, "step": 4051 }, { "epoch": 0.89, "learning_rate": 6.328769560295511e-07, "loss": 0.9383, "step": 4052 }, { "epoch": 0.89, "learning_rate": 6.303898002381292e-07, "loss": 0.8974, "step": 4053 }, { "epoch": 0.89, "learning_rate": 6.279073821714776e-07, "loss": 0.8768, "step": 4054 }, { "epoch": 0.89, "learning_rate": 6.254297030848255e-07, "loss": 0.978, "step": 4055 }, { "epoch": 0.89, "learning_rate": 6.229567642310064e-07, "loss": 0.8845, "step": 4056 }, { "epoch": 0.89, "learning_rate": 6.204885668604566e-07, "loss": 0.8681, "step": 4057 }, { "epoch": 0.89, "learning_rate": 6.180251122212144e-07, "loss": 0.9379, "step": 4058 }, { "epoch": 0.89, "learning_rate": 6.155664015589236e-07, "loss": 0.9368, "step": 4059 }, { "epoch": 0.89, "learning_rate": 6.131124361168228e-07, "loss": 0.9278, "step": 4060 }, { "epoch": 0.89, "learning_rate": 6.106632171357574e-07, "loss": 0.9064, "step": 4061 }, { "epoch": 0.89, "learning_rate": 6.082187458541699e-07, "loss": 0.9213, "step": 4062 }, { "epoch": 0.89, "learning_rate": 6.057790235081007e-07, "loss": 0.8825, "step": 4063 }, { "epoch": 0.89, "learning_rate": 6.033440513311917e-07, "loss": 0.86, "step": 4064 }, { "epoch": 0.89, "learning_rate": 6.009138305546813e-07, "loss": 0.9597, "step": 4065 }, { "epoch": 0.89, "learning_rate": 5.98488362407409e-07, "loss": 0.8494, "step": 4066 }, { "epoch": 0.89, "learning_rate": 5.960676481158023e-07, "loss": 0.9289, "step": 4067 }, { "epoch": 0.89, "learning_rate": 5.936516889038924e-07, "loss": 0.8845, "step": 4068 }, { "epoch": 0.89, "learning_rate": 5.912404859933063e-07, "loss": 0.8792, "step": 4069 }, { "epoch": 0.89, "learning_rate": 5.888340406032633e-07, "loss": 0.3066, "step": 4070 }, { "epoch": 0.89, "learning_rate": 5.864323539505756e-07, "loss": 0.8813, "step": 4071 }, { "epoch": 0.89, "learning_rate": 5.84035427249654e-07, "loss": 0.9371, "step": 4072 }, { "epoch": 0.89, "learning_rate": 5.81643261712499e-07, "loss": 0.9215, "step": 4073 }, { "epoch": 0.89, "learning_rate": 5.792558585487029e-07, "loss": 0.8303, "step": 4074 }, { "epoch": 0.89, "learning_rate": 5.768732189654535e-07, "loss": 0.929, "step": 4075 }, { "epoch": 0.89, "learning_rate": 5.744953441675261e-07, "loss": 0.9117, "step": 4076 }, { "epoch": 0.89, "learning_rate": 5.721222353572886e-07, "loss": 0.9105, "step": 4077 }, { "epoch": 0.9, "learning_rate": 5.697538937347002e-07, "loss": 0.8991, "step": 4078 }, { "epoch": 0.9, "learning_rate": 5.673903204973064e-07, "loss": 0.8977, "step": 4079 }, { "epoch": 0.9, "learning_rate": 5.650315168402443e-07, "loss": 0.9332, "step": 4080 }, { "epoch": 0.9, "learning_rate": 5.626774839562377e-07, "loss": 0.2892, "step": 4081 }, { "epoch": 0.9, "learning_rate": 5.603282230355988e-07, "loss": 0.9443, "step": 4082 }, { "epoch": 0.9, "learning_rate": 5.579837352662276e-07, "loss": 0.8656, "step": 4083 }, { "epoch": 0.9, "learning_rate": 5.556440218336078e-07, "loss": 0.902, "step": 4084 }, { "epoch": 0.9, "learning_rate": 5.533090839208133e-07, "loss": 0.9321, "step": 4085 }, { "epoch": 0.9, "learning_rate": 5.509789227084983e-07, "loss": 0.8829, "step": 4086 }, { "epoch": 0.9, "learning_rate": 5.486535393749049e-07, "loss": 0.9102, "step": 4087 }, { "epoch": 0.9, "learning_rate": 5.463329350958602e-07, "loss": 0.9353, "step": 4088 }, { "epoch": 0.9, "learning_rate": 5.440171110447712e-07, "loss": 0.9025, "step": 4089 }, { "epoch": 0.9, "learning_rate": 5.417060683926301e-07, "loss": 0.9074, "step": 4090 }, { "epoch": 0.9, "learning_rate": 5.393998083080121e-07, "loss": 0.8919, "step": 4091 }, { "epoch": 0.9, "learning_rate": 5.370983319570722e-07, "loss": 0.9335, "step": 4092 }, { "epoch": 0.9, "learning_rate": 5.348016405035472e-07, "loss": 0.8977, "step": 4093 }, { "epoch": 0.9, "learning_rate": 5.325097351087549e-07, "loss": 0.915, "step": 4094 }, { "epoch": 0.9, "learning_rate": 5.302226169315927e-07, "loss": 0.8929, "step": 4095 }, { "epoch": 0.9, "learning_rate": 5.279402871285366e-07, "loss": 0.9764, "step": 4096 }, { "epoch": 0.9, "learning_rate": 5.256627468536435e-07, "loss": 0.9294, "step": 4097 }, { "epoch": 0.9, "learning_rate": 5.233899972585466e-07, "loss": 0.9012, "step": 4098 }, { "epoch": 0.9, "learning_rate": 5.211220394924566e-07, "loss": 0.2934, "step": 4099 }, { "epoch": 0.9, "learning_rate": 5.188588747021628e-07, "loss": 0.3038, "step": 4100 }, { "epoch": 0.9, "learning_rate": 5.166005040320299e-07, "loss": 0.8773, "step": 4101 }, { "epoch": 0.9, "learning_rate": 5.143469286239977e-07, "loss": 0.9679, "step": 4102 }, { "epoch": 0.9, "learning_rate": 5.120981496175815e-07, "loss": 0.9482, "step": 4103 }, { "epoch": 0.9, "learning_rate": 5.098541681498736e-07, "loss": 0.8708, "step": 4104 }, { "epoch": 0.9, "learning_rate": 5.076149853555379e-07, "loss": 0.8772, "step": 4105 }, { "epoch": 0.9, "learning_rate": 5.053806023668128e-07, "loss": 0.9202, "step": 4106 }, { "epoch": 0.9, "learning_rate": 5.031510203135104e-07, "loss": 0.8875, "step": 4107 }, { "epoch": 0.9, "learning_rate": 5.009262403230131e-07, "loss": 0.8791, "step": 4108 }, { "epoch": 0.9, "learning_rate": 4.987062635202778e-07, "loss": 0.2827, "step": 4109 }, { "epoch": 0.9, "learning_rate": 4.964910910278298e-07, "loss": 0.9318, "step": 4110 }, { "epoch": 0.9, "learning_rate": 4.942807239657688e-07, "loss": 0.2972, "step": 4111 }, { "epoch": 0.9, "learning_rate": 4.920751634517606e-07, "loss": 0.8703, "step": 4112 }, { "epoch": 0.9, "learning_rate": 4.898744106010433e-07, "loss": 0.9349, "step": 4113 }, { "epoch": 0.9, "learning_rate": 4.876784665264234e-07, "loss": 0.8432, "step": 4114 }, { "epoch": 0.9, "learning_rate": 4.854873323382747e-07, "loss": 0.9531, "step": 4115 }, { "epoch": 0.9, "learning_rate": 4.833010091445411e-07, "loss": 0.8947, "step": 4116 }, { "epoch": 0.9, "learning_rate": 4.811194980507317e-07, "loss": 0.9042, "step": 4117 }, { "epoch": 0.9, "learning_rate": 4.789428001599228e-07, "loss": 0.854, "step": 4118 }, { "epoch": 0.9, "learning_rate": 4.7677091657276076e-07, "loss": 0.943, "step": 4119 }, { "epoch": 0.9, "learning_rate": 4.7460384838744934e-07, "loss": 0.8712, "step": 4120 }, { "epoch": 0.9, "learning_rate": 4.724415966997642e-07, "loss": 0.917, "step": 4121 }, { "epoch": 0.9, "learning_rate": 4.702841626030441e-07, "loss": 0.9191, "step": 4122 }, { "epoch": 0.91, "learning_rate": 4.68131547188192e-07, "loss": 0.945, "step": 4123 }, { "epoch": 0.91, "learning_rate": 4.659837515436716e-07, "loss": 0.8913, "step": 4124 }, { "epoch": 0.91, "learning_rate": 4.638407767555131e-07, "loss": 0.8614, "step": 4125 }, { "epoch": 0.91, "learning_rate": 4.617026239073064e-07, "loss": 0.8853, "step": 4126 }, { "epoch": 0.91, "learning_rate": 4.5956929408020546e-07, "loss": 0.9925, "step": 4127 }, { "epoch": 0.91, "learning_rate": 4.5744078835292305e-07, "loss": 0.9235, "step": 4128 }, { "epoch": 0.91, "learning_rate": 4.5531710780173375e-07, "loss": 0.8963, "step": 4129 }, { "epoch": 0.91, "learning_rate": 4.531982535004731e-07, "loss": 0.9203, "step": 4130 }, { "epoch": 0.91, "learning_rate": 4.510842265205362e-07, "loss": 0.923, "step": 4131 }, { "epoch": 0.91, "learning_rate": 4.4897502793087576e-07, "loss": 0.914, "step": 4132 }, { "epoch": 0.91, "learning_rate": 4.468706587980043e-07, "loss": 0.8891, "step": 4133 }, { "epoch": 0.91, "learning_rate": 4.4477112018599056e-07, "loss": 0.9238, "step": 4134 }, { "epoch": 0.91, "learning_rate": 4.4267641315646313e-07, "loss": 0.9047, "step": 4135 }, { "epoch": 0.91, "learning_rate": 4.40586538768607e-07, "loss": 0.8964, "step": 4136 }, { "epoch": 0.91, "learning_rate": 4.3850149807916354e-07, "loss": 0.9225, "step": 4137 }, { "epoch": 0.91, "learning_rate": 4.364212921424271e-07, "loss": 0.8607, "step": 4138 }, { "epoch": 0.91, "learning_rate": 4.3434592201025084e-07, "loss": 0.8461, "step": 4139 }, { "epoch": 0.91, "learning_rate": 4.3227538873204076e-07, "loss": 0.8505, "step": 4140 }, { "epoch": 0.91, "learning_rate": 4.3020969335475946e-07, "loss": 0.9724, "step": 4141 }, { "epoch": 0.91, "learning_rate": 4.281488369229203e-07, "loss": 0.8668, "step": 4142 }, { "epoch": 0.91, "learning_rate": 4.260928204785919e-07, "loss": 0.9575, "step": 4143 }, { "epoch": 0.91, "learning_rate": 4.2404164506139487e-07, "loss": 0.9686, "step": 4144 }, { "epoch": 0.91, "learning_rate": 4.2199531170850296e-07, "loss": 0.8919, "step": 4145 }, { "epoch": 0.91, "learning_rate": 4.1995382145463836e-07, "loss": 0.91, "step": 4146 }, { "epoch": 0.91, "learning_rate": 4.179171753320777e-07, "loss": 0.9082, "step": 4147 }, { "epoch": 0.91, "learning_rate": 4.158853743706481e-07, "loss": 0.3279, "step": 4148 }, { "epoch": 0.91, "learning_rate": 4.1385841959772353e-07, "loss": 0.9429, "step": 4149 }, { "epoch": 0.91, "learning_rate": 4.118363120382318e-07, "loss": 0.8996, "step": 4150 }, { "epoch": 0.91, "learning_rate": 4.0981905271464615e-07, "loss": 0.9037, "step": 4151 }, { "epoch": 0.91, "learning_rate": 4.0780664264698954e-07, "loss": 0.9322, "step": 4152 }, { "epoch": 0.91, "learning_rate": 4.0579908285283465e-07, "loss": 0.8945, "step": 4153 }, { "epoch": 0.91, "learning_rate": 4.037963743472995e-07, "loss": 0.8148, "step": 4154 }, { "epoch": 0.91, "learning_rate": 4.017985181430495e-07, "loss": 0.8291, "step": 4155 }, { "epoch": 0.91, "learning_rate": 3.9980551525029555e-07, "loss": 0.8601, "step": 4156 }, { "epoch": 0.91, "learning_rate": 3.9781736667679703e-07, "loss": 0.9573, "step": 4157 }, { "epoch": 0.91, "learning_rate": 3.9583407342785653e-07, "loss": 0.9194, "step": 4158 }, { "epoch": 0.91, "learning_rate": 3.9385563650632284e-07, "loss": 0.8845, "step": 4159 }, { "epoch": 0.91, "learning_rate": 3.918820569125881e-07, "loss": 0.9071, "step": 4160 }, { "epoch": 0.91, "learning_rate": 3.8991333564458943e-07, "loss": 0.9226, "step": 4161 }, { "epoch": 0.91, "learning_rate": 3.879494736978062e-07, "loss": 0.8694, "step": 4162 }, { "epoch": 0.91, "learning_rate": 3.8599047206526187e-07, "loss": 0.9228, "step": 4163 }, { "epoch": 0.91, "learning_rate": 3.840363317375229e-07, "loss": 0.8812, "step": 4164 }, { "epoch": 0.91, "learning_rate": 3.820870537026944e-07, "loss": 0.8892, "step": 4165 }, { "epoch": 0.91, "learning_rate": 3.801426389464247e-07, "loss": 0.895, "step": 4166 }, { "epoch": 0.91, "learning_rate": 3.782030884519061e-07, "loss": 0.9357, "step": 4167 }, { "epoch": 0.91, "learning_rate": 3.7626840319986536e-07, "loss": 0.9056, "step": 4168 }, { "epoch": 0.92, "learning_rate": 3.7433858416857316e-07, "loss": 0.8677, "step": 4169 }, { "epoch": 0.92, "learning_rate": 3.7241363233384007e-07, "loss": 0.9076, "step": 4170 }, { "epoch": 0.92, "learning_rate": 3.7049354866901313e-07, "loss": 0.9001, "step": 4171 }, { "epoch": 0.92, "learning_rate": 3.685783341449789e-07, "loss": 0.9202, "step": 4172 }, { "epoch": 0.92, "learning_rate": 3.666679897301617e-07, "loss": 0.8538, "step": 4173 }, { "epoch": 0.92, "learning_rate": 3.6476251639052217e-07, "loss": 0.9147, "step": 4174 }, { "epoch": 0.92, "learning_rate": 3.628619150895607e-07, "loss": 0.8928, "step": 4175 }, { "epoch": 0.92, "learning_rate": 3.60966186788313e-07, "loss": 0.2813, "step": 4176 }, { "epoch": 0.92, "learning_rate": 3.5907533244534887e-07, "loss": 0.9474, "step": 4177 }, { "epoch": 0.92, "learning_rate": 3.571893530167758e-07, "loss": 0.9051, "step": 4178 }, { "epoch": 0.92, "learning_rate": 3.553082494562354e-07, "loss": 0.9656, "step": 4179 }, { "epoch": 0.92, "learning_rate": 3.534320227149035e-07, "loss": 0.8996, "step": 4180 }, { "epoch": 0.92, "learning_rate": 3.5156067374149117e-07, "loss": 0.8955, "step": 4181 }, { "epoch": 0.92, "learning_rate": 3.496942034822415e-07, "loss": 0.9093, "step": 4182 }, { "epoch": 0.92, "learning_rate": 3.4783261288093286e-07, "loss": 0.9657, "step": 4183 }, { "epoch": 0.92, "learning_rate": 3.459759028788734e-07, "loss": 0.3315, "step": 4184 }, { "epoch": 0.92, "learning_rate": 3.441240744149055e-07, "loss": 0.2669, "step": 4185 }, { "epoch": 0.92, "learning_rate": 3.4227712842540226e-07, "loss": 0.9403, "step": 4186 }, { "epoch": 0.92, "learning_rate": 3.40435065844269e-07, "loss": 0.897, "step": 4187 }, { "epoch": 0.92, "learning_rate": 3.385978876029394e-07, "loss": 0.9453, "step": 4188 }, { "epoch": 0.92, "learning_rate": 3.367655946303805e-07, "loss": 0.9346, "step": 4189 }, { "epoch": 0.92, "learning_rate": 3.3493818785308886e-07, "loss": 0.8632, "step": 4190 }, { "epoch": 0.92, "learning_rate": 3.3311566819508536e-07, "loss": 0.9174, "step": 4191 }, { "epoch": 0.92, "learning_rate": 3.3129803657792505e-07, "loss": 0.9241, "step": 4192 }, { "epoch": 0.92, "learning_rate": 3.294852939206916e-07, "loss": 0.9571, "step": 4193 }, { "epoch": 0.92, "learning_rate": 3.276774411399919e-07, "loss": 0.908, "step": 4194 }, { "epoch": 0.92, "learning_rate": 3.2587447914996463e-07, "loss": 0.9365, "step": 4195 }, { "epoch": 0.92, "learning_rate": 3.240764088622739e-07, "loss": 0.9035, "step": 4196 }, { "epoch": 0.92, "learning_rate": 3.2228323118611015e-07, "loss": 0.9052, "step": 4197 }, { "epoch": 0.92, "learning_rate": 3.204949470281904e-07, "loss": 0.9138, "step": 4198 }, { "epoch": 0.92, "learning_rate": 3.1871155729275683e-07, "loss": 0.8956, "step": 4199 }, { "epoch": 0.92, "learning_rate": 3.1693306288157697e-07, "loss": 0.8932, "step": 4200 }, { "epoch": 0.92, "learning_rate": 3.1515946469394263e-07, "loss": 0.9074, "step": 4201 }, { "epoch": 0.92, "learning_rate": 3.1339076362666975e-07, "loss": 0.8951, "step": 4202 }, { "epoch": 0.92, "learning_rate": 3.116269605740996e-07, "loss": 0.9294, "step": 4203 }, { "epoch": 0.92, "learning_rate": 3.098680564280965e-07, "loss": 0.8713, "step": 4204 }, { "epoch": 0.92, "learning_rate": 3.0811405207804456e-07, "loss": 0.8567, "step": 4205 }, { "epoch": 0.92, "learning_rate": 3.063649484108533e-07, "loss": 0.9602, "step": 4206 }, { "epoch": 0.92, "learning_rate": 3.0462074631095404e-07, "loss": 0.8986, "step": 4207 }, { "epoch": 0.92, "learning_rate": 3.0288144666030027e-07, "loss": 0.9119, "step": 4208 }, { "epoch": 0.92, "learning_rate": 3.011470503383629e-07, "loss": 0.906, "step": 4209 }, { "epoch": 0.92, "learning_rate": 2.9941755822213704e-07, "loss": 0.9319, "step": 4210 }, { "epoch": 0.92, "learning_rate": 2.976929711861376e-07, "loss": 0.8993, "step": 4211 }, { "epoch": 0.92, "learning_rate": 2.9597329010239706e-07, "loss": 0.8929, "step": 4212 }, { "epoch": 0.92, "learning_rate": 2.942585158404698e-07, "loss": 0.9114, "step": 4213 }, { "epoch": 0.93, "learning_rate": 2.92548649267429e-07, "loss": 0.9194, "step": 4214 }, { "epoch": 0.93, "learning_rate": 2.9084369124786293e-07, "loss": 0.8993, "step": 4215 }, { "epoch": 0.93, "learning_rate": 2.891436426438821e-07, "loss": 0.8643, "step": 4216 }, { "epoch": 0.93, "learning_rate": 2.8744850431511205e-07, "loss": 0.9185, "step": 4217 }, { "epoch": 0.93, "learning_rate": 2.857582771186962e-07, "loss": 0.9618, "step": 4218 }, { "epoch": 0.93, "learning_rate": 2.8407296190929413e-07, "loss": 0.9472, "step": 4219 }, { "epoch": 0.93, "learning_rate": 2.8239255953908305e-07, "loss": 0.9305, "step": 4220 }, { "epoch": 0.93, "learning_rate": 2.807170708577545e-07, "loss": 0.9258, "step": 4221 }, { "epoch": 0.93, "learning_rate": 2.7904649671251617e-07, "loss": 0.8914, "step": 4222 }, { "epoch": 0.93, "learning_rate": 2.7738083794809026e-07, "loss": 0.9179, "step": 4223 }, { "epoch": 0.93, "learning_rate": 2.7572009540671405e-07, "loss": 0.881, "step": 4224 }, { "epoch": 0.93, "learning_rate": 2.740642699281382e-07, "loss": 0.8863, "step": 4225 }, { "epoch": 0.93, "learning_rate": 2.7241336234962943e-07, "loss": 0.8776, "step": 4226 }, { "epoch": 0.93, "learning_rate": 2.707673735059635e-07, "loss": 0.9003, "step": 4227 }, { "epoch": 0.93, "learning_rate": 2.6912630422943234e-07, "loss": 0.8607, "step": 4228 }, { "epoch": 0.93, "learning_rate": 2.674901553498388e-07, "loss": 0.9243, "step": 4229 }, { "epoch": 0.93, "learning_rate": 2.6585892769450005e-07, "loss": 0.8461, "step": 4230 }, { "epoch": 0.93, "learning_rate": 2.6423262208824073e-07, "loss": 0.9164, "step": 4231 }, { "epoch": 0.93, "learning_rate": 2.626112393534008e-07, "loss": 0.975, "step": 4232 }, { "epoch": 0.93, "learning_rate": 2.6099478030982897e-07, "loss": 0.912, "step": 4233 }, { "epoch": 0.93, "learning_rate": 2.593832457748846e-07, "loss": 0.9122, "step": 4234 }, { "epoch": 0.93, "learning_rate": 2.57776636563436e-07, "loss": 0.9155, "step": 4235 }, { "epoch": 0.93, "learning_rate": 2.5617495348786214e-07, "loss": 0.9221, "step": 4236 }, { "epoch": 0.93, "learning_rate": 2.545781973580519e-07, "loss": 0.3014, "step": 4237 }, { "epoch": 0.93, "learning_rate": 2.529863689814005e-07, "loss": 0.866, "step": 4238 }, { "epoch": 0.93, "learning_rate": 2.513994691628141e-07, "loss": 0.9092, "step": 4239 }, { "epoch": 0.93, "learning_rate": 2.498174987047042e-07, "loss": 0.8963, "step": 4240 }, { "epoch": 0.93, "learning_rate": 2.4824045840699194e-07, "loss": 0.8722, "step": 4241 }, { "epoch": 0.93, "learning_rate": 2.4666834906710514e-07, "loss": 0.9223, "step": 4242 }, { "epoch": 0.93, "learning_rate": 2.4510117147997556e-07, "loss": 0.8935, "step": 4243 }, { "epoch": 0.93, "learning_rate": 2.435389264380472e-07, "loss": 0.9239, "step": 4244 }, { "epoch": 0.93, "learning_rate": 2.4198161473126147e-07, "loss": 0.8992, "step": 4245 }, { "epoch": 0.93, "learning_rate": 2.404292371470729e-07, "loss": 0.9596, "step": 4246 }, { "epoch": 0.93, "learning_rate": 2.388817944704369e-07, "loss": 0.9873, "step": 4247 }, { "epoch": 0.93, "learning_rate": 2.3733928748381652e-07, "loss": 0.3151, "step": 4248 }, { "epoch": 0.93, "learning_rate": 2.3580171696717558e-07, "loss": 0.9057, "step": 4249 }, { "epoch": 0.93, "learning_rate": 2.342690836979833e-07, "loss": 0.9038, "step": 4250 }, { "epoch": 0.93, "learning_rate": 2.327413884512142e-07, "loss": 0.8816, "step": 4251 }, { "epoch": 0.93, "learning_rate": 2.312186319993437e-07, "loss": 0.8639, "step": 4252 }, { "epoch": 0.93, "learning_rate": 2.297008151123503e-07, "loss": 0.8935, "step": 4253 }, { "epoch": 0.93, "learning_rate": 2.281879385577157e-07, "loss": 0.9273, "step": 4254 }, { "epoch": 0.93, "learning_rate": 2.2668000310042237e-07, "loss": 0.9143, "step": 4255 }, { "epoch": 0.93, "learning_rate": 2.2517700950295483e-07, "loss": 0.918, "step": 4256 }, { "epoch": 0.93, "learning_rate": 2.2367895852529853e-07, "loss": 0.8922, "step": 4257 }, { "epoch": 0.93, "learning_rate": 2.2218585092494083e-07, "loss": 0.9535, "step": 4258 }, { "epoch": 0.93, "learning_rate": 2.2069768745686894e-07, "loss": 0.8767, "step": 4259 }, { "epoch": 0.94, "learning_rate": 2.1921446887356869e-07, "loss": 0.8489, "step": 4260 }, { "epoch": 0.94, "learning_rate": 2.1773619592502682e-07, "loss": 0.9565, "step": 4261 }, { "epoch": 0.94, "learning_rate": 2.1626286935872987e-07, "loss": 0.8314, "step": 4262 }, { "epoch": 0.94, "learning_rate": 2.147944899196608e-07, "loss": 0.9028, "step": 4263 }, { "epoch": 0.94, "learning_rate": 2.1333105835030455e-07, "loss": 0.9572, "step": 4264 }, { "epoch": 0.94, "learning_rate": 2.1187257539064143e-07, "loss": 0.9302, "step": 4265 }, { "epoch": 0.94, "learning_rate": 2.1041904177815042e-07, "loss": 0.9561, "step": 4266 }, { "epoch": 0.94, "learning_rate": 2.0897045824780803e-07, "loss": 0.8884, "step": 4267 }, { "epoch": 0.94, "learning_rate": 2.0752682553208837e-07, "loss": 0.8381, "step": 4268 }, { "epoch": 0.94, "learning_rate": 2.0608814436096082e-07, "loss": 0.9139, "step": 4269 }, { "epoch": 0.94, "learning_rate": 2.0465441546189125e-07, "loss": 0.9245, "step": 4270 }, { "epoch": 0.94, "learning_rate": 2.03225639559842e-07, "loss": 0.9572, "step": 4271 }, { "epoch": 0.94, "learning_rate": 2.0180181737727178e-07, "loss": 0.8992, "step": 4272 }, { "epoch": 0.94, "learning_rate": 2.0038294963413251e-07, "loss": 0.8966, "step": 4273 }, { "epoch": 0.94, "learning_rate": 1.989690370478714e-07, "loss": 0.9651, "step": 4274 }, { "epoch": 0.94, "learning_rate": 1.9756008033343211e-07, "loss": 0.8625, "step": 4275 }, { "epoch": 0.94, "learning_rate": 1.9615608020324915e-07, "loss": 0.3196, "step": 4276 }, { "epoch": 0.94, "learning_rate": 1.947570373672536e-07, "loss": 0.8871, "step": 4277 }, { "epoch": 0.94, "learning_rate": 1.9336295253286842e-07, "loss": 0.9671, "step": 4278 }, { "epoch": 0.94, "learning_rate": 1.9197382640500973e-07, "loss": 0.9286, "step": 4279 }, { "epoch": 0.94, "learning_rate": 1.9058965968608567e-07, "loss": 0.836, "step": 4280 }, { "epoch": 0.94, "learning_rate": 1.892104530759975e-07, "loss": 0.9284, "step": 4281 }, { "epoch": 0.94, "learning_rate": 1.878362072721396e-07, "loss": 0.9017, "step": 4282 }, { "epoch": 0.94, "learning_rate": 1.8646692296939605e-07, "loss": 0.8893, "step": 4283 }, { "epoch": 0.94, "learning_rate": 1.85102600860142e-07, "loss": 0.89, "step": 4284 }, { "epoch": 0.94, "learning_rate": 1.837432416342444e-07, "loss": 0.9014, "step": 4285 }, { "epoch": 0.94, "learning_rate": 1.8238884597906127e-07, "loss": 0.897, "step": 4286 }, { "epoch": 0.94, "learning_rate": 1.8103941457943918e-07, "loss": 0.8598, "step": 4287 }, { "epoch": 0.94, "learning_rate": 1.7969494811771682e-07, "loss": 0.9088, "step": 4288 }, { "epoch": 0.94, "learning_rate": 1.7835544727371922e-07, "loss": 0.9351, "step": 4289 }, { "epoch": 0.94, "learning_rate": 1.770209127247635e-07, "loss": 0.8915, "step": 4290 }, { "epoch": 0.94, "learning_rate": 1.7569134514565435e-07, "loss": 0.947, "step": 4291 }, { "epoch": 0.94, "learning_rate": 1.74366745208685e-07, "loss": 0.9044, "step": 4292 }, { "epoch": 0.94, "learning_rate": 1.7304711358363536e-07, "loss": 0.9201, "step": 4293 }, { "epoch": 0.94, "learning_rate": 1.7173245093777602e-07, "loss": 0.8573, "step": 4294 }, { "epoch": 0.94, "learning_rate": 1.7042275793586416e-07, "loss": 0.9138, "step": 4295 }, { "epoch": 0.94, "learning_rate": 1.6911803524014113e-07, "loss": 0.8976, "step": 4296 }, { "epoch": 0.94, "learning_rate": 1.6781828351033925e-07, "loss": 0.914, "step": 4297 }, { "epoch": 0.94, "learning_rate": 1.6652350340367495e-07, "loss": 0.3264, "step": 4298 }, { "epoch": 0.94, "learning_rate": 1.6523369557485126e-07, "loss": 0.9487, "step": 4299 }, { "epoch": 0.94, "learning_rate": 1.6394886067605752e-07, "loss": 0.901, "step": 4300 }, { "epoch": 0.94, "learning_rate": 1.6266899935696635e-07, "loss": 0.8705, "step": 4301 }, { "epoch": 0.94, "learning_rate": 1.6139411226473777e-07, "loss": 0.9514, "step": 4302 }, { "epoch": 0.94, "learning_rate": 1.601242000440162e-07, "loss": 0.8939, "step": 4303 }, { "epoch": 0.94, "learning_rate": 1.5885926333693013e-07, "loss": 0.932, "step": 4304 }, { "epoch": 0.95, "learning_rate": 1.5759930278309243e-07, "loss": 0.9014, "step": 4305 }, { "epoch": 0.95, "learning_rate": 1.563443190196001e-07, "loss": 0.9166, "step": 4306 }, { "epoch": 0.95, "learning_rate": 1.5509431268103116e-07, "loss": 0.9481, "step": 4307 }, { "epoch": 0.95, "learning_rate": 1.53849284399451e-07, "loss": 0.909, "step": 4308 }, { "epoch": 0.95, "learning_rate": 1.5260923480440505e-07, "loss": 0.8631, "step": 4309 }, { "epoch": 0.95, "learning_rate": 1.5137416452292164e-07, "loss": 0.9461, "step": 4310 }, { "epoch": 0.95, "learning_rate": 1.5014407417951127e-07, "loss": 0.9299, "step": 4311 }, { "epoch": 0.95, "learning_rate": 1.489189643961686e-07, "loss": 0.9615, "step": 4312 }, { "epoch": 0.95, "learning_rate": 1.4769883579236477e-07, "loss": 0.8754, "step": 4313 }, { "epoch": 0.95, "learning_rate": 1.4648368898505848e-07, "loss": 0.8991, "step": 4314 }, { "epoch": 0.95, "learning_rate": 1.4527352458868494e-07, "loss": 0.8636, "step": 4315 }, { "epoch": 0.95, "learning_rate": 1.4406834321516018e-07, "loss": 0.28, "step": 4316 }, { "epoch": 0.95, "learning_rate": 1.428681454738845e-07, "loss": 0.9285, "step": 4317 }, { "epoch": 0.95, "learning_rate": 1.416729319717325e-07, "loss": 0.9296, "step": 4318 }, { "epoch": 0.95, "learning_rate": 1.4048270331306402e-07, "loss": 0.9784, "step": 4319 }, { "epoch": 0.95, "learning_rate": 1.3929746009971434e-07, "loss": 0.8709, "step": 4320 }, { "epoch": 0.95, "learning_rate": 1.3811720293100074e-07, "loss": 0.9252, "step": 4321 }, { "epoch": 0.95, "learning_rate": 1.369419324037169e-07, "loss": 0.9073, "step": 4322 }, { "epoch": 0.95, "learning_rate": 1.3577164911213525e-07, "loss": 0.9003, "step": 4323 }, { "epoch": 0.95, "learning_rate": 1.3460635364800802e-07, "loss": 0.8375, "step": 4324 }, { "epoch": 0.95, "learning_rate": 1.3344604660056494e-07, "loss": 0.9092, "step": 4325 }, { "epoch": 0.95, "learning_rate": 1.3229072855651226e-07, "loss": 0.9135, "step": 4326 }, { "epoch": 0.95, "learning_rate": 1.3114040010003493e-07, "loss": 0.9101, "step": 4327 }, { "epoch": 0.95, "learning_rate": 1.299950618127932e-07, "loss": 0.9747, "step": 4328 }, { "epoch": 0.95, "learning_rate": 1.2885471427392603e-07, "loss": 0.9533, "step": 4329 }, { "epoch": 0.95, "learning_rate": 1.2771935806004776e-07, "loss": 0.9296, "step": 4330 }, { "epoch": 0.95, "learning_rate": 1.2658899374524912e-07, "loss": 0.9158, "step": 4331 }, { "epoch": 0.95, "learning_rate": 1.254636219010963e-07, "loss": 0.8674, "step": 4332 }, { "epoch": 0.95, "learning_rate": 1.2434324309663182e-07, "loss": 0.9084, "step": 4333 }, { "epoch": 0.95, "learning_rate": 1.2322785789837144e-07, "loss": 0.8383, "step": 4334 }, { "epoch": 0.95, "learning_rate": 1.2211746687030958e-07, "loss": 0.9129, "step": 4335 }, { "epoch": 0.95, "learning_rate": 1.2101207057391261e-07, "loss": 0.8696, "step": 4336 }, { "epoch": 0.95, "learning_rate": 1.1991166956812018e-07, "loss": 0.9163, "step": 4337 }, { "epoch": 0.95, "learning_rate": 1.1881626440935046e-07, "loss": 0.9548, "step": 4338 }, { "epoch": 0.95, "learning_rate": 1.1772585565149152e-07, "loss": 0.8893, "step": 4339 }, { "epoch": 0.95, "learning_rate": 1.1664044384590679e-07, "loss": 0.911, "step": 4340 }, { "epoch": 0.95, "learning_rate": 1.1556002954143164e-07, "loss": 0.9028, "step": 4341 }, { "epoch": 0.95, "learning_rate": 1.1448461328437688e-07, "loss": 0.958, "step": 4342 }, { "epoch": 0.95, "learning_rate": 1.1341419561852418e-07, "loss": 0.9052, "step": 4343 }, { "epoch": 0.95, "learning_rate": 1.1234877708512726e-07, "loss": 0.9376, "step": 4344 }, { "epoch": 0.95, "learning_rate": 1.1128835822291406e-07, "loss": 0.3362, "step": 4345 }, { "epoch": 0.95, "learning_rate": 1.1023293956808235e-07, "loss": 0.8827, "step": 4346 }, { "epoch": 0.95, "learning_rate": 1.0918252165430188e-07, "loss": 0.8497, "step": 4347 }, { "epoch": 0.95, "learning_rate": 1.0813710501271669e-07, "loss": 0.8786, "step": 4348 }, { "epoch": 0.95, "learning_rate": 1.0709669017193724e-07, "loss": 0.9145, "step": 4349 }, { "epoch": 0.95, "learning_rate": 1.060612776580483e-07, "loss": 0.8817, "step": 4350 }, { "epoch": 0.96, "learning_rate": 1.0503086799460438e-07, "loss": 0.9177, "step": 4351 }, { "epoch": 0.96, "learning_rate": 1.0400546170262982e-07, "loss": 0.9391, "step": 4352 }, { "epoch": 0.96, "learning_rate": 1.0298505930061875e-07, "loss": 0.9057, "step": 4353 }, { "epoch": 0.96, "learning_rate": 1.0196966130453734e-07, "loss": 0.9273, "step": 4354 }, { "epoch": 0.96, "learning_rate": 1.009592682278171e-07, "loss": 0.9032, "step": 4355 }, { "epoch": 0.96, "learning_rate": 9.995388058136269e-08, "loss": 0.863, "step": 4356 }, { "epoch": 0.96, "learning_rate": 9.895349887354633e-08, "loss": 0.8787, "step": 4357 }, { "epoch": 0.96, "learning_rate": 9.795812361020896e-08, "loss": 0.8864, "step": 4358 }, { "epoch": 0.96, "learning_rate": 9.696775529465907e-08, "loss": 0.9471, "step": 4359 }, { "epoch": 0.96, "learning_rate": 9.598239442767721e-08, "loss": 0.8563, "step": 4360 }, { "epoch": 0.96, "learning_rate": 9.500204150750591e-08, "loss": 0.9016, "step": 4361 }, { "epoch": 0.96, "learning_rate": 9.402669702986089e-08, "loss": 0.8862, "step": 4362 }, { "epoch": 0.96, "learning_rate": 9.305636148792096e-08, "loss": 0.962, "step": 4363 }, { "epoch": 0.96, "learning_rate": 9.209103537233699e-08, "loss": 0.939, "step": 4364 }, { "epoch": 0.96, "learning_rate": 9.113071917122407e-08, "loss": 0.9108, "step": 4365 }, { "epoch": 0.96, "learning_rate": 9.017541337016155e-08, "loss": 0.9381, "step": 4366 }, { "epoch": 0.96, "learning_rate": 8.922511845219972e-08, "loss": 0.8928, "step": 4367 }, { "epoch": 0.96, "learning_rate": 8.827983489785197e-08, "loss": 0.8774, "step": 4368 }, { "epoch": 0.96, "learning_rate": 8.73395631851015e-08, "loss": 0.8939, "step": 4369 }, { "epoch": 0.96, "learning_rate": 8.640430378939246e-08, "loss": 0.3124, "step": 4370 }, { "epoch": 0.96, "learning_rate": 8.547405718363876e-08, "loss": 0.9339, "step": 4371 }, { "epoch": 0.96, "learning_rate": 8.454882383821638e-08, "loss": 0.8233, "step": 4372 }, { "epoch": 0.96, "learning_rate": 8.362860422096885e-08, "loss": 0.9018, "step": 4373 }, { "epoch": 0.96, "learning_rate": 8.271339879720174e-08, "loss": 0.8945, "step": 4374 }, { "epoch": 0.96, "learning_rate": 8.180320802968822e-08, "loss": 0.8972, "step": 4375 }, { "epoch": 0.96, "learning_rate": 8.089803237866455e-08, "loss": 0.9632, "step": 4376 }, { "epoch": 0.96, "learning_rate": 7.999787230183021e-08, "loss": 0.9359, "step": 4377 }, { "epoch": 0.96, "learning_rate": 7.910272825434995e-08, "loss": 0.8875, "step": 4378 }, { "epoch": 0.96, "learning_rate": 7.821260068885062e-08, "loss": 0.9057, "step": 4379 }, { "epoch": 0.96, "learning_rate": 7.732749005542439e-08, "loss": 0.9436, "step": 4380 }, { "epoch": 0.96, "learning_rate": 7.64473968016255e-08, "loss": 0.8716, "step": 4381 }, { "epoch": 0.96, "learning_rate": 7.557232137247127e-08, "loss": 0.9, "step": 4382 }, { "epoch": 0.96, "learning_rate": 7.470226421044225e-08, "loss": 0.9158, "step": 4383 }, { "epoch": 0.96, "learning_rate": 7.38372257554798e-08, "loss": 0.8844, "step": 4384 }, { "epoch": 0.96, "learning_rate": 7.297720644499073e-08, "loss": 0.918, "step": 4385 }, { "epoch": 0.96, "learning_rate": 7.212220671384163e-08, "loss": 0.8671, "step": 4386 }, { "epoch": 0.96, "learning_rate": 7.127222699435999e-08, "loss": 0.8094, "step": 4387 }, { "epoch": 0.96, "learning_rate": 7.042726771633868e-08, "loss": 0.9222, "step": 4388 }, { "epoch": 0.96, "learning_rate": 6.958732930702705e-08, "loss": 0.87, "step": 4389 }, { "epoch": 0.96, "learning_rate": 6.875241219113982e-08, "loss": 0.9375, "step": 4390 }, { "epoch": 0.96, "learning_rate": 6.792251679085037e-08, "loss": 0.9081, "step": 4391 }, { "epoch": 0.96, "learning_rate": 6.709764352579529e-08, "loss": 0.8492, "step": 4392 }, { "epoch": 0.96, "learning_rate": 6.627779281306868e-08, "loss": 0.8947, "step": 4393 }, { "epoch": 0.96, "learning_rate": 6.546296506722672e-08, "loss": 0.3035, "step": 4394 }, { "epoch": 0.96, "learning_rate": 6.465316070028538e-08, "loss": 0.8686, "step": 4395 }, { "epoch": 0.96, "learning_rate": 6.384838012172157e-08, "loss": 0.9179, "step": 4396 }, { "epoch": 0.97, "learning_rate": 6.304862373846865e-08, "loss": 0.9463, "step": 4397 }, { "epoch": 0.97, "learning_rate": 6.225389195492426e-08, "loss": 0.8797, "step": 4398 }, { "epoch": 0.97, "learning_rate": 6.14641851729425e-08, "loss": 0.918, "step": 4399 }, { "epoch": 0.97, "learning_rate": 6.067950379183619e-08, "loss": 0.917, "step": 4400 }, { "epoch": 0.97, "learning_rate": 5.98998482083779e-08, "loss": 0.9016, "step": 4401 }, { "epoch": 0.97, "learning_rate": 5.912521881680011e-08, "loss": 0.9369, "step": 4402 }, { "epoch": 0.97, "learning_rate": 5.835561600879169e-08, "loss": 0.9155, "step": 4403 }, { "epoch": 0.97, "learning_rate": 5.759104017350137e-08, "loss": 0.8643, "step": 4404 }, { "epoch": 0.97, "learning_rate": 5.683149169753433e-08, "loss": 0.9016, "step": 4405 }, { "epoch": 0.97, "learning_rate": 5.6076970964954456e-08, "loss": 0.8957, "step": 4406 }, { "epoch": 0.97, "learning_rate": 5.532747835728658e-08, "loss": 0.9706, "step": 4407 }, { "epoch": 0.97, "learning_rate": 5.458301425350643e-08, "loss": 0.8956, "step": 4408 }, { "epoch": 0.97, "learning_rate": 5.3843579030052885e-08, "loss": 0.892, "step": 4409 }, { "epoch": 0.97, "learning_rate": 5.3109173060820196e-08, "loss": 0.9277, "step": 4410 }, { "epoch": 0.97, "learning_rate": 5.2379796717157985e-08, "loss": 0.9144, "step": 4411 }, { "epoch": 0.97, "learning_rate": 5.165545036787567e-08, "loss": 0.8875, "step": 4412 }, { "epoch": 0.97, "learning_rate": 5.093613437923584e-08, "loss": 0.9123, "step": 4413 }, { "epoch": 0.97, "learning_rate": 5.022184911495864e-08, "loss": 0.886, "step": 4414 }, { "epoch": 0.97, "learning_rate": 4.9512594936224065e-08, "loss": 0.8733, "step": 4415 }, { "epoch": 0.97, "learning_rate": 4.8808372201661904e-08, "loss": 0.8581, "step": 4416 }, { "epoch": 0.97, "learning_rate": 4.810918126736286e-08, "loss": 0.3149, "step": 4417 }, { "epoch": 0.97, "learning_rate": 4.74150224868708e-08, "loss": 0.9266, "step": 4418 }, { "epoch": 0.97, "learning_rate": 4.672589621118495e-08, "loss": 0.8755, "step": 4419 }, { "epoch": 0.97, "learning_rate": 4.6041802788762136e-08, "loss": 0.83, "step": 4420 }, { "epoch": 0.97, "learning_rate": 4.536274256551232e-08, "loss": 0.903, "step": 4421 }, { "epoch": 0.97, "learning_rate": 4.468871588480195e-08, "loss": 0.9197, "step": 4422 }, { "epoch": 0.97, "learning_rate": 4.40197230874484e-08, "loss": 0.9525, "step": 4423 }, { "epoch": 0.97, "learning_rate": 4.3355764511728846e-08, "loss": 0.8988, "step": 4424 }, { "epoch": 0.97, "learning_rate": 4.269684049337142e-08, "loss": 0.9368, "step": 4425 }, { "epoch": 0.97, "learning_rate": 4.20429513655618e-08, "loss": 0.9236, "step": 4426 }, { "epoch": 0.97, "learning_rate": 4.139409745893441e-08, "loss": 0.8925, "step": 4427 }, { "epoch": 0.97, "learning_rate": 4.075027910158458e-08, "loss": 0.9512, "step": 4428 }, { "epoch": 0.97, "learning_rate": 4.011149661905522e-08, "loss": 0.9363, "step": 4429 }, { "epoch": 0.97, "learning_rate": 3.947775033434575e-08, "loss": 0.9156, "step": 4430 }, { "epoch": 0.97, "learning_rate": 3.8849040567909835e-08, "loss": 0.8568, "step": 4431 }, { "epoch": 0.97, "learning_rate": 3.822536763765317e-08, "loss": 0.9639, "step": 4432 }, { "epoch": 0.97, "learning_rate": 3.7606731858933486e-08, "loss": 0.8916, "step": 4433 }, { "epoch": 0.97, "learning_rate": 3.6993133544563906e-08, "loss": 0.8806, "step": 4434 }, { "epoch": 0.97, "learning_rate": 3.6384573004808465e-08, "loss": 0.9237, "step": 4435 }, { "epoch": 0.97, "learning_rate": 3.5781050547384346e-08, "loss": 0.9049, "step": 4436 }, { "epoch": 0.97, "learning_rate": 3.518256647746299e-08, "loss": 0.8847, "step": 4437 }, { "epoch": 0.97, "learning_rate": 3.4589121097665653e-08, "loss": 0.8926, "step": 4438 }, { "epoch": 0.97, "learning_rate": 3.400071470806676e-08, "loss": 0.9066, "step": 4439 }, { "epoch": 0.97, "learning_rate": 3.341734760619275e-08, "loss": 0.9437, "step": 4440 }, { "epoch": 0.97, "learning_rate": 3.2839020087022114e-08, "loss": 0.8768, "step": 4441 }, { "epoch": 0.98, "learning_rate": 3.226573244298537e-08, "loss": 0.9337, "step": 4442 }, { "epoch": 0.98, "learning_rate": 3.1697484963963964e-08, "loss": 0.9483, "step": 4443 }, { "epoch": 0.98, "learning_rate": 3.11342779372914e-08, "loss": 0.9494, "step": 4444 }, { "epoch": 0.98, "learning_rate": 3.0576111647752096e-08, "loss": 0.8539, "step": 4445 }, { "epoch": 0.98, "learning_rate": 3.0022986377581386e-08, "loss": 0.9244, "step": 4446 }, { "epoch": 0.98, "learning_rate": 2.9474902406466666e-08, "loss": 0.956, "step": 4447 }, { "epoch": 0.98, "learning_rate": 2.8931860011544022e-08, "loss": 0.9459, "step": 4448 }, { "epoch": 0.98, "learning_rate": 2.8393859467403806e-08, "loss": 0.9251, "step": 4449 }, { "epoch": 0.98, "learning_rate": 2.7860901046082856e-08, "loss": 0.9682, "step": 4450 }, { "epoch": 0.98, "learning_rate": 2.7332985017072266e-08, "loss": 0.9413, "step": 4451 }, { "epoch": 0.98, "learning_rate": 2.6810111647310733e-08, "loss": 0.9064, "step": 4452 }, { "epoch": 0.98, "learning_rate": 2.6292281201188985e-08, "loss": 0.8928, "step": 4453 }, { "epoch": 0.98, "learning_rate": 2.5779493940546464e-08, "loss": 0.295, "step": 4454 }, { "epoch": 0.98, "learning_rate": 2.5271750124672423e-08, "loss": 0.8645, "step": 4455 }, { "epoch": 0.98, "learning_rate": 2.476905001030705e-08, "loss": 0.8748, "step": 4456 }, { "epoch": 0.98, "learning_rate": 2.4271393851640346e-08, "loss": 0.8584, "step": 4457 }, { "epoch": 0.98, "learning_rate": 2.377878190030991e-08, "loss": 0.9118, "step": 4458 }, { "epoch": 0.98, "learning_rate": 2.329121440540427e-08, "loss": 0.916, "step": 4459 }, { "epoch": 0.98, "learning_rate": 2.2808691613461776e-08, "loss": 0.9619, "step": 4460 }, { "epoch": 0.98, "learning_rate": 2.2331213768468363e-08, "loss": 0.9654, "step": 4461 }, { "epoch": 0.98, "learning_rate": 2.1858781111859793e-08, "loss": 0.3022, "step": 4462 }, { "epoch": 0.98, "learning_rate": 2.1391393882521648e-08, "loss": 0.8993, "step": 4463 }, { "epoch": 0.98, "learning_rate": 2.0929052316785993e-08, "loss": 0.8855, "step": 4464 }, { "epoch": 0.98, "learning_rate": 2.0471756648435814e-08, "loss": 0.915, "step": 4465 }, { "epoch": 0.98, "learning_rate": 2.001950710870171e-08, "loss": 0.9042, "step": 4466 }, { "epoch": 0.98, "learning_rate": 1.957230392626186e-08, "loss": 0.8459, "step": 4467 }, { "epoch": 0.98, "learning_rate": 1.9130147327244274e-08, "loss": 0.8718, "step": 4468 }, { "epoch": 0.98, "learning_rate": 1.8693037535225666e-08, "loss": 0.8929, "step": 4469 }, { "epoch": 0.98, "learning_rate": 1.8260974771227015e-08, "loss": 0.8962, "step": 4470 }, { "epoch": 0.98, "learning_rate": 1.7833959253723554e-08, "loss": 0.2834, "step": 4471 }, { "epoch": 0.98, "learning_rate": 1.741199119863146e-08, "loss": 0.9054, "step": 4472 }, { "epoch": 0.98, "learning_rate": 1.6995070819320058e-08, "loss": 0.9087, "step": 4473 }, { "epoch": 0.98, "learning_rate": 1.6583198326604043e-08, "loss": 0.9068, "step": 4474 }, { "epoch": 0.98, "learning_rate": 1.6176373928745715e-08, "loss": 0.9848, "step": 4475 }, { "epoch": 0.98, "learning_rate": 1.5774597831454964e-08, "loss": 0.8741, "step": 4476 }, { "epoch": 0.98, "learning_rate": 1.537787023788817e-08, "loss": 0.852, "step": 4477 }, { "epoch": 0.98, "learning_rate": 1.4986191348651537e-08, "loss": 0.8695, "step": 4478 }, { "epoch": 0.98, "learning_rate": 1.4599561361795523e-08, "loss": 0.868, "step": 4479 }, { "epoch": 0.98, "learning_rate": 1.4217980472819304e-08, "loss": 0.8885, "step": 4480 }, { "epoch": 0.98, "learning_rate": 1.3841448874668539e-08, "loss": 0.8647, "step": 4481 }, { "epoch": 0.98, "learning_rate": 1.3469966757736486e-08, "loss": 0.8943, "step": 4482 }, { "epoch": 0.98, "learning_rate": 1.3103534309860666e-08, "loss": 0.9547, "step": 4483 }, { "epoch": 0.98, "learning_rate": 1.274215171632731e-08, "loss": 0.9134, "step": 4484 }, { "epoch": 0.98, "learning_rate": 1.2385819159869138e-08, "loss": 0.8938, "step": 4485 }, { "epoch": 0.98, "learning_rate": 1.2034536820665355e-08, "loss": 0.8918, "step": 4486 }, { "epoch": 0.98, "learning_rate": 1.168830487634054e-08, "loss": 0.8889, "step": 4487 }, { "epoch": 0.99, "learning_rate": 1.1347123501966872e-08, "loss": 0.9149, "step": 4488 }, { "epoch": 0.99, "learning_rate": 1.10109928700608e-08, "loss": 0.9576, "step": 4489 }, { "epoch": 0.99, "learning_rate": 1.0679913150588584e-08, "loss": 0.8523, "step": 4490 }, { "epoch": 0.99, "learning_rate": 1.0353884510957424e-08, "loss": 0.8868, "step": 4491 }, { "epoch": 0.99, "learning_rate": 1.0032907116023228e-08, "loss": 0.9178, "step": 4492 }, { "epoch": 0.99, "learning_rate": 9.716981128089498e-09, "loss": 0.8658, "step": 4493 }, { "epoch": 0.99, "learning_rate": 9.406106706902896e-09, "loss": 0.8875, "step": 4494 }, { "epoch": 0.99, "learning_rate": 9.100284009655458e-09, "loss": 0.8491, "step": 4495 }, { "epoch": 0.99, "learning_rate": 8.79951319098571e-09, "loss": 0.8573, "step": 4496 }, { "epoch": 0.99, "learning_rate": 8.50379440297866e-09, "loss": 0.9055, "step": 4497 }, { "epoch": 0.99, "learning_rate": 8.213127795164699e-09, "loss": 0.9629, "step": 4498 }, { "epoch": 0.99, "learning_rate": 7.92751351451737e-09, "loss": 0.8366, "step": 4499 }, { "epoch": 0.99, "learning_rate": 7.646951705457817e-09, "loss": 0.9205, "step": 4500 }, { "epoch": 0.99, "learning_rate": 7.371442509850335e-09, "loss": 0.9192, "step": 4501 }, { "epoch": 0.99, "learning_rate": 7.100986067006821e-09, "loss": 0.8969, "step": 4502 }, { "epoch": 0.99, "learning_rate": 6.835582513683436e-09, "loss": 0.9045, "step": 4503 }, { "epoch": 0.99, "learning_rate": 6.5752319840795e-09, "loss": 0.9712, "step": 4504 }, { "epoch": 0.99, "learning_rate": 6.319934609841927e-09, "loss": 0.8998, "step": 4505 }, { "epoch": 0.99, "learning_rate": 6.069690520060789e-09, "loss": 0.834, "step": 4506 }, { "epoch": 0.99, "learning_rate": 5.824499841271536e-09, "loss": 0.9753, "step": 4507 }, { "epoch": 0.99, "learning_rate": 5.584362697453882e-09, "loss": 0.9712, "step": 4508 }, { "epoch": 0.99, "learning_rate": 5.349279210034031e-09, "loss": 0.9279, "step": 4509 }, { "epoch": 0.99, "learning_rate": 5.11924949788023e-09, "loss": 0.974, "step": 4510 }, { "epoch": 0.99, "learning_rate": 4.894273677307215e-09, "loss": 0.8713, "step": 4511 }, { "epoch": 0.99, "learning_rate": 4.674351862072879e-09, "loss": 0.897, "step": 4512 }, { "epoch": 0.99, "learning_rate": 4.4594841633816e-09, "loss": 0.8869, "step": 4513 }, { "epoch": 0.99, "learning_rate": 4.249670689878693e-09, "loss": 0.9384, "step": 4514 }, { "epoch": 0.99, "learning_rate": 4.044911547658181e-09, "loss": 0.9034, "step": 4515 }, { "epoch": 0.99, "learning_rate": 3.845206840253912e-09, "loss": 0.8978, "step": 4516 }, { "epoch": 0.99, "learning_rate": 3.6505566686484416e-09, "loss": 0.9259, "step": 4517 }, { "epoch": 0.99, "learning_rate": 3.4609611312652615e-09, "loss": 0.32, "step": 4518 }, { "epoch": 0.99, "learning_rate": 3.276420323972129e-09, "loss": 0.9391, "step": 4519 }, { "epoch": 0.99, "learning_rate": 3.0969343400821807e-09, "loss": 0.9171, "step": 4520 }, { "epoch": 0.99, "learning_rate": 2.922503270353927e-09, "loss": 0.2974, "step": 4521 }, { "epoch": 0.99, "learning_rate": 2.7531272029845958e-09, "loss": 0.8903, "step": 4522 }, { "epoch": 0.99, "learning_rate": 2.588806223622342e-09, "loss": 0.2868, "step": 4523 }, { "epoch": 0.99, "learning_rate": 2.4295404153540368e-09, "loss": 0.2857, "step": 4524 }, { "epoch": 0.99, "learning_rate": 2.2753298587119276e-09, "loss": 0.9453, "step": 4525 }, { "epoch": 0.99, "learning_rate": 2.1261746316725287e-09, "loss": 0.9657, "step": 4526 }, { "epoch": 0.99, "learning_rate": 1.98207480965662e-09, "loss": 0.9043, "step": 4527 }, { "epoch": 0.99, "learning_rate": 1.8430304655270293e-09, "loss": 0.9144, "step": 4528 }, { "epoch": 0.99, "learning_rate": 1.7090416695919598e-09, "loss": 0.8856, "step": 4529 }, { "epoch": 0.99, "learning_rate": 1.5801084896027719e-09, "loss": 0.3339, "step": 4530 }, { "epoch": 0.99, "learning_rate": 1.4562309907528716e-09, "loss": 0.9363, "step": 4531 }, { "epoch": 0.99, "learning_rate": 1.3374092356821521e-09, "loss": 0.8562, "step": 4532 }, { "epoch": 1.0, "learning_rate": 1.223643284471443e-09, "loss": 0.8801, "step": 4533 }, { "epoch": 1.0, "learning_rate": 1.1149331946469499e-09, "loss": 0.9049, "step": 4534 }, { "epoch": 1.0, "learning_rate": 1.0112790211769252e-09, "loss": 0.8766, "step": 4535 }, { "epoch": 1.0, "learning_rate": 9.12680816476108e-10, "loss": 0.872, "step": 4536 }, { "epoch": 1.0, "learning_rate": 8.191386303979532e-10, "loss": 0.9272, "step": 4537 }, { "epoch": 1.0, "learning_rate": 7.306525102424022e-10, "loss": 0.2955, "step": 4538 }, { "epoch": 1.0, "learning_rate": 6.472225007536636e-10, "loss": 0.9215, "step": 4539 }, { "epoch": 1.0, "learning_rate": 5.688486441168816e-10, "loss": 0.3164, "step": 4540 }, { "epoch": 1.0, "learning_rate": 4.955309799614671e-10, "loss": 0.2778, "step": 4541 }, { "epoch": 1.0, "learning_rate": 4.2726954536109753e-10, "loss": 0.8672, "step": 4542 }, { "epoch": 1.0, "learning_rate": 3.6406437483149647e-10, "loss": 0.9123, "step": 4543 }, { "epoch": 1.0, "learning_rate": 3.059155003326542e-10, "loss": 0.8717, "step": 4544 }, { "epoch": 1.0, "learning_rate": 2.5282295126660694e-10, "loss": 0.8961, "step": 4545 }, { "epoch": 1.0, "learning_rate": 2.047867544796578e-10, "loss": 0.8896, "step": 4546 }, { "epoch": 1.0, "learning_rate": 1.6180693426237627e-10, "loss": 0.8491, "step": 4547 }, { "epoch": 1.0, "learning_rate": 1.2388351234626784e-10, "loss": 0.8888, "step": 4548 }, { "epoch": 1.0, "learning_rate": 9.101650790821481e-11, "loss": 0.9181, "step": 4549 }, { "epoch": 1.0, "learning_rate": 6.320593756603544e-11, "loss": 0.8773, "step": 4550 }, { "epoch": 1.0, "learning_rate": 4.045181538292475e-11, "loss": 0.8976, "step": 4551 }, { "epoch": 1.0, "learning_rate": 2.2754152864123967e-11, "loss": 0.9367, "step": 4552 }, { "epoch": 1.0, "learning_rate": 1.0112958959140884e-11, "loss": 0.9153, "step": 4553 }, { "epoch": 1.0, "learning_rate": 2.528240059529452e-12, "loss": 0.8958, "step": 4554 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.2859, "step": 4555 }, { "epoch": 1.0, "step": 4555, "total_flos": 2.073718275026662e+19, "train_loss": 0.9391254732231932, "train_runtime": 101188.5094, "train_samples_per_second": 7.563, "train_steps_per_second": 0.045 } ], "max_steps": 4555, "num_train_epochs": 1, "total_flos": 2.073718275026662e+19, "trial_name": null, "trial_params": null }