{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 1560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.128205128205128e-07, "loss": 1.2474, "step": 2 }, { "epoch": 0.01, "learning_rate": 1.0256410256410257e-06, "loss": 1.1588, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.5384615384615387e-06, "loss": 1.1009, "step": 6 }, { "epoch": 0.01, "learning_rate": 2.0512820512820513e-06, "loss": 1.1041, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.564102564102564e-06, "loss": 1.0571, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.0769230769230774e-06, "loss": 1.0064, "step": 12 }, { "epoch": 0.02, "learning_rate": 3.58974358974359e-06, "loss": 1.0577, "step": 14 }, { "epoch": 0.02, "learning_rate": 4.102564102564103e-06, "loss": 1.0429, "step": 16 }, { "epoch": 0.02, "learning_rate": 4.615384615384616e-06, "loss": 1.0566, "step": 18 }, { "epoch": 0.03, "learning_rate": 5.128205128205128e-06, "loss": 1.025, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.641025641025641e-06, "loss": 1.0348, "step": 22 }, { "epoch": 0.03, "learning_rate": 6.153846153846155e-06, "loss": 1.0395, "step": 24 }, { "epoch": 0.03, "learning_rate": 6.666666666666667e-06, "loss": 1.0705, "step": 26 }, { "epoch": 0.04, "learning_rate": 7.17948717948718e-06, "loss": 1.0066, "step": 28 }, { "epoch": 0.04, "learning_rate": 7.692307692307694e-06, "loss": 1.0498, "step": 30 }, { "epoch": 0.04, "learning_rate": 8.205128205128205e-06, "loss": 1.0381, "step": 32 }, { "epoch": 0.04, "learning_rate": 8.717948717948719e-06, "loss": 1.0386, "step": 34 }, { "epoch": 0.05, "learning_rate": 9.230769230769232e-06, "loss": 1.0641, "step": 36 }, { "epoch": 0.05, "learning_rate": 9.743589743589744e-06, "loss": 1.0387, "step": 38 }, { "epoch": 0.05, "learning_rate": 1.0256410256410256e-05, "loss": 1.0826, "step": 40 }, { "epoch": 0.05, "learning_rate": 1.076923076923077e-05, "loss": 1.0797, "step": 42 }, { "epoch": 0.06, "learning_rate": 1.1282051282051283e-05, "loss": 1.1147, "step": 44 }, { "epoch": 0.06, "learning_rate": 1.1794871794871796e-05, "loss": 1.0696, "step": 46 }, { "epoch": 0.06, "learning_rate": 1.230769230769231e-05, "loss": 1.0447, "step": 48 }, { "epoch": 0.06, "learning_rate": 1.2820512820512823e-05, "loss": 1.0405, "step": 50 }, { "epoch": 0.07, "learning_rate": 1.3333333333333333e-05, "loss": 1.1069, "step": 52 }, { "epoch": 0.07, "learning_rate": 1.3846153846153847e-05, "loss": 1.0522, "step": 54 }, { "epoch": 0.07, "learning_rate": 1.435897435897436e-05, "loss": 1.1448, "step": 56 }, { "epoch": 0.07, "learning_rate": 1.4871794871794874e-05, "loss": 1.0508, "step": 58 }, { "epoch": 0.08, "learning_rate": 1.5384615384615387e-05, "loss": 1.1121, "step": 60 }, { "epoch": 0.08, "learning_rate": 1.5897435897435897e-05, "loss": 1.1292, "step": 62 }, { "epoch": 0.08, "learning_rate": 1.641025641025641e-05, "loss": 1.1066, "step": 64 }, { "epoch": 0.08, "learning_rate": 1.6923076923076924e-05, "loss": 1.1084, "step": 66 }, { "epoch": 0.09, "learning_rate": 1.7435897435897438e-05, "loss": 1.1632, "step": 68 }, { "epoch": 0.09, "learning_rate": 1.794871794871795e-05, "loss": 1.0844, "step": 70 }, { "epoch": 0.09, "learning_rate": 1.8461538461538465e-05, "loss": 1.0884, "step": 72 }, { "epoch": 0.09, "learning_rate": 1.8974358974358975e-05, "loss": 1.1601, "step": 74 }, { "epoch": 0.1, "learning_rate": 1.9487179487179488e-05, "loss": 1.1705, "step": 76 }, { "epoch": 0.1, "learning_rate": 2e-05, "loss": 1.1824, "step": 78 }, { "epoch": 0.1, "learning_rate": 1.999991012628722e-05, "loss": 1.153, "step": 80 }, { "epoch": 0.11, "learning_rate": 1.999964050676434e-05, "loss": 1.1438, "step": 82 }, { "epoch": 0.11, "learning_rate": 1.999919114627769e-05, "loss": 1.1805, "step": 84 }, { "epoch": 0.11, "learning_rate": 1.999856205290442e-05, "loss": 1.137, "step": 86 }, { "epoch": 0.11, "learning_rate": 1.999775323795232e-05, "loss": 1.1703, "step": 88 }, { "epoch": 0.12, "learning_rate": 1.999676471595962e-05, "loss": 1.142, "step": 90 }, { "epoch": 0.12, "learning_rate": 1.9995596504694764e-05, "loss": 1.1743, "step": 92 }, { "epoch": 0.12, "learning_rate": 1.999424862515604e-05, "loss": 1.1679, "step": 94 }, { "epoch": 0.12, "learning_rate": 1.9992721101571238e-05, "loss": 1.1924, "step": 96 }, { "epoch": 0.13, "learning_rate": 1.99910139613972e-05, "loss": 1.1704, "step": 98 }, { "epoch": 0.13, "learning_rate": 1.998912723531933e-05, "loss": 1.1856, "step": 100 }, { "epoch": 0.13, "learning_rate": 1.9987060957251047e-05, "loss": 1.1808, "step": 102 }, { "epoch": 0.13, "learning_rate": 1.9984815164333163e-05, "loss": 1.2127, "step": 104 }, { "epoch": 0.14, "learning_rate": 1.998238989693323e-05, "loss": 1.2006, "step": 106 }, { "epoch": 0.14, "learning_rate": 1.997978519864481e-05, "loss": 1.1249, "step": 108 }, { "epoch": 0.14, "learning_rate": 1.9977001116286675e-05, "loss": 1.2025, "step": 110 }, { "epoch": 0.14, "learning_rate": 1.9974037699901993e-05, "loss": 1.1594, "step": 112 }, { "epoch": 0.15, "learning_rate": 1.9970895002757413e-05, "loss": 1.1506, "step": 114 }, { "epoch": 0.15, "learning_rate": 1.9967573081342103e-05, "loss": 1.2164, "step": 116 }, { "epoch": 0.15, "learning_rate": 1.9964071995366744e-05, "loss": 1.1398, "step": 118 }, { "epoch": 0.15, "learning_rate": 1.9960391807762462e-05, "loss": 1.2018, "step": 120 }, { "epoch": 0.16, "learning_rate": 1.9956532584679676e-05, "loss": 1.18, "step": 122 }, { "epoch": 0.16, "learning_rate": 1.995249439548693e-05, "loss": 1.1726, "step": 124 }, { "epoch": 0.16, "learning_rate": 1.994827731276963e-05, "loss": 1.1302, "step": 126 }, { "epoch": 0.16, "learning_rate": 1.994388141232876e-05, "loss": 1.1494, "step": 128 }, { "epoch": 0.17, "learning_rate": 1.9939306773179498e-05, "loss": 1.2118, "step": 130 }, { "epoch": 0.17, "learning_rate": 1.9934553477549795e-05, "loss": 1.1385, "step": 132 }, { "epoch": 0.17, "learning_rate": 1.992962161087893e-05, "loss": 1.1837, "step": 134 }, { "epoch": 0.17, "learning_rate": 1.9924511261815928e-05, "loss": 1.1921, "step": 136 }, { "epoch": 0.18, "learning_rate": 1.9919222522217998e-05, "loss": 1.1621, "step": 138 }, { "epoch": 0.18, "learning_rate": 1.9913755487148874e-05, "loss": 1.2225, "step": 140 }, { "epoch": 0.18, "learning_rate": 1.9908110254877107e-05, "loss": 1.139, "step": 142 }, { "epoch": 0.18, "learning_rate": 1.990228692687429e-05, "loss": 1.1651, "step": 144 }, { "epoch": 0.19, "learning_rate": 1.9896285607813245e-05, "loss": 1.1357, "step": 146 }, { "epoch": 0.19, "learning_rate": 1.989010640556614e-05, "loss": 1.16, "step": 148 }, { "epoch": 0.19, "learning_rate": 1.988374943120254e-05, "loss": 1.1853, "step": 150 }, { "epoch": 0.19, "learning_rate": 1.9877214798987428e-05, "loss": 1.1863, "step": 152 }, { "epoch": 0.2, "learning_rate": 1.9870502626379127e-05, "loss": 1.2025, "step": 154 }, { "epoch": 0.2, "learning_rate": 1.9863613034027224e-05, "loss": 1.1802, "step": 156 }, { "epoch": 0.2, "learning_rate": 1.985654614577036e-05, "loss": 1.2212, "step": 158 }, { "epoch": 0.21, "learning_rate": 1.9849302088634034e-05, "loss": 1.191, "step": 160 }, { "epoch": 0.21, "learning_rate": 1.9841880992828306e-05, "loss": 1.1702, "step": 162 }, { "epoch": 0.21, "learning_rate": 1.9834282991745465e-05, "loss": 1.1982, "step": 164 }, { "epoch": 0.21, "learning_rate": 1.9826508221957624e-05, "loss": 1.1503, "step": 166 }, { "epoch": 0.22, "learning_rate": 1.981855682321427e-05, "loss": 1.1997, "step": 168 }, { "epoch": 0.22, "learning_rate": 1.981042893843974e-05, "loss": 1.1724, "step": 170 }, { "epoch": 0.22, "learning_rate": 1.980212471373068e-05, "loss": 1.1611, "step": 172 }, { "epoch": 0.22, "learning_rate": 1.979364429835339e-05, "loss": 1.1512, "step": 174 }, { "epoch": 0.23, "learning_rate": 1.978498784474115e-05, "loss": 1.2071, "step": 176 }, { "epoch": 0.23, "learning_rate": 1.9776155508491482e-05, "loss": 1.1767, "step": 178 }, { "epoch": 0.23, "learning_rate": 1.9767147448363366e-05, "loss": 1.162, "step": 180 }, { "epoch": 0.23, "learning_rate": 1.9757963826274357e-05, "loss": 1.1688, "step": 182 }, { "epoch": 0.24, "learning_rate": 1.97486048072977e-05, "loss": 1.1622, "step": 184 }, { "epoch": 0.24, "learning_rate": 1.9739070559659347e-05, "loss": 1.1653, "step": 186 }, { "epoch": 0.24, "learning_rate": 1.972936125473495e-05, "loss": 1.1141, "step": 188 }, { "epoch": 0.24, "learning_rate": 1.9719477067046768e-05, "loss": 1.1571, "step": 190 }, { "epoch": 0.25, "learning_rate": 1.9709418174260523e-05, "loss": 1.2207, "step": 192 }, { "epoch": 0.25, "learning_rate": 1.9699184757182225e-05, "loss": 1.1794, "step": 194 }, { "epoch": 0.25, "learning_rate": 1.9688776999754913e-05, "loss": 1.106, "step": 196 }, { "epoch": 0.25, "learning_rate": 1.9678195089055347e-05, "loss": 1.1521, "step": 198 }, { "epoch": 0.26, "learning_rate": 1.966743921529065e-05, "loss": 1.1671, "step": 200 }, { "epoch": 0.26, "learning_rate": 1.965650957179488e-05, "loss": 1.1579, "step": 202 }, { "epoch": 0.26, "learning_rate": 1.9645406355025565e-05, "loss": 1.2015, "step": 204 }, { "epoch": 0.26, "learning_rate": 1.963412976456017e-05, "loss": 1.1538, "step": 206 }, { "epoch": 0.27, "learning_rate": 1.9622680003092503e-05, "loss": 1.1832, "step": 208 }, { "epoch": 0.27, "learning_rate": 1.9611057276429085e-05, "loss": 1.2082, "step": 210 }, { "epoch": 0.27, "learning_rate": 1.9599261793485432e-05, "loss": 1.1638, "step": 212 }, { "epoch": 0.27, "learning_rate": 1.958729376628231e-05, "loss": 1.201, "step": 214 }, { "epoch": 0.28, "learning_rate": 1.957515340994193e-05, "loss": 1.1826, "step": 216 }, { "epoch": 0.28, "learning_rate": 1.956284094268407e-05, "loss": 1.1293, "step": 218 }, { "epoch": 0.28, "learning_rate": 1.955035658582216e-05, "loss": 1.1935, "step": 220 }, { "epoch": 0.28, "learning_rate": 1.9537700563759303e-05, "loss": 1.1658, "step": 222 }, { "epoch": 0.29, "learning_rate": 1.9524873103984234e-05, "loss": 1.1698, "step": 224 }, { "epoch": 0.29, "learning_rate": 1.9511874437067243e-05, "loss": 1.1962, "step": 226 }, { "epoch": 0.29, "learning_rate": 1.949870479665602e-05, "loss": 1.1148, "step": 228 }, { "epoch": 0.29, "learning_rate": 1.9485364419471454e-05, "loss": 1.1701, "step": 230 }, { "epoch": 0.3, "learning_rate": 1.9471853545303407e-05, "loss": 1.202, "step": 232 }, { "epoch": 0.3, "learning_rate": 1.9458172417006347e-05, "loss": 1.1923, "step": 234 }, { "epoch": 0.3, "learning_rate": 1.9444321280495045e-05, "loss": 1.1486, "step": 236 }, { "epoch": 0.31, "learning_rate": 1.9430300384740108e-05, "loss": 1.163, "step": 238 }, { "epoch": 0.31, "learning_rate": 1.9416109981763526e-05, "loss": 1.166, "step": 240 }, { "epoch": 0.31, "learning_rate": 1.9401750326634144e-05, "loss": 1.1528, "step": 242 }, { "epoch": 0.31, "learning_rate": 1.9387221677463064e-05, "loss": 1.1547, "step": 244 }, { "epoch": 0.32, "learning_rate": 1.9372524295399014e-05, "loss": 1.2095, "step": 246 }, { "epoch": 0.32, "learning_rate": 1.9357658444623655e-05, "loss": 1.0905, "step": 248 }, { "epoch": 0.32, "learning_rate": 1.9342624392346826e-05, "loss": 1.169, "step": 250 }, { "epoch": 0.32, "learning_rate": 1.9327422408801744e-05, "loss": 1.1979, "step": 252 }, { "epoch": 0.33, "learning_rate": 1.9312052767240153e-05, "loss": 1.1515, "step": 254 }, { "epoch": 0.33, "learning_rate": 1.92965157439274e-05, "loss": 1.1893, "step": 256 }, { "epoch": 0.33, "learning_rate": 1.9280811618137486e-05, "loss": 1.174, "step": 258 }, { "epoch": 0.33, "learning_rate": 1.9264940672148018e-05, "loss": 1.2114, "step": 260 }, { "epoch": 0.34, "learning_rate": 1.9248903191235177e-05, "loss": 1.1677, "step": 262 }, { "epoch": 0.34, "learning_rate": 1.9232699463668543e-05, "loss": 1.1598, "step": 264 }, { "epoch": 0.34, "learning_rate": 1.9216329780705955e-05, "loss": 1.1887, "step": 266 }, { "epoch": 0.34, "learning_rate": 1.9199794436588244e-05, "loss": 1.15, "step": 268 }, { "epoch": 0.35, "learning_rate": 1.9183093728533966e-05, "loss": 1.1685, "step": 270 }, { "epoch": 0.35, "learning_rate": 1.916622795673405e-05, "loss": 1.1508, "step": 272 }, { "epoch": 0.35, "learning_rate": 1.9149197424346405e-05, "loss": 1.1324, "step": 274 }, { "epoch": 0.35, "learning_rate": 1.913200243749046e-05, "loss": 1.1601, "step": 276 }, { "epoch": 0.36, "learning_rate": 1.9114643305241678e-05, "loss": 1.1349, "step": 278 }, { "epoch": 0.36, "learning_rate": 1.9097120339625994e-05, "loss": 1.1617, "step": 280 }, { "epoch": 0.36, "learning_rate": 1.9079433855614203e-05, "loss": 1.1513, "step": 282 }, { "epoch": 0.36, "learning_rate": 1.9061584171116302e-05, "loss": 1.17, "step": 284 }, { "epoch": 0.37, "learning_rate": 1.9043571606975776e-05, "loss": 1.1732, "step": 286 }, { "epoch": 0.37, "learning_rate": 1.9025396486963827e-05, "loss": 1.1233, "step": 288 }, { "epoch": 0.37, "learning_rate": 1.900705913777356e-05, "loss": 1.1701, "step": 290 }, { "epoch": 0.37, "learning_rate": 1.89885598890141e-05, "loss": 1.161, "step": 292 }, { "epoch": 0.38, "learning_rate": 1.8969899073204687e-05, "loss": 1.1404, "step": 294 }, { "epoch": 0.38, "learning_rate": 1.895107702576868e-05, "loss": 1.2028, "step": 296 }, { "epoch": 0.38, "learning_rate": 1.8932094085027534e-05, "loss": 1.1943, "step": 298 }, { "epoch": 0.38, "learning_rate": 1.891295059219472e-05, "loss": 1.1376, "step": 300 }, { "epoch": 0.39, "learning_rate": 1.88936468913696e-05, "loss": 1.1743, "step": 302 }, { "epoch": 0.39, "learning_rate": 1.8874183329531222e-05, "loss": 1.1552, "step": 304 }, { "epoch": 0.39, "learning_rate": 1.8854560256532098e-05, "loss": 1.1528, "step": 306 }, { "epoch": 0.39, "learning_rate": 1.883477802509192e-05, "loss": 1.1543, "step": 308 }, { "epoch": 0.4, "learning_rate": 1.88148369907912e-05, "loss": 1.1008, "step": 310 }, { "epoch": 0.4, "learning_rate": 1.879473751206489e-05, "loss": 1.1573, "step": 312 }, { "epoch": 0.4, "learning_rate": 1.877447995019596e-05, "loss": 1.1236, "step": 314 }, { "epoch": 0.41, "learning_rate": 1.875406466930886e-05, "loss": 1.1546, "step": 316 }, { "epoch": 0.41, "learning_rate": 1.8733492036363007e-05, "loss": 1.1547, "step": 318 }, { "epoch": 0.41, "learning_rate": 1.8712762421146185e-05, "loss": 1.1608, "step": 320 }, { "epoch": 0.41, "learning_rate": 1.8691876196267892e-05, "loss": 1.1708, "step": 322 }, { "epoch": 0.42, "learning_rate": 1.867083373715264e-05, "loss": 1.1485, "step": 324 }, { "epoch": 0.42, "learning_rate": 1.8649635422033218e-05, "loss": 1.213, "step": 326 }, { "epoch": 0.42, "learning_rate": 1.862828163194388e-05, "loss": 1.137, "step": 328 }, { "epoch": 0.42, "learning_rate": 1.8606772750713503e-05, "loss": 1.1987, "step": 330 }, { "epoch": 0.43, "learning_rate": 1.8585109164958698e-05, "loss": 1.1287, "step": 332 }, { "epoch": 0.43, "learning_rate": 1.8563291264076834e-05, "loss": 1.1653, "step": 334 }, { "epoch": 0.43, "learning_rate": 1.8541319440239066e-05, "loss": 1.1305, "step": 336 }, { "epoch": 0.43, "learning_rate": 1.851919408838327e-05, "loss": 1.186, "step": 338 }, { "epoch": 0.44, "learning_rate": 1.8496915606206952e-05, "loss": 1.1371, "step": 340 }, { "epoch": 0.44, "learning_rate": 1.847448439416009e-05, "loss": 1.1589, "step": 342 }, { "epoch": 0.44, "learning_rate": 1.845190085543795e-05, "loss": 1.0932, "step": 344 }, { "epoch": 0.44, "learning_rate": 1.842916539597382e-05, "loss": 1.1354, "step": 346 }, { "epoch": 0.45, "learning_rate": 1.8406278424431737e-05, "loss": 1.1056, "step": 348 }, { "epoch": 0.45, "learning_rate": 1.8383240352199118e-05, "loss": 1.1402, "step": 350 }, { "epoch": 0.45, "learning_rate": 1.8360051593379383e-05, "loss": 1.1689, "step": 352 }, { "epoch": 0.45, "learning_rate": 1.8336712564784506e-05, "loss": 1.1761, "step": 354 }, { "epoch": 0.46, "learning_rate": 1.8313223685927507e-05, "loss": 1.101, "step": 356 }, { "epoch": 0.46, "learning_rate": 1.8289585379014942e-05, "loss": 1.1944, "step": 358 }, { "epoch": 0.46, "learning_rate": 1.8265798068939295e-05, "loss": 1.2067, "step": 360 }, { "epoch": 0.46, "learning_rate": 1.8241862183271338e-05, "loss": 1.1291, "step": 362 }, { "epoch": 0.47, "learning_rate": 1.821777815225245e-05, "loss": 1.1814, "step": 364 }, { "epoch": 0.47, "learning_rate": 1.81935464087869e-05, "loss": 1.1324, "step": 366 }, { "epoch": 0.47, "learning_rate": 1.8169167388434024e-05, "loss": 1.1929, "step": 368 }, { "epoch": 0.47, "learning_rate": 1.8144641529400445e-05, "loss": 1.1131, "step": 370 }, { "epoch": 0.48, "learning_rate": 1.8119969272532164e-05, "loss": 1.1646, "step": 372 }, { "epoch": 0.48, "learning_rate": 1.8095151061306647e-05, "loss": 1.1554, "step": 374 }, { "epoch": 0.48, "learning_rate": 1.8070187341824848e-05, "loss": 1.1406, "step": 376 }, { "epoch": 0.48, "learning_rate": 1.8045078562803203e-05, "loss": 1.1385, "step": 378 }, { "epoch": 0.49, "learning_rate": 1.8019825175565544e-05, "loss": 1.1865, "step": 380 }, { "epoch": 0.49, "learning_rate": 1.7994427634035016e-05, "loss": 1.1631, "step": 382 }, { "epoch": 0.49, "learning_rate": 1.7968886394725876e-05, "loss": 1.1112, "step": 384 }, { "epoch": 0.49, "learning_rate": 1.7943201916735337e-05, "loss": 1.1238, "step": 386 }, { "epoch": 0.5, "learning_rate": 1.791737466173527e-05, "loss": 1.1653, "step": 388 }, { "epoch": 0.5, "learning_rate": 1.789140509396394e-05, "loss": 1.1407, "step": 390 }, { "epoch": 0.5, "learning_rate": 1.7865293680217636e-05, "loss": 1.1344, "step": 392 }, { "epoch": 0.51, "learning_rate": 1.7839040889842307e-05, "loss": 1.1513, "step": 394 }, { "epoch": 0.51, "learning_rate": 1.7812647194725093e-05, "loss": 1.1392, "step": 396 }, { "epoch": 0.51, "learning_rate": 1.7786113069285877e-05, "loss": 1.0903, "step": 398 }, { "epoch": 0.51, "learning_rate": 1.7759438990468726e-05, "loss": 1.1068, "step": 400 }, { "epoch": 0.52, "learning_rate": 1.7732625437733338e-05, "loss": 1.1482, "step": 402 }, { "epoch": 0.52, "learning_rate": 1.7705672893046425e-05, "loss": 1.1475, "step": 404 }, { "epoch": 0.52, "learning_rate": 1.767858184087304e-05, "loss": 1.1514, "step": 406 }, { "epoch": 0.52, "learning_rate": 1.765135276816787e-05, "loss": 1.1395, "step": 408 }, { "epoch": 0.53, "learning_rate": 1.7623986164366487e-05, "loss": 1.1084, "step": 410 }, { "epoch": 0.53, "learning_rate": 1.7596482521376546e-05, "loss": 1.1748, "step": 412 }, { "epoch": 0.53, "learning_rate": 1.7568842333568952e-05, "loss": 1.1709, "step": 414 }, { "epoch": 0.53, "learning_rate": 1.7541066097768965e-05, "loss": 1.1961, "step": 416 }, { "epoch": 0.54, "learning_rate": 1.7513154313247273e-05, "loss": 1.1287, "step": 418 }, { "epoch": 0.54, "learning_rate": 1.7485107481711014e-05, "loss": 1.1484, "step": 420 }, { "epoch": 0.54, "learning_rate": 1.7456926107294765e-05, "loss": 1.1195, "step": 422 }, { "epoch": 0.54, "learning_rate": 1.742861069655148e-05, "loss": 1.1157, "step": 424 }, { "epoch": 0.55, "learning_rate": 1.7400161758443377e-05, "loss": 1.1829, "step": 426 }, { "epoch": 0.55, "learning_rate": 1.737157980433279e-05, "loss": 1.1937, "step": 428 }, { "epoch": 0.55, "learning_rate": 1.7342865347972987e-05, "loss": 1.0915, "step": 430 }, { "epoch": 0.55, "learning_rate": 1.7314018905498932e-05, "loss": 1.1741, "step": 432 }, { "epoch": 0.56, "learning_rate": 1.7285040995418003e-05, "loss": 1.1407, "step": 434 }, { "epoch": 0.56, "learning_rate": 1.7255932138600665e-05, "loss": 1.1625, "step": 436 }, { "epoch": 0.56, "learning_rate": 1.7226692858271133e-05, "loss": 1.17, "step": 438 }, { "epoch": 0.56, "learning_rate": 1.7197323679997943e-05, "loss": 1.1736, "step": 440 }, { "epoch": 0.57, "learning_rate": 1.7167825131684516e-05, "loss": 1.1646, "step": 442 }, { "epoch": 0.57, "learning_rate": 1.7138197743559656e-05, "loss": 1.1301, "step": 444 }, { "epoch": 0.57, "learning_rate": 1.7108442048168038e-05, "loss": 1.1564, "step": 446 }, { "epoch": 0.57, "learning_rate": 1.707855858036063e-05, "loss": 1.1653, "step": 448 }, { "epoch": 0.58, "learning_rate": 1.7048547877285078e-05, "loss": 1.1335, "step": 450 }, { "epoch": 0.58, "learning_rate": 1.7018410478376033e-05, "loss": 1.1478, "step": 452 }, { "epoch": 0.58, "learning_rate": 1.6988146925345487e-05, "loss": 1.1436, "step": 454 }, { "epoch": 0.58, "learning_rate": 1.695775776217301e-05, "loss": 1.1958, "step": 456 }, { "epoch": 0.59, "learning_rate": 1.6927243535095995e-05, "loss": 1.1215, "step": 458 }, { "epoch": 0.59, "learning_rate": 1.6896604792599813e-05, "loss": 1.1639, "step": 460 }, { "epoch": 0.59, "learning_rate": 1.686584208540797e-05, "loss": 1.1605, "step": 462 }, { "epoch": 0.59, "learning_rate": 1.6834955966472214e-05, "loss": 1.0812, "step": 464 }, { "epoch": 0.6, "learning_rate": 1.6803946990962577e-05, "loss": 1.1427, "step": 466 }, { "epoch": 0.6, "learning_rate": 1.6772815716257414e-05, "loss": 1.1497, "step": 468 }, { "epoch": 0.6, "learning_rate": 1.6741562701933366e-05, "loss": 1.1058, "step": 470 }, { "epoch": 0.61, "learning_rate": 1.671018850975533e-05, "loss": 1.095, "step": 472 }, { "epoch": 0.61, "learning_rate": 1.6678693703666327e-05, "loss": 1.1272, "step": 474 }, { "epoch": 0.61, "learning_rate": 1.664707884977739e-05, "loss": 1.1257, "step": 476 }, { "epoch": 0.61, "learning_rate": 1.661534451635738e-05, "loss": 1.1318, "step": 478 }, { "epoch": 0.62, "learning_rate": 1.6583491273822763e-05, "loss": 1.1335, "step": 480 }, { "epoch": 0.62, "learning_rate": 1.655151969472738e-05, "loss": 1.1529, "step": 482 }, { "epoch": 0.62, "learning_rate": 1.6519430353752138e-05, "loss": 1.1674, "step": 484 }, { "epoch": 0.62, "learning_rate": 1.6487223827694673e-05, "loss": 1.1743, "step": 486 }, { "epoch": 0.63, "learning_rate": 1.6454900695459e-05, "loss": 1.186, "step": 488 }, { "epoch": 0.63, "learning_rate": 1.6422461538045104e-05, "loss": 1.1251, "step": 490 }, { "epoch": 0.63, "learning_rate": 1.638990693853848e-05, "loss": 1.0951, "step": 492 }, { "epoch": 0.63, "learning_rate": 1.6357237482099682e-05, "loss": 1.1618, "step": 494 }, { "epoch": 0.64, "learning_rate": 1.6324453755953772e-05, "loss": 1.1283, "step": 496 }, { "epoch": 0.64, "learning_rate": 1.6291556349379794e-05, "loss": 1.1174, "step": 498 }, { "epoch": 0.64, "learning_rate": 1.6258545853700157e-05, "loss": 1.17, "step": 500 }, { "epoch": 0.64, "learning_rate": 1.622542286227003e-05, "loss": 1.1735, "step": 502 }, { "epoch": 0.65, "learning_rate": 1.6192187970466646e-05, "loss": 1.0973, "step": 504 }, { "epoch": 0.65, "learning_rate": 1.615884177567863e-05, "loss": 1.1478, "step": 506 }, { "epoch": 0.65, "learning_rate": 1.6125384877295255e-05, "loss": 1.1254, "step": 508 }, { "epoch": 0.65, "learning_rate": 1.6091817876695655e-05, "loss": 1.1599, "step": 510 }, { "epoch": 0.66, "learning_rate": 1.6058141377238026e-05, "loss": 1.1401, "step": 512 }, { "epoch": 0.66, "learning_rate": 1.602435598424877e-05, "loss": 1.1241, "step": 514 }, { "epoch": 0.66, "learning_rate": 1.599046230501163e-05, "loss": 1.1915, "step": 516 }, { "epoch": 0.66, "learning_rate": 1.5956460948756765e-05, "loss": 1.1119, "step": 518 }, { "epoch": 0.67, "learning_rate": 1.5922352526649803e-05, "loss": 1.1226, "step": 520 }, { "epoch": 0.67, "learning_rate": 1.5888137651780847e-05, "loss": 1.1635, "step": 522 }, { "epoch": 0.67, "learning_rate": 1.585381693915346e-05, "loss": 1.123, "step": 524 }, { "epoch": 0.67, "learning_rate": 1.581939100567363e-05, "loss": 1.178, "step": 526 }, { "epoch": 0.68, "learning_rate": 1.5784860470138633e-05, "loss": 1.152, "step": 528 }, { "epoch": 0.68, "learning_rate": 1.5750225953225968e-05, "loss": 1.1314, "step": 530 }, { "epoch": 0.68, "learning_rate": 1.5715488077482152e-05, "loss": 1.1556, "step": 532 }, { "epoch": 0.68, "learning_rate": 1.568064746731156e-05, "loss": 1.1427, "step": 534 }, { "epoch": 0.69, "learning_rate": 1.5645704748965193e-05, "loss": 1.1633, "step": 536 }, { "epoch": 0.69, "learning_rate": 1.5610660550529413e-05, "loss": 1.1237, "step": 538 }, { "epoch": 0.69, "learning_rate": 1.557551550191467e-05, "loss": 1.1241, "step": 540 }, { "epoch": 0.69, "learning_rate": 1.554027023484416e-05, "loss": 1.1212, "step": 542 }, { "epoch": 0.7, "learning_rate": 1.550492538284249e-05, "loss": 1.1125, "step": 544 }, { "epoch": 0.7, "learning_rate": 1.5469481581224274e-05, "loss": 1.1345, "step": 546 }, { "epoch": 0.7, "learning_rate": 1.5433939467082713e-05, "loss": 1.0837, "step": 548 }, { "epoch": 0.71, "learning_rate": 1.5398299679278172e-05, "loss": 1.1531, "step": 550 }, { "epoch": 0.71, "learning_rate": 1.5362562858426655e-05, "loss": 1.179, "step": 552 }, { "epoch": 0.71, "learning_rate": 1.5326729646888314e-05, "loss": 1.1123, "step": 554 }, { "epoch": 0.71, "learning_rate": 1.5290800688755906e-05, "loss": 1.1616, "step": 556 }, { "epoch": 0.72, "learning_rate": 1.5254776629843204e-05, "loss": 1.1236, "step": 558 }, { "epoch": 0.72, "learning_rate": 1.5218658117673389e-05, "loss": 1.1254, "step": 560 }, { "epoch": 0.72, "learning_rate": 1.518244580146742e-05, "loss": 1.0994, "step": 562 }, { "epoch": 0.72, "learning_rate": 1.5146140332132359e-05, "loss": 1.0841, "step": 564 }, { "epoch": 0.73, "learning_rate": 1.5109742362249673e-05, "loss": 1.1284, "step": 566 }, { "epoch": 0.73, "learning_rate": 1.5073252546063493e-05, "loss": 1.108, "step": 568 }, { "epoch": 0.73, "learning_rate": 1.5036671539468879e-05, "loss": 1.1494, "step": 570 }, { "epoch": 0.73, "learning_rate": 1.5000000000000002e-05, "loss": 1.1226, "step": 572 }, { "epoch": 0.74, "learning_rate": 1.4963238586818346e-05, "loss": 1.0835, "step": 574 }, { "epoch": 0.74, "learning_rate": 1.4926387960700843e-05, "loss": 1.1047, "step": 576 }, { "epoch": 0.74, "learning_rate": 1.488944878402802e-05, "loss": 1.0861, "step": 578 }, { "epoch": 0.74, "learning_rate": 1.4852421720772064e-05, "loss": 1.1066, "step": 580 }, { "epoch": 0.75, "learning_rate": 1.4815307436484898e-05, "loss": 1.1382, "step": 582 }, { "epoch": 0.75, "learning_rate": 1.4778106598286235e-05, "loss": 1.0789, "step": 584 }, { "epoch": 0.75, "learning_rate": 1.4740819874851562e-05, "loss": 1.1367, "step": 586 }, { "epoch": 0.75, "learning_rate": 1.4703447936400135e-05, "loss": 1.1017, "step": 588 }, { "epoch": 0.76, "learning_rate": 1.4665991454682924e-05, "loss": 1.1009, "step": 590 }, { "epoch": 0.76, "learning_rate": 1.4628451102970546e-05, "loss": 1.08, "step": 592 }, { "epoch": 0.76, "learning_rate": 1.4590827556041158e-05, "loss": 1.1003, "step": 594 }, { "epoch": 0.76, "learning_rate": 1.4553121490168335e-05, "loss": 1.1396, "step": 596 }, { "epoch": 0.77, "learning_rate": 1.4515333583108896e-05, "loss": 1.1184, "step": 598 }, { "epoch": 0.77, "learning_rate": 1.4477464514090745e-05, "loss": 1.1374, "step": 600 }, { "epoch": 0.77, "learning_rate": 1.443951496380065e-05, "loss": 1.1114, "step": 602 }, { "epoch": 0.77, "learning_rate": 1.4401485614372009e-05, "loss": 1.0906, "step": 604 }, { "epoch": 0.78, "learning_rate": 1.4363377149372584e-05, "loss": 1.126, "step": 606 }, { "epoch": 0.78, "learning_rate": 1.4325190253792222e-05, "loss": 1.0907, "step": 608 }, { "epoch": 0.78, "learning_rate": 1.4286925614030542e-05, "loss": 1.126, "step": 610 }, { "epoch": 0.78, "learning_rate": 1.4248583917884595e-05, "loss": 1.1183, "step": 612 }, { "epoch": 0.79, "learning_rate": 1.4210165854536495e-05, "loss": 1.1165, "step": 614 }, { "epoch": 0.79, "learning_rate": 1.4171672114541042e-05, "loss": 1.1149, "step": 616 }, { "epoch": 0.79, "learning_rate": 1.4133103389813302e-05, "loss": 1.1124, "step": 618 }, { "epoch": 0.79, "learning_rate": 1.409446037361617e-05, "loss": 1.0864, "step": 620 }, { "epoch": 0.8, "learning_rate": 1.4055743760547918e-05, "loss": 1.105, "step": 622 }, { "epoch": 0.8, "learning_rate": 1.4016954246529697e-05, "loss": 1.1575, "step": 624 }, { "epoch": 0.8, "learning_rate": 1.3978092528793032e-05, "loss": 1.1155, "step": 626 }, { "epoch": 0.81, "learning_rate": 1.39391593058673e-05, "loss": 1.12, "step": 628 }, { "epoch": 0.81, "learning_rate": 1.3900155277567157e-05, "loss": 1.1048, "step": 630 }, { "epoch": 0.81, "learning_rate": 1.3861081144979975e-05, "loss": 1.1003, "step": 632 }, { "epoch": 0.81, "learning_rate": 1.382193761045322e-05, "loss": 1.0671, "step": 634 }, { "epoch": 0.82, "learning_rate": 1.378272537758185e-05, "loss": 1.0981, "step": 636 }, { "epoch": 0.82, "learning_rate": 1.3743445151195658e-05, "loss": 1.132, "step": 638 }, { "epoch": 0.82, "learning_rate": 1.37040976373466e-05, "loss": 1.1709, "step": 640 }, { "epoch": 0.82, "learning_rate": 1.3664683543296114e-05, "loss": 1.1083, "step": 642 }, { "epoch": 0.83, "learning_rate": 1.3625203577502384e-05, "loss": 1.1272, "step": 644 }, { "epoch": 0.83, "learning_rate": 1.3585658449607632e-05, "loss": 1.0967, "step": 646 }, { "epoch": 0.83, "learning_rate": 1.3546048870425356e-05, "loss": 1.0316, "step": 648 }, { "epoch": 0.83, "learning_rate": 1.3506375551927546e-05, "loss": 1.0944, "step": 650 }, { "epoch": 0.84, "learning_rate": 1.3466639207231882e-05, "loss": 1.1149, "step": 652 }, { "epoch": 0.84, "learning_rate": 1.3426840550588933e-05, "loss": 1.0834, "step": 654 }, { "epoch": 0.84, "learning_rate": 1.3386980297369308e-05, "loss": 1.0853, "step": 656 }, { "epoch": 0.84, "learning_rate": 1.3347059164050796e-05, "loss": 1.0994, "step": 658 }, { "epoch": 0.85, "learning_rate": 1.3307077868205487e-05, "loss": 1.127, "step": 660 }, { "epoch": 0.85, "learning_rate": 1.3267037128486883e-05, "loss": 1.1307, "step": 662 }, { "epoch": 0.85, "learning_rate": 1.3226937664616977e-05, "loss": 1.1151, "step": 664 }, { "epoch": 0.85, "learning_rate": 1.3186780197373306e-05, "loss": 1.1228, "step": 666 }, { "epoch": 0.86, "learning_rate": 1.3146565448576002e-05, "loss": 1.0869, "step": 668 }, { "epoch": 0.86, "learning_rate": 1.3106294141074825e-05, "loss": 1.0714, "step": 670 }, { "epoch": 0.86, "learning_rate": 1.3065966998736155e-05, "loss": 1.1025, "step": 672 }, { "epoch": 0.86, "learning_rate": 1.302558474643e-05, "loss": 1.0993, "step": 674 }, { "epoch": 0.87, "learning_rate": 1.2985148110016947e-05, "loss": 1.0594, "step": 676 }, { "epoch": 0.87, "learning_rate": 1.2944657816335124e-05, "loss": 1.0898, "step": 678 }, { "epoch": 0.87, "learning_rate": 1.2904114593187136e-05, "loss": 1.1221, "step": 680 }, { "epoch": 0.87, "learning_rate": 1.2863519169326984e-05, "loss": 1.1194, "step": 682 }, { "epoch": 0.88, "learning_rate": 1.2822872274446958e-05, "loss": 1.1323, "step": 684 }, { "epoch": 0.88, "learning_rate": 1.2782174639164528e-05, "loss": 1.1366, "step": 686 }, { "epoch": 0.88, "learning_rate": 1.2741426995009214e-05, "loss": 1.096, "step": 688 }, { "epoch": 0.88, "learning_rate": 1.2700630074409427e-05, "loss": 1.0622, "step": 690 }, { "epoch": 0.89, "learning_rate": 1.2659784610679318e-05, "loss": 1.1226, "step": 692 }, { "epoch": 0.89, "learning_rate": 1.2618891338005574e-05, "loss": 1.0771, "step": 694 }, { "epoch": 0.89, "learning_rate": 1.2577950991434249e-05, "loss": 1.1363, "step": 696 }, { "epoch": 0.89, "learning_rate": 1.2536964306857526e-05, "loss": 1.0735, "step": 698 }, { "epoch": 0.9, "learning_rate": 1.2495932021000516e-05, "loss": 1.0877, "step": 700 }, { "epoch": 0.9, "learning_rate": 1.2454854871407993e-05, "loss": 1.0683, "step": 702 }, { "epoch": 0.9, "learning_rate": 1.2413733596431141e-05, "loss": 1.1172, "step": 704 }, { "epoch": 0.91, "learning_rate": 1.2372568935214298e-05, "loss": 1.068, "step": 706 }, { "epoch": 0.91, "learning_rate": 1.2331361627681645e-05, "loss": 1.0995, "step": 708 }, { "epoch": 0.91, "learning_rate": 1.2290112414523927e-05, "loss": 1.0648, "step": 710 }, { "epoch": 0.91, "learning_rate": 1.2248822037185137e-05, "loss": 1.0978, "step": 712 }, { "epoch": 0.92, "learning_rate": 1.2207491237849174e-05, "loss": 1.0318, "step": 714 }, { "epoch": 0.92, "learning_rate": 1.2166120759426515e-05, "loss": 1.1007, "step": 716 }, { "epoch": 0.92, "learning_rate": 1.2124711345540861e-05, "loss": 1.0964, "step": 718 }, { "epoch": 0.92, "learning_rate": 1.2083263740515764e-05, "loss": 1.1559, "step": 720 }, { "epoch": 0.93, "learning_rate": 1.2041778689361254e-05, "loss": 1.1272, "step": 722 }, { "epoch": 0.93, "learning_rate": 1.2000256937760446e-05, "loss": 1.066, "step": 724 }, { "epoch": 0.93, "learning_rate": 1.1958699232056135e-05, "loss": 1.0868, "step": 726 }, { "epoch": 0.93, "learning_rate": 1.1917106319237386e-05, "loss": 1.0778, "step": 728 }, { "epoch": 0.94, "learning_rate": 1.1875478946926094e-05, "loss": 1.0461, "step": 730 }, { "epoch": 0.94, "learning_rate": 1.1833817863363563e-05, "loss": 1.0645, "step": 732 }, { "epoch": 0.94, "learning_rate": 1.1792123817397041e-05, "loss": 1.1091, "step": 734 }, { "epoch": 0.94, "learning_rate": 1.1750397558466273e-05, "loss": 1.1129, "step": 736 }, { "epoch": 0.95, "learning_rate": 1.1708639836590024e-05, "loss": 1.0704, "step": 738 }, { "epoch": 0.95, "learning_rate": 1.1666851402352587e-05, "loss": 1.0903, "step": 740 }, { "epoch": 0.95, "learning_rate": 1.1625033006890316e-05, "loss": 1.0858, "step": 742 }, { "epoch": 0.95, "learning_rate": 1.15831854018781e-05, "loss": 1.1189, "step": 744 }, { "epoch": 0.96, "learning_rate": 1.154130933951587e-05, "loss": 1.0825, "step": 746 }, { "epoch": 0.96, "learning_rate": 1.1499405572515059e-05, "loss": 1.0744, "step": 748 }, { "epoch": 0.96, "learning_rate": 1.1457474854085095e-05, "loss": 1.0701, "step": 750 }, { "epoch": 0.96, "learning_rate": 1.1415517937919846e-05, "loss": 1.0705, "step": 752 }, { "epoch": 0.97, "learning_rate": 1.1373535578184083e-05, "loss": 1.0532, "step": 754 }, { "epoch": 0.97, "learning_rate": 1.1331528529499909e-05, "loss": 1.1365, "step": 756 }, { "epoch": 0.97, "learning_rate": 1.1289497546933212e-05, "loss": 1.0797, "step": 758 }, { "epoch": 0.97, "learning_rate": 1.124744338598008e-05, "loss": 1.0715, "step": 760 }, { "epoch": 0.98, "learning_rate": 1.1205366802553231e-05, "loss": 1.1339, "step": 762 }, { "epoch": 0.98, "learning_rate": 1.1163268552968422e-05, "loss": 1.0962, "step": 764 }, { "epoch": 0.98, "learning_rate": 1.112114939393085e-05, "loss": 1.075, "step": 766 }, { "epoch": 0.98, "learning_rate": 1.1079010082521557e-05, "loss": 1.056, "step": 768 }, { "epoch": 0.99, "learning_rate": 1.1036851376183812e-05, "loss": 1.05, "step": 770 }, { "epoch": 0.99, "learning_rate": 1.0994674032709514e-05, "loss": 1.0404, "step": 772 }, { "epoch": 0.99, "learning_rate": 1.095247881022555e-05, "loss": 1.0539, "step": 774 }, { "epoch": 0.99, "learning_rate": 1.091026646718018e-05, "loss": 1.0554, "step": 776 }, { "epoch": 1.0, "learning_rate": 1.0868037762329405e-05, "loss": 1.0717, "step": 778 }, { "epoch": 1.0, "learning_rate": 1.0825793454723325e-05, "loss": 1.0589, "step": 780 }, { "epoch": 1.0, "learning_rate": 1.0783534303692493e-05, "loss": 0.7134, "step": 782 }, { "epoch": 1.01, "learning_rate": 1.0741261068834266e-05, "loss": 0.6371, "step": 784 }, { "epoch": 1.01, "learning_rate": 1.0698974509999159e-05, "loss": 0.6415, "step": 786 }, { "epoch": 1.01, "learning_rate": 1.0656675387277183e-05, "loss": 0.6547, "step": 788 }, { "epoch": 1.01, "learning_rate": 1.0614364460984178e-05, "loss": 0.6518, "step": 790 }, { "epoch": 1.02, "learning_rate": 1.057204249164815e-05, "loss": 0.672, "step": 792 }, { "epoch": 1.02, "learning_rate": 1.0529710239995606e-05, "loss": 0.669, "step": 794 }, { "epoch": 1.02, "learning_rate": 1.0487368466937866e-05, "loss": 0.6394, "step": 796 }, { "epoch": 1.02, "learning_rate": 1.0445017933557404e-05, "loss": 0.6664, "step": 798 }, { "epoch": 1.03, "learning_rate": 1.0402659401094154e-05, "loss": 0.6451, "step": 800 }, { "epoch": 1.03, "learning_rate": 1.036029363093183e-05, "loss": 0.6917, "step": 802 }, { "epoch": 1.03, "learning_rate": 1.0317921384584245e-05, "loss": 0.6579, "step": 804 }, { "epoch": 1.03, "learning_rate": 1.0275543423681622e-05, "loss": 0.6505, "step": 806 }, { "epoch": 1.04, "learning_rate": 1.0233160509956893e-05, "loss": 0.636, "step": 808 }, { "epoch": 1.04, "learning_rate": 1.0190773405232024e-05, "loss": 0.6344, "step": 810 }, { "epoch": 1.04, "learning_rate": 1.014838287140431e-05, "loss": 0.6315, "step": 812 }, { "epoch": 1.04, "learning_rate": 1.010598967043268e-05, "loss": 0.6549, "step": 814 }, { "epoch": 1.05, "learning_rate": 1.0063594564324014e-05, "loss": 0.6585, "step": 816 }, { "epoch": 1.05, "learning_rate": 1.0021198315119426e-05, "loss": 0.6358, "step": 818 }, { "epoch": 1.05, "learning_rate": 9.97880168488058e-06, "loss": 0.6627, "step": 820 }, { "epoch": 1.05, "learning_rate": 9.936405435675991e-06, "loss": 0.6451, "step": 822 }, { "epoch": 1.06, "learning_rate": 9.894010329567322e-06, "loss": 0.6304, "step": 824 }, { "epoch": 1.06, "learning_rate": 9.851617128595694e-06, "loss": 0.6271, "step": 826 }, { "epoch": 1.06, "learning_rate": 9.809226594767979e-06, "loss": 0.6649, "step": 828 }, { "epoch": 1.06, "learning_rate": 9.766839490043108e-06, "loss": 0.6811, "step": 830 }, { "epoch": 1.07, "learning_rate": 9.724456576318383e-06, "loss": 0.6136, "step": 832 }, { "epoch": 1.07, "learning_rate": 9.682078615415755e-06, "loss": 0.6876, "step": 834 }, { "epoch": 1.07, "learning_rate": 9.63970636906817e-06, "loss": 0.6612, "step": 836 }, { "epoch": 1.07, "learning_rate": 9.597340598905851e-06, "loss": 0.6553, "step": 838 }, { "epoch": 1.08, "learning_rate": 9.554982066442601e-06, "loss": 0.6467, "step": 840 }, { "epoch": 1.08, "learning_rate": 9.512631533062138e-06, "loss": 0.6705, "step": 842 }, { "epoch": 1.08, "learning_rate": 9.470289760004398e-06, "loss": 0.6552, "step": 844 }, { "epoch": 1.08, "learning_rate": 9.427957508351852e-06, "loss": 0.6369, "step": 846 }, { "epoch": 1.09, "learning_rate": 9.385635539015824e-06, "loss": 0.6447, "step": 848 }, { "epoch": 1.09, "learning_rate": 9.343324612722819e-06, "loss": 0.6566, "step": 850 }, { "epoch": 1.09, "learning_rate": 9.301025490000843e-06, "loss": 0.6285, "step": 852 }, { "epoch": 1.09, "learning_rate": 9.25873893116574e-06, "loss": 0.6479, "step": 854 }, { "epoch": 1.1, "learning_rate": 9.216465696307513e-06, "loss": 0.6463, "step": 856 }, { "epoch": 1.1, "learning_rate": 9.174206545276678e-06, "loss": 0.6277, "step": 858 }, { "epoch": 1.1, "learning_rate": 9.131962237670599e-06, "loss": 0.6608, "step": 860 }, { "epoch": 1.11, "learning_rate": 9.089733532819825e-06, "loss": 0.6512, "step": 862 }, { "epoch": 1.11, "learning_rate": 9.047521189774456e-06, "loss": 0.6306, "step": 864 }, { "epoch": 1.11, "learning_rate": 9.005325967290489e-06, "loss": 0.6236, "step": 866 }, { "epoch": 1.11, "learning_rate": 8.963148623816191e-06, "loss": 0.653, "step": 868 }, { "epoch": 1.12, "learning_rate": 8.920989917478446e-06, "loss": 0.656, "step": 870 }, { "epoch": 1.12, "learning_rate": 8.878850606069152e-06, "loss": 0.6556, "step": 872 }, { "epoch": 1.12, "learning_rate": 8.836731447031581e-06, "loss": 0.6574, "step": 874 }, { "epoch": 1.12, "learning_rate": 8.79463319744677e-06, "loss": 0.6456, "step": 876 }, { "epoch": 1.13, "learning_rate": 8.752556614019924e-06, "loss": 0.6353, "step": 878 }, { "epoch": 1.13, "learning_rate": 8.710502453066791e-06, "loss": 0.6369, "step": 880 }, { "epoch": 1.13, "learning_rate": 8.668471470500094e-06, "loss": 0.6316, "step": 882 }, { "epoch": 1.13, "learning_rate": 8.626464421815919e-06, "loss": 0.6415, "step": 884 }, { "epoch": 1.14, "learning_rate": 8.584482062080154e-06, "loss": 0.6522, "step": 886 }, { "epoch": 1.14, "learning_rate": 8.542525145914907e-06, "loss": 0.6476, "step": 888 }, { "epoch": 1.14, "learning_rate": 8.500594427484946e-06, "loss": 0.625, "step": 890 }, { "epoch": 1.14, "learning_rate": 8.458690660484134e-06, "loss": 0.6545, "step": 892 }, { "epoch": 1.15, "learning_rate": 8.416814598121901e-06, "loss": 0.6584, "step": 894 }, { "epoch": 1.15, "learning_rate": 8.374966993109689e-06, "loss": 0.6388, "step": 896 }, { "epoch": 1.15, "learning_rate": 8.333148597647414e-06, "loss": 0.6499, "step": 898 }, { "epoch": 1.15, "learning_rate": 8.291360163409978e-06, "loss": 0.6556, "step": 900 }, { "epoch": 1.16, "learning_rate": 8.249602441533727e-06, "loss": 0.6359, "step": 902 }, { "epoch": 1.16, "learning_rate": 8.207876182602959e-06, "loss": 0.612, "step": 904 }, { "epoch": 1.16, "learning_rate": 8.16618213663644e-06, "loss": 0.6158, "step": 906 }, { "epoch": 1.16, "learning_rate": 8.12452105307391e-06, "loss": 0.6362, "step": 908 }, { "epoch": 1.17, "learning_rate": 8.082893680762619e-06, "loss": 0.6402, "step": 910 }, { "epoch": 1.17, "learning_rate": 8.041300767943867e-06, "loss": 0.6629, "step": 912 }, { "epoch": 1.17, "learning_rate": 7.999743062239557e-06, "loss": 0.6136, "step": 914 }, { "epoch": 1.17, "learning_rate": 7.958221310638749e-06, "loss": 0.6517, "step": 916 }, { "epoch": 1.18, "learning_rate": 7.916736259484239e-06, "loss": 0.6635, "step": 918 }, { "epoch": 1.18, "learning_rate": 7.875288654459144e-06, "loss": 0.6405, "step": 920 }, { "epoch": 1.18, "learning_rate": 7.833879240573487e-06, "loss": 0.6242, "step": 922 }, { "epoch": 1.18, "learning_rate": 7.792508762150833e-06, "loss": 0.6642, "step": 924 }, { "epoch": 1.19, "learning_rate": 7.751177962814867e-06, "loss": 0.6297, "step": 926 }, { "epoch": 1.19, "learning_rate": 7.709887585476075e-06, "loss": 0.6458, "step": 928 }, { "epoch": 1.19, "learning_rate": 7.668638372318359e-06, "loss": 0.6413, "step": 930 }, { "epoch": 1.19, "learning_rate": 7.627431064785705e-06, "loss": 0.6093, "step": 932 }, { "epoch": 1.2, "learning_rate": 7.5862664035688604e-06, "loss": 0.6088, "step": 934 }, { "epoch": 1.2, "learning_rate": 7.545145128592009e-06, "loss": 0.628, "step": 936 }, { "epoch": 1.2, "learning_rate": 7.504067978999484e-06, "loss": 0.6382, "step": 938 }, { "epoch": 1.21, "learning_rate": 7.463035693142473e-06, "loss": 0.6458, "step": 940 }, { "epoch": 1.21, "learning_rate": 7.422049008565757e-06, "loss": 0.638, "step": 942 }, { "epoch": 1.21, "learning_rate": 7.38110866199443e-06, "loss": 0.6704, "step": 944 }, { "epoch": 1.21, "learning_rate": 7.340215389320686e-06, "loss": 0.6049, "step": 946 }, { "epoch": 1.22, "learning_rate": 7.299369925590575e-06, "loss": 0.6124, "step": 948 }, { "epoch": 1.22, "learning_rate": 7.258573004990789e-06, "loss": 0.6417, "step": 950 }, { "epoch": 1.22, "learning_rate": 7.217825360835475e-06, "loss": 0.6359, "step": 952 }, { "epoch": 1.22, "learning_rate": 7.1771277255530456e-06, "loss": 0.6347, "step": 954 }, { "epoch": 1.23, "learning_rate": 7.136480830673018e-06, "loss": 0.6138, "step": 956 }, { "epoch": 1.23, "learning_rate": 7.095885406812866e-06, "loss": 0.6218, "step": 958 }, { "epoch": 1.23, "learning_rate": 7.05534218366488e-06, "loss": 0.614, "step": 960 }, { "epoch": 1.23, "learning_rate": 7.014851889983058e-06, "loss": 0.6245, "step": 962 }, { "epoch": 1.24, "learning_rate": 6.974415253570003e-06, "loss": 0.6275, "step": 964 }, { "epoch": 1.24, "learning_rate": 6.934033001263847e-06, "loss": 0.6227, "step": 966 }, { "epoch": 1.24, "learning_rate": 6.893705858925179e-06, "loss": 0.6287, "step": 968 }, { "epoch": 1.24, "learning_rate": 6.853434551424001e-06, "loss": 0.6074, "step": 970 }, { "epoch": 1.25, "learning_rate": 6.813219802626698e-06, "loss": 0.6325, "step": 972 }, { "epoch": 1.25, "learning_rate": 6.773062335383024e-06, "loss": 0.6666, "step": 974 }, { "epoch": 1.25, "learning_rate": 6.73296287151312e-06, "loss": 0.6135, "step": 976 }, { "epoch": 1.25, "learning_rate": 6.692922131794517e-06, "loss": 0.5956, "step": 978 }, { "epoch": 1.26, "learning_rate": 6.652940835949208e-06, "loss": 0.5967, "step": 980 }, { "epoch": 1.26, "learning_rate": 6.6130197026306945e-06, "loss": 0.6195, "step": 982 }, { "epoch": 1.26, "learning_rate": 6.573159449411071e-06, "loss": 0.6283, "step": 984 }, { "epoch": 1.26, "learning_rate": 6.533360792768122e-06, "loss": 0.6356, "step": 986 }, { "epoch": 1.27, "learning_rate": 6.4936244480724575e-06, "loss": 0.595, "step": 988 }, { "epoch": 1.27, "learning_rate": 6.453951129574644e-06, "loss": 0.6115, "step": 990 }, { "epoch": 1.27, "learning_rate": 6.4143415503923676e-06, "loss": 0.6164, "step": 992 }, { "epoch": 1.27, "learning_rate": 6.374796422497622e-06, "loss": 0.628, "step": 994 }, { "epoch": 1.28, "learning_rate": 6.335316456703891e-06, "loss": 0.6135, "step": 996 }, { "epoch": 1.28, "learning_rate": 6.295902362653401e-06, "loss": 0.6223, "step": 998 }, { "epoch": 1.28, "learning_rate": 6.256554848804343e-06, "loss": 0.6014, "step": 1000 }, { "epoch": 1.28, "learning_rate": 6.2172746224181524e-06, "loss": 0.6463, "step": 1002 }, { "epoch": 1.29, "learning_rate": 6.178062389546784e-06, "loss": 0.6153, "step": 1004 }, { "epoch": 1.29, "learning_rate": 6.138918855020028e-06, "loss": 0.6424, "step": 1006 }, { "epoch": 1.29, "learning_rate": 6.099844722432844e-06, "loss": 0.6166, "step": 1008 }, { "epoch": 1.29, "learning_rate": 6.060840694132701e-06, "loss": 0.6097, "step": 1010 }, { "epoch": 1.3, "learning_rate": 6.021907471206971e-06, "loss": 0.6262, "step": 1012 }, { "epoch": 1.3, "learning_rate": 5.983045753470308e-06, "loss": 0.6141, "step": 1014 }, { "epoch": 1.3, "learning_rate": 5.944256239452085e-06, "loss": 0.6047, "step": 1016 }, { "epoch": 1.31, "learning_rate": 5.905539626383831e-06, "loss": 0.6547, "step": 1018 }, { "epoch": 1.31, "learning_rate": 5.866896610186701e-06, "loss": 0.5995, "step": 1020 }, { "epoch": 1.31, "learning_rate": 5.82832788545896e-06, "loss": 0.6131, "step": 1022 }, { "epoch": 1.31, "learning_rate": 5.789834145463506e-06, "loss": 0.6484, "step": 1024 }, { "epoch": 1.32, "learning_rate": 5.7514160821154085e-06, "loss": 0.6497, "step": 1026 }, { "epoch": 1.32, "learning_rate": 5.713074385969457e-06, "loss": 0.6031, "step": 1028 }, { "epoch": 1.32, "learning_rate": 5.67480974620778e-06, "loss": 0.6294, "step": 1030 }, { "epoch": 1.32, "learning_rate": 5.63662285062742e-06, "loss": 0.6053, "step": 1032 }, { "epoch": 1.33, "learning_rate": 5.598514385627997e-06, "loss": 0.622, "step": 1034 }, { "epoch": 1.33, "learning_rate": 5.56048503619935e-06, "loss": 0.6325, "step": 1036 }, { "epoch": 1.33, "learning_rate": 5.522535485909258e-06, "loss": 0.6111, "step": 1038 }, { "epoch": 1.33, "learning_rate": 5.484666416891109e-06, "loss": 0.6061, "step": 1040 }, { "epoch": 1.34, "learning_rate": 5.446878509831668e-06, "loss": 0.5885, "step": 1042 }, { "epoch": 1.34, "learning_rate": 5.409172443958844e-06, "loss": 0.6158, "step": 1044 }, { "epoch": 1.34, "learning_rate": 5.371548897029457e-06, "loss": 0.6153, "step": 1046 }, { "epoch": 1.34, "learning_rate": 5.334008545317082e-06, "loss": 0.5985, "step": 1048 }, { "epoch": 1.35, "learning_rate": 5.2965520635998676e-06, "loss": 0.6004, "step": 1050 }, { "epoch": 1.35, "learning_rate": 5.259180125148442e-06, "loss": 0.6108, "step": 1052 }, { "epoch": 1.35, "learning_rate": 5.22189340171377e-06, "loss": 0.6343, "step": 1054 }, { "epoch": 1.35, "learning_rate": 5.184692563515104e-06, "loss": 0.6376, "step": 1056 }, { "epoch": 1.36, "learning_rate": 5.147578279227943e-06, "loss": 0.5997, "step": 1058 }, { "epoch": 1.36, "learning_rate": 5.110551215971981e-06, "loss": 0.6192, "step": 1060 }, { "epoch": 1.36, "learning_rate": 5.073612039299157e-06, "loss": 0.6276, "step": 1062 }, { "epoch": 1.36, "learning_rate": 5.036761413181659e-06, "loss": 0.6493, "step": 1064 }, { "epoch": 1.37, "learning_rate": 5.000000000000003e-06, "loss": 0.5993, "step": 1066 }, { "epoch": 1.37, "learning_rate": 4.963328460531127e-06, "loss": 0.5804, "step": 1068 }, { "epoch": 1.37, "learning_rate": 4.926747453936509e-06, "loss": 0.6444, "step": 1070 }, { "epoch": 1.37, "learning_rate": 4.890257637750332e-06, "loss": 0.6287, "step": 1072 }, { "epoch": 1.38, "learning_rate": 4.853859667867641e-06, "loss": 0.5872, "step": 1074 }, { "epoch": 1.38, "learning_rate": 4.817554198532582e-06, "loss": 0.6015, "step": 1076 }, { "epoch": 1.38, "learning_rate": 4.781341882326615e-06, "loss": 0.5941, "step": 1078 }, { "epoch": 1.38, "learning_rate": 4.745223370156797e-06, "loss": 0.6406, "step": 1080 }, { "epoch": 1.39, "learning_rate": 4.709199311244098e-06, "loss": 0.6334, "step": 1082 }, { "epoch": 1.39, "learning_rate": 4.673270353111687e-06, "loss": 0.647, "step": 1084 }, { "epoch": 1.39, "learning_rate": 4.63743714157335e-06, "loss": 0.6168, "step": 1086 }, { "epoch": 1.39, "learning_rate": 4.6017003207218294e-06, "loss": 0.595, "step": 1088 }, { "epoch": 1.4, "learning_rate": 4.566060532917288e-06, "loss": 0.6195, "step": 1090 }, { "epoch": 1.4, "learning_rate": 4.530518418775734e-06, "loss": 0.6084, "step": 1092 }, { "epoch": 1.4, "learning_rate": 4.4950746171575135e-06, "loss": 0.5976, "step": 1094 }, { "epoch": 1.41, "learning_rate": 4.459729765155842e-06, "loss": 0.6086, "step": 1096 }, { "epoch": 1.41, "learning_rate": 4.424484498085335e-06, "loss": 0.5855, "step": 1098 }, { "epoch": 1.41, "learning_rate": 4.389339449470592e-06, "loss": 0.6503, "step": 1100 }, { "epoch": 1.41, "learning_rate": 4.354295251034811e-06, "loss": 0.6035, "step": 1102 }, { "epoch": 1.42, "learning_rate": 4.319352532688444e-06, "loss": 0.5885, "step": 1104 }, { "epoch": 1.42, "learning_rate": 4.284511922517853e-06, "loss": 0.5765, "step": 1106 }, { "epoch": 1.42, "learning_rate": 4.249774046774034e-06, "loss": 0.6631, "step": 1108 }, { "epoch": 1.42, "learning_rate": 4.2151395298613675e-06, "loss": 0.6236, "step": 1110 }, { "epoch": 1.43, "learning_rate": 4.180608994326371e-06, "loss": 0.6516, "step": 1112 }, { "epoch": 1.43, "learning_rate": 4.1461830608465385e-06, "loss": 0.6293, "step": 1114 }, { "epoch": 1.43, "learning_rate": 4.111862348219158e-06, "loss": 0.6361, "step": 1116 }, { "epoch": 1.43, "learning_rate": 4.077647473350201e-06, "loss": 0.6274, "step": 1118 }, { "epoch": 1.44, "learning_rate": 4.04353905124324e-06, "loss": 0.6109, "step": 1120 }, { "epoch": 1.44, "learning_rate": 4.009537694988372e-06, "loss": 0.5974, "step": 1122 }, { "epoch": 1.44, "learning_rate": 3.975644015751234e-06, "loss": 0.5849, "step": 1124 }, { "epoch": 1.44, "learning_rate": 3.941858622761975e-06, "loss": 0.6025, "step": 1126 }, { "epoch": 1.45, "learning_rate": 3.908182123304344e-06, "loss": 0.6323, "step": 1128 }, { "epoch": 1.45, "learning_rate": 3.8746151227047455e-06, "loss": 0.58, "step": 1130 }, { "epoch": 1.45, "learning_rate": 3.84115822432137e-06, "loss": 0.631, "step": 1132 }, { "epoch": 1.45, "learning_rate": 3.807812029533362e-06, "loss": 0.6424, "step": 1134 }, { "epoch": 1.46, "learning_rate": 3.7745771377299758e-06, "loss": 0.6061, "step": 1136 }, { "epoch": 1.46, "learning_rate": 3.7414541462998446e-06, "loss": 0.6172, "step": 1138 }, { "epoch": 1.46, "learning_rate": 3.708443650620206e-06, "loss": 0.6402, "step": 1140 }, { "epoch": 1.46, "learning_rate": 3.6755462440462288e-06, "loss": 0.5702, "step": 1142 }, { "epoch": 1.47, "learning_rate": 3.6427625179003223e-06, "loss": 0.6181, "step": 1144 }, { "epoch": 1.47, "learning_rate": 3.6100930614615204e-06, "loss": 0.6158, "step": 1146 }, { "epoch": 1.47, "learning_rate": 3.5775384619549e-06, "loss": 0.586, "step": 1148 }, { "epoch": 1.47, "learning_rate": 3.5450993045409997e-06, "loss": 0.6334, "step": 1150 }, { "epoch": 1.48, "learning_rate": 3.5127761723053313e-06, "loss": 0.5789, "step": 1152 }, { "epoch": 1.48, "learning_rate": 3.4805696462478634e-06, "loss": 0.6111, "step": 1154 }, { "epoch": 1.48, "learning_rate": 3.448480305272619e-06, "loss": 0.5845, "step": 1156 }, { "epoch": 1.48, "learning_rate": 3.41650872617724e-06, "loss": 0.6199, "step": 1158 }, { "epoch": 1.49, "learning_rate": 3.384655483642624e-06, "loss": 0.6317, "step": 1160 }, { "epoch": 1.49, "learning_rate": 3.352921150222612e-06, "loss": 0.6299, "step": 1162 }, { "epoch": 1.49, "learning_rate": 3.321306296333673e-06, "loss": 0.5978, "step": 1164 }, { "epoch": 1.49, "learning_rate": 3.2898114902446708e-06, "loss": 0.6216, "step": 1166 }, { "epoch": 1.5, "learning_rate": 3.2584372980666344e-06, "loss": 0.619, "step": 1168 }, { "epoch": 1.5, "learning_rate": 3.2271842837425917e-06, "loss": 0.6271, "step": 1170 }, { "epoch": 1.5, "learning_rate": 3.1960530090374277e-06, "loss": 0.5788, "step": 1172 }, { "epoch": 1.51, "learning_rate": 3.165044033527789e-06, "loss": 0.5523, "step": 1174 }, { "epoch": 1.51, "learning_rate": 3.134157914592032e-06, "loss": 0.5863, "step": 1176 }, { "epoch": 1.51, "learning_rate": 3.1033952074001882e-06, "loss": 0.6359, "step": 1178 }, { "epoch": 1.51, "learning_rate": 3.0727564649040066e-06, "loss": 0.6057, "step": 1180 }, { "epoch": 1.52, "learning_rate": 3.042242237826991e-06, "loss": 0.5813, "step": 1182 }, { "epoch": 1.52, "learning_rate": 3.011853074654515e-06, "loss": 0.6351, "step": 1184 }, { "epoch": 1.52, "learning_rate": 2.981589521623973e-06, "loss": 0.5861, "step": 1186 }, { "epoch": 1.52, "learning_rate": 2.951452122714926e-06, "loss": 0.608, "step": 1188 }, { "epoch": 1.53, "learning_rate": 2.9214414196393702e-06, "loss": 0.6156, "step": 1190 }, { "epoch": 1.53, "learning_rate": 2.8915579518319626e-06, "loss": 0.6082, "step": 1192 }, { "epoch": 1.53, "learning_rate": 2.861802256440348e-06, "loss": 0.6268, "step": 1194 }, { "epoch": 1.53, "learning_rate": 2.8321748683154893e-06, "loss": 0.5921, "step": 1196 }, { "epoch": 1.54, "learning_rate": 2.8026763200020557e-06, "loss": 0.5828, "step": 1198 }, { "epoch": 1.54, "learning_rate": 2.773307141728867e-06, "loss": 0.6255, "step": 1200 }, { "epoch": 1.54, "learning_rate": 2.744067861399333e-06, "loss": 0.5878, "step": 1202 }, { "epoch": 1.54, "learning_rate": 2.714959004582003e-06, "loss": 0.6113, "step": 1204 }, { "epoch": 1.55, "learning_rate": 2.6859810945010687e-06, "loss": 0.6255, "step": 1206 }, { "epoch": 1.55, "learning_rate": 2.6571346520270147e-06, "loss": 0.5998, "step": 1208 }, { "epoch": 1.55, "learning_rate": 2.628420195667214e-06, "loss": 0.607, "step": 1210 }, { "epoch": 1.55, "learning_rate": 2.5998382415566258e-06, "loss": 0.5773, "step": 1212 }, { "epoch": 1.56, "learning_rate": 2.5713893034485216e-06, "loss": 0.5944, "step": 1214 }, { "epoch": 1.56, "learning_rate": 2.5430738927052346e-06, "loss": 0.6249, "step": 1216 }, { "epoch": 1.56, "learning_rate": 2.514892518288988e-06, "loss": 0.6379, "step": 1218 }, { "epoch": 1.56, "learning_rate": 2.4868456867527315e-06, "loss": 0.5833, "step": 1220 }, { "epoch": 1.57, "learning_rate": 2.4589339022310386e-06, "loss": 0.5841, "step": 1222 }, { "epoch": 1.57, "learning_rate": 2.431157666431052e-06, "loss": 0.6359, "step": 1224 }, { "epoch": 1.57, "learning_rate": 2.403517478623456e-06, "loss": 0.575, "step": 1226 }, { "epoch": 1.57, "learning_rate": 2.3760138356335172e-06, "loss": 0.6207, "step": 1228 }, { "epoch": 1.58, "learning_rate": 2.348647231832131e-06, "loss": 0.6009, "step": 1230 }, { "epoch": 1.58, "learning_rate": 2.3214181591269603e-06, "loss": 0.6267, "step": 1232 }, { "epoch": 1.58, "learning_rate": 2.2943271069535754e-06, "loss": 0.6262, "step": 1234 }, { "epoch": 1.58, "learning_rate": 2.267374562266662e-06, "loss": 0.5881, "step": 1236 }, { "epoch": 1.59, "learning_rate": 2.240561009531281e-06, "loss": 0.5842, "step": 1238 }, { "epoch": 1.59, "learning_rate": 2.2138869307141266e-06, "loss": 0.5602, "step": 1240 }, { "epoch": 1.59, "learning_rate": 2.1873528052749094e-06, "loss": 0.596, "step": 1242 }, { "epoch": 1.59, "learning_rate": 2.1609591101576945e-06, "loss": 0.5942, "step": 1244 }, { "epoch": 1.6, "learning_rate": 2.1347063197823648e-06, "loss": 0.5955, "step": 1246 }, { "epoch": 1.6, "learning_rate": 2.1085949060360654e-06, "loss": 0.6148, "step": 1248 }, { "epoch": 1.6, "learning_rate": 2.0826253382647334e-06, "loss": 0.6484, "step": 1250 }, { "epoch": 1.61, "learning_rate": 2.056798083264667e-06, "loss": 0.5729, "step": 1252 }, { "epoch": 1.61, "learning_rate": 2.0311136052741274e-06, "loss": 0.6088, "step": 1254 }, { "epoch": 1.61, "learning_rate": 2.0055723659649907e-06, "loss": 0.6047, "step": 1256 }, { "epoch": 1.61, "learning_rate": 1.9801748244344587e-06, "loss": 0.5812, "step": 1258 }, { "epoch": 1.62, "learning_rate": 1.9549214371968008e-06, "loss": 0.5976, "step": 1260 }, { "epoch": 1.62, "learning_rate": 1.9298126581751542e-06, "loss": 0.6362, "step": 1262 }, { "epoch": 1.62, "learning_rate": 1.9048489386933545e-06, "loss": 0.5954, "step": 1264 }, { "epoch": 1.62, "learning_rate": 1.8800307274678364e-06, "loss": 0.6468, "step": 1266 }, { "epoch": 1.63, "learning_rate": 1.8553584705995564e-06, "loss": 0.6041, "step": 1268 }, { "epoch": 1.63, "learning_rate": 1.8308326115659757e-06, "loss": 0.5584, "step": 1270 }, { "epoch": 1.63, "learning_rate": 1.8064535912131032e-06, "loss": 0.6246, "step": 1272 }, { "epoch": 1.63, "learning_rate": 1.7822218477475496e-06, "loss": 0.6056, "step": 1274 }, { "epoch": 1.64, "learning_rate": 1.7581378167286655e-06, "loss": 0.6081, "step": 1276 }, { "epoch": 1.64, "learning_rate": 1.7342019310607062e-06, "loss": 0.5951, "step": 1278 }, { "epoch": 1.64, "learning_rate": 1.7104146209850591e-06, "loss": 0.5586, "step": 1280 }, { "epoch": 1.64, "learning_rate": 1.6867763140724969e-06, "loss": 0.6417, "step": 1282 }, { "epoch": 1.65, "learning_rate": 1.6632874352154982e-06, "loss": 0.5777, "step": 1284 }, { "epoch": 1.65, "learning_rate": 1.6399484066206183e-06, "loss": 0.5733, "step": 1286 }, { "epoch": 1.65, "learning_rate": 1.6167596478008817e-06, "loss": 0.6019, "step": 1288 }, { "epoch": 1.65, "learning_rate": 1.5937215755682667e-06, "loss": 0.6216, "step": 1290 }, { "epoch": 1.66, "learning_rate": 1.5708346040261812e-06, "loss": 0.6211, "step": 1292 }, { "epoch": 1.66, "learning_rate": 1.5480991445620541e-06, "loss": 0.6229, "step": 1294 }, { "epoch": 1.66, "learning_rate": 1.5255156058399124e-06, "loss": 0.6139, "step": 1296 }, { "epoch": 1.66, "learning_rate": 1.5030843937930485e-06, "loss": 0.6054, "step": 1298 }, { "epoch": 1.67, "learning_rate": 1.4808059116167306e-06, "loss": 0.5948, "step": 1300 }, { "epoch": 1.67, "learning_rate": 1.4586805597609333e-06, "loss": 0.5594, "step": 1302 }, { "epoch": 1.67, "learning_rate": 1.4367087359231668e-06, "loss": 0.6174, "step": 1304 }, { "epoch": 1.67, "learning_rate": 1.4148908350413048e-06, "loss": 0.5938, "step": 1306 }, { "epoch": 1.68, "learning_rate": 1.3932272492864984e-06, "loss": 0.5763, "step": 1308 }, { "epoch": 1.68, "learning_rate": 1.3717183680561253e-06, "loss": 0.5997, "step": 1310 }, { "epoch": 1.68, "learning_rate": 1.3503645779667852e-06, "loss": 0.564, "step": 1312 }, { "epoch": 1.68, "learning_rate": 1.3291662628473634e-06, "loss": 0.5961, "step": 1314 }, { "epoch": 1.69, "learning_rate": 1.308123803732111e-06, "loss": 0.627, "step": 1316 }, { "epoch": 1.69, "learning_rate": 1.2872375788538171e-06, "loss": 0.6356, "step": 1318 }, { "epoch": 1.69, "learning_rate": 1.266507963636997e-06, "loss": 0.6069, "step": 1320 }, { "epoch": 1.69, "learning_rate": 1.2459353306911438e-06, "loss": 0.6107, "step": 1322 }, { "epoch": 1.7, "learning_rate": 1.2255200498040432e-06, "loss": 0.606, "step": 1324 }, { "epoch": 1.7, "learning_rate": 1.2052624879351105e-06, "loss": 0.583, "step": 1326 }, { "epoch": 1.7, "learning_rate": 1.1851630092088051e-06, "loss": 0.5755, "step": 1328 }, { "epoch": 1.71, "learning_rate": 1.1652219749080817e-06, "loss": 0.5951, "step": 1330 }, { "epoch": 1.71, "learning_rate": 1.1454397434679022e-06, "loss": 0.5939, "step": 1332 }, { "epoch": 1.71, "learning_rate": 1.12581667046878e-06, "loss": 0.6146, "step": 1334 }, { "epoch": 1.71, "learning_rate": 1.1063531086304003e-06, "loss": 0.5748, "step": 1336 }, { "epoch": 1.72, "learning_rate": 1.0870494078052796e-06, "loss": 0.5867, "step": 1338 }, { "epoch": 1.72, "learning_rate": 1.067905914972468e-06, "loss": 0.5761, "step": 1340 }, { "epoch": 1.72, "learning_rate": 1.0489229742313223e-06, "loss": 0.592, "step": 1342 }, { "epoch": 1.72, "learning_rate": 1.0301009267953145e-06, "loss": 0.5793, "step": 1344 }, { "epoch": 1.73, "learning_rate": 1.0114401109859019e-06, "loss": 0.6008, "step": 1346 }, { "epoch": 1.73, "learning_rate": 9.929408622264448e-07, "loss": 0.5669, "step": 1348 }, { "epoch": 1.73, "learning_rate": 9.746035130361741e-07, "loss": 0.5875, "step": 1350 }, { "epoch": 1.73, "learning_rate": 9.564283930242258e-07, "loss": 0.6218, "step": 1352 }, { "epoch": 1.74, "learning_rate": 9.384158288836987e-07, "loss": 0.5938, "step": 1354 }, { "epoch": 1.74, "learning_rate": 9.205661443857994e-07, "loss": 0.5622, "step": 1356 }, { "epoch": 1.74, "learning_rate": 9.028796603740097e-07, "loss": 0.6239, "step": 1358 }, { "epoch": 1.74, "learning_rate": 8.853566947583259e-07, "loss": 0.5665, "step": 1360 }, { "epoch": 1.75, "learning_rate": 8.67997562509546e-07, "loss": 0.6079, "step": 1362 }, { "epoch": 1.75, "learning_rate": 8.508025756535987e-07, "loss": 0.5813, "step": 1364 }, { "epoch": 1.75, "learning_rate": 8.337720432659513e-07, "loss": 0.6135, "step": 1366 }, { "epoch": 1.75, "learning_rate": 8.169062714660347e-07, "loss": 0.5598, "step": 1368 }, { "epoch": 1.76, "learning_rate": 8.002055634117578e-07, "loss": 0.5624, "step": 1370 }, { "epoch": 1.76, "learning_rate": 7.836702192940493e-07, "loss": 0.6002, "step": 1372 }, { "epoch": 1.76, "learning_rate": 7.673005363314578e-07, "loss": 0.5882, "step": 1374 }, { "epoch": 1.76, "learning_rate": 7.510968087648262e-07, "loss": 0.6119, "step": 1376 }, { "epoch": 1.77, "learning_rate": 7.350593278519824e-07, "loss": 0.5976, "step": 1378 }, { "epoch": 1.77, "learning_rate": 7.19188381862519e-07, "loss": 0.5798, "step": 1380 }, { "epoch": 1.77, "learning_rate": 7.034842560726008e-07, "loss": 0.5791, "step": 1382 }, { "epoch": 1.77, "learning_rate": 6.879472327598502e-07, "loss": 0.5823, "step": 1384 }, { "epoch": 1.78, "learning_rate": 6.725775911982602e-07, "loss": 0.5993, "step": 1386 }, { "epoch": 1.78, "learning_rate": 6.573756076531779e-07, "loss": 0.5688, "step": 1388 }, { "epoch": 1.78, "learning_rate": 6.423415553763479e-07, "loss": 0.5925, "step": 1390 }, { "epoch": 1.78, "learning_rate": 6.274757046009871e-07, "loss": 0.6044, "step": 1392 }, { "epoch": 1.79, "learning_rate": 6.127783225369377e-07, "loss": 0.6185, "step": 1394 }, { "epoch": 1.79, "learning_rate": 5.982496733658582e-07, "loss": 0.5809, "step": 1396 }, { "epoch": 1.79, "learning_rate": 5.83890018236476e-07, "loss": 0.5961, "step": 1398 }, { "epoch": 1.79, "learning_rate": 5.696996152598966e-07, "loss": 0.6069, "step": 1400 }, { "epoch": 1.8, "learning_rate": 5.556787195049573e-07, "loss": 0.5802, "step": 1402 }, { "epoch": 1.8, "learning_rate": 5.418275829936537e-07, "loss": 0.6161, "step": 1404 }, { "epoch": 1.8, "learning_rate": 5.281464546965953e-07, "loss": 0.5966, "step": 1406 }, { "epoch": 1.81, "learning_rate": 5.146355805285452e-07, "loss": 0.6, "step": 1408 }, { "epoch": 1.81, "learning_rate": 5.012952033439844e-07, "loss": 0.578, "step": 1410 }, { "epoch": 1.81, "learning_rate": 4.881255629327608e-07, "loss": 0.5885, "step": 1412 }, { "epoch": 1.81, "learning_rate": 4.7512689601576843e-07, "loss": 0.5753, "step": 1414 }, { "epoch": 1.82, "learning_rate": 4.6229943624069963e-07, "loss": 0.6083, "step": 1416 }, { "epoch": 1.82, "learning_rate": 4.4964341417784165e-07, "loss": 0.6019, "step": 1418 }, { "epoch": 1.82, "learning_rate": 4.3715905731593233e-07, "loss": 0.6016, "step": 1420 }, { "epoch": 1.82, "learning_rate": 4.248465900580734e-07, "loss": 0.5873, "step": 1422 }, { "epoch": 1.83, "learning_rate": 4.127062337176935e-07, "loss": 0.5647, "step": 1424 }, { "epoch": 1.83, "learning_rate": 4.0073820651457043e-07, "loss": 0.5816, "step": 1426 }, { "epoch": 1.83, "learning_rate": 3.889427235709153e-07, "loss": 0.5444, "step": 1428 }, { "epoch": 1.83, "learning_rate": 3.773199969074959e-07, "loss": 0.6032, "step": 1430 }, { "epoch": 1.84, "learning_rate": 3.658702354398325e-07, "loss": 0.5779, "step": 1432 }, { "epoch": 1.84, "learning_rate": 3.5459364497443696e-07, "loss": 0.5919, "step": 1434 }, { "epoch": 1.84, "learning_rate": 3.4349042820512325e-07, "loss": 0.6086, "step": 1436 }, { "epoch": 1.84, "learning_rate": 3.325607847093537e-07, "loss": 0.5862, "step": 1438 }, { "epoch": 1.85, "learning_rate": 3.2180491094465414e-07, "loss": 0.6099, "step": 1440 }, { "epoch": 1.85, "learning_rate": 3.112230002450889e-07, "loss": 0.5925, "step": 1442 }, { "epoch": 1.85, "learning_rate": 3.0081524281777687e-07, "loss": 0.6217, "step": 1444 }, { "epoch": 1.85, "learning_rate": 2.905818257394799e-07, "loss": 0.5707, "step": 1446 }, { "epoch": 1.86, "learning_rate": 2.805229329532344e-07, "loss": 0.6038, "step": 1448 }, { "epoch": 1.86, "learning_rate": 2.706387452650494e-07, "loss": 0.5877, "step": 1450 }, { "epoch": 1.86, "learning_rate": 2.609294403406537e-07, "loss": 0.5912, "step": 1452 }, { "epoch": 1.86, "learning_rate": 2.513951927023017e-07, "loss": 0.6043, "step": 1454 }, { "epoch": 1.87, "learning_rate": 2.420361737256438e-07, "loss": 0.5947, "step": 1456 }, { "epoch": 1.87, "learning_rate": 2.3285255163663535e-07, "loss": 0.5927, "step": 1458 }, { "epoch": 1.87, "learning_rate": 2.2384449150851695e-07, "loss": 0.6107, "step": 1460 }, { "epoch": 1.87, "learning_rate": 2.1501215525885245e-07, "loss": 0.5427, "step": 1462 }, { "epoch": 1.88, "learning_rate": 2.063557016466111e-07, "loss": 0.5792, "step": 1464 }, { "epoch": 1.88, "learning_rate": 1.978752862693212e-07, "loss": 0.5775, "step": 1466 }, { "epoch": 1.88, "learning_rate": 1.8957106156026084e-07, "loss": 0.5554, "step": 1468 }, { "epoch": 1.88, "learning_rate": 1.8144317678573497e-07, "loss": 0.5464, "step": 1470 }, { "epoch": 1.89, "learning_rate": 1.7349177804237837e-07, "loss": 0.5882, "step": 1472 }, { "epoch": 1.89, "learning_rate": 1.6571700825453674e-07, "loss": 0.5718, "step": 1474 }, { "epoch": 1.89, "learning_rate": 1.5811900717169537e-07, "loss": 0.5631, "step": 1476 }, { "epoch": 1.89, "learning_rate": 1.506979113659679e-07, "loss": 0.6137, "step": 1478 }, { "epoch": 1.9, "learning_rate": 1.4345385422964043e-07, "loss": 0.5804, "step": 1480 }, { "epoch": 1.9, "learning_rate": 1.3638696597277678e-07, "loss": 0.5713, "step": 1482 }, { "epoch": 1.9, "learning_rate": 1.2949737362087156e-07, "loss": 0.5563, "step": 1484 }, { "epoch": 1.91, "learning_rate": 1.227852010125752e-07, "loss": 0.5885, "step": 1486 }, { "epoch": 1.91, "learning_rate": 1.1625056879746133e-07, "loss": 0.5563, "step": 1488 }, { "epoch": 1.91, "learning_rate": 1.0989359443386305e-07, "loss": 0.5924, "step": 1490 }, { "epoch": 1.91, "learning_rate": 1.0371439218675671e-07, "loss": 0.613, "step": 1492 }, { "epoch": 1.92, "learning_rate": 9.771307312571254e-08, "loss": 0.5946, "step": 1494 }, { "epoch": 1.92, "learning_rate": 9.188974512289617e-08, "loss": 0.5728, "step": 1496 }, { "epoch": 1.92, "learning_rate": 8.624451285112689e-08, "loss": 0.5644, "step": 1498 }, { "epoch": 1.92, "learning_rate": 8.077747778200474e-08, "loss": 0.6211, "step": 1500 }, { "epoch": 1.93, "learning_rate": 7.54887381840752e-08, "loss": 0.5863, "step": 1502 }, { "epoch": 1.93, "learning_rate": 7.037838912107298e-08, "loss": 0.6035, "step": 1504 }, { "epoch": 1.93, "learning_rate": 6.544652245020433e-08, "loss": 0.6091, "step": 1506 }, { "epoch": 1.93, "learning_rate": 6.069322682050516e-08, "loss": 0.5979, "step": 1508 }, { "epoch": 1.94, "learning_rate": 5.611858767124001e-08, "loss": 0.6134, "step": 1510 }, { "epoch": 1.94, "learning_rate": 5.1722687230369995e-08, "loss": 0.5743, "step": 1512 }, { "epoch": 1.94, "learning_rate": 4.7505604513072845e-08, "loss": 0.5767, "step": 1514 }, { "epoch": 1.94, "learning_rate": 4.346741532032628e-08, "loss": 0.5905, "step": 1516 }, { "epoch": 1.95, "learning_rate": 3.96081922375402e-08, "loss": 0.576, "step": 1518 }, { "epoch": 1.95, "learning_rate": 3.592800463325663e-08, "loss": 0.5968, "step": 1520 }, { "epoch": 1.95, "learning_rate": 3.242691865790071e-08, "loss": 0.5451, "step": 1522 }, { "epoch": 1.95, "learning_rate": 2.9104997242590528e-08, "loss": 0.5999, "step": 1524 }, { "epoch": 1.96, "learning_rate": 2.5962300098008042e-08, "loss": 0.5615, "step": 1526 }, { "epoch": 1.96, "learning_rate": 2.2998883713326592e-08, "loss": 0.5677, "step": 1528 }, { "epoch": 1.96, "learning_rate": 2.0214801355192826e-08, "loss": 0.6002, "step": 1530 }, { "epoch": 1.96, "learning_rate": 1.761010306676969e-08, "loss": 0.6138, "step": 1532 }, { "epoch": 1.97, "learning_rate": 1.518483566683826e-08, "loss": 0.6095, "step": 1534 }, { "epoch": 1.97, "learning_rate": 1.2939042748955078e-08, "loss": 0.5596, "step": 1536 }, { "epoch": 1.97, "learning_rate": 1.0872764680671666e-08, "loss": 0.6428, "step": 1538 }, { "epoch": 1.97, "learning_rate": 8.986038602802894e-09, "loss": 0.5899, "step": 1540 }, { "epoch": 1.98, "learning_rate": 7.278898428764169e-09, "loss": 0.5952, "step": 1542 }, { "epoch": 1.98, "learning_rate": 5.751374843961932e-09, "loss": 0.5526, "step": 1544 }, { "epoch": 1.98, "learning_rate": 4.403495305237426e-09, "loss": 0.5829, "step": 1546 }, { "epoch": 1.98, "learning_rate": 3.2352840403804264e-09, "loss": 0.5848, "step": 1548 }, { "epoch": 1.99, "learning_rate": 2.246762047685147e-09, "loss": 0.5983, "step": 1550 }, { "epoch": 1.99, "learning_rate": 1.437947095582759e-09, "loss": 0.5923, "step": 1552 }, { "epoch": 1.99, "learning_rate": 8.088537223116533e-10, "loss": 0.5919, "step": 1554 }, { "epoch": 1.99, "learning_rate": 3.594932356654202e-10, "loss": 0.6125, "step": 1556 }, { "epoch": 2.0, "learning_rate": 8.987371278079693e-11, "loss": 0.5734, "step": 1558 }, { "epoch": 2.0, "learning_rate": 0.0, "loss": 0.6057, "step": 1560 }, { "epoch": 2.0, "step": 1560, "total_flos": 3.6540609644891996e+18, "train_loss": 0.8723082382709552, "train_runtime": 12247.4613, "train_samples_per_second": 8.152, "train_steps_per_second": 0.127 } ], "logging_steps": 2, "max_steps": 1560, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 780, "total_flos": 3.6540609644891996e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }