{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 4436, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9850746268656716e-07, "loss": 2.4984, "step": 2 }, { "epoch": 0.0, "learning_rate": 5.970149253731343e-07, "loss": 2.4375, "step": 4 }, { "epoch": 0.01, "learning_rate": 8.955223880597015e-07, "loss": 2.3496, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.1940298507462686e-06, "loss": 2.3019, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.4925373134328358e-06, "loss": 2.2681, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.791044776119403e-06, "loss": 2.307, "step": 12 }, { "epoch": 0.01, "learning_rate": 2.08955223880597e-06, "loss": 2.3224, "step": 14 }, { "epoch": 0.01, "learning_rate": 2.3880597014925373e-06, "loss": 2.2319, "step": 16 }, { "epoch": 0.02, "learning_rate": 2.686567164179105e-06, "loss": 2.2864, "step": 18 }, { "epoch": 0.02, "learning_rate": 2.9850746268656716e-06, "loss": 2.2184, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.283582089552239e-06, "loss": 2.2243, "step": 22 }, { "epoch": 0.02, "learning_rate": 3.582089552238806e-06, "loss": 2.1806, "step": 24 }, { "epoch": 0.02, "learning_rate": 3.8805970149253735e-06, "loss": 2.1346, "step": 26 }, { "epoch": 0.03, "learning_rate": 4.17910447761194e-06, "loss": 2.1112, "step": 28 }, { "epoch": 0.03, "learning_rate": 4.477611940298508e-06, "loss": 2.1375, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.7761194029850745e-06, "loss": 2.0972, "step": 32 }, { "epoch": 0.03, "learning_rate": 5.074626865671642e-06, "loss": 2.1996, "step": 34 }, { "epoch": 0.03, "learning_rate": 5.37313432835821e-06, "loss": 2.115, "step": 36 }, { "epoch": 0.03, "learning_rate": 5.671641791044776e-06, "loss": 2.1727, "step": 38 }, { "epoch": 0.04, "learning_rate": 5.970149253731343e-06, "loss": 2.0205, "step": 40 }, { "epoch": 0.04, "learning_rate": 6.2686567164179116e-06, "loss": 2.0463, "step": 42 }, { "epoch": 0.04, "learning_rate": 6.567164179104478e-06, "loss": 2.1016, "step": 44 }, { "epoch": 0.04, "learning_rate": 6.865671641791045e-06, "loss": 2.1054, "step": 46 }, { "epoch": 0.04, "learning_rate": 7.164179104477612e-06, "loss": 2.0415, "step": 48 }, { "epoch": 0.05, "learning_rate": 7.46268656716418e-06, "loss": 2.0726, "step": 50 }, { "epoch": 0.05, "learning_rate": 7.761194029850747e-06, "loss": 2.0446, "step": 52 }, { "epoch": 0.05, "learning_rate": 8.059701492537314e-06, "loss": 2.0737, "step": 54 }, { "epoch": 0.05, "learning_rate": 8.35820895522388e-06, "loss": 2.0369, "step": 56 }, { "epoch": 0.05, "learning_rate": 8.656716417910447e-06, "loss": 2.0708, "step": 58 }, { "epoch": 0.05, "learning_rate": 8.955223880597016e-06, "loss": 2.0231, "step": 60 }, { "epoch": 0.06, "learning_rate": 9.253731343283582e-06, "loss": 2.0473, "step": 62 }, { "epoch": 0.06, "learning_rate": 9.552238805970149e-06, "loss": 2.0222, "step": 64 }, { "epoch": 0.06, "learning_rate": 9.850746268656717e-06, "loss": 2.0204, "step": 66 }, { "epoch": 0.06, "learning_rate": 1.0149253731343284e-05, "loss": 2.0266, "step": 68 }, { "epoch": 0.06, "learning_rate": 1.0447761194029851e-05, "loss": 1.9896, "step": 70 }, { "epoch": 0.06, "learning_rate": 1.074626865671642e-05, "loss": 1.9435, "step": 72 }, { "epoch": 0.07, "learning_rate": 1.1044776119402986e-05, "loss": 1.9961, "step": 74 }, { "epoch": 0.07, "learning_rate": 1.1343283582089553e-05, "loss": 2.0354, "step": 76 }, { "epoch": 0.07, "learning_rate": 1.1641791044776121e-05, "loss": 2.0071, "step": 78 }, { "epoch": 0.07, "learning_rate": 1.1940298507462686e-05, "loss": 1.9622, "step": 80 }, { "epoch": 0.07, "learning_rate": 1.2238805970149255e-05, "loss": 1.9752, "step": 82 }, { "epoch": 0.08, "learning_rate": 1.2537313432835823e-05, "loss": 1.9363, "step": 84 }, { "epoch": 0.08, "learning_rate": 1.2835820895522388e-05, "loss": 2.0192, "step": 86 }, { "epoch": 0.08, "learning_rate": 1.3134328358208957e-05, "loss": 1.9606, "step": 88 }, { "epoch": 0.08, "learning_rate": 1.3432835820895525e-05, "loss": 1.9499, "step": 90 }, { "epoch": 0.08, "learning_rate": 1.373134328358209e-05, "loss": 1.9427, "step": 92 }, { "epoch": 0.08, "learning_rate": 1.4029850746268658e-05, "loss": 1.9274, "step": 94 }, { "epoch": 0.09, "learning_rate": 1.4179104477611942e-05, "loss": 1.9864, "step": 96 }, { "epoch": 0.09, "learning_rate": 1.4477611940298509e-05, "loss": 1.899, "step": 98 }, { "epoch": 0.09, "learning_rate": 1.4776119402985077e-05, "loss": 2.0033, "step": 100 }, { "epoch": 0.09, "learning_rate": 1.5074626865671642e-05, "loss": 1.953, "step": 102 }, { "epoch": 0.09, "learning_rate": 1.537313432835821e-05, "loss": 1.9023, "step": 104 }, { "epoch": 0.1, "learning_rate": 1.5671641791044777e-05, "loss": 2.022, "step": 106 }, { "epoch": 0.1, "learning_rate": 1.5970149253731344e-05, "loss": 1.9652, "step": 108 }, { "epoch": 0.1, "learning_rate": 1.626865671641791e-05, "loss": 1.9581, "step": 110 }, { "epoch": 0.1, "learning_rate": 1.6567164179104477e-05, "loss": 2.0001, "step": 112 }, { "epoch": 0.1, "learning_rate": 1.6865671641791048e-05, "loss": 1.8767, "step": 114 }, { "epoch": 0.1, "learning_rate": 1.7164179104477614e-05, "loss": 1.8871, "step": 116 }, { "epoch": 0.11, "learning_rate": 1.746268656716418e-05, "loss": 1.8394, "step": 118 }, { "epoch": 0.11, "learning_rate": 1.7761194029850748e-05, "loss": 1.9129, "step": 120 }, { "epoch": 0.11, "learning_rate": 1.8059701492537314e-05, "loss": 1.9511, "step": 122 }, { "epoch": 0.11, "learning_rate": 1.8208955223880598e-05, "loss": 1.9331, "step": 124 }, { "epoch": 0.11, "learning_rate": 1.8507462686567165e-05, "loss": 1.9341, "step": 126 }, { "epoch": 0.12, "learning_rate": 1.8805970149253735e-05, "loss": 1.8714, "step": 128 }, { "epoch": 0.12, "learning_rate": 1.9104477611940298e-05, "loss": 1.9453, "step": 130 }, { "epoch": 0.12, "learning_rate": 1.9402985074626868e-05, "loss": 1.8974, "step": 132 }, { "epoch": 0.12, "learning_rate": 1.9701492537313435e-05, "loss": 1.9223, "step": 134 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 1.8966, "step": 136 }, { "epoch": 0.12, "learning_rate": 1.999998933431258e-05, "loss": 1.8785, "step": 138 }, { "epoch": 0.13, "learning_rate": 1.999995733727306e-05, "loss": 1.9156, "step": 140 }, { "epoch": 0.13, "learning_rate": 1.9999904008949705e-05, "loss": 1.9364, "step": 142 }, { "epoch": 0.13, "learning_rate": 1.9999829349456267e-05, "loss": 1.8797, "step": 144 }, { "epoch": 0.13, "learning_rate": 1.9999733358952002e-05, "loss": 1.9061, "step": 146 }, { "epoch": 0.13, "learning_rate": 1.999961603764167e-05, "loss": 1.8328, "step": 148 }, { "epoch": 0.14, "learning_rate": 1.9999477385775543e-05, "loss": 1.8222, "step": 150 }, { "epoch": 0.14, "learning_rate": 1.9999317403649377e-05, "loss": 1.8906, "step": 152 }, { "epoch": 0.14, "learning_rate": 1.9999136091604433e-05, "loss": 1.9194, "step": 154 }, { "epoch": 0.14, "learning_rate": 1.9998933450027483e-05, "loss": 1.8939, "step": 156 }, { "epoch": 0.14, "learning_rate": 1.9998709479350785e-05, "loss": 1.7923, "step": 158 }, { "epoch": 0.14, "learning_rate": 1.99984641800521e-05, "loss": 1.897, "step": 160 }, { "epoch": 0.15, "learning_rate": 1.9998197552654686e-05, "loss": 1.908, "step": 162 }, { "epoch": 0.15, "learning_rate": 1.999790959772729e-05, "loss": 1.8676, "step": 164 }, { "epoch": 0.15, "learning_rate": 1.9997600315884166e-05, "loss": 1.8565, "step": 166 }, { "epoch": 0.15, "learning_rate": 1.9997269707785055e-05, "loss": 1.871, "step": 168 }, { "epoch": 0.15, "learning_rate": 1.9996917774135186e-05, "loss": 1.8545, "step": 170 }, { "epoch": 0.16, "learning_rate": 1.999654451568528e-05, "loss": 1.9056, "step": 172 }, { "epoch": 0.16, "learning_rate": 1.9996149933231554e-05, "loss": 1.842, "step": 174 }, { "epoch": 0.16, "learning_rate": 1.9995734027615702e-05, "loss": 1.8387, "step": 176 }, { "epoch": 0.16, "learning_rate": 1.9995296799724914e-05, "loss": 1.9177, "step": 178 }, { "epoch": 0.16, "learning_rate": 1.9994838250491852e-05, "loss": 1.8014, "step": 180 }, { "epoch": 0.16, "learning_rate": 1.9994358380894665e-05, "loss": 1.8049, "step": 182 }, { "epoch": 0.17, "learning_rate": 1.999385719195698e-05, "loss": 1.8036, "step": 184 }, { "epoch": 0.17, "learning_rate": 1.9993334684747906e-05, "loss": 1.852, "step": 186 }, { "epoch": 0.17, "learning_rate": 1.9992790860382023e-05, "loss": 1.8787, "step": 188 }, { "epoch": 0.17, "learning_rate": 1.9992225720019377e-05, "loss": 1.8519, "step": 190 }, { "epoch": 0.17, "learning_rate": 1.9991639264865492e-05, "loss": 1.8272, "step": 192 }, { "epoch": 0.17, "learning_rate": 1.9991031496171362e-05, "loss": 1.9247, "step": 194 }, { "epoch": 0.18, "learning_rate": 1.9990402415233436e-05, "loss": 1.8138, "step": 196 }, { "epoch": 0.18, "learning_rate": 1.9989752023393635e-05, "loss": 1.8417, "step": 198 }, { "epoch": 0.18, "learning_rate": 1.9989080322039328e-05, "loss": 1.7823, "step": 200 }, { "epoch": 0.18, "learning_rate": 1.998838731260335e-05, "loss": 1.9182, "step": 202 }, { "epoch": 0.18, "learning_rate": 1.998767299656399e-05, "loss": 1.7648, "step": 204 }, { "epoch": 0.19, "learning_rate": 1.9986937375444973e-05, "loss": 1.8909, "step": 206 }, { "epoch": 0.19, "learning_rate": 1.9986180450815485e-05, "loss": 1.8438, "step": 208 }, { "epoch": 0.19, "learning_rate": 1.9985402224290153e-05, "loss": 1.7763, "step": 210 }, { "epoch": 0.19, "learning_rate": 1.9984602697529034e-05, "loss": 1.8553, "step": 212 }, { "epoch": 0.19, "learning_rate": 1.9983781872237634e-05, "loss": 1.8461, "step": 214 }, { "epoch": 0.19, "learning_rate": 1.9982939750166888e-05, "loss": 1.8635, "step": 216 }, { "epoch": 0.2, "learning_rate": 1.9982076333113153e-05, "loss": 1.8799, "step": 218 }, { "epoch": 0.2, "learning_rate": 1.9981191622918217e-05, "loss": 1.8216, "step": 220 }, { "epoch": 0.2, "learning_rate": 1.9980285621469288e-05, "loss": 1.9037, "step": 222 }, { "epoch": 0.2, "learning_rate": 1.9979358330698995e-05, "loss": 1.8344, "step": 224 }, { "epoch": 0.2, "learning_rate": 1.997840975258538e-05, "loss": 1.7403, "step": 226 }, { "epoch": 0.21, "learning_rate": 1.997743988915188e-05, "loss": 1.8381, "step": 228 }, { "epoch": 0.21, "learning_rate": 1.9976448742467352e-05, "loss": 1.8507, "step": 230 }, { "epoch": 0.21, "learning_rate": 1.9975436314646052e-05, "loss": 1.8299, "step": 232 }, { "epoch": 0.21, "learning_rate": 1.9974402607847623e-05, "loss": 1.7769, "step": 234 }, { "epoch": 0.21, "learning_rate": 1.9973347624277105e-05, "loss": 1.8628, "step": 236 }, { "epoch": 0.21, "learning_rate": 1.9972271366184922e-05, "loss": 1.8459, "step": 238 }, { "epoch": 0.22, "learning_rate": 1.9971173835866884e-05, "loss": 1.8887, "step": 240 }, { "epoch": 0.22, "learning_rate": 1.997005503566417e-05, "loss": 1.8039, "step": 242 }, { "epoch": 0.22, "learning_rate": 1.996891496796334e-05, "loss": 1.7956, "step": 244 }, { "epoch": 0.22, "learning_rate": 1.9967753635196306e-05, "loss": 1.7893, "step": 246 }, { "epoch": 0.22, "learning_rate": 1.996657103984036e-05, "loss": 1.8458, "step": 248 }, { "epoch": 0.23, "learning_rate": 1.9965367184418138e-05, "loss": 1.748, "step": 250 }, { "epoch": 0.23, "learning_rate": 1.996414207149762e-05, "loss": 1.8475, "step": 252 }, { "epoch": 0.23, "learning_rate": 1.9962895703692155e-05, "loss": 1.8296, "step": 254 }, { "epoch": 0.23, "learning_rate": 1.9961628083660406e-05, "loss": 1.8945, "step": 256 }, { "epoch": 0.23, "learning_rate": 1.9960339214106388e-05, "loss": 1.8396, "step": 258 }, { "epoch": 0.23, "learning_rate": 1.995902909777943e-05, "loss": 1.8078, "step": 260 }, { "epoch": 0.24, "learning_rate": 1.9957697737474198e-05, "loss": 1.8347, "step": 262 }, { "epoch": 0.24, "learning_rate": 1.995634513603066e-05, "loss": 1.791, "step": 264 }, { "epoch": 0.24, "learning_rate": 1.9954971296334102e-05, "loss": 1.7971, "step": 266 }, { "epoch": 0.24, "learning_rate": 1.9953576221315116e-05, "loss": 1.8384, "step": 268 }, { "epoch": 0.24, "learning_rate": 1.9952159913949583e-05, "loss": 1.8272, "step": 270 }, { "epoch": 0.25, "learning_rate": 1.9950722377258692e-05, "loss": 1.8127, "step": 272 }, { "epoch": 0.25, "learning_rate": 1.9949263614308894e-05, "loss": 1.763, "step": 274 }, { "epoch": 0.25, "learning_rate": 1.994778362821194e-05, "loss": 1.8761, "step": 276 }, { "epoch": 0.25, "learning_rate": 1.9946282422124842e-05, "loss": 1.8082, "step": 278 }, { "epoch": 0.25, "learning_rate": 1.994475999924987e-05, "loss": 1.8545, "step": 280 }, { "epoch": 0.25, "learning_rate": 1.9943216362834575e-05, "loss": 1.8098, "step": 282 }, { "epoch": 0.26, "learning_rate": 1.994165151617174e-05, "loss": 1.8048, "step": 284 }, { "epoch": 0.26, "learning_rate": 1.9940065462599394e-05, "loss": 1.8017, "step": 286 }, { "epoch": 0.26, "learning_rate": 1.9938458205500812e-05, "loss": 1.8242, "step": 288 }, { "epoch": 0.26, "learning_rate": 1.9936829748304496e-05, "loss": 1.8284, "step": 290 }, { "epoch": 0.26, "learning_rate": 1.9935180094484164e-05, "loss": 1.9135, "step": 292 }, { "epoch": 0.27, "learning_rate": 1.9933509247558755e-05, "loss": 1.8937, "step": 294 }, { "epoch": 0.27, "learning_rate": 1.993181721109242e-05, "loss": 1.7698, "step": 296 }, { "epoch": 0.27, "learning_rate": 1.99301039886945e-05, "loss": 1.8058, "step": 298 }, { "epoch": 0.27, "learning_rate": 1.9928369584019536e-05, "loss": 1.7913, "step": 300 }, { "epoch": 0.27, "learning_rate": 1.9926614000767254e-05, "loss": 1.7902, "step": 302 }, { "epoch": 0.27, "learning_rate": 1.992483724268255e-05, "loss": 1.7558, "step": 304 }, { "epoch": 0.28, "learning_rate": 1.99230393135555e-05, "loss": 1.7755, "step": 306 }, { "epoch": 0.28, "learning_rate": 1.9921220217221325e-05, "loss": 1.7874, "step": 308 }, { "epoch": 0.28, "learning_rate": 1.9919379957560413e-05, "loss": 1.7697, "step": 310 }, { "epoch": 0.28, "learning_rate": 1.9917518538498292e-05, "loss": 1.7788, "step": 312 }, { "epoch": 0.28, "learning_rate": 1.991563596400562e-05, "loss": 1.8082, "step": 314 }, { "epoch": 0.28, "learning_rate": 1.991373223809819e-05, "loss": 1.8246, "step": 316 }, { "epoch": 0.29, "learning_rate": 1.9911807364836915e-05, "loss": 1.7254, "step": 318 }, { "epoch": 0.29, "learning_rate": 1.9909861348327806e-05, "loss": 1.7688, "step": 320 }, { "epoch": 0.29, "learning_rate": 1.990789419272199e-05, "loss": 1.8298, "step": 322 }, { "epoch": 0.29, "learning_rate": 1.990590590221568e-05, "loss": 1.7999, "step": 324 }, { "epoch": 0.29, "learning_rate": 1.990389648105017e-05, "loss": 1.8174, "step": 326 }, { "epoch": 0.3, "learning_rate": 1.9901865933511834e-05, "loss": 1.8054, "step": 328 }, { "epoch": 0.3, "learning_rate": 1.9899814263932106e-05, "loss": 1.8832, "step": 330 }, { "epoch": 0.3, "learning_rate": 1.9897741476687486e-05, "loss": 1.8076, "step": 332 }, { "epoch": 0.3, "learning_rate": 1.9895647576199507e-05, "loss": 1.749, "step": 334 }, { "epoch": 0.3, "learning_rate": 1.9893532566934747e-05, "loss": 1.83, "step": 336 }, { "epoch": 0.3, "learning_rate": 1.9891396453404817e-05, "loss": 1.8311, "step": 338 }, { "epoch": 0.31, "learning_rate": 1.988923924016634e-05, "loss": 1.8504, "step": 340 }, { "epoch": 0.31, "learning_rate": 1.9887060931820937e-05, "loss": 1.8123, "step": 342 }, { "epoch": 0.31, "learning_rate": 1.9884861533015255e-05, "loss": 1.8611, "step": 344 }, { "epoch": 0.31, "learning_rate": 1.988264104844091e-05, "loss": 1.7545, "step": 346 }, { "epoch": 0.31, "learning_rate": 1.9880399482834493e-05, "loss": 1.7604, "step": 348 }, { "epoch": 0.32, "learning_rate": 1.9878136840977577e-05, "loss": 1.7608, "step": 350 }, { "epoch": 0.32, "learning_rate": 1.987585312769669e-05, "loss": 1.7772, "step": 352 }, { "epoch": 0.32, "learning_rate": 1.9873548347863306e-05, "loss": 1.8385, "step": 354 }, { "epoch": 0.32, "learning_rate": 1.9871222506393834e-05, "loss": 1.7471, "step": 356 }, { "epoch": 0.32, "learning_rate": 1.9868875608249613e-05, "loss": 1.8298, "step": 358 }, { "epoch": 0.32, "learning_rate": 1.9866507658436904e-05, "loss": 1.7782, "step": 360 }, { "epoch": 0.33, "learning_rate": 1.9864118662006868e-05, "loss": 1.766, "step": 362 }, { "epoch": 0.33, "learning_rate": 1.986170862405556e-05, "loss": 1.7897, "step": 364 }, { "epoch": 0.33, "learning_rate": 1.9859277549723924e-05, "loss": 1.7589, "step": 366 }, { "epoch": 0.33, "learning_rate": 1.9856825444197777e-05, "loss": 1.8643, "step": 368 }, { "epoch": 0.33, "learning_rate": 1.98543523127078e-05, "loss": 1.7779, "step": 370 }, { "epoch": 0.34, "learning_rate": 1.9851858160529512e-05, "loss": 1.8131, "step": 372 }, { "epoch": 0.34, "learning_rate": 1.9849342992983295e-05, "loss": 1.7722, "step": 374 }, { "epoch": 0.34, "learning_rate": 1.984680681543434e-05, "loss": 1.7034, "step": 376 }, { "epoch": 0.34, "learning_rate": 1.984424963329266e-05, "loss": 1.7491, "step": 378 }, { "epoch": 0.34, "learning_rate": 1.9841671452013082e-05, "loss": 1.8034, "step": 380 }, { "epoch": 0.34, "learning_rate": 1.9839072277095222e-05, "loss": 1.8034, "step": 382 }, { "epoch": 0.35, "learning_rate": 1.983645211408347e-05, "loss": 1.7686, "step": 384 }, { "epoch": 0.35, "learning_rate": 1.9833810968567004e-05, "loss": 1.8068, "step": 386 }, { "epoch": 0.35, "learning_rate": 1.9831148846179743e-05, "loss": 1.7915, "step": 388 }, { "epoch": 0.35, "learning_rate": 1.982846575260036e-05, "loss": 1.7891, "step": 390 }, { "epoch": 0.35, "learning_rate": 1.9825761693552264e-05, "loss": 1.7181, "step": 392 }, { "epoch": 0.36, "learning_rate": 1.9823036674803585e-05, "loss": 1.8185, "step": 394 }, { "epoch": 0.36, "learning_rate": 1.982029070216717e-05, "loss": 1.8424, "step": 396 }, { "epoch": 0.36, "learning_rate": 1.981752378150054e-05, "loss": 1.7818, "step": 398 }, { "epoch": 0.36, "learning_rate": 1.981473591870593e-05, "loss": 1.7919, "step": 400 }, { "epoch": 0.36, "learning_rate": 1.981192711973023e-05, "loss": 1.7922, "step": 402 }, { "epoch": 0.36, "learning_rate": 1.9809097390564996e-05, "loss": 1.7766, "step": 404 }, { "epoch": 0.37, "learning_rate": 1.980624673724643e-05, "loss": 1.812, "step": 406 }, { "epoch": 0.37, "learning_rate": 1.9803375165855366e-05, "loss": 1.8114, "step": 408 }, { "epoch": 0.37, "learning_rate": 1.980048268251726e-05, "loss": 1.7852, "step": 410 }, { "epoch": 0.37, "learning_rate": 1.9797569293402174e-05, "loss": 1.7116, "step": 412 }, { "epoch": 0.37, "learning_rate": 1.9794635004724774e-05, "loss": 1.7899, "step": 414 }, { "epoch": 0.38, "learning_rate": 1.9791679822744296e-05, "loss": 1.8268, "step": 416 }, { "epoch": 0.38, "learning_rate": 1.9788703753764554e-05, "loss": 1.7258, "step": 418 }, { "epoch": 0.38, "learning_rate": 1.9785706804133906e-05, "loss": 1.7856, "step": 420 }, { "epoch": 0.38, "learning_rate": 1.9782688980245263e-05, "loss": 1.7606, "step": 422 }, { "epoch": 0.38, "learning_rate": 1.9779650288536057e-05, "loss": 1.7671, "step": 424 }, { "epoch": 0.38, "learning_rate": 1.9776590735488238e-05, "loss": 1.8111, "step": 426 }, { "epoch": 0.39, "learning_rate": 1.9773510327628248e-05, "loss": 1.8157, "step": 428 }, { "epoch": 0.39, "learning_rate": 1.977040907152702e-05, "loss": 1.8134, "step": 430 }, { "epoch": 0.39, "learning_rate": 1.976728697379996e-05, "loss": 1.7108, "step": 432 }, { "epoch": 0.39, "learning_rate": 1.9764144041106942e-05, "loss": 1.7848, "step": 434 }, { "epoch": 0.39, "learning_rate": 1.976098028015226e-05, "loss": 1.7785, "step": 436 }, { "epoch": 0.39, "learning_rate": 1.9757795697684656e-05, "loss": 1.7781, "step": 438 }, { "epoch": 0.4, "learning_rate": 1.9754590300497283e-05, "loss": 1.8016, "step": 440 }, { "epoch": 0.4, "learning_rate": 1.9751364095427694e-05, "loss": 1.6873, "step": 442 }, { "epoch": 0.4, "learning_rate": 1.9748117089357827e-05, "loss": 1.7731, "step": 444 }, { "epoch": 0.4, "learning_rate": 1.9744849289213994e-05, "loss": 1.7192, "step": 446 }, { "epoch": 0.4, "learning_rate": 1.974156070196686e-05, "loss": 1.7577, "step": 448 }, { "epoch": 0.41, "learning_rate": 1.9738251334631435e-05, "loss": 1.7699, "step": 450 }, { "epoch": 0.41, "learning_rate": 1.9734921194267054e-05, "loss": 1.7899, "step": 452 }, { "epoch": 0.41, "learning_rate": 1.973157028797737e-05, "loss": 1.7815, "step": 454 }, { "epoch": 0.41, "learning_rate": 1.9728198622910313e-05, "loss": 1.7487, "step": 456 }, { "epoch": 0.41, "learning_rate": 1.9724806206258117e-05, "loss": 1.7338, "step": 458 }, { "epoch": 0.41, "learning_rate": 1.9721393045257277e-05, "loss": 1.6726, "step": 460 }, { "epoch": 0.42, "learning_rate": 1.9717959147188527e-05, "loss": 1.766, "step": 462 }, { "epoch": 0.42, "learning_rate": 1.9714504519376846e-05, "loss": 1.8309, "step": 464 }, { "epoch": 0.42, "learning_rate": 1.9711029169191437e-05, "loss": 1.7432, "step": 466 }, { "epoch": 0.42, "learning_rate": 1.9707533104045686e-05, "loss": 1.8108, "step": 468 }, { "epoch": 0.42, "learning_rate": 1.9704016331397192e-05, "loss": 1.7305, "step": 470 }, { "epoch": 0.43, "learning_rate": 1.970047885874771e-05, "loss": 1.8073, "step": 472 }, { "epoch": 0.43, "learning_rate": 1.9696920693643157e-05, "loss": 1.7198, "step": 474 }, { "epoch": 0.43, "learning_rate": 1.9693341843673586e-05, "loss": 1.7378, "step": 476 }, { "epoch": 0.43, "learning_rate": 1.968974231647318e-05, "loss": 1.78, "step": 478 }, { "epoch": 0.43, "learning_rate": 1.968612211972022e-05, "loss": 1.8091, "step": 480 }, { "epoch": 0.43, "learning_rate": 1.968248126113709e-05, "loss": 1.779, "step": 482 }, { "epoch": 0.44, "learning_rate": 1.9678819748490236e-05, "loss": 1.765, "step": 484 }, { "epoch": 0.44, "learning_rate": 1.9675137589590173e-05, "loss": 1.7272, "step": 486 }, { "epoch": 0.44, "learning_rate": 1.967143479229145e-05, "loss": 1.7872, "step": 488 }, { "epoch": 0.44, "learning_rate": 1.9667711364492638e-05, "loss": 1.7774, "step": 490 }, { "epoch": 0.44, "learning_rate": 1.9663967314136324e-05, "loss": 1.7369, "step": 492 }, { "epoch": 0.45, "learning_rate": 1.9660202649209088e-05, "loss": 1.8046, "step": 494 }, { "epoch": 0.45, "learning_rate": 1.965641737774147e-05, "loss": 1.7097, "step": 496 }, { "epoch": 0.45, "learning_rate": 1.9652611507807977e-05, "loss": 1.7573, "step": 498 }, { "epoch": 0.45, "learning_rate": 1.9648785047527052e-05, "loss": 1.7586, "step": 500 }, { "epoch": 0.45, "learning_rate": 1.9644938005061062e-05, "loss": 1.735, "step": 502 }, { "epoch": 0.45, "learning_rate": 1.964107038861628e-05, "loss": 1.7838, "step": 504 }, { "epoch": 0.46, "learning_rate": 1.9637182206442857e-05, "loss": 1.7658, "step": 506 }, { "epoch": 0.46, "learning_rate": 1.9633273466834826e-05, "loss": 1.7907, "step": 508 }, { "epoch": 0.46, "learning_rate": 1.9629344178130062e-05, "loss": 1.738, "step": 510 }, { "epoch": 0.46, "learning_rate": 1.962539434871028e-05, "loss": 1.7868, "step": 512 }, { "epoch": 0.46, "learning_rate": 1.9621423987001013e-05, "loss": 1.8226, "step": 514 }, { "epoch": 0.47, "learning_rate": 1.9617433101471582e-05, "loss": 1.6927, "step": 516 }, { "epoch": 0.47, "learning_rate": 1.9613421700635098e-05, "loss": 1.7609, "step": 518 }, { "epoch": 0.47, "learning_rate": 1.960938979304843e-05, "loss": 1.7614, "step": 520 }, { "epoch": 0.47, "learning_rate": 1.960533738731219e-05, "loss": 1.7703, "step": 522 }, { "epoch": 0.47, "learning_rate": 1.960126449207072e-05, "loss": 1.7205, "step": 524 }, { "epoch": 0.47, "learning_rate": 1.959717111601206e-05, "loss": 1.6713, "step": 526 }, { "epoch": 0.48, "learning_rate": 1.9593057267867945e-05, "loss": 1.7879, "step": 528 }, { "epoch": 0.48, "learning_rate": 1.9588922956413783e-05, "loss": 1.8063, "step": 530 }, { "epoch": 0.48, "learning_rate": 1.9584768190468624e-05, "loss": 1.7567, "step": 532 }, { "epoch": 0.48, "learning_rate": 1.958059297889516e-05, "loss": 1.7511, "step": 534 }, { "epoch": 0.48, "learning_rate": 1.9576397330599686e-05, "loss": 1.7831, "step": 536 }, { "epoch": 0.49, "learning_rate": 1.95721812545321e-05, "loss": 1.7832, "step": 538 }, { "epoch": 0.49, "learning_rate": 1.9567944759685873e-05, "loss": 1.8, "step": 540 }, { "epoch": 0.49, "learning_rate": 1.9563687855098027e-05, "loss": 1.7094, "step": 542 }, { "epoch": 0.49, "learning_rate": 1.9559410549849125e-05, "loss": 1.778, "step": 544 }, { "epoch": 0.49, "learning_rate": 1.9555112853063254e-05, "loss": 1.6998, "step": 546 }, { "epoch": 0.49, "learning_rate": 1.9550794773907983e-05, "loss": 1.6977, "step": 548 }, { "epoch": 0.5, "learning_rate": 1.9546456321594374e-05, "loss": 1.7396, "step": 550 }, { "epoch": 0.5, "learning_rate": 1.9542097505376942e-05, "loss": 1.7353, "step": 552 }, { "epoch": 0.5, "learning_rate": 1.9537718334553637e-05, "loss": 1.7153, "step": 554 }, { "epoch": 0.5, "learning_rate": 1.9533318818465837e-05, "loss": 1.7359, "step": 556 }, { "epoch": 0.5, "learning_rate": 1.9528898966498315e-05, "loss": 1.7319, "step": 558 }, { "epoch": 0.5, "learning_rate": 1.9524458788079218e-05, "loss": 1.8195, "step": 560 }, { "epoch": 0.51, "learning_rate": 1.9519998292680062e-05, "loss": 1.7492, "step": 562 }, { "epoch": 0.51, "learning_rate": 1.9515517489815694e-05, "loss": 1.733, "step": 564 }, { "epoch": 0.51, "learning_rate": 1.9511016389044283e-05, "loss": 1.7876, "step": 566 }, { "epoch": 0.51, "learning_rate": 1.9506494999967298e-05, "loss": 1.6752, "step": 568 }, { "epoch": 0.51, "learning_rate": 1.9501953332229482e-05, "loss": 1.7158, "step": 570 }, { "epoch": 0.52, "learning_rate": 1.9497391395518836e-05, "loss": 1.6684, "step": 572 }, { "epoch": 0.52, "learning_rate": 1.94928091995666e-05, "loss": 1.7932, "step": 574 }, { "epoch": 0.52, "learning_rate": 1.9488206754147223e-05, "loss": 1.7774, "step": 576 }, { "epoch": 0.52, "learning_rate": 1.948358406907836e-05, "loss": 1.7292, "step": 578 }, { "epoch": 0.52, "learning_rate": 1.9478941154220833e-05, "loss": 1.8014, "step": 580 }, { "epoch": 0.52, "learning_rate": 1.9474278019478614e-05, "loss": 1.7008, "step": 582 }, { "epoch": 0.53, "learning_rate": 1.9469594674798814e-05, "loss": 1.7719, "step": 584 }, { "epoch": 0.53, "learning_rate": 1.9464891130171647e-05, "loss": 1.7625, "step": 586 }, { "epoch": 0.53, "learning_rate": 1.946016739563042e-05, "loss": 1.7655, "step": 588 }, { "epoch": 0.53, "learning_rate": 1.945542348125152e-05, "loss": 1.7899, "step": 590 }, { "epoch": 0.53, "learning_rate": 1.9450659397154353e-05, "loss": 1.7145, "step": 592 }, { "epoch": 0.54, "learning_rate": 1.9445875153501375e-05, "loss": 1.783, "step": 594 }, { "epoch": 0.54, "learning_rate": 1.944107076049804e-05, "loss": 1.6651, "step": 596 }, { "epoch": 0.54, "learning_rate": 1.9436246228392762e-05, "loss": 1.7709, "step": 598 }, { "epoch": 0.54, "learning_rate": 1.943140156747694e-05, "loss": 1.7286, "step": 600 }, { "epoch": 0.54, "learning_rate": 1.9426536788084905e-05, "loss": 1.7013, "step": 602 }, { "epoch": 0.54, "learning_rate": 1.94216519005939e-05, "loss": 1.7354, "step": 604 }, { "epoch": 0.55, "learning_rate": 1.9416746915424057e-05, "loss": 1.6642, "step": 606 }, { "epoch": 0.55, "learning_rate": 1.9411821843038387e-05, "loss": 1.7365, "step": 608 }, { "epoch": 0.55, "learning_rate": 1.9406876693942747e-05, "loss": 1.6826, "step": 610 }, { "epoch": 0.55, "learning_rate": 1.9401911478685815e-05, "loss": 1.7372, "step": 612 }, { "epoch": 0.55, "learning_rate": 1.9396926207859085e-05, "loss": 1.7176, "step": 614 }, { "epoch": 0.56, "learning_rate": 1.939192089209682e-05, "loss": 1.7928, "step": 616 }, { "epoch": 0.56, "learning_rate": 1.938689554207605e-05, "loss": 1.719, "step": 618 }, { "epoch": 0.56, "learning_rate": 1.9381850168516535e-05, "loss": 1.8082, "step": 620 }, { "epoch": 0.56, "learning_rate": 1.9376784782180747e-05, "loss": 1.7874, "step": 622 }, { "epoch": 0.56, "learning_rate": 1.937169939387386e-05, "loss": 1.7655, "step": 624 }, { "epoch": 0.56, "learning_rate": 1.93665940144437e-05, "loss": 1.7579, "step": 626 }, { "epoch": 0.57, "learning_rate": 1.9361468654780748e-05, "loss": 1.7333, "step": 628 }, { "epoch": 0.57, "learning_rate": 1.9356323325818094e-05, "loss": 1.7346, "step": 630 }, { "epoch": 0.57, "learning_rate": 1.9351158038531438e-05, "loss": 1.6965, "step": 632 }, { "epoch": 0.57, "learning_rate": 1.9345972803939046e-05, "loss": 1.7706, "step": 634 }, { "epoch": 0.57, "learning_rate": 1.9340767633101735e-05, "loss": 1.7284, "step": 636 }, { "epoch": 0.58, "learning_rate": 1.9335542537122856e-05, "loss": 1.7154, "step": 638 }, { "epoch": 0.58, "learning_rate": 1.9330297527148246e-05, "loss": 1.6853, "step": 640 }, { "epoch": 0.58, "learning_rate": 1.9325032614366244e-05, "loss": 1.6915, "step": 642 }, { "epoch": 0.58, "learning_rate": 1.9319747810007625e-05, "loss": 1.7142, "step": 644 }, { "epoch": 0.58, "learning_rate": 1.9314443125345606e-05, "loss": 1.8119, "step": 646 }, { "epoch": 0.58, "learning_rate": 1.930911857169581e-05, "loss": 1.6678, "step": 648 }, { "epoch": 0.59, "learning_rate": 1.930377416041624e-05, "loss": 1.7466, "step": 650 }, { "epoch": 0.59, "learning_rate": 1.929840990290726e-05, "loss": 1.7159, "step": 652 }, { "epoch": 0.59, "learning_rate": 1.929302581061157e-05, "loss": 1.7208, "step": 654 }, { "epoch": 0.59, "learning_rate": 1.9287621895014178e-05, "loss": 1.7412, "step": 656 }, { "epoch": 0.59, "learning_rate": 1.928219816764238e-05, "loss": 1.7257, "step": 658 }, { "epoch": 0.6, "learning_rate": 1.927675464006573e-05, "loss": 1.6678, "step": 660 }, { "epoch": 0.6, "learning_rate": 1.9271291323896026e-05, "loss": 1.7721, "step": 662 }, { "epoch": 0.6, "learning_rate": 1.9265808230787265e-05, "loss": 1.75, "step": 664 }, { "epoch": 0.6, "learning_rate": 1.9260305372435643e-05, "loss": 1.7501, "step": 666 }, { "epoch": 0.6, "learning_rate": 1.9254782760579515e-05, "loss": 1.7389, "step": 668 }, { "epoch": 0.6, "learning_rate": 1.9249240406999366e-05, "loss": 1.7821, "step": 670 }, { "epoch": 0.61, "learning_rate": 1.9243678323517803e-05, "loss": 1.7417, "step": 672 }, { "epoch": 0.61, "learning_rate": 1.9238096521999514e-05, "loss": 1.7157, "step": 674 }, { "epoch": 0.61, "learning_rate": 1.9232495014351248e-05, "loss": 1.7157, "step": 676 }, { "epoch": 0.61, "learning_rate": 1.922687381252179e-05, "loss": 1.661, "step": 678 }, { "epoch": 0.61, "learning_rate": 1.9221232928501937e-05, "loss": 1.8155, "step": 680 }, { "epoch": 0.61, "learning_rate": 1.921557237432447e-05, "loss": 1.7296, "step": 682 }, { "epoch": 0.62, "learning_rate": 1.920989216206413e-05, "loss": 1.7099, "step": 684 }, { "epoch": 0.62, "learning_rate": 1.920419230383759e-05, "loss": 1.7449, "step": 686 }, { "epoch": 0.62, "learning_rate": 1.919847281180343e-05, "loss": 1.7194, "step": 688 }, { "epoch": 0.62, "learning_rate": 1.919273369816212e-05, "loss": 1.6699, "step": 690 }, { "epoch": 0.62, "learning_rate": 1.9186974975155967e-05, "loss": 1.7437, "step": 692 }, { "epoch": 0.63, "learning_rate": 1.9181196655069126e-05, "loss": 1.7165, "step": 694 }, { "epoch": 0.63, "learning_rate": 1.917539875022755e-05, "loss": 1.7493, "step": 696 }, { "epoch": 0.63, "learning_rate": 1.9169581272998964e-05, "loss": 1.6878, "step": 698 }, { "epoch": 0.63, "learning_rate": 1.9163744235792845e-05, "loss": 1.7825, "step": 700 }, { "epoch": 0.63, "learning_rate": 1.9157887651060402e-05, "loss": 1.6875, "step": 702 }, { "epoch": 0.63, "learning_rate": 1.9152011531294524e-05, "loss": 1.6695, "step": 704 }, { "epoch": 0.64, "learning_rate": 1.9146115889029793e-05, "loss": 1.743, "step": 706 }, { "epoch": 0.64, "learning_rate": 1.914020073684242e-05, "loss": 1.7069, "step": 708 }, { "epoch": 0.64, "learning_rate": 1.9134266087350243e-05, "loss": 1.7586, "step": 710 }, { "epoch": 0.64, "learning_rate": 1.912831195321268e-05, "loss": 1.7369, "step": 712 }, { "epoch": 0.64, "learning_rate": 1.912233834713072e-05, "loss": 1.7295, "step": 714 }, { "epoch": 0.65, "learning_rate": 1.911634528184688e-05, "loss": 1.6867, "step": 716 }, { "epoch": 0.65, "learning_rate": 1.9110332770145198e-05, "loss": 1.7016, "step": 718 }, { "epoch": 0.65, "learning_rate": 1.910430082485119e-05, "loss": 1.6782, "step": 720 }, { "epoch": 0.65, "learning_rate": 1.9098249458831822e-05, "loss": 1.683, "step": 722 }, { "epoch": 0.65, "learning_rate": 1.9092178684995487e-05, "loss": 1.6822, "step": 724 }, { "epoch": 0.65, "learning_rate": 1.9086088516291986e-05, "loss": 1.6561, "step": 726 }, { "epoch": 0.66, "learning_rate": 1.907997896571248e-05, "loss": 1.662, "step": 728 }, { "epoch": 0.66, "learning_rate": 1.9073850046289484e-05, "loss": 1.7087, "step": 730 }, { "epoch": 0.66, "learning_rate": 1.9067701771096826e-05, "loss": 1.7309, "step": 732 }, { "epoch": 0.66, "learning_rate": 1.9061534153249622e-05, "loss": 1.7506, "step": 734 }, { "epoch": 0.66, "learning_rate": 1.9055347205904245e-05, "loss": 1.7252, "step": 736 }, { "epoch": 0.67, "learning_rate": 1.9049140942258312e-05, "loss": 1.619, "step": 738 }, { "epoch": 0.67, "learning_rate": 1.904291537555063e-05, "loss": 1.7701, "step": 740 }, { "epoch": 0.67, "learning_rate": 1.903667051906119e-05, "loss": 1.6819, "step": 742 }, { "epoch": 0.67, "learning_rate": 1.903040638611113e-05, "loss": 1.6712, "step": 744 }, { "epoch": 0.67, "learning_rate": 1.9024122990062707e-05, "loss": 1.7043, "step": 746 }, { "epoch": 0.67, "learning_rate": 1.901782034431927e-05, "loss": 1.755, "step": 748 }, { "epoch": 0.68, "learning_rate": 1.9011498462325226e-05, "loss": 1.6906, "step": 750 }, { "epoch": 0.68, "learning_rate": 1.9005157357566022e-05, "loss": 1.7031, "step": 752 }, { "epoch": 0.68, "learning_rate": 1.8998797043568102e-05, "loss": 1.695, "step": 754 }, { "epoch": 0.68, "learning_rate": 1.8992417533898898e-05, "loss": 1.641, "step": 756 }, { "epoch": 0.68, "learning_rate": 1.898601884216677e-05, "loss": 1.6758, "step": 758 }, { "epoch": 0.69, "learning_rate": 1.8979600982021014e-05, "loss": 1.7431, "step": 760 }, { "epoch": 0.69, "learning_rate": 1.897316396715181e-05, "loss": 1.6965, "step": 762 }, { "epoch": 0.69, "learning_rate": 1.8966707811290188e-05, "loss": 1.7556, "step": 764 }, { "epoch": 0.69, "learning_rate": 1.896023252820802e-05, "loss": 1.67, "step": 766 }, { "epoch": 0.69, "learning_rate": 1.895373813171798e-05, "loss": 1.7312, "step": 768 }, { "epoch": 0.69, "learning_rate": 1.8947224635673503e-05, "loss": 1.6034, "step": 770 }, { "epoch": 0.7, "learning_rate": 1.8940692053968773e-05, "loss": 1.7197, "step": 772 }, { "epoch": 0.7, "learning_rate": 1.8934140400538687e-05, "loss": 1.6933, "step": 774 }, { "epoch": 0.7, "learning_rate": 1.8927569689358818e-05, "loss": 1.7197, "step": 776 }, { "epoch": 0.7, "learning_rate": 1.89209799344454e-05, "loss": 1.6594, "step": 778 }, { "epoch": 0.7, "learning_rate": 1.891437114985528e-05, "loss": 1.7776, "step": 780 }, { "epoch": 0.71, "learning_rate": 1.8907743349685917e-05, "loss": 1.657, "step": 782 }, { "epoch": 0.71, "learning_rate": 1.8901096548075305e-05, "loss": 1.6959, "step": 784 }, { "epoch": 0.71, "learning_rate": 1.8894430759201996e-05, "loss": 1.6839, "step": 786 }, { "epoch": 0.71, "learning_rate": 1.888774599728503e-05, "loss": 1.7109, "step": 788 }, { "epoch": 0.71, "learning_rate": 1.8881042276583924e-05, "loss": 1.6902, "step": 790 }, { "epoch": 0.71, "learning_rate": 1.8874319611398636e-05, "loss": 1.6338, "step": 792 }, { "epoch": 0.72, "learning_rate": 1.8867578016069535e-05, "loss": 1.6391, "step": 794 }, { "epoch": 0.72, "learning_rate": 1.8860817504977374e-05, "loss": 1.6426, "step": 796 }, { "epoch": 0.72, "learning_rate": 1.885403809254325e-05, "loss": 1.6807, "step": 798 }, { "epoch": 0.72, "learning_rate": 1.8847239793228572e-05, "loss": 1.6501, "step": 800 }, { "epoch": 0.72, "learning_rate": 1.8840422621535067e-05, "loss": 1.6706, "step": 802 }, { "epoch": 0.72, "learning_rate": 1.883358659200468e-05, "loss": 1.7352, "step": 804 }, { "epoch": 0.73, "learning_rate": 1.8826731719219615e-05, "loss": 1.6896, "step": 806 }, { "epoch": 0.73, "learning_rate": 1.881985801780225e-05, "loss": 1.743, "step": 808 }, { "epoch": 0.73, "learning_rate": 1.8812965502415145e-05, "loss": 1.7311, "step": 810 }, { "epoch": 0.73, "learning_rate": 1.8806054187760974e-05, "loss": 1.6658, "step": 812 }, { "epoch": 0.73, "learning_rate": 1.8799124088582523e-05, "loss": 1.6806, "step": 814 }, { "epoch": 0.74, "learning_rate": 1.879217521966265e-05, "loss": 1.6803, "step": 816 }, { "epoch": 0.74, "learning_rate": 1.8785207595824243e-05, "loss": 1.7223, "step": 818 }, { "epoch": 0.74, "learning_rate": 1.8778221231930204e-05, "loss": 1.6689, "step": 820 }, { "epoch": 0.74, "learning_rate": 1.8771216142883407e-05, "loss": 1.6601, "step": 822 }, { "epoch": 0.74, "learning_rate": 1.876419234362667e-05, "loss": 1.6667, "step": 824 }, { "epoch": 0.74, "learning_rate": 1.8757149849142724e-05, "loss": 1.7026, "step": 826 }, { "epoch": 0.75, "learning_rate": 1.8750088674454178e-05, "loss": 1.7311, "step": 828 }, { "epoch": 0.75, "learning_rate": 1.8743008834623486e-05, "loss": 1.7196, "step": 830 }, { "epoch": 0.75, "learning_rate": 1.8735910344752925e-05, "loss": 1.7032, "step": 832 }, { "epoch": 0.75, "learning_rate": 1.872879321998454e-05, "loss": 1.7119, "step": 834 }, { "epoch": 0.75, "learning_rate": 1.8721657475500146e-05, "loss": 1.7413, "step": 836 }, { "epoch": 0.76, "learning_rate": 1.871450312652126e-05, "loss": 1.6728, "step": 838 }, { "epoch": 0.76, "learning_rate": 1.87073301883091e-05, "loss": 1.7625, "step": 840 }, { "epoch": 0.76, "learning_rate": 1.8700138676164523e-05, "loss": 1.7422, "step": 842 }, { "epoch": 0.76, "learning_rate": 1.8692928605428016e-05, "loss": 1.6934, "step": 844 }, { "epoch": 0.76, "learning_rate": 1.8685699991479648e-05, "loss": 1.6893, "step": 846 }, { "epoch": 0.76, "learning_rate": 1.867845284973905e-05, "loss": 1.6688, "step": 848 }, { "epoch": 0.77, "learning_rate": 1.8671187195665373e-05, "loss": 1.6917, "step": 850 }, { "epoch": 0.77, "learning_rate": 1.866390304475725e-05, "loss": 1.7303, "step": 852 }, { "epoch": 0.77, "learning_rate": 1.865660041255278e-05, "loss": 1.7043, "step": 854 }, { "epoch": 0.77, "learning_rate": 1.8649279314629484e-05, "loss": 1.6968, "step": 856 }, { "epoch": 0.77, "learning_rate": 1.8641939766604264e-05, "loss": 1.7125, "step": 858 }, { "epoch": 0.78, "learning_rate": 1.863458178413339e-05, "loss": 1.7805, "step": 860 }, { "epoch": 0.78, "learning_rate": 1.862720538291245e-05, "loss": 1.7479, "step": 862 }, { "epoch": 0.78, "learning_rate": 1.861981057867632e-05, "loss": 1.6536, "step": 864 }, { "epoch": 0.78, "learning_rate": 1.8612397387199133e-05, "loss": 1.6844, "step": 866 }, { "epoch": 0.78, "learning_rate": 1.8604965824294253e-05, "loss": 1.7561, "step": 868 }, { "epoch": 0.78, "learning_rate": 1.8597515905814215e-05, "loss": 1.6265, "step": 870 }, { "epoch": 0.79, "learning_rate": 1.8590047647650724e-05, "loss": 1.6159, "step": 872 }, { "epoch": 0.79, "learning_rate": 1.8582561065734602e-05, "loss": 1.7576, "step": 874 }, { "epoch": 0.79, "learning_rate": 1.8575056176035758e-05, "loss": 1.7615, "step": 876 }, { "epoch": 0.79, "learning_rate": 1.8567532994563155e-05, "loss": 1.7376, "step": 878 }, { "epoch": 0.79, "learning_rate": 1.8559991537364767e-05, "loss": 1.7, "step": 880 }, { "epoch": 0.8, "learning_rate": 1.855243182052757e-05, "loss": 1.8079, "step": 882 }, { "epoch": 0.8, "learning_rate": 1.8544853860177466e-05, "loss": 1.7259, "step": 884 }, { "epoch": 0.8, "learning_rate": 1.8537257672479293e-05, "loss": 1.6671, "step": 886 }, { "epoch": 0.8, "learning_rate": 1.852964327363677e-05, "loss": 1.6526, "step": 888 }, { "epoch": 0.8, "learning_rate": 1.8522010679892443e-05, "loss": 1.7153, "step": 890 }, { "epoch": 0.8, "learning_rate": 1.8514359907527693e-05, "loss": 1.6349, "step": 892 }, { "epoch": 0.81, "learning_rate": 1.850669097286267e-05, "loss": 1.6579, "step": 894 }, { "epoch": 0.81, "learning_rate": 1.849900389225626e-05, "loss": 1.6429, "step": 896 }, { "epoch": 0.81, "learning_rate": 1.8491298682106066e-05, "loss": 1.7093, "step": 898 }, { "epoch": 0.81, "learning_rate": 1.8483575358848363e-05, "loss": 1.7222, "step": 900 }, { "epoch": 0.81, "learning_rate": 1.847583393895806e-05, "loss": 1.6711, "step": 902 }, { "epoch": 0.82, "learning_rate": 1.8468074438948664e-05, "loss": 1.6748, "step": 904 }, { "epoch": 0.82, "learning_rate": 1.8460296875372267e-05, "loss": 1.6802, "step": 906 }, { "epoch": 0.82, "learning_rate": 1.845250126481947e-05, "loss": 1.7504, "step": 908 }, { "epoch": 0.82, "learning_rate": 1.8444687623919388e-05, "loss": 1.685, "step": 910 }, { "epoch": 0.82, "learning_rate": 1.8436855969339592e-05, "loss": 1.6664, "step": 912 }, { "epoch": 0.82, "learning_rate": 1.8429006317786073e-05, "loss": 1.6405, "step": 914 }, { "epoch": 0.83, "learning_rate": 1.842113868600322e-05, "loss": 1.6919, "step": 916 }, { "epoch": 0.83, "learning_rate": 1.8413253090773773e-05, "loss": 1.7471, "step": 918 }, { "epoch": 0.83, "learning_rate": 1.8405349548918792e-05, "loss": 1.6325, "step": 920 }, { "epoch": 0.83, "learning_rate": 1.8397428077297622e-05, "loss": 1.6538, "step": 922 }, { "epoch": 0.83, "learning_rate": 1.838948869280784e-05, "loss": 1.7362, "step": 924 }, { "epoch": 0.83, "learning_rate": 1.8381531412385253e-05, "loss": 1.6641, "step": 926 }, { "epoch": 0.84, "learning_rate": 1.837355625300383e-05, "loss": 1.6573, "step": 928 }, { "epoch": 0.84, "learning_rate": 1.8365563231675693e-05, "loss": 1.748, "step": 930 }, { "epoch": 0.84, "learning_rate": 1.8357552365451038e-05, "loss": 1.6665, "step": 932 }, { "epoch": 0.84, "learning_rate": 1.834952367141816e-05, "loss": 1.6976, "step": 934 }, { "epoch": 0.84, "learning_rate": 1.8341477166703356e-05, "loss": 1.6153, "step": 936 }, { "epoch": 0.85, "learning_rate": 1.8333412868470934e-05, "loss": 1.6839, "step": 938 }, { "epoch": 0.85, "learning_rate": 1.8325330793923146e-05, "loss": 1.7304, "step": 940 }, { "epoch": 0.85, "learning_rate": 1.831723096030017e-05, "loss": 1.6834, "step": 942 }, { "epoch": 0.85, "learning_rate": 1.8309113384880065e-05, "loss": 1.7582, "step": 944 }, { "epoch": 0.85, "learning_rate": 1.8300978084978736e-05, "loss": 1.6958, "step": 946 }, { "epoch": 0.85, "learning_rate": 1.8292825077949895e-05, "loss": 1.6799, "step": 948 }, { "epoch": 0.86, "learning_rate": 1.828465438118503e-05, "loss": 1.6473, "step": 950 }, { "epoch": 0.86, "learning_rate": 1.8276466012113358e-05, "loss": 1.6568, "step": 952 }, { "epoch": 0.86, "learning_rate": 1.826825998820179e-05, "loss": 1.635, "step": 954 }, { "epoch": 0.86, "learning_rate": 1.8260036326954914e-05, "loss": 1.7147, "step": 956 }, { "epoch": 0.86, "learning_rate": 1.8251795045914922e-05, "loss": 1.6833, "step": 958 }, { "epoch": 0.87, "learning_rate": 1.824353616266161e-05, "loss": 1.7482, "step": 960 }, { "epoch": 0.87, "learning_rate": 1.8235259694812296e-05, "loss": 1.7112, "step": 962 }, { "epoch": 0.87, "learning_rate": 1.8226965660021836e-05, "loss": 1.6922, "step": 964 }, { "epoch": 0.87, "learning_rate": 1.821865407598254e-05, "loss": 1.7317, "step": 966 }, { "epoch": 0.87, "learning_rate": 1.8210324960424163e-05, "loss": 1.645, "step": 968 }, { "epoch": 0.87, "learning_rate": 1.8201978331113855e-05, "loss": 1.6681, "step": 970 }, { "epoch": 0.88, "learning_rate": 1.8193614205856118e-05, "loss": 1.7141, "step": 972 }, { "epoch": 0.88, "learning_rate": 1.8185232602492785e-05, "loss": 1.6721, "step": 974 }, { "epoch": 0.88, "learning_rate": 1.817683353890297e-05, "loss": 1.6618, "step": 976 }, { "epoch": 0.88, "learning_rate": 1.8168417033003026e-05, "loss": 1.6555, "step": 978 }, { "epoch": 0.88, "learning_rate": 1.8159983102746522e-05, "loss": 1.6415, "step": 980 }, { "epoch": 0.89, "learning_rate": 1.8151531766124186e-05, "loss": 1.7108, "step": 982 }, { "epoch": 0.89, "learning_rate": 1.8143063041163885e-05, "loss": 1.6733, "step": 984 }, { "epoch": 0.89, "learning_rate": 1.813457694593057e-05, "loss": 1.6924, "step": 986 }, { "epoch": 0.89, "learning_rate": 1.8126073498526254e-05, "loss": 1.7121, "step": 988 }, { "epoch": 0.89, "learning_rate": 1.811755271708995e-05, "loss": 1.6959, "step": 990 }, { "epoch": 0.89, "learning_rate": 1.8109014619797667e-05, "loss": 1.6739, "step": 992 }, { "epoch": 0.9, "learning_rate": 1.8100459224862336e-05, "loss": 1.7133, "step": 994 }, { "epoch": 0.9, "learning_rate": 1.809188655053379e-05, "loss": 1.6585, "step": 996 }, { "epoch": 0.9, "learning_rate": 1.8083296615098724e-05, "loss": 1.6682, "step": 998 }, { "epoch": 0.9, "learning_rate": 1.8074689436880643e-05, "loss": 1.6857, "step": 1000 }, { "epoch": 0.9, "learning_rate": 1.8066065034239854e-05, "loss": 1.711, "step": 1002 }, { "epoch": 0.91, "learning_rate": 1.8057423425573385e-05, "loss": 1.7064, "step": 1004 }, { "epoch": 0.91, "learning_rate": 1.804876462931498e-05, "loss": 1.6845, "step": 1006 }, { "epoch": 0.91, "learning_rate": 1.8040088663935037e-05, "loss": 1.6651, "step": 1008 }, { "epoch": 0.91, "learning_rate": 1.803139554794059e-05, "loss": 1.6555, "step": 1010 }, { "epoch": 0.91, "learning_rate": 1.8022685299875245e-05, "loss": 1.7049, "step": 1012 }, { "epoch": 0.91, "learning_rate": 1.8013957938319158e-05, "loss": 1.6926, "step": 1014 }, { "epoch": 0.92, "learning_rate": 1.8005213481888994e-05, "loss": 1.6573, "step": 1016 }, { "epoch": 0.92, "learning_rate": 1.799645194923788e-05, "loss": 1.6412, "step": 1018 }, { "epoch": 0.92, "learning_rate": 1.7987673359055372e-05, "loss": 1.6951, "step": 1020 }, { "epoch": 0.92, "learning_rate": 1.7978877730067403e-05, "loss": 1.6705, "step": 1022 }, { "epoch": 0.92, "learning_rate": 1.7970065081036266e-05, "loss": 1.6474, "step": 1024 }, { "epoch": 0.93, "learning_rate": 1.796123543076055e-05, "loss": 1.6242, "step": 1026 }, { "epoch": 0.93, "learning_rate": 1.7952388798075112e-05, "loss": 1.6494, "step": 1028 }, { "epoch": 0.93, "learning_rate": 1.7943525201851038e-05, "loss": 1.6096, "step": 1030 }, { "epoch": 0.93, "learning_rate": 1.79346446609956e-05, "loss": 1.6974, "step": 1032 }, { "epoch": 0.93, "learning_rate": 1.7925747194452204e-05, "loss": 1.5565, "step": 1034 }, { "epoch": 0.93, "learning_rate": 1.7916832821200375e-05, "loss": 1.6754, "step": 1036 }, { "epoch": 0.94, "learning_rate": 1.7907901560255697e-05, "loss": 1.6594, "step": 1038 }, { "epoch": 0.94, "learning_rate": 1.789895343066978e-05, "loss": 1.7223, "step": 1040 }, { "epoch": 0.94, "learning_rate": 1.7889988451530208e-05, "loss": 1.663, "step": 1042 }, { "epoch": 0.94, "learning_rate": 1.788100664196052e-05, "loss": 1.7279, "step": 1044 }, { "epoch": 0.94, "learning_rate": 1.7872008021120146e-05, "loss": 1.6634, "step": 1046 }, { "epoch": 0.94, "learning_rate": 1.7862992608204384e-05, "loss": 1.7776, "step": 1048 }, { "epoch": 0.95, "learning_rate": 1.7853960422444346e-05, "loss": 1.6886, "step": 1050 }, { "epoch": 0.95, "learning_rate": 1.784491148310693e-05, "loss": 1.6632, "step": 1052 }, { "epoch": 0.95, "learning_rate": 1.783584580949477e-05, "loss": 1.6862, "step": 1054 }, { "epoch": 0.95, "learning_rate": 1.782676342094619e-05, "loss": 1.6353, "step": 1056 }, { "epoch": 0.95, "learning_rate": 1.781766433683517e-05, "loss": 1.6897, "step": 1058 }, { "epoch": 0.96, "learning_rate": 1.7808548576571314e-05, "loss": 1.6054, "step": 1060 }, { "epoch": 0.96, "learning_rate": 1.7799416159599786e-05, "loss": 1.7094, "step": 1062 }, { "epoch": 0.96, "learning_rate": 1.7790267105401295e-05, "loss": 1.6769, "step": 1064 }, { "epoch": 0.96, "learning_rate": 1.7781101433492026e-05, "loss": 1.6944, "step": 1066 }, { "epoch": 0.96, "learning_rate": 1.7771919163423617e-05, "loss": 1.7149, "step": 1068 }, { "epoch": 0.96, "learning_rate": 1.776272031478311e-05, "loss": 1.6351, "step": 1070 }, { "epoch": 0.97, "learning_rate": 1.7753504907192923e-05, "loss": 1.648, "step": 1072 }, { "epoch": 0.97, "learning_rate": 1.7744272960310782e-05, "loss": 1.7506, "step": 1074 }, { "epoch": 0.97, "learning_rate": 1.7735024493829696e-05, "loss": 1.6794, "step": 1076 }, { "epoch": 0.97, "learning_rate": 1.7725759527477923e-05, "loss": 1.6343, "step": 1078 }, { "epoch": 0.97, "learning_rate": 1.7716478081018897e-05, "loss": 1.7295, "step": 1080 }, { "epoch": 0.98, "learning_rate": 1.7707180174251232e-05, "loss": 1.6723, "step": 1082 }, { "epoch": 0.98, "learning_rate": 1.769786582700864e-05, "loss": 1.7363, "step": 1084 }, { "epoch": 0.98, "learning_rate": 1.7688535059159896e-05, "loss": 1.7436, "step": 1086 }, { "epoch": 0.98, "learning_rate": 1.767918789060882e-05, "loss": 1.6605, "step": 1088 }, { "epoch": 0.98, "learning_rate": 1.7669824341294203e-05, "loss": 1.7059, "step": 1090 }, { "epoch": 0.98, "learning_rate": 1.766044443118978e-05, "loss": 1.607, "step": 1092 }, { "epoch": 0.99, "learning_rate": 1.7651048180304196e-05, "loss": 1.5689, "step": 1094 }, { "epoch": 0.99, "learning_rate": 1.7641635608680942e-05, "loss": 1.7319, "step": 1096 }, { "epoch": 0.99, "learning_rate": 1.763220673639833e-05, "loss": 1.6465, "step": 1098 }, { "epoch": 0.99, "learning_rate": 1.7622761583569438e-05, "loss": 1.5836, "step": 1100 }, { "epoch": 0.99, "learning_rate": 1.7613300170342073e-05, "loss": 1.6483, "step": 1102 }, { "epoch": 1.0, "learning_rate": 1.7603822516898735e-05, "loss": 1.6409, "step": 1104 }, { "epoch": 1.0, "learning_rate": 1.759432864345656e-05, "loss": 1.724, "step": 1106 }, { "epoch": 1.0, "learning_rate": 1.7584818570267287e-05, "loss": 1.6458, "step": 1108 }, { "epoch": 1.0, "learning_rate": 1.757529231761721e-05, "loss": 1.6573, "step": 1110 }, { "epoch": 1.0, "learning_rate": 1.756574990582713e-05, "loss": 1.7138, "step": 1112 }, { "epoch": 1.0, "learning_rate": 1.755619135525233e-05, "loss": 1.5559, "step": 1114 }, { "epoch": 1.01, "learning_rate": 1.754661668628251e-05, "loss": 1.7281, "step": 1116 }, { "epoch": 1.01, "learning_rate": 1.753702591934175e-05, "loss": 1.6373, "step": 1118 }, { "epoch": 1.01, "learning_rate": 1.7527419074888483e-05, "loss": 1.6487, "step": 1120 }, { "epoch": 1.01, "learning_rate": 1.7517796173415423e-05, "loss": 1.6042, "step": 1122 }, { "epoch": 1.01, "learning_rate": 1.7508157235449547e-05, "loss": 1.6037, "step": 1124 }, { "epoch": 1.02, "learning_rate": 1.749850228155203e-05, "loss": 1.5873, "step": 1126 }, { "epoch": 1.02, "learning_rate": 1.7488831332318224e-05, "loss": 1.5981, "step": 1128 }, { "epoch": 1.02, "learning_rate": 1.7479144408377583e-05, "loss": 1.5995, "step": 1130 }, { "epoch": 1.02, "learning_rate": 1.7469441530393652e-05, "loss": 1.6619, "step": 1132 }, { "epoch": 1.02, "learning_rate": 1.7459722719064004e-05, "loss": 1.6442, "step": 1134 }, { "epoch": 1.02, "learning_rate": 1.74499879951202e-05, "loss": 1.6343, "step": 1136 }, { "epoch": 1.03, "learning_rate": 1.7440237379327745e-05, "loss": 1.6415, "step": 1138 }, { "epoch": 1.03, "learning_rate": 1.743047089248604e-05, "loss": 1.6876, "step": 1140 }, { "epoch": 1.03, "learning_rate": 1.7420688555428348e-05, "loss": 1.6451, "step": 1142 }, { "epoch": 1.03, "learning_rate": 1.7410890389021737e-05, "loss": 1.6764, "step": 1144 }, { "epoch": 1.03, "learning_rate": 1.7401076414167045e-05, "loss": 1.6495, "step": 1146 }, { "epoch": 1.04, "learning_rate": 1.7391246651798822e-05, "loss": 1.6299, "step": 1148 }, { "epoch": 1.04, "learning_rate": 1.7381401122885316e-05, "loss": 1.6415, "step": 1150 }, { "epoch": 1.04, "learning_rate": 1.737153984842838e-05, "loss": 1.6169, "step": 1152 }, { "epoch": 1.04, "learning_rate": 1.7361662849463477e-05, "loss": 1.5897, "step": 1154 }, { "epoch": 1.04, "learning_rate": 1.7351770147059604e-05, "loss": 1.6446, "step": 1156 }, { "epoch": 1.04, "learning_rate": 1.734186176231925e-05, "loss": 1.6889, "step": 1158 }, { "epoch": 1.05, "learning_rate": 1.7331937716378363e-05, "loss": 1.6264, "step": 1160 }, { "epoch": 1.05, "learning_rate": 1.7321998030406303e-05, "loss": 1.6497, "step": 1162 }, { "epoch": 1.05, "learning_rate": 1.7312042725605778e-05, "loss": 1.6199, "step": 1164 }, { "epoch": 1.05, "learning_rate": 1.7302071823212828e-05, "loss": 1.6627, "step": 1166 }, { "epoch": 1.05, "learning_rate": 1.729208534449676e-05, "loss": 1.6577, "step": 1168 }, { "epoch": 1.06, "learning_rate": 1.7282083310760097e-05, "loss": 1.6438, "step": 1170 }, { "epoch": 1.06, "learning_rate": 1.7272065743338565e-05, "loss": 1.6896, "step": 1172 }, { "epoch": 1.06, "learning_rate": 1.7262032663601003e-05, "loss": 1.5625, "step": 1174 }, { "epoch": 1.06, "learning_rate": 1.725198409294935e-05, "loss": 1.6492, "step": 1176 }, { "epoch": 1.06, "learning_rate": 1.7241920052818592e-05, "loss": 1.6704, "step": 1178 }, { "epoch": 1.06, "learning_rate": 1.723184056467671e-05, "loss": 1.6208, "step": 1180 }, { "epoch": 1.07, "learning_rate": 1.722174565002464e-05, "loss": 1.6938, "step": 1182 }, { "epoch": 1.07, "learning_rate": 1.721163533039621e-05, "loss": 1.6304, "step": 1184 }, { "epoch": 1.07, "learning_rate": 1.7201509627358143e-05, "loss": 1.6077, "step": 1186 }, { "epoch": 1.07, "learning_rate": 1.719644101380906e-05, "loss": 1.6241, "step": 1188 }, { "epoch": 1.07, "learning_rate": 1.718629227616585e-05, "loss": 1.5494, "step": 1190 }, { "epoch": 1.07, "learning_rate": 1.7176128209173214e-05, "loss": 1.6192, "step": 1192 }, { "epoch": 1.08, "learning_rate": 1.7165948834512498e-05, "loss": 1.5993, "step": 1194 }, { "epoch": 1.08, "learning_rate": 1.7155754173897718e-05, "loss": 1.5976, "step": 1196 }, { "epoch": 1.08, "learning_rate": 1.7145544249075473e-05, "loss": 1.627, "step": 1198 }, { "epoch": 1.08, "learning_rate": 1.7135319081824945e-05, "loss": 1.6417, "step": 1200 }, { "epoch": 1.08, "learning_rate": 1.7125078693957817e-05, "loss": 1.577, "step": 1202 }, { "epoch": 1.09, "learning_rate": 1.7114823107318245e-05, "loss": 1.6091, "step": 1204 }, { "epoch": 1.09, "learning_rate": 1.7104552343782814e-05, "loss": 1.6435, "step": 1206 }, { "epoch": 1.09, "learning_rate": 1.709426642526046e-05, "loss": 1.5965, "step": 1208 }, { "epoch": 1.09, "learning_rate": 1.7083965373692476e-05, "loss": 1.6498, "step": 1210 }, { "epoch": 1.09, "learning_rate": 1.707364921105241e-05, "loss": 1.6697, "step": 1212 }, { "epoch": 1.09, "learning_rate": 1.706331795934606e-05, "loss": 1.6733, "step": 1214 }, { "epoch": 1.1, "learning_rate": 1.7052971640611406e-05, "loss": 1.6135, "step": 1216 }, { "epoch": 1.1, "learning_rate": 1.7042610276918566e-05, "loss": 1.624, "step": 1218 }, { "epoch": 1.1, "learning_rate": 1.7032233890369755e-05, "loss": 1.6201, "step": 1220 }, { "epoch": 1.1, "learning_rate": 1.7021842503099236e-05, "loss": 1.6666, "step": 1222 }, { "epoch": 1.1, "learning_rate": 1.701143613727326e-05, "loss": 1.6264, "step": 1224 }, { "epoch": 1.11, "learning_rate": 1.700101481509004e-05, "loss": 1.7085, "step": 1226 }, { "epoch": 1.11, "learning_rate": 1.699057855877969e-05, "loss": 1.5828, "step": 1228 }, { "epoch": 1.11, "learning_rate": 1.6980127390604176e-05, "loss": 1.563, "step": 1230 }, { "epoch": 1.11, "learning_rate": 1.6969661332857278e-05, "loss": 1.5967, "step": 1232 }, { "epoch": 1.11, "learning_rate": 1.6959180407864535e-05, "loss": 1.6464, "step": 1234 }, { "epoch": 1.11, "learning_rate": 1.6948684637983202e-05, "loss": 1.6169, "step": 1236 }, { "epoch": 1.12, "learning_rate": 1.6938174045602203e-05, "loss": 1.6654, "step": 1238 }, { "epoch": 1.12, "learning_rate": 1.692764865314207e-05, "loss": 1.6086, "step": 1240 }, { "epoch": 1.12, "learning_rate": 1.6917108483054917e-05, "loss": 1.626, "step": 1242 }, { "epoch": 1.12, "learning_rate": 1.6906553557824372e-05, "loss": 1.656, "step": 1244 }, { "epoch": 1.12, "learning_rate": 1.6895983899965546e-05, "loss": 1.5495, "step": 1246 }, { "epoch": 1.13, "learning_rate": 1.6885399532024965e-05, "loss": 1.7189, "step": 1248 }, { "epoch": 1.13, "learning_rate": 1.6874800476580553e-05, "loss": 1.6218, "step": 1250 }, { "epoch": 1.13, "learning_rate": 1.6864186756241544e-05, "loss": 1.6006, "step": 1252 }, { "epoch": 1.13, "learning_rate": 1.685355839364846e-05, "loss": 1.6826, "step": 1254 }, { "epoch": 1.13, "learning_rate": 1.684291541147307e-05, "loss": 1.6151, "step": 1256 }, { "epoch": 1.13, "learning_rate": 1.683225783241831e-05, "loss": 1.5954, "step": 1258 }, { "epoch": 1.14, "learning_rate": 1.682158567921826e-05, "loss": 1.6481, "step": 1260 }, { "epoch": 1.14, "learning_rate": 1.6810898974638098e-05, "loss": 1.5727, "step": 1262 }, { "epoch": 1.14, "learning_rate": 1.6800197741474028e-05, "loss": 1.6376, "step": 1264 }, { "epoch": 1.14, "learning_rate": 1.6789482002553252e-05, "loss": 1.6167, "step": 1266 }, { "epoch": 1.14, "learning_rate": 1.677875178073392e-05, "loss": 1.6303, "step": 1268 }, { "epoch": 1.15, "learning_rate": 1.6768007098905058e-05, "loss": 1.5653, "step": 1270 }, { "epoch": 1.15, "learning_rate": 1.6757247979986564e-05, "loss": 1.6156, "step": 1272 }, { "epoch": 1.15, "learning_rate": 1.674647444692911e-05, "loss": 1.576, "step": 1274 }, { "epoch": 1.15, "learning_rate": 1.6735686522714126e-05, "loss": 1.5843, "step": 1276 }, { "epoch": 1.15, "learning_rate": 1.6724884230353737e-05, "loss": 1.6331, "step": 1278 }, { "epoch": 1.15, "learning_rate": 1.6714067592890713e-05, "loss": 1.5994, "step": 1280 }, { "epoch": 1.16, "learning_rate": 1.6703236633398436e-05, "loss": 1.5567, "step": 1282 }, { "epoch": 1.16, "learning_rate": 1.6692391374980827e-05, "loss": 1.5959, "step": 1284 }, { "epoch": 1.16, "learning_rate": 1.6681531840772314e-05, "loss": 1.6335, "step": 1286 }, { "epoch": 1.16, "learning_rate": 1.667065805393778e-05, "loss": 1.7096, "step": 1288 }, { "epoch": 1.16, "learning_rate": 1.66597700376725e-05, "loss": 1.5952, "step": 1290 }, { "epoch": 1.17, "learning_rate": 1.664886781520212e-05, "loss": 1.6733, "step": 1292 }, { "epoch": 1.17, "learning_rate": 1.6637951409782568e-05, "loss": 1.6305, "step": 1294 }, { "epoch": 1.17, "learning_rate": 1.662702084470005e-05, "loss": 1.6955, "step": 1296 }, { "epoch": 1.17, "learning_rate": 1.661607614327095e-05, "loss": 1.5969, "step": 1298 }, { "epoch": 1.17, "learning_rate": 1.660511732884183e-05, "loss": 1.6004, "step": 1300 }, { "epoch": 1.17, "learning_rate": 1.659414442478935e-05, "loss": 1.5801, "step": 1302 }, { "epoch": 1.18, "learning_rate": 1.6583157454520214e-05, "loss": 1.6222, "step": 1304 }, { "epoch": 1.18, "learning_rate": 1.6572156441471146e-05, "loss": 1.566, "step": 1306 }, { "epoch": 1.18, "learning_rate": 1.6561141409108825e-05, "loss": 1.6098, "step": 1308 }, { "epoch": 1.18, "learning_rate": 1.6550112380929814e-05, "loss": 1.5743, "step": 1310 }, { "epoch": 1.18, "learning_rate": 1.6539069380460556e-05, "loss": 1.5831, "step": 1312 }, { "epoch": 1.18, "learning_rate": 1.6528012431257292e-05, "loss": 1.6536, "step": 1314 }, { "epoch": 1.19, "learning_rate": 1.651694155690601e-05, "loss": 1.6429, "step": 1316 }, { "epoch": 1.19, "learning_rate": 1.6505856781022413e-05, "loss": 1.5991, "step": 1318 }, { "epoch": 1.19, "learning_rate": 1.649475812725184e-05, "loss": 1.6168, "step": 1320 }, { "epoch": 1.19, "learning_rate": 1.6483645619269256e-05, "loss": 1.5556, "step": 1322 }, { "epoch": 1.19, "learning_rate": 1.6472519280779164e-05, "loss": 1.6329, "step": 1324 }, { "epoch": 1.2, "learning_rate": 1.6461379135515575e-05, "loss": 1.603, "step": 1326 }, { "epoch": 1.2, "learning_rate": 1.645022520724195e-05, "loss": 1.6533, "step": 1328 }, { "epoch": 1.2, "learning_rate": 1.6439057519751155e-05, "loss": 1.6451, "step": 1330 }, { "epoch": 1.2, "learning_rate": 1.6427876096865394e-05, "loss": 1.6017, "step": 1332 }, { "epoch": 1.2, "learning_rate": 1.641668096243619e-05, "loss": 1.5915, "step": 1334 }, { "epoch": 1.2, "learning_rate": 1.6405472140344294e-05, "loss": 1.592, "step": 1336 }, { "epoch": 1.21, "learning_rate": 1.6399862603895563e-05, "loss": 1.5916, "step": 1338 }, { "epoch": 1.21, "learning_rate": 1.638863329514992e-05, "loss": 1.6036, "step": 1340 }, { "epoch": 1.21, "learning_rate": 1.637739035857112e-05, "loss": 1.6597, "step": 1342 }, { "epoch": 1.21, "learning_rate": 1.6366133818141893e-05, "loss": 1.647, "step": 1344 }, { "epoch": 1.21, "learning_rate": 1.635486369787399e-05, "loss": 1.6059, "step": 1346 }, { "epoch": 1.22, "learning_rate": 1.634358002180812e-05, "loss": 1.583, "step": 1348 }, { "epoch": 1.22, "learning_rate": 1.633228281401392e-05, "loss": 1.5999, "step": 1350 }, { "epoch": 1.22, "learning_rate": 1.6320972098589883e-05, "loss": 1.5893, "step": 1352 }, { "epoch": 1.22, "learning_rate": 1.6309647899663332e-05, "loss": 1.6109, "step": 1354 }, { "epoch": 1.22, "learning_rate": 1.6298310241390326e-05, "loss": 1.6077, "step": 1356 }, { "epoch": 1.22, "learning_rate": 1.6286959147955656e-05, "loss": 1.668, "step": 1358 }, { "epoch": 1.23, "learning_rate": 1.627559464357276e-05, "loss": 1.5566, "step": 1360 }, { "epoch": 1.23, "learning_rate": 1.6264216752483697e-05, "loss": 1.5587, "step": 1362 }, { "epoch": 1.23, "learning_rate": 1.625282549895907e-05, "loss": 1.5886, "step": 1364 }, { "epoch": 1.23, "learning_rate": 1.624142090729798e-05, "loss": 1.5838, "step": 1366 }, { "epoch": 1.23, "learning_rate": 1.6230003001828e-05, "loss": 1.5925, "step": 1368 }, { "epoch": 1.24, "learning_rate": 1.6218571806905088e-05, "loss": 1.5977, "step": 1370 }, { "epoch": 1.24, "learning_rate": 1.620712734691355e-05, "loss": 1.5983, "step": 1372 }, { "epoch": 1.24, "learning_rate": 1.6195669646266003e-05, "loss": 1.6395, "step": 1374 }, { "epoch": 1.24, "learning_rate": 1.6184198729403288e-05, "loss": 1.5128, "step": 1376 }, { "epoch": 1.24, "learning_rate": 1.6172714620794455e-05, "loss": 1.5381, "step": 1378 }, { "epoch": 1.24, "learning_rate": 1.616121734493668e-05, "loss": 1.5826, "step": 1380 }, { "epoch": 1.25, "learning_rate": 1.6149706926355237e-05, "loss": 1.5407, "step": 1382 }, { "epoch": 1.25, "learning_rate": 1.6138183389603427e-05, "loss": 1.5566, "step": 1384 }, { "epoch": 1.25, "learning_rate": 1.6126646759262548e-05, "loss": 1.5748, "step": 1386 }, { "epoch": 1.25, "learning_rate": 1.6115097059941807e-05, "loss": 1.6607, "step": 1388 }, { "epoch": 1.25, "learning_rate": 1.610353431627831e-05, "loss": 1.6268, "step": 1390 }, { "epoch": 1.26, "learning_rate": 1.609195855293697e-05, "loss": 1.5607, "step": 1392 }, { "epoch": 1.26, "learning_rate": 1.6080369794610486e-05, "loss": 1.6097, "step": 1394 }, { "epoch": 1.26, "learning_rate": 1.606876806601928e-05, "loss": 1.6064, "step": 1396 }, { "epoch": 1.26, "learning_rate": 1.6057153391911422e-05, "loss": 1.7139, "step": 1398 }, { "epoch": 1.26, "learning_rate": 1.6045525797062614e-05, "loss": 1.563, "step": 1400 }, { "epoch": 1.26, "learning_rate": 1.6033885306276117e-05, "loss": 1.6391, "step": 1402 }, { "epoch": 1.27, "learning_rate": 1.6022231944382693e-05, "loss": 1.6241, "step": 1404 }, { "epoch": 1.27, "learning_rate": 1.601056573624057e-05, "loss": 1.707, "step": 1406 }, { "epoch": 1.27, "learning_rate": 1.599888670673537e-05, "loss": 1.5843, "step": 1408 }, { "epoch": 1.27, "learning_rate": 1.598719488078007e-05, "loss": 1.5681, "step": 1410 }, { "epoch": 1.27, "learning_rate": 1.5975490283314946e-05, "loss": 1.6174, "step": 1412 }, { "epoch": 1.28, "learning_rate": 1.5963772939307508e-05, "loss": 1.6542, "step": 1414 }, { "epoch": 1.28, "learning_rate": 1.5952042873752463e-05, "loss": 1.5881, "step": 1416 }, { "epoch": 1.28, "learning_rate": 1.5940300111671652e-05, "loss": 1.6124, "step": 1418 }, { "epoch": 1.28, "learning_rate": 1.5928544678114007e-05, "loss": 1.5708, "step": 1420 }, { "epoch": 1.28, "learning_rate": 1.5916776598155478e-05, "loss": 1.6167, "step": 1422 }, { "epoch": 1.28, "learning_rate": 1.5904995896899004e-05, "loss": 1.4885, "step": 1424 }, { "epoch": 1.29, "learning_rate": 1.5893202599474434e-05, "loss": 1.5958, "step": 1426 }, { "epoch": 1.29, "learning_rate": 1.5881396731038493e-05, "loss": 1.625, "step": 1428 }, { "epoch": 1.29, "learning_rate": 1.5869578316774724e-05, "loss": 1.5973, "step": 1430 }, { "epoch": 1.29, "learning_rate": 1.585774738189343e-05, "loss": 1.6034, "step": 1432 }, { "epoch": 1.29, "learning_rate": 1.584590395163162e-05, "loss": 1.5719, "step": 1434 }, { "epoch": 1.29, "learning_rate": 1.583404805125296e-05, "loss": 1.5804, "step": 1436 }, { "epoch": 1.3, "learning_rate": 1.5822179706047717e-05, "loss": 1.6417, "step": 1438 }, { "epoch": 1.3, "learning_rate": 1.5810298941332696e-05, "loss": 1.6222, "step": 1440 }, { "epoch": 1.3, "learning_rate": 1.5798405782451216e-05, "loss": 1.5941, "step": 1442 }, { "epoch": 1.3, "learning_rate": 1.5786500254773005e-05, "loss": 1.6113, "step": 1444 }, { "epoch": 1.3, "learning_rate": 1.5774582383694196e-05, "loss": 1.5371, "step": 1446 }, { "epoch": 1.31, "learning_rate": 1.5762652194637247e-05, "loss": 1.6054, "step": 1448 }, { "epoch": 1.31, "learning_rate": 1.575070971305089e-05, "loss": 1.5252, "step": 1450 }, { "epoch": 1.31, "learning_rate": 1.5738754964410084e-05, "loss": 1.5853, "step": 1452 }, { "epoch": 1.31, "learning_rate": 1.5726787974215948e-05, "loss": 1.6497, "step": 1454 }, { "epoch": 1.31, "learning_rate": 1.5714808767995716e-05, "loss": 1.5488, "step": 1456 }, { "epoch": 1.31, "learning_rate": 1.5702817371302684e-05, "loss": 1.678, "step": 1458 }, { "epoch": 1.32, "learning_rate": 1.5690813809716153e-05, "loss": 1.6052, "step": 1460 }, { "epoch": 1.32, "learning_rate": 1.5678798108841366e-05, "loss": 1.601, "step": 1462 }, { "epoch": 1.32, "learning_rate": 1.5666770294309467e-05, "loss": 1.6847, "step": 1464 }, { "epoch": 1.32, "learning_rate": 1.5654730391777434e-05, "loss": 1.5923, "step": 1466 }, { "epoch": 1.32, "learning_rate": 1.5642678426928036e-05, "loss": 1.59, "step": 1468 }, { "epoch": 1.33, "learning_rate": 1.5630614425469776e-05, "loss": 1.5686, "step": 1470 }, { "epoch": 1.33, "learning_rate": 1.561853841313682e-05, "loss": 1.6082, "step": 1472 }, { "epoch": 1.33, "learning_rate": 1.5606450415688968e-05, "loss": 1.6408, "step": 1474 }, { "epoch": 1.33, "learning_rate": 1.5594350458911586e-05, "loss": 1.6029, "step": 1476 }, { "epoch": 1.33, "learning_rate": 1.558223856861553e-05, "loss": 1.5787, "step": 1478 }, { "epoch": 1.33, "learning_rate": 1.557011477063714e-05, "loss": 1.6162, "step": 1480 }, { "epoch": 1.34, "learning_rate": 1.5557979090838136e-05, "loss": 1.5908, "step": 1482 }, { "epoch": 1.34, "learning_rate": 1.5545831555105598e-05, "loss": 1.6125, "step": 1484 }, { "epoch": 1.34, "learning_rate": 1.553367218935188e-05, "loss": 1.6002, "step": 1486 }, { "epoch": 1.34, "learning_rate": 1.55215010195146e-05, "loss": 1.6325, "step": 1488 }, { "epoch": 1.34, "learning_rate": 1.5509318071556513e-05, "loss": 1.6437, "step": 1490 }, { "epoch": 1.35, "learning_rate": 1.5497123371465537e-05, "loss": 1.6507, "step": 1492 }, { "epoch": 1.35, "learning_rate": 1.5484916945254642e-05, "loss": 1.6168, "step": 1494 }, { "epoch": 1.35, "learning_rate": 1.547269881896181e-05, "loss": 1.6525, "step": 1496 }, { "epoch": 1.35, "learning_rate": 1.546046901864999e-05, "loss": 1.599, "step": 1498 }, { "epoch": 1.35, "learning_rate": 1.5448227570407012e-05, "loss": 1.601, "step": 1500 }, { "epoch": 1.35, "learning_rate": 1.543597450034559e-05, "loss": 1.681, "step": 1502 }, { "epoch": 1.36, "learning_rate": 1.542370983460319e-05, "loss": 1.6409, "step": 1504 }, { "epoch": 1.36, "learning_rate": 1.5411433599342038e-05, "loss": 1.5691, "step": 1506 }, { "epoch": 1.36, "learning_rate": 1.539914582074903e-05, "loss": 1.594, "step": 1508 }, { "epoch": 1.36, "learning_rate": 1.538684652503569e-05, "loss": 1.7021, "step": 1510 }, { "epoch": 1.36, "learning_rate": 1.5374535738438105e-05, "loss": 1.5613, "step": 1512 }, { "epoch": 1.37, "learning_rate": 1.536221348721687e-05, "loss": 1.5947, "step": 1514 }, { "epoch": 1.37, "learning_rate": 1.5349879797657045e-05, "loss": 1.6185, "step": 1516 }, { "epoch": 1.37, "learning_rate": 1.5337534696068088e-05, "loss": 1.6027, "step": 1518 }, { "epoch": 1.37, "learning_rate": 1.5325178208783793e-05, "loss": 1.6229, "step": 1520 }, { "epoch": 1.37, "learning_rate": 1.5312810362162253e-05, "loss": 1.6062, "step": 1522 }, { "epoch": 1.37, "learning_rate": 1.5300431182585777e-05, "loss": 1.5713, "step": 1524 }, { "epoch": 1.38, "learning_rate": 1.5288040696460863e-05, "loss": 1.5456, "step": 1526 }, { "epoch": 1.38, "learning_rate": 1.5275638930218122e-05, "loss": 1.63, "step": 1528 }, { "epoch": 1.38, "learning_rate": 1.5263225910312222e-05, "loss": 1.5683, "step": 1530 }, { "epoch": 1.38, "learning_rate": 1.5250801663221843e-05, "loss": 1.7049, "step": 1532 }, { "epoch": 1.38, "learning_rate": 1.5238366215449611e-05, "loss": 1.628, "step": 1534 }, { "epoch": 1.39, "learning_rate": 1.5225919593522049e-05, "loss": 1.5985, "step": 1536 }, { "epoch": 1.39, "learning_rate": 1.521346182398951e-05, "loss": 1.6315, "step": 1538 }, { "epoch": 1.39, "learning_rate": 1.520099293342613e-05, "loss": 1.6427, "step": 1540 }, { "epoch": 1.39, "learning_rate": 1.5188512948429765e-05, "loss": 1.6735, "step": 1542 }, { "epoch": 1.39, "learning_rate": 1.5176021895621944e-05, "loss": 1.5767, "step": 1544 }, { "epoch": 1.39, "learning_rate": 1.5163519801647795e-05, "loss": 1.6415, "step": 1546 }, { "epoch": 1.4, "learning_rate": 1.5151006693176005e-05, "loss": 1.6607, "step": 1548 }, { "epoch": 1.4, "learning_rate": 1.5138482596898754e-05, "loss": 1.618, "step": 1550 }, { "epoch": 1.4, "learning_rate": 1.5125947539531663e-05, "loss": 1.5899, "step": 1552 }, { "epoch": 1.4, "learning_rate": 1.5113401547813732e-05, "loss": 1.6436, "step": 1554 }, { "epoch": 1.4, "learning_rate": 1.5100844648507284e-05, "loss": 1.6373, "step": 1556 }, { "epoch": 1.4, "learning_rate": 1.5088276868397915e-05, "loss": 1.6599, "step": 1558 }, { "epoch": 1.41, "learning_rate": 1.5075698234294424e-05, "loss": 1.5582, "step": 1560 }, { "epoch": 1.41, "learning_rate": 1.5063108773028771e-05, "loss": 1.6258, "step": 1562 }, { "epoch": 1.41, "learning_rate": 1.5050508511456006e-05, "loss": 1.6312, "step": 1564 }, { "epoch": 1.41, "learning_rate": 1.5037897476454219e-05, "loss": 1.5919, "step": 1566 }, { "epoch": 1.41, "learning_rate": 1.5025275694924481e-05, "loss": 1.5423, "step": 1568 }, { "epoch": 1.42, "learning_rate": 1.5012643193790788e-05, "loss": 1.6427, "step": 1570 }, { "epoch": 1.42, "learning_rate": 1.5000000000000002e-05, "loss": 1.5718, "step": 1572 }, { "epoch": 1.42, "learning_rate": 1.4987346140521792e-05, "loss": 1.6259, "step": 1574 }, { "epoch": 1.42, "learning_rate": 1.4974681642348583e-05, "loss": 1.5403, "step": 1576 }, { "epoch": 1.42, "learning_rate": 1.496200653249549e-05, "loss": 1.5996, "step": 1578 }, { "epoch": 1.42, "learning_rate": 1.4949320838000261e-05, "loss": 1.4791, "step": 1580 }, { "epoch": 1.43, "learning_rate": 1.493662458592323e-05, "loss": 1.6647, "step": 1582 }, { "epoch": 1.43, "learning_rate": 1.492391780334725e-05, "loss": 1.608, "step": 1584 }, { "epoch": 1.43, "learning_rate": 1.4911200517377628e-05, "loss": 1.618, "step": 1586 }, { "epoch": 1.43, "learning_rate": 1.4898472755142095e-05, "loss": 1.5103, "step": 1588 }, { "epoch": 1.43, "learning_rate": 1.4885734543790707e-05, "loss": 1.5614, "step": 1590 }, { "epoch": 1.44, "learning_rate": 1.4872985910495822e-05, "loss": 1.6294, "step": 1592 }, { "epoch": 1.44, "learning_rate": 1.4860226882452033e-05, "loss": 1.6391, "step": 1594 }, { "epoch": 1.44, "learning_rate": 1.4847457486876097e-05, "loss": 1.6182, "step": 1596 }, { "epoch": 1.44, "learning_rate": 1.4834677751006891e-05, "loss": 1.623, "step": 1598 }, { "epoch": 1.44, "learning_rate": 1.4821887702105348e-05, "loss": 1.6026, "step": 1600 }, { "epoch": 1.44, "learning_rate": 1.4809087367454402e-05, "loss": 1.6163, "step": 1602 }, { "epoch": 1.45, "learning_rate": 1.4796276774358927e-05, "loss": 1.5841, "step": 1604 }, { "epoch": 1.45, "learning_rate": 1.4783455950145675e-05, "loss": 1.5542, "step": 1606 }, { "epoch": 1.45, "learning_rate": 1.4770624922163233e-05, "loss": 1.6349, "step": 1608 }, { "epoch": 1.45, "learning_rate": 1.4757783717781942e-05, "loss": 1.6687, "step": 1610 }, { "epoch": 1.45, "learning_rate": 1.4744932364393863e-05, "loss": 1.7306, "step": 1612 }, { "epoch": 1.46, "learning_rate": 1.4732070889412693e-05, "loss": 1.6088, "step": 1614 }, { "epoch": 1.46, "learning_rate": 1.4719199320273729e-05, "loss": 1.5419, "step": 1616 }, { "epoch": 1.46, "learning_rate": 1.4706317684433802e-05, "loss": 1.6691, "step": 1618 }, { "epoch": 1.46, "learning_rate": 1.4693426009371203e-05, "loss": 1.6314, "step": 1620 }, { "epoch": 1.46, "learning_rate": 1.4680524322585656e-05, "loss": 1.5798, "step": 1622 }, { "epoch": 1.46, "learning_rate": 1.4667612651598229e-05, "loss": 1.6116, "step": 1624 }, { "epoch": 1.47, "learning_rate": 1.4654691023951289e-05, "loss": 1.5479, "step": 1626 }, { "epoch": 1.47, "learning_rate": 1.4641759467208448e-05, "loss": 1.61, "step": 1628 }, { "epoch": 1.47, "learning_rate": 1.4628818008954492e-05, "loss": 1.5859, "step": 1630 }, { "epoch": 1.47, "learning_rate": 1.4615866676795334e-05, "loss": 1.5711, "step": 1632 }, { "epoch": 1.47, "learning_rate": 1.4602905498357944e-05, "loss": 1.567, "step": 1634 }, { "epoch": 1.48, "learning_rate": 1.4589934501290297e-05, "loss": 1.5867, "step": 1636 }, { "epoch": 1.48, "learning_rate": 1.4576953713261313e-05, "loss": 1.6344, "step": 1638 }, { "epoch": 1.48, "learning_rate": 1.4563963161960799e-05, "loss": 1.6663, "step": 1640 }, { "epoch": 1.48, "learning_rate": 1.4550962875099385e-05, "loss": 1.659, "step": 1642 }, { "epoch": 1.48, "learning_rate": 1.4537952880408472e-05, "loss": 1.6495, "step": 1644 }, { "epoch": 1.48, "learning_rate": 1.4524933205640168e-05, "loss": 1.5322, "step": 1646 }, { "epoch": 1.49, "learning_rate": 1.4511903878567229e-05, "loss": 1.5643, "step": 1648 }, { "epoch": 1.49, "learning_rate": 1.4498864926982996e-05, "loss": 1.5709, "step": 1650 }, { "epoch": 1.49, "learning_rate": 1.448581637870135e-05, "loss": 1.6062, "step": 1652 }, { "epoch": 1.49, "learning_rate": 1.4472758261556639e-05, "loss": 1.5866, "step": 1654 }, { "epoch": 1.49, "learning_rate": 1.4459690603403623e-05, "loss": 1.603, "step": 1656 }, { "epoch": 1.5, "learning_rate": 1.444661343211741e-05, "loss": 1.6153, "step": 1658 }, { "epoch": 1.5, "learning_rate": 1.4433526775593404e-05, "loss": 1.563, "step": 1660 }, { "epoch": 1.5, "learning_rate": 1.4420430661747245e-05, "loss": 1.6109, "step": 1662 }, { "epoch": 1.5, "learning_rate": 1.4407325118514743e-05, "loss": 1.5876, "step": 1664 }, { "epoch": 1.5, "learning_rate": 1.4394210173851824e-05, "loss": 1.6287, "step": 1666 }, { "epoch": 1.5, "learning_rate": 1.4381085855734468e-05, "loss": 1.5973, "step": 1668 }, { "epoch": 1.51, "learning_rate": 1.4367952192158652e-05, "loss": 1.6117, "step": 1670 }, { "epoch": 1.51, "learning_rate": 1.4354809211140284e-05, "loss": 1.559, "step": 1672 }, { "epoch": 1.51, "learning_rate": 1.4341656940715147e-05, "loss": 1.5539, "step": 1674 }, { "epoch": 1.51, "learning_rate": 1.4328495408938847e-05, "loss": 1.5392, "step": 1676 }, { "epoch": 1.51, "learning_rate": 1.4315324643886737e-05, "loss": 1.5979, "step": 1678 }, { "epoch": 1.51, "learning_rate": 1.4302144673653875e-05, "loss": 1.604, "step": 1680 }, { "epoch": 1.52, "learning_rate": 1.4288955526354942e-05, "loss": 1.5481, "step": 1682 }, { "epoch": 1.52, "learning_rate": 1.4275757230124207e-05, "loss": 1.6369, "step": 1684 }, { "epoch": 1.52, "learning_rate": 1.426254981311545e-05, "loss": 1.6026, "step": 1686 }, { "epoch": 1.52, "learning_rate": 1.4249333303501906e-05, "loss": 1.5704, "step": 1688 }, { "epoch": 1.52, "learning_rate": 1.4236107729476209e-05, "loss": 1.6457, "step": 1690 }, { "epoch": 1.53, "learning_rate": 1.4222873119250325e-05, "loss": 1.5152, "step": 1692 }, { "epoch": 1.53, "learning_rate": 1.4209629501055497e-05, "loss": 1.5963, "step": 1694 }, { "epoch": 1.53, "learning_rate": 1.4196376903142186e-05, "loss": 1.5865, "step": 1696 }, { "epoch": 1.53, "learning_rate": 1.4183115353780001e-05, "loss": 1.6366, "step": 1698 }, { "epoch": 1.53, "learning_rate": 1.4169844881257655e-05, "loss": 1.5734, "step": 1700 }, { "epoch": 1.53, "learning_rate": 1.415656551388289e-05, "loss": 1.562, "step": 1702 }, { "epoch": 1.54, "learning_rate": 1.4143277279982415e-05, "loss": 1.5946, "step": 1704 }, { "epoch": 1.54, "learning_rate": 1.4129980207901867e-05, "loss": 1.5339, "step": 1706 }, { "epoch": 1.54, "learning_rate": 1.4116674326005731e-05, "loss": 1.6555, "step": 1708 }, { "epoch": 1.54, "learning_rate": 1.4103359662677276e-05, "loss": 1.6325, "step": 1710 }, { "epoch": 1.54, "learning_rate": 1.4090036246318513e-05, "loss": 1.5931, "step": 1712 }, { "epoch": 1.55, "learning_rate": 1.4076704105350117e-05, "loss": 1.5118, "step": 1714 }, { "epoch": 1.55, "learning_rate": 1.406336326821138e-05, "loss": 1.6545, "step": 1716 }, { "epoch": 1.55, "learning_rate": 1.4050013763360144e-05, "loss": 1.6025, "step": 1718 }, { "epoch": 1.55, "learning_rate": 1.4036655619272736e-05, "loss": 1.5861, "step": 1720 }, { "epoch": 1.55, "learning_rate": 1.4023288864443915e-05, "loss": 1.604, "step": 1722 }, { "epoch": 1.55, "learning_rate": 1.4009913527386808e-05, "loss": 1.5718, "step": 1724 }, { "epoch": 1.56, "learning_rate": 1.3996529636632843e-05, "loss": 1.675, "step": 1726 }, { "epoch": 1.56, "learning_rate": 1.3983137220731702e-05, "loss": 1.5882, "step": 1728 }, { "epoch": 1.56, "learning_rate": 1.3969736308251252e-05, "loss": 1.5851, "step": 1730 }, { "epoch": 1.56, "learning_rate": 1.3956326927777478e-05, "loss": 1.5425, "step": 1732 }, { "epoch": 1.56, "learning_rate": 1.3942909107914431e-05, "loss": 1.5696, "step": 1734 }, { "epoch": 1.57, "learning_rate": 1.3929482877284168e-05, "loss": 1.6312, "step": 1736 }, { "epoch": 1.57, "learning_rate": 1.3916048264526687e-05, "loss": 1.5701, "step": 1738 }, { "epoch": 1.57, "learning_rate": 1.390260529829986e-05, "loss": 1.5844, "step": 1740 }, { "epoch": 1.57, "learning_rate": 1.3889154007279384e-05, "loss": 1.5455, "step": 1742 }, { "epoch": 1.57, "learning_rate": 1.3875694420158712e-05, "loss": 1.6109, "step": 1744 }, { "epoch": 1.57, "learning_rate": 1.3862226565648996e-05, "loss": 1.5996, "step": 1746 }, { "epoch": 1.58, "learning_rate": 1.3848750472479013e-05, "loss": 1.6145, "step": 1748 }, { "epoch": 1.58, "learning_rate": 1.3835266169395132e-05, "loss": 1.5886, "step": 1750 }, { "epoch": 1.58, "learning_rate": 1.3821773685161224e-05, "loss": 1.5617, "step": 1752 }, { "epoch": 1.58, "learning_rate": 1.3808273048558609e-05, "loss": 1.5521, "step": 1754 }, { "epoch": 1.58, "learning_rate": 1.3794764288386003e-05, "loss": 1.6455, "step": 1756 }, { "epoch": 1.59, "learning_rate": 1.3781247433459447e-05, "loss": 1.6174, "step": 1758 }, { "epoch": 1.59, "learning_rate": 1.3767722512612251e-05, "loss": 1.617, "step": 1760 }, { "epoch": 1.59, "learning_rate": 1.3754189554694934e-05, "loss": 1.5477, "step": 1762 }, { "epoch": 1.59, "learning_rate": 1.3740648588575156e-05, "loss": 1.6121, "step": 1764 }, { "epoch": 1.59, "learning_rate": 1.3727099643137653e-05, "loss": 1.5544, "step": 1766 }, { "epoch": 1.59, "learning_rate": 1.3713542747284194e-05, "loss": 1.5416, "step": 1768 }, { "epoch": 1.6, "learning_rate": 1.3699977929933503e-05, "loss": 1.6363, "step": 1770 }, { "epoch": 1.6, "learning_rate": 1.3686405220021194e-05, "loss": 1.6219, "step": 1772 }, { "epoch": 1.6, "learning_rate": 1.3672824646499731e-05, "loss": 1.5882, "step": 1774 }, { "epoch": 1.6, "learning_rate": 1.3659236238338339e-05, "loss": 1.7416, "step": 1776 }, { "epoch": 1.6, "learning_rate": 1.3645640024522959e-05, "loss": 1.6427, "step": 1778 }, { "epoch": 1.61, "learning_rate": 1.363203603405619e-05, "loss": 1.5825, "step": 1780 }, { "epoch": 1.61, "learning_rate": 1.361842429595721e-05, "loss": 1.621, "step": 1782 }, { "epoch": 1.61, "learning_rate": 1.3604804839261732e-05, "loss": 1.5753, "step": 1784 }, { "epoch": 1.61, "learning_rate": 1.3591177693021927e-05, "loss": 1.6405, "step": 1786 }, { "epoch": 1.61, "learning_rate": 1.3577542886306367e-05, "loss": 1.5353, "step": 1788 }, { "epoch": 1.61, "learning_rate": 1.3563900448199977e-05, "loss": 1.598, "step": 1790 }, { "epoch": 1.62, "learning_rate": 1.3550250407803951e-05, "loss": 1.6229, "step": 1792 }, { "epoch": 1.62, "learning_rate": 1.3536592794235696e-05, "loss": 1.5628, "step": 1794 }, { "epoch": 1.62, "learning_rate": 1.3522927636628787e-05, "loss": 1.5696, "step": 1796 }, { "epoch": 1.62, "learning_rate": 1.3509254964132883e-05, "loss": 1.6368, "step": 1798 }, { "epoch": 1.62, "learning_rate": 1.3495574805913669e-05, "loss": 1.5796, "step": 1800 }, { "epoch": 1.62, "learning_rate": 1.3481887191152807e-05, "loss": 1.5293, "step": 1802 }, { "epoch": 1.63, "learning_rate": 1.346819214904786e-05, "loss": 1.57, "step": 1804 }, { "epoch": 1.63, "learning_rate": 1.3454489708812237e-05, "loss": 1.5036, "step": 1806 }, { "epoch": 1.63, "learning_rate": 1.3440779899675128e-05, "loss": 1.609, "step": 1808 }, { "epoch": 1.63, "learning_rate": 1.3427062750881435e-05, "loss": 1.5554, "step": 1810 }, { "epoch": 1.63, "learning_rate": 1.3413338291691726e-05, "loss": 1.5686, "step": 1812 }, { "epoch": 1.64, "learning_rate": 1.3399606551382164e-05, "loss": 1.544, "step": 1814 }, { "epoch": 1.64, "learning_rate": 1.3385867559244435e-05, "loss": 1.6573, "step": 1816 }, { "epoch": 1.64, "learning_rate": 1.3372121344585694e-05, "loss": 1.591, "step": 1818 }, { "epoch": 1.64, "learning_rate": 1.335836793672851e-05, "loss": 1.577, "step": 1820 }, { "epoch": 1.64, "learning_rate": 1.3344607365010795e-05, "loss": 1.5879, "step": 1822 }, { "epoch": 1.64, "learning_rate": 1.3330839658785739e-05, "loss": 1.5377, "step": 1824 }, { "epoch": 1.65, "learning_rate": 1.331706484742175e-05, "loss": 1.5187, "step": 1826 }, { "epoch": 1.65, "learning_rate": 1.3303282960302397e-05, "loss": 1.5582, "step": 1828 }, { "epoch": 1.65, "learning_rate": 1.3289494026826337e-05, "loss": 1.6114, "step": 1830 }, { "epoch": 1.65, "learning_rate": 1.3275698076407268e-05, "loss": 1.5021, "step": 1832 }, { "epoch": 1.65, "learning_rate": 1.3261895138473841e-05, "loss": 1.6179, "step": 1834 }, { "epoch": 1.66, "learning_rate": 1.3248085242469629e-05, "loss": 1.5231, "step": 1836 }, { "epoch": 1.66, "learning_rate": 1.3234268417853027e-05, "loss": 1.564, "step": 1838 }, { "epoch": 1.66, "learning_rate": 1.3220444694097227e-05, "loss": 1.577, "step": 1840 }, { "epoch": 1.66, "learning_rate": 1.3206614100690139e-05, "loss": 1.6216, "step": 1842 }, { "epoch": 1.66, "learning_rate": 1.3192776667134311e-05, "loss": 1.5821, "step": 1844 }, { "epoch": 1.66, "learning_rate": 1.3178932422946892e-05, "loss": 1.5774, "step": 1846 }, { "epoch": 1.67, "learning_rate": 1.3165081397659563e-05, "loss": 1.642, "step": 1848 }, { "epoch": 1.67, "learning_rate": 1.3151223620818463e-05, "loss": 1.526, "step": 1850 }, { "epoch": 1.67, "learning_rate": 1.3137359121984134e-05, "loss": 1.5949, "step": 1852 }, { "epoch": 1.67, "learning_rate": 1.3123487930731464e-05, "loss": 1.5635, "step": 1854 }, { "epoch": 1.67, "learning_rate": 1.3109610076649602e-05, "loss": 1.5585, "step": 1856 }, { "epoch": 1.68, "learning_rate": 1.3095725589341924e-05, "loss": 1.6193, "step": 1858 }, { "epoch": 1.68, "learning_rate": 1.3081834498425952e-05, "loss": 1.5287, "step": 1860 }, { "epoch": 1.68, "learning_rate": 1.3067936833533286e-05, "loss": 1.5954, "step": 1862 }, { "epoch": 1.68, "learning_rate": 1.3054032624309566e-05, "loss": 1.5767, "step": 1864 }, { "epoch": 1.68, "learning_rate": 1.3040121900414371e-05, "loss": 1.6328, "step": 1866 }, { "epoch": 1.68, "learning_rate": 1.3026204691521193e-05, "loss": 1.5769, "step": 1868 }, { "epoch": 1.69, "learning_rate": 1.3012281027317353e-05, "loss": 1.611, "step": 1870 }, { "epoch": 1.69, "learning_rate": 1.2998350937503939e-05, "loss": 1.5665, "step": 1872 }, { "epoch": 1.69, "learning_rate": 1.2984414451795747e-05, "loss": 1.518, "step": 1874 }, { "epoch": 1.69, "learning_rate": 1.2970471599921222e-05, "loss": 1.5733, "step": 1876 }, { "epoch": 1.69, "learning_rate": 1.2956522411622377e-05, "loss": 1.6023, "step": 1878 }, { "epoch": 1.7, "learning_rate": 1.2942566916654753e-05, "loss": 1.5534, "step": 1880 }, { "epoch": 1.7, "learning_rate": 1.292860514478734e-05, "loss": 1.6051, "step": 1882 }, { "epoch": 1.7, "learning_rate": 1.2914637125802514e-05, "loss": 1.6093, "step": 1884 }, { "epoch": 1.7, "learning_rate": 1.290066288949598e-05, "loss": 1.5965, "step": 1886 }, { "epoch": 1.7, "learning_rate": 1.2886682465676707e-05, "loss": 1.5038, "step": 1888 }, { "epoch": 1.7, "learning_rate": 1.287269588416686e-05, "loss": 1.5615, "step": 1890 }, { "epoch": 1.71, "learning_rate": 1.2858703174801743e-05, "loss": 1.5986, "step": 1892 }, { "epoch": 1.71, "learning_rate": 1.2844704367429723e-05, "loss": 1.5638, "step": 1894 }, { "epoch": 1.71, "learning_rate": 1.2830699491912186e-05, "loss": 1.5111, "step": 1896 }, { "epoch": 1.71, "learning_rate": 1.2816688578123459e-05, "loss": 1.6758, "step": 1898 }, { "epoch": 1.71, "learning_rate": 1.280267165595074e-05, "loss": 1.6036, "step": 1900 }, { "epoch": 1.72, "learning_rate": 1.2788648755294056e-05, "loss": 1.5597, "step": 1902 }, { "epoch": 1.72, "learning_rate": 1.2774619906066178e-05, "loss": 1.5505, "step": 1904 }, { "epoch": 1.72, "learning_rate": 1.2760585138192577e-05, "loss": 1.6206, "step": 1906 }, { "epoch": 1.72, "learning_rate": 1.2746544481611336e-05, "loss": 1.5424, "step": 1908 }, { "epoch": 1.72, "learning_rate": 1.273249796627311e-05, "loss": 1.583, "step": 1910 }, { "epoch": 1.72, "learning_rate": 1.2718445622141044e-05, "loss": 1.6438, "step": 1912 }, { "epoch": 1.73, "learning_rate": 1.270438747919072e-05, "loss": 1.5231, "step": 1914 }, { "epoch": 1.73, "learning_rate": 1.2690323567410094e-05, "loss": 1.6052, "step": 1916 }, { "epoch": 1.73, "learning_rate": 1.2676253916799421e-05, "loss": 1.6286, "step": 1918 }, { "epoch": 1.73, "learning_rate": 1.2662178557371198e-05, "loss": 1.5095, "step": 1920 }, { "epoch": 1.73, "learning_rate": 1.2648097519150107e-05, "loss": 1.5288, "step": 1922 }, { "epoch": 1.73, "learning_rate": 1.2634010832172932e-05, "loss": 1.5961, "step": 1924 }, { "epoch": 1.74, "learning_rate": 1.261991852648852e-05, "loss": 1.4945, "step": 1926 }, { "epoch": 1.74, "learning_rate": 1.2605820632157689e-05, "loss": 1.5989, "step": 1928 }, { "epoch": 1.74, "learning_rate": 1.2591717179253188e-05, "loss": 1.6103, "step": 1930 }, { "epoch": 1.74, "learning_rate": 1.2577608197859627e-05, "loss": 1.5667, "step": 1932 }, { "epoch": 1.74, "learning_rate": 1.25634937180734e-05, "loss": 1.5074, "step": 1934 }, { "epoch": 1.75, "learning_rate": 1.2549373770002626e-05, "loss": 1.6327, "step": 1936 }, { "epoch": 1.75, "learning_rate": 1.2535248383767102e-05, "loss": 1.5997, "step": 1938 }, { "epoch": 1.75, "learning_rate": 1.252111758949822e-05, "loss": 1.5462, "step": 1940 }, { "epoch": 1.75, "learning_rate": 1.2506981417338903e-05, "loss": 1.5862, "step": 1942 }, { "epoch": 1.75, "learning_rate": 1.2492839897443554e-05, "loss": 1.618, "step": 1944 }, { "epoch": 1.75, "learning_rate": 1.2478693059977975e-05, "loss": 1.6031, "step": 1946 }, { "epoch": 1.76, "learning_rate": 1.2464540935119319e-05, "loss": 1.6268, "step": 1948 }, { "epoch": 1.76, "learning_rate": 1.2450383553056011e-05, "loss": 1.637, "step": 1950 }, { "epoch": 1.76, "learning_rate": 1.2436220943987694e-05, "loss": 1.6018, "step": 1952 }, { "epoch": 1.76, "learning_rate": 1.2422053138125164e-05, "loss": 1.5885, "step": 1954 }, { "epoch": 1.76, "learning_rate": 1.2407880165690289e-05, "loss": 1.5386, "step": 1956 }, { "epoch": 1.77, "learning_rate": 1.2393702056915977e-05, "loss": 1.6509, "step": 1958 }, { "epoch": 1.77, "learning_rate": 1.2379518842046081e-05, "loss": 1.5272, "step": 1960 }, { "epoch": 1.77, "learning_rate": 1.2365330551335348e-05, "loss": 1.5918, "step": 1962 }, { "epoch": 1.77, "learning_rate": 1.2351137215049353e-05, "loss": 1.5838, "step": 1964 }, { "epoch": 1.77, "learning_rate": 1.2336938863464434e-05, "loss": 1.5712, "step": 1966 }, { "epoch": 1.77, "learning_rate": 1.2322735526867624e-05, "loss": 1.5799, "step": 1968 }, { "epoch": 1.78, "learning_rate": 1.2308527235556596e-05, "loss": 1.555, "step": 1970 }, { "epoch": 1.78, "learning_rate": 1.229431401983959e-05, "loss": 1.5856, "step": 1972 }, { "epoch": 1.78, "learning_rate": 1.2280095910035343e-05, "loss": 1.6309, "step": 1974 }, { "epoch": 1.78, "learning_rate": 1.2265872936473044e-05, "loss": 1.5952, "step": 1976 }, { "epoch": 1.78, "learning_rate": 1.2251645129492252e-05, "loss": 1.4671, "step": 1978 }, { "epoch": 1.79, "learning_rate": 1.2237412519442828e-05, "loss": 1.6362, "step": 1980 }, { "epoch": 1.79, "learning_rate": 1.222317513668489e-05, "loss": 1.6166, "step": 1982 }, { "epoch": 1.79, "learning_rate": 1.2208933011588735e-05, "loss": 1.4717, "step": 1984 }, { "epoch": 1.79, "learning_rate": 1.2194686174534771e-05, "loss": 1.5405, "step": 1986 }, { "epoch": 1.79, "learning_rate": 1.2180434655913465e-05, "loss": 1.5721, "step": 1988 }, { "epoch": 1.79, "learning_rate": 1.2166178486125258e-05, "loss": 1.5779, "step": 1990 }, { "epoch": 1.8, "learning_rate": 1.2151917695580523e-05, "loss": 1.6138, "step": 1992 }, { "epoch": 1.8, "learning_rate": 1.2137652314699494e-05, "loss": 1.5975, "step": 1994 }, { "epoch": 1.8, "learning_rate": 1.2123382373912178e-05, "loss": 1.4967, "step": 1996 }, { "epoch": 1.8, "learning_rate": 1.2109107903658326e-05, "loss": 1.5278, "step": 1998 }, { "epoch": 1.8, "learning_rate": 1.209482893438735e-05, "loss": 1.46, "step": 2000 }, { "epoch": 1.81, "learning_rate": 1.2080545496558248e-05, "loss": 1.5729, "step": 2002 }, { "epoch": 1.81, "learning_rate": 1.2066257620639557e-05, "loss": 1.6127, "step": 2004 }, { "epoch": 1.81, "learning_rate": 1.2051965337109284e-05, "loss": 1.5214, "step": 2006 }, { "epoch": 1.81, "learning_rate": 1.2037668676454832e-05, "loss": 1.5628, "step": 2008 }, { "epoch": 1.81, "learning_rate": 1.2023367669172947e-05, "loss": 1.6562, "step": 2010 }, { "epoch": 1.81, "learning_rate": 1.2009062345769645e-05, "loss": 1.5651, "step": 2012 }, { "epoch": 1.82, "learning_rate": 1.199475273676014e-05, "loss": 1.603, "step": 2014 }, { "epoch": 1.82, "learning_rate": 1.1980438872668803e-05, "loss": 1.5625, "step": 2016 }, { "epoch": 1.82, "learning_rate": 1.1966120784029066e-05, "loss": 1.5886, "step": 2018 }, { "epoch": 1.82, "learning_rate": 1.195179850138339e-05, "loss": 1.5668, "step": 2020 }, { "epoch": 1.82, "learning_rate": 1.1937472055283168e-05, "loss": 1.6338, "step": 2022 }, { "epoch": 1.83, "learning_rate": 1.1923141476288676e-05, "loss": 1.51, "step": 2024 }, { "epoch": 1.83, "learning_rate": 1.1908806794969015e-05, "loss": 1.5857, "step": 2026 }, { "epoch": 1.83, "learning_rate": 1.189446804190203e-05, "loss": 1.5716, "step": 2028 }, { "epoch": 1.83, "learning_rate": 1.188012524767425e-05, "loss": 1.5771, "step": 2030 }, { "epoch": 1.83, "learning_rate": 1.1865778442880828e-05, "loss": 1.5471, "step": 2032 }, { "epoch": 1.83, "learning_rate": 1.1851427658125474e-05, "loss": 1.5763, "step": 2034 }, { "epoch": 1.84, "learning_rate": 1.183707292402038e-05, "loss": 1.5639, "step": 2036 }, { "epoch": 1.84, "learning_rate": 1.1822714271186173e-05, "loss": 1.5648, "step": 2038 }, { "epoch": 1.84, "learning_rate": 1.180835173025183e-05, "loss": 1.5097, "step": 2040 }, { "epoch": 1.84, "learning_rate": 1.1793985331854622e-05, "loss": 1.5358, "step": 2042 }, { "epoch": 1.84, "learning_rate": 1.1779615106640058e-05, "loss": 1.6405, "step": 2044 }, { "epoch": 1.84, "learning_rate": 1.1765241085261802e-05, "loss": 1.5903, "step": 2046 }, { "epoch": 1.85, "learning_rate": 1.1750863298381618e-05, "loss": 1.6238, "step": 2048 }, { "epoch": 1.85, "learning_rate": 1.1736481776669307e-05, "loss": 1.5676, "step": 2050 }, { "epoch": 1.85, "learning_rate": 1.172209655080262e-05, "loss": 1.5194, "step": 2052 }, { "epoch": 1.85, "learning_rate": 1.170770765146723e-05, "loss": 1.5859, "step": 2054 }, { "epoch": 1.85, "learning_rate": 1.1693315109356637e-05, "loss": 1.4639, "step": 2056 }, { "epoch": 1.86, "learning_rate": 1.1678918955172112e-05, "loss": 1.5708, "step": 2058 }, { "epoch": 1.86, "learning_rate": 1.1664519219622628e-05, "loss": 1.6653, "step": 2060 }, { "epoch": 1.86, "learning_rate": 1.1650115933424804e-05, "loss": 1.633, "step": 2062 }, { "epoch": 1.86, "learning_rate": 1.163570912730283e-05, "loss": 1.588, "step": 2064 }, { "epoch": 1.86, "learning_rate": 1.1621298831988398e-05, "loss": 1.5914, "step": 2066 }, { "epoch": 1.86, "learning_rate": 1.1606885078220658e-05, "loss": 1.5181, "step": 2068 }, { "epoch": 1.87, "learning_rate": 1.1592467896746122e-05, "loss": 1.4927, "step": 2070 }, { "epoch": 1.87, "learning_rate": 1.1578047318318624e-05, "loss": 1.5709, "step": 2072 }, { "epoch": 1.87, "learning_rate": 1.156362337369924e-05, "loss": 1.5502, "step": 2074 }, { "epoch": 1.87, "learning_rate": 1.1549196093656223e-05, "loss": 1.5939, "step": 2076 }, { "epoch": 1.87, "learning_rate": 1.1534765508964952e-05, "loss": 1.5504, "step": 2078 }, { "epoch": 1.88, "learning_rate": 1.1520331650407839e-05, "loss": 1.5904, "step": 2080 }, { "epoch": 1.88, "learning_rate": 1.1505894548774294e-05, "loss": 1.5527, "step": 2082 }, { "epoch": 1.88, "learning_rate": 1.1491454234860645e-05, "loss": 1.5581, "step": 2084 }, { "epoch": 1.88, "learning_rate": 1.1477010739470056e-05, "loss": 1.5685, "step": 2086 }, { "epoch": 1.88, "learning_rate": 1.1462564093412493e-05, "loss": 1.608, "step": 2088 }, { "epoch": 1.88, "learning_rate": 1.144811432750464e-05, "loss": 1.6009, "step": 2090 }, { "epoch": 1.89, "learning_rate": 1.1433661472569832e-05, "loss": 1.5341, "step": 2092 }, { "epoch": 1.89, "learning_rate": 1.1419205559437998e-05, "loss": 1.5827, "step": 2094 }, { "epoch": 1.89, "learning_rate": 1.1404746618945588e-05, "loss": 1.576, "step": 2096 }, { "epoch": 1.89, "learning_rate": 1.1390284681935509e-05, "loss": 1.5971, "step": 2098 }, { "epoch": 1.89, "learning_rate": 1.1375819779257058e-05, "loss": 1.5065, "step": 2100 }, { "epoch": 1.9, "learning_rate": 1.1361351941765866e-05, "loss": 1.6612, "step": 2102 }, { "epoch": 1.9, "learning_rate": 1.1346881200323817e-05, "loss": 1.5207, "step": 2104 }, { "epoch": 1.9, "learning_rate": 1.1332407585798992e-05, "loss": 1.6724, "step": 2106 }, { "epoch": 1.9, "learning_rate": 1.1317931129065602e-05, "loss": 1.4631, "step": 2108 }, { "epoch": 1.9, "learning_rate": 1.1303451861003918e-05, "loss": 1.5509, "step": 2110 }, { "epoch": 1.9, "learning_rate": 1.1288969812500209e-05, "loss": 1.5472, "step": 2112 }, { "epoch": 1.91, "learning_rate": 1.1274485014446675e-05, "loss": 1.6121, "step": 2114 }, { "epoch": 1.91, "learning_rate": 1.1259997497741385e-05, "loss": 1.5248, "step": 2116 }, { "epoch": 1.91, "learning_rate": 1.1245507293288204e-05, "loss": 1.5002, "step": 2118 }, { "epoch": 1.91, "learning_rate": 1.1231014431996727e-05, "loss": 1.6081, "step": 2120 }, { "epoch": 1.91, "learning_rate": 1.1216518944782219e-05, "loss": 1.5393, "step": 2122 }, { "epoch": 1.92, "learning_rate": 1.1202020862565555e-05, "loss": 1.576, "step": 2124 }, { "epoch": 1.92, "learning_rate": 1.1187520216273126e-05, "loss": 1.5501, "step": 2126 }, { "epoch": 1.92, "learning_rate": 1.117301703683681e-05, "loss": 1.5741, "step": 2128 }, { "epoch": 1.92, "learning_rate": 1.1158511355193888e-05, "loss": 1.5463, "step": 2130 }, { "epoch": 1.92, "learning_rate": 1.1144003202286964e-05, "loss": 1.6043, "step": 2132 }, { "epoch": 1.92, "learning_rate": 1.1129492609063927e-05, "loss": 1.5227, "step": 2134 }, { "epoch": 1.93, "learning_rate": 1.1114979606477867e-05, "loss": 1.572, "step": 2136 }, { "epoch": 1.93, "learning_rate": 1.1100464225487013e-05, "loss": 1.5958, "step": 2138 }, { "epoch": 1.93, "learning_rate": 1.1085946497054674e-05, "loss": 1.5177, "step": 2140 }, { "epoch": 1.93, "learning_rate": 1.1071426452149152e-05, "loss": 1.5828, "step": 2142 }, { "epoch": 1.93, "learning_rate": 1.1056904121743702e-05, "loss": 1.5872, "step": 2144 }, { "epoch": 1.94, "learning_rate": 1.1042379536816457e-05, "loss": 1.6537, "step": 2146 }, { "epoch": 1.94, "learning_rate": 1.1027852728350343e-05, "loss": 1.5949, "step": 2148 }, { "epoch": 1.94, "learning_rate": 1.101332372733305e-05, "loss": 1.6051, "step": 2150 }, { "epoch": 1.94, "learning_rate": 1.0998792564756927e-05, "loss": 1.5928, "step": 2152 }, { "epoch": 1.94, "learning_rate": 1.0984259271618947e-05, "loss": 1.5785, "step": 2154 }, { "epoch": 1.94, "learning_rate": 1.096972387892062e-05, "loss": 1.5114, "step": 2156 }, { "epoch": 1.95, "learning_rate": 1.0955186417667936e-05, "loss": 1.5796, "step": 2158 }, { "epoch": 1.95, "learning_rate": 1.09406469188713e-05, "loss": 1.6061, "step": 2160 }, { "epoch": 1.95, "learning_rate": 1.0926105413545463e-05, "loss": 1.5535, "step": 2162 }, { "epoch": 1.95, "learning_rate": 1.0911561932709453e-05, "loss": 1.5657, "step": 2164 }, { "epoch": 1.95, "learning_rate": 1.0897016507386513e-05, "loss": 1.5686, "step": 2166 }, { "epoch": 1.95, "learning_rate": 1.0882469168604042e-05, "loss": 1.5352, "step": 2168 }, { "epoch": 1.96, "learning_rate": 1.0867919947393504e-05, "loss": 1.5629, "step": 2170 }, { "epoch": 1.96, "learning_rate": 1.0853368874790392e-05, "loss": 1.5478, "step": 2172 }, { "epoch": 1.96, "learning_rate": 1.083881598183415e-05, "loss": 1.6021, "step": 2174 }, { "epoch": 1.96, "learning_rate": 1.082426129956809e-05, "loss": 1.5736, "step": 2176 }, { "epoch": 1.96, "learning_rate": 1.0809704859039357e-05, "loss": 1.5961, "step": 2178 }, { "epoch": 1.97, "learning_rate": 1.0795146691298835e-05, "loss": 1.5399, "step": 2180 }, { "epoch": 1.97, "learning_rate": 1.07805868274011e-05, "loss": 1.5734, "step": 2182 }, { "epoch": 1.97, "learning_rate": 1.0766025298404346e-05, "loss": 1.5648, "step": 2184 }, { "epoch": 1.97, "learning_rate": 1.0751462135370313e-05, "loss": 1.5733, "step": 2186 }, { "epoch": 1.97, "learning_rate": 1.0736897369364232e-05, "loss": 1.6152, "step": 2188 }, { "epoch": 1.97, "learning_rate": 1.0722331031454749e-05, "loss": 1.6522, "step": 2190 }, { "epoch": 1.98, "learning_rate": 1.0707763152713869e-05, "loss": 1.5796, "step": 2192 }, { "epoch": 1.98, "learning_rate": 1.0693193764216875e-05, "loss": 1.4848, "step": 2194 }, { "epoch": 1.98, "learning_rate": 1.0678622897042279e-05, "loss": 1.4827, "step": 2196 }, { "epoch": 1.98, "learning_rate": 1.0664050582271744e-05, "loss": 1.5397, "step": 2198 }, { "epoch": 1.98, "learning_rate": 1.0649476850990019e-05, "loss": 1.6373, "step": 2200 }, { "epoch": 1.99, "learning_rate": 1.063490173428488e-05, "loss": 1.5799, "step": 2202 }, { "epoch": 1.99, "learning_rate": 1.062032526324705e-05, "loss": 1.5911, "step": 2204 }, { "epoch": 1.99, "learning_rate": 1.0605747468970148e-05, "loss": 1.6362, "step": 2206 }, { "epoch": 1.99, "learning_rate": 1.0591168382550617e-05, "loss": 1.5298, "step": 2208 }, { "epoch": 1.99, "learning_rate": 1.0576588035087647e-05, "loss": 1.5764, "step": 2210 }, { "epoch": 1.99, "learning_rate": 1.0562006457683128e-05, "loss": 1.5423, "step": 2212 }, { "epoch": 2.0, "learning_rate": 1.0547423681441567e-05, "loss": 1.5802, "step": 2214 }, { "epoch": 2.0, "learning_rate": 1.053283973747003e-05, "loss": 1.5937, "step": 2216 }, { "epoch": 2.0, "learning_rate": 1.0518254656878075e-05, "loss": 1.5666, "step": 2218 }, { "epoch": 2.0, "learning_rate": 1.050366847077769e-05, "loss": 1.5285, "step": 2220 }, { "epoch": 2.0, "learning_rate": 1.0489081210283206e-05, "loss": 1.4526, "step": 2222 }, { "epoch": 2.01, "learning_rate": 1.0474492906511258e-05, "loss": 1.5371, "step": 2224 }, { "epoch": 2.01, "learning_rate": 1.0459903590580706e-05, "loss": 1.5546, "step": 2226 }, { "epoch": 2.01, "learning_rate": 1.0445313293612567e-05, "loss": 1.5165, "step": 2228 }, { "epoch": 2.01, "learning_rate": 1.043072204672995e-05, "loss": 1.5631, "step": 2230 }, { "epoch": 2.01, "learning_rate": 1.0416129881057987e-05, "loss": 1.5341, "step": 2232 }, { "epoch": 2.01, "learning_rate": 1.0401536827723778e-05, "loss": 1.5122, "step": 2234 }, { "epoch": 2.02, "learning_rate": 1.0386942917856312e-05, "loss": 1.5344, "step": 2236 }, { "epoch": 2.02, "learning_rate": 1.03723481825864e-05, "loss": 1.5843, "step": 2238 }, { "epoch": 2.02, "learning_rate": 1.0357752653046625e-05, "loss": 1.5024, "step": 2240 }, { "epoch": 2.02, "learning_rate": 1.0343156360371256e-05, "loss": 1.4637, "step": 2242 }, { "epoch": 2.02, "learning_rate": 1.0328559335696188e-05, "loss": 1.5855, "step": 2244 }, { "epoch": 2.03, "learning_rate": 1.0313961610158887e-05, "loss": 1.5466, "step": 2246 }, { "epoch": 2.03, "learning_rate": 1.0299363214898305e-05, "loss": 1.5501, "step": 2248 }, { "epoch": 2.03, "learning_rate": 1.028476418105483e-05, "loss": 1.5245, "step": 2250 }, { "epoch": 2.03, "learning_rate": 1.0270164539770206e-05, "loss": 1.4956, "step": 2252 }, { "epoch": 2.03, "learning_rate": 1.0255564322187476e-05, "loss": 1.524, "step": 2254 }, { "epoch": 2.03, "learning_rate": 1.0240963559450909e-05, "loss": 1.5229, "step": 2256 }, { "epoch": 2.04, "learning_rate": 1.0226362282705942e-05, "loss": 1.4596, "step": 2258 }, { "epoch": 2.04, "learning_rate": 1.0211760523099107e-05, "loss": 1.5043, "step": 2260 }, { "epoch": 2.04, "learning_rate": 1.0197158311777957e-05, "loss": 1.5721, "step": 2262 }, { "epoch": 2.04, "learning_rate": 1.0182555679891026e-05, "loss": 1.4634, "step": 2264 }, { "epoch": 2.04, "learning_rate": 1.0167952658587729e-05, "loss": 1.5929, "step": 2266 }, { "epoch": 2.05, "learning_rate": 1.015334927901832e-05, "loss": 1.5438, "step": 2268 }, { "epoch": 2.05, "learning_rate": 1.0138745572333816e-05, "loss": 1.5338, "step": 2270 }, { "epoch": 2.05, "learning_rate": 1.0124141569685933e-05, "loss": 1.5582, "step": 2272 }, { "epoch": 2.05, "learning_rate": 1.0109537302227012e-05, "loss": 1.558, "step": 2274 }, { "epoch": 2.05, "learning_rate": 1.0094932801109968e-05, "loss": 1.505, "step": 2276 }, { "epoch": 2.05, "learning_rate": 1.0080328097488206e-05, "loss": 1.5063, "step": 2278 }, { "epoch": 2.06, "learning_rate": 1.0065723222515566e-05, "loss": 1.5605, "step": 2280 }, { "epoch": 2.06, "learning_rate": 1.0051118207346263e-05, "loss": 1.5184, "step": 2282 }, { "epoch": 2.06, "learning_rate": 1.003651308313479e-05, "loss": 1.5838, "step": 2284 }, { "epoch": 2.06, "learning_rate": 1.0021907881035891e-05, "loss": 1.5031, "step": 2286 }, { "epoch": 2.06, "learning_rate": 1.0007302632204472e-05, "loss": 1.5279, "step": 2288 }, { "epoch": 2.06, "learning_rate": 9.992697367795531e-06, "loss": 1.5222, "step": 2290 }, { "epoch": 2.07, "learning_rate": 9.97809211896411e-06, "loss": 1.4966, "step": 2292 }, { "epoch": 2.07, "learning_rate": 9.963486916865212e-06, "loss": 1.4892, "step": 2294 }, { "epoch": 2.07, "learning_rate": 9.948881792653744e-06, "loss": 1.48, "step": 2296 }, { "epoch": 2.07, "learning_rate": 9.934276777484436e-06, "loss": 1.4611, "step": 2298 }, { "epoch": 2.07, "learning_rate": 9.919671902511798e-06, "loss": 1.5598, "step": 2300 }, { "epoch": 2.08, "learning_rate": 9.905067198890035e-06, "loss": 1.4727, "step": 2302 }, { "epoch": 2.08, "learning_rate": 9.89046269777299e-06, "loss": 1.5649, "step": 2304 }, { "epoch": 2.08, "learning_rate": 9.875858430314072e-06, "loss": 1.4784, "step": 2306 }, { "epoch": 2.08, "learning_rate": 9.861254427666187e-06, "loss": 1.5344, "step": 2308 }, { "epoch": 2.08, "learning_rate": 9.846650720981682e-06, "loss": 1.5672, "step": 2310 }, { "epoch": 2.08, "learning_rate": 9.832047341412273e-06, "loss": 1.4945, "step": 2312 }, { "epoch": 2.09, "learning_rate": 9.817444320108976e-06, "loss": 1.5149, "step": 2314 }, { "epoch": 2.09, "learning_rate": 9.802841688222043e-06, "loss": 1.4418, "step": 2316 }, { "epoch": 2.09, "learning_rate": 9.788239476900899e-06, "loss": 1.5727, "step": 2318 }, { "epoch": 2.09, "learning_rate": 9.773637717294061e-06, "loss": 1.4015, "step": 2320 }, { "epoch": 2.09, "learning_rate": 9.759036440549093e-06, "loss": 1.5113, "step": 2322 }, { "epoch": 2.1, "learning_rate": 9.744435677812526e-06, "loss": 1.5221, "step": 2324 }, { "epoch": 2.1, "learning_rate": 9.729835460229796e-06, "loss": 1.5461, "step": 2326 }, { "epoch": 2.1, "learning_rate": 9.715235818945171e-06, "loss": 1.4714, "step": 2328 }, { "epoch": 2.1, "learning_rate": 9.700636785101696e-06, "loss": 1.5814, "step": 2330 }, { "epoch": 2.1, "learning_rate": 9.686038389841116e-06, "loss": 1.5544, "step": 2332 }, { "epoch": 2.1, "learning_rate": 9.671440664303813e-06, "loss": 1.5014, "step": 2334 }, { "epoch": 2.11, "learning_rate": 9.656843639628748e-06, "loss": 1.5115, "step": 2336 }, { "epoch": 2.11, "learning_rate": 9.642247346953376e-06, "loss": 1.5332, "step": 2338 }, { "epoch": 2.11, "learning_rate": 9.627651817413605e-06, "loss": 1.4812, "step": 2340 }, { "epoch": 2.11, "learning_rate": 9.613057082143694e-06, "loss": 1.4951, "step": 2342 }, { "epoch": 2.11, "learning_rate": 9.598463172276224e-06, "loss": 1.4513, "step": 2344 }, { "epoch": 2.12, "learning_rate": 9.583870118942014e-06, "loss": 1.5211, "step": 2346 }, { "epoch": 2.12, "learning_rate": 9.576573923202763e-06, "loss": 1.5283, "step": 2348 }, { "epoch": 2.12, "learning_rate": 9.561982213034707e-06, "loss": 1.5975, "step": 2350 }, { "epoch": 2.12, "learning_rate": 9.547391437218815e-06, "loss": 1.4903, "step": 2352 }, { "epoch": 2.12, "learning_rate": 9.532801626879209e-06, "loss": 1.537, "step": 2354 }, { "epoch": 2.12, "learning_rate": 9.518212813137966e-06, "loss": 1.4725, "step": 2356 }, { "epoch": 2.13, "learning_rate": 9.50362502711503e-06, "loss": 1.4511, "step": 2358 }, { "epoch": 2.13, "learning_rate": 9.489038299928157e-06, "loss": 1.5214, "step": 2360 }, { "epoch": 2.13, "learning_rate": 9.47445266269284e-06, "loss": 1.5027, "step": 2362 }, { "epoch": 2.13, "learning_rate": 9.45986814652225e-06, "loss": 1.5368, "step": 2364 }, { "epoch": 2.13, "learning_rate": 9.445284782527157e-06, "loss": 1.5637, "step": 2366 }, { "epoch": 2.14, "learning_rate": 9.430702601815892e-06, "loss": 1.4993, "step": 2368 }, { "epoch": 2.14, "learning_rate": 9.416121635494245e-06, "loss": 1.5066, "step": 2370 }, { "epoch": 2.14, "learning_rate": 9.401541914665425e-06, "loss": 1.4701, "step": 2372 }, { "epoch": 2.14, "learning_rate": 9.38696347042998e-06, "loss": 1.546, "step": 2374 }, { "epoch": 2.14, "learning_rate": 9.372386333885738e-06, "loss": 1.5266, "step": 2376 }, { "epoch": 2.14, "learning_rate": 9.35781053612773e-06, "loss": 1.5479, "step": 2378 }, { "epoch": 2.15, "learning_rate": 9.34323610824814e-06, "loss": 1.596, "step": 2380 }, { "epoch": 2.15, "learning_rate": 9.328663081336225e-06, "loss": 1.5529, "step": 2382 }, { "epoch": 2.15, "learning_rate": 9.314091486478255e-06, "loss": 1.4637, "step": 2384 }, { "epoch": 2.15, "learning_rate": 9.29952135475745e-06, "loss": 1.5238, "step": 2386 }, { "epoch": 2.15, "learning_rate": 9.284952717253898e-06, "loss": 1.5229, "step": 2388 }, { "epoch": 2.16, "learning_rate": 9.270385605044509e-06, "loss": 1.5199, "step": 2390 }, { "epoch": 2.16, "learning_rate": 9.255820049202933e-06, "loss": 1.5131, "step": 2392 }, { "epoch": 2.16, "learning_rate": 9.241256080799507e-06, "loss": 1.5487, "step": 2394 }, { "epoch": 2.16, "learning_rate": 9.226693730901174e-06, "loss": 1.6053, "step": 2396 }, { "epoch": 2.16, "learning_rate": 9.212133030571438e-06, "loss": 1.5352, "step": 2398 }, { "epoch": 2.16, "learning_rate": 9.19757401087026e-06, "loss": 1.5733, "step": 2400 }, { "epoch": 2.17, "learning_rate": 9.18301670285404e-06, "loss": 1.4881, "step": 2402 }, { "epoch": 2.17, "learning_rate": 9.168461137575515e-06, "loss": 1.4606, "step": 2404 }, { "epoch": 2.17, "learning_rate": 9.153907346083706e-06, "loss": 1.4393, "step": 2406 }, { "epoch": 2.17, "learning_rate": 9.139355359423855e-06, "loss": 1.5519, "step": 2408 }, { "epoch": 2.17, "learning_rate": 9.124805208637349e-06, "loss": 1.4962, "step": 2410 }, { "epoch": 2.17, "learning_rate": 9.110256924761655e-06, "loss": 1.4699, "step": 2412 }, { "epoch": 2.18, "learning_rate": 9.102983492613489e-06, "loss": 1.4995, "step": 2414 }, { "epoch": 2.18, "learning_rate": 9.08843806729055e-06, "loss": 1.5511, "step": 2416 }, { "epoch": 2.18, "learning_rate": 9.073894586454538e-06, "loss": 1.6146, "step": 2418 }, { "epoch": 2.18, "learning_rate": 9.059353081128702e-06, "loss": 1.443, "step": 2420 }, { "epoch": 2.18, "learning_rate": 9.044813582332067e-06, "loss": 1.4808, "step": 2422 }, { "epoch": 2.19, "learning_rate": 9.030276121079384e-06, "loss": 1.5527, "step": 2424 }, { "epoch": 2.19, "learning_rate": 9.015740728381055e-06, "loss": 1.5851, "step": 2426 }, { "epoch": 2.19, "learning_rate": 9.001207435243074e-06, "loss": 1.4776, "step": 2428 }, { "epoch": 2.19, "learning_rate": 8.986676272666952e-06, "loss": 1.4741, "step": 2430 }, { "epoch": 2.19, "learning_rate": 8.972147271649662e-06, "loss": 1.5235, "step": 2432 }, { "epoch": 2.19, "learning_rate": 8.95762046318355e-06, "loss": 1.5066, "step": 2434 }, { "epoch": 2.2, "learning_rate": 8.9430958782563e-06, "loss": 1.4489, "step": 2436 }, { "epoch": 2.2, "learning_rate": 8.928573547850852e-06, "loss": 1.4724, "step": 2438 }, { "epoch": 2.2, "learning_rate": 8.91405350294533e-06, "loss": 1.5894, "step": 2440 }, { "epoch": 2.2, "learning_rate": 8.899535774512986e-06, "loss": 1.4172, "step": 2442 }, { "epoch": 2.2, "learning_rate": 8.885020393522136e-06, "loss": 1.4497, "step": 2444 }, { "epoch": 2.21, "learning_rate": 8.870507390936076e-06, "loss": 1.5584, "step": 2446 }, { "epoch": 2.21, "learning_rate": 8.85599679771304e-06, "loss": 1.5338, "step": 2448 }, { "epoch": 2.21, "learning_rate": 8.841488644806115e-06, "loss": 1.5138, "step": 2450 }, { "epoch": 2.21, "learning_rate": 8.826982963163188e-06, "loss": 1.529, "step": 2452 }, { "epoch": 2.21, "learning_rate": 8.812479783726874e-06, "loss": 1.4863, "step": 2454 }, { "epoch": 2.21, "learning_rate": 8.797979137434452e-06, "loss": 1.5418, "step": 2456 }, { "epoch": 2.22, "learning_rate": 8.783481055217783e-06, "loss": 1.5622, "step": 2458 }, { "epoch": 2.22, "learning_rate": 8.768985568003277e-06, "loss": 1.5281, "step": 2460 }, { "epoch": 2.22, "learning_rate": 8.754492706711798e-06, "loss": 1.5676, "step": 2462 }, { "epoch": 2.22, "learning_rate": 8.740002502258616e-06, "loss": 1.4468, "step": 2464 }, { "epoch": 2.22, "learning_rate": 8.725514985553327e-06, "loss": 1.5271, "step": 2466 }, { "epoch": 2.23, "learning_rate": 8.711030187499795e-06, "loss": 1.5094, "step": 2468 }, { "epoch": 2.23, "learning_rate": 8.696548138996086e-06, "loss": 1.5152, "step": 2470 }, { "epoch": 2.23, "learning_rate": 8.682068870934401e-06, "loss": 1.4206, "step": 2472 }, { "epoch": 2.23, "learning_rate": 8.66759241420101e-06, "loss": 1.4556, "step": 2474 }, { "epoch": 2.23, "learning_rate": 8.653118799676183e-06, "loss": 1.4849, "step": 2476 }, { "epoch": 2.23, "learning_rate": 8.638648058234139e-06, "loss": 1.4729, "step": 2478 }, { "epoch": 2.24, "learning_rate": 8.624180220742945e-06, "loss": 1.4827, "step": 2480 }, { "epoch": 2.24, "learning_rate": 8.609715318064496e-06, "loss": 1.5816, "step": 2482 }, { "epoch": 2.24, "learning_rate": 8.595253381054414e-06, "loss": 1.4841, "step": 2484 }, { "epoch": 2.24, "learning_rate": 8.580794440562003e-06, "loss": 1.4788, "step": 2486 }, { "epoch": 2.24, "learning_rate": 8.566338527430168e-06, "loss": 1.4628, "step": 2488 }, { "epoch": 2.25, "learning_rate": 8.551885672495363e-06, "loss": 1.5261, "step": 2490 }, { "epoch": 2.25, "learning_rate": 8.53743590658751e-06, "loss": 1.5421, "step": 2492 }, { "epoch": 2.25, "learning_rate": 8.522989260529948e-06, "loss": 1.5478, "step": 2494 }, { "epoch": 2.25, "learning_rate": 8.508545765139359e-06, "loss": 1.5603, "step": 2496 }, { "epoch": 2.25, "learning_rate": 8.494105451225706e-06, "loss": 1.4666, "step": 2498 }, { "epoch": 2.25, "learning_rate": 8.479668349592165e-06, "loss": 1.5813, "step": 2500 }, { "epoch": 2.26, "learning_rate": 8.465234491035053e-06, "loss": 1.5637, "step": 2502 }, { "epoch": 2.26, "learning_rate": 8.45080390634378e-06, "loss": 1.5104, "step": 2504 }, { "epoch": 2.26, "learning_rate": 8.436376626300764e-06, "loss": 1.553, "step": 2506 }, { "epoch": 2.26, "learning_rate": 8.421952681681378e-06, "loss": 1.5615, "step": 2508 }, { "epoch": 2.26, "learning_rate": 8.407532103253878e-06, "loss": 1.5202, "step": 2510 }, { "epoch": 2.27, "learning_rate": 8.393114921779345e-06, "loss": 1.5754, "step": 2512 }, { "epoch": 2.27, "learning_rate": 8.378701168011603e-06, "loss": 1.5279, "step": 2514 }, { "epoch": 2.27, "learning_rate": 8.364290872697175e-06, "loss": 1.491, "step": 2516 }, { "epoch": 2.27, "learning_rate": 8.349884066575198e-06, "loss": 1.5686, "step": 2518 }, { "epoch": 2.27, "learning_rate": 8.335480780377374e-06, "loss": 1.4939, "step": 2520 }, { "epoch": 2.27, "learning_rate": 8.321081044827894e-06, "loss": 1.4625, "step": 2522 }, { "epoch": 2.28, "learning_rate": 8.306684890643368e-06, "loss": 1.4761, "step": 2524 }, { "epoch": 2.28, "learning_rate": 8.292292348532774e-06, "loss": 1.5043, "step": 2526 }, { "epoch": 2.28, "learning_rate": 8.277903449197383e-06, "loss": 1.4946, "step": 2528 }, { "epoch": 2.28, "learning_rate": 8.263518223330698e-06, "loss": 1.5154, "step": 2530 }, { "epoch": 2.28, "learning_rate": 8.24913670161838e-06, "loss": 1.4999, "step": 2532 }, { "epoch": 2.28, "learning_rate": 8.2347589147382e-06, "loss": 1.5577, "step": 2534 }, { "epoch": 2.29, "learning_rate": 8.220384893359944e-06, "loss": 1.5162, "step": 2536 }, { "epoch": 2.29, "learning_rate": 8.20601466814538e-06, "loss": 1.5193, "step": 2538 }, { "epoch": 2.29, "learning_rate": 8.191648269748173e-06, "loss": 1.5696, "step": 2540 }, { "epoch": 2.29, "learning_rate": 8.177285728813829e-06, "loss": 1.5841, "step": 2542 }, { "epoch": 2.29, "learning_rate": 8.16292707597962e-06, "loss": 1.5085, "step": 2544 }, { "epoch": 2.3, "learning_rate": 8.14857234187453e-06, "loss": 1.5908, "step": 2546 }, { "epoch": 2.3, "learning_rate": 8.134221557119175e-06, "loss": 1.5277, "step": 2548 }, { "epoch": 2.3, "learning_rate": 8.119874752325753e-06, "loss": 1.452, "step": 2550 }, { "epoch": 2.3, "learning_rate": 8.105531958097973e-06, "loss": 1.5027, "step": 2552 }, { "epoch": 2.3, "learning_rate": 8.091193205030987e-06, "loss": 1.5185, "step": 2554 }, { "epoch": 2.3, "learning_rate": 8.076858523711327e-06, "loss": 1.4928, "step": 2556 }, { "epoch": 2.31, "learning_rate": 8.062527944716837e-06, "loss": 1.5301, "step": 2558 }, { "epoch": 2.31, "learning_rate": 8.048201498616613e-06, "loss": 1.577, "step": 2560 }, { "epoch": 2.31, "learning_rate": 8.033879215970935e-06, "loss": 1.4957, "step": 2562 }, { "epoch": 2.31, "learning_rate": 8.019561127331202e-06, "loss": 1.4609, "step": 2564 }, { "epoch": 2.31, "learning_rate": 8.005247263239862e-06, "loss": 1.6149, "step": 2566 }, { "epoch": 2.32, "learning_rate": 7.990937654230362e-06, "loss": 1.5086, "step": 2568 }, { "epoch": 2.32, "learning_rate": 7.976632330827056e-06, "loss": 1.4795, "step": 2570 }, { "epoch": 2.32, "learning_rate": 7.962331323545171e-06, "loss": 1.5367, "step": 2572 }, { "epoch": 2.32, "learning_rate": 7.94803466289072e-06, "loss": 1.4965, "step": 2574 }, { "epoch": 2.32, "learning_rate": 7.933742379360446e-06, "loss": 1.5235, "step": 2576 }, { "epoch": 2.32, "learning_rate": 7.919454503441757e-06, "loss": 1.4606, "step": 2578 }, { "epoch": 2.33, "learning_rate": 7.905171065612655e-06, "loss": 1.569, "step": 2580 }, { "epoch": 2.33, "learning_rate": 7.890892096341677e-06, "loss": 1.5586, "step": 2582 }, { "epoch": 2.33, "learning_rate": 7.876617626087825e-06, "loss": 1.516, "step": 2584 }, { "epoch": 2.33, "learning_rate": 7.862347685300511e-06, "loss": 1.4995, "step": 2586 }, { "epoch": 2.33, "learning_rate": 7.848082304419478e-06, "loss": 1.5585, "step": 2588 }, { "epoch": 2.34, "learning_rate": 7.833821513874747e-06, "loss": 1.534, "step": 2590 }, { "epoch": 2.34, "learning_rate": 7.819565344086541e-06, "loss": 1.5516, "step": 2592 }, { "epoch": 2.34, "learning_rate": 7.805313825465232e-06, "loss": 1.5351, "step": 2594 }, { "epoch": 2.34, "learning_rate": 7.791066988411268e-06, "loss": 1.565, "step": 2596 }, { "epoch": 2.34, "learning_rate": 7.776824863315111e-06, "loss": 1.5207, "step": 2598 }, { "epoch": 2.34, "learning_rate": 7.762587480557175e-06, "loss": 1.4396, "step": 2600 }, { "epoch": 2.35, "learning_rate": 7.748354870507753e-06, "loss": 1.449, "step": 2602 }, { "epoch": 2.35, "learning_rate": 7.734127063526958e-06, "loss": 1.4943, "step": 2604 }, { "epoch": 2.35, "learning_rate": 7.719904089964658e-06, "loss": 1.5601, "step": 2606 }, { "epoch": 2.35, "learning_rate": 7.705685980160413e-06, "loss": 1.5115, "step": 2608 }, { "epoch": 2.35, "learning_rate": 7.691472764443404e-06, "loss": 1.5607, "step": 2610 }, { "epoch": 2.36, "learning_rate": 7.67726447313238e-06, "loss": 1.4841, "step": 2612 }, { "epoch": 2.36, "learning_rate": 7.66306113653557e-06, "loss": 1.4853, "step": 2614 }, { "epoch": 2.36, "learning_rate": 7.64886278495065e-06, "loss": 1.5695, "step": 2616 }, { "epoch": 2.36, "learning_rate": 7.634669448664655e-06, "loss": 1.5634, "step": 2618 }, { "epoch": 2.36, "learning_rate": 7.62048115795392e-06, "loss": 1.5444, "step": 2620 }, { "epoch": 2.36, "learning_rate": 7.606297943084024e-06, "loss": 1.5306, "step": 2622 }, { "epoch": 2.37, "learning_rate": 7.5921198343097145e-06, "loss": 1.5062, "step": 2624 }, { "epoch": 2.37, "learning_rate": 7.577946861874843e-06, "loss": 1.562, "step": 2626 }, { "epoch": 2.37, "learning_rate": 7.563779056012309e-06, "loss": 1.4988, "step": 2628 }, { "epoch": 2.37, "learning_rate": 7.549616446943992e-06, "loss": 1.437, "step": 2630 }, { "epoch": 2.37, "learning_rate": 7.535459064880684e-06, "loss": 1.4982, "step": 2632 }, { "epoch": 2.38, "learning_rate": 7.521306940022025e-06, "loss": 1.5545, "step": 2634 }, { "epoch": 2.38, "learning_rate": 7.507160102556451e-06, "loss": 1.5476, "step": 2636 }, { "epoch": 2.38, "learning_rate": 7.4930185826611e-06, "loss": 1.4544, "step": 2638 }, { "epoch": 2.38, "learning_rate": 7.478882410501784e-06, "loss": 1.5477, "step": 2640 }, { "epoch": 2.38, "learning_rate": 7.464751616232902e-06, "loss": 1.5119, "step": 2642 }, { "epoch": 2.38, "learning_rate": 7.4506262299973775e-06, "loss": 1.5679, "step": 2644 }, { "epoch": 2.39, "learning_rate": 7.4365062819266076e-06, "loss": 1.5516, "step": 2646 }, { "epoch": 2.39, "learning_rate": 7.422391802140376e-06, "loss": 1.5458, "step": 2648 }, { "epoch": 2.39, "learning_rate": 7.408282820746813e-06, "loss": 1.6268, "step": 2650 }, { "epoch": 2.39, "learning_rate": 7.394179367842314e-06, "loss": 1.5683, "step": 2652 }, { "epoch": 2.39, "learning_rate": 7.380081473511484e-06, "loss": 1.5416, "step": 2654 }, { "epoch": 2.39, "learning_rate": 7.365989167827068e-06, "loss": 1.5423, "step": 2656 }, { "epoch": 2.4, "learning_rate": 7.351902480849899e-06, "loss": 1.5289, "step": 2658 }, { "epoch": 2.4, "learning_rate": 7.337821442628805e-06, "loss": 1.4968, "step": 2660 }, { "epoch": 2.4, "learning_rate": 7.323746083200583e-06, "loss": 1.5061, "step": 2662 }, { "epoch": 2.4, "learning_rate": 7.309676432589908e-06, "loss": 1.5273, "step": 2664 }, { "epoch": 2.4, "learning_rate": 7.295612520809281e-06, "loss": 1.5042, "step": 2666 }, { "epoch": 2.41, "learning_rate": 7.281554377858959e-06, "loss": 1.4732, "step": 2668 }, { "epoch": 2.41, "learning_rate": 7.267502033726895e-06, "loss": 1.5093, "step": 2670 }, { "epoch": 2.41, "learning_rate": 7.253455518388668e-06, "loss": 1.5265, "step": 2672 }, { "epoch": 2.41, "learning_rate": 7.239414861807427e-06, "loss": 1.5804, "step": 2674 }, { "epoch": 2.41, "learning_rate": 7.2253800939338225e-06, "loss": 1.5111, "step": 2676 }, { "epoch": 2.41, "learning_rate": 7.211351244705947e-06, "loss": 1.5801, "step": 2678 }, { "epoch": 2.42, "learning_rate": 7.197328344049266e-06, "loss": 1.4756, "step": 2680 }, { "epoch": 2.42, "learning_rate": 7.183311421876546e-06, "loss": 1.5308, "step": 2682 }, { "epoch": 2.42, "learning_rate": 7.169300508087815e-06, "loss": 1.5217, "step": 2684 }, { "epoch": 2.42, "learning_rate": 7.155295632570279e-06, "loss": 1.5401, "step": 2686 }, { "epoch": 2.42, "learning_rate": 7.14129682519826e-06, "loss": 1.4986, "step": 2688 }, { "epoch": 2.43, "learning_rate": 7.127304115833141e-06, "loss": 1.5343, "step": 2690 }, { "epoch": 2.43, "learning_rate": 7.113317534323296e-06, "loss": 1.5726, "step": 2692 }, { "epoch": 2.43, "learning_rate": 7.099337110504023e-06, "loss": 1.4744, "step": 2694 }, { "epoch": 2.43, "learning_rate": 7.08536287419749e-06, "loss": 1.4813, "step": 2696 }, { "epoch": 2.43, "learning_rate": 7.071394855212663e-06, "loss": 1.5123, "step": 2698 }, { "epoch": 2.43, "learning_rate": 7.0574330833452475e-06, "loss": 1.5364, "step": 2700 }, { "epoch": 2.44, "learning_rate": 7.043477588377623e-06, "loss": 1.5404, "step": 2702 }, { "epoch": 2.44, "learning_rate": 7.029528400078784e-06, "loss": 1.5873, "step": 2704 }, { "epoch": 2.44, "learning_rate": 7.015585548204256e-06, "loss": 1.5007, "step": 2706 }, { "epoch": 2.44, "learning_rate": 7.001649062496065e-06, "loss": 1.585, "step": 2708 }, { "epoch": 2.44, "learning_rate": 6.987718972682651e-06, "loss": 1.5347, "step": 2710 }, { "epoch": 2.45, "learning_rate": 6.97379530847881e-06, "loss": 1.5023, "step": 2712 }, { "epoch": 2.45, "learning_rate": 6.959878099585634e-06, "loss": 1.5362, "step": 2714 }, { "epoch": 2.45, "learning_rate": 6.94596737569044e-06, "loss": 1.5489, "step": 2716 }, { "epoch": 2.45, "learning_rate": 6.932063166466716e-06, "loss": 1.4816, "step": 2718 }, { "epoch": 2.45, "learning_rate": 6.918165501574051e-06, "loss": 1.513, "step": 2720 }, { "epoch": 2.45, "learning_rate": 6.904274410658076e-06, "loss": 1.5079, "step": 2722 }, { "epoch": 2.46, "learning_rate": 6.890389923350398e-06, "loss": 1.5264, "step": 2724 }, { "epoch": 2.46, "learning_rate": 6.876512069268541e-06, "loss": 1.4754, "step": 2726 }, { "epoch": 2.46, "learning_rate": 6.862640878015867e-06, "loss": 1.4453, "step": 2728 }, { "epoch": 2.46, "learning_rate": 6.84877637918154e-06, "loss": 1.5165, "step": 2730 }, { "epoch": 2.46, "learning_rate": 6.834918602340439e-06, "loss": 1.6026, "step": 2732 }, { "epoch": 2.47, "learning_rate": 6.82106757705311e-06, "loss": 1.5246, "step": 2734 }, { "epoch": 2.47, "learning_rate": 6.807223332865696e-06, "loss": 1.5018, "step": 2736 }, { "epoch": 2.47, "learning_rate": 6.793385899309866e-06, "loss": 1.4946, "step": 2738 }, { "epoch": 2.47, "learning_rate": 6.779555305902774e-06, "loss": 1.562, "step": 2740 }, { "epoch": 2.47, "learning_rate": 6.765731582146977e-06, "loss": 1.526, "step": 2742 }, { "epoch": 2.47, "learning_rate": 6.751914757530375e-06, "loss": 1.5335, "step": 2744 }, { "epoch": 2.48, "learning_rate": 6.738104861526158e-06, "loss": 1.5006, "step": 2746 }, { "epoch": 2.48, "learning_rate": 6.7243019235927355e-06, "loss": 1.5107, "step": 2748 }, { "epoch": 2.48, "learning_rate": 6.7105059731736645e-06, "loss": 1.5225, "step": 2750 }, { "epoch": 2.48, "learning_rate": 6.6967170396976064e-06, "loss": 1.5346, "step": 2752 }, { "epoch": 2.48, "learning_rate": 6.682935152578254e-06, "loss": 1.6253, "step": 2754 }, { "epoch": 2.49, "learning_rate": 6.669160341214265e-06, "loss": 1.4834, "step": 2756 }, { "epoch": 2.49, "learning_rate": 6.6553926349892074e-06, "loss": 1.5052, "step": 2758 }, { "epoch": 2.49, "learning_rate": 6.6416320632714936e-06, "loss": 1.6093, "step": 2760 }, { "epoch": 2.49, "learning_rate": 6.627878655414311e-06, "loss": 1.4979, "step": 2762 }, { "epoch": 2.49, "learning_rate": 6.614132440755569e-06, "loss": 1.5197, "step": 2764 }, { "epoch": 2.49, "learning_rate": 6.600393448617838e-06, "loss": 1.5165, "step": 2766 }, { "epoch": 2.5, "learning_rate": 6.586661708308273e-06, "loss": 1.5064, "step": 2768 }, { "epoch": 2.5, "learning_rate": 6.5729372491185695e-06, "loss": 1.5066, "step": 2770 }, { "epoch": 2.5, "learning_rate": 6.559220100324879e-06, "loss": 1.4739, "step": 2772 }, { "epoch": 2.5, "learning_rate": 6.5455102911877665e-06, "loss": 1.4729, "step": 2774 }, { "epoch": 2.5, "learning_rate": 6.531807850952143e-06, "loss": 1.5633, "step": 2776 }, { "epoch": 2.5, "learning_rate": 6.518112808847197e-06, "loss": 1.5471, "step": 2778 }, { "epoch": 2.51, "learning_rate": 6.504425194086334e-06, "loss": 1.4803, "step": 2780 }, { "epoch": 2.51, "learning_rate": 6.490745035867123e-06, "loss": 1.5166, "step": 2782 }, { "epoch": 2.51, "learning_rate": 6.477072363371215e-06, "loss": 1.5522, "step": 2784 }, { "epoch": 2.51, "learning_rate": 6.4634072057643045e-06, "loss": 1.5213, "step": 2786 }, { "epoch": 2.51, "learning_rate": 6.449749592196053e-06, "loss": 1.5297, "step": 2788 }, { "epoch": 2.52, "learning_rate": 6.436099551800023e-06, "loss": 1.4882, "step": 2790 }, { "epoch": 2.52, "learning_rate": 6.422457113693633e-06, "loss": 1.4607, "step": 2792 }, { "epoch": 2.52, "learning_rate": 6.40882230697808e-06, "loss": 1.5366, "step": 2794 }, { "epoch": 2.52, "learning_rate": 6.395195160738273e-06, "loss": 1.5072, "step": 2796 }, { "epoch": 2.52, "learning_rate": 6.381575704042792e-06, "loss": 1.5576, "step": 2798 }, { "epoch": 2.52, "learning_rate": 6.367963965943813e-06, "loss": 1.5283, "step": 2800 }, { "epoch": 2.53, "learning_rate": 6.35435997547704e-06, "loss": 1.4778, "step": 2802 }, { "epoch": 2.53, "learning_rate": 6.340763761661665e-06, "loss": 1.511, "step": 2804 }, { "epoch": 2.53, "learning_rate": 6.327175353500272e-06, "loss": 1.5468, "step": 2806 }, { "epoch": 2.53, "learning_rate": 6.3135947799788065e-06, "loss": 1.5025, "step": 2808 }, { "epoch": 2.53, "learning_rate": 6.3000220700664985e-06, "loss": 1.5675, "step": 2810 }, { "epoch": 2.54, "learning_rate": 6.286457252715805e-06, "loss": 1.5266, "step": 2812 }, { "epoch": 2.54, "learning_rate": 6.272900356862346e-06, "loss": 1.4049, "step": 2814 }, { "epoch": 2.54, "learning_rate": 6.259351411424849e-06, "loss": 1.5185, "step": 2816 }, { "epoch": 2.54, "learning_rate": 6.245810445305068e-06, "loss": 1.5236, "step": 2818 }, { "epoch": 2.54, "learning_rate": 6.232277487387751e-06, "loss": 1.513, "step": 2820 }, { "epoch": 2.54, "learning_rate": 6.218752566540555e-06, "loss": 1.5418, "step": 2822 }, { "epoch": 2.55, "learning_rate": 6.205235711614e-06, "loss": 1.4801, "step": 2824 }, { "epoch": 2.55, "learning_rate": 6.191726951441397e-06, "loss": 1.5086, "step": 2826 }, { "epoch": 2.55, "learning_rate": 6.17822631483878e-06, "loss": 1.503, "step": 2828 }, { "epoch": 2.55, "learning_rate": 6.16473383060487e-06, "loss": 1.477, "step": 2830 }, { "epoch": 2.55, "learning_rate": 6.151249527520988e-06, "loss": 1.5222, "step": 2832 }, { "epoch": 2.56, "learning_rate": 6.137773434351009e-06, "loss": 1.4894, "step": 2834 }, { "epoch": 2.56, "learning_rate": 6.12430557984129e-06, "loss": 1.5005, "step": 2836 }, { "epoch": 2.56, "learning_rate": 6.110845992720622e-06, "loss": 1.5306, "step": 2838 }, { "epoch": 2.56, "learning_rate": 6.097394701700146e-06, "loss": 1.4587, "step": 2840 }, { "epoch": 2.56, "learning_rate": 6.083951735473316e-06, "loss": 1.4603, "step": 2842 }, { "epoch": 2.56, "learning_rate": 6.070517122715833e-06, "loss": 1.5366, "step": 2844 }, { "epoch": 2.57, "learning_rate": 6.057090892085571e-06, "loss": 1.5082, "step": 2846 }, { "epoch": 2.57, "learning_rate": 6.043673072222525e-06, "loss": 1.5352, "step": 2848 }, { "epoch": 2.57, "learning_rate": 6.030263691748754e-06, "loss": 1.5126, "step": 2850 }, { "epoch": 2.57, "learning_rate": 6.016862779268301e-06, "loss": 1.4523, "step": 2852 }, { "epoch": 2.57, "learning_rate": 6.003470363367161e-06, "loss": 1.4748, "step": 2854 }, { "epoch": 2.58, "learning_rate": 5.990086472613195e-06, "loss": 1.5156, "step": 2856 }, { "epoch": 2.58, "learning_rate": 5.976711135556086e-06, "loss": 1.4555, "step": 2858 }, { "epoch": 2.58, "learning_rate": 5.963344380727267e-06, "loss": 1.546, "step": 2860 }, { "epoch": 2.58, "learning_rate": 5.949986236639857e-06, "loss": 1.522, "step": 2862 }, { "epoch": 2.58, "learning_rate": 5.936636731788621e-06, "loss": 1.5375, "step": 2864 }, { "epoch": 2.58, "learning_rate": 5.9232958946498854e-06, "loss": 1.5262, "step": 2866 }, { "epoch": 2.59, "learning_rate": 5.909963753681492e-06, "loss": 1.54, "step": 2868 }, { "epoch": 2.59, "learning_rate": 5.896640337322725e-06, "loss": 1.6126, "step": 2870 }, { "epoch": 2.59, "learning_rate": 5.883325673994274e-06, "loss": 1.515, "step": 2872 }, { "epoch": 2.59, "learning_rate": 5.870019792098134e-06, "loss": 1.4953, "step": 2874 }, { "epoch": 2.59, "learning_rate": 5.8567227200175865e-06, "loss": 1.5431, "step": 2876 }, { "epoch": 2.6, "learning_rate": 5.843434486117115e-06, "loss": 1.5919, "step": 2878 }, { "epoch": 2.6, "learning_rate": 5.830155118742347e-06, "loss": 1.4651, "step": 2880 }, { "epoch": 2.6, "learning_rate": 5.816884646219997e-06, "loss": 1.5157, "step": 2882 }, { "epoch": 2.6, "learning_rate": 5.803623096857819e-06, "loss": 1.5199, "step": 2884 }, { "epoch": 2.6, "learning_rate": 5.790370498944504e-06, "loss": 1.4584, "step": 2886 }, { "epoch": 2.6, "learning_rate": 5.7771268807496794e-06, "loss": 1.4451, "step": 2888 }, { "epoch": 2.61, "learning_rate": 5.763892270523792e-06, "loss": 1.501, "step": 2890 }, { "epoch": 2.61, "learning_rate": 5.750666696498097e-06, "loss": 1.4653, "step": 2892 }, { "epoch": 2.61, "learning_rate": 5.737450186884555e-06, "loss": 1.5312, "step": 2894 }, { "epoch": 2.61, "learning_rate": 5.724242769875794e-06, "loss": 1.5635, "step": 2896 }, { "epoch": 2.61, "learning_rate": 5.711044473645061e-06, "loss": 1.5362, "step": 2898 }, { "epoch": 2.61, "learning_rate": 5.6978553263461265e-06, "loss": 1.519, "step": 2900 }, { "epoch": 2.62, "learning_rate": 5.684675356113263e-06, "loss": 1.5324, "step": 2902 }, { "epoch": 2.62, "learning_rate": 5.6715045910611525e-06, "loss": 1.5318, "step": 2904 }, { "epoch": 2.62, "learning_rate": 5.6583430592848565e-06, "loss": 1.556, "step": 2906 }, { "epoch": 2.62, "learning_rate": 5.645190788859719e-06, "loss": 1.4563, "step": 2908 }, { "epoch": 2.62, "learning_rate": 5.632047807841352e-06, "loss": 1.5101, "step": 2910 }, { "epoch": 2.63, "learning_rate": 5.618914144265532e-06, "loss": 1.4905, "step": 2912 }, { "epoch": 2.63, "learning_rate": 5.605789826148178e-06, "loss": 1.553, "step": 2914 }, { "epoch": 2.63, "learning_rate": 5.592674881485258e-06, "loss": 1.5348, "step": 2916 }, { "epoch": 2.63, "learning_rate": 5.579569338252758e-06, "loss": 1.5323, "step": 2918 }, { "epoch": 2.63, "learning_rate": 5.5664732244066015e-06, "loss": 1.5046, "step": 2920 }, { "epoch": 2.63, "learning_rate": 5.553386567882592e-06, "loss": 1.4862, "step": 2922 }, { "epoch": 2.64, "learning_rate": 5.5403093965963806e-06, "loss": 1.5642, "step": 2924 }, { "epoch": 2.64, "learning_rate": 5.5272417384433605e-06, "loss": 1.5198, "step": 2926 }, { "epoch": 2.64, "learning_rate": 5.514183621298655e-06, "loss": 1.4895, "step": 2928 }, { "epoch": 2.64, "learning_rate": 5.501135073017008e-06, "loss": 1.5053, "step": 2930 }, { "epoch": 2.64, "learning_rate": 5.488096121432778e-06, "loss": 1.4725, "step": 2932 }, { "epoch": 2.65, "learning_rate": 5.475066794359833e-06, "loss": 1.5248, "step": 2934 }, { "epoch": 2.65, "learning_rate": 5.4620471195915304e-06, "loss": 1.5257, "step": 2936 }, { "epoch": 2.65, "learning_rate": 5.449037124900615e-06, "loss": 1.524, "step": 2938 }, { "epoch": 2.65, "learning_rate": 5.4360368380392025e-06, "loss": 1.4771, "step": 2940 }, { "epoch": 2.65, "learning_rate": 5.42304628673869e-06, "loss": 1.5273, "step": 2942 }, { "epoch": 2.65, "learning_rate": 5.4100654987097044e-06, "loss": 1.488, "step": 2944 }, { "epoch": 2.66, "learning_rate": 5.397094501642059e-06, "loss": 1.5327, "step": 2946 }, { "epoch": 2.66, "learning_rate": 5.384133323204666e-06, "loss": 1.5387, "step": 2948 }, { "epoch": 2.66, "learning_rate": 5.371181991045512e-06, "loss": 1.4628, "step": 2950 }, { "epoch": 2.66, "learning_rate": 5.358240532791555e-06, "loss": 1.4749, "step": 2952 }, { "epoch": 2.66, "learning_rate": 5.345308976048715e-06, "loss": 1.5011, "step": 2954 }, { "epoch": 2.67, "learning_rate": 5.332387348401775e-06, "loss": 1.5534, "step": 2956 }, { "epoch": 2.67, "learning_rate": 5.319475677414348e-06, "loss": 1.4919, "step": 2958 }, { "epoch": 2.67, "learning_rate": 5.306573990628796e-06, "loss": 1.464, "step": 2960 }, { "epoch": 2.67, "learning_rate": 5.293682315566202e-06, "loss": 1.4961, "step": 2962 }, { "epoch": 2.67, "learning_rate": 5.280800679726272e-06, "loss": 1.509, "step": 2964 }, { "epoch": 2.67, "learning_rate": 5.267929110587308e-06, "loss": 1.4824, "step": 2966 }, { "epoch": 2.68, "learning_rate": 5.255067635606139e-06, "loss": 1.4869, "step": 2968 }, { "epoch": 2.68, "learning_rate": 5.242216282218057e-06, "loss": 1.5432, "step": 2970 }, { "epoch": 2.68, "learning_rate": 5.229375077836769e-06, "loss": 1.5251, "step": 2972 }, { "epoch": 2.68, "learning_rate": 5.2165440498543276e-06, "loss": 1.5558, "step": 2974 }, { "epoch": 2.68, "learning_rate": 5.20372322564108e-06, "loss": 1.553, "step": 2976 }, { "epoch": 2.69, "learning_rate": 5.190912632545599e-06, "loss": 1.5385, "step": 2978 }, { "epoch": 2.69, "learning_rate": 5.178112297894655e-06, "loss": 1.4673, "step": 2980 }, { "epoch": 2.69, "learning_rate": 5.16532224899311e-06, "loss": 1.5011, "step": 2982 }, { "epoch": 2.69, "learning_rate": 5.152542513123906e-06, "loss": 1.5183, "step": 2984 }, { "epoch": 2.69, "learning_rate": 5.13977311754797e-06, "loss": 1.4702, "step": 2986 }, { "epoch": 2.69, "learning_rate": 5.1270140895041785e-06, "loss": 1.5591, "step": 2988 }, { "epoch": 2.7, "learning_rate": 5.114265456209297e-06, "loss": 1.5238, "step": 2990 }, { "epoch": 2.7, "learning_rate": 5.101527244857907e-06, "loss": 1.5689, "step": 2992 }, { "epoch": 2.7, "learning_rate": 5.088799482622372e-06, "loss": 1.5431, "step": 2994 }, { "epoch": 2.7, "learning_rate": 5.076082196652754e-06, "loss": 1.6206, "step": 2996 }, { "epoch": 2.7, "learning_rate": 5.063375414076775e-06, "loss": 1.4451, "step": 2998 }, { "epoch": 2.71, "learning_rate": 5.050679161999741e-06, "loss": 1.5286, "step": 3000 }, { "epoch": 2.71, "learning_rate": 5.037993467504515e-06, "loss": 1.5318, "step": 3002 }, { "epoch": 2.71, "learning_rate": 5.025318357651419e-06, "loss": 1.5027, "step": 3004 }, { "epoch": 2.71, "learning_rate": 5.012653859478209e-06, "loss": 1.5066, "step": 3006 }, { "epoch": 2.71, "learning_rate": 5.000000000000003e-06, "loss": 1.5122, "step": 3008 }, { "epoch": 2.71, "learning_rate": 4.987356806209214e-06, "loss": 1.5269, "step": 3010 }, { "epoch": 2.72, "learning_rate": 4.974724305075523e-06, "loss": 1.5193, "step": 3012 }, { "epoch": 2.72, "learning_rate": 4.962102523545782e-06, "loss": 1.5, "step": 3014 }, { "epoch": 2.72, "learning_rate": 4.949491488543996e-06, "loss": 1.5606, "step": 3016 }, { "epoch": 2.72, "learning_rate": 4.936891226971232e-06, "loss": 1.4576, "step": 3018 }, { "epoch": 2.72, "learning_rate": 4.92430176570558e-06, "loss": 1.53, "step": 3020 }, { "epoch": 2.72, "learning_rate": 4.911723131602089e-06, "loss": 1.5139, "step": 3022 }, { "epoch": 2.73, "learning_rate": 4.899155351492721e-06, "loss": 1.5269, "step": 3024 }, { "epoch": 2.73, "learning_rate": 4.88659845218627e-06, "loss": 1.4767, "step": 3026 }, { "epoch": 2.73, "learning_rate": 4.874052460468339e-06, "loss": 1.5009, "step": 3028 }, { "epoch": 2.73, "learning_rate": 4.861517403101249e-06, "loss": 1.5074, "step": 3030 }, { "epoch": 2.73, "learning_rate": 4.848993306823997e-06, "loss": 1.5336, "step": 3032 }, { "epoch": 2.74, "learning_rate": 4.836480198352208e-06, "loss": 1.4969, "step": 3034 }, { "epoch": 2.74, "learning_rate": 4.823978104378056e-06, "loss": 1.4998, "step": 3036 }, { "epoch": 2.74, "learning_rate": 4.811487051570235e-06, "loss": 1.4481, "step": 3038 }, { "epoch": 2.74, "learning_rate": 4.7990070665738734e-06, "loss": 1.572, "step": 3040 }, { "epoch": 2.74, "learning_rate": 4.786538176010494e-06, "loss": 1.516, "step": 3042 }, { "epoch": 2.74, "learning_rate": 4.774080406477954e-06, "loss": 1.5844, "step": 3044 }, { "epoch": 2.75, "learning_rate": 4.761633784550393e-06, "loss": 1.5226, "step": 3046 }, { "epoch": 2.75, "learning_rate": 4.749198336778159e-06, "loss": 1.5655, "step": 3048 }, { "epoch": 2.75, "learning_rate": 4.736774089687781e-06, "loss": 1.4839, "step": 3050 }, { "epoch": 2.75, "learning_rate": 4.724361069781882e-06, "loss": 1.498, "step": 3052 }, { "epoch": 2.75, "learning_rate": 4.711959303539137e-06, "loss": 1.485, "step": 3054 }, { "epoch": 2.76, "learning_rate": 4.699568817414224e-06, "loss": 1.484, "step": 3056 }, { "epoch": 2.76, "learning_rate": 4.687189637837748e-06, "loss": 1.4352, "step": 3058 }, { "epoch": 2.76, "learning_rate": 4.674821791216207e-06, "loss": 1.4599, "step": 3060 }, { "epoch": 2.76, "learning_rate": 4.662465303931912e-06, "loss": 1.5327, "step": 3062 }, { "epoch": 2.76, "learning_rate": 4.650120202342959e-06, "loss": 1.4812, "step": 3064 }, { "epoch": 2.76, "learning_rate": 4.637786512783134e-06, "loss": 1.4903, "step": 3066 }, { "epoch": 2.77, "learning_rate": 4.625464261561902e-06, "loss": 1.4594, "step": 3068 }, { "epoch": 2.77, "learning_rate": 4.613153474964311e-06, "loss": 1.4989, "step": 3070 }, { "epoch": 2.77, "learning_rate": 4.600854179250971e-06, "loss": 1.5395, "step": 3072 }, { "epoch": 2.77, "learning_rate": 4.588566400657965e-06, "loss": 1.4893, "step": 3074 }, { "epoch": 2.77, "learning_rate": 4.576290165396811e-06, "loss": 1.5686, "step": 3076 }, { "epoch": 2.78, "learning_rate": 4.564025499654414e-06, "loss": 1.5599, "step": 3078 }, { "epoch": 2.78, "learning_rate": 4.551772429592987e-06, "loss": 1.5061, "step": 3080 }, { "epoch": 2.78, "learning_rate": 4.539530981350017e-06, "loss": 1.5466, "step": 3082 }, { "epoch": 2.78, "learning_rate": 4.527301181038189e-06, "loss": 1.5349, "step": 3084 }, { "epoch": 2.78, "learning_rate": 4.515083054745363e-06, "loss": 1.5065, "step": 3086 }, { "epoch": 2.78, "learning_rate": 4.502876628534465e-06, "loss": 1.5608, "step": 3088 }, { "epoch": 2.79, "learning_rate": 4.49068192844349e-06, "loss": 1.5091, "step": 3090 }, { "epoch": 2.79, "learning_rate": 4.4784989804854055e-06, "loss": 1.5016, "step": 3092 }, { "epoch": 2.79, "learning_rate": 4.46632781064812e-06, "loss": 1.482, "step": 3094 }, { "epoch": 2.79, "learning_rate": 4.454168444894404e-06, "loss": 1.574, "step": 3096 }, { "epoch": 2.79, "learning_rate": 4.4420209091618675e-06, "loss": 1.5043, "step": 3098 }, { "epoch": 2.8, "learning_rate": 4.429885229362866e-06, "loss": 1.5039, "step": 3100 }, { "epoch": 2.8, "learning_rate": 4.417761431384471e-06, "loss": 1.5083, "step": 3102 }, { "epoch": 2.8, "learning_rate": 4.405649541088419e-06, "loss": 1.4593, "step": 3104 }, { "epoch": 2.8, "learning_rate": 4.393549584311029e-06, "loss": 1.4436, "step": 3106 }, { "epoch": 2.8, "learning_rate": 4.381461586863183e-06, "loss": 1.4931, "step": 3108 }, { "epoch": 2.8, "learning_rate": 4.369385574530227e-06, "loss": 1.5188, "step": 3110 }, { "epoch": 2.81, "learning_rate": 4.3573215730719665e-06, "loss": 1.4873, "step": 3112 }, { "epoch": 2.81, "learning_rate": 4.3452696082225685e-06, "loss": 1.4547, "step": 3114 }, { "epoch": 2.81, "learning_rate": 4.3332297056905385e-06, "loss": 1.5253, "step": 3116 }, { "epoch": 2.81, "learning_rate": 4.321201891158635e-06, "loss": 1.4432, "step": 3118 }, { "epoch": 2.81, "learning_rate": 4.309186190283849e-06, "loss": 1.5252, "step": 3120 }, { "epoch": 2.82, "learning_rate": 4.2971826286973175e-06, "loss": 1.4943, "step": 3122 }, { "epoch": 2.82, "learning_rate": 4.285191232004285e-06, "loss": 1.4916, "step": 3124 }, { "epoch": 2.82, "learning_rate": 4.273212025784056e-06, "loss": 1.4142, "step": 3126 }, { "epoch": 2.82, "learning_rate": 4.261245035589917e-06, "loss": 1.5139, "step": 3128 }, { "epoch": 2.82, "learning_rate": 4.24929028694911e-06, "loss": 1.4081, "step": 3130 }, { "epoch": 2.82, "learning_rate": 4.237347805362757e-06, "loss": 1.4579, "step": 3132 }, { "epoch": 2.83, "learning_rate": 4.225417616305809e-06, "loss": 1.536, "step": 3134 }, { "epoch": 2.83, "learning_rate": 4.213499745226999e-06, "loss": 1.5318, "step": 3136 }, { "epoch": 2.83, "learning_rate": 4.201594217548789e-06, "loss": 1.4739, "step": 3138 }, { "epoch": 2.83, "learning_rate": 4.189701058667301e-06, "loss": 1.5138, "step": 3140 }, { "epoch": 2.83, "learning_rate": 4.177820293952285e-06, "loss": 1.4591, "step": 3142 }, { "epoch": 2.83, "learning_rate": 4.165951948747043e-06, "loss": 1.4755, "step": 3144 }, { "epoch": 2.84, "learning_rate": 4.15409604836838e-06, "loss": 1.5083, "step": 3146 }, { "epoch": 2.84, "learning_rate": 4.142252618106573e-06, "loss": 1.5349, "step": 3148 }, { "epoch": 2.84, "learning_rate": 4.1304216832252755e-06, "loss": 1.5078, "step": 3150 }, { "epoch": 2.84, "learning_rate": 4.118603268961509e-06, "loss": 1.4798, "step": 3152 }, { "epoch": 2.84, "learning_rate": 4.1067974005255705e-06, "loss": 1.5029, "step": 3154 }, { "epoch": 2.85, "learning_rate": 4.095004103101001e-06, "loss": 1.5056, "step": 3156 }, { "epoch": 2.85, "learning_rate": 4.083223401844523e-06, "loss": 1.5195, "step": 3158 }, { "epoch": 2.85, "learning_rate": 4.071455321885996e-06, "loss": 1.5299, "step": 3160 }, { "epoch": 2.85, "learning_rate": 4.059699888328348e-06, "loss": 1.5126, "step": 3162 }, { "epoch": 2.85, "learning_rate": 4.047957126247542e-06, "loss": 1.4871, "step": 3164 }, { "epoch": 2.85, "learning_rate": 4.036227060692498e-06, "loss": 1.487, "step": 3166 }, { "epoch": 2.86, "learning_rate": 4.024509716685058e-06, "loss": 1.5214, "step": 3168 }, { "epoch": 2.86, "learning_rate": 4.0128051192199325e-06, "loss": 1.4821, "step": 3170 }, { "epoch": 2.86, "learning_rate": 4.001113293264631e-06, "loss": 1.46, "step": 3172 }, { "epoch": 2.86, "learning_rate": 3.989434263759433e-06, "loss": 1.4978, "step": 3174 }, { "epoch": 2.86, "learning_rate": 3.97776805561731e-06, "loss": 1.4738, "step": 3176 }, { "epoch": 2.87, "learning_rate": 3.9661146937238895e-06, "loss": 1.5618, "step": 3178 }, { "epoch": 2.87, "learning_rate": 3.954474202937389e-06, "loss": 1.5181, "step": 3180 }, { "epoch": 2.87, "learning_rate": 3.942846608088583e-06, "loss": 1.482, "step": 3182 }, { "epoch": 2.87, "learning_rate": 3.931231933980724e-06, "loss": 1.4962, "step": 3184 }, { "epoch": 2.87, "learning_rate": 3.919630205389516e-06, "loss": 1.5599, "step": 3186 }, { "epoch": 2.87, "learning_rate": 3.908041447063034e-06, "loss": 1.497, "step": 3188 }, { "epoch": 2.88, "learning_rate": 3.896465683721695e-06, "loss": 1.4755, "step": 3190 }, { "epoch": 2.88, "learning_rate": 3.884902940058196e-06, "loss": 1.5611, "step": 3192 }, { "epoch": 2.88, "learning_rate": 3.8733532407374555e-06, "loss": 1.543, "step": 3194 }, { "epoch": 2.88, "learning_rate": 3.861816610396574e-06, "loss": 1.506, "step": 3196 }, { "epoch": 2.88, "learning_rate": 3.850293073644767e-06, "loss": 1.5262, "step": 3198 }, { "epoch": 2.89, "learning_rate": 3.838782655063326e-06, "loss": 1.5193, "step": 3200 }, { "epoch": 2.89, "learning_rate": 3.827285379205549e-06, "loss": 1.5002, "step": 3202 }, { "epoch": 2.89, "learning_rate": 3.815801270596715e-06, "loss": 1.482, "step": 3204 }, { "epoch": 2.89, "learning_rate": 3.804330353733998e-06, "loss": 1.4588, "step": 3206 }, { "epoch": 2.89, "learning_rate": 3.7928726530864502e-06, "loss": 1.4336, "step": 3208 }, { "epoch": 2.89, "learning_rate": 3.7814281930949173e-06, "loss": 1.515, "step": 3210 }, { "epoch": 2.9, "learning_rate": 3.769996998172002e-06, "loss": 1.541, "step": 3212 }, { "epoch": 2.9, "learning_rate": 3.758579092702023e-06, "loss": 1.5338, "step": 3214 }, { "epoch": 2.9, "learning_rate": 3.7471745010409333e-06, "loss": 1.5087, "step": 3216 }, { "epoch": 2.9, "learning_rate": 3.735783247516305e-06, "loss": 1.479, "step": 3218 }, { "epoch": 2.9, "learning_rate": 3.7244053564272377e-06, "loss": 1.5071, "step": 3220 }, { "epoch": 2.91, "learning_rate": 3.7130408520443505e-06, "loss": 1.4724, "step": 3222 }, { "epoch": 2.91, "learning_rate": 3.7016897586096778e-06, "loss": 1.5396, "step": 3224 }, { "epoch": 2.91, "learning_rate": 3.690352100336675e-06, "loss": 1.5369, "step": 3226 }, { "epoch": 2.91, "learning_rate": 3.679027901410117e-06, "loss": 1.5212, "step": 3228 }, { "epoch": 2.91, "learning_rate": 3.667717185986085e-06, "loss": 1.4958, "step": 3230 }, { "epoch": 2.91, "learning_rate": 3.6564199781918865e-06, "loss": 1.4261, "step": 3232 }, { "epoch": 2.92, "learning_rate": 3.6451363021260146e-06, "loss": 1.4705, "step": 3234 }, { "epoch": 2.92, "learning_rate": 3.63386618185811e-06, "loss": 1.4583, "step": 3236 }, { "epoch": 2.92, "learning_rate": 3.6226096414288816e-06, "loss": 1.4864, "step": 3238 }, { "epoch": 2.92, "learning_rate": 3.6113667048500833e-06, "loss": 1.5096, "step": 3240 }, { "epoch": 2.92, "learning_rate": 3.6001373961044385e-06, "loss": 1.5321, "step": 3242 }, { "epoch": 2.93, "learning_rate": 3.5889217391456167e-06, "loss": 1.4713, "step": 3244 }, { "epoch": 2.93, "learning_rate": 3.5777197578981425e-06, "loss": 1.5169, "step": 3246 }, { "epoch": 2.93, "learning_rate": 3.5665314762573933e-06, "loss": 1.4016, "step": 3248 }, { "epoch": 2.93, "learning_rate": 3.5553569180895044e-06, "loss": 1.4573, "step": 3250 }, { "epoch": 2.93, "learning_rate": 3.5441961072313504e-06, "loss": 1.5512, "step": 3252 }, { "epoch": 2.93, "learning_rate": 3.5330490674904737e-06, "loss": 1.4868, "step": 3254 }, { "epoch": 2.94, "learning_rate": 3.521915822645039e-06, "loss": 1.5873, "step": 3256 }, { "epoch": 2.94, "learning_rate": 3.510796396443793e-06, "loss": 1.5238, "step": 3258 }, { "epoch": 2.94, "learning_rate": 3.499690812605997e-06, "loss": 1.5263, "step": 3260 }, { "epoch": 2.94, "learning_rate": 3.4885990948213922e-06, "loss": 1.5268, "step": 3262 }, { "epoch": 2.94, "learning_rate": 3.477521266750131e-06, "loss": 1.5712, "step": 3264 }, { "epoch": 2.94, "learning_rate": 3.4664573520227564e-06, "loss": 1.5208, "step": 3266 }, { "epoch": 2.95, "learning_rate": 3.4554073742401028e-06, "loss": 1.4913, "step": 3268 }, { "epoch": 2.95, "learning_rate": 3.444371356973305e-06, "loss": 1.5158, "step": 3270 }, { "epoch": 2.95, "learning_rate": 3.433349323763696e-06, "loss": 1.4721, "step": 3272 }, { "epoch": 2.95, "learning_rate": 3.4223412981227964e-06, "loss": 1.4383, "step": 3274 }, { "epoch": 2.95, "learning_rate": 3.411347303532231e-06, "loss": 1.5057, "step": 3276 }, { "epoch": 2.96, "learning_rate": 3.4003673634437084e-06, "loss": 1.4861, "step": 3278 }, { "epoch": 2.96, "learning_rate": 3.389401501278946e-06, "loss": 1.469, "step": 3280 }, { "epoch": 2.96, "learning_rate": 3.3784497404296347e-06, "loss": 1.5809, "step": 3282 }, { "epoch": 2.96, "learning_rate": 3.367512104257391e-06, "loss": 1.4898, "step": 3284 }, { "epoch": 2.96, "learning_rate": 3.35658861609369e-06, "loss": 1.4907, "step": 3286 }, { "epoch": 2.96, "learning_rate": 3.3456792992398433e-06, "loss": 1.5118, "step": 3288 }, { "epoch": 2.97, "learning_rate": 3.334784176966912e-06, "loss": 1.4053, "step": 3290 }, { "epoch": 2.97, "learning_rate": 3.3239032725156983e-06, "loss": 1.5658, "step": 3292 }, { "epoch": 2.97, "learning_rate": 3.3130366090966604e-06, "loss": 1.5098, "step": 3294 }, { "epoch": 2.97, "learning_rate": 3.3021842098898938e-06, "loss": 1.5287, "step": 3296 }, { "epoch": 2.97, "learning_rate": 3.2913460980450506e-06, "loss": 1.5354, "step": 3298 }, { "epoch": 2.98, "learning_rate": 3.2805222966813187e-06, "loss": 1.509, "step": 3300 }, { "epoch": 2.98, "learning_rate": 3.2697128288873536e-06, "loss": 1.6128, "step": 3302 }, { "epoch": 2.98, "learning_rate": 3.2589177177212304e-06, "loss": 1.5342, "step": 3304 }, { "epoch": 2.98, "learning_rate": 3.248136986210414e-06, "loss": 1.5372, "step": 3306 }, { "epoch": 2.98, "learning_rate": 3.2373706573516795e-06, "loss": 1.4921, "step": 3308 }, { "epoch": 2.98, "learning_rate": 3.2266187541110937e-06, "loss": 1.5076, "step": 3310 }, { "epoch": 2.99, "learning_rate": 3.2158812994239396e-06, "loss": 1.5531, "step": 3312 }, { "epoch": 2.99, "learning_rate": 3.2051583161946865e-06, "loss": 1.4592, "step": 3314 }, { "epoch": 2.99, "learning_rate": 3.1944498272969273e-06, "loss": 1.4771, "step": 3316 }, { "epoch": 2.99, "learning_rate": 3.183755855573346e-06, "loss": 1.5302, "step": 3318 }, { "epoch": 2.99, "learning_rate": 3.1730764238356517e-06, "loss": 1.4898, "step": 3320 }, { "epoch": 3.0, "learning_rate": 3.162411554864544e-06, "loss": 1.5458, "step": 3322 }, { "epoch": 3.0, "learning_rate": 3.1517612714096534e-06, "loss": 1.5402, "step": 3324 }, { "epoch": 3.0, "learning_rate": 3.141125596189494e-06, "loss": 1.5306, "step": 3326 }, { "epoch": 3.0, "learning_rate": 3.130504551891431e-06, "loss": 1.467, "step": 3328 }, { "epoch": 3.0, "learning_rate": 3.119898161171605e-06, "loss": 1.5479, "step": 3330 }, { "epoch": 3.0, "learning_rate": 3.109306446654912e-06, "loss": 1.444, "step": 3332 }, { "epoch": 3.01, "learning_rate": 3.0987294309349316e-06, "loss": 1.5628, "step": 3334 }, { "epoch": 3.01, "learning_rate": 3.088167136573894e-06, "loss": 1.473, "step": 3336 }, { "epoch": 3.01, "learning_rate": 3.0776195861026202e-06, "loss": 1.5232, "step": 3338 }, { "epoch": 3.01, "learning_rate": 3.0670868020204936e-06, "loss": 1.4349, "step": 3340 }, { "epoch": 3.01, "learning_rate": 3.0565688067953836e-06, "loss": 1.4101, "step": 3342 }, { "epoch": 3.02, "learning_rate": 3.0460656228636254e-06, "loss": 1.5501, "step": 3344 }, { "epoch": 3.02, "learning_rate": 3.0355772726299536e-06, "loss": 1.5632, "step": 3346 }, { "epoch": 3.02, "learning_rate": 3.0251037784674555e-06, "loss": 1.4991, "step": 3348 }, { "epoch": 3.02, "learning_rate": 3.014645162717542e-06, "loss": 1.543, "step": 3350 }, { "epoch": 3.02, "learning_rate": 3.0042014476898717e-06, "loss": 1.4972, "step": 3352 }, { "epoch": 3.02, "learning_rate": 2.9937726556623303e-06, "loss": 1.3964, "step": 3354 }, { "epoch": 3.03, "learning_rate": 2.9833588088809627e-06, "loss": 1.4989, "step": 3356 }, { "epoch": 3.03, "learning_rate": 2.9729599295599366e-06, "loss": 1.5028, "step": 3358 }, { "epoch": 3.03, "learning_rate": 2.962576039881485e-06, "loss": 1.4963, "step": 3360 }, { "epoch": 3.03, "learning_rate": 2.952207161995879e-06, "loss": 1.4059, "step": 3362 }, { "epoch": 3.03, "learning_rate": 2.9418533180213613e-06, "loss": 1.4411, "step": 3364 }, { "epoch": 3.04, "learning_rate": 2.9315145300441006e-06, "loss": 1.4897, "step": 3366 }, { "epoch": 3.04, "learning_rate": 2.9211908201181592e-06, "loss": 1.4588, "step": 3368 }, { "epoch": 3.04, "learning_rate": 2.910882210265421e-06, "loss": 1.5836, "step": 3370 }, { "epoch": 3.04, "learning_rate": 2.9005887224755745e-06, "loss": 1.4342, "step": 3372 }, { "epoch": 3.04, "learning_rate": 2.8903103787060395e-06, "loss": 1.4968, "step": 3374 }, { "epoch": 3.04, "learning_rate": 2.8800472008819426e-06, "loss": 1.5249, "step": 3376 }, { "epoch": 3.05, "learning_rate": 2.8697992108960494e-06, "loss": 1.447, "step": 3378 }, { "epoch": 3.05, "learning_rate": 2.8595664306087313e-06, "loss": 1.5357, "step": 3380 }, { "epoch": 3.05, "learning_rate": 2.849348881847913e-06, "loss": 1.5141, "step": 3382 }, { "epoch": 3.05, "learning_rate": 2.8391465864090338e-06, "loss": 1.4493, "step": 3384 }, { "epoch": 3.05, "learning_rate": 2.828959566054996e-06, "loss": 1.5233, "step": 3386 }, { "epoch": 3.06, "learning_rate": 2.8187878425161088e-06, "loss": 1.48, "step": 3388 }, { "epoch": 3.06, "learning_rate": 2.8086314374900637e-06, "loss": 1.5077, "step": 3390 }, { "epoch": 3.06, "learning_rate": 2.79849037264186e-06, "loss": 1.5296, "step": 3392 }, { "epoch": 3.06, "learning_rate": 2.788364669603789e-06, "loss": 1.4566, "step": 3394 }, { "epoch": 3.06, "learning_rate": 2.7782543499753644e-06, "loss": 1.5193, "step": 3396 }, { "epoch": 3.06, "learning_rate": 2.7681594353232934e-06, "loss": 1.5448, "step": 3398 }, { "epoch": 3.07, "learning_rate": 2.758079947181409e-06, "loss": 1.4608, "step": 3400 }, { "epoch": 3.07, "learning_rate": 2.7480159070506517e-06, "loss": 1.4614, "step": 3402 }, { "epoch": 3.07, "learning_rate": 2.737967336399002e-06, "loss": 1.5236, "step": 3404 }, { "epoch": 3.07, "learning_rate": 2.7279342566614374e-06, "loss": 1.4661, "step": 3406 }, { "epoch": 3.07, "learning_rate": 2.717916689239903e-06, "loss": 1.5057, "step": 3408 }, { "epoch": 3.07, "learning_rate": 2.707914655503242e-06, "loss": 1.4687, "step": 3410 }, { "epoch": 3.08, "learning_rate": 2.697928176787176e-06, "loss": 1.4575, "step": 3412 }, { "epoch": 3.08, "learning_rate": 2.687957274394224e-06, "loss": 1.5014, "step": 3414 }, { "epoch": 3.08, "learning_rate": 2.678001969593701e-06, "loss": 1.4898, "step": 3416 }, { "epoch": 3.08, "learning_rate": 2.6680622836216375e-06, "loss": 1.4814, "step": 3418 }, { "epoch": 3.08, "learning_rate": 2.658138237680753e-06, "loss": 1.4391, "step": 3420 }, { "epoch": 3.09, "learning_rate": 2.6482298529403973e-06, "loss": 1.4201, "step": 3422 }, { "epoch": 3.09, "learning_rate": 2.638337150536523e-06, "loss": 1.5524, "step": 3424 }, { "epoch": 3.09, "learning_rate": 2.6284601515716213e-06, "loss": 1.522, "step": 3426 }, { "epoch": 3.09, "learning_rate": 2.6185988771146864e-06, "loss": 1.5003, "step": 3428 }, { "epoch": 3.09, "learning_rate": 2.608753348201177e-06, "loss": 1.4368, "step": 3430 }, { "epoch": 3.09, "learning_rate": 2.5989235858329575e-06, "loss": 1.5304, "step": 3432 }, { "epoch": 3.1, "learning_rate": 2.5891096109782644e-06, "loss": 1.4369, "step": 3434 }, { "epoch": 3.1, "learning_rate": 2.579311444571655e-06, "loss": 1.4968, "step": 3436 }, { "epoch": 3.1, "learning_rate": 2.569529107513964e-06, "loss": 1.4706, "step": 3438 }, { "epoch": 3.1, "learning_rate": 2.5597626206722583e-06, "loss": 1.4667, "step": 3440 }, { "epoch": 3.1, "learning_rate": 2.5500120048798026e-06, "loss": 1.3906, "step": 3442 }, { "epoch": 3.11, "learning_rate": 2.540277280935998e-06, "loss": 1.4251, "step": 3444 }, { "epoch": 3.11, "learning_rate": 2.530558469606351e-06, "loss": 1.4692, "step": 3446 }, { "epoch": 3.11, "learning_rate": 2.520855591622421e-06, "loss": 1.4652, "step": 3448 }, { "epoch": 3.11, "learning_rate": 2.511168667681779e-06, "loss": 1.4424, "step": 3450 }, { "epoch": 3.11, "learning_rate": 2.5014977184479696e-06, "loss": 1.4772, "step": 3452 }, { "epoch": 3.11, "learning_rate": 2.491842764550453e-06, "loss": 1.4563, "step": 3454 }, { "epoch": 3.12, "learning_rate": 2.482203826584577e-06, "loss": 1.4652, "step": 3456 }, { "epoch": 3.12, "learning_rate": 2.4725809251115208e-06, "loss": 1.5157, "step": 3458 }, { "epoch": 3.12, "learning_rate": 2.4629740806582534e-06, "loss": 1.4707, "step": 3460 }, { "epoch": 3.12, "learning_rate": 2.453383313717496e-06, "loss": 1.4329, "step": 3462 }, { "epoch": 3.12, "learning_rate": 2.443808644747675e-06, "loss": 1.4575, "step": 3464 }, { "epoch": 3.13, "learning_rate": 2.434250094172872e-06, "loss": 1.4706, "step": 3466 }, { "epoch": 3.13, "learning_rate": 2.424707682382794e-06, "loss": 1.4185, "step": 3468 }, { "epoch": 3.13, "learning_rate": 2.4151814297327157e-06, "loss": 1.4808, "step": 3470 }, { "epoch": 3.13, "learning_rate": 2.405671356543441e-06, "loss": 1.4116, "step": 3472 }, { "epoch": 3.13, "learning_rate": 2.3961774831012672e-06, "loss": 1.5432, "step": 3474 }, { "epoch": 3.13, "learning_rate": 2.386699829657928e-06, "loss": 1.4426, "step": 3476 }, { "epoch": 3.14, "learning_rate": 2.3772384164305662e-06, "loss": 1.4822, "step": 3478 }, { "epoch": 3.14, "learning_rate": 2.367793263601673e-06, "loss": 1.555, "step": 3480 }, { "epoch": 3.14, "learning_rate": 2.35836439131906e-06, "loss": 1.5162, "step": 3482 }, { "epoch": 3.14, "learning_rate": 2.348951819695805e-06, "loss": 1.5948, "step": 3484 }, { "epoch": 3.14, "learning_rate": 2.339555568810221e-06, "loss": 1.4229, "step": 3486 }, { "epoch": 3.15, "learning_rate": 2.330175658705799e-06, "loss": 1.5137, "step": 3488 }, { "epoch": 3.15, "learning_rate": 2.3254918377005877e-06, "loss": 1.4265, "step": 3490 }, { "epoch": 3.15, "learning_rate": 2.3161364762732095e-06, "loss": 1.4405, "step": 3492 }, { "epoch": 3.15, "learning_rate": 2.3067975055831337e-06, "loss": 1.4645, "step": 3494 }, { "epoch": 3.15, "learning_rate": 2.2974749455516786e-06, "loss": 1.5029, "step": 3496 }, { "epoch": 3.15, "learning_rate": 2.2881688160651406e-06, "loss": 1.4532, "step": 3498 }, { "epoch": 3.16, "learning_rate": 2.2788791369747774e-06, "loss": 1.5009, "step": 3500 }, { "epoch": 3.16, "learning_rate": 2.2696059280967474e-06, "loss": 1.4555, "step": 3502 }, { "epoch": 3.16, "learning_rate": 2.260349209212086e-06, "loss": 1.4426, "step": 3504 }, { "epoch": 3.16, "learning_rate": 2.2511090000666424e-06, "loss": 1.5772, "step": 3506 }, { "epoch": 3.16, "learning_rate": 2.2418853203710523e-06, "loss": 1.4803, "step": 3508 }, { "epoch": 3.17, "learning_rate": 2.232678189800698e-06, "loss": 1.4887, "step": 3510 }, { "epoch": 3.17, "learning_rate": 2.2234876279956484e-06, "loss": 1.4796, "step": 3512 }, { "epoch": 3.17, "learning_rate": 2.214313654560644e-06, "loss": 1.5214, "step": 3514 }, { "epoch": 3.17, "learning_rate": 2.20515628906502e-06, "loss": 1.5006, "step": 3516 }, { "epoch": 3.17, "learning_rate": 2.196015551042706e-06, "loss": 1.5007, "step": 3518 }, { "epoch": 3.17, "learning_rate": 2.186891459992145e-06, "loss": 1.4674, "step": 3520 }, { "epoch": 3.18, "learning_rate": 2.177784035376286e-06, "loss": 1.4773, "step": 3522 }, { "epoch": 3.18, "learning_rate": 2.1686932966225094e-06, "loss": 1.459, "step": 3524 }, { "epoch": 3.18, "learning_rate": 2.1596192631226175e-06, "loss": 1.4893, "step": 3526 }, { "epoch": 3.18, "learning_rate": 2.150561954232768e-06, "loss": 1.481, "step": 3528 }, { "epoch": 3.18, "learning_rate": 2.1415213892734452e-06, "loss": 1.4857, "step": 3530 }, { "epoch": 3.18, "learning_rate": 2.1324975875294207e-06, "loss": 1.4183, "step": 3532 }, { "epoch": 3.19, "learning_rate": 2.1234905682496987e-06, "loss": 1.5281, "step": 3534 }, { "epoch": 3.19, "learning_rate": 2.1145003506474957e-06, "loss": 1.4928, "step": 3536 }, { "epoch": 3.19, "learning_rate": 2.10552695390018e-06, "loss": 1.4607, "step": 3538 }, { "epoch": 3.19, "learning_rate": 2.096570397149239e-06, "loss": 1.4789, "step": 3540 }, { "epoch": 3.19, "learning_rate": 2.0876306995002384e-06, "loss": 1.4629, "step": 3542 }, { "epoch": 3.2, "learning_rate": 2.0787078800227868e-06, "loss": 1.4693, "step": 3544 }, { "epoch": 3.2, "learning_rate": 2.069801957750479e-06, "loss": 1.5246, "step": 3546 }, { "epoch": 3.2, "learning_rate": 2.060912951680878e-06, "loss": 1.5408, "step": 3548 }, { "epoch": 3.2, "learning_rate": 2.0520408807754532e-06, "loss": 1.4941, "step": 3550 }, { "epoch": 3.2, "learning_rate": 2.043185763959549e-06, "loss": 1.4488, "step": 3552 }, { "epoch": 3.2, "learning_rate": 2.034347620122351e-06, "loss": 1.4593, "step": 3554 }, { "epoch": 3.21, "learning_rate": 2.0255264681168306e-06, "loss": 1.5067, "step": 3556 }, { "epoch": 3.21, "learning_rate": 2.0167223267597246e-06, "loss": 1.452, "step": 3558 }, { "epoch": 3.21, "learning_rate": 2.0079352148314724e-06, "loss": 1.4799, "step": 3560 }, { "epoch": 3.21, "learning_rate": 1.999165151076192e-06, "loss": 1.4979, "step": 3562 }, { "epoch": 3.21, "learning_rate": 1.990412154201633e-06, "loss": 1.4793, "step": 3564 }, { "epoch": 3.22, "learning_rate": 1.9816762428791457e-06, "loss": 1.508, "step": 3566 }, { "epoch": 3.22, "learning_rate": 1.9729574357436266e-06, "loss": 1.435, "step": 3568 }, { "epoch": 3.22, "learning_rate": 1.9642557513934935e-06, "loss": 1.5827, "step": 3570 }, { "epoch": 3.22, "learning_rate": 1.955571208390633e-06, "loss": 1.4942, "step": 3572 }, { "epoch": 3.22, "learning_rate": 1.946903825260368e-06, "loss": 1.5088, "step": 3574 }, { "epoch": 3.22, "learning_rate": 1.9382536204914214e-06, "loss": 1.486, "step": 3576 }, { "epoch": 3.23, "learning_rate": 1.9296206125358664e-06, "loss": 1.4394, "step": 3578 }, { "epoch": 3.23, "learning_rate": 1.921004819809099e-06, "loss": 1.4363, "step": 3580 }, { "epoch": 3.23, "learning_rate": 1.9124062606897884e-06, "loss": 1.3946, "step": 3582 }, { "epoch": 3.23, "learning_rate": 1.9038249535198428e-06, "loss": 1.4707, "step": 3584 }, { "epoch": 3.23, "learning_rate": 1.895260916604368e-06, "loss": 1.46, "step": 3586 }, { "epoch": 3.24, "learning_rate": 1.8867141682116373e-06, "loss": 1.5116, "step": 3588 }, { "epoch": 3.24, "learning_rate": 1.8781847265730347e-06, "loss": 1.4524, "step": 3590 }, { "epoch": 3.24, "learning_rate": 1.8696726098830364e-06, "loss": 1.4589, "step": 3592 }, { "epoch": 3.24, "learning_rate": 1.861177836299155e-06, "loss": 1.3879, "step": 3594 }, { "epoch": 3.24, "learning_rate": 1.85270042394191e-06, "loss": 1.4768, "step": 3596 }, { "epoch": 3.24, "learning_rate": 1.8442403908947904e-06, "loss": 1.394, "step": 3598 }, { "epoch": 3.25, "learning_rate": 1.835797755204205e-06, "loss": 1.4304, "step": 3600 }, { "epoch": 3.25, "learning_rate": 1.8273725348794624e-06, "loss": 1.5056, "step": 3602 }, { "epoch": 3.25, "learning_rate": 1.818964747892713e-06, "loss": 1.559, "step": 3604 }, { "epoch": 3.25, "learning_rate": 1.8105744121789226e-06, "loss": 1.5817, "step": 3606 }, { "epoch": 3.25, "learning_rate": 1.8022015456358277e-06, "loss": 1.4484, "step": 3608 }, { "epoch": 3.26, "learning_rate": 1.7938461661239093e-06, "loss": 1.4658, "step": 3610 }, { "epoch": 3.26, "learning_rate": 1.7855082914663346e-06, "loss": 1.4426, "step": 3612 }, { "epoch": 3.26, "learning_rate": 1.7771879394489432e-06, "loss": 1.5372, "step": 3614 }, { "epoch": 3.26, "learning_rate": 1.768885127820187e-06, "loss": 1.4458, "step": 3616 }, { "epoch": 3.26, "learning_rate": 1.7605998742911001e-06, "loss": 1.4432, "step": 3618 }, { "epoch": 3.26, "learning_rate": 1.7523321965352757e-06, "loss": 1.4313, "step": 3620 }, { "epoch": 3.27, "learning_rate": 1.7440821121887997e-06, "loss": 1.4853, "step": 3622 }, { "epoch": 3.27, "learning_rate": 1.7358496388502422e-06, "loss": 1.4187, "step": 3624 }, { "epoch": 3.27, "learning_rate": 1.727634794080596e-06, "loss": 1.5036, "step": 3626 }, { "epoch": 3.27, "learning_rate": 1.7194375954032604e-06, "loss": 1.5692, "step": 3628 }, { "epoch": 3.27, "learning_rate": 1.7112580603039785e-06, "loss": 1.4624, "step": 3630 }, { "epoch": 3.28, "learning_rate": 1.703096206230831e-06, "loss": 1.4422, "step": 3632 }, { "epoch": 3.28, "learning_rate": 1.6949520505941686e-06, "loss": 1.5489, "step": 3634 }, { "epoch": 3.28, "learning_rate": 1.6868256107666015e-06, "loss": 1.4635, "step": 3636 }, { "epoch": 3.28, "learning_rate": 1.6787169040829387e-06, "loss": 1.4007, "step": 3638 }, { "epoch": 3.28, "learning_rate": 1.6706259478401653e-06, "loss": 1.5285, "step": 3640 }, { "epoch": 3.28, "learning_rate": 1.6625527592974077e-06, "loss": 1.468, "step": 3642 }, { "epoch": 3.29, "learning_rate": 1.6544973556758826e-06, "loss": 1.4775, "step": 3644 }, { "epoch": 3.29, "learning_rate": 1.646459754158878e-06, "loss": 1.4737, "step": 3646 }, { "epoch": 3.29, "learning_rate": 1.6384399718916977e-06, "loss": 1.5256, "step": 3648 }, { "epoch": 3.29, "learning_rate": 1.6304380259816477e-06, "loss": 1.473, "step": 3650 }, { "epoch": 3.29, "learning_rate": 1.6224539334979683e-06, "loss": 1.5123, "step": 3652 }, { "epoch": 3.29, "learning_rate": 1.614487711471835e-06, "loss": 1.4314, "step": 3654 }, { "epoch": 3.3, "learning_rate": 1.6065393768962889e-06, "loss": 1.4433, "step": 3656 }, { "epoch": 3.3, "learning_rate": 1.5986089467262256e-06, "loss": 1.4855, "step": 3658 }, { "epoch": 3.3, "learning_rate": 1.5906964378783373e-06, "loss": 1.4353, "step": 3660 }, { "epoch": 3.3, "learning_rate": 1.5828018672310996e-06, "loss": 1.4591, "step": 3662 }, { "epoch": 3.3, "learning_rate": 1.5749252516247139e-06, "loss": 1.4398, "step": 3664 }, { "epoch": 3.31, "learning_rate": 1.5670666078610809e-06, "loss": 1.449, "step": 3666 }, { "epoch": 3.31, "learning_rate": 1.5592259527037744e-06, "loss": 1.4773, "step": 3668 }, { "epoch": 3.31, "learning_rate": 1.551403302877984e-06, "loss": 1.5057, "step": 3670 }, { "epoch": 3.31, "learning_rate": 1.5435986750705046e-06, "loss": 1.4788, "step": 3672 }, { "epoch": 3.31, "learning_rate": 1.5358120859296721e-06, "loss": 1.4658, "step": 3674 }, { "epoch": 3.31, "learning_rate": 1.5280435520653569e-06, "loss": 1.4303, "step": 3676 }, { "epoch": 3.32, "learning_rate": 1.5202930900489056e-06, "loss": 1.4822, "step": 3678 }, { "epoch": 3.32, "learning_rate": 1.512560716413125e-06, "loss": 1.4529, "step": 3680 }, { "epoch": 3.32, "learning_rate": 1.5048464476522251e-06, "loss": 1.4679, "step": 3682 }, { "epoch": 3.32, "learning_rate": 1.497150300221808e-06, "loss": 1.5103, "step": 3684 }, { "epoch": 3.32, "learning_rate": 1.4894722905388103e-06, "loss": 1.5214, "step": 3686 }, { "epoch": 3.33, "learning_rate": 1.4818124349814812e-06, "loss": 1.4546, "step": 3688 }, { "epoch": 3.33, "learning_rate": 1.4741707498893487e-06, "loss": 1.4666, "step": 3690 }, { "epoch": 3.33, "learning_rate": 1.466547251563175e-06, "loss": 1.54, "step": 3692 }, { "epoch": 3.33, "learning_rate": 1.4589419562649342e-06, "loss": 1.5121, "step": 3694 }, { "epoch": 3.33, "learning_rate": 1.4513548802177635e-06, "loss": 1.5541, "step": 3696 }, { "epoch": 3.33, "learning_rate": 1.44378603960594e-06, "loss": 1.4956, "step": 3698 }, { "epoch": 3.34, "learning_rate": 1.43623545057484e-06, "loss": 1.4957, "step": 3700 }, { "epoch": 3.34, "learning_rate": 1.4287031292309105e-06, "loss": 1.5291, "step": 3702 }, { "epoch": 3.34, "learning_rate": 1.4211890916416249e-06, "loss": 1.461, "step": 3704 }, { "epoch": 3.34, "learning_rate": 1.4136933538354636e-06, "loss": 1.4236, "step": 3706 }, { "epoch": 3.34, "learning_rate": 1.406215931801862e-06, "loss": 1.4785, "step": 3708 }, { "epoch": 3.35, "learning_rate": 1.398756841491189e-06, "loss": 1.4242, "step": 3710 }, { "epoch": 3.35, "learning_rate": 1.3913160988147134e-06, "loss": 1.4035, "step": 3712 }, { "epoch": 3.35, "learning_rate": 1.383893719644558e-06, "loss": 1.409, "step": 3714 }, { "epoch": 3.35, "learning_rate": 1.3764897198136818e-06, "loss": 1.4705, "step": 3716 }, { "epoch": 3.35, "learning_rate": 1.3691041151158336e-06, "loss": 1.4163, "step": 3718 }, { "epoch": 3.35, "learning_rate": 1.361736921305522e-06, "loss": 1.5217, "step": 3720 }, { "epoch": 3.36, "learning_rate": 1.354388154097983e-06, "loss": 1.4778, "step": 3722 }, { "epoch": 3.36, "learning_rate": 1.3470578291691506e-06, "loss": 1.4679, "step": 3724 }, { "epoch": 3.36, "learning_rate": 1.339745962155613e-06, "loss": 1.4595, "step": 3726 }, { "epoch": 3.36, "learning_rate": 1.332452568654592e-06, "loss": 1.4545, "step": 3728 }, { "epoch": 3.36, "learning_rate": 1.3251776642238957e-06, "loss": 1.5006, "step": 3730 }, { "epoch": 3.37, "learning_rate": 1.3179212643818928e-06, "loss": 1.4342, "step": 3732 }, { "epoch": 3.37, "learning_rate": 1.3106833846074872e-06, "loss": 1.4721, "step": 3734 }, { "epoch": 3.37, "learning_rate": 1.3034640403400667e-06, "loss": 1.4343, "step": 3736 }, { "epoch": 3.37, "learning_rate": 1.2962632469794901e-06, "loss": 1.4435, "step": 3738 }, { "epoch": 3.37, "learning_rate": 1.289081019886036e-06, "loss": 1.503, "step": 3740 }, { "epoch": 3.37, "learning_rate": 1.2819173743803848e-06, "loss": 1.4574, "step": 3742 }, { "epoch": 3.38, "learning_rate": 1.2747723257435729e-06, "loss": 1.4816, "step": 3744 }, { "epoch": 3.38, "learning_rate": 1.2676458892169752e-06, "loss": 1.465, "step": 3746 }, { "epoch": 3.38, "learning_rate": 1.2605380800022594e-06, "loss": 1.4731, "step": 3748 }, { "epoch": 3.38, "learning_rate": 1.2534489132613603e-06, "loss": 1.5092, "step": 3750 }, { "epoch": 3.38, "learning_rate": 1.2463784041164461e-06, "loss": 1.4065, "step": 3752 }, { "epoch": 3.39, "learning_rate": 1.2393265676498812e-06, "loss": 1.5526, "step": 3754 }, { "epoch": 3.39, "learning_rate": 1.232293418904207e-06, "loss": 1.4752, "step": 3756 }, { "epoch": 3.39, "learning_rate": 1.2252789728820913e-06, "loss": 1.4876, "step": 3758 }, { "epoch": 3.39, "learning_rate": 1.2182832445463177e-06, "loss": 1.4785, "step": 3760 }, { "epoch": 3.39, "learning_rate": 1.2113062488197347e-06, "loss": 1.4643, "step": 3762 }, { "epoch": 3.39, "learning_rate": 1.2043480005852315e-06, "loss": 1.5868, "step": 3764 }, { "epoch": 3.4, "learning_rate": 1.1974085146857085e-06, "loss": 1.5001, "step": 3766 }, { "epoch": 3.4, "learning_rate": 1.1904878059240443e-06, "loss": 1.4765, "step": 3768 }, { "epoch": 3.4, "learning_rate": 1.1835858890630613e-06, "loss": 1.4759, "step": 3770 }, { "epoch": 3.4, "learning_rate": 1.1767027788254993e-06, "loss": 1.4678, "step": 3772 }, { "epoch": 3.4, "learning_rate": 1.1698384898939774e-06, "loss": 1.5006, "step": 3774 }, { "epoch": 3.4, "learning_rate": 1.1629930369109643e-06, "loss": 1.4419, "step": 3776 }, { "epoch": 3.41, "learning_rate": 1.156166434478758e-06, "loss": 1.4797, "step": 3778 }, { "epoch": 3.41, "learning_rate": 1.149358697159435e-06, "loss": 1.48, "step": 3780 }, { "epoch": 3.41, "learning_rate": 1.1425698394748408e-06, "loss": 1.4535, "step": 3782 }, { "epoch": 3.41, "learning_rate": 1.1357998759065358e-06, "loss": 1.4366, "step": 3784 }, { "epoch": 3.41, "learning_rate": 1.1290488208957894e-06, "loss": 1.4915, "step": 3786 }, { "epoch": 3.42, "learning_rate": 1.1223166888435243e-06, "loss": 1.4562, "step": 3788 }, { "epoch": 3.42, "learning_rate": 1.1156034941103067e-06, "loss": 1.517, "step": 3790 }, { "epoch": 3.42, "learning_rate": 1.1089092510163025e-06, "loss": 1.4764, "step": 3792 }, { "epoch": 3.42, "learning_rate": 1.1022339738412546e-06, "loss": 1.4986, "step": 3794 }, { "epoch": 3.42, "learning_rate": 1.0955776768244465e-06, "loss": 1.5427, "step": 3796 }, { "epoch": 3.42, "learning_rate": 1.088940374164672e-06, "loss": 1.401, "step": 3798 }, { "epoch": 3.43, "learning_rate": 1.082322080020215e-06, "loss": 1.532, "step": 3800 }, { "epoch": 3.43, "learning_rate": 1.0757228085088011e-06, "loss": 1.4071, "step": 3802 }, { "epoch": 3.43, "learning_rate": 1.06914257370759e-06, "loss": 1.4352, "step": 3804 }, { "epoch": 3.43, "learning_rate": 1.0625813896531212e-06, "loss": 1.4857, "step": 3806 }, { "epoch": 3.43, "learning_rate": 1.0560392703413103e-06, "loss": 1.4942, "step": 3808 }, { "epoch": 3.44, "learning_rate": 1.0495162297273876e-06, "loss": 1.4106, "step": 3810 }, { "epoch": 3.44, "learning_rate": 1.0430122817259015e-06, "loss": 1.4163, "step": 3812 }, { "epoch": 3.44, "learning_rate": 1.036527440210665e-06, "loss": 1.4498, "step": 3814 }, { "epoch": 3.44, "learning_rate": 1.0300617190147388e-06, "loss": 1.4431, "step": 3816 }, { "epoch": 3.44, "learning_rate": 1.0236151319303945e-06, "loss": 1.4551, "step": 3818 }, { "epoch": 3.44, "learning_rate": 1.0171876927090852e-06, "loss": 1.4693, "step": 3820 }, { "epoch": 3.45, "learning_rate": 1.010779415061428e-06, "loss": 1.4752, "step": 3822 }, { "epoch": 3.45, "learning_rate": 1.0043903126571553e-06, "loss": 1.4794, "step": 3824 }, { "epoch": 3.45, "learning_rate": 9.980203991251047e-07, "loss": 1.4714, "step": 3826 }, { "epoch": 3.45, "learning_rate": 9.916696880531739e-07, "loss": 1.4768, "step": 3828 }, { "epoch": 3.45, "learning_rate": 9.85338192988309e-07, "loss": 1.4577, "step": 3830 }, { "epoch": 3.46, "learning_rate": 9.79025927436451e-07, "loss": 1.4632, "step": 3832 }, { "epoch": 3.46, "learning_rate": 9.727329048625355e-07, "loss": 1.4518, "step": 3834 }, { "epoch": 3.46, "learning_rate": 9.664591386904432e-07, "loss": 1.456, "step": 3836 }, { "epoch": 3.46, "learning_rate": 9.60204642302982e-07, "loss": 1.5072, "step": 3838 }, { "epoch": 3.46, "learning_rate": 9.539694290418488e-07, "loss": 1.4777, "step": 3840 }, { "epoch": 3.46, "learning_rate": 9.477535122076142e-07, "loss": 1.5157, "step": 3842 }, { "epoch": 3.47, "learning_rate": 9.415569050596829e-07, "loss": 1.517, "step": 3844 }, { "epoch": 3.47, "learning_rate": 9.353796208162669e-07, "loss": 1.4609, "step": 3846 }, { "epoch": 3.47, "learning_rate": 9.292216726543668e-07, "loss": 1.388, "step": 3848 }, { "epoch": 3.47, "learning_rate": 9.230830737097297e-07, "loss": 1.5012, "step": 3850 }, { "epoch": 3.47, "learning_rate": 9.169638370768341e-07, "loss": 1.4105, "step": 3852 }, { "epoch": 3.48, "learning_rate": 9.108639758088489e-07, "loss": 1.4909, "step": 3854 }, { "epoch": 3.48, "learning_rate": 9.04783502917621e-07, "loss": 1.4654, "step": 3856 }, { "epoch": 3.48, "learning_rate": 8.987224313736309e-07, "loss": 1.4628, "step": 3858 }, { "epoch": 3.48, "learning_rate": 8.926807741059818e-07, "loss": 1.4521, "step": 3860 }, { "epoch": 3.48, "learning_rate": 8.866585440023545e-07, "loss": 1.5005, "step": 3862 }, { "epoch": 3.48, "learning_rate": 8.806557539089988e-07, "loss": 1.4683, "step": 3864 }, { "epoch": 3.49, "learning_rate": 8.746724166306897e-07, "loss": 1.5085, "step": 3866 }, { "epoch": 3.49, "learning_rate": 8.687085449307053e-07, "loss": 1.4309, "step": 3868 }, { "epoch": 3.49, "learning_rate": 8.627641515308072e-07, "loss": 1.521, "step": 3870 }, { "epoch": 3.49, "learning_rate": 8.56839249111201e-07, "loss": 1.5306, "step": 3872 }, { "epoch": 3.49, "learning_rate": 8.509338503105224e-07, "loss": 1.4273, "step": 3874 }, { "epoch": 3.5, "learning_rate": 8.450479677257962e-07, "loss": 1.4743, "step": 3876 }, { "epoch": 3.5, "learning_rate": 8.391816139124187e-07, "loss": 1.4535, "step": 3878 }, { "epoch": 3.5, "learning_rate": 8.33334801384128e-07, "loss": 1.5456, "step": 3880 }, { "epoch": 3.5, "learning_rate": 8.275075426129831e-07, "loss": 1.4466, "step": 3882 }, { "epoch": 3.5, "learning_rate": 8.216998500293238e-07, "loss": 1.5223, "step": 3884 }, { "epoch": 3.5, "learning_rate": 8.159117360217606e-07, "loss": 1.4852, "step": 3886 }, { "epoch": 3.51, "learning_rate": 8.101432129371345e-07, "loss": 1.5151, "step": 3888 }, { "epoch": 3.51, "learning_rate": 8.04394293080496e-07, "loss": 1.4739, "step": 3890 }, { "epoch": 3.51, "learning_rate": 7.986649887150877e-07, "loss": 1.4808, "step": 3892 }, { "epoch": 3.51, "learning_rate": 7.929553120622968e-07, "loss": 1.5439, "step": 3894 }, { "epoch": 3.51, "learning_rate": 7.87265275301654e-07, "loss": 1.4876, "step": 3896 }, { "epoch": 3.51, "learning_rate": 7.815948905707871e-07, "loss": 1.5406, "step": 3898 }, { "epoch": 3.52, "learning_rate": 7.759441699654068e-07, "loss": 1.4724, "step": 3900 }, { "epoch": 3.52, "learning_rate": 7.703131255392759e-07, "loss": 1.5031, "step": 3902 }, { "epoch": 3.52, "learning_rate": 7.647017693041881e-07, "loss": 1.4945, "step": 3904 }, { "epoch": 3.52, "learning_rate": 7.591101132299383e-07, "loss": 1.4353, "step": 3906 }, { "epoch": 3.52, "learning_rate": 7.535381692442945e-07, "loss": 1.4728, "step": 3908 }, { "epoch": 3.53, "learning_rate": 7.479859492329856e-07, "loss": 1.5694, "step": 3910 }, { "epoch": 3.53, "learning_rate": 7.42453465039652e-07, "loss": 1.4905, "step": 3912 }, { "epoch": 3.53, "learning_rate": 7.36940728465848e-07, "loss": 1.4811, "step": 3914 }, { "epoch": 3.53, "learning_rate": 7.314477512709972e-07, "loss": 1.4571, "step": 3916 }, { "epoch": 3.53, "learning_rate": 7.259745451723765e-07, "loss": 1.4766, "step": 3918 }, { "epoch": 3.53, "learning_rate": 7.20521121845087e-07, "loss": 1.4502, "step": 3920 }, { "epoch": 3.54, "learning_rate": 7.150874929220298e-07, "loss": 1.4518, "step": 3922 }, { "epoch": 3.54, "learning_rate": 7.0967366999388e-07, "loss": 1.4984, "step": 3924 }, { "epoch": 3.54, "learning_rate": 7.042796646090688e-07, "loss": 1.472, "step": 3926 }, { "epoch": 3.54, "learning_rate": 6.989054882737523e-07, "loss": 1.4784, "step": 3928 }, { "epoch": 3.54, "learning_rate": 6.935511524517835e-07, "loss": 1.4565, "step": 3930 }, { "epoch": 3.55, "learning_rate": 6.88216668564704e-07, "loss": 1.4471, "step": 3932 }, { "epoch": 3.55, "learning_rate": 6.829020479916936e-07, "loss": 1.4408, "step": 3934 }, { "epoch": 3.55, "learning_rate": 6.776073020695728e-07, "loss": 1.4734, "step": 3936 }, { "epoch": 3.55, "learning_rate": 6.723324420927601e-07, "loss": 1.4693, "step": 3938 }, { "epoch": 3.55, "learning_rate": 6.670774793132606e-07, "loss": 1.4931, "step": 3940 }, { "epoch": 3.55, "learning_rate": 6.618424249406297e-07, "loss": 1.4914, "step": 3942 }, { "epoch": 3.56, "learning_rate": 6.566272901419579e-07, "loss": 1.4823, "step": 3944 }, { "epoch": 3.56, "learning_rate": 6.514320860418477e-07, "loss": 1.5082, "step": 3946 }, { "epoch": 3.56, "learning_rate": 6.462568237223787e-07, "loss": 1.5022, "step": 3948 }, { "epoch": 3.56, "learning_rate": 6.411015142231025e-07, "loss": 1.4829, "step": 3950 }, { "epoch": 3.56, "learning_rate": 6.35966168540998e-07, "loss": 1.5037, "step": 3952 }, { "epoch": 3.57, "learning_rate": 6.308507976304701e-07, "loss": 1.5403, "step": 3954 }, { "epoch": 3.57, "learning_rate": 6.257554124033005e-07, "loss": 1.4079, "step": 3956 }, { "epoch": 3.57, "learning_rate": 6.206800237286514e-07, "loss": 1.5042, "step": 3958 }, { "epoch": 3.57, "learning_rate": 6.156246424330215e-07, "loss": 1.458, "step": 3960 }, { "epoch": 3.57, "learning_rate": 6.105892793002377e-07, "loss": 1.4147, "step": 3962 }, { "epoch": 3.57, "learning_rate": 6.055739450714182e-07, "loss": 1.4517, "step": 3964 }, { "epoch": 3.58, "learning_rate": 6.005786504449651e-07, "loss": 1.4733, "step": 3966 }, { "epoch": 3.58, "learning_rate": 5.956034060765248e-07, "loss": 1.395, "step": 3968 }, { "epoch": 3.58, "learning_rate": 5.906482225789767e-07, "loss": 1.4927, "step": 3970 }, { "epoch": 3.58, "learning_rate": 5.857131105224123e-07, "loss": 1.4974, "step": 3972 }, { "epoch": 3.58, "learning_rate": 5.807980804341007e-07, "loss": 1.5244, "step": 3974 }, { "epoch": 3.59, "learning_rate": 5.759031427984818e-07, "loss": 1.4225, "step": 3976 }, { "epoch": 3.59, "learning_rate": 5.710283080571233e-07, "loss": 1.4604, "step": 3978 }, { "epoch": 3.59, "learning_rate": 5.661735866087237e-07, "loss": 1.4633, "step": 3980 }, { "epoch": 3.59, "learning_rate": 5.61338988809067e-07, "loss": 1.4687, "step": 3982 }, { "epoch": 3.59, "learning_rate": 5.565245249710194e-07, "loss": 1.4547, "step": 3984 }, { "epoch": 3.59, "learning_rate": 5.517302053644902e-07, "loss": 1.5225, "step": 3986 }, { "epoch": 3.6, "learning_rate": 5.469560402164254e-07, "loss": 1.4871, "step": 3988 }, { "epoch": 3.6, "learning_rate": 5.422020397107753e-07, "loss": 1.4344, "step": 3990 }, { "epoch": 3.6, "learning_rate": 5.374682139884735e-07, "loss": 1.5054, "step": 3992 }, { "epoch": 3.6, "learning_rate": 5.327545731474237e-07, "loss": 1.4971, "step": 3994 }, { "epoch": 3.6, "learning_rate": 5.280611272424696e-07, "loss": 1.5025, "step": 3996 }, { "epoch": 3.61, "learning_rate": 5.23387886285377e-07, "loss": 1.4235, "step": 3998 }, { "epoch": 3.61, "learning_rate": 5.187348602448106e-07, "loss": 1.6073, "step": 4000 }, { "epoch": 3.61, "learning_rate": 5.141020590463142e-07, "loss": 1.466, "step": 4002 }, { "epoch": 3.61, "learning_rate": 5.094894925722893e-07, "loss": 1.4511, "step": 4004 }, { "epoch": 3.61, "learning_rate": 5.048971706619777e-07, "loss": 1.5318, "step": 4006 }, { "epoch": 3.61, "learning_rate": 5.003251031114287e-07, "loss": 1.4607, "step": 4008 }, { "epoch": 3.62, "learning_rate": 4.957732996734943e-07, "loss": 1.4208, "step": 4010 }, { "epoch": 3.62, "learning_rate": 4.912417700577965e-07, "loss": 1.4769, "step": 4012 }, { "epoch": 3.62, "learning_rate": 4.867305239307096e-07, "loss": 1.4936, "step": 4014 }, { "epoch": 3.62, "learning_rate": 4.822395709153426e-07, "loss": 1.5077, "step": 4016 }, { "epoch": 3.62, "learning_rate": 4.77768920591516e-07, "loss": 1.5571, "step": 4018 }, { "epoch": 3.62, "learning_rate": 4.73318582495742e-07, "loss": 1.4608, "step": 4020 }, { "epoch": 3.63, "learning_rate": 4.688885661212039e-07, "loss": 1.5269, "step": 4022 }, { "epoch": 3.63, "learning_rate": 4.644788809177336e-07, "loss": 1.4387, "step": 4024 }, { "epoch": 3.63, "learning_rate": 4.6008953629179676e-07, "loss": 1.5016, "step": 4026 }, { "epoch": 3.63, "learning_rate": 4.5572054160647025e-07, "loss": 1.4119, "step": 4028 }, { "epoch": 3.63, "learning_rate": 4.5137190618141813e-07, "loss": 1.479, "step": 4030 }, { "epoch": 3.64, "learning_rate": 4.470436392928812e-07, "loss": 1.474, "step": 4032 }, { "epoch": 3.64, "learning_rate": 4.42735750173644e-07, "loss": 1.491, "step": 4034 }, { "epoch": 3.64, "learning_rate": 4.384482480130281e-07, "loss": 1.466, "step": 4036 }, { "epoch": 3.64, "learning_rate": 4.341811419568653e-07, "loss": 1.4711, "step": 4038 }, { "epoch": 3.64, "learning_rate": 4.2993444110747795e-07, "loss": 1.4969, "step": 4040 }, { "epoch": 3.64, "learning_rate": 4.257081545236641e-07, "loss": 1.4628, "step": 4042 }, { "epoch": 3.65, "learning_rate": 4.2150229122067565e-07, "loss": 1.4757, "step": 4044 }, { "epoch": 3.65, "learning_rate": 4.1731686017019377e-07, "loss": 1.4377, "step": 4046 }, { "epoch": 3.65, "learning_rate": 4.131518703003201e-07, "loss": 1.3858, "step": 4048 }, { "epoch": 3.65, "learning_rate": 4.090073304955511e-07, "loss": 1.4229, "step": 4050 }, { "epoch": 3.65, "learning_rate": 4.0488324959675805e-07, "loss": 1.4659, "step": 4052 }, { "epoch": 3.66, "learning_rate": 4.007796364011762e-07, "loss": 1.3815, "step": 4054 }, { "epoch": 3.66, "learning_rate": 3.9669649966237347e-07, "loss": 1.4247, "step": 4056 }, { "epoch": 3.66, "learning_rate": 3.9263384809024143e-07, "loss": 1.4442, "step": 4058 }, { "epoch": 3.66, "learning_rate": 3.885916903509779e-07, "loss": 1.4681, "step": 4060 }, { "epoch": 3.66, "learning_rate": 3.845700350670567e-07, "loss": 1.5604, "step": 4062 }, { "epoch": 3.66, "learning_rate": 3.8056889081722556e-07, "loss": 1.4922, "step": 4064 }, { "epoch": 3.67, "learning_rate": 3.765882661364739e-07, "loss": 1.4546, "step": 4066 }, { "epoch": 3.67, "learning_rate": 3.7262816951602057e-07, "loss": 1.4963, "step": 4068 }, { "epoch": 3.67, "learning_rate": 3.68688609403296e-07, "loss": 1.4392, "step": 4070 }, { "epoch": 3.67, "learning_rate": 3.6476959420192473e-07, "loss": 1.5181, "step": 4072 }, { "epoch": 3.67, "learning_rate": 3.6087113227170287e-07, "loss": 1.5524, "step": 4074 }, { "epoch": 3.68, "learning_rate": 3.569932319285885e-07, "loss": 1.4297, "step": 4076 }, { "epoch": 3.68, "learning_rate": 3.5313590144467446e-07, "loss": 1.49, "step": 4078 }, { "epoch": 3.68, "learning_rate": 3.4929914904817677e-07, "loss": 1.4437, "step": 4080 }, { "epoch": 3.68, "learning_rate": 3.4548298292341744e-07, "loss": 1.501, "step": 4082 }, { "epoch": 3.68, "learning_rate": 3.4168741121080153e-07, "loss": 1.5746, "step": 4084 }, { "epoch": 3.68, "learning_rate": 3.379124420068081e-07, "loss": 1.4168, "step": 4086 }, { "epoch": 3.69, "learning_rate": 3.3415808336396127e-07, "loss": 1.4926, "step": 4088 }, { "epoch": 3.69, "learning_rate": 3.304243432908294e-07, "loss": 1.4883, "step": 4090 }, { "epoch": 3.69, "learning_rate": 3.267112297519881e-07, "loss": 1.4488, "step": 4092 }, { "epoch": 3.69, "learning_rate": 3.230187506680227e-07, "loss": 1.4376, "step": 4094 }, { "epoch": 3.69, "learning_rate": 3.193469139154959e-07, "loss": 1.4768, "step": 4096 }, { "epoch": 3.7, "learning_rate": 3.156957273269434e-07, "loss": 1.4695, "step": 4098 }, { "epoch": 3.7, "learning_rate": 3.120651986908474e-07, "loss": 1.4708, "step": 4100 }, { "epoch": 3.7, "learning_rate": 3.084553357516218e-07, "loss": 1.4751, "step": 4102 }, { "epoch": 3.7, "learning_rate": 3.0486614620960476e-07, "loss": 1.5633, "step": 4104 }, { "epoch": 3.7, "learning_rate": 3.012976377210275e-07, "loss": 1.4757, "step": 4106 }, { "epoch": 3.7, "learning_rate": 2.9774981789801205e-07, "loss": 1.4903, "step": 4108 }, { "epoch": 3.71, "learning_rate": 2.942226943085424e-07, "loss": 1.4601, "step": 4110 }, { "epoch": 3.71, "learning_rate": 2.9071627447646353e-07, "loss": 1.4546, "step": 4112 }, { "epoch": 3.71, "learning_rate": 2.872305658814445e-07, "loss": 1.4475, "step": 4114 }, { "epoch": 3.71, "learning_rate": 2.8376557595898635e-07, "loss": 1.5515, "step": 4116 }, { "epoch": 3.71, "learning_rate": 2.803213121003856e-07, "loss": 1.4752, "step": 4118 }, { "epoch": 3.72, "learning_rate": 2.7689778165273184e-07, "loss": 1.4019, "step": 4120 }, { "epoch": 3.72, "learning_rate": 2.7349499191888675e-07, "loss": 1.4234, "step": 4122 }, { "epoch": 3.72, "learning_rate": 2.701129501574673e-07, "loss": 1.4638, "step": 4124 }, { "epoch": 3.72, "learning_rate": 2.667516635828349e-07, "loss": 1.4352, "step": 4126 }, { "epoch": 3.72, "learning_rate": 2.634111393650751e-07, "loss": 1.3558, "step": 4128 }, { "epoch": 3.72, "learning_rate": 2.600913846299857e-07, "loss": 1.5526, "step": 4130 }, { "epoch": 3.73, "learning_rate": 2.567924064590599e-07, "loss": 1.5054, "step": 4132 }, { "epoch": 3.73, "learning_rate": 2.5351421188947287e-07, "loss": 1.4943, "step": 4134 }, { "epoch": 3.73, "learning_rate": 2.50256807914061e-07, "loss": 1.4276, "step": 4136 }, { "epoch": 3.73, "learning_rate": 2.4702020148131836e-07, "loss": 1.4076, "step": 4138 }, { "epoch": 3.73, "learning_rate": 2.438043994953687e-07, "loss": 1.4831, "step": 4140 }, { "epoch": 3.73, "learning_rate": 2.406094088159616e-07, "loss": 1.5224, "step": 4142 }, { "epoch": 3.74, "learning_rate": 2.374352362584509e-07, "loss": 1.4096, "step": 4144 }, { "epoch": 3.74, "learning_rate": 2.3428188859378253e-07, "loss": 1.4154, "step": 4146 }, { "epoch": 3.74, "learning_rate": 2.3114937254848258e-07, "loss": 1.5115, "step": 4148 }, { "epoch": 3.74, "learning_rate": 2.280376948046359e-07, "loss": 1.3861, "step": 4150 }, { "epoch": 3.74, "learning_rate": 2.2494686199988069e-07, "loss": 1.4862, "step": 4152 }, { "epoch": 3.75, "learning_rate": 2.2187688072738632e-07, "loss": 1.4352, "step": 4154 }, { "epoch": 3.75, "learning_rate": 2.1882775753584884e-07, "loss": 1.5093, "step": 4156 }, { "epoch": 3.75, "learning_rate": 2.1579949892946206e-07, "loss": 1.4904, "step": 4158 }, { "epoch": 3.75, "learning_rate": 2.127921113679221e-07, "loss": 1.5107, "step": 4160 }, { "epoch": 3.75, "learning_rate": 2.1129624623544843e-07, "loss": 1.3679, "step": 4162 }, { "epoch": 3.75, "learning_rate": 2.083201772557053e-07, "loss": 1.4715, "step": 4164 }, { "epoch": 3.76, "learning_rate": 2.0536499527522792e-07, "loss": 1.4586, "step": 4166 }, { "epoch": 3.76, "learning_rate": 2.0243070659782705e-07, "loss": 1.4954, "step": 4168 }, { "epoch": 3.76, "learning_rate": 1.9951731748274473e-07, "loss": 1.449, "step": 4170 }, { "epoch": 3.76, "learning_rate": 1.966248341446375e-07, "loss": 1.4139, "step": 4172 }, { "epoch": 3.76, "learning_rate": 1.937532627535721e-07, "loss": 1.4097, "step": 4174 }, { "epoch": 3.77, "learning_rate": 1.9090260943500416e-07, "loss": 1.4908, "step": 4176 }, { "epoch": 3.77, "learning_rate": 1.8807288026977066e-07, "loss": 1.5407, "step": 4178 }, { "epoch": 3.77, "learning_rate": 1.8526408129407093e-07, "loss": 1.5268, "step": 4180 }, { "epoch": 3.77, "learning_rate": 1.8247621849946107e-07, "loss": 1.4683, "step": 4182 }, { "epoch": 3.77, "learning_rate": 1.7970929783283408e-07, "loss": 1.4642, "step": 4184 }, { "epoch": 3.77, "learning_rate": 1.7696332519641313e-07, "loss": 1.4373, "step": 4186 }, { "epoch": 3.78, "learning_rate": 1.742383064477371e-07, "loss": 1.48, "step": 4188 }, { "epoch": 3.78, "learning_rate": 1.7153424739964174e-07, "loss": 1.4326, "step": 4190 }, { "epoch": 3.78, "learning_rate": 1.6885115382026084e-07, "loss": 1.4323, "step": 4192 }, { "epoch": 3.78, "learning_rate": 1.6618903143299836e-07, "loss": 1.4751, "step": 4194 }, { "epoch": 3.78, "learning_rate": 1.635478859165296e-07, "loss": 1.4597, "step": 4196 }, { "epoch": 3.79, "learning_rate": 1.609277229047801e-07, "loss": 1.5044, "step": 4198 }, { "epoch": 3.79, "learning_rate": 1.5832854798691788e-07, "loss": 1.4625, "step": 4200 }, { "epoch": 3.79, "learning_rate": 1.5575036670734123e-07, "loss": 1.4762, "step": 4202 }, { "epoch": 3.79, "learning_rate": 1.5319318456566424e-07, "loss": 1.5121, "step": 4204 }, { "epoch": 3.79, "learning_rate": 1.5065700701670793e-07, "loss": 1.5383, "step": 4206 }, { "epoch": 3.79, "learning_rate": 1.481418394704881e-07, "loss": 1.5195, "step": 4208 }, { "epoch": 3.8, "learning_rate": 1.4564768729220414e-07, "loss": 1.5122, "step": 4210 }, { "epoch": 3.8, "learning_rate": 1.431745558022235e-07, "loss": 1.4437, "step": 4212 }, { "epoch": 3.8, "learning_rate": 1.4072245027607734e-07, "loss": 1.5162, "step": 4214 }, { "epoch": 3.8, "learning_rate": 1.3829137594444154e-07, "loss": 1.4974, "step": 4216 }, { "epoch": 3.8, "learning_rate": 1.358813379931345e-07, "loss": 1.4813, "step": 4218 }, { "epoch": 3.81, "learning_rate": 1.3349234156309732e-07, "loss": 1.4017, "step": 4220 }, { "epoch": 3.81, "learning_rate": 1.3112439175038794e-07, "loss": 1.3822, "step": 4222 }, { "epoch": 3.81, "learning_rate": 1.287774936061692e-07, "loss": 1.4827, "step": 4224 }, { "epoch": 3.81, "learning_rate": 1.2645165213669651e-07, "loss": 1.4623, "step": 4226 }, { "epoch": 3.81, "learning_rate": 1.2414687230331124e-07, "loss": 1.5188, "step": 4228 }, { "epoch": 3.81, "learning_rate": 1.2186315902242285e-07, "loss": 1.4864, "step": 4230 }, { "epoch": 3.82, "learning_rate": 1.1960051716550903e-07, "loss": 1.5138, "step": 4232 }, { "epoch": 3.82, "learning_rate": 1.1735895155909338e-07, "loss": 1.427, "step": 4234 }, { "epoch": 3.82, "learning_rate": 1.1513846698474551e-07, "loss": 1.405, "step": 4236 }, { "epoch": 3.82, "learning_rate": 1.1293906817906209e-07, "loss": 1.5173, "step": 4238 }, { "epoch": 3.82, "learning_rate": 1.1076075983366574e-07, "loss": 1.448, "step": 4240 }, { "epoch": 3.83, "learning_rate": 1.0860354659518512e-07, "loss": 1.4635, "step": 4242 }, { "epoch": 3.83, "learning_rate": 1.0646743306525376e-07, "loss": 1.4324, "step": 4244 }, { "epoch": 3.83, "learning_rate": 1.043524238004956e-07, "loss": 1.3653, "step": 4246 }, { "epoch": 3.83, "learning_rate": 1.0225852331251729e-07, "loss": 1.4727, "step": 4248 }, { "epoch": 3.83, "learning_rate": 1.0018573606789483e-07, "loss": 1.446, "step": 4250 }, { "epoch": 3.83, "learning_rate": 9.813406648816804e-08, "loss": 1.4742, "step": 4252 }, { "epoch": 3.84, "learning_rate": 9.610351894983162e-08, "loss": 1.4897, "step": 4254 }, { "epoch": 3.84, "learning_rate": 9.409409778432189e-08, "loss": 1.5121, "step": 4256 }, { "epoch": 3.84, "learning_rate": 9.21058072780101e-08, "loss": 1.4643, "step": 4258 }, { "epoch": 3.84, "learning_rate": 9.013865167219471e-08, "loss": 1.4174, "step": 4260 }, { "epoch": 3.84, "learning_rate": 8.819263516308796e-08, "loss": 1.5006, "step": 4262 }, { "epoch": 3.84, "learning_rate": 8.62677619018104e-08, "loss": 1.4582, "step": 4264 }, { "epoch": 3.85, "learning_rate": 8.436403599438203e-08, "loss": 1.5086, "step": 4266 }, { "epoch": 3.85, "learning_rate": 8.24814615017111e-08, "loss": 1.4035, "step": 4268 }, { "epoch": 3.85, "learning_rate": 8.062004243958866e-08, "loss": 1.4, "step": 4270 }, { "epoch": 3.85, "learning_rate": 7.877978277867737e-08, "loss": 1.4812, "step": 4272 }, { "epoch": 3.85, "learning_rate": 7.696068644450383e-08, "loss": 1.4961, "step": 4274 }, { "epoch": 3.86, "learning_rate": 7.51627573174507e-08, "loss": 1.4979, "step": 4276 }, { "epoch": 3.86, "learning_rate": 7.338599923274791e-08, "loss": 1.437, "step": 4278 }, { "epoch": 3.86, "learning_rate": 7.163041598046483e-08, "loss": 1.4799, "step": 4280 }, { "epoch": 3.86, "learning_rate": 6.98960113055025e-08, "loss": 1.5305, "step": 4282 }, { "epoch": 3.86, "learning_rate": 6.818278890758367e-08, "loss": 1.5565, "step": 4284 }, { "epoch": 3.86, "learning_rate": 6.649075244124614e-08, "loss": 1.4055, "step": 4286 }, { "epoch": 3.87, "learning_rate": 6.481990551583939e-08, "loss": 1.4872, "step": 4288 }, { "epoch": 3.87, "learning_rate": 6.317025169550684e-08, "loss": 1.514, "step": 4290 }, { "epoch": 3.87, "learning_rate": 6.154179449918918e-08, "loss": 1.4633, "step": 4292 }, { "epoch": 3.87, "learning_rate": 5.993453740060773e-08, "loss": 1.4542, "step": 4294 }, { "epoch": 3.87, "learning_rate": 5.8348483828263305e-08, "loss": 1.4561, "step": 4296 }, { "epoch": 3.88, "learning_rate": 5.678363716542623e-08, "loss": 1.4949, "step": 4298 }, { "epoch": 3.88, "learning_rate": 5.5240000750129695e-08, "loss": 1.3946, "step": 4300 }, { "epoch": 3.88, "learning_rate": 5.3717577875161964e-08, "loss": 1.5279, "step": 4302 }, { "epoch": 3.88, "learning_rate": 5.2216371788061936e-08, "loss": 1.4138, "step": 4304 }, { "epoch": 3.88, "learning_rate": 5.0736385691106946e-08, "loss": 1.4473, "step": 4306 }, { "epoch": 3.88, "learning_rate": 4.9277622741310535e-08, "loss": 1.5867, "step": 4308 }, { "epoch": 3.89, "learning_rate": 4.78400860504169e-08, "loss": 1.4765, "step": 4310 }, { "epoch": 3.89, "learning_rate": 4.642377868488646e-08, "loss": 1.5305, "step": 4312 }, { "epoch": 3.89, "learning_rate": 4.5028703665900286e-08, "loss": 1.5238, "step": 4314 }, { "epoch": 3.89, "learning_rate": 4.365486396934349e-08, "loss": 1.5024, "step": 4316 }, { "epoch": 3.89, "learning_rate": 4.230226252580516e-08, "loss": 1.4614, "step": 4318 }, { "epoch": 3.9, "learning_rate": 4.0970902220570654e-08, "loss": 1.4628, "step": 4320 }, { "epoch": 3.9, "learning_rate": 3.9660785893614883e-08, "loss": 1.4929, "step": 4322 }, { "epoch": 3.9, "learning_rate": 3.837191633959458e-08, "loss": 1.4142, "step": 4324 }, { "epoch": 3.9, "learning_rate": 3.710429630784718e-08, "loss": 1.4489, "step": 4326 }, { "epoch": 3.9, "learning_rate": 3.585792850238079e-08, "loss": 1.4397, "step": 4328 }, { "epoch": 3.9, "learning_rate": 3.4632815581866484e-08, "loss": 1.5186, "step": 4330 }, { "epoch": 3.91, "learning_rate": 3.3428960159642697e-08, "loss": 1.3846, "step": 4332 }, { "epoch": 3.91, "learning_rate": 3.2246364803695254e-08, "loss": 1.4404, "step": 4334 }, { "epoch": 3.91, "learning_rate": 3.108503203666402e-08, "loss": 1.4731, "step": 4336 }, { "epoch": 3.91, "learning_rate": 2.994496433583072e-08, "loss": 1.5085, "step": 4338 }, { "epoch": 3.91, "learning_rate": 2.8826164133117784e-08, "loss": 1.4772, "step": 4340 }, { "epoch": 3.92, "learning_rate": 2.7728633815079508e-08, "loss": 1.47, "step": 4342 }, { "epoch": 3.92, "learning_rate": 2.6652375722897582e-08, "loss": 1.5466, "step": 4344 }, { "epoch": 3.92, "learning_rate": 2.5597392152379996e-08, "loss": 1.4395, "step": 4346 }, { "epoch": 3.92, "learning_rate": 2.456368535394993e-08, "loss": 1.4466, "step": 4348 }, { "epoch": 3.92, "learning_rate": 2.3551257532649085e-08, "loss": 1.5694, "step": 4350 }, { "epoch": 3.92, "learning_rate": 2.256011084812326e-08, "loss": 1.4094, "step": 4352 }, { "epoch": 3.93, "learning_rate": 2.1590247414624566e-08, "loss": 1.3878, "step": 4354 }, { "epoch": 3.93, "learning_rate": 2.0641669301004752e-08, "loss": 1.4059, "step": 4356 }, { "epoch": 3.93, "learning_rate": 1.9714378530711896e-08, "loss": 1.4624, "step": 4358 }, { "epoch": 3.93, "learning_rate": 1.8808377081785954e-08, "loss": 1.5119, "step": 4360 }, { "epoch": 3.93, "learning_rate": 1.7923666886849877e-08, "loss": 1.5316, "step": 4362 }, { "epoch": 3.94, "learning_rate": 1.706024983311405e-08, "loss": 1.4541, "step": 4364 }, { "epoch": 3.94, "learning_rate": 1.62181277623652e-08, "loss": 1.4447, "step": 4366 }, { "epoch": 3.94, "learning_rate": 1.539730247096638e-08, "loss": 1.5013, "step": 4368 }, { "epoch": 3.94, "learning_rate": 1.4597775709849215e-08, "loss": 1.4457, "step": 4370 }, { "epoch": 3.94, "learning_rate": 1.3819549184516113e-08, "loss": 1.4869, "step": 4372 }, { "epoch": 3.94, "learning_rate": 1.3062624555029157e-08, "loss": 1.4717, "step": 4374 }, { "epoch": 3.95, "learning_rate": 1.2327003436013452e-08, "loss": 1.4999, "step": 4376 }, { "epoch": 3.95, "learning_rate": 1.1612687396650445e-08, "loss": 1.3948, "step": 4378 }, { "epoch": 3.95, "learning_rate": 1.0919677960674612e-08, "loss": 1.5142, "step": 4380 }, { "epoch": 3.95, "learning_rate": 1.0247976606369004e-08, "loss": 1.4844, "step": 4382 }, { "epoch": 3.95, "learning_rate": 9.59758476656636e-09, "loss": 1.4479, "step": 4384 }, { "epoch": 3.95, "learning_rate": 8.96850382864134e-09, "loss": 1.4565, "step": 4386 }, { "epoch": 3.96, "learning_rate": 8.360735134510522e-09, "loss": 1.5029, "step": 4388 }, { "epoch": 3.96, "learning_rate": 7.774279980626853e-09, "loss": 1.4039, "step": 4390 }, { "epoch": 3.96, "learning_rate": 7.2091396179807534e-09, "loss": 1.4211, "step": 4392 }, { "epoch": 3.96, "learning_rate": 6.665315252094573e-09, "loss": 1.484, "step": 4394 }, { "epoch": 3.96, "learning_rate": 6.142808043020365e-09, "loss": 1.5201, "step": 4396 }, { "epoch": 3.97, "learning_rate": 5.64161910533767e-09, "loss": 1.502, "step": 4398 }, { "epoch": 3.97, "learning_rate": 5.161749508150182e-09, "loss": 1.5364, "step": 4400 }, { "epoch": 3.97, "learning_rate": 4.703200275087971e-09, "loss": 1.4239, "step": 4402 }, { "epoch": 3.97, "learning_rate": 4.26597238429749e-09, "loss": 1.3793, "step": 4404 }, { "epoch": 3.97, "learning_rate": 3.850066768448235e-09, "loss": 1.564, "step": 4406 }, { "epoch": 3.97, "learning_rate": 3.4554843147216467e-09, "loss": 1.5356, "step": 4408 }, { "epoch": 3.98, "learning_rate": 3.0822258648177673e-09, "loss": 1.5149, "step": 4410 }, { "epoch": 3.98, "learning_rate": 2.730292214947472e-09, "loss": 1.4139, "step": 4412 }, { "epoch": 3.98, "learning_rate": 2.3996841158346885e-09, "loss": 1.5084, "step": 4414 }, { "epoch": 3.98, "learning_rate": 2.090402272710845e-09, "loss": 1.4369, "step": 4416 }, { "epoch": 3.98, "learning_rate": 1.8024473453170933e-09, "loss": 1.5157, "step": 4418 }, { "epoch": 3.99, "learning_rate": 1.535819947900974e-09, "loss": 1.5208, "step": 4420 }, { "epoch": 3.99, "learning_rate": 1.29052064921531e-09, "loss": 1.4772, "step": 4422 }, { "epoch": 3.99, "learning_rate": 1.0665499725170948e-09, "loss": 1.4908, "step": 4424 }, { "epoch": 3.99, "learning_rate": 8.639083955663819e-10, "loss": 1.4492, "step": 4426 }, { "epoch": 3.99, "learning_rate": 6.825963506262856e-10, "loss": 1.4351, "step": 4428 }, { "epoch": 3.99, "learning_rate": 5.226142244585397e-10, "loss": 1.4981, "step": 4430 }, { "epoch": 4.0, "learning_rate": 3.839623583301588e-10, "loss": 1.4821, "step": 4432 }, { "epoch": 4.0, "learning_rate": 2.6664104800122604e-10, "loss": 1.4517, "step": 4434 }, { "epoch": 4.0, "learning_rate": 1.7065054373710533e-10, "loss": 1.4852, "step": 4436 }, { "epoch": 4.0, "step": 4436, "total_flos": 1.1913558786415002e+17, "train_loss": 1.5918720781641893, "train_runtime": 35147.4688, "train_samples_per_second": 8.077, "train_steps_per_second": 0.126 } ], "logging_steps": 2, "max_steps": 4436, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "total_flos": 1.1913558786415002e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }