Nhanvi282's picture
Model save
bd18e35 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9990091652216995,
"eval_steps": 500,
"global_step": 4036,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.278688524590164e-08,
"loss": 1.8177,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 6.557377049180328e-08,
"loss": 1.8121,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 9.836065573770492e-08,
"loss": 1.8325,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 1.3114754098360656e-07,
"loss": 1.841,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 1.6393442622950818e-07,
"loss": 1.7706,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 1.9672131147540984e-07,
"loss": 1.8287,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 2.2950819672131146e-07,
"loss": 1.8792,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 2.622950819672131e-07,
"loss": 1.8158,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 2.950819672131147e-07,
"loss": 1.8152,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 3.2786885245901637e-07,
"loss": 1.7649,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 3.60655737704918e-07,
"loss": 1.8348,
"step": 22
},
{
"epoch": 0.02,
"learning_rate": 3.9344262295081967e-07,
"loss": 1.8553,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 4.2622950819672127e-07,
"loss": 1.8001,
"step": 26
},
{
"epoch": 0.03,
"learning_rate": 4.590163934426229e-07,
"loss": 1.8601,
"step": 28
},
{
"epoch": 0.03,
"learning_rate": 4.918032786885245e-07,
"loss": 1.8731,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 5.245901639344262e-07,
"loss": 1.8386,
"step": 32
},
{
"epoch": 0.03,
"learning_rate": 5.573770491803278e-07,
"loss": 1.8927,
"step": 34
},
{
"epoch": 0.04,
"learning_rate": 5.901639344262294e-07,
"loss": 1.8632,
"step": 36
},
{
"epoch": 0.04,
"learning_rate": 6.229508196721311e-07,
"loss": 1.8382,
"step": 38
},
{
"epoch": 0.04,
"learning_rate": 6.557377049180327e-07,
"loss": 1.7967,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 6.885245901639343e-07,
"loss": 1.8523,
"step": 42
},
{
"epoch": 0.04,
"learning_rate": 7.21311475409836e-07,
"loss": 1.8736,
"step": 44
},
{
"epoch": 0.05,
"learning_rate": 7.540983606557376e-07,
"loss": 1.8387,
"step": 46
},
{
"epoch": 0.05,
"learning_rate": 7.868852459016393e-07,
"loss": 1.7413,
"step": 48
},
{
"epoch": 0.05,
"learning_rate": 8.196721311475409e-07,
"loss": 1.8022,
"step": 50
},
{
"epoch": 0.05,
"learning_rate": 8.524590163934425e-07,
"loss": 1.8004,
"step": 52
},
{
"epoch": 0.05,
"learning_rate": 8.852459016393443e-07,
"loss": 1.8658,
"step": 54
},
{
"epoch": 0.06,
"learning_rate": 9.180327868852458e-07,
"loss": 1.819,
"step": 56
},
{
"epoch": 0.06,
"learning_rate": 9.508196721311474e-07,
"loss": 1.8115,
"step": 58
},
{
"epoch": 0.06,
"learning_rate": 9.83606557377049e-07,
"loss": 1.7732,
"step": 60
},
{
"epoch": 0.06,
"learning_rate": 1.0163934426229509e-06,
"loss": 1.8582,
"step": 62
},
{
"epoch": 0.06,
"learning_rate": 1.0491803278688525e-06,
"loss": 1.8034,
"step": 64
},
{
"epoch": 0.07,
"learning_rate": 1.081967213114754e-06,
"loss": 1.7767,
"step": 66
},
{
"epoch": 0.07,
"learning_rate": 1.1147540983606557e-06,
"loss": 1.7388,
"step": 68
},
{
"epoch": 0.07,
"learning_rate": 1.1475409836065573e-06,
"loss": 1.891,
"step": 70
},
{
"epoch": 0.07,
"learning_rate": 1.1803278688524589e-06,
"loss": 1.847,
"step": 72
},
{
"epoch": 0.07,
"learning_rate": 1.2131147540983607e-06,
"loss": 1.8508,
"step": 74
},
{
"epoch": 0.08,
"learning_rate": 1.2459016393442623e-06,
"loss": 1.8578,
"step": 76
},
{
"epoch": 0.08,
"learning_rate": 1.2786885245901639e-06,
"loss": 1.8297,
"step": 78
},
{
"epoch": 0.08,
"learning_rate": 1.3114754098360655e-06,
"loss": 1.7981,
"step": 80
},
{
"epoch": 0.08,
"learning_rate": 1.344262295081967e-06,
"loss": 1.8453,
"step": 82
},
{
"epoch": 0.08,
"learning_rate": 1.3770491803278687e-06,
"loss": 1.7627,
"step": 84
},
{
"epoch": 0.09,
"learning_rate": 1.4098360655737705e-06,
"loss": 1.7822,
"step": 86
},
{
"epoch": 0.09,
"learning_rate": 1.442622950819672e-06,
"loss": 1.8156,
"step": 88
},
{
"epoch": 0.09,
"learning_rate": 1.4754098360655739e-06,
"loss": 1.863,
"step": 90
},
{
"epoch": 0.09,
"learning_rate": 1.5081967213114753e-06,
"loss": 1.7581,
"step": 92
},
{
"epoch": 0.09,
"learning_rate": 1.5409836065573769e-06,
"loss": 1.8652,
"step": 94
},
{
"epoch": 0.1,
"learning_rate": 1.5737704918032787e-06,
"loss": 1.8223,
"step": 96
},
{
"epoch": 0.1,
"learning_rate": 1.6065573770491803e-06,
"loss": 1.8178,
"step": 98
},
{
"epoch": 0.1,
"learning_rate": 1.6393442622950819e-06,
"loss": 1.8272,
"step": 100
},
{
"epoch": 0.1,
"learning_rate": 1.6721311475409837e-06,
"loss": 1.8506,
"step": 102
},
{
"epoch": 0.1,
"learning_rate": 1.704918032786885e-06,
"loss": 1.8025,
"step": 104
},
{
"epoch": 0.11,
"learning_rate": 1.7377049180327867e-06,
"loss": 1.7876,
"step": 106
},
{
"epoch": 0.11,
"learning_rate": 1.7704918032786885e-06,
"loss": 1.7781,
"step": 108
},
{
"epoch": 0.11,
"learning_rate": 1.80327868852459e-06,
"loss": 1.8246,
"step": 110
},
{
"epoch": 0.11,
"learning_rate": 1.8360655737704917e-06,
"loss": 1.7799,
"step": 112
},
{
"epoch": 0.11,
"learning_rate": 1.8688524590163935e-06,
"loss": 1.8039,
"step": 114
},
{
"epoch": 0.11,
"learning_rate": 1.901639344262295e-06,
"loss": 1.8075,
"step": 116
},
{
"epoch": 0.12,
"learning_rate": 1.9344262295081967e-06,
"loss": 1.7993,
"step": 118
},
{
"epoch": 0.12,
"learning_rate": 1.967213114754098e-06,
"loss": 1.8053,
"step": 120
},
{
"epoch": 0.12,
"learning_rate": 2e-06,
"loss": 1.7745,
"step": 122
},
{
"epoch": 0.12,
"learning_rate": 1.9999987114893716e-06,
"loss": 1.7959,
"step": 124
},
{
"epoch": 0.12,
"learning_rate": 1.9999948459608074e-06,
"loss": 1.8582,
"step": 126
},
{
"epoch": 0.13,
"learning_rate": 1.9999884034242694e-06,
"loss": 1.8663,
"step": 128
},
{
"epoch": 0.13,
"learning_rate": 1.9999793838963593e-06,
"loss": 1.8255,
"step": 130
},
{
"epoch": 0.13,
"learning_rate": 1.9999677874003214e-06,
"loss": 1.8037,
"step": 132
},
{
"epoch": 0.13,
"learning_rate": 1.9999536139660395e-06,
"loss": 1.8094,
"step": 134
},
{
"epoch": 0.13,
"learning_rate": 1.999936863630039e-06,
"loss": 1.8929,
"step": 136
},
{
"epoch": 0.14,
"learning_rate": 1.9999175364354857e-06,
"loss": 1.8412,
"step": 138
},
{
"epoch": 0.14,
"learning_rate": 1.9998956324321866e-06,
"loss": 1.7918,
"step": 140
},
{
"epoch": 0.14,
"learning_rate": 1.9998711516765885e-06,
"loss": 1.8734,
"step": 142
},
{
"epoch": 0.14,
"learning_rate": 1.999844094231779e-06,
"loss": 1.7952,
"step": 144
},
{
"epoch": 0.14,
"learning_rate": 1.999814460167486e-06,
"loss": 1.84,
"step": 146
},
{
"epoch": 0.15,
"learning_rate": 1.999782249560076e-06,
"loss": 1.7485,
"step": 148
},
{
"epoch": 0.15,
"learning_rate": 1.9997474624925582e-06,
"loss": 1.7845,
"step": 150
},
{
"epoch": 0.15,
"learning_rate": 1.9997100990545777e-06,
"loss": 1.8562,
"step": 152
},
{
"epoch": 0.15,
"learning_rate": 1.9996701593424223e-06,
"loss": 1.8288,
"step": 154
},
{
"epoch": 0.15,
"learning_rate": 1.9996276434590164e-06,
"loss": 1.8192,
"step": 156
},
{
"epoch": 0.16,
"learning_rate": 1.999582551513925e-06,
"loss": 1.8072,
"step": 158
},
{
"epoch": 0.16,
"learning_rate": 1.9995348836233515e-06,
"loss": 1.7816,
"step": 160
},
{
"epoch": 0.16,
"learning_rate": 1.999484639910136e-06,
"loss": 1.7941,
"step": 162
},
{
"epoch": 0.16,
"learning_rate": 1.999431820503758e-06,
"loss": 1.8132,
"step": 164
},
{
"epoch": 0.16,
"learning_rate": 1.9993764255403344e-06,
"loss": 1.762,
"step": 166
},
{
"epoch": 0.17,
"learning_rate": 1.999318455162619e-06,
"loss": 1.7573,
"step": 168
},
{
"epoch": 0.17,
"learning_rate": 1.999257909520003e-06,
"loss": 1.8117,
"step": 170
},
{
"epoch": 0.17,
"learning_rate": 1.9991947887685134e-06,
"loss": 1.7551,
"step": 172
},
{
"epoch": 0.17,
"learning_rate": 1.999129093070814e-06,
"loss": 1.8449,
"step": 174
},
{
"epoch": 0.17,
"learning_rate": 1.999060822596204e-06,
"loss": 1.7815,
"step": 176
},
{
"epoch": 0.18,
"learning_rate": 1.9989899775206176e-06,
"loss": 1.7854,
"step": 178
},
{
"epoch": 0.18,
"learning_rate": 1.9989165580266245e-06,
"loss": 1.7899,
"step": 180
},
{
"epoch": 0.18,
"learning_rate": 1.998840564303428e-06,
"loss": 1.8634,
"step": 182
},
{
"epoch": 0.18,
"learning_rate": 1.9987619965468652e-06,
"loss": 1.8095,
"step": 184
},
{
"epoch": 0.18,
"learning_rate": 1.9986808549594078e-06,
"loss": 1.7522,
"step": 186
},
{
"epoch": 0.19,
"learning_rate": 1.9985971397501588e-06,
"loss": 1.8314,
"step": 188
},
{
"epoch": 0.19,
"learning_rate": 1.9985108511348537e-06,
"loss": 1.8314,
"step": 190
},
{
"epoch": 0.19,
"learning_rate": 1.998421989335861e-06,
"loss": 1.7902,
"step": 192
},
{
"epoch": 0.19,
"learning_rate": 1.998330554582179e-06,
"loss": 1.768,
"step": 194
},
{
"epoch": 0.19,
"learning_rate": 1.9982365471094366e-06,
"loss": 1.8559,
"step": 196
},
{
"epoch": 0.2,
"learning_rate": 1.9981399671598938e-06,
"loss": 1.8016,
"step": 198
},
{
"epoch": 0.2,
"learning_rate": 1.9980408149824386e-06,
"loss": 1.8489,
"step": 200
},
{
"epoch": 0.2,
"learning_rate": 1.9979390908325886e-06,
"loss": 1.8126,
"step": 202
},
{
"epoch": 0.2,
"learning_rate": 1.997834794972489e-06,
"loss": 1.8001,
"step": 204
},
{
"epoch": 0.2,
"learning_rate": 1.997727927670912e-06,
"loss": 1.8156,
"step": 206
},
{
"epoch": 0.21,
"learning_rate": 1.997618489203258e-06,
"loss": 1.7559,
"step": 208
},
{
"epoch": 0.21,
"learning_rate": 1.9975064798515517e-06,
"loss": 1.7617,
"step": 210
},
{
"epoch": 0.21,
"learning_rate": 1.997391899904443e-06,
"loss": 1.8464,
"step": 212
},
{
"epoch": 0.21,
"learning_rate": 1.9972747496572075e-06,
"loss": 1.838,
"step": 214
},
{
"epoch": 0.21,
"learning_rate": 1.997155029411744e-06,
"loss": 1.7975,
"step": 216
},
{
"epoch": 0.22,
"learning_rate": 1.9970327394765733e-06,
"loss": 1.8403,
"step": 218
},
{
"epoch": 0.22,
"learning_rate": 1.99690788016684e-06,
"loss": 1.8571,
"step": 220
},
{
"epoch": 0.22,
"learning_rate": 1.9967804518043086e-06,
"loss": 1.8265,
"step": 222
},
{
"epoch": 0.22,
"learning_rate": 1.9966504547173653e-06,
"loss": 1.833,
"step": 224
},
{
"epoch": 0.22,
"learning_rate": 1.9965178892410154e-06,
"loss": 1.8409,
"step": 226
},
{
"epoch": 0.23,
"learning_rate": 1.996382755716882e-06,
"loss": 1.8173,
"step": 228
},
{
"epoch": 0.23,
"learning_rate": 1.9962450544932076e-06,
"loss": 1.7903,
"step": 230
},
{
"epoch": 0.23,
"learning_rate": 1.9961047859248517e-06,
"loss": 1.8099,
"step": 232
},
{
"epoch": 0.23,
"learning_rate": 1.9959619503732886e-06,
"loss": 1.8147,
"step": 234
},
{
"epoch": 0.23,
"learning_rate": 1.995816548206609e-06,
"loss": 1.7784,
"step": 236
},
{
"epoch": 0.24,
"learning_rate": 1.9956685797995172e-06,
"loss": 1.849,
"step": 238
},
{
"epoch": 0.24,
"learning_rate": 1.9955180455333312e-06,
"loss": 1.8219,
"step": 240
},
{
"epoch": 0.24,
"learning_rate": 1.9953649457959808e-06,
"loss": 1.7788,
"step": 242
},
{
"epoch": 0.24,
"learning_rate": 1.995209280982007e-06,
"loss": 1.8205,
"step": 244
},
{
"epoch": 0.24,
"learning_rate": 1.9950510514925617e-06,
"loss": 1.8305,
"step": 246
},
{
"epoch": 0.25,
"learning_rate": 1.9948902577354057e-06,
"loss": 1.8397,
"step": 248
},
{
"epoch": 0.25,
"learning_rate": 1.994726900124908e-06,
"loss": 1.8697,
"step": 250
},
{
"epoch": 0.25,
"learning_rate": 1.994560979082044e-06,
"loss": 1.8091,
"step": 252
},
{
"epoch": 0.25,
"learning_rate": 1.994392495034396e-06,
"loss": 1.8292,
"step": 254
},
{
"epoch": 0.25,
"learning_rate": 1.9942214484161517e-06,
"loss": 1.8443,
"step": 256
},
{
"epoch": 0.26,
"learning_rate": 1.994047839668101e-06,
"loss": 1.8159,
"step": 258
},
{
"epoch": 0.26,
"learning_rate": 1.9938716692376378e-06,
"loss": 1.8701,
"step": 260
},
{
"epoch": 0.26,
"learning_rate": 1.9936929375787566e-06,
"loss": 1.7886,
"step": 262
},
{
"epoch": 0.26,
"learning_rate": 1.9935116451520533e-06,
"loss": 1.7754,
"step": 264
},
{
"epoch": 0.26,
"learning_rate": 1.993327792424722e-06,
"loss": 1.812,
"step": 266
},
{
"epoch": 0.27,
"learning_rate": 1.993141379870555e-06,
"loss": 1.8494,
"step": 268
},
{
"epoch": 0.27,
"learning_rate": 1.9929524079699417e-06,
"loss": 1.8719,
"step": 270
},
{
"epoch": 0.27,
"learning_rate": 1.992760877209866e-06,
"loss": 1.7864,
"step": 272
},
{
"epoch": 0.27,
"learning_rate": 1.992566788083908e-06,
"loss": 1.8107,
"step": 274
},
{
"epoch": 0.27,
"learning_rate": 1.9923701410922383e-06,
"loss": 1.7902,
"step": 276
},
{
"epoch": 0.28,
"learning_rate": 1.9921709367416207e-06,
"loss": 1.7544,
"step": 278
},
{
"epoch": 0.28,
"learning_rate": 1.99196917554541e-06,
"loss": 1.8509,
"step": 280
},
{
"epoch": 0.28,
"learning_rate": 1.9917648580235474e-06,
"loss": 1.8262,
"step": 282
},
{
"epoch": 0.28,
"learning_rate": 1.991557984702565e-06,
"loss": 1.766,
"step": 284
},
{
"epoch": 0.28,
"learning_rate": 1.9913485561155783e-06,
"loss": 1.7944,
"step": 286
},
{
"epoch": 0.29,
"learning_rate": 1.991136572802291e-06,
"loss": 1.8283,
"step": 288
},
{
"epoch": 0.29,
"learning_rate": 1.990922035308987e-06,
"loss": 1.7621,
"step": 290
},
{
"epoch": 0.29,
"learning_rate": 1.9907049441885347e-06,
"loss": 1.7697,
"step": 292
},
{
"epoch": 0.29,
"learning_rate": 1.990485300000383e-06,
"loss": 1.8438,
"step": 294
},
{
"epoch": 0.29,
"learning_rate": 1.9902631033105586e-06,
"loss": 1.8083,
"step": 296
},
{
"epoch": 0.3,
"learning_rate": 1.9900383546916676e-06,
"loss": 1.8385,
"step": 298
},
{
"epoch": 0.3,
"learning_rate": 1.9898110547228923e-06,
"loss": 1.7602,
"step": 300
},
{
"epoch": 0.3,
"learning_rate": 1.989581203989989e-06,
"loss": 1.8295,
"step": 302
},
{
"epoch": 0.3,
"learning_rate": 1.9893488030852884e-06,
"loss": 1.8248,
"step": 304
},
{
"epoch": 0.3,
"learning_rate": 1.989113852607692e-06,
"loss": 1.7993,
"step": 306
},
{
"epoch": 0.31,
"learning_rate": 1.9888763531626725e-06,
"loss": 1.8724,
"step": 308
},
{
"epoch": 0.31,
"learning_rate": 1.988636305362271e-06,
"loss": 1.7902,
"step": 310
},
{
"epoch": 0.31,
"learning_rate": 1.988393709825096e-06,
"loss": 1.7993,
"step": 312
},
{
"epoch": 0.31,
"learning_rate": 1.988148567176321e-06,
"loss": 1.7969,
"step": 314
},
{
"epoch": 0.31,
"learning_rate": 1.987900878047684e-06,
"loss": 1.8642,
"step": 316
},
{
"epoch": 0.32,
"learning_rate": 1.987650643077485e-06,
"loss": 1.8064,
"step": 318
},
{
"epoch": 0.32,
"learning_rate": 1.9873978629105853e-06,
"loss": 1.7677,
"step": 320
},
{
"epoch": 0.32,
"learning_rate": 1.9871425381984043e-06,
"loss": 1.7867,
"step": 322
},
{
"epoch": 0.32,
"learning_rate": 1.986884669598919e-06,
"loss": 1.8304,
"step": 324
},
{
"epoch": 0.32,
"learning_rate": 1.9866242577766627e-06,
"loss": 1.8228,
"step": 326
},
{
"epoch": 0.32,
"learning_rate": 1.9863613034027223e-06,
"loss": 1.7832,
"step": 328
},
{
"epoch": 0.33,
"learning_rate": 1.9860958071547365e-06,
"loss": 1.8687,
"step": 330
},
{
"epoch": 0.33,
"learning_rate": 1.9858277697168947e-06,
"loss": 1.8497,
"step": 332
},
{
"epoch": 0.33,
"learning_rate": 1.9855571917799356e-06,
"loss": 1.7625,
"step": 334
},
{
"epoch": 0.33,
"learning_rate": 1.9852840740411437e-06,
"loss": 1.8641,
"step": 336
},
{
"epoch": 0.33,
"learning_rate": 1.9850084172043492e-06,
"loss": 1.8542,
"step": 338
},
{
"epoch": 0.34,
"learning_rate": 1.9847302219799266e-06,
"loss": 1.8338,
"step": 340
},
{
"epoch": 0.34,
"learning_rate": 1.9844494890847895e-06,
"loss": 1.7942,
"step": 342
},
{
"epoch": 0.34,
"learning_rate": 1.9841662192423937e-06,
"loss": 1.8268,
"step": 344
},
{
"epoch": 0.34,
"learning_rate": 1.983880413182731e-06,
"loss": 1.801,
"step": 346
},
{
"epoch": 0.34,
"learning_rate": 1.9835920716423297e-06,
"loss": 1.7898,
"step": 348
},
{
"epoch": 0.35,
"learning_rate": 1.983301195364252e-06,
"loss": 1.7926,
"step": 350
},
{
"epoch": 0.35,
"learning_rate": 1.9830077850980933e-06,
"loss": 1.8177,
"step": 352
},
{
"epoch": 0.35,
"learning_rate": 1.9827118415999765e-06,
"loss": 1.8194,
"step": 354
},
{
"epoch": 0.35,
"learning_rate": 1.9824133656325553e-06,
"loss": 1.8553,
"step": 356
},
{
"epoch": 0.35,
"learning_rate": 1.9821123579650084e-06,
"loss": 1.7962,
"step": 358
},
{
"epoch": 0.36,
"learning_rate": 1.9818088193730385e-06,
"loss": 1.8247,
"step": 360
},
{
"epoch": 0.36,
"learning_rate": 1.981502750638872e-06,
"loss": 1.8226,
"step": 362
},
{
"epoch": 0.36,
"learning_rate": 1.981194152551254e-06,
"loss": 1.7985,
"step": 364
},
{
"epoch": 0.36,
"learning_rate": 1.980883025905448e-06,
"loss": 1.773,
"step": 366
},
{
"epoch": 0.36,
"learning_rate": 1.980569371503234e-06,
"loss": 1.8056,
"step": 368
},
{
"epoch": 0.37,
"learning_rate": 1.9802531901529065e-06,
"loss": 1.8649,
"step": 370
},
{
"epoch": 0.37,
"learning_rate": 1.979934482669271e-06,
"loss": 1.8389,
"step": 372
},
{
"epoch": 0.37,
"learning_rate": 1.9796132498736443e-06,
"loss": 1.7644,
"step": 374
},
{
"epoch": 0.37,
"learning_rate": 1.97928949259385e-06,
"loss": 1.7506,
"step": 376
},
{
"epoch": 0.37,
"learning_rate": 1.9789632116642166e-06,
"loss": 1.8144,
"step": 378
},
{
"epoch": 0.38,
"learning_rate": 1.978634407925578e-06,
"loss": 1.8213,
"step": 380
},
{
"epoch": 0.38,
"learning_rate": 1.9783030822252677e-06,
"loss": 1.8077,
"step": 382
},
{
"epoch": 0.38,
"learning_rate": 1.9779692354171195e-06,
"loss": 1.7861,
"step": 384
},
{
"epoch": 0.38,
"learning_rate": 1.9776328683614634e-06,
"loss": 1.8397,
"step": 386
},
{
"epoch": 0.38,
"learning_rate": 1.9772939819251245e-06,
"loss": 1.7771,
"step": 388
},
{
"epoch": 0.39,
"learning_rate": 1.976952576981421e-06,
"loss": 1.7759,
"step": 390
},
{
"epoch": 0.39,
"learning_rate": 1.9766086544101597e-06,
"loss": 1.7946,
"step": 392
},
{
"epoch": 0.39,
"learning_rate": 1.9762622150976374e-06,
"loss": 1.8384,
"step": 394
},
{
"epoch": 0.39,
"learning_rate": 1.9759132599366346e-06,
"loss": 1.8419,
"step": 396
},
{
"epoch": 0.39,
"learning_rate": 1.9755617898264166e-06,
"loss": 1.7618,
"step": 398
},
{
"epoch": 0.4,
"learning_rate": 1.9752078056727297e-06,
"loss": 1.8551,
"step": 400
},
{
"epoch": 0.4,
"learning_rate": 1.974851308387798e-06,
"loss": 1.7832,
"step": 402
},
{
"epoch": 0.4,
"learning_rate": 1.974492298890323e-06,
"loss": 1.8065,
"step": 404
},
{
"epoch": 0.4,
"learning_rate": 1.9741307781054792e-06,
"loss": 1.8013,
"step": 406
},
{
"epoch": 0.4,
"learning_rate": 1.973766746964914e-06,
"loss": 1.839,
"step": 408
},
{
"epoch": 0.41,
"learning_rate": 1.973400206406743e-06,
"loss": 1.815,
"step": 410
},
{
"epoch": 0.41,
"learning_rate": 1.9730311573755493e-06,
"loss": 1.7583,
"step": 412
},
{
"epoch": 0.41,
"learning_rate": 1.97265960082238e-06,
"loss": 1.7778,
"step": 414
},
{
"epoch": 0.41,
"learning_rate": 1.9722855377047442e-06,
"loss": 1.811,
"step": 416
},
{
"epoch": 0.41,
"learning_rate": 1.9719089689866105e-06,
"loss": 1.7852,
"step": 418
},
{
"epoch": 0.42,
"learning_rate": 1.9715298956384047e-06,
"loss": 1.7967,
"step": 420
},
{
"epoch": 0.42,
"learning_rate": 1.9711483186370063e-06,
"loss": 1.8403,
"step": 422
},
{
"epoch": 0.42,
"learning_rate": 1.970764238965748e-06,
"loss": 1.7783,
"step": 424
},
{
"epoch": 0.42,
"learning_rate": 1.9703776576144106e-06,
"loss": 1.7977,
"step": 426
},
{
"epoch": 0.42,
"learning_rate": 1.969988575579223e-06,
"loss": 1.7673,
"step": 428
},
{
"epoch": 0.43,
"learning_rate": 1.969596993862858e-06,
"loss": 1.7923,
"step": 430
},
{
"epoch": 0.43,
"learning_rate": 1.9692029134744295e-06,
"loss": 1.844,
"step": 432
},
{
"epoch": 0.43,
"learning_rate": 1.9688063354294913e-06,
"loss": 1.812,
"step": 434
},
{
"epoch": 0.43,
"learning_rate": 1.9684072607500334e-06,
"loss": 1.7798,
"step": 436
},
{
"epoch": 0.43,
"learning_rate": 1.9680056904644796e-06,
"loss": 1.7617,
"step": 438
},
{
"epoch": 0.44,
"learning_rate": 1.9676016256076855e-06,
"loss": 1.7449,
"step": 440
},
{
"epoch": 0.44,
"learning_rate": 1.967195067220934e-06,
"loss": 1.8363,
"step": 442
},
{
"epoch": 0.44,
"learning_rate": 1.966786016351936e-06,
"loss": 1.8269,
"step": 444
},
{
"epoch": 0.44,
"learning_rate": 1.9663744740548232e-06,
"loss": 1.7656,
"step": 446
},
{
"epoch": 0.44,
"learning_rate": 1.9659604413901496e-06,
"loss": 1.8018,
"step": 448
},
{
"epoch": 0.45,
"learning_rate": 1.965543919424885e-06,
"loss": 1.8162,
"step": 450
},
{
"epoch": 0.45,
"learning_rate": 1.9651249092324166e-06,
"loss": 1.8419,
"step": 452
},
{
"epoch": 0.45,
"learning_rate": 1.9647034118925417e-06,
"loss": 1.764,
"step": 454
},
{
"epoch": 0.45,
"learning_rate": 1.9642794284914687e-06,
"loss": 1.8261,
"step": 456
},
{
"epoch": 0.45,
"learning_rate": 1.9638529601218114e-06,
"loss": 1.7924,
"step": 458
},
{
"epoch": 0.46,
"learning_rate": 1.9634240078825876e-06,
"loss": 1.7687,
"step": 460
},
{
"epoch": 0.46,
"learning_rate": 1.9629925728792166e-06,
"loss": 1.8531,
"step": 462
},
{
"epoch": 0.46,
"learning_rate": 1.962558656223516e-06,
"loss": 1.7934,
"step": 464
},
{
"epoch": 0.46,
"learning_rate": 1.9621222590336973e-06,
"loss": 1.807,
"step": 466
},
{
"epoch": 0.46,
"learning_rate": 1.961683382434366e-06,
"loss": 1.7776,
"step": 468
},
{
"epoch": 0.47,
"learning_rate": 1.961242027556517e-06,
"loss": 1.8311,
"step": 470
},
{
"epoch": 0.47,
"learning_rate": 1.9607981955375295e-06,
"loss": 1.8013,
"step": 472
},
{
"epoch": 0.47,
"learning_rate": 1.9603518875211696e-06,
"loss": 1.8536,
"step": 474
},
{
"epoch": 0.47,
"learning_rate": 1.9599031046575816e-06,
"loss": 1.7611,
"step": 476
},
{
"epoch": 0.47,
"learning_rate": 1.9594518481032896e-06,
"loss": 1.7873,
"step": 478
},
{
"epoch": 0.48,
"learning_rate": 1.95899811902119e-06,
"loss": 1.7876,
"step": 480
},
{
"epoch": 0.48,
"learning_rate": 1.958541918580553e-06,
"loss": 1.7066,
"step": 482
},
{
"epoch": 0.48,
"learning_rate": 1.958083247957017e-06,
"loss": 1.8098,
"step": 484
},
{
"epoch": 0.48,
"learning_rate": 1.957622108332585e-06,
"loss": 1.7821,
"step": 486
},
{
"epoch": 0.48,
"learning_rate": 1.957158500895625e-06,
"loss": 1.7832,
"step": 488
},
{
"epoch": 0.49,
"learning_rate": 1.9566924268408624e-06,
"loss": 1.799,
"step": 490
},
{
"epoch": 0.49,
"learning_rate": 1.9562238873693796e-06,
"loss": 1.7274,
"step": 492
},
{
"epoch": 0.49,
"learning_rate": 1.9557528836886135e-06,
"loss": 1.8111,
"step": 494
},
{
"epoch": 0.49,
"learning_rate": 1.9552794170123505e-06,
"loss": 1.7611,
"step": 496
},
{
"epoch": 0.49,
"learning_rate": 1.954803488560724e-06,
"loss": 1.7715,
"step": 498
},
{
"epoch": 0.5,
"learning_rate": 1.9543250995602117e-06,
"loss": 1.7802,
"step": 500
},
{
"epoch": 0.5,
"learning_rate": 1.9538442512436325e-06,
"loss": 1.7697,
"step": 502
},
{
"epoch": 0.5,
"learning_rate": 1.953360944850143e-06,
"loss": 1.7719,
"step": 504
},
{
"epoch": 0.5,
"learning_rate": 1.9528751816252328e-06,
"loss": 1.8125,
"step": 506
},
{
"epoch": 0.5,
"learning_rate": 1.9523869628207254e-06,
"loss": 1.7546,
"step": 508
},
{
"epoch": 0.51,
"learning_rate": 1.9518962896947706e-06,
"loss": 1.7955,
"step": 510
},
{
"epoch": 0.51,
"learning_rate": 1.951403163511843e-06,
"loss": 1.8274,
"step": 512
},
{
"epoch": 0.51,
"learning_rate": 1.9509075855427403e-06,
"loss": 1.7821,
"step": 514
},
{
"epoch": 0.51,
"learning_rate": 1.9504095570645768e-06,
"loss": 1.8088,
"step": 516
},
{
"epoch": 0.51,
"learning_rate": 1.949909079360782e-06,
"loss": 1.8297,
"step": 518
},
{
"epoch": 0.52,
"learning_rate": 1.9494061537210983e-06,
"loss": 1.7987,
"step": 520
},
{
"epoch": 0.52,
"learning_rate": 1.948900781441576e-06,
"loss": 1.8488,
"step": 522
},
{
"epoch": 0.52,
"learning_rate": 1.948392963824569e-06,
"loss": 1.8249,
"step": 524
},
{
"epoch": 0.52,
"learning_rate": 1.9478827021787355e-06,
"loss": 1.7542,
"step": 526
},
{
"epoch": 0.52,
"learning_rate": 1.9473699978190297e-06,
"loss": 1.7514,
"step": 528
},
{
"epoch": 0.53,
"learning_rate": 1.946854852066702e-06,
"loss": 1.803,
"step": 530
},
{
"epoch": 0.53,
"learning_rate": 1.9463372662492934e-06,
"loss": 1.8222,
"step": 532
},
{
"epoch": 0.53,
"learning_rate": 1.9458172417006346e-06,
"loss": 1.8136,
"step": 534
},
{
"epoch": 0.53,
"learning_rate": 1.945294779760839e-06,
"loss": 1.7658,
"step": 536
},
{
"epoch": 0.53,
"learning_rate": 1.9447698817763026e-06,
"loss": 1.8392,
"step": 538
},
{
"epoch": 0.54,
"learning_rate": 1.9442425490996984e-06,
"loss": 1.8257,
"step": 540
},
{
"epoch": 0.54,
"learning_rate": 1.9437127830899744e-06,
"loss": 1.7824,
"step": 542
},
{
"epoch": 0.54,
"learning_rate": 1.943180585112348e-06,
"loss": 1.8179,
"step": 544
},
{
"epoch": 0.54,
"learning_rate": 1.9426459565383056e-06,
"loss": 1.8429,
"step": 546
},
{
"epoch": 0.54,
"learning_rate": 1.9421088987455956e-06,
"loss": 1.7787,
"step": 548
},
{
"epoch": 0.54,
"learning_rate": 1.9415694131182277e-06,
"loss": 1.7805,
"step": 550
},
{
"epoch": 0.55,
"learning_rate": 1.9410275010464685e-06,
"loss": 1.8401,
"step": 552
},
{
"epoch": 0.55,
"learning_rate": 1.9404831639268356e-06,
"loss": 1.7954,
"step": 554
},
{
"epoch": 0.55,
"learning_rate": 1.9399364031620987e-06,
"loss": 1.7947,
"step": 556
},
{
"epoch": 0.55,
"learning_rate": 1.939387220161271e-06,
"loss": 1.7727,
"step": 558
},
{
"epoch": 0.55,
"learning_rate": 1.9388356163396085e-06,
"loss": 1.784,
"step": 560
},
{
"epoch": 0.56,
"learning_rate": 1.938281593118607e-06,
"loss": 1.7915,
"step": 562
},
{
"epoch": 0.56,
"learning_rate": 1.9377251519259954e-06,
"loss": 1.7829,
"step": 564
},
{
"epoch": 0.56,
"learning_rate": 1.9371662941957347e-06,
"loss": 1.8075,
"step": 566
},
{
"epoch": 0.56,
"learning_rate": 1.936605021368013e-06,
"loss": 1.8001,
"step": 568
},
{
"epoch": 0.56,
"learning_rate": 1.9360413348892427e-06,
"loss": 1.8251,
"step": 570
},
{
"epoch": 0.57,
"learning_rate": 1.935475236212056e-06,
"loss": 1.7733,
"step": 572
},
{
"epoch": 0.57,
"learning_rate": 1.9349067267953e-06,
"loss": 1.8052,
"step": 574
},
{
"epoch": 0.57,
"learning_rate": 1.934335808104037e-06,
"loss": 1.7986,
"step": 576
},
{
"epoch": 0.57,
"learning_rate": 1.9337624816095357e-06,
"loss": 1.8115,
"step": 578
},
{
"epoch": 0.57,
"learning_rate": 1.9331867487892706e-06,
"loss": 1.7622,
"step": 580
},
{
"epoch": 0.58,
"learning_rate": 1.932608611126918e-06,
"loss": 1.8088,
"step": 582
},
{
"epoch": 0.58,
"learning_rate": 1.9320280701123507e-06,
"loss": 1.7687,
"step": 584
},
{
"epoch": 0.58,
"learning_rate": 1.931445127241635e-06,
"loss": 1.7906,
"step": 586
},
{
"epoch": 0.58,
"learning_rate": 1.9308597840170277e-06,
"loss": 1.7157,
"step": 588
},
{
"epoch": 0.58,
"learning_rate": 1.93027204194697e-06,
"loss": 1.8403,
"step": 590
},
{
"epoch": 0.59,
"learning_rate": 1.9296819025460857e-06,
"loss": 1.7787,
"step": 592
},
{
"epoch": 0.59,
"learning_rate": 1.9290893673351777e-06,
"loss": 1.728,
"step": 594
},
{
"epoch": 0.59,
"learning_rate": 1.9284944378412205e-06,
"loss": 1.7893,
"step": 596
},
{
"epoch": 0.59,
"learning_rate": 1.9278971155973607e-06,
"loss": 1.7054,
"step": 598
},
{
"epoch": 0.59,
"learning_rate": 1.9272974021429104e-06,
"loss": 1.7644,
"step": 600
},
{
"epoch": 0.6,
"learning_rate": 1.926695299023344e-06,
"loss": 1.8168,
"step": 602
},
{
"epoch": 0.6,
"learning_rate": 1.9260908077902935e-06,
"loss": 1.7614,
"step": 604
},
{
"epoch": 0.6,
"learning_rate": 1.925483930001546e-06,
"loss": 1.7905,
"step": 606
},
{
"epoch": 0.6,
"learning_rate": 1.9248746672210384e-06,
"loss": 1.817,
"step": 608
},
{
"epoch": 0.6,
"learning_rate": 1.9242630210188545e-06,
"loss": 1.8445,
"step": 610
},
{
"epoch": 0.61,
"learning_rate": 1.9236489929712185e-06,
"loss": 1.7845,
"step": 612
},
{
"epoch": 0.61,
"learning_rate": 1.9230325846604944e-06,
"loss": 1.7529,
"step": 614
},
{
"epoch": 0.61,
"learning_rate": 1.9224137976751793e-06,
"loss": 1.7691,
"step": 616
},
{
"epoch": 0.61,
"learning_rate": 1.9217926336099004e-06,
"loss": 1.7631,
"step": 618
},
{
"epoch": 0.61,
"learning_rate": 1.9211690940654107e-06,
"loss": 1.7793,
"step": 620
},
{
"epoch": 0.62,
"learning_rate": 1.920543180648585e-06,
"loss": 1.7552,
"step": 622
},
{
"epoch": 0.62,
"learning_rate": 1.9199148949724155e-06,
"loss": 1.7952,
"step": 624
},
{
"epoch": 0.62,
"learning_rate": 1.9192842386560077e-06,
"loss": 1.785,
"step": 626
},
{
"epoch": 0.62,
"learning_rate": 1.9186512133245757e-06,
"loss": 1.7388,
"step": 628
},
{
"epoch": 0.62,
"learning_rate": 1.9180158206094405e-06,
"loss": 1.8321,
"step": 630
},
{
"epoch": 0.63,
"learning_rate": 1.9173780621480214e-06,
"loss": 1.7727,
"step": 632
},
{
"epoch": 0.63,
"learning_rate": 1.9167379395838364e-06,
"loss": 1.781,
"step": 634
},
{
"epoch": 0.63,
"learning_rate": 1.9160954545664944e-06,
"loss": 1.7616,
"step": 636
},
{
"epoch": 0.63,
"learning_rate": 1.915450608751693e-06,
"loss": 1.7842,
"step": 638
},
{
"epoch": 0.63,
"learning_rate": 1.914803403801214e-06,
"loss": 1.7829,
"step": 640
},
{
"epoch": 0.64,
"learning_rate": 1.914153841382918e-06,
"loss": 1.7974,
"step": 642
},
{
"epoch": 0.64,
"learning_rate": 1.9135019231707414e-06,
"loss": 1.7255,
"step": 644
},
{
"epoch": 0.64,
"learning_rate": 1.912847650844691e-06,
"loss": 1.7882,
"step": 646
},
{
"epoch": 0.64,
"learning_rate": 1.91219102609084e-06,
"loss": 1.7773,
"step": 648
},
{
"epoch": 0.64,
"learning_rate": 1.9115320506013255e-06,
"loss": 1.7819,
"step": 650
},
{
"epoch": 0.65,
"learning_rate": 1.9108707260743404e-06,
"loss": 1.7731,
"step": 652
},
{
"epoch": 0.65,
"learning_rate": 1.910207054214133e-06,
"loss": 1.7596,
"step": 654
},
{
"epoch": 0.65,
"learning_rate": 1.9095410367309986e-06,
"loss": 1.7473,
"step": 656
},
{
"epoch": 0.65,
"learning_rate": 1.908872675341279e-06,
"loss": 1.7474,
"step": 658
},
{
"epoch": 0.65,
"learning_rate": 1.9082019717673562e-06,
"loss": 1.7585,
"step": 660
},
{
"epoch": 0.66,
"learning_rate": 1.9075289277376466e-06,
"loss": 1.7408,
"step": 662
},
{
"epoch": 0.66,
"learning_rate": 1.9068535449866002e-06,
"loss": 1.7541,
"step": 664
},
{
"epoch": 0.66,
"learning_rate": 1.9061758252546913e-06,
"loss": 1.7507,
"step": 666
},
{
"epoch": 0.66,
"learning_rate": 1.9054957702884189e-06,
"loss": 1.8213,
"step": 668
},
{
"epoch": 0.66,
"learning_rate": 1.9048133818402991e-06,
"loss": 1.7869,
"step": 670
},
{
"epoch": 0.67,
"learning_rate": 1.9041286616688615e-06,
"loss": 1.8003,
"step": 672
},
{
"epoch": 0.67,
"learning_rate": 1.9034416115386438e-06,
"loss": 1.7579,
"step": 674
},
{
"epoch": 0.67,
"learning_rate": 1.9027522332201895e-06,
"loss": 1.8254,
"step": 676
},
{
"epoch": 0.67,
"learning_rate": 1.902060528490041e-06,
"loss": 1.7649,
"step": 678
},
{
"epoch": 0.67,
"learning_rate": 1.9013664991307362e-06,
"loss": 1.8614,
"step": 680
},
{
"epoch": 0.68,
"learning_rate": 1.9006701469308032e-06,
"loss": 1.7119,
"step": 682
},
{
"epoch": 0.68,
"learning_rate": 1.8999714736847567e-06,
"loss": 1.7665,
"step": 684
},
{
"epoch": 0.68,
"learning_rate": 1.8992704811930923e-06,
"loss": 1.7592,
"step": 686
},
{
"epoch": 0.68,
"learning_rate": 1.8985671712622828e-06,
"loss": 1.7544,
"step": 688
},
{
"epoch": 0.68,
"learning_rate": 1.897861545704773e-06,
"loss": 1.806,
"step": 690
},
{
"epoch": 0.69,
"learning_rate": 1.8971536063389742e-06,
"loss": 1.7541,
"step": 692
},
{
"epoch": 0.69,
"learning_rate": 1.8964433549892617e-06,
"loss": 1.8463,
"step": 694
},
{
"epoch": 0.69,
"learning_rate": 1.8957307934859683e-06,
"loss": 1.8462,
"step": 696
},
{
"epoch": 0.69,
"learning_rate": 1.8950159236653805e-06,
"loss": 1.7611,
"step": 698
},
{
"epoch": 0.69,
"learning_rate": 1.8942987473697322e-06,
"loss": 1.7608,
"step": 700
},
{
"epoch": 0.7,
"learning_rate": 1.8935792664472026e-06,
"loss": 1.8391,
"step": 702
},
{
"epoch": 0.7,
"learning_rate": 1.8928574827519093e-06,
"loss": 1.826,
"step": 704
},
{
"epoch": 0.7,
"learning_rate": 1.8921333981439038e-06,
"loss": 1.8257,
"step": 706
},
{
"epoch": 0.7,
"learning_rate": 1.8914070144891677e-06,
"loss": 1.7911,
"step": 708
},
{
"epoch": 0.7,
"learning_rate": 1.8906783336596075e-06,
"loss": 1.7824,
"step": 710
},
{
"epoch": 0.71,
"learning_rate": 1.8899473575330487e-06,
"loss": 1.7291,
"step": 712
},
{
"epoch": 0.71,
"learning_rate": 1.8892140879932325e-06,
"loss": 1.8055,
"step": 714
},
{
"epoch": 0.71,
"learning_rate": 1.8884785269298105e-06,
"loss": 1.763,
"step": 716
},
{
"epoch": 0.71,
"learning_rate": 1.8877406762383385e-06,
"loss": 1.7944,
"step": 718
},
{
"epoch": 0.71,
"learning_rate": 1.8870005378202735e-06,
"loss": 1.8079,
"step": 720
},
{
"epoch": 0.72,
"learning_rate": 1.8862581135829686e-06,
"loss": 1.7776,
"step": 722
},
{
"epoch": 0.72,
"learning_rate": 1.885513405439666e-06,
"loss": 1.8052,
"step": 724
},
{
"epoch": 0.72,
"learning_rate": 1.884766415309495e-06,
"loss": 1.8075,
"step": 726
},
{
"epoch": 0.72,
"learning_rate": 1.8840171451174648e-06,
"loss": 1.7557,
"step": 728
},
{
"epoch": 0.72,
"learning_rate": 1.8832655967944605e-06,
"loss": 1.793,
"step": 730
},
{
"epoch": 0.73,
"learning_rate": 1.8825117722772387e-06,
"loss": 1.7873,
"step": 732
},
{
"epoch": 0.73,
"learning_rate": 1.8817556735084205e-06,
"loss": 1.7842,
"step": 734
},
{
"epoch": 0.73,
"learning_rate": 1.8809973024364888e-06,
"loss": 1.7921,
"step": 736
},
{
"epoch": 0.73,
"learning_rate": 1.880236661015782e-06,
"loss": 1.741,
"step": 738
},
{
"epoch": 0.73,
"learning_rate": 1.8794737512064888e-06,
"loss": 1.7511,
"step": 740
},
{
"epoch": 0.74,
"learning_rate": 1.8787085749746447e-06,
"loss": 1.798,
"step": 742
},
{
"epoch": 0.74,
"learning_rate": 1.8779411342921247e-06,
"loss": 1.7962,
"step": 744
},
{
"epoch": 0.74,
"learning_rate": 1.8771714311366398e-06,
"loss": 1.755,
"step": 746
},
{
"epoch": 0.74,
"learning_rate": 1.8763994674917317e-06,
"loss": 1.7955,
"step": 748
},
{
"epoch": 0.74,
"learning_rate": 1.8756252453467663e-06,
"loss": 1.7368,
"step": 750
},
{
"epoch": 0.75,
"learning_rate": 1.8748487666969312e-06,
"loss": 1.7901,
"step": 752
},
{
"epoch": 0.75,
"learning_rate": 1.8740700335432287e-06,
"loss": 1.8222,
"step": 754
},
{
"epoch": 0.75,
"learning_rate": 1.8732890478924696e-06,
"loss": 1.7811,
"step": 756
},
{
"epoch": 0.75,
"learning_rate": 1.8725058117572715e-06,
"loss": 1.7372,
"step": 758
},
{
"epoch": 0.75,
"learning_rate": 1.8717203271560499e-06,
"loss": 1.7984,
"step": 760
},
{
"epoch": 0.76,
"learning_rate": 1.870932596113016e-06,
"loss": 1.7976,
"step": 762
},
{
"epoch": 0.76,
"learning_rate": 1.8701426206581693e-06,
"loss": 1.7846,
"step": 764
},
{
"epoch": 0.76,
"learning_rate": 1.8693504028272925e-06,
"loss": 1.7977,
"step": 766
},
{
"epoch": 0.76,
"learning_rate": 1.8685559446619487e-06,
"loss": 1.7319,
"step": 768
},
{
"epoch": 0.76,
"learning_rate": 1.8677592482094733e-06,
"loss": 1.7326,
"step": 770
},
{
"epoch": 0.76,
"learning_rate": 1.8669603155229696e-06,
"loss": 1.8116,
"step": 772
},
{
"epoch": 0.77,
"learning_rate": 1.8661591486613047e-06,
"loss": 1.7751,
"step": 774
},
{
"epoch": 0.77,
"learning_rate": 1.865355749689102e-06,
"loss": 1.8183,
"step": 776
},
{
"epoch": 0.77,
"learning_rate": 1.8645501206767383e-06,
"loss": 1.7286,
"step": 778
},
{
"epoch": 0.77,
"learning_rate": 1.8637422637003363e-06,
"loss": 1.7795,
"step": 780
},
{
"epoch": 0.77,
"learning_rate": 1.8629321808417608e-06,
"loss": 1.7433,
"step": 782
},
{
"epoch": 0.78,
"learning_rate": 1.8621198741886124e-06,
"loss": 1.777,
"step": 784
},
{
"epoch": 0.78,
"learning_rate": 1.8613053458342227e-06,
"loss": 1.8075,
"step": 786
},
{
"epoch": 0.78,
"learning_rate": 1.8604885978776488e-06,
"loss": 1.7769,
"step": 788
},
{
"epoch": 0.78,
"learning_rate": 1.8596696324236674e-06,
"loss": 1.8142,
"step": 790
},
{
"epoch": 0.78,
"learning_rate": 1.8588484515827695e-06,
"loss": 1.7904,
"step": 792
},
{
"epoch": 0.79,
"learning_rate": 1.858025057471156e-06,
"loss": 1.7639,
"step": 794
},
{
"epoch": 0.79,
"learning_rate": 1.8571994522107311e-06,
"loss": 1.8408,
"step": 796
},
{
"epoch": 0.79,
"learning_rate": 1.8563716379290965e-06,
"loss": 1.7565,
"step": 798
},
{
"epoch": 0.79,
"learning_rate": 1.8555416167595478e-06,
"loss": 1.7836,
"step": 800
},
{
"epoch": 0.79,
"learning_rate": 1.8547093908410673e-06,
"loss": 1.8322,
"step": 802
},
{
"epoch": 0.8,
"learning_rate": 1.8538749623183185e-06,
"loss": 1.7878,
"step": 804
},
{
"epoch": 0.8,
"learning_rate": 1.8530383333416415e-06,
"loss": 1.7332,
"step": 806
},
{
"epoch": 0.8,
"learning_rate": 1.852199506067047e-06,
"loss": 1.7737,
"step": 808
},
{
"epoch": 0.8,
"learning_rate": 1.8513584826562107e-06,
"loss": 1.7779,
"step": 810
},
{
"epoch": 0.8,
"learning_rate": 1.8505152652764679e-06,
"loss": 1.8014,
"step": 812
},
{
"epoch": 0.81,
"learning_rate": 1.8496698561008075e-06,
"loss": 1.7936,
"step": 814
},
{
"epoch": 0.81,
"learning_rate": 1.8488222573078672e-06,
"loss": 1.7844,
"step": 816
},
{
"epoch": 0.81,
"learning_rate": 1.8479724710819268e-06,
"loss": 1.7462,
"step": 818
},
{
"epoch": 0.81,
"learning_rate": 1.8471204996129037e-06,
"loss": 1.7485,
"step": 820
},
{
"epoch": 0.81,
"learning_rate": 1.8462663450963464e-06,
"loss": 1.8099,
"step": 822
},
{
"epoch": 0.82,
"learning_rate": 1.8454100097334292e-06,
"loss": 1.776,
"step": 824
},
{
"epoch": 0.82,
"learning_rate": 1.8445514957309469e-06,
"loss": 1.7912,
"step": 826
},
{
"epoch": 0.82,
"learning_rate": 1.8436908053013079e-06,
"loss": 1.7699,
"step": 828
},
{
"epoch": 0.82,
"learning_rate": 1.8428279406625298e-06,
"loss": 1.7629,
"step": 830
},
{
"epoch": 0.82,
"learning_rate": 1.8419629040382332e-06,
"loss": 1.727,
"step": 832
},
{
"epoch": 0.83,
"learning_rate": 1.8410956976576357e-06,
"loss": 1.7597,
"step": 834
},
{
"epoch": 0.83,
"learning_rate": 1.8402263237555469e-06,
"loss": 1.748,
"step": 836
},
{
"epoch": 0.83,
"learning_rate": 1.8393547845723616e-06,
"loss": 1.8594,
"step": 838
},
{
"epoch": 0.83,
"learning_rate": 1.838481082354055e-06,
"loss": 1.7529,
"step": 840
},
{
"epoch": 0.83,
"learning_rate": 1.8376052193521757e-06,
"loss": 1.7255,
"step": 842
},
{
"epoch": 0.84,
"learning_rate": 1.8367271978238418e-06,
"loss": 1.7804,
"step": 844
},
{
"epoch": 0.84,
"learning_rate": 1.8358470200317338e-06,
"loss": 1.7526,
"step": 846
},
{
"epoch": 0.84,
"learning_rate": 1.8349646882440876e-06,
"loss": 1.7221,
"step": 848
},
{
"epoch": 0.84,
"learning_rate": 1.834080204734692e-06,
"loss": 1.7501,
"step": 850
},
{
"epoch": 0.84,
"learning_rate": 1.8331935717828788e-06,
"loss": 1.7033,
"step": 852
},
{
"epoch": 0.85,
"learning_rate": 1.8323047916735211e-06,
"loss": 1.7825,
"step": 854
},
{
"epoch": 0.85,
"learning_rate": 1.8314138666970228e-06,
"loss": 1.7489,
"step": 856
},
{
"epoch": 0.85,
"learning_rate": 1.8305207991493178e-06,
"loss": 1.7837,
"step": 858
},
{
"epoch": 0.85,
"learning_rate": 1.829625591331859e-06,
"loss": 1.7204,
"step": 860
},
{
"epoch": 0.85,
"learning_rate": 1.8287282455516171e-06,
"loss": 1.747,
"step": 862
},
{
"epoch": 0.86,
"learning_rate": 1.82782876412107e-06,
"loss": 1.7927,
"step": 864
},
{
"epoch": 0.86,
"learning_rate": 1.8269271493582017e-06,
"loss": 1.7864,
"step": 866
},
{
"epoch": 0.86,
"learning_rate": 1.8260234035864918e-06,
"loss": 1.752,
"step": 868
},
{
"epoch": 0.86,
"learning_rate": 1.825117529134913e-06,
"loss": 1.7716,
"step": 870
},
{
"epoch": 0.86,
"learning_rate": 1.8242095283379224e-06,
"loss": 1.7956,
"step": 872
},
{
"epoch": 0.87,
"learning_rate": 1.8232994035354574e-06,
"loss": 1.8058,
"step": 874
},
{
"epoch": 0.87,
"learning_rate": 1.8223871570729291e-06,
"loss": 1.8101,
"step": 876
},
{
"epoch": 0.87,
"learning_rate": 1.8214727913012167e-06,
"loss": 1.7702,
"step": 878
},
{
"epoch": 0.87,
"learning_rate": 1.820556308576659e-06,
"loss": 1.7393,
"step": 880
},
{
"epoch": 0.87,
"learning_rate": 1.8196377112610524e-06,
"loss": 1.7273,
"step": 882
},
{
"epoch": 0.88,
"learning_rate": 1.8187170017216414e-06,
"loss": 1.7896,
"step": 884
},
{
"epoch": 0.88,
"learning_rate": 1.817794182331114e-06,
"loss": 1.731,
"step": 886
},
{
"epoch": 0.88,
"learning_rate": 1.816869255467595e-06,
"loss": 1.8067,
"step": 888
},
{
"epoch": 0.88,
"learning_rate": 1.8159422235146416e-06,
"loss": 1.7634,
"step": 890
},
{
"epoch": 0.88,
"learning_rate": 1.815013088861234e-06,
"loss": 1.8085,
"step": 892
},
{
"epoch": 0.89,
"learning_rate": 1.814081853901772e-06,
"loss": 1.7552,
"step": 894
},
{
"epoch": 0.89,
"learning_rate": 1.8131485210360683e-06,
"loss": 1.7744,
"step": 896
},
{
"epoch": 0.89,
"learning_rate": 1.8122130926693416e-06,
"loss": 1.7522,
"step": 898
},
{
"epoch": 0.89,
"learning_rate": 1.81127557121221e-06,
"loss": 1.7646,
"step": 900
},
{
"epoch": 0.89,
"learning_rate": 1.8103359590806868e-06,
"loss": 1.7668,
"step": 902
},
{
"epoch": 0.9,
"learning_rate": 1.8093942586961723e-06,
"loss": 1.7521,
"step": 904
},
{
"epoch": 0.9,
"learning_rate": 1.8084504724854484e-06,
"loss": 1.7688,
"step": 906
},
{
"epoch": 0.9,
"learning_rate": 1.8075046028806722e-06,
"loss": 1.7481,
"step": 908
},
{
"epoch": 0.9,
"learning_rate": 1.8065566523193696e-06,
"loss": 1.7742,
"step": 910
},
{
"epoch": 0.9,
"learning_rate": 1.80560662324443e-06,
"loss": 1.784,
"step": 912
},
{
"epoch": 0.91,
"learning_rate": 1.8046545181040972e-06,
"loss": 1.771,
"step": 914
},
{
"epoch": 0.91,
"learning_rate": 1.8037003393519679e-06,
"loss": 1.7641,
"step": 916
},
{
"epoch": 0.91,
"learning_rate": 1.80274408944698e-06,
"loss": 1.7893,
"step": 918
},
{
"epoch": 0.91,
"learning_rate": 1.8017857708534106e-06,
"loss": 1.8559,
"step": 920
},
{
"epoch": 0.91,
"learning_rate": 1.8008253860408663e-06,
"loss": 1.7336,
"step": 922
},
{
"epoch": 0.92,
"learning_rate": 1.79986293748428e-06,
"loss": 1.7683,
"step": 924
},
{
"epoch": 0.92,
"learning_rate": 1.7988984276639012e-06,
"loss": 1.7372,
"step": 926
},
{
"epoch": 0.92,
"learning_rate": 1.7979318590652927e-06,
"loss": 1.7963,
"step": 928
},
{
"epoch": 0.92,
"learning_rate": 1.7969632341793225e-06,
"loss": 1.8381,
"step": 930
},
{
"epoch": 0.92,
"learning_rate": 1.7959925555021574e-06,
"loss": 1.7575,
"step": 932
},
{
"epoch": 0.93,
"learning_rate": 1.7950198255352566e-06,
"loss": 1.8207,
"step": 934
},
{
"epoch": 0.93,
"learning_rate": 1.7940450467853662e-06,
"loss": 1.7789,
"step": 936
},
{
"epoch": 0.93,
"learning_rate": 1.7930682217645119e-06,
"loss": 1.7791,
"step": 938
},
{
"epoch": 0.93,
"learning_rate": 1.7920893529899923e-06,
"loss": 1.767,
"step": 940
},
{
"epoch": 0.93,
"learning_rate": 1.791108442984373e-06,
"loss": 1.7942,
"step": 942
},
{
"epoch": 0.94,
"learning_rate": 1.7901254942754803e-06,
"loss": 1.7598,
"step": 944
},
{
"epoch": 0.94,
"learning_rate": 1.7891405093963937e-06,
"loss": 1.8034,
"step": 946
},
{
"epoch": 0.94,
"learning_rate": 1.78815349088544e-06,
"loss": 1.7288,
"step": 948
},
{
"epoch": 0.94,
"learning_rate": 1.787164441286187e-06,
"loss": 1.7815,
"step": 950
},
{
"epoch": 0.94,
"learning_rate": 1.7861733631474364e-06,
"loss": 1.797,
"step": 952
},
{
"epoch": 0.95,
"learning_rate": 1.7851802590232182e-06,
"loss": 1.8177,
"step": 954
},
{
"epoch": 0.95,
"learning_rate": 1.7841851314727822e-06,
"loss": 1.7422,
"step": 956
},
{
"epoch": 0.95,
"learning_rate": 1.7831879830605936e-06,
"loss": 1.7399,
"step": 958
},
{
"epoch": 0.95,
"learning_rate": 1.7821888163563249e-06,
"loss": 1.7812,
"step": 960
},
{
"epoch": 0.95,
"learning_rate": 1.7811876339348496e-06,
"loss": 1.7869,
"step": 962
},
{
"epoch": 0.96,
"learning_rate": 1.780184438376237e-06,
"loss": 1.7563,
"step": 964
},
{
"epoch": 0.96,
"learning_rate": 1.7791792322657427e-06,
"loss": 1.7901,
"step": 966
},
{
"epoch": 0.96,
"learning_rate": 1.7781720181938042e-06,
"loss": 1.7711,
"step": 968
},
{
"epoch": 0.96,
"learning_rate": 1.7771627987560337e-06,
"loss": 1.7734,
"step": 970
},
{
"epoch": 0.96,
"learning_rate": 1.7761515765532113e-06,
"loss": 1.7324,
"step": 972
},
{
"epoch": 0.97,
"learning_rate": 1.7751383541912782e-06,
"loss": 1.7546,
"step": 974
},
{
"epoch": 0.97,
"learning_rate": 1.7741231342813294e-06,
"loss": 1.7561,
"step": 976
},
{
"epoch": 0.97,
"learning_rate": 1.7731059194396088e-06,
"loss": 1.7741,
"step": 978
},
{
"epoch": 0.97,
"learning_rate": 1.7720867122875004e-06,
"loss": 1.7245,
"step": 980
},
{
"epoch": 0.97,
"learning_rate": 1.7710655154515224e-06,
"loss": 1.7769,
"step": 982
},
{
"epoch": 0.97,
"learning_rate": 1.770042331563321e-06,
"loss": 1.7355,
"step": 984
},
{
"epoch": 0.98,
"learning_rate": 1.7690171632596633e-06,
"loss": 1.7452,
"step": 986
},
{
"epoch": 0.98,
"learning_rate": 1.767990013182429e-06,
"loss": 1.7606,
"step": 988
},
{
"epoch": 0.98,
"learning_rate": 1.7669608839786062e-06,
"loss": 1.8084,
"step": 990
},
{
"epoch": 0.98,
"learning_rate": 1.7659297783002826e-06,
"loss": 1.7889,
"step": 992
},
{
"epoch": 0.98,
"learning_rate": 1.7648966988046397e-06,
"loss": 1.7204,
"step": 994
},
{
"epoch": 0.99,
"learning_rate": 1.7638616481539448e-06,
"loss": 1.7551,
"step": 996
},
{
"epoch": 0.99,
"learning_rate": 1.7628246290155458e-06,
"loss": 1.8024,
"step": 998
},
{
"epoch": 0.99,
"learning_rate": 1.761785644061863e-06,
"loss": 1.7638,
"step": 1000
},
{
"epoch": 0.99,
"learning_rate": 1.7607446959703825e-06,
"loss": 1.7361,
"step": 1002
},
{
"epoch": 0.99,
"learning_rate": 1.75970178742365e-06,
"loss": 1.7123,
"step": 1004
},
{
"epoch": 1.0,
"learning_rate": 1.7586569211092627e-06,
"loss": 1.7525,
"step": 1006
},
{
"epoch": 1.0,
"learning_rate": 1.7576100997198635e-06,
"loss": 1.749,
"step": 1008
},
{
"epoch": 1.0,
"learning_rate": 1.7565613259531334e-06,
"loss": 1.747,
"step": 1010
},
{
"epoch": 1.0,
"learning_rate": 1.7555106025117846e-06,
"loss": 1.7705,
"step": 1012
},
{
"epoch": 1.0,
"learning_rate": 1.7544579321035538e-06,
"loss": 1.7469,
"step": 1014
},
{
"epoch": 1.01,
"learning_rate": 1.7534033174411946e-06,
"loss": 1.755,
"step": 1016
},
{
"epoch": 1.01,
"learning_rate": 1.7523467612424721e-06,
"loss": 1.7792,
"step": 1018
},
{
"epoch": 1.01,
"learning_rate": 1.7512882662301535e-06,
"loss": 1.7223,
"step": 1020
},
{
"epoch": 1.01,
"learning_rate": 1.7502278351320032e-06,
"loss": 1.692,
"step": 1022
},
{
"epoch": 1.01,
"learning_rate": 1.7491654706807748e-06,
"loss": 1.6989,
"step": 1024
},
{
"epoch": 1.02,
"learning_rate": 1.748101175614204e-06,
"loss": 1.7227,
"step": 1026
},
{
"epoch": 1.02,
"learning_rate": 1.7470349526750017e-06,
"loss": 1.7257,
"step": 1028
},
{
"epoch": 1.02,
"learning_rate": 1.7459668046108469e-06,
"loss": 1.7522,
"step": 1030
},
{
"epoch": 1.02,
"learning_rate": 1.7448967341743804e-06,
"loss": 1.7951,
"step": 1032
},
{
"epoch": 1.02,
"learning_rate": 1.743824744123196e-06,
"loss": 1.6694,
"step": 1034
},
{
"epoch": 1.03,
"learning_rate": 1.742750837219835e-06,
"loss": 1.7478,
"step": 1036
},
{
"epoch": 1.03,
"learning_rate": 1.7416750162317783e-06,
"loss": 1.7157,
"step": 1038
},
{
"epoch": 1.03,
"learning_rate": 1.7405972839314395e-06,
"loss": 1.7839,
"step": 1040
},
{
"epoch": 1.03,
"learning_rate": 1.7395176430961577e-06,
"loss": 1.6986,
"step": 1042
},
{
"epoch": 1.03,
"learning_rate": 1.7384360965081901e-06,
"loss": 1.6926,
"step": 1044
},
{
"epoch": 1.04,
"learning_rate": 1.7373526469547055e-06,
"loss": 1.7446,
"step": 1046
},
{
"epoch": 1.04,
"learning_rate": 1.7362672972277761e-06,
"loss": 1.7724,
"step": 1048
},
{
"epoch": 1.04,
"learning_rate": 1.7351800501243713e-06,
"loss": 1.7376,
"step": 1050
},
{
"epoch": 1.04,
"learning_rate": 1.7340909084463505e-06,
"loss": 1.742,
"step": 1052
},
{
"epoch": 1.04,
"learning_rate": 1.7329998750004543e-06,
"loss": 1.7251,
"step": 1054
},
{
"epoch": 1.05,
"learning_rate": 1.7319069525982993e-06,
"loss": 1.7639,
"step": 1056
},
{
"epoch": 1.05,
"learning_rate": 1.7308121440563696e-06,
"loss": 1.7568,
"step": 1058
},
{
"epoch": 1.05,
"learning_rate": 1.7297154521960107e-06,
"loss": 1.7945,
"step": 1060
},
{
"epoch": 1.05,
"learning_rate": 1.7286168798434201e-06,
"loss": 1.6914,
"step": 1062
},
{
"epoch": 1.05,
"learning_rate": 1.7275164298296427e-06,
"loss": 1.7106,
"step": 1064
},
{
"epoch": 1.06,
"learning_rate": 1.7264141049905612e-06,
"loss": 1.7053,
"step": 1066
},
{
"epoch": 1.06,
"learning_rate": 1.72530990816689e-06,
"loss": 1.7859,
"step": 1068
},
{
"epoch": 1.06,
"learning_rate": 1.7242038422041683e-06,
"loss": 1.7561,
"step": 1070
},
{
"epoch": 1.06,
"learning_rate": 1.723095909952751e-06,
"loss": 1.7365,
"step": 1072
},
{
"epoch": 1.06,
"learning_rate": 1.7219861142678039e-06,
"loss": 1.7149,
"step": 1074
},
{
"epoch": 1.07,
"learning_rate": 1.7208744580092928e-06,
"loss": 1.7186,
"step": 1076
},
{
"epoch": 1.07,
"learning_rate": 1.7197609440419809e-06,
"loss": 1.7458,
"step": 1078
},
{
"epoch": 1.07,
"learning_rate": 1.7186455752354165e-06,
"loss": 1.7466,
"step": 1080
},
{
"epoch": 1.07,
"learning_rate": 1.717528354463929e-06,
"loss": 1.7572,
"step": 1082
},
{
"epoch": 1.07,
"learning_rate": 1.7164092846066198e-06,
"loss": 1.738,
"step": 1084
},
{
"epoch": 1.08,
"learning_rate": 1.7152883685473564e-06,
"loss": 1.7654,
"step": 1086
},
{
"epoch": 1.08,
"learning_rate": 1.7141656091747623e-06,
"loss": 1.6798,
"step": 1088
},
{
"epoch": 1.08,
"learning_rate": 1.713041009382213e-06,
"loss": 1.7499,
"step": 1090
},
{
"epoch": 1.08,
"learning_rate": 1.711914572067826e-06,
"loss": 1.7802,
"step": 1092
},
{
"epoch": 1.08,
"learning_rate": 1.7107863001344538e-06,
"loss": 1.6773,
"step": 1094
},
{
"epoch": 1.09,
"learning_rate": 1.7096561964896776e-06,
"loss": 1.7358,
"step": 1096
},
{
"epoch": 1.09,
"learning_rate": 1.7085242640457986e-06,
"loss": 1.7554,
"step": 1098
},
{
"epoch": 1.09,
"learning_rate": 1.7073905057198303e-06,
"loss": 1.7309,
"step": 1100
},
{
"epoch": 1.09,
"learning_rate": 1.7062549244334922e-06,
"loss": 1.751,
"step": 1102
},
{
"epoch": 1.09,
"learning_rate": 1.7051175231132016e-06,
"loss": 1.7844,
"step": 1104
},
{
"epoch": 1.1,
"learning_rate": 1.7039783046900652e-06,
"loss": 1.7343,
"step": 1106
},
{
"epoch": 1.1,
"learning_rate": 1.7028372720998742e-06,
"loss": 1.7545,
"step": 1108
},
{
"epoch": 1.1,
"learning_rate": 1.701694428283093e-06,
"loss": 1.7874,
"step": 1110
},
{
"epoch": 1.1,
"learning_rate": 1.7005497761848546e-06,
"loss": 1.7513,
"step": 1112
},
{
"epoch": 1.1,
"learning_rate": 1.6994033187549524e-06,
"loss": 1.7417,
"step": 1114
},
{
"epoch": 1.11,
"learning_rate": 1.6982550589478304e-06,
"loss": 1.7354,
"step": 1116
},
{
"epoch": 1.11,
"learning_rate": 1.6971049997225793e-06,
"loss": 1.7352,
"step": 1118
},
{
"epoch": 1.11,
"learning_rate": 1.695953144042926e-06,
"loss": 1.7793,
"step": 1120
},
{
"epoch": 1.11,
"learning_rate": 1.694799494877227e-06,
"loss": 1.7799,
"step": 1122
},
{
"epoch": 1.11,
"learning_rate": 1.6936440551984613e-06,
"loss": 1.7053,
"step": 1124
},
{
"epoch": 1.12,
"learning_rate": 1.6924868279842204e-06,
"loss": 1.7682,
"step": 1126
},
{
"epoch": 1.12,
"learning_rate": 1.6913278162167044e-06,
"loss": 1.7215,
"step": 1128
},
{
"epoch": 1.12,
"learning_rate": 1.6901670228827108e-06,
"loss": 1.7185,
"step": 1130
},
{
"epoch": 1.12,
"learning_rate": 1.6890044509736287e-06,
"loss": 1.7653,
"step": 1132
},
{
"epoch": 1.12,
"learning_rate": 1.6878401034854308e-06,
"loss": 1.8248,
"step": 1134
},
{
"epoch": 1.13,
"learning_rate": 1.6866739834186654e-06,
"loss": 1.7018,
"step": 1136
},
{
"epoch": 1.13,
"learning_rate": 1.6855060937784484e-06,
"loss": 1.6891,
"step": 1138
},
{
"epoch": 1.13,
"learning_rate": 1.6843364375744565e-06,
"loss": 1.7638,
"step": 1140
},
{
"epoch": 1.13,
"learning_rate": 1.6831650178209182e-06,
"loss": 1.7252,
"step": 1142
},
{
"epoch": 1.13,
"learning_rate": 1.6819918375366077e-06,
"loss": 1.7095,
"step": 1144
},
{
"epoch": 1.14,
"learning_rate": 1.680816899744835e-06,
"loss": 1.7227,
"step": 1146
},
{
"epoch": 1.14,
"learning_rate": 1.6796402074734402e-06,
"loss": 1.7136,
"step": 1148
},
{
"epoch": 1.14,
"learning_rate": 1.678461763754784e-06,
"loss": 1.7166,
"step": 1150
},
{
"epoch": 1.14,
"learning_rate": 1.6772815716257411e-06,
"loss": 1.7457,
"step": 1152
},
{
"epoch": 1.14,
"learning_rate": 1.6760996341276914e-06,
"loss": 1.7715,
"step": 1154
},
{
"epoch": 1.15,
"learning_rate": 1.6749159543065132e-06,
"loss": 1.7529,
"step": 1156
},
{
"epoch": 1.15,
"learning_rate": 1.6737305352125746e-06,
"loss": 1.7111,
"step": 1158
},
{
"epoch": 1.15,
"learning_rate": 1.6725433799007255e-06,
"loss": 1.7536,
"step": 1160
},
{
"epoch": 1.15,
"learning_rate": 1.6713544914302908e-06,
"loss": 1.7355,
"step": 1162
},
{
"epoch": 1.15,
"learning_rate": 1.6701638728650611e-06,
"loss": 1.7847,
"step": 1164
},
{
"epoch": 1.16,
"learning_rate": 1.6689715272732858e-06,
"loss": 1.7407,
"step": 1166
},
{
"epoch": 1.16,
"learning_rate": 1.6677774577276646e-06,
"loss": 1.8005,
"step": 1168
},
{
"epoch": 1.16,
"learning_rate": 1.6665816673053405e-06,
"loss": 1.708,
"step": 1170
},
{
"epoch": 1.16,
"learning_rate": 1.6653841590878907e-06,
"loss": 1.7602,
"step": 1172
},
{
"epoch": 1.16,
"learning_rate": 1.6641849361613192e-06,
"loss": 1.7986,
"step": 1174
},
{
"epoch": 1.17,
"learning_rate": 1.6629840016160493e-06,
"loss": 1.7357,
"step": 1176
},
{
"epoch": 1.17,
"learning_rate": 1.6617813585469144e-06,
"loss": 1.7352,
"step": 1178
},
{
"epoch": 1.17,
"learning_rate": 1.6605770100531516e-06,
"loss": 1.7441,
"step": 1180
},
{
"epoch": 1.17,
"learning_rate": 1.6593709592383923e-06,
"loss": 1.7138,
"step": 1182
},
{
"epoch": 1.17,
"learning_rate": 1.6581632092106555e-06,
"loss": 1.7629,
"step": 1184
},
{
"epoch": 1.18,
"learning_rate": 1.6569537630823382e-06,
"loss": 1.7185,
"step": 1186
},
{
"epoch": 1.18,
"learning_rate": 1.6557426239702087e-06,
"loss": 1.744,
"step": 1188
},
{
"epoch": 1.18,
"learning_rate": 1.6545297949953993e-06,
"loss": 1.6972,
"step": 1190
},
{
"epoch": 1.18,
"learning_rate": 1.6533152792833947e-06,
"loss": 1.6991,
"step": 1192
},
{
"epoch": 1.18,
"learning_rate": 1.6520990799640285e-06,
"loss": 1.6925,
"step": 1194
},
{
"epoch": 1.19,
"learning_rate": 1.650881200171472e-06,
"loss": 1.7568,
"step": 1196
},
{
"epoch": 1.19,
"learning_rate": 1.6496616430442271e-06,
"loss": 1.6709,
"step": 1198
},
{
"epoch": 1.19,
"learning_rate": 1.648440411725119e-06,
"loss": 1.7474,
"step": 1200
},
{
"epoch": 1.19,
"learning_rate": 1.6472175093612862e-06,
"loss": 1.7719,
"step": 1202
},
{
"epoch": 1.19,
"learning_rate": 1.6459929391041746e-06,
"loss": 1.7151,
"step": 1204
},
{
"epoch": 1.19,
"learning_rate": 1.6447667041095267e-06,
"loss": 1.7478,
"step": 1206
},
{
"epoch": 1.2,
"learning_rate": 1.6435388075373777e-06,
"loss": 1.7257,
"step": 1208
},
{
"epoch": 1.2,
"learning_rate": 1.6423092525520421e-06,
"loss": 1.699,
"step": 1210
},
{
"epoch": 1.2,
"learning_rate": 1.6410780423221095e-06,
"loss": 1.7522,
"step": 1212
},
{
"epoch": 1.2,
"learning_rate": 1.6398451800204352e-06,
"loss": 1.712,
"step": 1214
},
{
"epoch": 1.2,
"learning_rate": 1.6386106688241309e-06,
"loss": 1.7137,
"step": 1216
},
{
"epoch": 1.21,
"learning_rate": 1.6373745119145584e-06,
"loss": 1.7794,
"step": 1218
},
{
"epoch": 1.21,
"learning_rate": 1.6361367124773207e-06,
"loss": 1.7425,
"step": 1220
},
{
"epoch": 1.21,
"learning_rate": 1.6348972737022527e-06,
"loss": 1.7158,
"step": 1222
},
{
"epoch": 1.21,
"learning_rate": 1.6336561987834151e-06,
"loss": 1.7363,
"step": 1224
},
{
"epoch": 1.21,
"learning_rate": 1.632413490919084e-06,
"loss": 1.7048,
"step": 1226
},
{
"epoch": 1.22,
"learning_rate": 1.631169153311744e-06,
"loss": 1.6891,
"step": 1228
},
{
"epoch": 1.22,
"learning_rate": 1.6299231891680793e-06,
"loss": 1.7387,
"step": 1230
},
{
"epoch": 1.22,
"learning_rate": 1.6286756016989664e-06,
"loss": 1.7149,
"step": 1232
},
{
"epoch": 1.22,
"learning_rate": 1.6274263941194648e-06,
"loss": 1.7447,
"step": 1234
},
{
"epoch": 1.22,
"learning_rate": 1.6261755696488084e-06,
"loss": 1.7323,
"step": 1236
},
{
"epoch": 1.23,
"learning_rate": 1.6249231315103992e-06,
"loss": 1.71,
"step": 1238
},
{
"epoch": 1.23,
"learning_rate": 1.6236690829317963e-06,
"loss": 1.7227,
"step": 1240
},
{
"epoch": 1.23,
"learning_rate": 1.6224134271447099e-06,
"loss": 1.7161,
"step": 1242
},
{
"epoch": 1.23,
"learning_rate": 1.6211561673849913e-06,
"loss": 1.6992,
"step": 1244
},
{
"epoch": 1.23,
"learning_rate": 1.6198973068926259e-06,
"loss": 1.7344,
"step": 1246
},
{
"epoch": 1.24,
"learning_rate": 1.618636848911724e-06,
"loss": 1.78,
"step": 1248
},
{
"epoch": 1.24,
"learning_rate": 1.6173747966905125e-06,
"loss": 1.6864,
"step": 1250
},
{
"epoch": 1.24,
"learning_rate": 1.6161111534813267e-06,
"loss": 1.7658,
"step": 1252
},
{
"epoch": 1.24,
"learning_rate": 1.614845922540602e-06,
"loss": 1.7322,
"step": 1254
},
{
"epoch": 1.24,
"learning_rate": 1.6135791071288657e-06,
"loss": 1.7848,
"step": 1256
},
{
"epoch": 1.25,
"learning_rate": 1.612310710510728e-06,
"loss": 1.7114,
"step": 1258
},
{
"epoch": 1.25,
"learning_rate": 1.6110407359548733e-06,
"loss": 1.7484,
"step": 1260
},
{
"epoch": 1.25,
"learning_rate": 1.6097691867340543e-06,
"loss": 1.7689,
"step": 1262
},
{
"epoch": 1.25,
"learning_rate": 1.6084960661250793e-06,
"loss": 1.7018,
"step": 1264
},
{
"epoch": 1.25,
"learning_rate": 1.6072213774088073e-06,
"loss": 1.7428,
"step": 1266
},
{
"epoch": 1.26,
"learning_rate": 1.6059451238701388e-06,
"loss": 1.7393,
"step": 1268
},
{
"epoch": 1.26,
"learning_rate": 1.6046673087980055e-06,
"loss": 1.7786,
"step": 1270
},
{
"epoch": 1.26,
"learning_rate": 1.6033879354853648e-06,
"loss": 1.7473,
"step": 1272
},
{
"epoch": 1.26,
"learning_rate": 1.602107007229188e-06,
"loss": 1.7678,
"step": 1274
},
{
"epoch": 1.26,
"learning_rate": 1.6008245273304554e-06,
"loss": 1.727,
"step": 1276
},
{
"epoch": 1.27,
"learning_rate": 1.5995404990941445e-06,
"loss": 1.7662,
"step": 1278
},
{
"epoch": 1.27,
"learning_rate": 1.5982549258292233e-06,
"loss": 1.7225,
"step": 1280
},
{
"epoch": 1.27,
"learning_rate": 1.5969678108486417e-06,
"loss": 1.7646,
"step": 1282
},
{
"epoch": 1.27,
"learning_rate": 1.595679157469322e-06,
"loss": 1.6826,
"step": 1284
},
{
"epoch": 1.27,
"learning_rate": 1.5943889690121518e-06,
"loss": 1.7319,
"step": 1286
},
{
"epoch": 1.28,
"learning_rate": 1.5930972488019736e-06,
"loss": 1.737,
"step": 1288
},
{
"epoch": 1.28,
"learning_rate": 1.5918040001675783e-06,
"loss": 1.7317,
"step": 1290
},
{
"epoch": 1.28,
"learning_rate": 1.5905092264416951e-06,
"loss": 1.8041,
"step": 1292
},
{
"epoch": 1.28,
"learning_rate": 1.5892129309609829e-06,
"loss": 1.7459,
"step": 1294
},
{
"epoch": 1.28,
"learning_rate": 1.5879151170660236e-06,
"loss": 1.688,
"step": 1296
},
{
"epoch": 1.29,
"learning_rate": 1.5866157881013105e-06,
"loss": 1.712,
"step": 1298
},
{
"epoch": 1.29,
"learning_rate": 1.585314947415242e-06,
"loss": 1.696,
"step": 1300
},
{
"epoch": 1.29,
"learning_rate": 1.5840125983601124e-06,
"loss": 1.7379,
"step": 1302
},
{
"epoch": 1.29,
"learning_rate": 1.582708744292103e-06,
"loss": 1.712,
"step": 1304
},
{
"epoch": 1.29,
"learning_rate": 1.5814033885712731e-06,
"loss": 1.6724,
"step": 1306
},
{
"epoch": 1.3,
"learning_rate": 1.5800965345615524e-06,
"loss": 1.7081,
"step": 1308
},
{
"epoch": 1.3,
"learning_rate": 1.578788185630732e-06,
"loss": 1.7365,
"step": 1310
},
{
"epoch": 1.3,
"learning_rate": 1.5774783451504536e-06,
"loss": 1.7326,
"step": 1312
},
{
"epoch": 1.3,
"learning_rate": 1.576167016496205e-06,
"loss": 1.7431,
"step": 1314
},
{
"epoch": 1.3,
"learning_rate": 1.5748542030473076e-06,
"loss": 1.7429,
"step": 1316
},
{
"epoch": 1.31,
"learning_rate": 1.5741972405819502e-06,
"loss": 1.7743,
"step": 1318
},
{
"epoch": 1.31,
"learning_rate": 1.5728822062856757e-06,
"loss": 1.747,
"step": 1320
},
{
"epoch": 1.31,
"learning_rate": 1.5715656956597782e-06,
"loss": 1.7156,
"step": 1322
},
{
"epoch": 1.31,
"learning_rate": 1.570247712096934e-06,
"loss": 1.7401,
"step": 1324
},
{
"epoch": 1.31,
"learning_rate": 1.568928258993614e-06,
"loss": 1.7241,
"step": 1326
},
{
"epoch": 1.32,
"learning_rate": 1.5676073397500774e-06,
"loss": 1.741,
"step": 1328
},
{
"epoch": 1.32,
"learning_rate": 1.566284957770361e-06,
"loss": 1.7617,
"step": 1330
},
{
"epoch": 1.32,
"learning_rate": 1.5649611164622712e-06,
"loss": 1.7222,
"step": 1332
},
{
"epoch": 1.32,
"learning_rate": 1.5636358192373752e-06,
"loss": 1.758,
"step": 1334
},
{
"epoch": 1.32,
"learning_rate": 1.5623090695109923e-06,
"loss": 1.7101,
"step": 1336
},
{
"epoch": 1.33,
"learning_rate": 1.5609808707021848e-06,
"loss": 1.7094,
"step": 1338
},
{
"epoch": 1.33,
"learning_rate": 1.5596512262337489e-06,
"loss": 1.7366,
"step": 1340
},
{
"epoch": 1.33,
"learning_rate": 1.5583201395322068e-06,
"loss": 1.7033,
"step": 1342
},
{
"epoch": 1.33,
"learning_rate": 1.5569876140277976e-06,
"loss": 1.7713,
"step": 1344
},
{
"epoch": 1.33,
"learning_rate": 1.5556536531544674e-06,
"loss": 1.7216,
"step": 1346
},
{
"epoch": 1.34,
"learning_rate": 1.5543182603498615e-06,
"loss": 1.6945,
"step": 1348
},
{
"epoch": 1.34,
"learning_rate": 1.5529814390553162e-06,
"loss": 1.7828,
"step": 1350
},
{
"epoch": 1.34,
"learning_rate": 1.551643192715848e-06,
"loss": 1.712,
"step": 1352
},
{
"epoch": 1.34,
"learning_rate": 1.5503035247801464e-06,
"loss": 1.7641,
"step": 1354
},
{
"epoch": 1.34,
"learning_rate": 1.5489624387005638e-06,
"loss": 1.7353,
"step": 1356
},
{
"epoch": 1.35,
"learning_rate": 1.5476199379331078e-06,
"loss": 1.7454,
"step": 1358
},
{
"epoch": 1.35,
"learning_rate": 1.5462760259374314e-06,
"loss": 1.7672,
"step": 1360
},
{
"epoch": 1.35,
"learning_rate": 1.5449307061768245e-06,
"loss": 1.7404,
"step": 1362
},
{
"epoch": 1.35,
"learning_rate": 1.5435839821182043e-06,
"loss": 1.749,
"step": 1364
},
{
"epoch": 1.35,
"learning_rate": 1.5422358572321078e-06,
"loss": 1.7655,
"step": 1366
},
{
"epoch": 1.36,
"learning_rate": 1.540886334992681e-06,
"loss": 1.6808,
"step": 1368
},
{
"epoch": 1.36,
"learning_rate": 1.5395354188776718e-06,
"loss": 1.7245,
"step": 1370
},
{
"epoch": 1.36,
"learning_rate": 1.5381831123684194e-06,
"loss": 1.7344,
"step": 1372
},
{
"epoch": 1.36,
"learning_rate": 1.5368294189498471e-06,
"loss": 1.7266,
"step": 1374
},
{
"epoch": 1.36,
"learning_rate": 1.5354743421104508e-06,
"loss": 1.7561,
"step": 1376
},
{
"epoch": 1.37,
"learning_rate": 1.5341178853422928e-06,
"loss": 1.7185,
"step": 1378
},
{
"epoch": 1.37,
"learning_rate": 1.5327600521409906e-06,
"loss": 1.6742,
"step": 1380
},
{
"epoch": 1.37,
"learning_rate": 1.5314008460057098e-06,
"loss": 1.7401,
"step": 1382
},
{
"epoch": 1.37,
"learning_rate": 1.5300402704391533e-06,
"loss": 1.7582,
"step": 1384
},
{
"epoch": 1.37,
"learning_rate": 1.5286783289475527e-06,
"loss": 1.7308,
"step": 1386
},
{
"epoch": 1.38,
"learning_rate": 1.527315025040661e-06,
"loss": 1.6841,
"step": 1388
},
{
"epoch": 1.38,
"learning_rate": 1.5259503622317408e-06,
"loss": 1.7609,
"step": 1390
},
{
"epoch": 1.38,
"learning_rate": 1.5245843440375574e-06,
"loss": 1.7415,
"step": 1392
},
{
"epoch": 1.38,
"learning_rate": 1.523216973978369e-06,
"loss": 1.7465,
"step": 1394
},
{
"epoch": 1.38,
"learning_rate": 1.5218482555779164e-06,
"loss": 1.7168,
"step": 1396
},
{
"epoch": 1.39,
"learning_rate": 1.520478192363417e-06,
"loss": 1.7005,
"step": 1398
},
{
"epoch": 1.39,
"learning_rate": 1.5191067878655522e-06,
"loss": 1.7353,
"step": 1400
},
{
"epoch": 1.39,
"learning_rate": 1.517734045618461e-06,
"loss": 1.7134,
"step": 1402
},
{
"epoch": 1.39,
"learning_rate": 1.5163599691597288e-06,
"loss": 1.7103,
"step": 1404
},
{
"epoch": 1.39,
"learning_rate": 1.5149845620303801e-06,
"loss": 1.7121,
"step": 1406
},
{
"epoch": 1.4,
"learning_rate": 1.513607827774869e-06,
"loss": 1.7294,
"step": 1408
},
{
"epoch": 1.4,
"learning_rate": 1.5122297699410678e-06,
"loss": 1.757,
"step": 1410
},
{
"epoch": 1.4,
"learning_rate": 1.5108503920802613e-06,
"loss": 1.7124,
"step": 1412
},
{
"epoch": 1.4,
"learning_rate": 1.5094696977471358e-06,
"loss": 1.6962,
"step": 1414
},
{
"epoch": 1.4,
"learning_rate": 1.5080876904997694e-06,
"loss": 1.7188,
"step": 1416
},
{
"epoch": 1.41,
"learning_rate": 1.506704373899625e-06,
"loss": 1.7219,
"step": 1418
},
{
"epoch": 1.41,
"learning_rate": 1.505319751511538e-06,
"loss": 1.7692,
"step": 1420
},
{
"epoch": 1.41,
"learning_rate": 1.5039338269037103e-06,
"loss": 1.6748,
"step": 1422
},
{
"epoch": 1.41,
"learning_rate": 1.5025466036476985e-06,
"loss": 1.7626,
"step": 1424
},
{
"epoch": 1.41,
"learning_rate": 1.5011580853184073e-06,
"loss": 1.7258,
"step": 1426
},
{
"epoch": 1.41,
"learning_rate": 1.4997682754940773e-06,
"loss": 1.7283,
"step": 1428
},
{
"epoch": 1.42,
"learning_rate": 1.498377177756278e-06,
"loss": 1.7183,
"step": 1430
},
{
"epoch": 1.42,
"learning_rate": 1.496984795689898e-06,
"loss": 1.739,
"step": 1432
},
{
"epoch": 1.42,
"learning_rate": 1.4955911328831353e-06,
"loss": 1.7227,
"step": 1434
},
{
"epoch": 1.42,
"learning_rate": 1.4941961929274892e-06,
"loss": 1.6902,
"step": 1436
},
{
"epoch": 1.42,
"learning_rate": 1.4927999794177486e-06,
"loss": 1.773,
"step": 1438
},
{
"epoch": 1.43,
"learning_rate": 1.4914024959519861e-06,
"loss": 1.7376,
"step": 1440
},
{
"epoch": 1.43,
"learning_rate": 1.490003746131546e-06,
"loss": 1.7352,
"step": 1442
},
{
"epoch": 1.43,
"learning_rate": 1.4886037335610366e-06,
"loss": 1.7633,
"step": 1444
},
{
"epoch": 1.43,
"learning_rate": 1.4872024618483199e-06,
"loss": 1.7321,
"step": 1446
},
{
"epoch": 1.43,
"learning_rate": 1.4857999346045026e-06,
"loss": 1.7696,
"step": 1448
},
{
"epoch": 1.44,
"learning_rate": 1.484396155443928e-06,
"loss": 1.7155,
"step": 1450
},
{
"epoch": 1.44,
"learning_rate": 1.482991127984164e-06,
"loss": 1.686,
"step": 1452
},
{
"epoch": 1.44,
"learning_rate": 1.4815848558459965e-06,
"loss": 1.6871,
"step": 1454
},
{
"epoch": 1.44,
"learning_rate": 1.4801773426534188e-06,
"loss": 1.7717,
"step": 1456
},
{
"epoch": 1.44,
"learning_rate": 1.478768592033622e-06,
"loss": 1.7399,
"step": 1458
},
{
"epoch": 1.45,
"learning_rate": 1.4773586076169872e-06,
"loss": 1.7117,
"step": 1460
},
{
"epoch": 1.45,
"learning_rate": 1.4759473930370736e-06,
"loss": 1.7231,
"step": 1462
},
{
"epoch": 1.45,
"learning_rate": 1.474534951930611e-06,
"loss": 1.7653,
"step": 1464
},
{
"epoch": 1.45,
"learning_rate": 1.4731212879374905e-06,
"loss": 1.7456,
"step": 1466
},
{
"epoch": 1.45,
"learning_rate": 1.471706404700754e-06,
"loss": 1.7219,
"step": 1468
},
{
"epoch": 1.46,
"learning_rate": 1.470290305866586e-06,
"loss": 1.6839,
"step": 1470
},
{
"epoch": 1.46,
"learning_rate": 1.4688729950843033e-06,
"loss": 1.7499,
"step": 1472
},
{
"epoch": 1.46,
"learning_rate": 1.4674544760063455e-06,
"loss": 1.7668,
"step": 1474
},
{
"epoch": 1.46,
"learning_rate": 1.4660347522882668e-06,
"loss": 1.7368,
"step": 1476
},
{
"epoch": 1.46,
"learning_rate": 1.464613827588725e-06,
"loss": 1.7342,
"step": 1478
},
{
"epoch": 1.47,
"learning_rate": 1.4631917055694735e-06,
"loss": 1.7864,
"step": 1480
},
{
"epoch": 1.47,
"learning_rate": 1.4617683898953514e-06,
"loss": 1.7277,
"step": 1482
},
{
"epoch": 1.47,
"learning_rate": 1.4603438842342728e-06,
"loss": 1.7806,
"step": 1484
},
{
"epoch": 1.47,
"learning_rate": 1.4589181922572194e-06,
"loss": 1.7868,
"step": 1486
},
{
"epoch": 1.47,
"learning_rate": 1.4574913176382296e-06,
"loss": 1.7212,
"step": 1488
},
{
"epoch": 1.48,
"learning_rate": 1.4560632640543893e-06,
"loss": 1.7262,
"step": 1490
},
{
"epoch": 1.48,
"learning_rate": 1.4546340351858237e-06,
"loss": 1.7261,
"step": 1492
},
{
"epoch": 1.48,
"learning_rate": 1.4532036347156854e-06,
"loss": 1.7392,
"step": 1494
},
{
"epoch": 1.48,
"learning_rate": 1.451772066330147e-06,
"loss": 1.6957,
"step": 1496
},
{
"epoch": 1.48,
"learning_rate": 1.4503393337183906e-06,
"loss": 1.6911,
"step": 1498
},
{
"epoch": 1.49,
"learning_rate": 1.448905440572599e-06,
"loss": 1.6689,
"step": 1500
},
{
"epoch": 1.49,
"learning_rate": 1.4474703905879447e-06,
"loss": 1.7359,
"step": 1502
},
{
"epoch": 1.49,
"learning_rate": 1.446034187462582e-06,
"loss": 1.7313,
"step": 1504
},
{
"epoch": 1.49,
"learning_rate": 1.4445968348976373e-06,
"loss": 1.8013,
"step": 1506
},
{
"epoch": 1.49,
"learning_rate": 1.4431583365971992e-06,
"loss": 1.7201,
"step": 1508
},
{
"epoch": 1.5,
"learning_rate": 1.441718696268307e-06,
"loss": 1.742,
"step": 1510
},
{
"epoch": 1.5,
"learning_rate": 1.4402779176209457e-06,
"loss": 1.7234,
"step": 1512
},
{
"epoch": 1.5,
"learning_rate": 1.4388360043680314e-06,
"loss": 1.6799,
"step": 1514
},
{
"epoch": 1.5,
"learning_rate": 1.4373929602254061e-06,
"loss": 1.7464,
"step": 1516
},
{
"epoch": 1.5,
"learning_rate": 1.435948788911825e-06,
"loss": 1.777,
"step": 1518
},
{
"epoch": 1.51,
"learning_rate": 1.4345034941489482e-06,
"loss": 1.672,
"step": 1520
},
{
"epoch": 1.51,
"learning_rate": 1.433057079661331e-06,
"loss": 1.7038,
"step": 1522
},
{
"epoch": 1.51,
"learning_rate": 1.4316095491764145e-06,
"loss": 1.7479,
"step": 1524
},
{
"epoch": 1.51,
"learning_rate": 1.430160906424515e-06,
"loss": 1.7366,
"step": 1526
},
{
"epoch": 1.51,
"learning_rate": 1.428711155138816e-06,
"loss": 1.7715,
"step": 1528
},
{
"epoch": 1.52,
"learning_rate": 1.4272602990553574e-06,
"loss": 1.7368,
"step": 1530
},
{
"epoch": 1.52,
"learning_rate": 1.4258083419130264e-06,
"loss": 1.6916,
"step": 1532
},
{
"epoch": 1.52,
"learning_rate": 1.424355287453547e-06,
"loss": 1.7251,
"step": 1534
},
{
"epoch": 1.52,
"learning_rate": 1.4229011394214717e-06,
"loss": 1.7461,
"step": 1536
},
{
"epoch": 1.52,
"learning_rate": 1.4214459015641708e-06,
"loss": 1.7513,
"step": 1538
},
{
"epoch": 1.53,
"learning_rate": 1.4199895776318228e-06,
"loss": 1.772,
"step": 1540
},
{
"epoch": 1.53,
"learning_rate": 1.4185321713774059e-06,
"loss": 1.6761,
"step": 1542
},
{
"epoch": 1.53,
"learning_rate": 1.4170736865566872e-06,
"loss": 1.7396,
"step": 1544
},
{
"epoch": 1.53,
"learning_rate": 1.4156141269282124e-06,
"loss": 1.7004,
"step": 1546
},
{
"epoch": 1.53,
"learning_rate": 1.4141534962532984e-06,
"loss": 1.7606,
"step": 1548
},
{
"epoch": 1.54,
"learning_rate": 1.4126917982960212e-06,
"loss": 1.7303,
"step": 1550
},
{
"epoch": 1.54,
"learning_rate": 1.4112290368232072e-06,
"loss": 1.7514,
"step": 1552
},
{
"epoch": 1.54,
"learning_rate": 1.4097652156044242e-06,
"loss": 1.7644,
"step": 1554
},
{
"epoch": 1.54,
"learning_rate": 1.4083003384119704e-06,
"loss": 1.7305,
"step": 1556
},
{
"epoch": 1.54,
"learning_rate": 1.4068344090208656e-06,
"loss": 1.6838,
"step": 1558
},
{
"epoch": 1.55,
"learning_rate": 1.405367431208841e-06,
"loss": 1.7386,
"step": 1560
},
{
"epoch": 1.55,
"learning_rate": 1.4038994087563297e-06,
"loss": 1.7237,
"step": 1562
},
{
"epoch": 1.55,
"learning_rate": 1.4024303454464564e-06,
"loss": 1.7457,
"step": 1564
},
{
"epoch": 1.55,
"learning_rate": 1.4009602450650287e-06,
"loss": 1.7415,
"step": 1566
},
{
"epoch": 1.55,
"learning_rate": 1.3994891114005264e-06,
"loss": 1.7358,
"step": 1568
},
{
"epoch": 1.56,
"learning_rate": 1.398016948244093e-06,
"loss": 1.7364,
"step": 1570
},
{
"epoch": 1.56,
"learning_rate": 1.3965437593895226e-06,
"loss": 1.7275,
"step": 1572
},
{
"epoch": 1.56,
"learning_rate": 1.395069548633256e-06,
"loss": 1.7275,
"step": 1574
},
{
"epoch": 1.56,
"learning_rate": 1.3935943197743642e-06,
"loss": 1.7671,
"step": 1576
},
{
"epoch": 1.56,
"learning_rate": 1.3921180766145443e-06,
"loss": 1.7179,
"step": 1578
},
{
"epoch": 1.57,
"learning_rate": 1.3906408229581062e-06,
"loss": 1.6914,
"step": 1580
},
{
"epoch": 1.57,
"learning_rate": 1.3891625626119632e-06,
"loss": 1.7091,
"step": 1582
},
{
"epoch": 1.57,
"learning_rate": 1.3876832993856246e-06,
"loss": 1.6798,
"step": 1584
},
{
"epoch": 1.57,
"learning_rate": 1.3862030370911827e-06,
"loss": 1.7547,
"step": 1586
},
{
"epoch": 1.57,
"learning_rate": 1.3847217795433047e-06,
"loss": 1.7722,
"step": 1588
},
{
"epoch": 1.58,
"learning_rate": 1.3832395305592232e-06,
"loss": 1.7308,
"step": 1590
},
{
"epoch": 1.58,
"learning_rate": 1.381756293958725e-06,
"loss": 1.6963,
"step": 1592
},
{
"epoch": 1.58,
"learning_rate": 1.3802720735641432e-06,
"loss": 1.794,
"step": 1594
},
{
"epoch": 1.58,
"learning_rate": 1.378786873200344e-06,
"loss": 1.6798,
"step": 1596
},
{
"epoch": 1.58,
"learning_rate": 1.377300696694721e-06,
"loss": 1.7114,
"step": 1598
},
{
"epoch": 1.59,
"learning_rate": 1.3758135478771828e-06,
"loss": 1.7843,
"step": 1600
},
{
"epoch": 1.59,
"learning_rate": 1.374325430580143e-06,
"loss": 1.7503,
"step": 1602
},
{
"epoch": 1.59,
"learning_rate": 1.3728363486385122e-06,
"loss": 1.7296,
"step": 1604
},
{
"epoch": 1.59,
"learning_rate": 1.3713463058896856e-06,
"loss": 1.7322,
"step": 1606
},
{
"epoch": 1.59,
"learning_rate": 1.3698553061735351e-06,
"loss": 1.696,
"step": 1608
},
{
"epoch": 1.6,
"learning_rate": 1.368363353332399e-06,
"loss": 1.791,
"step": 1610
},
{
"epoch": 1.6,
"learning_rate": 1.3668704512110714e-06,
"loss": 1.7301,
"step": 1612
},
{
"epoch": 1.6,
"learning_rate": 1.3653766036567923e-06,
"loss": 1.6983,
"step": 1614
},
{
"epoch": 1.6,
"learning_rate": 1.3638818145192395e-06,
"loss": 1.7793,
"step": 1616
},
{
"epoch": 1.6,
"learning_rate": 1.3623860876505155e-06,
"loss": 1.735,
"step": 1618
},
{
"epoch": 1.61,
"learning_rate": 1.360889426905141e-06,
"loss": 1.7408,
"step": 1620
},
{
"epoch": 1.61,
"learning_rate": 1.359391836140042e-06,
"loss": 1.7122,
"step": 1622
},
{
"epoch": 1.61,
"learning_rate": 1.357893319214542e-06,
"loss": 1.6859,
"step": 1624
},
{
"epoch": 1.61,
"learning_rate": 1.3563938799903505e-06,
"loss": 1.7284,
"step": 1626
},
{
"epoch": 1.61,
"learning_rate": 1.354893522331555e-06,
"loss": 1.7578,
"step": 1628
},
{
"epoch": 1.62,
"learning_rate": 1.3533922501046087e-06,
"loss": 1.7275,
"step": 1630
},
{
"epoch": 1.62,
"learning_rate": 1.351890067178322e-06,
"loss": 1.7473,
"step": 1632
},
{
"epoch": 1.62,
"learning_rate": 1.350386977423852e-06,
"loss": 1.7252,
"step": 1634
},
{
"epoch": 1.62,
"learning_rate": 1.3488829847146936e-06,
"loss": 1.7676,
"step": 1636
},
{
"epoch": 1.62,
"learning_rate": 1.3473780929266675e-06,
"loss": 1.6839,
"step": 1638
},
{
"epoch": 1.62,
"learning_rate": 1.345872305937912e-06,
"loss": 1.755,
"step": 1640
},
{
"epoch": 1.63,
"learning_rate": 1.3443656276288723e-06,
"loss": 1.7246,
"step": 1642
},
{
"epoch": 1.63,
"learning_rate": 1.3428580618822897e-06,
"loss": 1.7662,
"step": 1644
},
{
"epoch": 1.63,
"learning_rate": 1.3413496125831944e-06,
"loss": 1.7334,
"step": 1646
},
{
"epoch": 1.63,
"learning_rate": 1.3398402836188912e-06,
"loss": 1.7078,
"step": 1648
},
{
"epoch": 1.63,
"learning_rate": 1.3383300788789534e-06,
"loss": 1.7083,
"step": 1650
},
{
"epoch": 1.64,
"learning_rate": 1.3368190022552108e-06,
"loss": 1.751,
"step": 1652
},
{
"epoch": 1.64,
"learning_rate": 1.3353070576417395e-06,
"loss": 1.7763,
"step": 1654
},
{
"epoch": 1.64,
"learning_rate": 1.3337942489348537e-06,
"loss": 1.7089,
"step": 1656
},
{
"epoch": 1.64,
"learning_rate": 1.332280580033093e-06,
"loss": 1.7138,
"step": 1658
},
{
"epoch": 1.64,
"learning_rate": 1.330766054837214e-06,
"loss": 1.7001,
"step": 1660
},
{
"epoch": 1.65,
"learning_rate": 1.3292506772501816e-06,
"loss": 1.7434,
"step": 1662
},
{
"epoch": 1.65,
"learning_rate": 1.327734451177155e-06,
"loss": 1.7676,
"step": 1664
},
{
"epoch": 1.65,
"learning_rate": 1.326217380525481e-06,
"loss": 1.7203,
"step": 1666
},
{
"epoch": 1.65,
"learning_rate": 1.3246994692046835e-06,
"loss": 1.7887,
"step": 1668
},
{
"epoch": 1.65,
"learning_rate": 1.3231807211264518e-06,
"loss": 1.705,
"step": 1670
},
{
"epoch": 1.66,
"learning_rate": 1.321661140204632e-06,
"loss": 1.7378,
"step": 1672
},
{
"epoch": 1.66,
"learning_rate": 1.320140730355217e-06,
"loss": 1.6944,
"step": 1674
},
{
"epoch": 1.66,
"learning_rate": 1.3186194954963345e-06,
"loss": 1.6968,
"step": 1676
},
{
"epoch": 1.66,
"learning_rate": 1.3170974395482395e-06,
"loss": 1.7132,
"step": 1678
},
{
"epoch": 1.66,
"learning_rate": 1.3155745664333023e-06,
"loss": 1.7319,
"step": 1680
},
{
"epoch": 1.67,
"learning_rate": 1.3140508800759996e-06,
"loss": 1.7053,
"step": 1682
},
{
"epoch": 1.67,
"learning_rate": 1.3125263844029032e-06,
"loss": 1.7298,
"step": 1684
},
{
"epoch": 1.67,
"learning_rate": 1.3110010833426712e-06,
"loss": 1.728,
"step": 1686
},
{
"epoch": 1.67,
"learning_rate": 1.3094749808260365e-06,
"loss": 1.7833,
"step": 1688
},
{
"epoch": 1.67,
"learning_rate": 1.3079480807857983e-06,
"loss": 1.731,
"step": 1690
},
{
"epoch": 1.68,
"learning_rate": 1.3064203871568098e-06,
"loss": 1.7514,
"step": 1692
},
{
"epoch": 1.68,
"learning_rate": 1.3048919038759705e-06,
"loss": 1.7312,
"step": 1694
},
{
"epoch": 1.68,
"learning_rate": 1.3033626348822135e-06,
"loss": 1.716,
"step": 1696
},
{
"epoch": 1.68,
"learning_rate": 1.3018325841164984e-06,
"loss": 1.7134,
"step": 1698
},
{
"epoch": 1.68,
"learning_rate": 1.300301755521798e-06,
"loss": 1.761,
"step": 1700
},
{
"epoch": 1.69,
"learning_rate": 1.2987701530430903e-06,
"loss": 1.7453,
"step": 1702
},
{
"epoch": 1.69,
"learning_rate": 1.2972377806273478e-06,
"loss": 1.7532,
"step": 1704
},
{
"epoch": 1.69,
"learning_rate": 1.295704642223526e-06,
"loss": 1.6817,
"step": 1706
},
{
"epoch": 1.69,
"learning_rate": 1.2941707417825556e-06,
"loss": 1.7187,
"step": 1708
},
{
"epoch": 1.69,
"learning_rate": 1.292636083257331e-06,
"loss": 1.705,
"step": 1710
},
{
"epoch": 1.7,
"learning_rate": 1.291100670602699e-06,
"loss": 1.7183,
"step": 1712
},
{
"epoch": 1.7,
"learning_rate": 1.2895645077754515e-06,
"loss": 1.6986,
"step": 1714
},
{
"epoch": 1.7,
"learning_rate": 1.288027598734312e-06,
"loss": 1.7339,
"step": 1716
},
{
"epoch": 1.7,
"learning_rate": 1.2864899474399287e-06,
"loss": 1.7451,
"step": 1718
},
{
"epoch": 1.7,
"learning_rate": 1.284951557854861e-06,
"loss": 1.7132,
"step": 1720
},
{
"epoch": 1.71,
"learning_rate": 1.2834124339435712e-06,
"loss": 1.6633,
"step": 1722
},
{
"epoch": 1.71,
"learning_rate": 1.2818725796724153e-06,
"loss": 1.6976,
"step": 1724
},
{
"epoch": 1.71,
"learning_rate": 1.2803319990096299e-06,
"loss": 1.729,
"step": 1726
},
{
"epoch": 1.71,
"learning_rate": 1.2787906959253242e-06,
"loss": 1.7273,
"step": 1728
},
{
"epoch": 1.71,
"learning_rate": 1.277248674391469e-06,
"loss": 1.7227,
"step": 1730
},
{
"epoch": 1.72,
"learning_rate": 1.2757059383818861e-06,
"loss": 1.7268,
"step": 1732
},
{
"epoch": 1.72,
"learning_rate": 1.2741624918722402e-06,
"loss": 1.6966,
"step": 1734
},
{
"epoch": 1.72,
"learning_rate": 1.272618338840025e-06,
"loss": 1.7406,
"step": 1736
},
{
"epoch": 1.72,
"learning_rate": 1.2710734832645555e-06,
"loss": 1.7516,
"step": 1738
},
{
"epoch": 1.72,
"learning_rate": 1.2695279291269576e-06,
"loss": 1.7037,
"step": 1740
},
{
"epoch": 1.73,
"learning_rate": 1.2679816804101573e-06,
"loss": 1.7144,
"step": 1742
},
{
"epoch": 1.73,
"learning_rate": 1.2664347410988701e-06,
"loss": 1.7097,
"step": 1744
},
{
"epoch": 1.73,
"learning_rate": 1.2648871151795919e-06,
"loss": 1.7297,
"step": 1746
},
{
"epoch": 1.73,
"learning_rate": 1.2633388066405873e-06,
"loss": 1.768,
"step": 1748
},
{
"epoch": 1.73,
"learning_rate": 1.26178981947188e-06,
"loss": 1.7564,
"step": 1750
},
{
"epoch": 1.74,
"learning_rate": 1.2602401576652435e-06,
"loss": 1.7013,
"step": 1752
},
{
"epoch": 1.74,
"learning_rate": 1.2586898252141892e-06,
"loss": 1.7277,
"step": 1754
},
{
"epoch": 1.74,
"learning_rate": 1.2571388261139562e-06,
"loss": 1.6789,
"step": 1756
},
{
"epoch": 1.74,
"learning_rate": 1.2555871643615024e-06,
"loss": 1.6935,
"step": 1758
},
{
"epoch": 1.74,
"learning_rate": 1.2540348439554932e-06,
"loss": 1.7629,
"step": 1760
},
{
"epoch": 1.75,
"learning_rate": 1.2524818688962917e-06,
"loss": 1.7519,
"step": 1762
},
{
"epoch": 1.75,
"learning_rate": 1.2509282431859468e-06,
"loss": 1.7405,
"step": 1764
},
{
"epoch": 1.75,
"learning_rate": 1.2493739708281857e-06,
"loss": 1.6889,
"step": 1766
},
{
"epoch": 1.75,
"learning_rate": 1.2478190558284007e-06,
"loss": 1.6996,
"step": 1768
},
{
"epoch": 1.75,
"learning_rate": 1.2462635021936412e-06,
"loss": 1.6835,
"step": 1770
},
{
"epoch": 1.76,
"learning_rate": 1.244707313932602e-06,
"loss": 1.721,
"step": 1772
},
{
"epoch": 1.76,
"learning_rate": 1.243150495055613e-06,
"loss": 1.7609,
"step": 1774
},
{
"epoch": 1.76,
"learning_rate": 1.24159304957463e-06,
"loss": 1.685,
"step": 1776
},
{
"epoch": 1.76,
"learning_rate": 1.2400349815032226e-06,
"loss": 1.7116,
"step": 1778
},
{
"epoch": 1.76,
"learning_rate": 1.2384762948565657e-06,
"loss": 1.7281,
"step": 1780
},
{
"epoch": 1.77,
"learning_rate": 1.2369169936514281e-06,
"loss": 1.7215,
"step": 1782
},
{
"epoch": 1.77,
"learning_rate": 1.2353570819061615e-06,
"loss": 1.6872,
"step": 1784
},
{
"epoch": 1.77,
"learning_rate": 1.2337965636406922e-06,
"loss": 1.6888,
"step": 1786
},
{
"epoch": 1.77,
"learning_rate": 1.2322354428765087e-06,
"loss": 1.7483,
"step": 1788
},
{
"epoch": 1.77,
"learning_rate": 1.2306737236366524e-06,
"loss": 1.6962,
"step": 1790
},
{
"epoch": 1.78,
"learning_rate": 1.229111409945707e-06,
"loss": 1.727,
"step": 1792
},
{
"epoch": 1.78,
"learning_rate": 1.2275485058297878e-06,
"loss": 1.7225,
"step": 1794
},
{
"epoch": 1.78,
"learning_rate": 1.225985015316533e-06,
"loss": 1.7546,
"step": 1796
},
{
"epoch": 1.78,
"learning_rate": 1.2244209424350895e-06,
"loss": 1.7603,
"step": 1798
},
{
"epoch": 1.78,
"learning_rate": 1.222856291216107e-06,
"loss": 1.7435,
"step": 1800
},
{
"epoch": 1.79,
"learning_rate": 1.2212910656917252e-06,
"loss": 1.6999,
"step": 1802
},
{
"epoch": 1.79,
"learning_rate": 1.2197252698955629e-06,
"loss": 1.7197,
"step": 1804
},
{
"epoch": 1.79,
"learning_rate": 1.21815890786271e-06,
"loss": 1.7252,
"step": 1806
},
{
"epoch": 1.79,
"learning_rate": 1.2165919836297141e-06,
"loss": 1.7659,
"step": 1808
},
{
"epoch": 1.79,
"learning_rate": 1.215024501234572e-06,
"loss": 1.7596,
"step": 1810
},
{
"epoch": 1.8,
"learning_rate": 1.2134564647167203e-06,
"loss": 1.7666,
"step": 1812
},
{
"epoch": 1.8,
"learning_rate": 1.2118878781170213e-06,
"loss": 1.7682,
"step": 1814
},
{
"epoch": 1.8,
"learning_rate": 1.2103187454777564e-06,
"loss": 1.7111,
"step": 1816
},
{
"epoch": 1.8,
"learning_rate": 1.208749070842614e-06,
"loss": 1.7394,
"step": 1818
},
{
"epoch": 1.8,
"learning_rate": 1.2071788582566788e-06,
"loss": 1.7801,
"step": 1820
},
{
"epoch": 1.81,
"learning_rate": 1.2056081117664217e-06,
"loss": 1.7963,
"step": 1822
},
{
"epoch": 1.81,
"learning_rate": 1.2040368354196904e-06,
"loss": 1.7514,
"step": 1824
},
{
"epoch": 1.81,
"learning_rate": 1.202465033265697e-06,
"loss": 1.7433,
"step": 1826
},
{
"epoch": 1.81,
"learning_rate": 1.2008927093550092e-06,
"loss": 1.7135,
"step": 1828
},
{
"epoch": 1.81,
"learning_rate": 1.1993198677395392e-06,
"loss": 1.7861,
"step": 1830
},
{
"epoch": 1.82,
"learning_rate": 1.1977465124725331e-06,
"loss": 1.6881,
"step": 1832
},
{
"epoch": 1.82,
"learning_rate": 1.1961726476085617e-06,
"loss": 1.7643,
"step": 1834
},
{
"epoch": 1.82,
"learning_rate": 1.1945982772035066e-06,
"loss": 1.7258,
"step": 1836
},
{
"epoch": 1.82,
"learning_rate": 1.1930234053145551e-06,
"loss": 1.7356,
"step": 1838
},
{
"epoch": 1.82,
"learning_rate": 1.1914480360001848e-06,
"loss": 1.7001,
"step": 1840
},
{
"epoch": 1.83,
"learning_rate": 1.1898721733201566e-06,
"loss": 1.6947,
"step": 1842
},
{
"epoch": 1.83,
"learning_rate": 1.1882958213355018e-06,
"loss": 1.7146,
"step": 1844
},
{
"epoch": 1.83,
"learning_rate": 1.1867189841085128e-06,
"loss": 1.7223,
"step": 1846
},
{
"epoch": 1.83,
"learning_rate": 1.185141665702733e-06,
"loss": 1.7309,
"step": 1848
},
{
"epoch": 1.83,
"learning_rate": 1.1835638701829447e-06,
"loss": 1.7528,
"step": 1850
},
{
"epoch": 1.84,
"learning_rate": 1.1819856016151615e-06,
"loss": 1.746,
"step": 1852
},
{
"epoch": 1.84,
"learning_rate": 1.1804068640666146e-06,
"loss": 1.731,
"step": 1854
},
{
"epoch": 1.84,
"learning_rate": 1.1788276616057437e-06,
"loss": 1.7242,
"step": 1856
},
{
"epoch": 1.84,
"learning_rate": 1.1772479983021883e-06,
"loss": 1.6927,
"step": 1858
},
{
"epoch": 1.84,
"learning_rate": 1.1756678782267736e-06,
"loss": 1.7216,
"step": 1860
},
{
"epoch": 1.84,
"learning_rate": 1.1740873054515021e-06,
"loss": 1.724,
"step": 1862
},
{
"epoch": 1.85,
"learning_rate": 1.1725062840495444e-06,
"loss": 1.7287,
"step": 1864
},
{
"epoch": 1.85,
"learning_rate": 1.1709248180952257e-06,
"loss": 1.755,
"step": 1866
},
{
"epoch": 1.85,
"learning_rate": 1.1693429116640176e-06,
"loss": 1.7372,
"step": 1868
},
{
"epoch": 1.85,
"learning_rate": 1.1677605688325264e-06,
"loss": 1.7581,
"step": 1870
},
{
"epoch": 1.85,
"learning_rate": 1.1661777936784835e-06,
"loss": 1.7146,
"step": 1872
},
{
"epoch": 1.86,
"learning_rate": 1.164594590280734e-06,
"loss": 1.7697,
"step": 1874
},
{
"epoch": 1.86,
"learning_rate": 1.1630109627192263e-06,
"loss": 1.7321,
"step": 1876
},
{
"epoch": 1.86,
"learning_rate": 1.1614269150750032e-06,
"loss": 1.7004,
"step": 1878
},
{
"epoch": 1.86,
"learning_rate": 1.1598424514301884e-06,
"loss": 1.7181,
"step": 1880
},
{
"epoch": 1.86,
"learning_rate": 1.1582575758679785e-06,
"loss": 1.7547,
"step": 1882
},
{
"epoch": 1.87,
"learning_rate": 1.1566722924726316e-06,
"loss": 1.7581,
"step": 1884
},
{
"epoch": 1.87,
"learning_rate": 1.1550866053294566e-06,
"loss": 1.7138,
"step": 1886
},
{
"epoch": 1.87,
"learning_rate": 1.1535005185248032e-06,
"loss": 1.7351,
"step": 1888
},
{
"epoch": 1.87,
"learning_rate": 1.151914036146051e-06,
"loss": 1.6995,
"step": 1890
},
{
"epoch": 1.87,
"learning_rate": 1.1503271622815983e-06,
"loss": 1.7171,
"step": 1892
},
{
"epoch": 1.88,
"learning_rate": 1.1487399010208528e-06,
"loss": 1.6896,
"step": 1894
},
{
"epoch": 1.88,
"learning_rate": 1.147152256454221e-06,
"loss": 1.6964,
"step": 1896
},
{
"epoch": 1.88,
"learning_rate": 1.1455642326730962e-06,
"loss": 1.6868,
"step": 1898
},
{
"epoch": 1.88,
"learning_rate": 1.1439758337698497e-06,
"loss": 1.753,
"step": 1900
},
{
"epoch": 1.88,
"learning_rate": 1.1423870638378192e-06,
"loss": 1.7063,
"step": 1902
},
{
"epoch": 1.89,
"learning_rate": 1.1407979269712984e-06,
"loss": 1.7445,
"step": 1904
},
{
"epoch": 1.89,
"learning_rate": 1.1392084272655273e-06,
"loss": 1.6722,
"step": 1906
},
{
"epoch": 1.89,
"learning_rate": 1.1376185688166797e-06,
"loss": 1.7483,
"step": 1908
},
{
"epoch": 1.89,
"learning_rate": 1.1360283557218555e-06,
"loss": 1.7225,
"step": 1910
},
{
"epoch": 1.89,
"learning_rate": 1.1344377920790665e-06,
"loss": 1.7167,
"step": 1912
},
{
"epoch": 1.9,
"learning_rate": 1.13284688198723e-06,
"loss": 1.7582,
"step": 1914
},
{
"epoch": 1.9,
"learning_rate": 1.1312556295461547e-06,
"loss": 1.7406,
"step": 1916
},
{
"epoch": 1.9,
"learning_rate": 1.129664038856532e-06,
"loss": 1.7747,
"step": 1918
},
{
"epoch": 1.9,
"learning_rate": 1.1280721140199255e-06,
"loss": 1.6814,
"step": 1920
},
{
"epoch": 1.9,
"learning_rate": 1.126479859138758e-06,
"loss": 1.7328,
"step": 1922
},
{
"epoch": 1.91,
"learning_rate": 1.1248872783163052e-06,
"loss": 1.6897,
"step": 1924
},
{
"epoch": 1.91,
"learning_rate": 1.1232943756566821e-06,
"loss": 1.6813,
"step": 1926
},
{
"epoch": 1.91,
"learning_rate": 1.1217011552648315e-06,
"loss": 1.6974,
"step": 1928
},
{
"epoch": 1.91,
"learning_rate": 1.1201076212465174e-06,
"loss": 1.6851,
"step": 1930
},
{
"epoch": 1.91,
"learning_rate": 1.11851377770831e-06,
"loss": 1.726,
"step": 1932
},
{
"epoch": 1.92,
"learning_rate": 1.1169196287575783e-06,
"loss": 1.7227,
"step": 1934
},
{
"epoch": 1.92,
"learning_rate": 1.115325178502478e-06,
"loss": 1.7094,
"step": 1936
},
{
"epoch": 1.92,
"learning_rate": 1.1137304310519413e-06,
"loss": 1.7229,
"step": 1938
},
{
"epoch": 1.92,
"learning_rate": 1.1121353905156662e-06,
"loss": 1.729,
"step": 1940
},
{
"epoch": 1.92,
"learning_rate": 1.1105400610041064e-06,
"loss": 1.6949,
"step": 1942
},
{
"epoch": 1.93,
"learning_rate": 1.1089444466284597e-06,
"loss": 1.7241,
"step": 1944
},
{
"epoch": 1.93,
"learning_rate": 1.1073485515006579e-06,
"loss": 1.6917,
"step": 1946
},
{
"epoch": 1.93,
"learning_rate": 1.1057523797333575e-06,
"loss": 1.7039,
"step": 1948
},
{
"epoch": 1.93,
"learning_rate": 1.1041559354399263e-06,
"loss": 1.7494,
"step": 1950
},
{
"epoch": 1.93,
"learning_rate": 1.1025592227344355e-06,
"loss": 1.6907,
"step": 1952
},
{
"epoch": 1.94,
"learning_rate": 1.1009622457316479e-06,
"loss": 1.7057,
"step": 1954
},
{
"epoch": 1.94,
"learning_rate": 1.099365008547007e-06,
"loss": 1.6984,
"step": 1956
},
{
"epoch": 1.94,
"learning_rate": 1.097767515296627e-06,
"loss": 1.6396,
"step": 1958
},
{
"epoch": 1.94,
"learning_rate": 1.0961697700972813e-06,
"loss": 1.684,
"step": 1960
},
{
"epoch": 1.94,
"learning_rate": 1.0945717770663943e-06,
"loss": 1.7735,
"step": 1962
},
{
"epoch": 1.95,
"learning_rate": 1.0929735403220274e-06,
"loss": 1.7183,
"step": 1964
},
{
"epoch": 1.95,
"learning_rate": 1.0913750639828709e-06,
"loss": 1.7382,
"step": 1966
},
{
"epoch": 1.95,
"learning_rate": 1.0897763521682325e-06,
"loss": 1.7274,
"step": 1968
},
{
"epoch": 1.95,
"learning_rate": 1.0881774089980257e-06,
"loss": 1.7601,
"step": 1970
},
{
"epoch": 1.95,
"learning_rate": 1.086578238592762e-06,
"loss": 1.7263,
"step": 1972
},
{
"epoch": 1.96,
"learning_rate": 1.0849788450735369e-06,
"loss": 1.7362,
"step": 1974
},
{
"epoch": 1.96,
"learning_rate": 1.0833792325620217e-06,
"loss": 1.7737,
"step": 1976
},
{
"epoch": 1.96,
"learning_rate": 1.0817794051804522e-06,
"loss": 1.7052,
"step": 1978
},
{
"epoch": 1.96,
"learning_rate": 1.0801793670516166e-06,
"loss": 1.7665,
"step": 1980
},
{
"epoch": 1.96,
"learning_rate": 1.0785791222988485e-06,
"loss": 1.7011,
"step": 1982
},
{
"epoch": 1.97,
"learning_rate": 1.0769786750460115e-06,
"loss": 1.7428,
"step": 1984
},
{
"epoch": 1.97,
"learning_rate": 1.0753780294174925e-06,
"loss": 1.6434,
"step": 1986
},
{
"epoch": 1.97,
"learning_rate": 1.0737771895381896e-06,
"loss": 1.7,
"step": 1988
},
{
"epoch": 1.97,
"learning_rate": 1.072176159533501e-06,
"loss": 1.6991,
"step": 1990
},
{
"epoch": 1.97,
"learning_rate": 1.0705749435293151e-06,
"loss": 1.7181,
"step": 1992
},
{
"epoch": 1.98,
"learning_rate": 1.0689735456519997e-06,
"loss": 1.7335,
"step": 1994
},
{
"epoch": 1.98,
"learning_rate": 1.0673719700283908e-06,
"loss": 1.7361,
"step": 1996
},
{
"epoch": 1.98,
"learning_rate": 1.0657702207857834e-06,
"loss": 1.7084,
"step": 1998
},
{
"epoch": 1.98,
"learning_rate": 1.0641683020519188e-06,
"loss": 1.7251,
"step": 2000
},
{
"epoch": 1.98,
"learning_rate": 1.0625662179549756e-06,
"loss": 1.7079,
"step": 2002
},
{
"epoch": 1.99,
"learning_rate": 1.060963972623559e-06,
"loss": 1.7479,
"step": 2004
},
{
"epoch": 1.99,
"learning_rate": 1.0593615701866891e-06,
"loss": 1.6626,
"step": 2006
},
{
"epoch": 1.99,
"learning_rate": 1.057759014773791e-06,
"loss": 1.6945,
"step": 2008
},
{
"epoch": 1.99,
"learning_rate": 1.056156310514684e-06,
"loss": 1.7186,
"step": 2010
},
{
"epoch": 1.99,
"learning_rate": 1.0545534615395713e-06,
"loss": 1.6669,
"step": 2012
},
{
"epoch": 2.0,
"learning_rate": 1.0529504719790286e-06,
"loss": 1.7381,
"step": 2014
},
{
"epoch": 2.0,
"learning_rate": 1.051347345963994e-06,
"loss": 1.6247,
"step": 2016
},
{
"epoch": 2.0,
"learning_rate": 1.0497440876257573e-06,
"loss": 1.737,
"step": 2018
},
{
"epoch": 2.0,
"learning_rate": 1.0481407010959493e-06,
"loss": 1.7018,
"step": 2020
},
{
"epoch": 2.0,
"learning_rate": 1.0465371905065315e-06,
"loss": 1.7111,
"step": 2022
},
{
"epoch": 2.01,
"learning_rate": 1.0449335599897847e-06,
"loss": 1.719,
"step": 2024
},
{
"epoch": 2.01,
"learning_rate": 1.0433298136782983e-06,
"loss": 1.7134,
"step": 2026
},
{
"epoch": 2.01,
"learning_rate": 1.0417259557049613e-06,
"loss": 1.7313,
"step": 2028
},
{
"epoch": 2.01,
"learning_rate": 1.0401219902029497e-06,
"loss": 1.7178,
"step": 2030
},
{
"epoch": 2.01,
"learning_rate": 1.0385179213057158e-06,
"loss": 1.6614,
"step": 2032
},
{
"epoch": 2.02,
"learning_rate": 1.0369137531469805e-06,
"loss": 1.7438,
"step": 2034
},
{
"epoch": 2.02,
"learning_rate": 1.0353094898607183e-06,
"loss": 1.6895,
"step": 2036
},
{
"epoch": 2.02,
"learning_rate": 1.0337051355811506e-06,
"loss": 1.7036,
"step": 2038
},
{
"epoch": 2.02,
"learning_rate": 1.0321006944427323e-06,
"loss": 1.6317,
"step": 2040
},
{
"epoch": 2.02,
"learning_rate": 1.0304961705801413e-06,
"loss": 1.7419,
"step": 2042
},
{
"epoch": 2.03,
"learning_rate": 1.0288915681282708e-06,
"loss": 1.6857,
"step": 2044
},
{
"epoch": 2.03,
"learning_rate": 1.0272868912222154e-06,
"loss": 1.6882,
"step": 2046
},
{
"epoch": 2.03,
"learning_rate": 1.025682143997261e-06,
"loss": 1.6892,
"step": 2048
},
{
"epoch": 2.03,
"learning_rate": 1.0240773305888757e-06,
"loss": 1.733,
"step": 2050
},
{
"epoch": 2.03,
"learning_rate": 1.0224724551326975e-06,
"loss": 1.6998,
"step": 2052
},
{
"epoch": 2.04,
"learning_rate": 1.020867521764525e-06,
"loss": 1.725,
"step": 2054
},
{
"epoch": 2.04,
"learning_rate": 1.0192625346203053e-06,
"loss": 1.6571,
"step": 2056
},
{
"epoch": 2.04,
"learning_rate": 1.0176574978361241e-06,
"loss": 1.7124,
"step": 2058
},
{
"epoch": 2.04,
"learning_rate": 1.0160524155481963e-06,
"loss": 1.6705,
"step": 2060
},
{
"epoch": 2.04,
"learning_rate": 1.014447291892852e-06,
"loss": 1.7278,
"step": 2062
},
{
"epoch": 2.05,
"learning_rate": 1.0128421310065295e-06,
"loss": 1.6724,
"step": 2064
},
{
"epoch": 2.05,
"learning_rate": 1.011236937025762e-06,
"loss": 1.6778,
"step": 2066
},
{
"epoch": 2.05,
"learning_rate": 1.0096317140871693e-06,
"loss": 1.7119,
"step": 2068
},
{
"epoch": 2.05,
"learning_rate": 1.0080264663274447e-06,
"loss": 1.7009,
"step": 2070
},
{
"epoch": 2.05,
"learning_rate": 1.0064211978833456e-06,
"loss": 1.7222,
"step": 2072
},
{
"epoch": 2.05,
"learning_rate": 1.0048159128916832e-06,
"loss": 1.6838,
"step": 2074
},
{
"epoch": 2.06,
"learning_rate": 1.0032106154893109e-06,
"loss": 1.6765,
"step": 2076
},
{
"epoch": 2.06,
"learning_rate": 1.0016053098131143e-06,
"loss": 1.6718,
"step": 2078
},
{
"epoch": 2.06,
"learning_rate": 1e-06,
"loss": 1.6745,
"step": 2080
},
{
"epoch": 2.06,
"learning_rate": 9.983946901868858e-07,
"loss": 1.6932,
"step": 2082
},
{
"epoch": 2.06,
"learning_rate": 9.967893845106888e-07,
"loss": 1.6402,
"step": 2084
},
{
"epoch": 2.07,
"learning_rate": 9.95184087108317e-07,
"loss": 1.7658,
"step": 2086
},
{
"epoch": 2.07,
"learning_rate": 9.935788021166543e-07,
"loss": 1.7063,
"step": 2088
},
{
"epoch": 2.07,
"learning_rate": 9.919735336725554e-07,
"loss": 1.6647,
"step": 2090
},
{
"epoch": 2.07,
"learning_rate": 9.903682859128306e-07,
"loss": 1.7114,
"step": 2092
},
{
"epoch": 2.07,
"learning_rate": 9.887630629742376e-07,
"loss": 1.7583,
"step": 2094
},
{
"epoch": 2.08,
"learning_rate": 9.87157868993471e-07,
"loss": 1.7003,
"step": 2096
},
{
"epoch": 2.08,
"learning_rate": 9.85552708107148e-07,
"loss": 1.6645,
"step": 2098
},
{
"epoch": 2.08,
"learning_rate": 9.839475844518038e-07,
"loss": 1.7179,
"step": 2100
},
{
"epoch": 2.08,
"learning_rate": 9.823425021638758e-07,
"loss": 1.7364,
"step": 2102
},
{
"epoch": 2.08,
"learning_rate": 9.807374653796948e-07,
"loss": 1.688,
"step": 2104
},
{
"epoch": 2.09,
"learning_rate": 9.79132478235475e-07,
"loss": 1.6513,
"step": 2106
},
{
"epoch": 2.09,
"learning_rate": 9.775275448673026e-07,
"loss": 1.6897,
"step": 2108
},
{
"epoch": 2.09,
"learning_rate": 9.759226694111242e-07,
"loss": 1.7039,
"step": 2110
},
{
"epoch": 2.09,
"learning_rate": 9.74317856002739e-07,
"loss": 1.6901,
"step": 2112
},
{
"epoch": 2.09,
"learning_rate": 9.727131087777848e-07,
"loss": 1.7266,
"step": 2114
},
{
"epoch": 2.1,
"learning_rate": 9.711084318717291e-07,
"loss": 1.6732,
"step": 2116
},
{
"epoch": 2.1,
"learning_rate": 9.695038294198588e-07,
"loss": 1.7003,
"step": 2118
},
{
"epoch": 2.1,
"learning_rate": 9.678993055572678e-07,
"loss": 1.7708,
"step": 2120
},
{
"epoch": 2.1,
"learning_rate": 9.66294864418849e-07,
"loss": 1.6985,
"step": 2122
},
{
"epoch": 2.1,
"learning_rate": 9.646905101392816e-07,
"loss": 1.6716,
"step": 2124
},
{
"epoch": 2.11,
"learning_rate": 9.630862468530196e-07,
"loss": 1.6845,
"step": 2126
},
{
"epoch": 2.11,
"learning_rate": 9.614820786942841e-07,
"loss": 1.7186,
"step": 2128
},
{
"epoch": 2.11,
"learning_rate": 9.598780097970505e-07,
"loss": 1.7135,
"step": 2130
},
{
"epoch": 2.11,
"learning_rate": 9.582740442950386e-07,
"loss": 1.7215,
"step": 2132
},
{
"epoch": 2.11,
"learning_rate": 9.566701863217018e-07,
"loss": 1.676,
"step": 2134
},
{
"epoch": 2.12,
"learning_rate": 9.550664400102154e-07,
"loss": 1.7331,
"step": 2136
},
{
"epoch": 2.12,
"learning_rate": 9.534628094934685e-07,
"loss": 1.7449,
"step": 2138
},
{
"epoch": 2.12,
"learning_rate": 9.518592989040506e-07,
"loss": 1.6794,
"step": 2140
},
{
"epoch": 2.12,
"learning_rate": 9.502559123742426e-07,
"loss": 1.6426,
"step": 2142
},
{
"epoch": 2.12,
"learning_rate": 9.486526540360062e-07,
"loss": 1.7009,
"step": 2144
},
{
"epoch": 2.13,
"learning_rate": 9.470495280209716e-07,
"loss": 1.6821,
"step": 2146
},
{
"epoch": 2.13,
"learning_rate": 9.454465384604287e-07,
"loss": 1.7464,
"step": 2148
},
{
"epoch": 2.13,
"learning_rate": 9.43843689485316e-07,
"loss": 1.701,
"step": 2150
},
{
"epoch": 2.13,
"learning_rate": 9.422409852262092e-07,
"loss": 1.7189,
"step": 2152
},
{
"epoch": 2.13,
"learning_rate": 9.406384298133111e-07,
"loss": 1.7062,
"step": 2154
},
{
"epoch": 2.14,
"learning_rate": 9.39036027376441e-07,
"loss": 1.6925,
"step": 2156
},
{
"epoch": 2.14,
"learning_rate": 9.374337820450243e-07,
"loss": 1.6618,
"step": 2158
},
{
"epoch": 2.14,
"learning_rate": 9.358316979480813e-07,
"loss": 1.6962,
"step": 2160
},
{
"epoch": 2.14,
"learning_rate": 9.34229779214217e-07,
"loss": 1.6879,
"step": 2162
},
{
"epoch": 2.14,
"learning_rate": 9.326280299716093e-07,
"loss": 1.6706,
"step": 2164
},
{
"epoch": 2.15,
"learning_rate": 9.310264543480002e-07,
"loss": 1.6878,
"step": 2166
},
{
"epoch": 2.15,
"learning_rate": 9.294250564706847e-07,
"loss": 1.692,
"step": 2168
},
{
"epoch": 2.15,
"learning_rate": 9.278238404664989e-07,
"loss": 1.7141,
"step": 2170
},
{
"epoch": 2.15,
"learning_rate": 9.262228104618105e-07,
"loss": 1.7044,
"step": 2172
},
{
"epoch": 2.15,
"learning_rate": 9.246219705825076e-07,
"loss": 1.6214,
"step": 2174
},
{
"epoch": 2.16,
"learning_rate": 9.230213249539886e-07,
"loss": 1.7014,
"step": 2176
},
{
"epoch": 2.16,
"learning_rate": 9.214208777011516e-07,
"loss": 1.7389,
"step": 2178
},
{
"epoch": 2.16,
"learning_rate": 9.19820632948383e-07,
"loss": 1.731,
"step": 2180
},
{
"epoch": 2.16,
"learning_rate": 9.18220594819548e-07,
"loss": 1.6979,
"step": 2182
},
{
"epoch": 2.16,
"learning_rate": 9.166207674379783e-07,
"loss": 1.6608,
"step": 2184
},
{
"epoch": 2.17,
"learning_rate": 9.150211549264631e-07,
"loss": 1.6818,
"step": 2186
},
{
"epoch": 2.17,
"learning_rate": 9.134217614072378e-07,
"loss": 1.7185,
"step": 2188
},
{
"epoch": 2.17,
"learning_rate": 9.11822591001974e-07,
"loss": 1.734,
"step": 2190
},
{
"epoch": 2.17,
"learning_rate": 9.102236478317676e-07,
"loss": 1.6561,
"step": 2192
},
{
"epoch": 2.17,
"learning_rate": 9.08624936017129e-07,
"loss": 1.7117,
"step": 2194
},
{
"epoch": 2.18,
"learning_rate": 9.070264596779725e-07,
"loss": 1.7161,
"step": 2196
},
{
"epoch": 2.18,
"learning_rate": 9.054282229336055e-07,
"loss": 1.6567,
"step": 2198
},
{
"epoch": 2.18,
"learning_rate": 9.038302299027188e-07,
"loss": 1.6488,
"step": 2200
},
{
"epoch": 2.18,
"learning_rate": 9.022324847033733e-07,
"loss": 1.7681,
"step": 2202
},
{
"epoch": 2.18,
"learning_rate": 9.006349914529932e-07,
"loss": 1.7595,
"step": 2204
},
{
"epoch": 2.19,
"learning_rate": 8.99037754268352e-07,
"loss": 1.7348,
"step": 2206
},
{
"epoch": 2.19,
"learning_rate": 8.974407772655641e-07,
"loss": 1.6891,
"step": 2208
},
{
"epoch": 2.19,
"learning_rate": 8.958440645600739e-07,
"loss": 1.6879,
"step": 2210
},
{
"epoch": 2.19,
"learning_rate": 8.942476202666426e-07,
"loss": 1.6694,
"step": 2212
},
{
"epoch": 2.19,
"learning_rate": 8.926514484993419e-07,
"loss": 1.6864,
"step": 2214
},
{
"epoch": 2.2,
"learning_rate": 8.910555533715405e-07,
"loss": 1.6839,
"step": 2216
},
{
"epoch": 2.2,
"learning_rate": 8.894599389958934e-07,
"loss": 1.7449,
"step": 2218
},
{
"epoch": 2.2,
"learning_rate": 8.878646094843338e-07,
"loss": 1.6596,
"step": 2220
},
{
"epoch": 2.2,
"learning_rate": 8.862695689480587e-07,
"loss": 1.6671,
"step": 2222
},
{
"epoch": 2.2,
"learning_rate": 8.84674821497522e-07,
"loss": 1.7066,
"step": 2224
},
{
"epoch": 2.21,
"learning_rate": 8.830803712424217e-07,
"loss": 1.6791,
"step": 2226
},
{
"epoch": 2.21,
"learning_rate": 8.8148622229169e-07,
"loss": 1.7459,
"step": 2228
},
{
"epoch": 2.21,
"learning_rate": 8.798923787534828e-07,
"loss": 1.7514,
"step": 2230
},
{
"epoch": 2.21,
"learning_rate": 8.782988447351684e-07,
"loss": 1.7402,
"step": 2232
},
{
"epoch": 2.21,
"learning_rate": 8.76705624343318e-07,
"loss": 1.7044,
"step": 2234
},
{
"epoch": 2.22,
"learning_rate": 8.751127216836945e-07,
"loss": 1.6672,
"step": 2236
},
{
"epoch": 2.22,
"learning_rate": 8.735201408612418e-07,
"loss": 1.7068,
"step": 2238
},
{
"epoch": 2.22,
"learning_rate": 8.719278859800749e-07,
"loss": 1.6593,
"step": 2240
},
{
"epoch": 2.22,
"learning_rate": 8.70335961143468e-07,
"loss": 1.6671,
"step": 2242
},
{
"epoch": 2.22,
"learning_rate": 8.687443704538452e-07,
"loss": 1.7018,
"step": 2244
},
{
"epoch": 2.23,
"learning_rate": 8.671531180127699e-07,
"loss": 1.7241,
"step": 2246
},
{
"epoch": 2.23,
"learning_rate": 8.655622079209336e-07,
"loss": 1.6957,
"step": 2248
},
{
"epoch": 2.23,
"learning_rate": 8.639716442781448e-07,
"loss": 1.6789,
"step": 2250
},
{
"epoch": 2.23,
"learning_rate": 8.623814311833202e-07,
"loss": 1.6626,
"step": 2252
},
{
"epoch": 2.23,
"learning_rate": 8.607915727344727e-07,
"loss": 1.6976,
"step": 2254
},
{
"epoch": 2.24,
"learning_rate": 8.592020730287013e-07,
"loss": 1.6522,
"step": 2256
},
{
"epoch": 2.24,
"learning_rate": 8.576129361621808e-07,
"loss": 1.7328,
"step": 2258
},
{
"epoch": 2.24,
"learning_rate": 8.560241662301504e-07,
"loss": 1.6631,
"step": 2260
},
{
"epoch": 2.24,
"learning_rate": 8.54435767326904e-07,
"loss": 1.6692,
"step": 2262
},
{
"epoch": 2.24,
"learning_rate": 8.528477435457789e-07,
"loss": 1.6941,
"step": 2264
},
{
"epoch": 2.25,
"learning_rate": 8.51260098979147e-07,
"loss": 1.6753,
"step": 2266
},
{
"epoch": 2.25,
"learning_rate": 8.496728377184019e-07,
"loss": 1.685,
"step": 2268
},
{
"epoch": 2.25,
"learning_rate": 8.480859638539492e-07,
"loss": 1.6844,
"step": 2270
},
{
"epoch": 2.25,
"learning_rate": 8.464994814751967e-07,
"loss": 1.7154,
"step": 2272
},
{
"epoch": 2.25,
"learning_rate": 8.449133946705433e-07,
"loss": 1.6493,
"step": 2274
},
{
"epoch": 2.26,
"learning_rate": 8.433277075273684e-07,
"loss": 1.6897,
"step": 2276
},
{
"epoch": 2.26,
"learning_rate": 8.417424241320219e-07,
"loss": 1.7593,
"step": 2278
},
{
"epoch": 2.26,
"learning_rate": 8.401575485698118e-07,
"loss": 1.7086,
"step": 2280
},
{
"epoch": 2.26,
"learning_rate": 8.385730849249969e-07,
"loss": 1.7061,
"step": 2282
},
{
"epoch": 2.26,
"learning_rate": 8.369890372807736e-07,
"loss": 1.7041,
"step": 2284
},
{
"epoch": 2.27,
"learning_rate": 8.354054097192659e-07,
"loss": 1.7031,
"step": 2286
},
{
"epoch": 2.27,
"learning_rate": 8.338222063215168e-07,
"loss": 1.6871,
"step": 2288
},
{
"epoch": 2.27,
"learning_rate": 8.322394311674735e-07,
"loss": 1.7028,
"step": 2290
},
{
"epoch": 2.27,
"learning_rate": 8.306570883359824e-07,
"loss": 1.6795,
"step": 2292
},
{
"epoch": 2.27,
"learning_rate": 8.290751819047743e-07,
"loss": 1.6632,
"step": 2294
},
{
"epoch": 2.27,
"learning_rate": 8.274937159504558e-07,
"loss": 1.7115,
"step": 2296
},
{
"epoch": 2.28,
"learning_rate": 8.25912694548498e-07,
"loss": 1.6845,
"step": 2298
},
{
"epoch": 2.28,
"learning_rate": 8.243321217732267e-07,
"loss": 1.6457,
"step": 2300
},
{
"epoch": 2.28,
"learning_rate": 8.227520016978117e-07,
"loss": 1.6617,
"step": 2302
},
{
"epoch": 2.28,
"learning_rate": 8.211723383942559e-07,
"loss": 1.6927,
"step": 2304
},
{
"epoch": 2.28,
"learning_rate": 8.195931359333857e-07,
"loss": 1.7118,
"step": 2306
},
{
"epoch": 2.29,
"learning_rate": 8.180143983848387e-07,
"loss": 1.6656,
"step": 2308
},
{
"epoch": 2.29,
"learning_rate": 8.164361298170553e-07,
"loss": 1.6978,
"step": 2310
},
{
"epoch": 2.29,
"learning_rate": 8.148583342972671e-07,
"loss": 1.6868,
"step": 2312
},
{
"epoch": 2.29,
"learning_rate": 8.132810158914871e-07,
"loss": 1.6648,
"step": 2314
},
{
"epoch": 2.29,
"learning_rate": 8.117041786644983e-07,
"loss": 1.682,
"step": 2316
},
{
"epoch": 2.3,
"learning_rate": 8.109159417629565e-07,
"loss": 1.7076,
"step": 2318
},
{
"epoch": 2.3,
"learning_rate": 8.101278266798435e-07,
"loss": 1.6716,
"step": 2320
},
{
"epoch": 2.3,
"learning_rate": 8.08551963999815e-07,
"loss": 1.7117,
"step": 2322
},
{
"epoch": 2.3,
"learning_rate": 8.069765946854449e-07,
"loss": 1.6857,
"step": 2324
},
{
"epoch": 2.3,
"learning_rate": 8.054017227964933e-07,
"loss": 1.7236,
"step": 2326
},
{
"epoch": 2.31,
"learning_rate": 8.038273523914387e-07,
"loss": 1.6899,
"step": 2328
},
{
"epoch": 2.31,
"learning_rate": 8.022534875274667e-07,
"loss": 1.7217,
"step": 2330
},
{
"epoch": 2.31,
"learning_rate": 8.006801322604609e-07,
"loss": 1.6659,
"step": 2332
},
{
"epoch": 2.31,
"learning_rate": 7.991072906449905e-07,
"loss": 1.6978,
"step": 2334
},
{
"epoch": 2.31,
"learning_rate": 7.975349667343028e-07,
"loss": 1.6926,
"step": 2336
},
{
"epoch": 2.32,
"learning_rate": 7.959631645803097e-07,
"loss": 1.7067,
"step": 2338
},
{
"epoch": 2.32,
"learning_rate": 7.943918882335783e-07,
"loss": 1.6996,
"step": 2340
},
{
"epoch": 2.32,
"learning_rate": 7.928211417433214e-07,
"loss": 1.688,
"step": 2342
},
{
"epoch": 2.32,
"learning_rate": 7.912509291573858e-07,
"loss": 1.7048,
"step": 2344
},
{
"epoch": 2.32,
"learning_rate": 7.896812545222438e-07,
"loss": 1.7409,
"step": 2346
},
{
"epoch": 2.33,
"learning_rate": 7.881121218829787e-07,
"loss": 1.675,
"step": 2348
},
{
"epoch": 2.33,
"learning_rate": 7.865435352832799e-07,
"loss": 1.7349,
"step": 2350
},
{
"epoch": 2.33,
"learning_rate": 7.849754987654278e-07,
"loss": 1.7014,
"step": 2352
},
{
"epoch": 2.33,
"learning_rate": 7.834080163702861e-07,
"loss": 1.6584,
"step": 2354
},
{
"epoch": 2.33,
"learning_rate": 7.818410921372903e-07,
"loss": 1.7162,
"step": 2356
},
{
"epoch": 2.34,
"learning_rate": 7.802747301044371e-07,
"loss": 1.6483,
"step": 2358
},
{
"epoch": 2.34,
"learning_rate": 7.787089343082748e-07,
"loss": 1.7095,
"step": 2360
},
{
"epoch": 2.34,
"learning_rate": 7.771437087838929e-07,
"loss": 1.6742,
"step": 2362
},
{
"epoch": 2.34,
"learning_rate": 7.755790575649105e-07,
"loss": 1.6931,
"step": 2364
},
{
"epoch": 2.34,
"learning_rate": 7.740149846834673e-07,
"loss": 1.6985,
"step": 2366
},
{
"epoch": 2.35,
"learning_rate": 7.724514941702121e-07,
"loss": 1.7498,
"step": 2368
},
{
"epoch": 2.35,
"learning_rate": 7.708885900542929e-07,
"loss": 1.7108,
"step": 2370
},
{
"epoch": 2.35,
"learning_rate": 7.693262763633477e-07,
"loss": 1.7009,
"step": 2372
},
{
"epoch": 2.35,
"learning_rate": 7.677645571234913e-07,
"loss": 1.6861,
"step": 2374
},
{
"epoch": 2.35,
"learning_rate": 7.662034363593079e-07,
"loss": 1.6602,
"step": 2376
},
{
"epoch": 2.36,
"learning_rate": 7.646429180938386e-07,
"loss": 1.6654,
"step": 2378
},
{
"epoch": 2.36,
"learning_rate": 7.630830063485719e-07,
"loss": 1.7435,
"step": 2380
},
{
"epoch": 2.36,
"learning_rate": 7.615237051434341e-07,
"loss": 1.7151,
"step": 2382
},
{
"epoch": 2.36,
"learning_rate": 7.599650184967772e-07,
"loss": 1.6959,
"step": 2384
},
{
"epoch": 2.36,
"learning_rate": 7.584069504253701e-07,
"loss": 1.7323,
"step": 2386
},
{
"epoch": 2.37,
"learning_rate": 7.56849504944387e-07,
"loss": 1.7185,
"step": 2388
},
{
"epoch": 2.37,
"learning_rate": 7.552926860673979e-07,
"loss": 1.7051,
"step": 2390
},
{
"epoch": 2.37,
"learning_rate": 7.537364978063587e-07,
"loss": 1.6372,
"step": 2392
},
{
"epoch": 2.37,
"learning_rate": 7.521809441715994e-07,
"loss": 1.6673,
"step": 2394
},
{
"epoch": 2.37,
"learning_rate": 7.506260291718145e-07,
"loss": 1.6899,
"step": 2396
},
{
"epoch": 2.38,
"learning_rate": 7.490717568140532e-07,
"loss": 1.7418,
"step": 2398
},
{
"epoch": 2.38,
"learning_rate": 7.475181311037084e-07,
"loss": 1.6876,
"step": 2400
},
{
"epoch": 2.38,
"learning_rate": 7.459651560445063e-07,
"loss": 1.6791,
"step": 2402
},
{
"epoch": 2.38,
"learning_rate": 7.444128356384978e-07,
"loss": 1.7036,
"step": 2404
},
{
"epoch": 2.38,
"learning_rate": 7.42861173886044e-07,
"loss": 1.7317,
"step": 2406
},
{
"epoch": 2.39,
"learning_rate": 7.413101747858111e-07,
"loss": 1.7448,
"step": 2408
},
{
"epoch": 2.39,
"learning_rate": 7.397598423347564e-07,
"loss": 1.6793,
"step": 2410
},
{
"epoch": 2.39,
"learning_rate": 7.382101805281198e-07,
"loss": 1.649,
"step": 2412
},
{
"epoch": 2.39,
"learning_rate": 7.366611933594131e-07,
"loss": 1.7431,
"step": 2414
},
{
"epoch": 2.39,
"learning_rate": 7.351128848204083e-07,
"loss": 1.692,
"step": 2416
},
{
"epoch": 2.4,
"learning_rate": 7.335652589011299e-07,
"loss": 1.7502,
"step": 2418
},
{
"epoch": 2.4,
"learning_rate": 7.320183195898428e-07,
"loss": 1.6684,
"step": 2420
},
{
"epoch": 2.4,
"learning_rate": 7.304720708730422e-07,
"loss": 1.6798,
"step": 2422
},
{
"epoch": 2.4,
"learning_rate": 7.289265167354448e-07,
"loss": 1.6456,
"step": 2424
},
{
"epoch": 2.4,
"learning_rate": 7.273816611599751e-07,
"loss": 1.6918,
"step": 2426
},
{
"epoch": 2.41,
"learning_rate": 7.258375081277597e-07,
"loss": 1.7056,
"step": 2428
},
{
"epoch": 2.41,
"learning_rate": 7.242940616181136e-07,
"loss": 1.7111,
"step": 2430
},
{
"epoch": 2.41,
"learning_rate": 7.227513256085312e-07,
"loss": 1.6859,
"step": 2432
},
{
"epoch": 2.41,
"learning_rate": 7.212093040746762e-07,
"loss": 1.7127,
"step": 2434
},
{
"epoch": 2.41,
"learning_rate": 7.196680009903701e-07,
"loss": 1.7154,
"step": 2436
},
{
"epoch": 2.42,
"learning_rate": 7.181274203275847e-07,
"loss": 1.6469,
"step": 2438
},
{
"epoch": 2.42,
"learning_rate": 7.165875660564286e-07,
"loss": 1.7553,
"step": 2440
},
{
"epoch": 2.42,
"learning_rate": 7.150484421451393e-07,
"loss": 1.6825,
"step": 2442
},
{
"epoch": 2.42,
"learning_rate": 7.135100525600715e-07,
"loss": 1.6694,
"step": 2444
},
{
"epoch": 2.42,
"learning_rate": 7.119724012656878e-07,
"loss": 1.6641,
"step": 2446
},
{
"epoch": 2.43,
"learning_rate": 7.104354922245483e-07,
"loss": 1.6566,
"step": 2448
},
{
"epoch": 2.43,
"learning_rate": 7.088993293973008e-07,
"loss": 1.7227,
"step": 2450
},
{
"epoch": 2.43,
"learning_rate": 7.073639167426692e-07,
"loss": 1.7445,
"step": 2452
},
{
"epoch": 2.43,
"learning_rate": 7.058292582174444e-07,
"loss": 1.7304,
"step": 2454
},
{
"epoch": 2.43,
"learning_rate": 7.042953577764741e-07,
"loss": 1.7014,
"step": 2456
},
{
"epoch": 2.44,
"learning_rate": 7.027622193726524e-07,
"loss": 1.6649,
"step": 2458
},
{
"epoch": 2.44,
"learning_rate": 7.012298469569095e-07,
"loss": 1.6338,
"step": 2460
},
{
"epoch": 2.44,
"learning_rate": 6.99698244478202e-07,
"loss": 1.6842,
"step": 2462
},
{
"epoch": 2.44,
"learning_rate": 6.981674158835018e-07,
"loss": 1.6714,
"step": 2464
},
{
"epoch": 2.44,
"learning_rate": 6.966373651177865e-07,
"loss": 1.7427,
"step": 2466
},
{
"epoch": 2.45,
"learning_rate": 6.951080961240296e-07,
"loss": 1.6865,
"step": 2468
},
{
"epoch": 2.45,
"learning_rate": 6.935796128431901e-07,
"loss": 1.7064,
"step": 2470
},
{
"epoch": 2.45,
"learning_rate": 6.920519192142018e-07,
"loss": 1.6265,
"step": 2472
},
{
"epoch": 2.45,
"learning_rate": 6.905250191739633e-07,
"loss": 1.6545,
"step": 2474
},
{
"epoch": 2.45,
"learning_rate": 6.889989166573287e-07,
"loss": 1.7049,
"step": 2476
},
{
"epoch": 2.46,
"learning_rate": 6.874736155970967e-07,
"loss": 1.7092,
"step": 2478
},
{
"epoch": 2.46,
"learning_rate": 6.859491199240001e-07,
"loss": 1.7326,
"step": 2480
},
{
"epoch": 2.46,
"learning_rate": 6.844254335666979e-07,
"loss": 1.6849,
"step": 2482
},
{
"epoch": 2.46,
"learning_rate": 6.829025604517606e-07,
"loss": 1.696,
"step": 2484
},
{
"epoch": 2.46,
"learning_rate": 6.813805045036655e-07,
"loss": 1.7074,
"step": 2486
},
{
"epoch": 2.47,
"learning_rate": 6.798592696447832e-07,
"loss": 1.7195,
"step": 2488
},
{
"epoch": 2.47,
"learning_rate": 6.783388597953681e-07,
"loss": 1.642,
"step": 2490
},
{
"epoch": 2.47,
"learning_rate": 6.768192788735484e-07,
"loss": 1.6769,
"step": 2492
},
{
"epoch": 2.47,
"learning_rate": 6.753005307953165e-07,
"loss": 1.7308,
"step": 2494
},
{
"epoch": 2.47,
"learning_rate": 6.737826194745188e-07,
"loss": 1.6753,
"step": 2496
},
{
"epoch": 2.48,
"learning_rate": 6.72265548822845e-07,
"loss": 1.7083,
"step": 2498
},
{
"epoch": 2.48,
"learning_rate": 6.707493227498186e-07,
"loss": 1.6606,
"step": 2500
},
{
"epoch": 2.48,
"learning_rate": 6.692339451627858e-07,
"loss": 1.6709,
"step": 2502
},
{
"epoch": 2.48,
"learning_rate": 6.677194199669071e-07,
"loss": 1.6954,
"step": 2504
},
{
"epoch": 2.48,
"learning_rate": 6.662057510651463e-07,
"loss": 1.6816,
"step": 2506
},
{
"epoch": 2.49,
"learning_rate": 6.646929423582603e-07,
"loss": 1.7113,
"step": 2508
},
{
"epoch": 2.49,
"learning_rate": 6.631809977447896e-07,
"loss": 1.6677,
"step": 2510
},
{
"epoch": 2.49,
"learning_rate": 6.616699211210468e-07,
"loss": 1.7202,
"step": 2512
},
{
"epoch": 2.49,
"learning_rate": 6.601597163811087e-07,
"loss": 1.6861,
"step": 2514
},
{
"epoch": 2.49,
"learning_rate": 6.586503874168057e-07,
"loss": 1.7311,
"step": 2516
},
{
"epoch": 2.49,
"learning_rate": 6.5714193811771e-07,
"loss": 1.6645,
"step": 2518
},
{
"epoch": 2.5,
"learning_rate": 6.55634372371128e-07,
"loss": 1.6595,
"step": 2520
},
{
"epoch": 2.5,
"learning_rate": 6.541276940620879e-07,
"loss": 1.6938,
"step": 2522
},
{
"epoch": 2.5,
"learning_rate": 6.526219070733325e-07,
"loss": 1.7347,
"step": 2524
},
{
"epoch": 2.5,
"learning_rate": 6.511170152853062e-07,
"loss": 1.7152,
"step": 2526
},
{
"epoch": 2.5,
"learning_rate": 6.496130225761476e-07,
"loss": 1.6773,
"step": 2528
},
{
"epoch": 2.51,
"learning_rate": 6.481099328216781e-07,
"loss": 1.6889,
"step": 2530
},
{
"epoch": 2.51,
"learning_rate": 6.466077498953915e-07,
"loss": 1.7002,
"step": 2532
},
{
"epoch": 2.51,
"learning_rate": 6.451064776684449e-07,
"loss": 1.697,
"step": 2534
},
{
"epoch": 2.51,
"learning_rate": 6.436061200096492e-07,
"loss": 1.7142,
"step": 2536
},
{
"epoch": 2.51,
"learning_rate": 6.421066807854584e-07,
"loss": 1.687,
"step": 2538
},
{
"epoch": 2.52,
"learning_rate": 6.406081638599581e-07,
"loss": 1.6856,
"step": 2540
},
{
"epoch": 2.52,
"learning_rate": 6.391105730948591e-07,
"loss": 1.7328,
"step": 2542
},
{
"epoch": 2.52,
"learning_rate": 6.376139123494843e-07,
"loss": 1.7172,
"step": 2544
},
{
"epoch": 2.52,
"learning_rate": 6.361181854807604e-07,
"loss": 1.7127,
"step": 2546
},
{
"epoch": 2.52,
"learning_rate": 6.346233963432077e-07,
"loss": 1.6909,
"step": 2548
},
{
"epoch": 2.53,
"learning_rate": 6.331295487889288e-07,
"loss": 1.6796,
"step": 2550
},
{
"epoch": 2.53,
"learning_rate": 6.316366466676011e-07,
"loss": 1.6829,
"step": 2552
},
{
"epoch": 2.53,
"learning_rate": 6.301446938264648e-07,
"loss": 1.6658,
"step": 2554
},
{
"epoch": 2.53,
"learning_rate": 6.286536941103146e-07,
"loss": 1.7522,
"step": 2556
},
{
"epoch": 2.53,
"learning_rate": 6.271636513614882e-07,
"loss": 1.691,
"step": 2558
},
{
"epoch": 2.54,
"learning_rate": 6.256745694198568e-07,
"loss": 1.704,
"step": 2560
},
{
"epoch": 2.54,
"learning_rate": 6.241864521228173e-07,
"loss": 1.7034,
"step": 2562
},
{
"epoch": 2.54,
"learning_rate": 6.226993033052789e-07,
"loss": 1.7227,
"step": 2564
},
{
"epoch": 2.54,
"learning_rate": 6.21213126799656e-07,
"loss": 1.7184,
"step": 2566
},
{
"epoch": 2.54,
"learning_rate": 6.197279264358572e-07,
"loss": 1.737,
"step": 2568
},
{
"epoch": 2.55,
"learning_rate": 6.182437060412749e-07,
"loss": 1.6819,
"step": 2570
},
{
"epoch": 2.55,
"learning_rate": 6.167604694407766e-07,
"loss": 1.7859,
"step": 2572
},
{
"epoch": 2.55,
"learning_rate": 6.152782204566951e-07,
"loss": 1.679,
"step": 2574
},
{
"epoch": 2.55,
"learning_rate": 6.137969629088174e-07,
"loss": 1.6912,
"step": 2576
},
{
"epoch": 2.55,
"learning_rate": 6.123167006143755e-07,
"loss": 1.688,
"step": 2578
},
{
"epoch": 2.56,
"learning_rate": 6.108374373880367e-07,
"loss": 1.6896,
"step": 2580
},
{
"epoch": 2.56,
"learning_rate": 6.093591770418938e-07,
"loss": 1.7549,
"step": 2582
},
{
"epoch": 2.56,
"learning_rate": 6.078819233854555e-07,
"loss": 1.7085,
"step": 2584
},
{
"epoch": 2.56,
"learning_rate": 6.064056802256358e-07,
"loss": 1.6606,
"step": 2586
},
{
"epoch": 2.56,
"learning_rate": 6.049304513667442e-07,
"loss": 1.6915,
"step": 2588
},
{
"epoch": 2.57,
"learning_rate": 6.034562406104773e-07,
"loss": 1.6713,
"step": 2590
},
{
"epoch": 2.57,
"learning_rate": 6.019830517559073e-07,
"loss": 1.6162,
"step": 2592
},
{
"epoch": 2.57,
"learning_rate": 6.005108885994732e-07,
"loss": 1.6848,
"step": 2594
},
{
"epoch": 2.57,
"learning_rate": 5.990397549349713e-07,
"loss": 1.7615,
"step": 2596
},
{
"epoch": 2.57,
"learning_rate": 5.975696545535436e-07,
"loss": 1.7274,
"step": 2598
},
{
"epoch": 2.58,
"learning_rate": 5.961005912436703e-07,
"loss": 1.6831,
"step": 2600
},
{
"epoch": 2.58,
"learning_rate": 5.946325687911588e-07,
"loss": 1.7243,
"step": 2602
},
{
"epoch": 2.58,
"learning_rate": 5.93165590979134e-07,
"loss": 1.6636,
"step": 2604
},
{
"epoch": 2.58,
"learning_rate": 5.916996615880295e-07,
"loss": 1.7544,
"step": 2606
},
{
"epoch": 2.58,
"learning_rate": 5.902347843955758e-07,
"loss": 1.7464,
"step": 2608
},
{
"epoch": 2.59,
"learning_rate": 5.887709631767927e-07,
"loss": 1.709,
"step": 2610
},
{
"epoch": 2.59,
"learning_rate": 5.87308201703979e-07,
"loss": 1.6589,
"step": 2612
},
{
"epoch": 2.59,
"learning_rate": 5.858465037467014e-07,
"loss": 1.7203,
"step": 2614
},
{
"epoch": 2.59,
"learning_rate": 5.843858730717877e-07,
"loss": 1.6294,
"step": 2616
},
{
"epoch": 2.59,
"learning_rate": 5.829263134433129e-07,
"loss": 1.7211,
"step": 2618
},
{
"epoch": 2.6,
"learning_rate": 5.814678286225938e-07,
"loss": 1.6945,
"step": 2620
},
{
"epoch": 2.6,
"learning_rate": 5.800104223681772e-07,
"loss": 1.6875,
"step": 2622
},
{
"epoch": 2.6,
"learning_rate": 5.785540984358293e-07,
"loss": 1.7123,
"step": 2624
},
{
"epoch": 2.6,
"learning_rate": 5.770988605785285e-07,
"loss": 1.6918,
"step": 2626
},
{
"epoch": 2.6,
"learning_rate": 5.756447125464532e-07,
"loss": 1.6703,
"step": 2628
},
{
"epoch": 2.61,
"learning_rate": 5.741916580869736e-07,
"loss": 1.6663,
"step": 2630
},
{
"epoch": 2.61,
"learning_rate": 5.727397009446422e-07,
"loss": 1.6892,
"step": 2632
},
{
"epoch": 2.61,
"learning_rate": 5.712888448611843e-07,
"loss": 1.7073,
"step": 2634
},
{
"epoch": 2.61,
"learning_rate": 5.698390935754853e-07,
"loss": 1.7169,
"step": 2636
},
{
"epoch": 2.61,
"learning_rate": 5.683904508235857e-07,
"loss": 1.71,
"step": 2638
},
{
"epoch": 2.62,
"learning_rate": 5.66942920338669e-07,
"loss": 1.7353,
"step": 2640
},
{
"epoch": 2.62,
"learning_rate": 5.654965058510516e-07,
"loss": 1.6847,
"step": 2642
},
{
"epoch": 2.62,
"learning_rate": 5.64051211088175e-07,
"loss": 1.7284,
"step": 2644
},
{
"epoch": 2.62,
"learning_rate": 5.626070397745937e-07,
"loss": 1.6899,
"step": 2646
},
{
"epoch": 2.62,
"learning_rate": 5.611639956319687e-07,
"loss": 1.7052,
"step": 2648
},
{
"epoch": 2.63,
"learning_rate": 5.597220823790545e-07,
"loss": 1.6736,
"step": 2650
},
{
"epoch": 2.63,
"learning_rate": 5.582813037316926e-07,
"loss": 1.6843,
"step": 2652
},
{
"epoch": 2.63,
"learning_rate": 5.568416634028013e-07,
"loss": 1.6836,
"step": 2654
},
{
"epoch": 2.63,
"learning_rate": 5.554031651023625e-07,
"loss": 1.643,
"step": 2656
},
{
"epoch": 2.63,
"learning_rate": 5.539658125374177e-07,
"loss": 1.7168,
"step": 2658
},
{
"epoch": 2.64,
"learning_rate": 5.525296094120555e-07,
"loss": 1.6669,
"step": 2660
},
{
"epoch": 2.64,
"learning_rate": 5.51094559427401e-07,
"loss": 1.7637,
"step": 2662
},
{
"epoch": 2.64,
"learning_rate": 5.496606662816093e-07,
"loss": 1.6912,
"step": 2664
},
{
"epoch": 2.64,
"learning_rate": 5.482279336698527e-07,
"loss": 1.7166,
"step": 2666
},
{
"epoch": 2.64,
"learning_rate": 5.467963652843146e-07,
"loss": 1.7496,
"step": 2668
},
{
"epoch": 2.65,
"learning_rate": 5.453659648141762e-07,
"loss": 1.6845,
"step": 2670
},
{
"epoch": 2.65,
"learning_rate": 5.439367359456106e-07,
"loss": 1.6562,
"step": 2672
},
{
"epoch": 2.65,
"learning_rate": 5.425086823617708e-07,
"loss": 1.6745,
"step": 2674
},
{
"epoch": 2.65,
"learning_rate": 5.410818077427807e-07,
"loss": 1.7184,
"step": 2676
},
{
"epoch": 2.65,
"learning_rate": 5.396561157657271e-07,
"loss": 1.6447,
"step": 2678
},
{
"epoch": 2.66,
"learning_rate": 5.382316101046487e-07,
"loss": 1.731,
"step": 2680
},
{
"epoch": 2.66,
"learning_rate": 5.368082944305265e-07,
"loss": 1.6807,
"step": 2682
},
{
"epoch": 2.66,
"learning_rate": 5.35386172411275e-07,
"loss": 1.7142,
"step": 2684
},
{
"epoch": 2.66,
"learning_rate": 5.339652477117335e-07,
"loss": 1.6614,
"step": 2686
},
{
"epoch": 2.66,
"learning_rate": 5.325455239936545e-07,
"loss": 1.6886,
"step": 2688
},
{
"epoch": 2.67,
"learning_rate": 5.311270049156966e-07,
"loss": 1.6559,
"step": 2690
},
{
"epoch": 2.67,
"learning_rate": 5.297096941334138e-07,
"loss": 1.7406,
"step": 2692
},
{
"epoch": 2.67,
"learning_rate": 5.28293595299246e-07,
"loss": 1.6698,
"step": 2694
},
{
"epoch": 2.67,
"learning_rate": 5.268787120625096e-07,
"loss": 1.6907,
"step": 2696
},
{
"epoch": 2.67,
"learning_rate": 5.254650480693888e-07,
"loss": 1.6777,
"step": 2698
},
{
"epoch": 2.68,
"learning_rate": 5.240526069629264e-07,
"loss": 1.6978,
"step": 2700
},
{
"epoch": 2.68,
"learning_rate": 5.226413923830128e-07,
"loss": 1.7113,
"step": 2702
},
{
"epoch": 2.68,
"learning_rate": 5.212314079663777e-07,
"loss": 1.64,
"step": 2704
},
{
"epoch": 2.68,
"learning_rate": 5.198226573465813e-07,
"loss": 1.6904,
"step": 2706
},
{
"epoch": 2.68,
"learning_rate": 5.184151441540035e-07,
"loss": 1.6339,
"step": 2708
},
{
"epoch": 2.69,
"learning_rate": 5.170088720158359e-07,
"loss": 1.6761,
"step": 2710
},
{
"epoch": 2.69,
"learning_rate": 5.156038445560724e-07,
"loss": 1.7021,
"step": 2712
},
{
"epoch": 2.69,
"learning_rate": 5.142000653954973e-07,
"loss": 1.7174,
"step": 2714
},
{
"epoch": 2.69,
"learning_rate": 5.1279753815168e-07,
"loss": 1.6789,
"step": 2716
},
{
"epoch": 2.69,
"learning_rate": 5.113962664389635e-07,
"loss": 1.6851,
"step": 2718
},
{
"epoch": 2.7,
"learning_rate": 5.099962538684541e-07,
"loss": 1.7208,
"step": 2720
},
{
"epoch": 2.7,
"learning_rate": 5.085975040480139e-07,
"loss": 1.6654,
"step": 2722
},
{
"epoch": 2.7,
"learning_rate": 5.072000205822514e-07,
"loss": 1.6596,
"step": 2724
},
{
"epoch": 2.7,
"learning_rate": 5.05803807072511e-07,
"loss": 1.6819,
"step": 2726
},
{
"epoch": 2.7,
"learning_rate": 5.044088671168644e-07,
"loss": 1.7198,
"step": 2728
},
{
"epoch": 2.7,
"learning_rate": 5.030152043101019e-07,
"loss": 1.6521,
"step": 2730
},
{
"epoch": 2.71,
"learning_rate": 5.016228222437222e-07,
"loss": 1.7227,
"step": 2732
},
{
"epoch": 2.71,
"learning_rate": 5.002317245059227e-07,
"loss": 1.7101,
"step": 2734
},
{
"epoch": 2.71,
"learning_rate": 4.988419146815926e-07,
"loss": 1.6724,
"step": 2736
},
{
"epoch": 2.71,
"learning_rate": 4.974533963523014e-07,
"loss": 1.7026,
"step": 2738
},
{
"epoch": 2.71,
"learning_rate": 4.9606617309629e-07,
"loss": 1.6954,
"step": 2740
},
{
"epoch": 2.72,
"learning_rate": 4.94680248488462e-07,
"loss": 1.6506,
"step": 2742
},
{
"epoch": 2.72,
"learning_rate": 4.932956261003749e-07,
"loss": 1.645,
"step": 2744
},
{
"epoch": 2.72,
"learning_rate": 4.919123095002304e-07,
"loss": 1.6815,
"step": 2746
},
{
"epoch": 2.72,
"learning_rate": 4.905303022528641e-07,
"loss": 1.6839,
"step": 2748
},
{
"epoch": 2.72,
"learning_rate": 4.891496079197387e-07,
"loss": 1.6525,
"step": 2750
},
{
"epoch": 2.73,
"learning_rate": 4.877702300589324e-07,
"loss": 1.7223,
"step": 2752
},
{
"epoch": 2.73,
"learning_rate": 4.863921722251312e-07,
"loss": 1.7158,
"step": 2754
},
{
"epoch": 2.73,
"learning_rate": 4.850154379696194e-07,
"loss": 1.7095,
"step": 2756
},
{
"epoch": 2.73,
"learning_rate": 4.836400308402711e-07,
"loss": 1.6946,
"step": 2758
},
{
"epoch": 2.73,
"learning_rate": 4.822659543815393e-07,
"loss": 1.6986,
"step": 2760
},
{
"epoch": 2.74,
"learning_rate": 4.808932121344478e-07,
"loss": 1.6826,
"step": 2762
},
{
"epoch": 2.74,
"learning_rate": 4.795218076365832e-07,
"loss": 1.6625,
"step": 2764
},
{
"epoch": 2.74,
"learning_rate": 4.781517444220835e-07,
"loss": 1.6878,
"step": 2766
},
{
"epoch": 2.74,
"learning_rate": 4.7678302602163135e-07,
"loss": 1.6795,
"step": 2768
},
{
"epoch": 2.74,
"learning_rate": 4.754156559624425e-07,
"loss": 1.7281,
"step": 2770
},
{
"epoch": 2.75,
"learning_rate": 4.7404963776825925e-07,
"loss": 1.7138,
"step": 2772
},
{
"epoch": 2.75,
"learning_rate": 4.72684974959339e-07,
"loss": 1.6189,
"step": 2774
},
{
"epoch": 2.75,
"learning_rate": 4.713216710524469e-07,
"loss": 1.7349,
"step": 2776
},
{
"epoch": 2.75,
"learning_rate": 4.6995972956084717e-07,
"loss": 1.7164,
"step": 2778
},
{
"epoch": 2.75,
"learning_rate": 4.685991539942903e-07,
"loss": 1.6753,
"step": 2780
},
{
"epoch": 2.76,
"learning_rate": 4.672399478590092e-07,
"loss": 1.6889,
"step": 2782
},
{
"epoch": 2.76,
"learning_rate": 4.6588211465770735e-07,
"loss": 1.7264,
"step": 2784
},
{
"epoch": 2.76,
"learning_rate": 4.645256578895491e-07,
"loss": 1.6632,
"step": 2786
},
{
"epoch": 2.76,
"learning_rate": 4.6317058105015306e-07,
"loss": 1.6736,
"step": 2788
},
{
"epoch": 2.76,
"learning_rate": 4.6181688763158024e-07,
"loss": 1.721,
"step": 2790
},
{
"epoch": 2.77,
"learning_rate": 4.604645811223282e-07,
"loss": 1.6435,
"step": 2792
},
{
"epoch": 2.77,
"learning_rate": 4.5911366500731873e-07,
"loss": 1.7203,
"step": 2794
},
{
"epoch": 2.77,
"learning_rate": 4.577641427678923e-07,
"loss": 1.6521,
"step": 2796
},
{
"epoch": 2.77,
"learning_rate": 4.564160178817958e-07,
"loss": 1.6348,
"step": 2798
},
{
"epoch": 2.77,
"learning_rate": 4.550692938231755e-07,
"loss": 1.643,
"step": 2800
},
{
"epoch": 2.78,
"learning_rate": 4.5372397406256824e-07,
"loss": 1.6576,
"step": 2802
},
{
"epoch": 2.78,
"learning_rate": 4.5238006206689204e-07,
"loss": 1.7095,
"step": 2804
},
{
"epoch": 2.78,
"learning_rate": 4.5103756129943593e-07,
"loss": 1.6866,
"step": 2806
},
{
"epoch": 2.78,
"learning_rate": 4.4969647521985363e-07,
"loss": 1.6594,
"step": 2808
},
{
"epoch": 2.78,
"learning_rate": 4.4835680728415217e-07,
"loss": 1.7039,
"step": 2810
},
{
"epoch": 2.79,
"learning_rate": 4.4701856094468383e-07,
"loss": 1.6914,
"step": 2812
},
{
"epoch": 2.79,
"learning_rate": 4.4568173965013835e-07,
"loss": 1.6774,
"step": 2814
},
{
"epoch": 2.79,
"learning_rate": 4.4434634684553286e-07,
"loss": 1.6971,
"step": 2816
},
{
"epoch": 2.79,
"learning_rate": 4.430123859722027e-07,
"loss": 1.724,
"step": 2818
},
{
"epoch": 2.79,
"learning_rate": 4.4167986046779315e-07,
"loss": 1.6961,
"step": 2820
},
{
"epoch": 2.8,
"learning_rate": 4.403487737662509e-07,
"loss": 1.7321,
"step": 2822
},
{
"epoch": 2.8,
"learning_rate": 4.3901912929781524e-07,
"loss": 1.6939,
"step": 2824
},
{
"epoch": 2.8,
"learning_rate": 4.376909304890077e-07,
"loss": 1.6564,
"step": 2826
},
{
"epoch": 2.8,
"learning_rate": 4.3636418076262473e-07,
"loss": 1.628,
"step": 2828
},
{
"epoch": 2.8,
"learning_rate": 4.35038883537729e-07,
"loss": 1.6867,
"step": 2830
},
{
"epoch": 2.81,
"learning_rate": 4.3371504222963906e-07,
"loss": 1.7011,
"step": 2832
},
{
"epoch": 2.81,
"learning_rate": 4.323926602499224e-07,
"loss": 1.6426,
"step": 2834
},
{
"epoch": 2.81,
"learning_rate": 4.31071741006386e-07,
"loss": 1.6692,
"step": 2836
},
{
"epoch": 2.81,
"learning_rate": 4.2975228790306627e-07,
"loss": 1.6933,
"step": 2838
},
{
"epoch": 2.81,
"learning_rate": 4.2843430434022166e-07,
"loss": 1.7154,
"step": 2840
},
{
"epoch": 2.82,
"learning_rate": 4.2711779371432445e-07,
"loss": 1.739,
"step": 2842
},
{
"epoch": 2.82,
"learning_rate": 4.258027594180499e-07,
"loss": 1.6868,
"step": 2844
},
{
"epoch": 2.82,
"learning_rate": 4.2448920484027006e-07,
"loss": 1.7613,
"step": 2846
},
{
"epoch": 2.82,
"learning_rate": 4.231771333660422e-07,
"loss": 1.6815,
"step": 2848
},
{
"epoch": 2.82,
"learning_rate": 4.2186654837660316e-07,
"loss": 1.6944,
"step": 2850
},
{
"epoch": 2.83,
"learning_rate": 4.2055745324935763e-07,
"loss": 1.6703,
"step": 2852
},
{
"epoch": 2.83,
"learning_rate": 4.1924985135787194e-07,
"loss": 1.6135,
"step": 2854
},
{
"epoch": 2.83,
"learning_rate": 4.1794374607186453e-07,
"loss": 1.6998,
"step": 2856
},
{
"epoch": 2.83,
"learning_rate": 4.166391407571952e-07,
"loss": 1.7389,
"step": 2858
},
{
"epoch": 2.83,
"learning_rate": 4.153360387758603e-07,
"loss": 1.7111,
"step": 2860
},
{
"epoch": 2.84,
"learning_rate": 4.140344434859816e-07,
"loss": 1.6901,
"step": 2862
},
{
"epoch": 2.84,
"learning_rate": 4.1273435824179757e-07,
"loss": 1.7234,
"step": 2864
},
{
"epoch": 2.84,
"learning_rate": 4.1143578639365507e-07,
"loss": 1.6779,
"step": 2866
},
{
"epoch": 2.84,
"learning_rate": 4.101387312880018e-07,
"loss": 1.7124,
"step": 2868
},
{
"epoch": 2.84,
"learning_rate": 4.0884319626737664e-07,
"loss": 1.6751,
"step": 2870
},
{
"epoch": 2.85,
"learning_rate": 4.0754918467040013e-07,
"loss": 1.6454,
"step": 2872
},
{
"epoch": 2.85,
"learning_rate": 4.062566998317685e-07,
"loss": 1.7012,
"step": 2874
},
{
"epoch": 2.85,
"learning_rate": 4.049657450822421e-07,
"loss": 1.684,
"step": 2876
},
{
"epoch": 2.85,
"learning_rate": 4.0367632374863856e-07,
"loss": 1.7018,
"step": 2878
},
{
"epoch": 2.85,
"learning_rate": 4.0238843915382435e-07,
"loss": 1.682,
"step": 2880
},
{
"epoch": 2.86,
"learning_rate": 4.01102094616706e-07,
"loss": 1.7358,
"step": 2882
},
{
"epoch": 2.86,
"learning_rate": 3.9981729345222005e-07,
"loss": 1.6558,
"step": 2884
},
{
"epoch": 2.86,
"learning_rate": 3.985340389713262e-07,
"loss": 1.7431,
"step": 2886
},
{
"epoch": 2.86,
"learning_rate": 3.972523344809993e-07,
"loss": 1.6865,
"step": 2888
},
{
"epoch": 2.86,
"learning_rate": 3.959721832842182e-07,
"loss": 1.7101,
"step": 2890
},
{
"epoch": 2.87,
"learning_rate": 3.946935886799603e-07,
"loss": 1.6728,
"step": 2892
},
{
"epoch": 2.87,
"learning_rate": 3.934165539631913e-07,
"loss": 1.6375,
"step": 2894
},
{
"epoch": 2.87,
"learning_rate": 3.9214108242485645e-07,
"loss": 1.7287,
"step": 2896
},
{
"epoch": 2.87,
"learning_rate": 3.908671773518727e-07,
"loss": 1.6645,
"step": 2898
},
{
"epoch": 2.87,
"learning_rate": 3.895948420271209e-07,
"loss": 1.7205,
"step": 2900
},
{
"epoch": 2.88,
"learning_rate": 3.883240797294365e-07,
"loss": 1.6477,
"step": 2902
},
{
"epoch": 2.88,
"learning_rate": 3.8705489373360055e-07,
"loss": 1.6981,
"step": 2904
},
{
"epoch": 2.88,
"learning_rate": 3.8578728731033214e-07,
"loss": 1.7069,
"step": 2906
},
{
"epoch": 2.88,
"learning_rate": 3.845212637262806e-07,
"loss": 1.7126,
"step": 2908
},
{
"epoch": 2.88,
"learning_rate": 3.8325682624401487e-07,
"loss": 1.661,
"step": 2910
},
{
"epoch": 2.89,
"learning_rate": 3.8199397812201803e-07,
"loss": 1.6845,
"step": 2912
},
{
"epoch": 2.89,
"learning_rate": 3.807327226146757e-07,
"loss": 1.6835,
"step": 2914
},
{
"epoch": 2.89,
"learning_rate": 3.79473062972271e-07,
"loss": 1.7238,
"step": 2916
},
{
"epoch": 2.89,
"learning_rate": 3.782150024409727e-07,
"loss": 1.6807,
"step": 2918
},
{
"epoch": 2.89,
"learning_rate": 3.769585442628304e-07,
"loss": 1.7436,
"step": 2920
},
{
"epoch": 2.9,
"learning_rate": 3.757036916757631e-07,
"loss": 1.6375,
"step": 2922
},
{
"epoch": 2.9,
"learning_rate": 3.744504479135522e-07,
"loss": 1.7159,
"step": 2924
},
{
"epoch": 2.9,
"learning_rate": 3.7319881620583396e-07,
"loss": 1.6949,
"step": 2926
},
{
"epoch": 2.9,
"learning_rate": 3.719487997780902e-07,
"loss": 1.6679,
"step": 2928
},
{
"epoch": 2.9,
"learning_rate": 3.707004018516392e-07,
"loss": 1.6609,
"step": 2930
},
{
"epoch": 2.91,
"learning_rate": 3.6945362564362946e-07,
"loss": 1.7207,
"step": 2932
},
{
"epoch": 2.91,
"learning_rate": 3.682084743670296e-07,
"loss": 1.7001,
"step": 2934
},
{
"epoch": 2.91,
"learning_rate": 3.6696495123062056e-07,
"loss": 1.69,
"step": 2936
},
{
"epoch": 2.91,
"learning_rate": 3.6572305943898817e-07,
"loss": 1.6587,
"step": 2938
},
{
"epoch": 2.91,
"learning_rate": 3.644828021925144e-07,
"loss": 1.7012,
"step": 2940
},
{
"epoch": 2.92,
"learning_rate": 3.6324418268736813e-07,
"loss": 1.6881,
"step": 2942
},
{
"epoch": 2.92,
"learning_rate": 3.620072041154978e-07,
"loss": 1.6681,
"step": 2944
},
{
"epoch": 2.92,
"learning_rate": 3.607718696646238e-07,
"loss": 1.6934,
"step": 2946
},
{
"epoch": 2.92,
"learning_rate": 3.5953818251822976e-07,
"loss": 1.6946,
"step": 2948
},
{
"epoch": 2.92,
"learning_rate": 3.5830614585555296e-07,
"loss": 1.6701,
"step": 2950
},
{
"epoch": 2.92,
"learning_rate": 3.570757628515786e-07,
"loss": 1.625,
"step": 2952
},
{
"epoch": 2.93,
"learning_rate": 3.558470366770295e-07,
"loss": 1.6427,
"step": 2954
},
{
"epoch": 2.93,
"learning_rate": 3.546199704983591e-07,
"loss": 1.7193,
"step": 2956
},
{
"epoch": 2.93,
"learning_rate": 3.53394567477743e-07,
"loss": 1.6401,
"step": 2958
},
{
"epoch": 2.93,
"learning_rate": 3.521708307730713e-07,
"loss": 1.7526,
"step": 2960
},
{
"epoch": 2.93,
"learning_rate": 3.509487635379392e-07,
"loss": 1.6339,
"step": 2962
},
{
"epoch": 2.94,
"learning_rate": 3.497283689216397e-07,
"loss": 1.6991,
"step": 2964
},
{
"epoch": 2.94,
"learning_rate": 3.485096500691561e-07,
"loss": 1.6556,
"step": 2966
},
{
"epoch": 2.94,
"learning_rate": 3.472926101211522e-07,
"loss": 1.7234,
"step": 2968
},
{
"epoch": 2.94,
"learning_rate": 3.4607725221396653e-07,
"loss": 1.6865,
"step": 2970
},
{
"epoch": 2.94,
"learning_rate": 3.4486357947960164e-07,
"loss": 1.7662,
"step": 2972
},
{
"epoch": 2.95,
"learning_rate": 3.436515950457186e-07,
"loss": 1.7085,
"step": 2974
},
{
"epoch": 2.95,
"learning_rate": 3.4244130203562626e-07,
"loss": 1.6606,
"step": 2976
},
{
"epoch": 2.95,
"learning_rate": 3.4123270356827594e-07,
"loss": 1.6801,
"step": 2978
},
{
"epoch": 2.95,
"learning_rate": 3.400258027582522e-07,
"loss": 1.6536,
"step": 2980
},
{
"epoch": 2.95,
"learning_rate": 3.388206027157626e-07,
"loss": 1.6931,
"step": 2982
},
{
"epoch": 2.96,
"learning_rate": 3.376171065466341e-07,
"loss": 1.6881,
"step": 2984
},
{
"epoch": 2.96,
"learning_rate": 3.3641531735230233e-07,
"loss": 1.7132,
"step": 2986
},
{
"epoch": 2.96,
"learning_rate": 3.352152382298027e-07,
"loss": 1.6624,
"step": 2988
},
{
"epoch": 2.96,
"learning_rate": 3.3401687227176534e-07,
"loss": 1.6712,
"step": 2990
},
{
"epoch": 2.96,
"learning_rate": 3.328202225664042e-07,
"loss": 1.6864,
"step": 2992
},
{
"epoch": 2.97,
"learning_rate": 3.3162529219751155e-07,
"loss": 1.6854,
"step": 2994
},
{
"epoch": 2.97,
"learning_rate": 3.304320842444479e-07,
"loss": 1.7078,
"step": 2996
},
{
"epoch": 2.97,
"learning_rate": 3.29240601782136e-07,
"loss": 1.7342,
"step": 2998
},
{
"epoch": 2.97,
"learning_rate": 3.280508478810512e-07,
"loss": 1.7156,
"step": 3000
},
{
"epoch": 2.97,
"learning_rate": 3.2686282560721424e-07,
"loss": 1.6919,
"step": 3002
},
{
"epoch": 2.98,
"learning_rate": 3.256765380221841e-07,
"loss": 1.6578,
"step": 3004
},
{
"epoch": 2.98,
"learning_rate": 3.2449198818304956e-07,
"loss": 1.7127,
"step": 3006
},
{
"epoch": 2.98,
"learning_rate": 3.2330917914242016e-07,
"loss": 1.7205,
"step": 3008
},
{
"epoch": 2.98,
"learning_rate": 3.221281139484199e-07,
"loss": 1.7033,
"step": 3010
},
{
"epoch": 2.98,
"learning_rate": 3.2094879564467926e-07,
"loss": 1.7026,
"step": 3012
},
{
"epoch": 2.99,
"learning_rate": 3.1977122727032613e-07,
"loss": 1.6754,
"step": 3014
},
{
"epoch": 2.99,
"learning_rate": 3.185954118599795e-07,
"loss": 1.6521,
"step": 3016
},
{
"epoch": 2.99,
"learning_rate": 3.17421352443741e-07,
"loss": 1.6882,
"step": 3018
},
{
"epoch": 2.99,
"learning_rate": 3.1624905204718645e-07,
"loss": 1.7017,
"step": 3020
},
{
"epoch": 2.99,
"learning_rate": 3.150785136913584e-07,
"loss": 1.7032,
"step": 3022
},
{
"epoch": 3.0,
"learning_rate": 3.1390974039275943e-07,
"loss": 1.6883,
"step": 3024
},
{
"epoch": 3.0,
"learning_rate": 3.127427351633437e-07,
"loss": 1.7084,
"step": 3026
},
{
"epoch": 3.0,
"learning_rate": 3.1157750101050795e-07,
"loss": 1.6742,
"step": 3028
},
{
"epoch": 3.0,
"learning_rate": 3.104140409370851e-07,
"loss": 1.623,
"step": 3030
},
{
"epoch": 3.0,
"learning_rate": 3.0925235794133717e-07,
"loss": 1.6781,
"step": 3032
},
{
"epoch": 3.01,
"learning_rate": 3.080924550169453e-07,
"loss": 1.6811,
"step": 3034
},
{
"epoch": 3.01,
"learning_rate": 3.069343351530043e-07,
"loss": 1.6752,
"step": 3036
},
{
"epoch": 3.01,
"learning_rate": 3.0577800133401387e-07,
"loss": 1.7036,
"step": 3038
},
{
"epoch": 3.01,
"learning_rate": 3.0462345653987066e-07,
"loss": 1.6701,
"step": 3040
},
{
"epoch": 3.01,
"learning_rate": 3.034707037458608e-07,
"loss": 1.692,
"step": 3042
},
{
"epoch": 3.02,
"learning_rate": 3.023197459226534e-07,
"loss": 1.6856,
"step": 3044
},
{
"epoch": 3.02,
"learning_rate": 3.0117058603629054e-07,
"loss": 1.6747,
"step": 3046
},
{
"epoch": 3.02,
"learning_rate": 3.0002322704818237e-07,
"loss": 1.7099,
"step": 3048
},
{
"epoch": 3.02,
"learning_rate": 2.988776719150967e-07,
"loss": 1.6694,
"step": 3050
},
{
"epoch": 3.02,
"learning_rate": 2.977339235891541e-07,
"loss": 1.6709,
"step": 3052
},
{
"epoch": 3.03,
"learning_rate": 2.965919850178177e-07,
"loss": 1.6794,
"step": 3054
},
{
"epoch": 3.03,
"learning_rate": 2.9545185914388817e-07,
"loss": 1.6777,
"step": 3056
},
{
"epoch": 3.03,
"learning_rate": 2.943135489054936e-07,
"loss": 1.641,
"step": 3058
},
{
"epoch": 3.03,
"learning_rate": 2.9317705723608365e-07,
"loss": 1.6374,
"step": 3060
},
{
"epoch": 3.03,
"learning_rate": 2.920423870644216e-07,
"loss": 1.6835,
"step": 3062
},
{
"epoch": 3.04,
"learning_rate": 2.90909541314577e-07,
"loss": 1.6734,
"step": 3064
},
{
"epoch": 3.04,
"learning_rate": 2.8977852290591707e-07,
"loss": 1.6777,
"step": 3066
},
{
"epoch": 3.04,
"learning_rate": 2.8864933475310006e-07,
"loss": 1.7276,
"step": 3068
},
{
"epoch": 3.04,
"learning_rate": 2.875219797660681e-07,
"loss": 1.7273,
"step": 3070
},
{
"epoch": 3.04,
"learning_rate": 2.863964608500392e-07,
"loss": 1.6844,
"step": 3072
},
{
"epoch": 3.05,
"learning_rate": 2.85272780905499e-07,
"loss": 1.7278,
"step": 3074
},
{
"epoch": 3.05,
"learning_rate": 2.841509428281953e-07,
"loss": 1.6933,
"step": 3076
},
{
"epoch": 3.05,
"learning_rate": 2.830309495091282e-07,
"loss": 1.6473,
"step": 3078
},
{
"epoch": 3.05,
"learning_rate": 2.8191280383454394e-07,
"loss": 1.7405,
"step": 3080
},
{
"epoch": 3.05,
"learning_rate": 2.807965086859283e-07,
"loss": 1.6804,
"step": 3082
},
{
"epoch": 3.06,
"learning_rate": 2.796820669399976e-07,
"loss": 1.6326,
"step": 3084
},
{
"epoch": 3.06,
"learning_rate": 2.7856948146869177e-07,
"loss": 1.754,
"step": 3086
},
{
"epoch": 3.06,
"learning_rate": 2.774587551391667e-07,
"loss": 1.6694,
"step": 3088
},
{
"epoch": 3.06,
"learning_rate": 2.763498908137887e-07,
"loss": 1.72,
"step": 3090
},
{
"epoch": 3.06,
"learning_rate": 2.752428913501239e-07,
"loss": 1.6509,
"step": 3092
},
{
"epoch": 3.07,
"learning_rate": 2.741377596009338e-07,
"loss": 1.6915,
"step": 3094
},
{
"epoch": 3.07,
"learning_rate": 2.7303449841416657e-07,
"loss": 1.7006,
"step": 3096
},
{
"epoch": 3.07,
"learning_rate": 2.7193311063294964e-07,
"loss": 1.6722,
"step": 3098
},
{
"epoch": 3.07,
"learning_rate": 2.7083359909558235e-07,
"loss": 1.6992,
"step": 3100
},
{
"epoch": 3.07,
"learning_rate": 2.6973596663552957e-07,
"loss": 1.6196,
"step": 3102
},
{
"epoch": 3.08,
"learning_rate": 2.686402160814141e-07,
"loss": 1.6996,
"step": 3104
},
{
"epoch": 3.08,
"learning_rate": 2.6754635025700724e-07,
"loss": 1.7231,
"step": 3106
},
{
"epoch": 3.08,
"learning_rate": 2.66454371981225e-07,
"loss": 1.6904,
"step": 3108
},
{
"epoch": 3.08,
"learning_rate": 2.6536428406811904e-07,
"loss": 1.6754,
"step": 3110
},
{
"epoch": 3.08,
"learning_rate": 2.642760893268684e-07,
"loss": 1.6741,
"step": 3112
},
{
"epoch": 3.09,
"learning_rate": 2.631897905617746e-07,
"loss": 1.6569,
"step": 3114
},
{
"epoch": 3.09,
"learning_rate": 2.6210539057225214e-07,
"loss": 1.6799,
"step": 3116
},
{
"epoch": 3.09,
"learning_rate": 2.610228921528234e-07,
"loss": 1.6369,
"step": 3118
},
{
"epoch": 3.09,
"learning_rate": 2.599422980931093e-07,
"loss": 1.6839,
"step": 3120
},
{
"epoch": 3.09,
"learning_rate": 2.588636111778242e-07,
"loss": 1.6174,
"step": 3122
},
{
"epoch": 3.1,
"learning_rate": 2.5778683418676683e-07,
"loss": 1.6918,
"step": 3124
},
{
"epoch": 3.1,
"learning_rate": 2.5671196989481436e-07,
"loss": 1.7307,
"step": 3126
},
{
"epoch": 3.1,
"learning_rate": 2.556390210719149e-07,
"loss": 1.7038,
"step": 3128
},
{
"epoch": 3.1,
"learning_rate": 2.54567990483081e-07,
"loss": 1.644,
"step": 3130
},
{
"epoch": 3.1,
"learning_rate": 2.5349888088838033e-07,
"loss": 1.6354,
"step": 3132
},
{
"epoch": 3.11,
"learning_rate": 2.524316950429318e-07,
"loss": 1.6278,
"step": 3134
},
{
"epoch": 3.11,
"learning_rate": 2.5136643569689587e-07,
"loss": 1.7275,
"step": 3136
},
{
"epoch": 3.11,
"learning_rate": 2.5030310559546797e-07,
"loss": 1.6689,
"step": 3138
},
{
"epoch": 3.11,
"learning_rate": 2.4924170747887275e-07,
"loss": 1.726,
"step": 3140
},
{
"epoch": 3.11,
"learning_rate": 2.48182244082356e-07,
"loss": 1.7026,
"step": 3142
},
{
"epoch": 3.12,
"learning_rate": 2.471247181361772e-07,
"loss": 1.7054,
"step": 3144
},
{
"epoch": 3.12,
"learning_rate": 2.4606913236560277e-07,
"loss": 1.6816,
"step": 3146
},
{
"epoch": 3.12,
"learning_rate": 2.450154894909001e-07,
"loss": 1.6525,
"step": 3148
},
{
"epoch": 3.12,
"learning_rate": 2.439637922273294e-07,
"loss": 1.7477,
"step": 3150
},
{
"epoch": 3.12,
"learning_rate": 2.4291404328513686e-07,
"loss": 1.6488,
"step": 3152
},
{
"epoch": 3.13,
"learning_rate": 2.4186624536954736e-07,
"loss": 1.7179,
"step": 3154
},
{
"epoch": 3.13,
"learning_rate": 2.408204011807589e-07,
"loss": 1.651,
"step": 3156
},
{
"epoch": 3.13,
"learning_rate": 2.3977651341393367e-07,
"loss": 1.6154,
"step": 3158
},
{
"epoch": 3.13,
"learning_rate": 2.3873458475919296e-07,
"loss": 1.6606,
"step": 3160
},
{
"epoch": 3.13,
"learning_rate": 2.376946179016094e-07,
"loss": 1.6724,
"step": 3162
},
{
"epoch": 3.14,
"learning_rate": 2.366566155211992e-07,
"loss": 1.7043,
"step": 3164
},
{
"epoch": 3.14,
"learning_rate": 2.3562058029291654e-07,
"loss": 1.6914,
"step": 3166
},
{
"epoch": 3.14,
"learning_rate": 2.345865148866466e-07,
"loss": 1.6487,
"step": 3168
},
{
"epoch": 3.14,
"learning_rate": 2.3355442196719732e-07,
"loss": 1.6732,
"step": 3170
},
{
"epoch": 3.14,
"learning_rate": 2.3252430419429492e-07,
"loss": 1.7267,
"step": 3172
},
{
"epoch": 3.14,
"learning_rate": 2.314961642225739e-07,
"loss": 1.6803,
"step": 3174
},
{
"epoch": 3.15,
"learning_rate": 2.3047000470157363e-07,
"loss": 1.7008,
"step": 3176
},
{
"epoch": 3.15,
"learning_rate": 2.2944582827572835e-07,
"loss": 1.6797,
"step": 3178
},
{
"epoch": 3.15,
"learning_rate": 2.2842363758436267e-07,
"loss": 1.7207,
"step": 3180
},
{
"epoch": 3.15,
"learning_rate": 2.2740343526168448e-07,
"loss": 1.7016,
"step": 3182
},
{
"epoch": 3.15,
"learning_rate": 2.2638522393677562e-07,
"loss": 1.6971,
"step": 3184
},
{
"epoch": 3.16,
"learning_rate": 2.253690062335888e-07,
"loss": 1.6489,
"step": 3186
},
{
"epoch": 3.16,
"learning_rate": 2.2435478477093917e-07,
"loss": 1.6823,
"step": 3188
},
{
"epoch": 3.16,
"learning_rate": 2.2334256216249614e-07,
"loss": 1.6691,
"step": 3190
},
{
"epoch": 3.16,
"learning_rate": 2.2233234101677967e-07,
"loss": 1.6913,
"step": 3192
},
{
"epoch": 3.16,
"learning_rate": 2.213241239371505e-07,
"loss": 1.5959,
"step": 3194
},
{
"epoch": 3.17,
"learning_rate": 2.2031791352180607e-07,
"loss": 1.6952,
"step": 3196
},
{
"epoch": 3.17,
"learning_rate": 2.1931371236377128e-07,
"loss": 1.6515,
"step": 3198
},
{
"epoch": 3.17,
"learning_rate": 2.1831152305089463e-07,
"loss": 1.6253,
"step": 3200
},
{
"epoch": 3.17,
"learning_rate": 2.1731134816583886e-07,
"loss": 1.6846,
"step": 3202
},
{
"epoch": 3.17,
"learning_rate": 2.1631319028607565e-07,
"loss": 1.6864,
"step": 3204
},
{
"epoch": 3.18,
"learning_rate": 2.153170519838795e-07,
"loss": 1.6426,
"step": 3206
},
{
"epoch": 3.18,
"learning_rate": 2.1432293582632012e-07,
"loss": 1.6669,
"step": 3208
},
{
"epoch": 3.18,
"learning_rate": 2.1333084437525562e-07,
"loss": 1.7057,
"step": 3210
},
{
"epoch": 3.18,
"learning_rate": 2.1234078018732671e-07,
"loss": 1.7278,
"step": 3212
},
{
"epoch": 3.18,
"learning_rate": 2.1135274581395025e-07,
"loss": 1.6809,
"step": 3214
},
{
"epoch": 3.19,
"learning_rate": 2.1036674380131137e-07,
"loss": 1.6837,
"step": 3216
},
{
"epoch": 3.19,
"learning_rate": 2.0938277669035832e-07,
"loss": 1.6744,
"step": 3218
},
{
"epoch": 3.19,
"learning_rate": 2.0840084701679573e-07,
"loss": 1.6669,
"step": 3220
},
{
"epoch": 3.19,
"learning_rate": 2.074209573110769e-07,
"loss": 1.6984,
"step": 3222
},
{
"epoch": 3.19,
"learning_rate": 2.0644311009839798e-07,
"loss": 1.6924,
"step": 3224
},
{
"epoch": 3.2,
"learning_rate": 2.054673078986926e-07,
"loss": 1.7082,
"step": 3226
},
{
"epoch": 3.2,
"learning_rate": 2.0449355322662387e-07,
"loss": 1.735,
"step": 3228
},
{
"epoch": 3.2,
"learning_rate": 2.0352184859157794e-07,
"loss": 1.6493,
"step": 3230
},
{
"epoch": 3.2,
"learning_rate": 2.0255219649765832e-07,
"loss": 1.6953,
"step": 3232
},
{
"epoch": 3.2,
"learning_rate": 2.0158459944367924e-07,
"loss": 1.7332,
"step": 3234
},
{
"epoch": 3.21,
"learning_rate": 2.0061905992315853e-07,
"loss": 1.6583,
"step": 3236
},
{
"epoch": 3.21,
"learning_rate": 1.9965558042431264e-07,
"loss": 1.6999,
"step": 3238
},
{
"epoch": 3.21,
"learning_rate": 1.9869416343004808e-07,
"loss": 1.6864,
"step": 3240
},
{
"epoch": 3.21,
"learning_rate": 1.9773481141795755e-07,
"loss": 1.6661,
"step": 3242
},
{
"epoch": 3.21,
"learning_rate": 1.967775268603109e-07,
"loss": 1.6839,
"step": 3244
},
{
"epoch": 3.22,
"learning_rate": 1.9582231222405132e-07,
"loss": 1.6569,
"step": 3246
},
{
"epoch": 3.22,
"learning_rate": 1.9486916997078706e-07,
"loss": 1.6366,
"step": 3248
},
{
"epoch": 3.22,
"learning_rate": 1.9391810255678565e-07,
"loss": 1.6826,
"step": 3250
},
{
"epoch": 3.22,
"learning_rate": 1.9296911243296822e-07,
"loss": 1.6749,
"step": 3252
},
{
"epoch": 3.22,
"learning_rate": 1.9202220204490293e-07,
"loss": 1.7141,
"step": 3254
},
{
"epoch": 3.23,
"learning_rate": 1.9107737383279733e-07,
"loss": 1.6661,
"step": 3256
},
{
"epoch": 3.23,
"learning_rate": 1.9013463023149435e-07,
"loss": 1.6622,
"step": 3258
},
{
"epoch": 3.23,
"learning_rate": 1.8919397367046409e-07,
"loss": 1.6893,
"step": 3260
},
{
"epoch": 3.23,
"learning_rate": 1.8825540657379812e-07,
"loss": 1.6911,
"step": 3262
},
{
"epoch": 3.23,
"learning_rate": 1.8731893136020415e-07,
"loss": 1.6718,
"step": 3264
},
{
"epoch": 3.24,
"learning_rate": 1.8638455044299894e-07,
"loss": 1.7332,
"step": 3266
},
{
"epoch": 3.24,
"learning_rate": 1.854522662301018e-07,
"loss": 1.7031,
"step": 3268
},
{
"epoch": 3.24,
"learning_rate": 1.8452208112402857e-07,
"loss": 1.647,
"step": 3270
},
{
"epoch": 3.24,
"learning_rate": 1.835939975218863e-07,
"loss": 1.6536,
"step": 3272
},
{
"epoch": 3.24,
"learning_rate": 1.8266801781536655e-07,
"loss": 1.6738,
"step": 3274
},
{
"epoch": 3.25,
"learning_rate": 1.8174414439073816e-07,
"loss": 1.6868,
"step": 3276
},
{
"epoch": 3.25,
"learning_rate": 1.8082237962884306e-07,
"loss": 1.6532,
"step": 3278
},
{
"epoch": 3.25,
"learning_rate": 1.7990272590508827e-07,
"loss": 1.6979,
"step": 3280
},
{
"epoch": 3.25,
"learning_rate": 1.7898518558944088e-07,
"loss": 1.6538,
"step": 3282
},
{
"epoch": 3.25,
"learning_rate": 1.7806976104642202e-07,
"loss": 1.7394,
"step": 3284
},
{
"epoch": 3.26,
"learning_rate": 1.7715645463510041e-07,
"loss": 1.6426,
"step": 3286
},
{
"epoch": 3.26,
"learning_rate": 1.7624526870908584e-07,
"loss": 1.7107,
"step": 3288
},
{
"epoch": 3.26,
"learning_rate": 1.7533620561652363e-07,
"loss": 1.6849,
"step": 3290
},
{
"epoch": 3.26,
"learning_rate": 1.7442926770008903e-07,
"loss": 1.66,
"step": 3292
},
{
"epoch": 3.26,
"learning_rate": 1.7352445729697994e-07,
"loss": 1.6332,
"step": 3294
},
{
"epoch": 3.27,
"learning_rate": 1.7262177673891253e-07,
"loss": 1.7031,
"step": 3296
},
{
"epoch": 3.27,
"learning_rate": 1.7172122835211333e-07,
"loss": 1.6407,
"step": 3298
},
{
"epoch": 3.27,
"learning_rate": 1.7082281445731495e-07,
"loss": 1.705,
"step": 3300
},
{
"epoch": 3.27,
"learning_rate": 1.6992653736974883e-07,
"loss": 1.6543,
"step": 3302
},
{
"epoch": 3.27,
"learning_rate": 1.6903239939914016e-07,
"loss": 1.668,
"step": 3304
},
{
"epoch": 3.28,
"learning_rate": 1.681404028497021e-07,
"loss": 1.6595,
"step": 3306
},
{
"epoch": 3.28,
"learning_rate": 1.6725055002012768e-07,
"loss": 1.6746,
"step": 3308
},
{
"epoch": 3.28,
"learning_rate": 1.6636284320358707e-07,
"loss": 1.7035,
"step": 3310
},
{
"epoch": 3.28,
"learning_rate": 1.6547728468771983e-07,
"loss": 1.729,
"step": 3312
},
{
"epoch": 3.28,
"learning_rate": 1.6459387675462867e-07,
"loss": 1.6552,
"step": 3314
},
{
"epoch": 3.29,
"learning_rate": 1.6371262168087497e-07,
"loss": 1.6789,
"step": 3316
},
{
"epoch": 3.29,
"learning_rate": 1.6283352173747146e-07,
"loss": 1.6701,
"step": 3318
},
{
"epoch": 3.29,
"learning_rate": 1.6195657918987758e-07,
"loss": 1.5709,
"step": 3320
},
{
"epoch": 3.29,
"learning_rate": 1.610817962979928e-07,
"loss": 1.6626,
"step": 3322
},
{
"epoch": 3.29,
"learning_rate": 1.6064521542763833e-07,
"loss": 1.6294,
"step": 3324
},
{
"epoch": 3.3,
"learning_rate": 1.5977367624445305e-07,
"loss": 1.6654,
"step": 3326
},
{
"epoch": 3.3,
"learning_rate": 1.589043023423643e-07,
"loss": 1.6842,
"step": 3328
},
{
"epoch": 3.3,
"learning_rate": 1.5847042807207256e-07,
"loss": 1.7304,
"step": 3330
},
{
"epoch": 3.3,
"learning_rate": 1.576043062906236e-07,
"loss": 1.6603,
"step": 3332
},
{
"epoch": 3.3,
"learning_rate": 1.567403553807838e-07,
"loss": 1.6465,
"step": 3334
},
{
"epoch": 3.31,
"learning_rate": 1.5587857756897283e-07,
"loss": 1.6381,
"step": 3336
},
{
"epoch": 3.31,
"learning_rate": 1.5501897507601015e-07,
"loss": 1.7105,
"step": 3338
},
{
"epoch": 3.31,
"learning_rate": 1.5416155011711018e-07,
"loss": 1.7057,
"step": 3340
},
{
"epoch": 3.31,
"learning_rate": 1.5330630490187468e-07,
"loss": 1.6832,
"step": 3342
},
{
"epoch": 3.31,
"learning_rate": 1.5245324163428908e-07,
"loss": 1.6965,
"step": 3344
},
{
"epoch": 3.32,
"learning_rate": 1.5160236251271574e-07,
"loss": 1.683,
"step": 3346
},
{
"epoch": 3.32,
"learning_rate": 1.5075366972988812e-07,
"loss": 1.677,
"step": 3348
},
{
"epoch": 3.32,
"learning_rate": 1.4990716547290517e-07,
"loss": 1.6961,
"step": 3350
},
{
"epoch": 3.32,
"learning_rate": 1.4906285192322666e-07,
"loss": 1.6437,
"step": 3352
},
{
"epoch": 3.32,
"learning_rate": 1.4822073125666678e-07,
"loss": 1.6676,
"step": 3354
},
{
"epoch": 3.33,
"learning_rate": 1.4738080564338817e-07,
"loss": 1.7125,
"step": 3356
},
{
"epoch": 3.33,
"learning_rate": 1.4654307724789682e-07,
"loss": 1.6495,
"step": 3358
},
{
"epoch": 3.33,
"learning_rate": 1.4570754822903686e-07,
"loss": 1.7064,
"step": 3360
},
{
"epoch": 3.33,
"learning_rate": 1.448742207399841e-07,
"loss": 1.6487,
"step": 3362
},
{
"epoch": 3.33,
"learning_rate": 1.4404309692824135e-07,
"loss": 1.6539,
"step": 3364
},
{
"epoch": 3.34,
"learning_rate": 1.4321417893563258e-07,
"loss": 1.6744,
"step": 3366
},
{
"epoch": 3.34,
"learning_rate": 1.4238746889829678e-07,
"loss": 1.644,
"step": 3368
},
{
"epoch": 3.34,
"learning_rate": 1.4156296894668317e-07,
"loss": 1.718,
"step": 3370
},
{
"epoch": 3.34,
"learning_rate": 1.4074068120554595e-07,
"loss": 1.6806,
"step": 3372
},
{
"epoch": 3.34,
"learning_rate": 1.3992060779393778e-07,
"loss": 1.7006,
"step": 3374
},
{
"epoch": 3.35,
"learning_rate": 1.3910275082520572e-07,
"loss": 1.6705,
"step": 3376
},
{
"epoch": 3.35,
"learning_rate": 1.38287112406984e-07,
"loss": 1.6357,
"step": 3378
},
{
"epoch": 3.35,
"learning_rate": 1.3747369464119074e-07,
"loss": 1.6412,
"step": 3380
},
{
"epoch": 3.35,
"learning_rate": 1.366624996240203e-07,
"loss": 1.7191,
"step": 3382
},
{
"epoch": 3.35,
"learning_rate": 1.358535294459401e-07,
"loss": 1.7351,
"step": 3384
},
{
"epoch": 3.35,
"learning_rate": 1.3504678619168297e-07,
"loss": 1.6871,
"step": 3386
},
{
"epoch": 3.36,
"learning_rate": 1.3424227194024328e-07,
"loss": 1.6774,
"step": 3388
},
{
"epoch": 3.36,
"learning_rate": 1.3343998876487172e-07,
"loss": 1.6983,
"step": 3390
},
{
"epoch": 3.36,
"learning_rate": 1.3263993873306923e-07,
"loss": 1.6706,
"step": 3392
},
{
"epoch": 3.36,
"learning_rate": 1.3184212390658155e-07,
"loss": 1.7114,
"step": 3394
},
{
"epoch": 3.36,
"learning_rate": 1.310465463413941e-07,
"loss": 1.6966,
"step": 3396
},
{
"epoch": 3.37,
"learning_rate": 1.3025320808772766e-07,
"loss": 1.697,
"step": 3398
},
{
"epoch": 3.37,
"learning_rate": 1.2946211119003169e-07,
"loss": 1.6294,
"step": 3400
},
{
"epoch": 3.37,
"learning_rate": 1.2867325768697946e-07,
"loss": 1.7066,
"step": 3402
},
{
"epoch": 3.37,
"learning_rate": 1.2788664961146367e-07,
"loss": 1.6652,
"step": 3404
},
{
"epoch": 3.37,
"learning_rate": 1.2710228899058971e-07,
"loss": 1.6734,
"step": 3406
},
{
"epoch": 3.38,
"learning_rate": 1.2632017784567138e-07,
"loss": 1.724,
"step": 3408
},
{
"epoch": 3.38,
"learning_rate": 1.25540318192226e-07,
"loss": 1.6693,
"step": 3410
},
{
"epoch": 3.38,
"learning_rate": 1.2476271203996868e-07,
"loss": 1.676,
"step": 3412
},
{
"epoch": 3.38,
"learning_rate": 1.2398736139280687e-07,
"loss": 1.719,
"step": 3414
},
{
"epoch": 3.38,
"learning_rate": 1.2321426824883517e-07,
"loss": 1.7009,
"step": 3416
},
{
"epoch": 3.39,
"learning_rate": 1.224434346003318e-07,
"loss": 1.6509,
"step": 3418
},
{
"epoch": 3.39,
"learning_rate": 1.2167486243375092e-07,
"loss": 1.6845,
"step": 3420
},
{
"epoch": 3.39,
"learning_rate": 1.2090855372971942e-07,
"loss": 1.6586,
"step": 3422
},
{
"epoch": 3.39,
"learning_rate": 1.2014451046303142e-07,
"loss": 1.6796,
"step": 3424
},
{
"epoch": 3.39,
"learning_rate": 1.193827346026426e-07,
"loss": 1.6421,
"step": 3426
},
{
"epoch": 3.4,
"learning_rate": 1.1862322811166514e-07,
"loss": 1.6503,
"step": 3428
},
{
"epoch": 3.4,
"learning_rate": 1.1786599294736366e-07,
"loss": 1.716,
"step": 3430
},
{
"epoch": 3.4,
"learning_rate": 1.1711103106114961e-07,
"loss": 1.696,
"step": 3432
},
{
"epoch": 3.4,
"learning_rate": 1.1635834439857528e-07,
"loss": 1.6932,
"step": 3434
},
{
"epoch": 3.4,
"learning_rate": 1.156079348993303e-07,
"loss": 1.6951,
"step": 3436
},
{
"epoch": 3.41,
"learning_rate": 1.148598044972363e-07,
"loss": 1.6551,
"step": 3438
},
{
"epoch": 3.41,
"learning_rate": 1.1411395512024069e-07,
"loss": 1.7018,
"step": 3440
},
{
"epoch": 3.41,
"learning_rate": 1.133703886904136e-07,
"loss": 1.6772,
"step": 3442
},
{
"epoch": 3.41,
"learning_rate": 1.1262910712394125e-07,
"loss": 1.6992,
"step": 3444
},
{
"epoch": 3.41,
"learning_rate": 1.1189011233112223e-07,
"loss": 1.6705,
"step": 3446
},
{
"epoch": 3.42,
"learning_rate": 1.111534062163616e-07,
"loss": 1.6787,
"step": 3448
},
{
"epoch": 3.42,
"learning_rate": 1.1041899067816663e-07,
"loss": 1.7002,
"step": 3450
},
{
"epoch": 3.42,
"learning_rate": 1.0968686760914248e-07,
"loss": 1.6665,
"step": 3452
},
{
"epoch": 3.42,
"learning_rate": 1.0895703889598473e-07,
"loss": 1.6806,
"step": 3454
},
{
"epoch": 3.42,
"learning_rate": 1.0822950641947814e-07,
"loss": 1.6748,
"step": 3456
},
{
"epoch": 3.43,
"learning_rate": 1.0750427205448953e-07,
"loss": 1.6429,
"step": 3458
},
{
"epoch": 3.43,
"learning_rate": 1.0678133766996277e-07,
"loss": 1.6581,
"step": 3460
},
{
"epoch": 3.43,
"learning_rate": 1.0606070512891562e-07,
"loss": 1.6574,
"step": 3462
},
{
"epoch": 3.43,
"learning_rate": 1.0534237628843323e-07,
"loss": 1.6619,
"step": 3464
},
{
"epoch": 3.43,
"learning_rate": 1.0462635299966404e-07,
"loss": 1.6999,
"step": 3466
},
{
"epoch": 3.44,
"learning_rate": 1.0391263710781561e-07,
"loss": 1.6289,
"step": 3468
},
{
"epoch": 3.44,
"learning_rate": 1.0320123045214902e-07,
"loss": 1.6939,
"step": 3470
},
{
"epoch": 3.44,
"learning_rate": 1.024921348659743e-07,
"loss": 1.7441,
"step": 3472
},
{
"epoch": 3.44,
"learning_rate": 1.0178535217664552e-07,
"loss": 1.697,
"step": 3474
},
{
"epoch": 3.44,
"learning_rate": 1.0108088420555694e-07,
"loss": 1.6491,
"step": 3476
},
{
"epoch": 3.45,
"learning_rate": 1.0037873276813769e-07,
"loss": 1.6462,
"step": 3478
},
{
"epoch": 3.45,
"learning_rate": 9.967889967384668e-08,
"loss": 1.6783,
"step": 3480
},
{
"epoch": 3.45,
"learning_rate": 9.898138672616863e-08,
"loss": 1.691,
"step": 3482
},
{
"epoch": 3.45,
"learning_rate": 9.828619572260954e-08,
"loss": 1.723,
"step": 3484
},
{
"epoch": 3.45,
"learning_rate": 9.759332845469104e-08,
"loss": 1.683,
"step": 3486
},
{
"epoch": 3.46,
"learning_rate": 9.690278670794682e-08,
"loss": 1.6886,
"step": 3488
},
{
"epoch": 3.46,
"learning_rate": 9.62145722619182e-08,
"loss": 1.7261,
"step": 3490
},
{
"epoch": 3.46,
"learning_rate": 9.552868689014792e-08,
"loss": 1.6743,
"step": 3492
},
{
"epoch": 3.46,
"learning_rate": 9.48451323601771e-08,
"loss": 1.6576,
"step": 3494
},
{
"epoch": 3.46,
"learning_rate": 9.416391043354043e-08,
"loss": 1.6683,
"step": 3496
},
{
"epoch": 3.47,
"learning_rate": 9.348502286576154e-08,
"loss": 1.6587,
"step": 3498
},
{
"epoch": 3.47,
"learning_rate": 9.280847140634796e-08,
"loss": 1.683,
"step": 3500
},
{
"epoch": 3.47,
"learning_rate": 9.213425779878692e-08,
"loss": 1.6838,
"step": 3502
},
{
"epoch": 3.47,
"learning_rate": 9.14623837805416e-08,
"loss": 1.6175,
"step": 3504
},
{
"epoch": 3.47,
"learning_rate": 9.079285108304535e-08,
"loss": 1.6349,
"step": 3506
},
{
"epoch": 3.48,
"learning_rate": 9.01256614316982e-08,
"loss": 1.6756,
"step": 3508
},
{
"epoch": 3.48,
"learning_rate": 8.946081654586258e-08,
"loss": 1.6785,
"step": 3510
},
{
"epoch": 3.48,
"learning_rate": 8.879831813885697e-08,
"loss": 1.6431,
"step": 3512
},
{
"epoch": 3.48,
"learning_rate": 8.813816791795448e-08,
"loss": 1.6858,
"step": 3514
},
{
"epoch": 3.48,
"learning_rate": 8.748036758437616e-08,
"loss": 1.6935,
"step": 3516
},
{
"epoch": 3.49,
"learning_rate": 8.682491883328724e-08,
"loss": 1.6505,
"step": 3518
},
{
"epoch": 3.49,
"learning_rate": 8.617182335379346e-08,
"loss": 1.6802,
"step": 3520
},
{
"epoch": 3.49,
"learning_rate": 8.552108282893545e-08,
"loss": 1.695,
"step": 3522
},
{
"epoch": 3.49,
"learning_rate": 8.487269893568549e-08,
"loss": 1.6742,
"step": 3524
},
{
"epoch": 3.49,
"learning_rate": 8.422667334494249e-08,
"loss": 1.7183,
"step": 3526
},
{
"epoch": 3.5,
"learning_rate": 8.358300772152849e-08,
"loss": 1.6844,
"step": 3528
},
{
"epoch": 3.5,
"learning_rate": 8.29417037241833e-08,
"loss": 1.6702,
"step": 3530
},
{
"epoch": 3.5,
"learning_rate": 8.230276300556071e-08,
"loss": 1.6505,
"step": 3532
},
{
"epoch": 3.5,
"learning_rate": 8.166618721222462e-08,
"loss": 1.6815,
"step": 3534
},
{
"epoch": 3.5,
"learning_rate": 8.103197798464478e-08,
"loss": 1.6861,
"step": 3536
},
{
"epoch": 3.51,
"learning_rate": 8.040013695719162e-08,
"loss": 1.6608,
"step": 3538
},
{
"epoch": 3.51,
"learning_rate": 7.977066575813263e-08,
"loss": 1.6985,
"step": 3540
},
{
"epoch": 3.51,
"learning_rate": 7.914356600962879e-08,
"loss": 1.7021,
"step": 3542
},
{
"epoch": 3.51,
"learning_rate": 7.851883932772929e-08,
"loss": 1.6672,
"step": 3544
},
{
"epoch": 3.51,
"learning_rate": 7.78964873223682e-08,
"loss": 1.6433,
"step": 3546
},
{
"epoch": 3.52,
"learning_rate": 7.727651159735992e-08,
"loss": 1.6498,
"step": 3548
},
{
"epoch": 3.52,
"learning_rate": 7.665891375039501e-08,
"loss": 1.6734,
"step": 3550
},
{
"epoch": 3.52,
"learning_rate": 7.6043695373036e-08,
"loss": 1.6656,
"step": 3552
},
{
"epoch": 3.52,
"learning_rate": 7.543085805071392e-08,
"loss": 1.6649,
"step": 3554
},
{
"epoch": 3.52,
"learning_rate": 7.482040336272388e-08,
"loss": 1.6739,
"step": 3556
},
{
"epoch": 3.53,
"learning_rate": 7.421233288222029e-08,
"loss": 1.7235,
"step": 3558
},
{
"epoch": 3.53,
"learning_rate": 7.360664817621342e-08,
"loss": 1.6998,
"step": 3560
},
{
"epoch": 3.53,
"learning_rate": 7.30033508055663e-08,
"loss": 1.7018,
"step": 3562
},
{
"epoch": 3.53,
"learning_rate": 7.240244232498837e-08,
"loss": 1.7032,
"step": 3564
},
{
"epoch": 3.53,
"learning_rate": 7.180392428303394e-08,
"loss": 1.6909,
"step": 3566
},
{
"epoch": 3.54,
"learning_rate": 7.120779822209688e-08,
"loss": 1.7179,
"step": 3568
},
{
"epoch": 3.54,
"learning_rate": 7.061406567840655e-08,
"loss": 1.6764,
"step": 3570
},
{
"epoch": 3.54,
"learning_rate": 7.002272818202415e-08,
"loss": 1.7069,
"step": 3572
},
{
"epoch": 3.54,
"learning_rate": 6.943378725683912e-08,
"loss": 1.7024,
"step": 3574
},
{
"epoch": 3.54,
"learning_rate": 6.884724442056499e-08,
"loss": 1.6531,
"step": 3576
},
{
"epoch": 3.55,
"learning_rate": 6.826310118473466e-08,
"loss": 1.6295,
"step": 3578
},
{
"epoch": 3.55,
"learning_rate": 6.768135905469785e-08,
"loss": 1.6435,
"step": 3580
},
{
"epoch": 3.55,
"learning_rate": 6.71020195296167e-08,
"loss": 1.6703,
"step": 3582
},
{
"epoch": 3.55,
"learning_rate": 6.652508410246094e-08,
"loss": 1.6818,
"step": 3584
},
{
"epoch": 3.55,
"learning_rate": 6.595055426000595e-08,
"loss": 1.68,
"step": 3586
},
{
"epoch": 3.56,
"learning_rate": 6.537843148282706e-08,
"loss": 1.6795,
"step": 3588
},
{
"epoch": 3.56,
"learning_rate": 6.480871724529668e-08,
"loss": 1.7166,
"step": 3590
},
{
"epoch": 3.56,
"learning_rate": 6.424141301558084e-08,
"loss": 1.6654,
"step": 3592
},
{
"epoch": 3.56,
"learning_rate": 6.367652025563453e-08,
"loss": 1.7272,
"step": 3594
},
{
"epoch": 3.56,
"learning_rate": 6.311404042119828e-08,
"loss": 1.6226,
"step": 3596
},
{
"epoch": 3.57,
"learning_rate": 6.255397496179448e-08,
"loss": 1.7252,
"step": 3598
},
{
"epoch": 3.57,
"learning_rate": 6.199632532072397e-08,
"loss": 1.7283,
"step": 3600
},
{
"epoch": 3.57,
"learning_rate": 6.144109293506172e-08,
"loss": 1.6501,
"step": 3602
},
{
"epoch": 3.57,
"learning_rate": 6.088827923565321e-08,
"loss": 1.7162,
"step": 3604
},
{
"epoch": 3.57,
"learning_rate": 6.033788564711128e-08,
"loss": 1.6611,
"step": 3606
},
{
"epoch": 3.57,
"learning_rate": 5.978991358781182e-08,
"loss": 1.6925,
"step": 3608
},
{
"epoch": 3.58,
"learning_rate": 5.924436446989034e-08,
"loss": 1.6528,
"step": 3610
},
{
"epoch": 3.58,
"learning_rate": 5.870123969923868e-08,
"loss": 1.7173,
"step": 3612
},
{
"epoch": 3.58,
"learning_rate": 5.8160540675500913e-08,
"loss": 1.6828,
"step": 3614
},
{
"epoch": 3.58,
"learning_rate": 5.7622268792069996e-08,
"loss": 1.6779,
"step": 3616
},
{
"epoch": 3.58,
"learning_rate": 5.7086425436083775e-08,
"loss": 1.6229,
"step": 3618
},
{
"epoch": 3.59,
"learning_rate": 5.6553011988421993e-08,
"loss": 1.6758,
"step": 3620
},
{
"epoch": 3.59,
"learning_rate": 5.6022029823702745e-08,
"loss": 1.667,
"step": 3622
},
{
"epoch": 3.59,
"learning_rate": 5.5493480310278115e-08,
"loss": 1.6687,
"step": 3624
},
{
"epoch": 3.59,
"learning_rate": 5.496736481023145e-08,
"loss": 1.7242,
"step": 3626
},
{
"epoch": 3.59,
"learning_rate": 5.444368467937366e-08,
"loss": 1.6859,
"step": 3628
},
{
"epoch": 3.6,
"learning_rate": 5.392244126723933e-08,
"loss": 1.6798,
"step": 3630
},
{
"epoch": 3.6,
"learning_rate": 5.340363591708408e-08,
"loss": 1.6622,
"step": 3632
},
{
"epoch": 3.6,
"learning_rate": 5.2887269965880556e-08,
"loss": 1.6727,
"step": 3634
},
{
"epoch": 3.6,
"learning_rate": 5.2373344744314095e-08,
"loss": 1.7243,
"step": 3636
},
{
"epoch": 3.6,
"learning_rate": 5.1861861576781405e-08,
"loss": 1.7299,
"step": 3638
},
{
"epoch": 3.61,
"learning_rate": 5.135282178138545e-08,
"loss": 1.6512,
"step": 3640
},
{
"epoch": 3.61,
"learning_rate": 5.0846226669932437e-08,
"loss": 1.641,
"step": 3642
},
{
"epoch": 3.61,
"learning_rate": 5.034207754792896e-08,
"loss": 1.664,
"step": 3644
},
{
"epoch": 3.61,
"learning_rate": 4.9840375714577666e-08,
"loss": 1.6698,
"step": 3646
},
{
"epoch": 3.61,
"learning_rate": 4.934112246277533e-08,
"loss": 1.7022,
"step": 3648
},
{
"epoch": 3.62,
"learning_rate": 4.88443190791078e-08,
"loss": 1.6603,
"step": 3650
},
{
"epoch": 3.62,
"learning_rate": 4.834996684384795e-08,
"loss": 1.7094,
"step": 3652
},
{
"epoch": 3.62,
"learning_rate": 4.785806703095252e-08,
"loss": 1.6443,
"step": 3654
},
{
"epoch": 3.62,
"learning_rate": 4.7368620908057155e-08,
"loss": 1.6811,
"step": 3656
},
{
"epoch": 3.62,
"learning_rate": 4.6881629736475137e-08,
"loss": 1.6409,
"step": 3658
},
{
"epoch": 3.63,
"learning_rate": 4.639709477119325e-08,
"loss": 1.6218,
"step": 3660
},
{
"epoch": 3.63,
"learning_rate": 4.59150172608681e-08,
"loss": 1.701,
"step": 3662
},
{
"epoch": 3.63,
"learning_rate": 4.543539844782396e-08,
"loss": 1.669,
"step": 3664
},
{
"epoch": 3.63,
"learning_rate": 4.495823956804834e-08,
"loss": 1.704,
"step": 3666
},
{
"epoch": 3.63,
"learning_rate": 4.4483541851190297e-08,
"loss": 1.649,
"step": 3668
},
{
"epoch": 3.64,
"learning_rate": 4.4011306520555514e-08,
"loss": 1.6022,
"step": 3670
},
{
"epoch": 3.64,
"learning_rate": 4.354153479310485e-08,
"loss": 1.6646,
"step": 3672
},
{
"epoch": 3.64,
"learning_rate": 4.30742278794497e-08,
"loss": 1.6509,
"step": 3674
},
{
"epoch": 3.64,
"learning_rate": 4.260938698384997e-08,
"loss": 1.6517,
"step": 3676
},
{
"epoch": 3.64,
"learning_rate": 4.2147013304210666e-08,
"loss": 1.6678,
"step": 3678
},
{
"epoch": 3.65,
"learning_rate": 4.168710803207864e-08,
"loss": 1.701,
"step": 3680
},
{
"epoch": 3.65,
"learning_rate": 4.122967235263941e-08,
"loss": 1.689,
"step": 3682
},
{
"epoch": 3.65,
"learning_rate": 4.077470744471434e-08,
"loss": 1.6651,
"step": 3684
},
{
"epoch": 3.65,
"learning_rate": 4.03222144807579e-08,
"loss": 1.6899,
"step": 3686
},
{
"epoch": 3.65,
"learning_rate": 3.987219462685387e-08,
"loss": 1.7042,
"step": 3688
},
{
"epoch": 3.66,
"learning_rate": 3.942464904271314e-08,
"loss": 1.6799,
"step": 3690
},
{
"epoch": 3.66,
"learning_rate": 3.897957888167025e-08,
"loss": 1.6515,
"step": 3692
},
{
"epoch": 3.66,
"learning_rate": 3.853698529068028e-08,
"loss": 1.7065,
"step": 3694
},
{
"epoch": 3.66,
"learning_rate": 3.8096869410316426e-08,
"loss": 1.6632,
"step": 3696
},
{
"epoch": 3.66,
"learning_rate": 3.765923237476654e-08,
"loss": 1.6718,
"step": 3698
},
{
"epoch": 3.67,
"learning_rate": 3.72240753118308e-08,
"loss": 1.6799,
"step": 3700
},
{
"epoch": 3.67,
"learning_rate": 3.6791399342918086e-08,
"loss": 1.7134,
"step": 3702
},
{
"epoch": 3.67,
"learning_rate": 3.6361205583043456e-08,
"loss": 1.6646,
"step": 3704
},
{
"epoch": 3.67,
"learning_rate": 3.593349514082555e-08,
"loss": 1.6454,
"step": 3706
},
{
"epoch": 3.67,
"learning_rate": 3.5508269118483034e-08,
"loss": 1.7319,
"step": 3708
},
{
"epoch": 3.68,
"learning_rate": 3.508552861183267e-08,
"loss": 1.6319,
"step": 3710
},
{
"epoch": 3.68,
"learning_rate": 3.46652747102858e-08,
"loss": 1.7415,
"step": 3712
},
{
"epoch": 3.68,
"learning_rate": 3.4247508496845326e-08,
"loss": 1.6909,
"step": 3714
},
{
"epoch": 3.68,
"learning_rate": 3.3832231048103735e-08,
"loss": 1.6501,
"step": 3716
},
{
"epoch": 3.68,
"learning_rate": 3.341944343424008e-08,
"loss": 1.7375,
"step": 3718
},
{
"epoch": 3.69,
"learning_rate": 3.300914671901656e-08,
"loss": 1.6772,
"step": 3720
},
{
"epoch": 3.69,
"learning_rate": 3.2601341959776594e-08,
"loss": 1.6978,
"step": 3722
},
{
"epoch": 3.69,
"learning_rate": 3.219603020744177e-08,
"loss": 1.649,
"step": 3724
},
{
"epoch": 3.69,
"learning_rate": 3.179321250650924e-08,
"loss": 1.6255,
"step": 3726
},
{
"epoch": 3.69,
"learning_rate": 3.13928898950484e-08,
"loss": 1.6701,
"step": 3728
},
{
"epoch": 3.7,
"learning_rate": 3.099506340469948e-08,
"loss": 1.7098,
"step": 3730
},
{
"epoch": 3.7,
"learning_rate": 3.0599734060669626e-08,
"loss": 1.6822,
"step": 3732
},
{
"epoch": 3.7,
"learning_rate": 3.0206902881731e-08,
"loss": 1.7008,
"step": 3734
},
{
"epoch": 3.7,
"learning_rate": 2.981657088021783e-08,
"loss": 1.6697,
"step": 3736
},
{
"epoch": 3.7,
"learning_rate": 2.9428739062024144e-08,
"loss": 1.6766,
"step": 3738
},
{
"epoch": 3.71,
"learning_rate": 2.904340842660069e-08,
"loss": 1.6991,
"step": 3740
},
{
"epoch": 3.71,
"learning_rate": 2.86605799669527e-08,
"loss": 1.6161,
"step": 3742
},
{
"epoch": 3.71,
"learning_rate": 2.8280254669637127e-08,
"loss": 1.6393,
"step": 3744
},
{
"epoch": 3.71,
"learning_rate": 2.7902433514760626e-08,
"loss": 1.6797,
"step": 3746
},
{
"epoch": 3.71,
"learning_rate": 2.7527117475976136e-08,
"loss": 1.6685,
"step": 3748
},
{
"epoch": 3.72,
"learning_rate": 2.71543075204812e-08,
"loss": 1.6945,
"step": 3750
},
{
"epoch": 3.72,
"learning_rate": 2.6784004609014864e-08,
"loss": 1.6821,
"step": 3752
},
{
"epoch": 3.72,
"learning_rate": 2.6416209695855563e-08,
"loss": 1.7373,
"step": 3754
},
{
"epoch": 3.72,
"learning_rate": 2.6050923728818784e-08,
"loss": 1.6551,
"step": 3756
},
{
"epoch": 3.72,
"learning_rate": 2.56881476492542e-08,
"loss": 1.6936,
"step": 3758
},
{
"epoch": 3.73,
"learning_rate": 2.532788239204342e-08,
"loss": 1.7145,
"step": 3760
},
{
"epoch": 3.73,
"learning_rate": 2.4970128885597576e-08,
"loss": 1.7068,
"step": 3762
},
{
"epoch": 3.73,
"learning_rate": 2.4614888051855297e-08,
"loss": 1.7091,
"step": 3764
},
{
"epoch": 3.73,
"learning_rate": 2.426216080627952e-08,
"loss": 1.6595,
"step": 3766
},
{
"epoch": 3.73,
"learning_rate": 2.3911948057856125e-08,
"loss": 1.6309,
"step": 3768
},
{
"epoch": 3.74,
"learning_rate": 2.3564250709090405e-08,
"loss": 1.6751,
"step": 3770
},
{
"epoch": 3.74,
"learning_rate": 2.3219069656006285e-08,
"loss": 1.7125,
"step": 3772
},
{
"epoch": 3.74,
"learning_rate": 2.2876405788142207e-08,
"loss": 1.6595,
"step": 3774
},
{
"epoch": 3.74,
"learning_rate": 2.2536259988550577e-08,
"loss": 1.6482,
"step": 3776
},
{
"epoch": 3.74,
"learning_rate": 2.2198633133794444e-08,
"loss": 1.6452,
"step": 3778
},
{
"epoch": 3.75,
"learning_rate": 2.186352609394504e-08,
"loss": 1.6772,
"step": 3780
},
{
"epoch": 3.75,
"learning_rate": 2.1530939732580466e-08,
"loss": 1.692,
"step": 3782
},
{
"epoch": 3.75,
"learning_rate": 2.1200874906782905e-08,
"loss": 1.6542,
"step": 3784
},
{
"epoch": 3.75,
"learning_rate": 2.0873332467136405e-08,
"loss": 1.692,
"step": 3786
},
{
"epoch": 3.75,
"learning_rate": 2.0548313257724882e-08,
"loss": 1.6492,
"step": 3788
},
{
"epoch": 3.76,
"learning_rate": 2.0225818116129445e-08,
"loss": 1.6871,
"step": 3790
},
{
"epoch": 3.76,
"learning_rate": 1.9905847873427305e-08,
"loss": 1.6905,
"step": 3792
},
{
"epoch": 3.76,
"learning_rate": 1.958840335418832e-08,
"loss": 1.6602,
"step": 3794
},
{
"epoch": 3.76,
"learning_rate": 1.9273485376473997e-08,
"loss": 1.6722,
"step": 3796
},
{
"epoch": 3.76,
"learning_rate": 1.8961094751834382e-08,
"loss": 1.6703,
"step": 3798
},
{
"epoch": 3.77,
"learning_rate": 1.8651232285306962e-08,
"loss": 1.666,
"step": 3800
},
{
"epoch": 3.77,
"learning_rate": 1.8343898775413647e-08,
"loss": 1.6542,
"step": 3802
},
{
"epoch": 3.77,
"learning_rate": 1.8039095014159788e-08,
"loss": 1.6748,
"step": 3804
},
{
"epoch": 3.77,
"learning_rate": 1.7736821787031063e-08,
"loss": 1.6764,
"step": 3806
},
{
"epoch": 3.77,
"learning_rate": 1.74370798729917e-08,
"loss": 1.681,
"step": 3808
},
{
"epoch": 3.78,
"learning_rate": 1.713987004448325e-08,
"loss": 1.6905,
"step": 3810
},
{
"epoch": 3.78,
"learning_rate": 1.6845193067421713e-08,
"loss": 1.661,
"step": 3812
},
{
"epoch": 3.78,
"learning_rate": 1.6553049701195865e-08,
"loss": 1.6492,
"step": 3814
},
{
"epoch": 3.78,
"learning_rate": 1.6263440698665587e-08,
"loss": 1.7071,
"step": 3816
},
{
"epoch": 3.78,
"learning_rate": 1.5976366806159215e-08,
"loss": 1.7526,
"step": 3818
},
{
"epoch": 3.78,
"learning_rate": 1.569182876347219e-08,
"loss": 1.6962,
"step": 3820
},
{
"epoch": 3.79,
"learning_rate": 1.540982730386531e-08,
"loss": 1.689,
"step": 3822
},
{
"epoch": 3.79,
"learning_rate": 1.5130363154062244e-08,
"loss": 1.6757,
"step": 3824
},
{
"epoch": 3.79,
"learning_rate": 1.4853437034248129e-08,
"loss": 1.7157,
"step": 3826
},
{
"epoch": 3.79,
"learning_rate": 1.4579049658067333e-08,
"loss": 1.7109,
"step": 3828
},
{
"epoch": 3.79,
"learning_rate": 1.430720173262201e-08,
"loss": 1.6854,
"step": 3830
},
{
"epoch": 3.8,
"learning_rate": 1.4037893958469993e-08,
"loss": 1.617,
"step": 3832
},
{
"epoch": 3.8,
"learning_rate": 1.3771127029623132e-08,
"loss": 1.6639,
"step": 3834
},
{
"epoch": 3.8,
"learning_rate": 1.350690163354573e-08,
"loss": 1.6558,
"step": 3836
},
{
"epoch": 3.8,
"learning_rate": 1.3245218451152008e-08,
"loss": 1.649,
"step": 3838
},
{
"epoch": 3.8,
"learning_rate": 1.2986078156804969e-08,
"loss": 1.6518,
"step": 3840
},
{
"epoch": 3.81,
"learning_rate": 1.272948141831498e-08,
"loss": 1.6551,
"step": 3842
},
{
"epoch": 3.81,
"learning_rate": 1.247542889693709e-08,
"loss": 1.6744,
"step": 3844
},
{
"epoch": 3.81,
"learning_rate": 1.2223921247370151e-08,
"loss": 1.6261,
"step": 3846
},
{
"epoch": 3.81,
"learning_rate": 1.1974959117754591e-08,
"loss": 1.7008,
"step": 3848
},
{
"epoch": 3.81,
"learning_rate": 1.17285431496712e-08,
"loss": 1.6909,
"step": 3850
},
{
"epoch": 3.82,
"learning_rate": 1.1484673978139125e-08,
"loss": 1.6798,
"step": 3852
},
{
"epoch": 3.82,
"learning_rate": 1.1243352231614434e-08,
"loss": 1.6802,
"step": 3854
},
{
"epoch": 3.82,
"learning_rate": 1.100457853198844e-08,
"loss": 1.6862,
"step": 3856
},
{
"epoch": 3.82,
"learning_rate": 1.0768353494585935e-08,
"loss": 1.6948,
"step": 3858
},
{
"epoch": 3.82,
"learning_rate": 1.0534677728163855e-08,
"loss": 1.7023,
"step": 3860
},
{
"epoch": 3.83,
"learning_rate": 1.030355183490983e-08,
"loss": 1.7078,
"step": 3862
},
{
"epoch": 3.83,
"learning_rate": 1.0074976410439972e-08,
"loss": 1.6903,
"step": 3864
},
{
"epoch": 3.83,
"learning_rate": 9.848952043798097e-09,
"loss": 1.6432,
"step": 3866
},
{
"epoch": 3.83,
"learning_rate": 9.625479317453722e-09,
"loss": 1.6835,
"step": 3868
},
{
"epoch": 3.83,
"learning_rate": 9.404558807301065e-09,
"loss": 1.6456,
"step": 3870
},
{
"epoch": 3.84,
"learning_rate": 9.186191082656613e-09,
"loss": 1.6611,
"step": 3872
},
{
"epoch": 3.84,
"learning_rate": 8.970376706258886e-09,
"loss": 1.6522,
"step": 3874
},
{
"epoch": 3.84,
"learning_rate": 8.757116234266115e-09,
"loss": 1.6628,
"step": 3876
},
{
"epoch": 3.84,
"learning_rate": 8.546410216254907e-09,
"loss": 1.6732,
"step": 3878
},
{
"epoch": 3.84,
"learning_rate": 8.338259195219243e-09,
"loss": 1.681,
"step": 3880
},
{
"epoch": 3.85,
"learning_rate": 8.132663707568821e-09,
"loss": 1.6631,
"step": 3882
},
{
"epoch": 3.85,
"learning_rate": 7.929624283127489e-09,
"loss": 1.6971,
"step": 3884
},
{
"epoch": 3.85,
"learning_rate": 7.729141445132038e-09,
"loss": 1.6769,
"step": 3886
},
{
"epoch": 3.85,
"learning_rate": 7.531215710231298e-09,
"loss": 1.6463,
"step": 3888
},
{
"epoch": 3.85,
"learning_rate": 7.335847588483823e-09,
"loss": 1.6906,
"step": 3890
},
{
"epoch": 3.86,
"learning_rate": 7.1430375833574365e-09,
"loss": 1.6887,
"step": 3892
},
{
"epoch": 3.86,
"learning_rate": 6.952786191727899e-09,
"loss": 1.6517,
"step": 3894
},
{
"epoch": 3.86,
"learning_rate": 6.765093903876806e-09,
"loss": 1.7011,
"step": 3896
},
{
"epoch": 3.86,
"learning_rate": 6.5799612034912465e-09,
"loss": 1.6609,
"step": 3898
},
{
"epoch": 3.86,
"learning_rate": 6.397388567662143e-09,
"loss": 1.7013,
"step": 3900
},
{
"epoch": 3.87,
"learning_rate": 6.21737646688314e-09,
"loss": 1.6742,
"step": 3902
},
{
"epoch": 3.87,
"learning_rate": 6.0399253650490474e-09,
"loss": 1.6729,
"step": 3904
},
{
"epoch": 3.87,
"learning_rate": 5.86503571945518e-09,
"loss": 1.7014,
"step": 3906
},
{
"epoch": 3.87,
"learning_rate": 5.69270798079613e-09,
"loss": 1.6966,
"step": 3908
},
{
"epoch": 3.87,
"learning_rate": 5.522942593163771e-09,
"loss": 1.6772,
"step": 3910
},
{
"epoch": 3.88,
"learning_rate": 5.355739994047481e-09,
"loss": 1.6908,
"step": 3912
},
{
"epoch": 3.88,
"learning_rate": 5.19110061433159e-09,
"loss": 1.6486,
"step": 3914
},
{
"epoch": 3.88,
"learning_rate": 5.029024878295373e-09,
"loss": 1.6703,
"step": 3916
},
{
"epoch": 3.88,
"learning_rate": 4.86951320361162e-09,
"loss": 1.6672,
"step": 3918
},
{
"epoch": 3.88,
"learning_rate": 4.712566001345064e-09,
"loss": 1.6561,
"step": 3920
},
{
"epoch": 3.89,
"learning_rate": 4.558183675952176e-09,
"loss": 1.6885,
"step": 3922
},
{
"epoch": 3.89,
"learning_rate": 4.406366625279378e-09,
"loss": 1.7056,
"step": 3924
},
{
"epoch": 3.89,
"learning_rate": 4.257115240562381e-09,
"loss": 1.6483,
"step": 3926
},
{
"epoch": 3.89,
"learning_rate": 4.110429906425406e-09,
"loss": 1.7079,
"step": 3928
},
{
"epoch": 3.89,
"learning_rate": 3.966311000879408e-09,
"loss": 1.6728,
"step": 3930
},
{
"epoch": 3.9,
"learning_rate": 3.824758895322078e-09,
"loss": 1.6286,
"step": 3932
},
{
"epoch": 3.9,
"learning_rate": 3.6857739545360643e-09,
"loss": 1.6412,
"step": 3934
},
{
"epoch": 3.9,
"learning_rate": 3.54935653668853e-09,
"loss": 1.6989,
"step": 3936
},
{
"epoch": 3.9,
"learning_rate": 3.415506993330153e-09,
"loss": 1.6291,
"step": 3938
},
{
"epoch": 3.9,
"learning_rate": 3.284225669394014e-09,
"loss": 1.6346,
"step": 3940
},
{
"epoch": 3.91,
"learning_rate": 3.1555129031948235e-09,
"loss": 1.7289,
"step": 3942
},
{
"epoch": 3.91,
"learning_rate": 3.0293690264281413e-09,
"loss": 1.6947,
"step": 3944
},
{
"epoch": 3.91,
"learning_rate": 2.9057943641693784e-09,
"loss": 1.6906,
"step": 3946
},
{
"epoch": 3.91,
"learning_rate": 2.7847892348731306e-09,
"loss": 1.6827,
"step": 3948
},
{
"epoch": 3.91,
"learning_rate": 2.666353950372291e-09,
"loss": 1.712,
"step": 3950
},
{
"epoch": 3.92,
"learning_rate": 2.5504888158768277e-09,
"loss": 1.6933,
"step": 3952
},
{
"epoch": 3.92,
"learning_rate": 2.437194129973896e-09,
"loss": 1.6748,
"step": 3954
},
{
"epoch": 3.92,
"learning_rate": 2.3264701846261727e-09,
"loss": 1.6532,
"step": 3956
},
{
"epoch": 3.92,
"learning_rate": 2.218317265171743e-09,
"loss": 1.7141,
"step": 3958
},
{
"epoch": 3.92,
"learning_rate": 2.112735650322883e-09,
"loss": 1.68,
"step": 3960
},
{
"epoch": 3.93,
"learning_rate": 2.0097256121657246e-09,
"loss": 1.7106,
"step": 3962
},
{
"epoch": 3.93,
"learning_rate": 1.9092874161591442e-09,
"loss": 1.7274,
"step": 3964
},
{
"epoch": 3.93,
"learning_rate": 1.811421321134765e-09,
"loss": 1.6989,
"step": 3966
},
{
"epoch": 3.93,
"learning_rate": 1.716127579295401e-09,
"loss": 1.714,
"step": 3968
},
{
"epoch": 3.93,
"learning_rate": 1.62340643621528e-09,
"loss": 1.668,
"step": 3970
},
{
"epoch": 3.94,
"learning_rate": 1.5332581308384885e-09,
"loss": 1.7128,
"step": 3972
},
{
"epoch": 3.94,
"learning_rate": 1.4456828954791945e-09,
"loss": 1.6977,
"step": 3974
},
{
"epoch": 3.94,
"learning_rate": 1.360680955820759e-09,
"loss": 1.7203,
"step": 3976
},
{
"epoch": 3.94,
"learning_rate": 1.2782525309149582e-09,
"loss": 1.685,
"step": 3978
},
{
"epoch": 3.94,
"learning_rate": 1.1983978331815413e-09,
"loss": 1.6471,
"step": 3980
},
{
"epoch": 3.95,
"learning_rate": 1.1211170684076731e-09,
"loss": 1.688,
"step": 3982
},
{
"epoch": 3.95,
"learning_rate": 1.0464104357477132e-09,
"loss": 1.6823,
"step": 3984
},
{
"epoch": 3.95,
"learning_rate": 9.742781277221057e-10,
"loss": 1.7105,
"step": 3986
},
{
"epoch": 3.95,
"learning_rate": 9.0472033021749e-10,
"loss": 1.6584,
"step": 3988
},
{
"epoch": 3.95,
"learning_rate": 8.377372224855905e-10,
"loss": 1.7026,
"step": 3990
},
{
"epoch": 3.96,
"learning_rate": 7.733289771434392e-10,
"loss": 1.6724,
"step": 3992
},
{
"epoch": 3.96,
"learning_rate": 7.114957601723759e-10,
"loss": 1.6822,
"step": 3994
},
{
"epoch": 3.96,
"learning_rate": 6.522377309180482e-10,
"loss": 1.6495,
"step": 3996
},
{
"epoch": 3.96,
"learning_rate": 5.955550420895239e-10,
"loss": 1.6512,
"step": 3998
},
{
"epoch": 3.96,
"learning_rate": 5.414478397592904e-10,
"loss": 1.6595,
"step": 4000
},
{
"epoch": 3.97,
"learning_rate": 4.899162633629217e-10,
"loss": 1.6704,
"step": 4002
},
{
"epoch": 3.97,
"learning_rate": 4.409604456981908e-10,
"loss": 1.6712,
"step": 4004
},
{
"epoch": 3.97,
"learning_rate": 3.9458051292529104e-10,
"loss": 1.63,
"step": 4006
},
{
"epoch": 3.97,
"learning_rate": 3.507765845665034e-10,
"loss": 1.7009,
"step": 4008
},
{
"epoch": 3.97,
"learning_rate": 3.0954877350519716e-10,
"loss": 1.6726,
"step": 4010
},
{
"epoch": 3.98,
"learning_rate": 2.7089718598660715e-10,
"loss": 1.6663,
"step": 4012
},
{
"epoch": 3.98,
"learning_rate": 2.348219216163905e-10,
"loss": 1.6915,
"step": 4014
},
{
"epoch": 3.98,
"learning_rate": 2.0132307336151455e-10,
"loss": 1.6691,
"step": 4016
},
{
"epoch": 3.98,
"learning_rate": 1.7040072754914702e-10,
"loss": 1.6769,
"step": 4018
},
{
"epoch": 3.98,
"learning_rate": 1.4205496386687775e-10,
"loss": 1.6206,
"step": 4020
},
{
"epoch": 3.99,
"learning_rate": 1.1628585536216374e-10,
"loss": 1.6741,
"step": 4022
},
{
"epoch": 3.99,
"learning_rate": 9.30934684427731e-11,
"loss": 1.6694,
"step": 4024
},
{
"epoch": 3.99,
"learning_rate": 7.247786287578605e-11,
"loss": 1.6933,
"step": 4026
},
{
"epoch": 3.99,
"learning_rate": 5.443909178826089e-11,
"loss": 1.685,
"step": 4028
},
{
"epoch": 3.99,
"learning_rate": 3.897720166623486e-11,
"loss": 1.6814,
"step": 4030
},
{
"epoch": 4.0,
"learning_rate": 2.609223235561231e-11,
"loss": 1.707,
"step": 4032
},
{
"epoch": 4.0,
"learning_rate": 1.5784217061054483e-11,
"loss": 1.7169,
"step": 4034
},
{
"epoch": 4.0,
"learning_rate": 8.053182346534626e-12,
"loss": 1.6614,
"step": 4036
},
{
"epoch": 4.0,
"step": 4036,
"total_flos": 2.3255819414300262e+17,
"train_loss": 1.7244027754360196,
"train_runtime": 42261.7031,
"train_samples_per_second": 6.113,
"train_steps_per_second": 0.096
}
],
"logging_steps": 2,
"max_steps": 4036,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 4000,
"total_flos": 2.3255819414300262e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}