TienAnh's picture
Model save
e04d9ed verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.996608253250424,
"eval_steps": 500,
"global_step": 2652,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 13.4718,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 2.5e-08,
"loss": 13.504,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 5e-08,
"loss": 13.7237,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 1e-07,
"loss": 13.4448,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 1.5e-07,
"loss": 13.4276,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 2e-07,
"loss": 13.4904,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 2.5e-07,
"loss": 13.5499,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 3e-07,
"loss": 13.469,
"step": 16
},
{
"epoch": 0.04,
"learning_rate": 3.5e-07,
"loss": 13.3643,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 4e-07,
"loss": 13.1439,
"step": 20
},
{
"epoch": 0.05,
"learning_rate": 4.5e-07,
"loss": 12.9525,
"step": 22
},
{
"epoch": 0.05,
"learning_rate": 5e-07,
"loss": 13.0609,
"step": 24
},
{
"epoch": 0.06,
"learning_rate": 5.5e-07,
"loss": 12.9562,
"step": 26
},
{
"epoch": 0.06,
"learning_rate": 6e-07,
"loss": 12.6147,
"step": 28
},
{
"epoch": 0.07,
"learning_rate": 6.5e-07,
"loss": 12.0172,
"step": 30
},
{
"epoch": 0.07,
"learning_rate": 7e-07,
"loss": 11.5616,
"step": 32
},
{
"epoch": 0.08,
"learning_rate": 7.5e-07,
"loss": 11.5432,
"step": 34
},
{
"epoch": 0.08,
"learning_rate": 8e-07,
"loss": 11.1458,
"step": 36
},
{
"epoch": 0.09,
"learning_rate": 8.499999999999999e-07,
"loss": 10.7621,
"step": 38
},
{
"epoch": 0.09,
"learning_rate": 9e-07,
"loss": 10.9786,
"step": 40
},
{
"epoch": 0.09,
"learning_rate": 9.499999999999999e-07,
"loss": 10.2569,
"step": 42
},
{
"epoch": 0.1,
"learning_rate": 1e-06,
"loss": 9.8751,
"step": 44
},
{
"epoch": 0.1,
"learning_rate": 1.05e-06,
"loss": 9.2012,
"step": 46
},
{
"epoch": 0.11,
"learning_rate": 1.1e-06,
"loss": 9.7962,
"step": 48
},
{
"epoch": 0.11,
"learning_rate": 1.1499999999999998e-06,
"loss": 9.4366,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 1.2e-06,
"loss": 8.2082,
"step": 52
},
{
"epoch": 0.12,
"learning_rate": 1.2499999999999999e-06,
"loss": 8.3599,
"step": 54
},
{
"epoch": 0.13,
"learning_rate": 1.3e-06,
"loss": 7.7925,
"step": 56
},
{
"epoch": 0.13,
"learning_rate": 1.35e-06,
"loss": 7.4611,
"step": 58
},
{
"epoch": 0.14,
"learning_rate": 1.4e-06,
"loss": 7.4838,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 1.4499999999999999e-06,
"loss": 7.1745,
"step": 62
},
{
"epoch": 0.14,
"learning_rate": 1.5e-06,
"loss": 6.8636,
"step": 64
},
{
"epoch": 0.15,
"learning_rate": 1.55e-06,
"loss": 6.328,
"step": 66
},
{
"epoch": 0.15,
"learning_rate": 1.6e-06,
"loss": 6.1617,
"step": 68
},
{
"epoch": 0.16,
"learning_rate": 1.6499999999999999e-06,
"loss": 5.541,
"step": 70
},
{
"epoch": 0.16,
"learning_rate": 1.6999999999999998e-06,
"loss": 5.0421,
"step": 72
},
{
"epoch": 0.17,
"learning_rate": 1.75e-06,
"loss": 5.902,
"step": 74
},
{
"epoch": 0.17,
"learning_rate": 1.8e-06,
"loss": 5.2086,
"step": 76
},
{
"epoch": 0.18,
"learning_rate": 1.85e-06,
"loss": 4.6743,
"step": 78
},
{
"epoch": 0.18,
"learning_rate": 1.8999999999999998e-06,
"loss": 5.2676,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 1.95e-06,
"loss": 4.601,
"step": 82
},
{
"epoch": 0.19,
"learning_rate": 2e-06,
"loss": 4.5361,
"step": 84
},
{
"epoch": 0.19,
"learning_rate": 1.999997016077106e-06,
"loss": 4.8133,
"step": 86
},
{
"epoch": 0.2,
"learning_rate": 1.9999880643262316e-06,
"loss": 4.6242,
"step": 88
},
{
"epoch": 0.2,
"learning_rate": 1.9999731448007997e-06,
"loss": 4.2251,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 1.9999522575898473e-06,
"loss": 4.7076,
"step": 92
},
{
"epoch": 0.21,
"learning_rate": 1.9999254028180264e-06,
"loss": 5.183,
"step": 94
},
{
"epoch": 0.22,
"learning_rate": 1.999892580645602e-06,
"loss": 4.759,
"step": 96
},
{
"epoch": 0.22,
"learning_rate": 1.9998537912684517e-06,
"loss": 4.6168,
"step": 98
},
{
"epoch": 0.23,
"learning_rate": 1.9998090349180645e-06,
"loss": 4.3821,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 1.99975831186154e-06,
"loss": 4.39,
"step": 102
},
{
"epoch": 0.24,
"learning_rate": 1.999701622401585e-06,
"loss": 3.8744,
"step": 104
},
{
"epoch": 0.24,
"learning_rate": 1.999638966876513e-06,
"loss": 4.2736,
"step": 106
},
{
"epoch": 0.24,
"learning_rate": 1.9995703456602437e-06,
"loss": 4.2618,
"step": 108
},
{
"epoch": 0.25,
"learning_rate": 1.999495759162297e-06,
"loss": 4.6974,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 1.999415207827794e-06,
"loss": 4.3729,
"step": 112
},
{
"epoch": 0.26,
"learning_rate": 1.9993286921374522e-06,
"loss": 4.1522,
"step": 114
},
{
"epoch": 0.26,
"learning_rate": 1.9992362126075847e-06,
"loss": 4.41,
"step": 116
},
{
"epoch": 0.27,
"learning_rate": 1.9991377697900943e-06,
"loss": 3.8781,
"step": 118
},
{
"epoch": 0.27,
"learning_rate": 1.9990333642724733e-06,
"loss": 4.3471,
"step": 120
},
{
"epoch": 0.28,
"learning_rate": 1.998922996677797e-06,
"loss": 3.8304,
"step": 122
},
{
"epoch": 0.28,
"learning_rate": 1.998806667664722e-06,
"loss": 4.6405,
"step": 124
},
{
"epoch": 0.28,
"learning_rate": 1.9986843779274833e-06,
"loss": 4.5621,
"step": 126
},
{
"epoch": 0.29,
"learning_rate": 1.998556128195886e-06,
"loss": 4.3485,
"step": 128
},
{
"epoch": 0.29,
"learning_rate": 1.9984219192353047e-06,
"loss": 4.5476,
"step": 130
},
{
"epoch": 0.3,
"learning_rate": 1.9982817518466784e-06,
"loss": 4.1366,
"step": 132
},
{
"epoch": 0.3,
"learning_rate": 1.998135626866504e-06,
"loss": 3.8135,
"step": 134
},
{
"epoch": 0.31,
"learning_rate": 1.997983545166833e-06,
"loss": 4.8264,
"step": 136
},
{
"epoch": 0.31,
"learning_rate": 1.9978255076552656e-06,
"loss": 4.5821,
"step": 138
},
{
"epoch": 0.32,
"learning_rate": 1.997661515274945e-06,
"loss": 5.1009,
"step": 140
},
{
"epoch": 0.32,
"learning_rate": 1.997491569004553e-06,
"loss": 3.5422,
"step": 142
},
{
"epoch": 0.33,
"learning_rate": 1.9973156698583026e-06,
"loss": 3.8344,
"step": 144
},
{
"epoch": 0.33,
"learning_rate": 1.997133818885932e-06,
"loss": 4.4703,
"step": 146
},
{
"epoch": 0.33,
"learning_rate": 1.9969460171727005e-06,
"loss": 4.1585,
"step": 148
},
{
"epoch": 0.34,
"learning_rate": 1.9967522658393796e-06,
"loss": 3.9991,
"step": 150
},
{
"epoch": 0.34,
"learning_rate": 1.9965525660422476e-06,
"loss": 4.329,
"step": 152
},
{
"epoch": 0.35,
"learning_rate": 1.996346918973082e-06,
"loss": 4.0715,
"step": 154
},
{
"epoch": 0.35,
"learning_rate": 1.9961353258591523e-06,
"loss": 4.1138,
"step": 156
},
{
"epoch": 0.36,
"learning_rate": 1.995917787963214e-06,
"loss": 4.3047,
"step": 158
},
{
"epoch": 0.36,
"learning_rate": 1.9956943065834995e-06,
"loss": 4.166,
"step": 160
},
{
"epoch": 0.37,
"learning_rate": 1.9954648830537116e-06,
"loss": 4.7719,
"step": 162
},
{
"epoch": 0.37,
"learning_rate": 1.9952295187430144e-06,
"loss": 3.9149,
"step": 164
},
{
"epoch": 0.38,
"learning_rate": 1.9949882150560254e-06,
"loss": 4.4385,
"step": 166
},
{
"epoch": 0.38,
"learning_rate": 1.994740973432809e-06,
"loss": 4.3416,
"step": 168
},
{
"epoch": 0.38,
"learning_rate": 1.994487795348863e-06,
"loss": 4.5966,
"step": 170
},
{
"epoch": 0.39,
"learning_rate": 1.994228682315117e-06,
"loss": 4.2079,
"step": 172
},
{
"epoch": 0.39,
"learning_rate": 1.993963635877917e-06,
"loss": 4.3397,
"step": 174
},
{
"epoch": 0.4,
"learning_rate": 1.993692657619019e-06,
"loss": 4.2443,
"step": 176
},
{
"epoch": 0.4,
"learning_rate": 1.9934157491555797e-06,
"loss": 3.8927,
"step": 178
},
{
"epoch": 0.41,
"learning_rate": 1.9931329121401462e-06,
"loss": 4.0237,
"step": 180
},
{
"epoch": 0.41,
"learning_rate": 1.9928441482606463e-06,
"loss": 4.3192,
"step": 182
},
{
"epoch": 0.42,
"learning_rate": 1.992549459240378e-06,
"loss": 3.5739,
"step": 184
},
{
"epoch": 0.42,
"learning_rate": 1.992248846838e-06,
"loss": 4.0837,
"step": 186
},
{
"epoch": 0.43,
"learning_rate": 1.9919423128475205e-06,
"loss": 3.9352,
"step": 188
},
{
"epoch": 0.43,
"learning_rate": 1.9916298590982877e-06,
"loss": 4.2776,
"step": 190
},
{
"epoch": 0.43,
"learning_rate": 1.991311487454977e-06,
"loss": 3.9825,
"step": 192
},
{
"epoch": 0.44,
"learning_rate": 1.9909871998175815e-06,
"loss": 3.5599,
"step": 194
},
{
"epoch": 0.44,
"learning_rate": 1.9906569981213996e-06,
"loss": 3.8652,
"step": 196
},
{
"epoch": 0.45,
"learning_rate": 1.990320884337024e-06,
"loss": 4.0849,
"step": 198
},
{
"epoch": 0.45,
"learning_rate": 1.9899788604703303e-06,
"loss": 3.9656,
"step": 200
},
{
"epoch": 0.46,
"learning_rate": 1.989630928562464e-06,
"loss": 3.6045,
"step": 202
},
{
"epoch": 0.46,
"learning_rate": 1.989277090689829e-06,
"loss": 3.4662,
"step": 204
},
{
"epoch": 0.47,
"learning_rate": 1.988917348964075e-06,
"loss": 3.6883,
"step": 206
},
{
"epoch": 0.47,
"learning_rate": 1.9885517055320857e-06,
"loss": 4.3033,
"step": 208
},
{
"epoch": 0.47,
"learning_rate": 1.988180162575964e-06,
"loss": 3.9864,
"step": 210
},
{
"epoch": 0.48,
"learning_rate": 1.9878027223130216e-06,
"loss": 3.7801,
"step": 212
},
{
"epoch": 0.48,
"learning_rate": 1.9874193869957635e-06,
"loss": 3.8201,
"step": 214
},
{
"epoch": 0.49,
"learning_rate": 1.9870301589118756e-06,
"loss": 3.5885,
"step": 216
},
{
"epoch": 0.49,
"learning_rate": 1.986635040384211e-06,
"loss": 3.4567,
"step": 218
},
{
"epoch": 0.5,
"learning_rate": 1.986234033770777e-06,
"loss": 3.8317,
"step": 220
},
{
"epoch": 0.5,
"learning_rate": 1.9858271414647185e-06,
"loss": 3.8597,
"step": 222
},
{
"epoch": 0.51,
"learning_rate": 1.985414365894306e-06,
"loss": 3.9962,
"step": 224
},
{
"epoch": 0.51,
"learning_rate": 1.9849957095229205e-06,
"loss": 3.8344,
"step": 226
},
{
"epoch": 0.52,
"learning_rate": 1.984571174849039e-06,
"loss": 4.2405,
"step": 228
},
{
"epoch": 0.52,
"learning_rate": 1.9841407644062186e-06,
"loss": 3.6655,
"step": 230
},
{
"epoch": 0.52,
"learning_rate": 1.9837044807630826e-06,
"loss": 4.4589,
"step": 232
},
{
"epoch": 0.53,
"learning_rate": 1.9832623265233046e-06,
"loss": 3.8763,
"step": 234
},
{
"epoch": 0.53,
"learning_rate": 1.9828143043255924e-06,
"loss": 4.6417,
"step": 236
},
{
"epoch": 0.54,
"learning_rate": 1.9823604168436743e-06,
"loss": 4.3164,
"step": 238
},
{
"epoch": 0.54,
"learning_rate": 1.98190066678628e-06,
"loss": 3.9327,
"step": 240
},
{
"epoch": 0.55,
"learning_rate": 1.9814350568971276e-06,
"loss": 3.7355,
"step": 242
},
{
"epoch": 0.55,
"learning_rate": 1.9809635899549044e-06,
"loss": 3.8522,
"step": 244
},
{
"epoch": 0.56,
"learning_rate": 1.9804862687732527e-06,
"loss": 3.8598,
"step": 246
},
{
"epoch": 0.56,
"learning_rate": 1.980003096200752e-06,
"loss": 3.558,
"step": 248
},
{
"epoch": 0.57,
"learning_rate": 1.9795140751209014e-06,
"loss": 3.4079,
"step": 250
},
{
"epoch": 0.57,
"learning_rate": 1.9790192084521032e-06,
"loss": 4.322,
"step": 252
},
{
"epoch": 0.57,
"learning_rate": 1.9785184991476455e-06,
"loss": 3.9125,
"step": 254
},
{
"epoch": 0.58,
"learning_rate": 1.978011950195685e-06,
"loss": 3.7043,
"step": 256
},
{
"epoch": 0.58,
"learning_rate": 1.977499564619226e-06,
"loss": 3.6562,
"step": 258
},
{
"epoch": 0.59,
"learning_rate": 1.9769813454761078e-06,
"loss": 3.8608,
"step": 260
},
{
"epoch": 0.59,
"learning_rate": 1.976457295858982e-06,
"loss": 4.3005,
"step": 262
},
{
"epoch": 0.6,
"learning_rate": 1.975927418895296e-06,
"loss": 3.3468,
"step": 264
},
{
"epoch": 0.6,
"learning_rate": 1.975391717747274e-06,
"loss": 3.7217,
"step": 266
},
{
"epoch": 0.61,
"learning_rate": 1.9748501956118977e-06,
"loss": 3.1169,
"step": 268
},
{
"epoch": 0.61,
"learning_rate": 1.9743028557208875e-06,
"loss": 3.6231,
"step": 270
},
{
"epoch": 0.62,
"learning_rate": 1.973749701340684e-06,
"loss": 4.0356,
"step": 272
},
{
"epoch": 0.62,
"learning_rate": 1.9731907357724263e-06,
"loss": 3.749,
"step": 274
},
{
"epoch": 0.62,
"learning_rate": 1.9726259623519353e-06,
"loss": 3.6537,
"step": 276
},
{
"epoch": 0.63,
"learning_rate": 1.9720553844496916e-06,
"loss": 3.4349,
"step": 278
},
{
"epoch": 0.63,
"learning_rate": 1.9714790054708166e-06,
"loss": 4.0901,
"step": 280
},
{
"epoch": 0.64,
"learning_rate": 1.9708968288550505e-06,
"loss": 3.46,
"step": 282
},
{
"epoch": 0.64,
"learning_rate": 1.9703088580767337e-06,
"loss": 4.2503,
"step": 284
},
{
"epoch": 0.65,
"learning_rate": 1.969715096644785e-06,
"loss": 4.1053,
"step": 286
},
{
"epoch": 0.65,
"learning_rate": 1.9691155481026816e-06,
"loss": 4.0205,
"step": 288
},
{
"epoch": 0.66,
"learning_rate": 1.9685102160284363e-06,
"loss": 3.708,
"step": 290
},
{
"epoch": 0.66,
"learning_rate": 1.9678991040345775e-06,
"loss": 3.3928,
"step": 292
},
{
"epoch": 0.66,
"learning_rate": 1.9672822157681278e-06,
"loss": 4.2508,
"step": 294
},
{
"epoch": 0.67,
"learning_rate": 1.966659554910581e-06,
"loss": 3.5589,
"step": 296
},
{
"epoch": 0.67,
"learning_rate": 1.9660311251778808e-06,
"loss": 4.1245,
"step": 298
},
{
"epoch": 0.68,
"learning_rate": 1.9653969303203995e-06,
"loss": 3.8555,
"step": 300
},
{
"epoch": 0.68,
"learning_rate": 1.964756974122913e-06,
"loss": 3.594,
"step": 302
},
{
"epoch": 0.69,
"learning_rate": 1.964111260404583e-06,
"loss": 3.627,
"step": 304
},
{
"epoch": 0.69,
"learning_rate": 1.9634597930189283e-06,
"loss": 3.5641,
"step": 306
},
{
"epoch": 0.7,
"learning_rate": 1.962802575853806e-06,
"loss": 3.8781,
"step": 308
},
{
"epoch": 0.7,
"learning_rate": 1.962139612831387e-06,
"loss": 3.3285,
"step": 310
},
{
"epoch": 0.71,
"learning_rate": 1.9614709079081313e-06,
"loss": 3.5192,
"step": 312
},
{
"epoch": 0.71,
"learning_rate": 1.9607964650747686e-06,
"loss": 3.9396,
"step": 314
},
{
"epoch": 0.71,
"learning_rate": 1.960116288356268e-06,
"loss": 3.5475,
"step": 316
},
{
"epoch": 0.72,
"learning_rate": 1.9594303818118203e-06,
"loss": 3.8398,
"step": 318
},
{
"epoch": 0.72,
"learning_rate": 1.9587387495348097e-06,
"loss": 3.7307,
"step": 320
},
{
"epoch": 0.73,
"learning_rate": 1.958041395652791e-06,
"loss": 3.5379,
"step": 322
},
{
"epoch": 0.73,
"learning_rate": 1.957338324327464e-06,
"loss": 4.157,
"step": 324
},
{
"epoch": 0.74,
"learning_rate": 1.956629539754651e-06,
"loss": 3.4495,
"step": 326
},
{
"epoch": 0.74,
"learning_rate": 1.9559150461642684e-06,
"loss": 3.6444,
"step": 328
},
{
"epoch": 0.75,
"learning_rate": 1.9551948478203044e-06,
"loss": 3.3748,
"step": 330
},
{
"epoch": 0.75,
"learning_rate": 1.9544689490207904e-06,
"loss": 3.8947,
"step": 332
},
{
"epoch": 0.76,
"learning_rate": 1.9537373540977796e-06,
"loss": 3.3734,
"step": 334
},
{
"epoch": 0.76,
"learning_rate": 1.9530000674173176e-06,
"loss": 3.5926,
"step": 336
},
{
"epoch": 0.76,
"learning_rate": 1.952257093379417e-06,
"loss": 3.706,
"step": 338
},
{
"epoch": 0.77,
"learning_rate": 1.9515084364180328e-06,
"loss": 3.6486,
"step": 340
},
{
"epoch": 0.77,
"learning_rate": 1.950754101001034e-06,
"loss": 3.7235,
"step": 342
},
{
"epoch": 0.78,
"learning_rate": 1.9499940916301783e-06,
"loss": 4.0214,
"step": 344
},
{
"epoch": 0.78,
"learning_rate": 1.9492284128410843e-06,
"loss": 3.2606,
"step": 346
},
{
"epoch": 0.79,
"learning_rate": 1.9484570692032046e-06,
"loss": 3.3426,
"step": 348
},
{
"epoch": 0.79,
"learning_rate": 1.9476800653197994e-06,
"loss": 3.8355,
"step": 350
},
{
"epoch": 0.8,
"learning_rate": 1.9468974058279084e-06,
"loss": 3.0529,
"step": 352
},
{
"epoch": 0.8,
"learning_rate": 1.9461090953983223e-06,
"loss": 3.5752,
"step": 354
},
{
"epoch": 0.8,
"learning_rate": 1.945315138735556e-06,
"loss": 4.0122,
"step": 356
},
{
"epoch": 0.81,
"learning_rate": 1.944515540577821e-06,
"loss": 2.9584,
"step": 358
},
{
"epoch": 0.81,
"learning_rate": 1.9437103056969956e-06,
"loss": 3.0343,
"step": 360
},
{
"epoch": 0.82,
"learning_rate": 1.9428994388985973e-06,
"loss": 3.2726,
"step": 362
},
{
"epoch": 0.82,
"learning_rate": 1.942082945021754e-06,
"loss": 3.6547,
"step": 364
},
{
"epoch": 0.83,
"learning_rate": 1.941260828939175e-06,
"loss": 3.528,
"step": 366
},
{
"epoch": 0.83,
"learning_rate": 1.940433095557123e-06,
"loss": 3.4337,
"step": 368
},
{
"epoch": 0.84,
"learning_rate": 1.939599749815383e-06,
"loss": 3.5137,
"step": 370
},
{
"epoch": 0.84,
"learning_rate": 1.938760796687234e-06,
"loss": 3.4036,
"step": 372
},
{
"epoch": 0.85,
"learning_rate": 1.9379162411794177e-06,
"loss": 3.1954,
"step": 374
},
{
"epoch": 0.85,
"learning_rate": 1.9370660883321126e-06,
"loss": 3.665,
"step": 376
},
{
"epoch": 0.85,
"learning_rate": 1.936210343218899e-06,
"loss": 3.2216,
"step": 378
},
{
"epoch": 0.86,
"learning_rate": 1.9353490109467317e-06,
"loss": 3.5219,
"step": 380
},
{
"epoch": 0.86,
"learning_rate": 1.93448209665591e-06,
"loss": 3.4054,
"step": 382
},
{
"epoch": 0.87,
"learning_rate": 1.933609605520043e-06,
"loss": 3.2842,
"step": 384
},
{
"epoch": 0.87,
"learning_rate": 1.9327315427460243e-06,
"loss": 3.6067,
"step": 386
},
{
"epoch": 0.88,
"learning_rate": 1.9318479135739977e-06,
"loss": 2.9899,
"step": 388
},
{
"epoch": 0.88,
"learning_rate": 1.930958723277324e-06,
"loss": 3.375,
"step": 390
},
{
"epoch": 0.89,
"learning_rate": 1.9300639771625554e-06,
"loss": 3.213,
"step": 392
},
{
"epoch": 0.89,
"learning_rate": 1.929163680569398e-06,
"loss": 3.3821,
"step": 394
},
{
"epoch": 0.9,
"learning_rate": 1.9282578388706834e-06,
"loss": 3.3247,
"step": 396
},
{
"epoch": 0.9,
"learning_rate": 1.927346457472335e-06,
"loss": 3.6484,
"step": 398
},
{
"epoch": 0.9,
"learning_rate": 1.926429541813336e-06,
"loss": 2.9465,
"step": 400
},
{
"epoch": 0.91,
"learning_rate": 1.9255070973656983e-06,
"loss": 4.158,
"step": 402
},
{
"epoch": 0.91,
"learning_rate": 1.924579129634428e-06,
"loss": 3.193,
"step": 404
},
{
"epoch": 0.92,
"learning_rate": 1.9236456441574927e-06,
"loss": 3.3612,
"step": 406
},
{
"epoch": 0.92,
"learning_rate": 1.922706646505791e-06,
"loss": 3.2942,
"step": 408
},
{
"epoch": 0.93,
"learning_rate": 1.9217621422831153e-06,
"loss": 3.1285,
"step": 410
},
{
"epoch": 0.93,
"learning_rate": 1.9208121371261213e-06,
"loss": 3.6443,
"step": 412
},
{
"epoch": 0.94,
"learning_rate": 1.919856636704293e-06,
"loss": 3.7119,
"step": 414
},
{
"epoch": 0.94,
"learning_rate": 1.91889564671991e-06,
"loss": 3.3598,
"step": 416
},
{
"epoch": 0.95,
"learning_rate": 1.917929172908012e-06,
"loss": 3.5273,
"step": 418
},
{
"epoch": 0.95,
"learning_rate": 1.916957221036366e-06,
"loss": 3.6511,
"step": 420
},
{
"epoch": 0.95,
"learning_rate": 1.9159797969054307e-06,
"loss": 3.0714,
"step": 422
},
{
"epoch": 0.96,
"learning_rate": 1.9149969063483223e-06,
"loss": 3.094,
"step": 424
},
{
"epoch": 0.96,
"learning_rate": 1.914008555230781e-06,
"loss": 3.3933,
"step": 426
},
{
"epoch": 0.97,
"learning_rate": 1.9130147494511325e-06,
"loss": 3.3078,
"step": 428
},
{
"epoch": 0.97,
"learning_rate": 1.9120154949402576e-06,
"loss": 3.474,
"step": 430
},
{
"epoch": 0.98,
"learning_rate": 1.9110107976615527e-06,
"loss": 3.2673,
"step": 432
},
{
"epoch": 0.98,
"learning_rate": 1.910000663610896e-06,
"loss": 3.2293,
"step": 434
},
{
"epoch": 0.99,
"learning_rate": 1.908985098816612e-06,
"loss": 3.6537,
"step": 436
},
{
"epoch": 0.99,
"learning_rate": 1.9079641093394344e-06,
"loss": 3.3056,
"step": 438
},
{
"epoch": 0.99,
"learning_rate": 1.9069377012724716e-06,
"loss": 3.0124,
"step": 440
},
{
"epoch": 1.0,
"learning_rate": 1.9059058807411686e-06,
"loss": 3.5076,
"step": 442
},
{
"epoch": 1.0,
"learning_rate": 1.9048686539032705e-06,
"loss": 2.8775,
"step": 444
},
{
"epoch": 1.01,
"learning_rate": 1.9038260269487875e-06,
"loss": 3.7755,
"step": 446
},
{
"epoch": 1.01,
"learning_rate": 1.902778006099957e-06,
"loss": 3.6773,
"step": 448
},
{
"epoch": 1.02,
"learning_rate": 1.9017245976112051e-06,
"loss": 3.3806,
"step": 450
},
{
"epoch": 1.02,
"learning_rate": 1.900665807769111e-06,
"loss": 3.0561,
"step": 452
},
{
"epoch": 1.03,
"learning_rate": 1.8996016428923704e-06,
"loss": 3.4006,
"step": 454
},
{
"epoch": 1.03,
"learning_rate": 1.898532109331754e-06,
"loss": 3.1419,
"step": 456
},
{
"epoch": 1.04,
"learning_rate": 1.8974572134700738e-06,
"loss": 3.3714,
"step": 458
},
{
"epoch": 1.04,
"learning_rate": 1.896376961722142e-06,
"loss": 3.3205,
"step": 460
},
{
"epoch": 1.04,
"learning_rate": 1.8952913605347352e-06,
"loss": 3.2149,
"step": 462
},
{
"epoch": 1.05,
"learning_rate": 1.894200416386553e-06,
"loss": 3.4137,
"step": 464
},
{
"epoch": 1.05,
"learning_rate": 1.8931041357881826e-06,
"loss": 3.5795,
"step": 466
},
{
"epoch": 1.06,
"learning_rate": 1.892002525282057e-06,
"loss": 3.3404,
"step": 468
},
{
"epoch": 1.06,
"learning_rate": 1.8908955914424182e-06,
"loss": 3.477,
"step": 470
},
{
"epoch": 1.07,
"learning_rate": 1.8897833408752766e-06,
"loss": 3.4584,
"step": 472
},
{
"epoch": 1.07,
"learning_rate": 1.8886657802183718e-06,
"loss": 3.1697,
"step": 474
},
{
"epoch": 1.08,
"learning_rate": 1.8875429161411339e-06,
"loss": 3.2975,
"step": 476
},
{
"epoch": 1.08,
"learning_rate": 1.886414755344642e-06,
"loss": 3.3685,
"step": 478
},
{
"epoch": 1.09,
"learning_rate": 1.8852813045615861e-06,
"loss": 3.0643,
"step": 480
},
{
"epoch": 1.09,
"learning_rate": 1.8841425705562255e-06,
"loss": 2.9802,
"step": 482
},
{
"epoch": 1.09,
"learning_rate": 1.8829985601243494e-06,
"loss": 3.6372,
"step": 484
},
{
"epoch": 1.1,
"learning_rate": 1.8818492800932354e-06,
"loss": 3.0764,
"step": 486
},
{
"epoch": 1.1,
"learning_rate": 1.8806947373216099e-06,
"loss": 3.1796,
"step": 488
},
{
"epoch": 1.11,
"learning_rate": 1.8795349386996057e-06,
"loss": 3.0618,
"step": 490
},
{
"epoch": 1.11,
"learning_rate": 1.8783698911487219e-06,
"loss": 3.275,
"step": 492
},
{
"epoch": 1.12,
"learning_rate": 1.877199601621783e-06,
"loss": 3.2729,
"step": 494
},
{
"epoch": 1.12,
"learning_rate": 1.8760240771028966e-06,
"loss": 3.0195,
"step": 496
},
{
"epoch": 1.13,
"learning_rate": 1.8748433246074114e-06,
"loss": 3.0746,
"step": 498
},
{
"epoch": 1.13,
"learning_rate": 1.8736573511818763e-06,
"loss": 3.3867,
"step": 500
},
{
"epoch": 1.14,
"learning_rate": 1.872466163903998e-06,
"loss": 3.4967,
"step": 502
},
{
"epoch": 1.14,
"learning_rate": 1.871269769882598e-06,
"loss": 3.3825,
"step": 504
},
{
"epoch": 1.14,
"learning_rate": 1.8700681762575718e-06,
"loss": 3.0541,
"step": 506
},
{
"epoch": 1.15,
"learning_rate": 1.8688613901998449e-06,
"loss": 2.9684,
"step": 508
},
{
"epoch": 1.15,
"learning_rate": 1.8676494189113302e-06,
"loss": 3.6065,
"step": 510
},
{
"epoch": 1.16,
"learning_rate": 1.866432269624885e-06,
"loss": 2.6745,
"step": 512
},
{
"epoch": 1.16,
"learning_rate": 1.8652099496042692e-06,
"loss": 2.8921,
"step": 514
},
{
"epoch": 1.17,
"learning_rate": 1.8639824661441002e-06,
"loss": 3.4221,
"step": 516
},
{
"epoch": 1.17,
"learning_rate": 1.8627498265698093e-06,
"loss": 3.1598,
"step": 518
},
{
"epoch": 1.18,
"learning_rate": 1.8615120382375999e-06,
"loss": 3.304,
"step": 520
},
{
"epoch": 1.18,
"learning_rate": 1.8602691085344022e-06,
"loss": 3.6469,
"step": 522
},
{
"epoch": 1.18,
"learning_rate": 1.8590210448778285e-06,
"loss": 3.029,
"step": 524
},
{
"epoch": 1.19,
"learning_rate": 1.8577678547161302e-06,
"loss": 3.1291,
"step": 526
},
{
"epoch": 1.19,
"learning_rate": 1.8565095455281531e-06,
"loss": 3.1253,
"step": 528
},
{
"epoch": 1.2,
"learning_rate": 1.8552461248232926e-06,
"loss": 3.2768,
"step": 530
},
{
"epoch": 1.2,
"learning_rate": 1.8539776001414484e-06,
"loss": 3.3853,
"step": 532
},
{
"epoch": 1.21,
"learning_rate": 1.85270397905298e-06,
"loss": 2.9475,
"step": 534
},
{
"epoch": 1.21,
"learning_rate": 1.8514252691586618e-06,
"loss": 3.0003,
"step": 536
},
{
"epoch": 1.22,
"learning_rate": 1.8501414780896375e-06,
"loss": 3.3764,
"step": 538
},
{
"epoch": 1.22,
"learning_rate": 1.8488526135073742e-06,
"loss": 3.1691,
"step": 540
},
{
"epoch": 1.23,
"learning_rate": 1.8475586831036163e-06,
"loss": 2.9825,
"step": 542
},
{
"epoch": 1.23,
"learning_rate": 1.8462596946003415e-06,
"loss": 3.3984,
"step": 544
},
{
"epoch": 1.23,
"learning_rate": 1.844955655749713e-06,
"loss": 3.0431,
"step": 546
},
{
"epoch": 1.24,
"learning_rate": 1.8436465743340334e-06,
"loss": 3.3784,
"step": 548
},
{
"epoch": 1.24,
"learning_rate": 1.8423324581656982e-06,
"loss": 3.6729,
"step": 550
},
{
"epoch": 1.25,
"learning_rate": 1.8410133150871507e-06,
"loss": 3.1924,
"step": 552
},
{
"epoch": 1.25,
"learning_rate": 1.8396891529708331e-06,
"loss": 3.4732,
"step": 554
},
{
"epoch": 1.26,
"learning_rate": 1.838359979719141e-06,
"loss": 3.0055,
"step": 556
},
{
"epoch": 1.26,
"learning_rate": 1.8370258032643751e-06,
"loss": 3.1568,
"step": 558
},
{
"epoch": 1.27,
"learning_rate": 1.8356866315686945e-06,
"loss": 3.1793,
"step": 560
},
{
"epoch": 1.27,
"learning_rate": 1.83434247262407e-06,
"loss": 3.0726,
"step": 562
},
{
"epoch": 1.28,
"learning_rate": 1.832993334452234e-06,
"loss": 3.269,
"step": 564
},
{
"epoch": 1.28,
"learning_rate": 1.8316392251046362e-06,
"loss": 3.0972,
"step": 566
},
{
"epoch": 1.28,
"learning_rate": 1.8302801526623915e-06,
"loss": 3.6254,
"step": 568
},
{
"epoch": 1.29,
"learning_rate": 1.8289161252362349e-06,
"loss": 3.1806,
"step": 570
},
{
"epoch": 1.29,
"learning_rate": 1.8275471509664718e-06,
"loss": 3.1242,
"step": 572
},
{
"epoch": 1.3,
"learning_rate": 1.8261732380229295e-06,
"loss": 3.3653,
"step": 574
},
{
"epoch": 1.3,
"learning_rate": 1.8247943946049084e-06,
"loss": 2.9998,
"step": 576
},
{
"epoch": 1.31,
"learning_rate": 1.8234106289411336e-06,
"loss": 2.9557,
"step": 578
},
{
"epoch": 1.31,
"learning_rate": 1.8220219492897052e-06,
"loss": 2.9404,
"step": 580
},
{
"epoch": 1.32,
"learning_rate": 1.820628363938049e-06,
"loss": 2.9931,
"step": 582
},
{
"epoch": 1.32,
"learning_rate": 1.8192298812028677e-06,
"loss": 2.8562,
"step": 584
},
{
"epoch": 1.33,
"learning_rate": 1.8178265094300904e-06,
"loss": 3.688,
"step": 586
},
{
"epoch": 1.33,
"learning_rate": 1.8164182569948237e-06,
"loss": 3.6602,
"step": 588
},
{
"epoch": 1.33,
"learning_rate": 1.8150051323013006e-06,
"loss": 3.111,
"step": 590
},
{
"epoch": 1.34,
"learning_rate": 1.8135871437828316e-06,
"loss": 3.4345,
"step": 592
},
{
"epoch": 1.34,
"learning_rate": 1.8121642999017534e-06,
"loss": 2.7353,
"step": 594
},
{
"epoch": 1.35,
"learning_rate": 1.8107366091493791e-06,
"loss": 2.9942,
"step": 596
},
{
"epoch": 1.35,
"learning_rate": 1.8100209488579017e-06,
"loss": 3.6281,
"step": 598
},
{
"epoch": 1.36,
"learning_rate": 1.8085860037830547e-06,
"loss": 2.9742,
"step": 600
},
{
"epoch": 1.36,
"learning_rate": 1.807146233191631e-06,
"loss": 3.1228,
"step": 602
},
{
"epoch": 1.37,
"learning_rate": 1.8057016456759593e-06,
"loss": 2.8459,
"step": 604
},
{
"epoch": 1.37,
"learning_rate": 1.804252249857115e-06,
"loss": 3.2035,
"step": 606
},
{
"epoch": 1.37,
"learning_rate": 1.802798054384869e-06,
"loss": 2.8832,
"step": 608
},
{
"epoch": 1.38,
"learning_rate": 1.8013390679376354e-06,
"loss": 3.0979,
"step": 610
},
{
"epoch": 1.38,
"learning_rate": 1.7998752992224207e-06,
"loss": 3.3985,
"step": 612
},
{
"epoch": 1.39,
"learning_rate": 1.7984067569747706e-06,
"loss": 2.9677,
"step": 614
},
{
"epoch": 1.39,
"learning_rate": 1.7969334499587187e-06,
"loss": 3.2882,
"step": 616
},
{
"epoch": 1.4,
"learning_rate": 1.7954553869667341e-06,
"loss": 2.8713,
"step": 618
},
{
"epoch": 1.4,
"learning_rate": 1.7939725768196693e-06,
"loss": 3.0123,
"step": 620
},
{
"epoch": 1.41,
"learning_rate": 1.7924850283667059e-06,
"loss": 3.2748,
"step": 622
},
{
"epoch": 1.41,
"learning_rate": 1.7909927504853038e-06,
"loss": 3.1904,
"step": 624
},
{
"epoch": 1.42,
"learning_rate": 1.7894957520811475e-06,
"loss": 3.0524,
"step": 626
},
{
"epoch": 1.42,
"learning_rate": 1.7879940420880928e-06,
"loss": 3.4434,
"step": 628
},
{
"epoch": 1.42,
"learning_rate": 1.7864876294681128e-06,
"loss": 3.3113,
"step": 630
},
{
"epoch": 1.43,
"learning_rate": 1.7849765232112461e-06,
"loss": 2.9968,
"step": 632
},
{
"epoch": 1.43,
"learning_rate": 1.783460732335542e-06,
"loss": 3.321,
"step": 634
},
{
"epoch": 1.44,
"learning_rate": 1.7819402658870062e-06,
"loss": 2.4748,
"step": 636
},
{
"epoch": 1.44,
"learning_rate": 1.7804151329395478e-06,
"loss": 3.3042,
"step": 638
},
{
"epoch": 1.45,
"learning_rate": 1.7788853425949259e-06,
"loss": 3.0145,
"step": 640
},
{
"epoch": 1.45,
"learning_rate": 1.777350903982693e-06,
"loss": 2.8297,
"step": 642
},
{
"epoch": 1.46,
"learning_rate": 1.7758118262601415e-06,
"loss": 2.8803,
"step": 644
},
{
"epoch": 1.46,
"learning_rate": 1.7742681186122507e-06,
"loss": 3.0037,
"step": 646
},
{
"epoch": 1.47,
"learning_rate": 1.7727197902516295e-06,
"loss": 3.3117,
"step": 648
},
{
"epoch": 1.47,
"learning_rate": 1.7711668504184628e-06,
"loss": 3.0009,
"step": 650
},
{
"epoch": 1.47,
"learning_rate": 1.769609308380456e-06,
"loss": 3.4707,
"step": 652
},
{
"epoch": 1.48,
"learning_rate": 1.7680471734327798e-06,
"loss": 2.9016,
"step": 654
},
{
"epoch": 1.48,
"learning_rate": 1.7664804548980148e-06,
"loss": 2.9012,
"step": 656
},
{
"epoch": 1.49,
"learning_rate": 1.7649091621260955e-06,
"loss": 3.3625,
"step": 658
},
{
"epoch": 1.49,
"learning_rate": 1.7633333044942543e-06,
"loss": 2.8827,
"step": 660
},
{
"epoch": 1.5,
"learning_rate": 1.7617528914069677e-06,
"loss": 3.0527,
"step": 662
},
{
"epoch": 1.5,
"learning_rate": 1.7601679322958964e-06,
"loss": 3.2379,
"step": 664
},
{
"epoch": 1.51,
"learning_rate": 1.7585784366198321e-06,
"loss": 2.7934,
"step": 666
},
{
"epoch": 1.51,
"learning_rate": 1.7569844138646404e-06,
"loss": 3.009,
"step": 668
},
{
"epoch": 1.51,
"learning_rate": 1.7553858735432025e-06,
"loss": 3.4461,
"step": 670
},
{
"epoch": 1.52,
"learning_rate": 1.753782825195361e-06,
"loss": 2.8499,
"step": 672
},
{
"epoch": 1.52,
"learning_rate": 1.752175278387861e-06,
"loss": 2.9445,
"step": 674
},
{
"epoch": 1.53,
"learning_rate": 1.7505632427142945e-06,
"loss": 3.0315,
"step": 676
},
{
"epoch": 1.53,
"learning_rate": 1.7489467277950409e-06,
"loss": 3.0093,
"step": 678
},
{
"epoch": 1.54,
"learning_rate": 1.7473257432772126e-06,
"loss": 3.1375,
"step": 680
},
{
"epoch": 1.54,
"learning_rate": 1.745700298834595e-06,
"loss": 2.9345,
"step": 682
},
{
"epoch": 1.55,
"learning_rate": 1.7440704041675903e-06,
"loss": 3.6223,
"step": 684
},
{
"epoch": 1.55,
"learning_rate": 1.7424360690031576e-06,
"loss": 3.0581,
"step": 686
},
{
"epoch": 1.56,
"learning_rate": 1.7407973030947578e-06,
"loss": 3.6681,
"step": 688
},
{
"epoch": 1.56,
"learning_rate": 1.7391541162222932e-06,
"loss": 3.0931,
"step": 690
},
{
"epoch": 1.56,
"learning_rate": 1.7375065181920495e-06,
"loss": 2.9938,
"step": 692
},
{
"epoch": 1.57,
"learning_rate": 1.7358545188366373e-06,
"loss": 3.0917,
"step": 694
},
{
"epoch": 1.57,
"learning_rate": 1.7341981280149347e-06,
"loss": 3.0784,
"step": 696
},
{
"epoch": 1.58,
"learning_rate": 1.732537355612026e-06,
"loss": 3.2368,
"step": 698
},
{
"epoch": 1.58,
"learning_rate": 1.7308722115391453e-06,
"loss": 2.9081,
"step": 700
},
{
"epoch": 1.59,
"learning_rate": 1.7292027057336153e-06,
"loss": 3.2202,
"step": 702
},
{
"epoch": 1.59,
"learning_rate": 1.7275288481587893e-06,
"loss": 3.2725,
"step": 704
},
{
"epoch": 1.6,
"learning_rate": 1.7258506488039912e-06,
"loss": 2.7724,
"step": 706
},
{
"epoch": 1.6,
"learning_rate": 1.724168117684456e-06,
"loss": 3.2317,
"step": 708
},
{
"epoch": 1.61,
"learning_rate": 1.72248126484127e-06,
"loss": 3.4725,
"step": 710
},
{
"epoch": 1.61,
"learning_rate": 1.7207901003413109e-06,
"loss": 3.0466,
"step": 712
},
{
"epoch": 1.61,
"learning_rate": 1.719094634277187e-06,
"loss": 3.1728,
"step": 714
},
{
"epoch": 1.62,
"learning_rate": 1.717394876767179e-06,
"loss": 3.0119,
"step": 716
},
{
"epoch": 1.62,
"learning_rate": 1.7156908379551775e-06,
"loss": 3.1355,
"step": 718
},
{
"epoch": 1.63,
"learning_rate": 1.7139825280106232e-06,
"loss": 2.7676,
"step": 720
},
{
"epoch": 1.63,
"learning_rate": 1.7122699571284463e-06,
"loss": 3.3676,
"step": 722
},
{
"epoch": 1.64,
"learning_rate": 1.710553135529006e-06,
"loss": 3.1025,
"step": 724
},
{
"epoch": 1.64,
"learning_rate": 1.7088320734580287e-06,
"loss": 2.9067,
"step": 726
},
{
"epoch": 1.65,
"learning_rate": 1.7071067811865474e-06,
"loss": 2.7302,
"step": 728
},
{
"epoch": 1.65,
"learning_rate": 1.7053772690108406e-06,
"loss": 3.2192,
"step": 730
},
{
"epoch": 1.66,
"learning_rate": 1.7036435472523696e-06,
"loss": 2.9505,
"step": 732
},
{
"epoch": 1.66,
"learning_rate": 1.7019056262577193e-06,
"loss": 3.1462,
"step": 734
},
{
"epoch": 1.66,
"learning_rate": 1.7001635163985335e-06,
"loss": 2.8909,
"step": 736
},
{
"epoch": 1.67,
"learning_rate": 1.6984172280714554e-06,
"loss": 3.5523,
"step": 738
},
{
"epoch": 1.67,
"learning_rate": 1.6966667716980649e-06,
"loss": 2.8366,
"step": 740
},
{
"epoch": 1.68,
"learning_rate": 1.6949121577248154e-06,
"loss": 3.124,
"step": 742
},
{
"epoch": 1.68,
"learning_rate": 1.6931533966229718e-06,
"loss": 2.9826,
"step": 744
},
{
"epoch": 1.69,
"learning_rate": 1.6913904988885503e-06,
"loss": 2.8664,
"step": 746
},
{
"epoch": 1.69,
"learning_rate": 1.6896234750422517e-06,
"loss": 2.9879,
"step": 748
},
{
"epoch": 1.7,
"learning_rate": 1.6878523356294028e-06,
"loss": 2.722,
"step": 750
},
{
"epoch": 1.7,
"learning_rate": 1.6860770912198898e-06,
"loss": 3.1467,
"step": 752
},
{
"epoch": 1.7,
"learning_rate": 1.684297752408098e-06,
"loss": 2.9069,
"step": 754
},
{
"epoch": 1.71,
"learning_rate": 1.6825143298128463e-06,
"loss": 3.0215,
"step": 756
},
{
"epoch": 1.71,
"learning_rate": 1.6807268340773267e-06,
"loss": 3.0145,
"step": 758
},
{
"epoch": 1.72,
"learning_rate": 1.6789352758690368e-06,
"loss": 3.1656,
"step": 760
},
{
"epoch": 1.72,
"learning_rate": 1.6771396658797214e-06,
"loss": 3.0245,
"step": 762
},
{
"epoch": 1.73,
"learning_rate": 1.6753400148253027e-06,
"loss": 3.1808,
"step": 764
},
{
"epoch": 1.73,
"learning_rate": 1.6735363334458213e-06,
"loss": 3.6505,
"step": 766
},
{
"epoch": 1.74,
"learning_rate": 1.6717286325053692e-06,
"loss": 3.3838,
"step": 768
},
{
"epoch": 1.74,
"learning_rate": 1.669916922792027e-06,
"loss": 2.7696,
"step": 770
},
{
"epoch": 1.75,
"learning_rate": 1.6681012151177986e-06,
"loss": 2.8441,
"step": 772
},
{
"epoch": 1.75,
"learning_rate": 1.6662815203185478e-06,
"loss": 2.6182,
"step": 774
},
{
"epoch": 1.75,
"learning_rate": 1.6644578492539322e-06,
"loss": 2.7576,
"step": 776
},
{
"epoch": 1.76,
"learning_rate": 1.6626302128073402e-06,
"loss": 2.8683,
"step": 778
},
{
"epoch": 1.76,
"learning_rate": 1.6607986218858233e-06,
"loss": 2.9898,
"step": 780
},
{
"epoch": 1.77,
"learning_rate": 1.6589630874200343e-06,
"loss": 3.0682,
"step": 782
},
{
"epoch": 1.77,
"learning_rate": 1.6571236203641595e-06,
"loss": 2.7996,
"step": 784
},
{
"epoch": 1.78,
"learning_rate": 1.6552802316958543e-06,
"loss": 3.3134,
"step": 786
},
{
"epoch": 1.78,
"learning_rate": 1.653432932416179e-06,
"loss": 3.1492,
"step": 788
},
{
"epoch": 1.79,
"learning_rate": 1.65158173354953e-06,
"loss": 3.1127,
"step": 790
},
{
"epoch": 1.79,
"learning_rate": 1.6497266461435775e-06,
"loss": 2.9384,
"step": 792
},
{
"epoch": 1.8,
"learning_rate": 1.6478676812691959e-06,
"loss": 2.8595,
"step": 794
},
{
"epoch": 1.8,
"learning_rate": 1.6460048500204016e-06,
"loss": 2.6861,
"step": 796
},
{
"epoch": 1.8,
"learning_rate": 1.6441381635142842e-06,
"loss": 2.8855,
"step": 798
},
{
"epoch": 1.81,
"learning_rate": 1.6422676328909408e-06,
"loss": 3.2246,
"step": 800
},
{
"epoch": 1.81,
"learning_rate": 1.6403932693134096e-06,
"loss": 3.1151,
"step": 802
},
{
"epoch": 1.82,
"learning_rate": 1.6385150839676039e-06,
"loss": 3.106,
"step": 804
},
{
"epoch": 1.82,
"learning_rate": 1.6366330880622435e-06,
"loss": 2.726,
"step": 806
},
{
"epoch": 1.83,
"learning_rate": 1.63474729282879e-06,
"loss": 3.0861,
"step": 808
},
{
"epoch": 1.83,
"learning_rate": 1.6328577095213785e-06,
"loss": 2.9671,
"step": 810
},
{
"epoch": 1.84,
"learning_rate": 1.6309643494167509e-06,
"loss": 2.8445,
"step": 812
},
{
"epoch": 1.84,
"learning_rate": 1.6290672238141882e-06,
"loss": 2.864,
"step": 814
},
{
"epoch": 1.85,
"learning_rate": 1.627166344035444e-06,
"loss": 3.6071,
"step": 816
},
{
"epoch": 1.85,
"learning_rate": 1.6252617214246747e-06,
"loss": 2.987,
"step": 818
},
{
"epoch": 1.85,
"learning_rate": 1.623353367348375e-06,
"loss": 3.0725,
"step": 820
},
{
"epoch": 1.86,
"learning_rate": 1.6214412931953074e-06,
"loss": 2.8785,
"step": 822
},
{
"epoch": 1.86,
"learning_rate": 1.619525510376436e-06,
"loss": 2.6348,
"step": 824
},
{
"epoch": 1.87,
"learning_rate": 1.6176060303248569e-06,
"loss": 3.2379,
"step": 826
},
{
"epoch": 1.87,
"learning_rate": 1.6156828644957313e-06,
"loss": 3.2543,
"step": 828
},
{
"epoch": 1.88,
"learning_rate": 1.6137560243662156e-06,
"loss": 3.1434,
"step": 830
},
{
"epoch": 1.88,
"learning_rate": 1.6118255214353958e-06,
"loss": 3.2644,
"step": 832
},
{
"epoch": 1.89,
"learning_rate": 1.6098913672242142e-06,
"loss": 3.0826,
"step": 834
},
{
"epoch": 1.89,
"learning_rate": 1.607953573275406e-06,
"loss": 2.8396,
"step": 836
},
{
"epoch": 1.89,
"learning_rate": 1.6060121511534261e-06,
"loss": 2.8427,
"step": 838
},
{
"epoch": 1.9,
"learning_rate": 1.6040671124443824e-06,
"loss": 3.0612,
"step": 840
},
{
"epoch": 1.9,
"learning_rate": 1.6021184687559663e-06,
"loss": 2.8976,
"step": 842
},
{
"epoch": 1.91,
"learning_rate": 1.6001662317173826e-06,
"loss": 3.0533,
"step": 844
},
{
"epoch": 1.91,
"learning_rate": 1.598210412979281e-06,
"loss": 2.9777,
"step": 846
},
{
"epoch": 1.92,
"learning_rate": 1.5962510242136853e-06,
"loss": 3.2888,
"step": 848
},
{
"epoch": 1.92,
"learning_rate": 1.594288077113927e-06,
"loss": 3.1706,
"step": 850
},
{
"epoch": 1.93,
"learning_rate": 1.5923215833945704e-06,
"loss": 2.8492,
"step": 852
},
{
"epoch": 1.93,
"learning_rate": 1.5903515547913474e-06,
"loss": 3.0864,
"step": 854
},
{
"epoch": 1.94,
"learning_rate": 1.5883780030610847e-06,
"loss": 3.1081,
"step": 856
},
{
"epoch": 1.94,
"learning_rate": 1.5864009399816344e-06,
"loss": 3.5884,
"step": 858
},
{
"epoch": 1.94,
"learning_rate": 1.5844203773518045e-06,
"loss": 2.8961,
"step": 860
},
{
"epoch": 1.95,
"learning_rate": 1.5824363269912877e-06,
"loss": 3.2312,
"step": 862
},
{
"epoch": 1.95,
"learning_rate": 1.5804488007405898e-06,
"loss": 2.6319,
"step": 864
},
{
"epoch": 1.96,
"learning_rate": 1.5784578104609611e-06,
"loss": 3.2546,
"step": 866
},
{
"epoch": 1.96,
"learning_rate": 1.5764633680343251e-06,
"loss": 3.0429,
"step": 868
},
{
"epoch": 1.97,
"learning_rate": 1.574465485363206e-06,
"loss": 2.845,
"step": 870
},
{
"epoch": 1.97,
"learning_rate": 1.5724641743706598e-06,
"loss": 2.6803,
"step": 872
},
{
"epoch": 1.98,
"learning_rate": 1.570459447000202e-06,
"loss": 2.7251,
"step": 874
},
{
"epoch": 1.98,
"learning_rate": 1.5684513152157361e-06,
"loss": 2.8006,
"step": 876
},
{
"epoch": 1.99,
"learning_rate": 1.566439791001483e-06,
"loss": 2.8095,
"step": 878
},
{
"epoch": 1.99,
"learning_rate": 1.5644248863619089e-06,
"loss": 3.0018,
"step": 880
},
{
"epoch": 1.99,
"learning_rate": 1.5624066133216544e-06,
"loss": 2.8637,
"step": 882
},
{
"epoch": 2.0,
"learning_rate": 1.5603849839254614e-06,
"loss": 3.4833,
"step": 884
},
{
"epoch": 2.0,
"learning_rate": 1.5583600102381022e-06,
"loss": 2.8546,
"step": 886
},
{
"epoch": 2.01,
"learning_rate": 1.5563317043443081e-06,
"loss": 2.832,
"step": 888
},
{
"epoch": 2.01,
"learning_rate": 1.5543000783486955e-06,
"loss": 2.9643,
"step": 890
},
{
"epoch": 2.02,
"learning_rate": 1.5522651443756946e-06,
"loss": 3.2313,
"step": 892
},
{
"epoch": 2.02,
"learning_rate": 1.5502269145694787e-06,
"loss": 3.019,
"step": 894
},
{
"epoch": 2.03,
"learning_rate": 1.5481854010938878e-06,
"loss": 3.1545,
"step": 896
},
{
"epoch": 2.03,
"learning_rate": 1.5461406161323598e-06,
"loss": 2.829,
"step": 898
},
{
"epoch": 2.04,
"learning_rate": 1.5440925718878566e-06,
"loss": 3.1711,
"step": 900
},
{
"epoch": 2.04,
"learning_rate": 1.5420412805827898e-06,
"loss": 2.9604,
"step": 902
},
{
"epoch": 2.04,
"learning_rate": 1.5399867544589499e-06,
"loss": 2.7188,
"step": 904
},
{
"epoch": 2.05,
"learning_rate": 1.5379290057774316e-06,
"loss": 2.9056,
"step": 906
},
{
"epoch": 2.05,
"learning_rate": 1.5358680468185624e-06,
"loss": 3.2179,
"step": 908
},
{
"epoch": 2.06,
"learning_rate": 1.5338038898818266e-06,
"loss": 2.8448,
"step": 910
},
{
"epoch": 2.06,
"learning_rate": 1.5317365472857953e-06,
"loss": 2.9409,
"step": 912
},
{
"epoch": 2.07,
"learning_rate": 1.52966603136805e-06,
"loss": 3.0139,
"step": 914
},
{
"epoch": 2.07,
"learning_rate": 1.52759235448511e-06,
"loss": 3.0152,
"step": 916
},
{
"epoch": 2.08,
"learning_rate": 1.52551552901236e-06,
"loss": 2.956,
"step": 918
},
{
"epoch": 2.08,
"learning_rate": 1.5234355673439736e-06,
"loss": 3.0707,
"step": 920
},
{
"epoch": 2.08,
"learning_rate": 1.521352481892841e-06,
"loss": 2.6729,
"step": 922
},
{
"epoch": 2.09,
"learning_rate": 1.5192662850904955e-06,
"loss": 2.9812,
"step": 924
},
{
"epoch": 2.09,
"learning_rate": 1.5171769893870376e-06,
"loss": 2.9229,
"step": 926
},
{
"epoch": 2.1,
"learning_rate": 1.5150846072510623e-06,
"loss": 2.9769,
"step": 928
},
{
"epoch": 2.1,
"learning_rate": 1.512989151169583e-06,
"loss": 3.3119,
"step": 930
},
{
"epoch": 2.11,
"learning_rate": 1.5108906336479586e-06,
"loss": 2.8164,
"step": 932
},
{
"epoch": 2.11,
"learning_rate": 1.5087890672098182e-06,
"loss": 2.8443,
"step": 934
},
{
"epoch": 2.12,
"learning_rate": 1.5066844643969858e-06,
"loss": 2.9107,
"step": 936
},
{
"epoch": 2.12,
"learning_rate": 1.5045768377694067e-06,
"loss": 3.1381,
"step": 938
},
{
"epoch": 2.13,
"learning_rate": 1.502466199905072e-06,
"loss": 2.9275,
"step": 940
},
{
"epoch": 2.13,
"learning_rate": 1.5003525633999425e-06,
"loss": 2.986,
"step": 942
},
{
"epoch": 2.13,
"learning_rate": 1.498235940867875e-06,
"loss": 2.5823,
"step": 944
},
{
"epoch": 2.14,
"learning_rate": 1.4961163449405466e-06,
"loss": 2.6783,
"step": 946
},
{
"epoch": 2.14,
"learning_rate": 1.4939937882673782e-06,
"loss": 3.398,
"step": 948
},
{
"epoch": 2.15,
"learning_rate": 1.4918682835154614e-06,
"loss": 3.247,
"step": 950
},
{
"epoch": 2.15,
"learning_rate": 1.4897398433694805e-06,
"loss": 2.377,
"step": 952
},
{
"epoch": 2.16,
"learning_rate": 1.4876084805316388e-06,
"loss": 3.2205,
"step": 954
},
{
"epoch": 2.16,
"learning_rate": 1.4854742077215797e-06,
"loss": 3.2777,
"step": 956
},
{
"epoch": 2.17,
"learning_rate": 1.483337037676315e-06,
"loss": 2.829,
"step": 958
},
{
"epoch": 2.17,
"learning_rate": 1.4811969831501458e-06,
"loss": 2.7846,
"step": 960
},
{
"epoch": 2.18,
"learning_rate": 1.4790540569145878e-06,
"loss": 2.8359,
"step": 962
},
{
"epoch": 2.18,
"learning_rate": 1.476908271758294e-06,
"loss": 2.607,
"step": 964
},
{
"epoch": 2.18,
"learning_rate": 1.4747596404869791e-06,
"loss": 2.5559,
"step": 966
},
{
"epoch": 2.19,
"learning_rate": 1.4726081759233438e-06,
"loss": 2.6534,
"step": 968
},
{
"epoch": 2.19,
"learning_rate": 1.470453890906996e-06,
"loss": 2.8959,
"step": 970
},
{
"epoch": 2.2,
"learning_rate": 1.4682967982943772e-06,
"loss": 2.8039,
"step": 972
},
{
"epoch": 2.2,
"learning_rate": 1.4661369109586832e-06,
"loss": 3.3513,
"step": 974
},
{
"epoch": 2.21,
"learning_rate": 1.4639742417897885e-06,
"loss": 2.993,
"step": 976
},
{
"epoch": 2.21,
"learning_rate": 1.4618088036941693e-06,
"loss": 3.6406,
"step": 978
},
{
"epoch": 2.22,
"learning_rate": 1.4596406095948261e-06,
"loss": 2.7913,
"step": 980
},
{
"epoch": 2.22,
"learning_rate": 1.4574696724312068e-06,
"loss": 2.8517,
"step": 982
},
{
"epoch": 2.22,
"learning_rate": 1.4552960051591296e-06,
"loss": 3.2277,
"step": 984
},
{
"epoch": 2.23,
"learning_rate": 1.4531196207507064e-06,
"loss": 2.9813,
"step": 986
},
{
"epoch": 2.23,
"learning_rate": 1.4509405321942626e-06,
"loss": 2.8984,
"step": 988
},
{
"epoch": 2.24,
"learning_rate": 1.448758752494263e-06,
"loss": 2.8021,
"step": 990
},
{
"epoch": 2.24,
"learning_rate": 1.446574294671233e-06,
"loss": 3.1709,
"step": 992
},
{
"epoch": 2.25,
"learning_rate": 1.4443871717616794e-06,
"loss": 3.1339,
"step": 994
},
{
"epoch": 2.25,
"learning_rate": 1.4421973968180142e-06,
"loss": 2.6732,
"step": 996
},
{
"epoch": 2.26,
"learning_rate": 1.4400049829084772e-06,
"loss": 3.086,
"step": 998
},
{
"epoch": 2.26,
"learning_rate": 1.4378099431170561e-06,
"loss": 2.8538,
"step": 1000
},
{
"epoch": 2.27,
"learning_rate": 1.4356122905434106e-06,
"loss": 3.1565,
"step": 1002
},
{
"epoch": 2.27,
"learning_rate": 1.4334120383027911e-06,
"loss": 2.7204,
"step": 1004
},
{
"epoch": 2.27,
"learning_rate": 1.4312091995259647e-06,
"loss": 2.8988,
"step": 1006
},
{
"epoch": 2.28,
"learning_rate": 1.4290037873591332e-06,
"loss": 2.7425,
"step": 1008
},
{
"epoch": 2.28,
"learning_rate": 1.426795814963856e-06,
"loss": 2.9555,
"step": 1010
},
{
"epoch": 2.29,
"learning_rate": 1.4245852955169726e-06,
"loss": 2.6559,
"step": 1012
},
{
"epoch": 2.29,
"learning_rate": 1.4223722422105218e-06,
"loss": 2.7681,
"step": 1014
},
{
"epoch": 2.3,
"learning_rate": 1.420156668251664e-06,
"loss": 2.9471,
"step": 1016
},
{
"epoch": 2.3,
"learning_rate": 1.4179385868626035e-06,
"loss": 2.7926,
"step": 1018
},
{
"epoch": 2.31,
"learning_rate": 1.4157180112805075e-06,
"loss": 2.8494,
"step": 1020
},
{
"epoch": 2.31,
"learning_rate": 1.4134949547574292e-06,
"loss": 3.3081,
"step": 1022
},
{
"epoch": 2.32,
"learning_rate": 1.4112694305602263e-06,
"loss": 2.8013,
"step": 1024
},
{
"epoch": 2.32,
"learning_rate": 1.4090414519704851e-06,
"loss": 3.0064,
"step": 1026
},
{
"epoch": 2.32,
"learning_rate": 1.4068110322844375e-06,
"loss": 2.7133,
"step": 1028
},
{
"epoch": 2.33,
"learning_rate": 1.4045781848128845e-06,
"loss": 3.3862,
"step": 1030
},
{
"epoch": 2.33,
"learning_rate": 1.4023429228811152e-06,
"loss": 2.7249,
"step": 1032
},
{
"epoch": 2.34,
"learning_rate": 1.4001052598288282e-06,
"loss": 3.0071,
"step": 1034
},
{
"epoch": 2.34,
"learning_rate": 1.3978652090100518e-06,
"loss": 3.2306,
"step": 1036
},
{
"epoch": 2.35,
"learning_rate": 1.3956227837930636e-06,
"loss": 3.1295,
"step": 1038
},
{
"epoch": 2.35,
"learning_rate": 1.3933779975603112e-06,
"loss": 3.0307,
"step": 1040
},
{
"epoch": 2.36,
"learning_rate": 1.3911308637083334e-06,
"loss": 2.7784,
"step": 1042
},
{
"epoch": 2.36,
"learning_rate": 1.388881395647678e-06,
"loss": 2.8326,
"step": 1044
},
{
"epoch": 2.37,
"learning_rate": 1.3866296068028236e-06,
"loss": 3.0111,
"step": 1046
},
{
"epoch": 2.37,
"learning_rate": 1.3843755106120985e-06,
"loss": 2.8593,
"step": 1048
},
{
"epoch": 2.37,
"learning_rate": 1.3821191205276016e-06,
"loss": 2.6555,
"step": 1050
},
{
"epoch": 2.38,
"learning_rate": 1.3798604500151206e-06,
"loss": 2.645,
"step": 1052
},
{
"epoch": 2.38,
"learning_rate": 1.3775995125540532e-06,
"loss": 3.0032,
"step": 1054
},
{
"epoch": 2.39,
"learning_rate": 1.375336321637325e-06,
"loss": 3.0761,
"step": 1056
},
{
"epoch": 2.39,
"learning_rate": 1.3730708907713108e-06,
"loss": 2.8773,
"step": 1058
},
{
"epoch": 2.4,
"learning_rate": 1.3708032334757524e-06,
"loss": 2.5142,
"step": 1060
},
{
"epoch": 2.4,
"learning_rate": 1.368533363283679e-06,
"loss": 3.1279,
"step": 1062
},
{
"epoch": 2.41,
"learning_rate": 1.366261293741326e-06,
"loss": 2.7788,
"step": 1064
},
{
"epoch": 2.41,
"learning_rate": 1.3639870384080537e-06,
"loss": 3.0011,
"step": 1066
},
{
"epoch": 2.41,
"learning_rate": 1.3617106108562673e-06,
"loss": 2.8576,
"step": 1068
},
{
"epoch": 2.42,
"learning_rate": 1.3594320246713356e-06,
"loss": 2.6657,
"step": 1070
},
{
"epoch": 2.42,
"learning_rate": 1.3571512934515091e-06,
"loss": 2.8379,
"step": 1072
},
{
"epoch": 2.43,
"learning_rate": 1.3548684308078407e-06,
"loss": 2.7702,
"step": 1074
},
{
"epoch": 2.43,
"learning_rate": 1.352583450364102e-06,
"loss": 2.5082,
"step": 1076
},
{
"epoch": 2.44,
"learning_rate": 1.3502963657567041e-06,
"loss": 2.9535,
"step": 1078
},
{
"epoch": 2.44,
"learning_rate": 1.3480071906346156e-06,
"loss": 3.1248,
"step": 1080
},
{
"epoch": 2.45,
"learning_rate": 1.34571593865928e-06,
"loss": 2.939,
"step": 1082
},
{
"epoch": 2.45,
"learning_rate": 1.3434226235045362e-06,
"loss": 2.8517,
"step": 1084
},
{
"epoch": 2.46,
"learning_rate": 1.341127258856535e-06,
"loss": 2.7523,
"step": 1086
},
{
"epoch": 2.46,
"learning_rate": 1.3388298584136592e-06,
"loss": 2.7239,
"step": 1088
},
{
"epoch": 2.46,
"learning_rate": 1.3365304358864397e-06,
"loss": 2.8057,
"step": 1090
},
{
"epoch": 2.47,
"learning_rate": 1.3342290049974762e-06,
"loss": 2.571,
"step": 1092
},
{
"epoch": 2.47,
"learning_rate": 1.3319255794813528e-06,
"loss": 2.7409,
"step": 1094
},
{
"epoch": 2.48,
"learning_rate": 1.3296201730845582e-06,
"loss": 2.7012,
"step": 1096
},
{
"epoch": 2.48,
"learning_rate": 1.3273127995654014e-06,
"loss": 3.1777,
"step": 1098
},
{
"epoch": 2.49,
"learning_rate": 1.325003472693933e-06,
"loss": 2.9287,
"step": 1100
},
{
"epoch": 2.49,
"learning_rate": 1.322692206251859e-06,
"loss": 2.9347,
"step": 1102
},
{
"epoch": 2.5,
"learning_rate": 1.320379014032461e-06,
"loss": 3.1072,
"step": 1104
},
{
"epoch": 2.5,
"learning_rate": 1.3180639098405135e-06,
"loss": 3.2279,
"step": 1106
},
{
"epoch": 2.51,
"learning_rate": 1.3157469074922015e-06,
"loss": 2.9649,
"step": 1108
},
{
"epoch": 2.51,
"learning_rate": 1.3134280208150373e-06,
"loss": 2.9718,
"step": 1110
},
{
"epoch": 2.51,
"learning_rate": 1.3111072636477793e-06,
"loss": 2.5961,
"step": 1112
},
{
"epoch": 2.52,
"learning_rate": 1.3087846498403486e-06,
"loss": 3.1793,
"step": 1114
},
{
"epoch": 2.52,
"learning_rate": 1.3064601932537457e-06,
"loss": 2.8457,
"step": 1116
},
{
"epoch": 2.53,
"learning_rate": 1.3041339077599696e-06,
"loss": 2.8363,
"step": 1118
},
{
"epoch": 2.53,
"learning_rate": 1.3018058072419331e-06,
"loss": 2.5942,
"step": 1120
},
{
"epoch": 2.54,
"learning_rate": 1.2994759055933806e-06,
"loss": 2.8349,
"step": 1122
},
{
"epoch": 2.54,
"learning_rate": 1.2971442167188067e-06,
"loss": 2.8009,
"step": 1124
},
{
"epoch": 2.55,
"learning_rate": 1.2948107545333704e-06,
"loss": 2.6612,
"step": 1126
},
{
"epoch": 2.55,
"learning_rate": 1.2924755329628148e-06,
"loss": 3.0204,
"step": 1128
},
{
"epoch": 2.56,
"learning_rate": 1.2901385659433816e-06,
"loss": 3.2938,
"step": 1130
},
{
"epoch": 2.56,
"learning_rate": 1.2877998674217295e-06,
"loss": 2.6818,
"step": 1132
},
{
"epoch": 2.56,
"learning_rate": 1.2854594513548507e-06,
"loss": 2.7761,
"step": 1134
},
{
"epoch": 2.57,
"learning_rate": 1.283117331709988e-06,
"loss": 3.1311,
"step": 1136
},
{
"epoch": 2.57,
"learning_rate": 1.2807735224645495e-06,
"loss": 3.0373,
"step": 1138
},
{
"epoch": 2.58,
"learning_rate": 1.2784280376060274e-06,
"loss": 3.108,
"step": 1140
},
{
"epoch": 2.58,
"learning_rate": 1.2760808911319141e-06,
"loss": 2.7939,
"step": 1142
},
{
"epoch": 2.59,
"learning_rate": 1.2737320970496175e-06,
"loss": 2.8404,
"step": 1144
},
{
"epoch": 2.59,
"learning_rate": 1.2713816693763786e-06,
"loss": 2.9958,
"step": 1146
},
{
"epoch": 2.6,
"learning_rate": 1.2690296221391867e-06,
"loss": 2.832,
"step": 1148
},
{
"epoch": 2.6,
"learning_rate": 1.2666759693746978e-06,
"loss": 3.1871,
"step": 1150
},
{
"epoch": 2.6,
"learning_rate": 1.2643207251291484e-06,
"loss": 2.7197,
"step": 1152
},
{
"epoch": 2.61,
"learning_rate": 1.261963903458273e-06,
"loss": 2.8825,
"step": 1154
},
{
"epoch": 2.61,
"learning_rate": 1.2596055184272194e-06,
"loss": 2.6513,
"step": 1156
},
{
"epoch": 2.62,
"learning_rate": 1.2572455841104664e-06,
"loss": 2.6503,
"step": 1158
},
{
"epoch": 2.62,
"learning_rate": 1.2548841145917374e-06,
"loss": 2.9881,
"step": 1160
},
{
"epoch": 2.63,
"learning_rate": 1.252521123963919e-06,
"loss": 2.6832,
"step": 1162
},
{
"epoch": 2.63,
"learning_rate": 1.2501566263289744e-06,
"loss": 2.8204,
"step": 1164
},
{
"epoch": 2.64,
"learning_rate": 1.2477906357978614e-06,
"loss": 2.727,
"step": 1166
},
{
"epoch": 2.64,
"learning_rate": 1.2454231664904462e-06,
"loss": 3.0467,
"step": 1168
},
{
"epoch": 2.65,
"learning_rate": 1.2430542325354205e-06,
"loss": 3.0145,
"step": 1170
},
{
"epoch": 2.65,
"learning_rate": 1.2406838480702168e-06,
"loss": 3.2211,
"step": 1172
},
{
"epoch": 2.65,
"learning_rate": 1.2383120272409243e-06,
"loss": 2.8369,
"step": 1174
},
{
"epoch": 2.66,
"learning_rate": 1.2359387842022035e-06,
"loss": 2.5051,
"step": 1176
},
{
"epoch": 2.66,
"learning_rate": 1.2335641331172037e-06,
"loss": 3.3503,
"step": 1178
},
{
"epoch": 2.67,
"learning_rate": 1.2311880881574754e-06,
"loss": 3.0488,
"step": 1180
},
{
"epoch": 2.67,
"learning_rate": 1.2288106635028892e-06,
"loss": 2.9949,
"step": 1182
},
{
"epoch": 2.68,
"learning_rate": 1.2264318733415486e-06,
"loss": 2.6827,
"step": 1184
},
{
"epoch": 2.68,
"learning_rate": 1.2240517318697063e-06,
"loss": 2.903,
"step": 1186
},
{
"epoch": 2.69,
"learning_rate": 1.22167025329168e-06,
"loss": 3.3562,
"step": 1188
},
{
"epoch": 2.69,
"learning_rate": 1.2192874518197657e-06,
"loss": 2.7463,
"step": 1190
},
{
"epoch": 2.7,
"learning_rate": 1.2169033416741562e-06,
"loss": 2.9223,
"step": 1192
},
{
"epoch": 2.7,
"learning_rate": 1.2145179370828527e-06,
"loss": 2.8244,
"step": 1194
},
{
"epoch": 2.7,
"learning_rate": 1.2121312522815816e-06,
"loss": 2.5914,
"step": 1196
},
{
"epoch": 2.71,
"learning_rate": 1.20974330151371e-06,
"loss": 3.044,
"step": 1198
},
{
"epoch": 2.71,
"learning_rate": 1.2073540990301602e-06,
"loss": 3.1083,
"step": 1200
},
{
"epoch": 2.72,
"learning_rate": 1.204963659089324e-06,
"loss": 3.0048,
"step": 1202
},
{
"epoch": 2.72,
"learning_rate": 1.2025719959569781e-06,
"loss": 3.0108,
"step": 1204
},
{
"epoch": 2.73,
"learning_rate": 1.2001791239061994e-06,
"loss": 3.0631,
"step": 1206
},
{
"epoch": 2.73,
"learning_rate": 1.197785057217279e-06,
"loss": 3.1584,
"step": 1208
},
{
"epoch": 2.74,
"learning_rate": 1.1953898101776382e-06,
"loss": 3.051,
"step": 1210
},
{
"epoch": 2.74,
"learning_rate": 1.1929933970817416e-06,
"loss": 3.1265,
"step": 1212
},
{
"epoch": 2.75,
"learning_rate": 1.1905958322310132e-06,
"loss": 2.9612,
"step": 1214
},
{
"epoch": 2.75,
"learning_rate": 1.1881971299337502e-06,
"loss": 2.9475,
"step": 1216
},
{
"epoch": 2.75,
"learning_rate": 1.1857973045050382e-06,
"loss": 2.6938,
"step": 1218
},
{
"epoch": 2.76,
"learning_rate": 1.183396370266665e-06,
"loss": 3.1884,
"step": 1220
},
{
"epoch": 2.76,
"learning_rate": 1.1809943415470358e-06,
"loss": 2.7971,
"step": 1222
},
{
"epoch": 2.77,
"learning_rate": 1.1785912326810877e-06,
"loss": 2.8811,
"step": 1224
},
{
"epoch": 2.77,
"learning_rate": 1.1761870580102043e-06,
"loss": 3.1646,
"step": 1226
},
{
"epoch": 2.78,
"learning_rate": 1.1737818318821286e-06,
"loss": 2.8446,
"step": 1228
},
{
"epoch": 2.78,
"learning_rate": 1.1713755686508794e-06,
"loss": 3.2058,
"step": 1230
},
{
"epoch": 2.79,
"learning_rate": 1.1689682826766648e-06,
"loss": 2.7236,
"step": 1232
},
{
"epoch": 2.79,
"learning_rate": 1.1665599883257959e-06,
"loss": 2.9138,
"step": 1234
},
{
"epoch": 2.79,
"learning_rate": 1.1641506999706026e-06,
"loss": 3.158,
"step": 1236
},
{
"epoch": 2.8,
"learning_rate": 1.1617404319893458e-06,
"loss": 3.062,
"step": 1238
},
{
"epoch": 2.8,
"learning_rate": 1.1593291987661331e-06,
"loss": 3.1808,
"step": 1240
},
{
"epoch": 2.81,
"learning_rate": 1.1569170146908326e-06,
"loss": 2.7202,
"step": 1242
},
{
"epoch": 2.81,
"learning_rate": 1.154503894158987e-06,
"loss": 3.3373,
"step": 1244
},
{
"epoch": 2.82,
"learning_rate": 1.1520898515717274e-06,
"loss": 2.9694,
"step": 1246
},
{
"epoch": 2.82,
"learning_rate": 1.1496749013356877e-06,
"loss": 3.053,
"step": 1248
},
{
"epoch": 2.83,
"learning_rate": 1.1472590578629185e-06,
"loss": 3.1397,
"step": 1250
},
{
"epoch": 2.83,
"learning_rate": 1.1448423355708013e-06,
"loss": 3.0751,
"step": 1252
},
{
"epoch": 2.84,
"learning_rate": 1.1424247488819617e-06,
"loss": 2.4685,
"step": 1254
},
{
"epoch": 2.84,
"learning_rate": 1.1400063122241843e-06,
"loss": 2.569,
"step": 1256
},
{
"epoch": 2.84,
"learning_rate": 1.1375870400303262e-06,
"loss": 2.7506,
"step": 1258
},
{
"epoch": 2.85,
"learning_rate": 1.1351669467382308e-06,
"loss": 2.9374,
"step": 1260
},
{
"epoch": 2.85,
"learning_rate": 1.1327460467906417e-06,
"loss": 3.1167,
"step": 1262
},
{
"epoch": 2.86,
"learning_rate": 1.1303243546351161e-06,
"loss": 2.9643,
"step": 1264
},
{
"epoch": 2.86,
"learning_rate": 1.12790188472394e-06,
"loss": 2.6368,
"step": 1266
},
{
"epoch": 2.87,
"learning_rate": 1.12547865151404e-06,
"loss": 2.8845,
"step": 1268
},
{
"epoch": 2.87,
"learning_rate": 1.1230546694668978e-06,
"loss": 2.8049,
"step": 1270
},
{
"epoch": 2.88,
"learning_rate": 1.1206299530484647e-06,
"loss": 2.8742,
"step": 1272
},
{
"epoch": 2.88,
"learning_rate": 1.1182045167290743e-06,
"loss": 3.016,
"step": 1274
},
{
"epoch": 2.89,
"learning_rate": 1.1157783749833568e-06,
"loss": 2.8742,
"step": 1276
},
{
"epoch": 2.89,
"learning_rate": 1.113351542290152e-06,
"loss": 2.8077,
"step": 1278
},
{
"epoch": 2.89,
"learning_rate": 1.1109240331324229e-06,
"loss": 2.9582,
"step": 1280
},
{
"epoch": 2.9,
"learning_rate": 1.1084958619971697e-06,
"loss": 2.6966,
"step": 1282
},
{
"epoch": 2.9,
"learning_rate": 1.1060670433753435e-06,
"loss": 2.586,
"step": 1284
},
{
"epoch": 2.91,
"learning_rate": 1.1036375917617594e-06,
"loss": 2.7536,
"step": 1286
},
{
"epoch": 2.91,
"learning_rate": 1.10120752165501e-06,
"loss": 3.1543,
"step": 1288
},
{
"epoch": 2.92,
"learning_rate": 1.0987768475573781e-06,
"loss": 2.8458,
"step": 1290
},
{
"epoch": 2.92,
"learning_rate": 1.0963455839747533e-06,
"loss": 2.7247,
"step": 1292
},
{
"epoch": 2.93,
"learning_rate": 1.0939137454165405e-06,
"loss": 2.7273,
"step": 1294
},
{
"epoch": 2.93,
"learning_rate": 1.0914813463955779e-06,
"loss": 3.1296,
"step": 1296
},
{
"epoch": 2.93,
"learning_rate": 1.0890484014280475e-06,
"loss": 2.4097,
"step": 1298
},
{
"epoch": 2.94,
"learning_rate": 1.0866149250333895e-06,
"loss": 2.7304,
"step": 1300
},
{
"epoch": 2.94,
"learning_rate": 1.0841809317342163e-06,
"loss": 2.8627,
"step": 1302
},
{
"epoch": 2.95,
"learning_rate": 1.0817464360562242e-06,
"loss": 2.6054,
"step": 1304
},
{
"epoch": 2.95,
"learning_rate": 1.079311452528108e-06,
"loss": 2.7118,
"step": 1306
},
{
"epoch": 2.96,
"learning_rate": 1.0768759956814737e-06,
"loss": 2.9143,
"step": 1308
},
{
"epoch": 2.96,
"learning_rate": 1.0744400800507525e-06,
"loss": 3.0118,
"step": 1310
},
{
"epoch": 2.97,
"learning_rate": 1.0720037201731132e-06,
"loss": 2.5814,
"step": 1312
},
{
"epoch": 2.97,
"learning_rate": 1.0695669305883753e-06,
"loss": 3.0125,
"step": 1314
},
{
"epoch": 2.98,
"learning_rate": 1.0671297258389242e-06,
"loss": 2.6754,
"step": 1316
},
{
"epoch": 2.98,
"learning_rate": 1.0646921204696215e-06,
"loss": 3.3871,
"step": 1318
},
{
"epoch": 2.98,
"learning_rate": 1.0622541290277201e-06,
"loss": 3.0433,
"step": 1320
},
{
"epoch": 2.99,
"learning_rate": 1.0598157660627764e-06,
"loss": 2.8308,
"step": 1322
},
{
"epoch": 2.99,
"learning_rate": 1.0573770461265658e-06,
"loss": 2.6814,
"step": 1324
},
{
"epoch": 3.0,
"learning_rate": 1.0549379837729922e-06,
"loss": 3.0351,
"step": 1326
},
{
"epoch": 3.0,
"learning_rate": 1.0524985935580032e-06,
"loss": 3.2508,
"step": 1328
},
{
"epoch": 3.01,
"learning_rate": 1.050058890039504e-06,
"loss": 2.7,
"step": 1330
},
{
"epoch": 3.01,
"learning_rate": 1.0476188877772687e-06,
"loss": 2.839,
"step": 1332
},
{
"epoch": 3.02,
"learning_rate": 1.0451786013328542e-06,
"loss": 2.7308,
"step": 1334
},
{
"epoch": 3.02,
"learning_rate": 1.0427380452695145e-06,
"loss": 2.9305,
"step": 1336
},
{
"epoch": 3.03,
"learning_rate": 1.0402972341521112e-06,
"loss": 3.0336,
"step": 1338
},
{
"epoch": 3.03,
"learning_rate": 1.0378561825470286e-06,
"loss": 2.7144,
"step": 1340
},
{
"epoch": 3.03,
"learning_rate": 1.0354149050220866e-06,
"loss": 2.723,
"step": 1342
},
{
"epoch": 3.04,
"learning_rate": 1.0329734161464529e-06,
"loss": 2.8139,
"step": 1344
},
{
"epoch": 3.04,
"learning_rate": 1.0305317304905566e-06,
"loss": 3.3544,
"step": 1346
},
{
"epoch": 3.05,
"learning_rate": 1.0280898626260006e-06,
"loss": 3.0795,
"step": 1348
},
{
"epoch": 3.05,
"learning_rate": 1.0256478271254766e-06,
"loss": 3.2152,
"step": 1350
},
{
"epoch": 3.06,
"learning_rate": 1.0232056385626756e-06,
"loss": 2.8333,
"step": 1352
},
{
"epoch": 3.06,
"learning_rate": 1.0207633115122026e-06,
"loss": 3.3351,
"step": 1354
},
{
"epoch": 3.07,
"learning_rate": 1.0183208605494878e-06,
"loss": 3.2257,
"step": 1356
},
{
"epoch": 3.07,
"learning_rate": 1.0158783002507028e-06,
"loss": 2.6375,
"step": 1358
},
{
"epoch": 3.08,
"learning_rate": 1.0134356451926703e-06,
"loss": 2.9266,
"step": 1360
},
{
"epoch": 3.08,
"learning_rate": 1.0109929099527794e-06,
"loss": 2.9206,
"step": 1362
},
{
"epoch": 3.08,
"learning_rate": 1.008550109108897e-06,
"loss": 2.9864,
"step": 1364
},
{
"epoch": 3.09,
"learning_rate": 1.0061072572392823e-06,
"loss": 3.0176,
"step": 1366
},
{
"epoch": 3.09,
"learning_rate": 1.0036643689224982e-06,
"loss": 2.9777,
"step": 1368
},
{
"epoch": 3.1,
"learning_rate": 1.0012214587373254e-06,
"loss": 3.1409,
"step": 1370
},
{
"epoch": 3.1,
"learning_rate": 9.987785412626747e-07,
"loss": 3.1139,
"step": 1372
},
{
"epoch": 3.11,
"learning_rate": 9.96335631077502e-07,
"loss": 2.5028,
"step": 1374
},
{
"epoch": 3.11,
"learning_rate": 9.938927427607176e-07,
"loss": 3.0438,
"step": 1376
},
{
"epoch": 3.12,
"learning_rate": 9.914498908911027e-07,
"loss": 2.6127,
"step": 1378
},
{
"epoch": 3.12,
"learning_rate": 9.890070900472205e-07,
"loss": 2.7627,
"step": 1380
},
{
"epoch": 3.12,
"learning_rate": 9.865643548073294e-07,
"loss": 3.1435,
"step": 1382
},
{
"epoch": 3.13,
"learning_rate": 9.841216997492975e-07,
"loss": 2.7834,
"step": 1384
},
{
"epoch": 3.13,
"learning_rate": 9.816791394505124e-07,
"loss": 2.8447,
"step": 1386
},
{
"epoch": 3.14,
"learning_rate": 9.792366884877978e-07,
"loss": 3.0814,
"step": 1388
},
{
"epoch": 3.14,
"learning_rate": 9.767943614373243e-07,
"loss": 2.3932,
"step": 1390
},
{
"epoch": 3.15,
"learning_rate": 9.74352172874523e-07,
"loss": 2.9254,
"step": 1392
},
{
"epoch": 3.15,
"learning_rate": 9.719101373739993e-07,
"loss": 3.0355,
"step": 1394
},
{
"epoch": 3.16,
"learning_rate": 9.694682695094435e-07,
"loss": 2.8689,
"step": 1396
},
{
"epoch": 3.16,
"learning_rate": 9.670265838535473e-07,
"loss": 2.8432,
"step": 1398
},
{
"epoch": 3.17,
"learning_rate": 9.645850949779133e-07,
"loss": 2.9826,
"step": 1400
},
{
"epoch": 3.17,
"learning_rate": 9.621438174529715e-07,
"loss": 2.6587,
"step": 1402
},
{
"epoch": 3.17,
"learning_rate": 9.597027658478887e-07,
"loss": 2.8838,
"step": 1404
},
{
"epoch": 3.18,
"learning_rate": 9.572619547304854e-07,
"loss": 3.2349,
"step": 1406
},
{
"epoch": 3.18,
"learning_rate": 9.548213986671457e-07,
"loss": 2.6617,
"step": 1408
},
{
"epoch": 3.19,
"learning_rate": 9.523811122227314e-07,
"loss": 2.76,
"step": 1410
},
{
"epoch": 3.19,
"learning_rate": 9.499411099604961e-07,
"loss": 3.2104,
"step": 1412
},
{
"epoch": 3.2,
"learning_rate": 9.475014064419968e-07,
"loss": 2.5588,
"step": 1414
},
{
"epoch": 3.2,
"learning_rate": 9.450620162270076e-07,
"loss": 2.6541,
"step": 1416
},
{
"epoch": 3.21,
"learning_rate": 9.426229538734341e-07,
"loss": 2.9613,
"step": 1418
},
{
"epoch": 3.21,
"learning_rate": 9.401842339372232e-07,
"loss": 2.8701,
"step": 1420
},
{
"epoch": 3.22,
"learning_rate": 9.377458709722802e-07,
"loss": 2.8146,
"step": 1422
},
{
"epoch": 3.22,
"learning_rate": 9.353078795303785e-07,
"loss": 2.8971,
"step": 1424
},
{
"epoch": 3.22,
"learning_rate": 9.32870274161076e-07,
"loss": 2.826,
"step": 1426
},
{
"epoch": 3.23,
"learning_rate": 9.304330694116246e-07,
"loss": 2.4784,
"step": 1428
},
{
"epoch": 3.23,
"learning_rate": 9.279962798268869e-07,
"loss": 2.9121,
"step": 1430
},
{
"epoch": 3.24,
"learning_rate": 9.255599199492477e-07,
"loss": 2.7017,
"step": 1432
},
{
"epoch": 3.24,
"learning_rate": 9.231240043185263e-07,
"loss": 2.5173,
"step": 1434
},
{
"epoch": 3.25,
"learning_rate": 9.206885474718924e-07,
"loss": 2.5665,
"step": 1436
},
{
"epoch": 3.25,
"learning_rate": 9.182535639437757e-07,
"loss": 2.4979,
"step": 1438
},
{
"epoch": 3.26,
"learning_rate": 9.158190682657834e-07,
"loss": 3.0036,
"step": 1440
},
{
"epoch": 3.26,
"learning_rate": 9.133850749666104e-07,
"loss": 2.8472,
"step": 1442
},
{
"epoch": 3.27,
"learning_rate": 9.109515985719524e-07,
"loss": 3.1484,
"step": 1444
},
{
"epoch": 3.27,
"learning_rate": 9.085186536044221e-07,
"loss": 2.7544,
"step": 1446
},
{
"epoch": 3.27,
"learning_rate": 9.060862545834594e-07,
"loss": 2.4876,
"step": 1448
},
{
"epoch": 3.28,
"learning_rate": 9.03654416025247e-07,
"loss": 2.5342,
"step": 1450
},
{
"epoch": 3.28,
"learning_rate": 9.012231524426218e-07,
"loss": 3.0598,
"step": 1452
},
{
"epoch": 3.29,
"learning_rate": 8.987924783449902e-07,
"loss": 2.8366,
"step": 1454
},
{
"epoch": 3.29,
"learning_rate": 8.963624082382406e-07,
"loss": 2.8295,
"step": 1456
},
{
"epoch": 3.3,
"learning_rate": 8.939329566246562e-07,
"loss": 2.5202,
"step": 1458
},
{
"epoch": 3.3,
"learning_rate": 8.915041380028305e-07,
"loss": 2.9637,
"step": 1460
},
{
"epoch": 3.31,
"learning_rate": 8.890759668675773e-07,
"loss": 2.8707,
"step": 1462
},
{
"epoch": 3.31,
"learning_rate": 8.866484577098482e-07,
"loss": 2.758,
"step": 1464
},
{
"epoch": 3.31,
"learning_rate": 8.842216250166431e-07,
"loss": 3.0174,
"step": 1466
},
{
"epoch": 3.32,
"learning_rate": 8.817954832709255e-07,
"loss": 2.8084,
"step": 1468
},
{
"epoch": 3.32,
"learning_rate": 8.793700469515353e-07,
"loss": 2.8896,
"step": 1470
},
{
"epoch": 3.33,
"learning_rate": 8.769453305331021e-07,
"loss": 2.7274,
"step": 1472
},
{
"epoch": 3.33,
"learning_rate": 8.745213484859604e-07,
"loss": 2.8304,
"step": 1474
},
{
"epoch": 3.34,
"learning_rate": 8.720981152760599e-07,
"loss": 2.9487,
"step": 1476
},
{
"epoch": 3.34,
"learning_rate": 8.696756453648836e-07,
"loss": 2.6765,
"step": 1478
},
{
"epoch": 3.35,
"learning_rate": 8.672539532093584e-07,
"loss": 2.9754,
"step": 1480
},
{
"epoch": 3.35,
"learning_rate": 8.648330532617692e-07,
"loss": 3.1057,
"step": 1482
},
{
"epoch": 3.36,
"learning_rate": 8.62412959969674e-07,
"loss": 3.1504,
"step": 1484
},
{
"epoch": 3.36,
"learning_rate": 8.599936877758158e-07,
"loss": 3.1809,
"step": 1486
},
{
"epoch": 3.36,
"learning_rate": 8.575752511180386e-07,
"loss": 2.8562,
"step": 1488
},
{
"epoch": 3.37,
"learning_rate": 8.551576644291988e-07,
"loss": 2.9199,
"step": 1490
},
{
"epoch": 3.37,
"learning_rate": 8.527409421370812e-07,
"loss": 3.0049,
"step": 1492
},
{
"epoch": 3.38,
"learning_rate": 8.503250986643125e-07,
"loss": 2.7818,
"step": 1494
},
{
"epoch": 3.38,
"learning_rate": 8.479101484282726e-07,
"loss": 3.0402,
"step": 1496
},
{
"epoch": 3.39,
"learning_rate": 8.45496105841013e-07,
"loss": 2.9152,
"step": 1498
},
{
"epoch": 3.39,
"learning_rate": 8.430829853091674e-07,
"loss": 3.2198,
"step": 1500
},
{
"epoch": 3.4,
"learning_rate": 8.40670801233867e-07,
"loss": 2.9759,
"step": 1502
},
{
"epoch": 3.4,
"learning_rate": 8.382595680106543e-07,
"loss": 2.7832,
"step": 1504
},
{
"epoch": 3.41,
"learning_rate": 8.358493000293971e-07,
"loss": 2.8934,
"step": 1506
},
{
"epoch": 3.41,
"learning_rate": 8.334400116742039e-07,
"loss": 3.1431,
"step": 1508
},
{
"epoch": 3.41,
"learning_rate": 8.310317173233352e-07,
"loss": 2.6003,
"step": 1510
},
{
"epoch": 3.42,
"learning_rate": 8.286244313491206e-07,
"loss": 2.5809,
"step": 1512
},
{
"epoch": 3.42,
"learning_rate": 8.262181681178714e-07,
"loss": 2.5528,
"step": 1514
},
{
"epoch": 3.43,
"learning_rate": 8.238129419897956e-07,
"loss": 3.105,
"step": 1516
},
{
"epoch": 3.43,
"learning_rate": 8.214087673189123e-07,
"loss": 3.015,
"step": 1518
},
{
"epoch": 3.44,
"learning_rate": 8.190056584529642e-07,
"loss": 2.6137,
"step": 1520
},
{
"epoch": 3.44,
"learning_rate": 8.166036297333352e-07,
"loss": 2.8402,
"step": 1522
},
{
"epoch": 3.45,
"learning_rate": 8.142026954949618e-07,
"loss": 2.863,
"step": 1524
},
{
"epoch": 3.45,
"learning_rate": 8.118028700662499e-07,
"loss": 2.7368,
"step": 1526
},
{
"epoch": 3.46,
"learning_rate": 8.094041677689869e-07,
"loss": 2.5069,
"step": 1528
},
{
"epoch": 3.46,
"learning_rate": 8.070066029182582e-07,
"loss": 2.9194,
"step": 1530
},
{
"epoch": 3.46,
"learning_rate": 8.046101898223618e-07,
"loss": 2.6058,
"step": 1532
},
{
"epoch": 3.47,
"learning_rate": 8.022149427827209e-07,
"loss": 2.7355,
"step": 1534
},
{
"epoch": 3.47,
"learning_rate": 7.998208760938008e-07,
"loss": 2.8185,
"step": 1536
},
{
"epoch": 3.48,
"learning_rate": 7.974280040430219e-07,
"loss": 2.8695,
"step": 1538
},
{
"epoch": 3.48,
"learning_rate": 7.950363409106758e-07,
"loss": 2.9614,
"step": 1540
},
{
"epoch": 3.49,
"learning_rate": 7.926459009698397e-07,
"loss": 3.1771,
"step": 1542
},
{
"epoch": 3.49,
"learning_rate": 7.902566984862897e-07,
"loss": 2.9998,
"step": 1544
},
{
"epoch": 3.5,
"learning_rate": 7.878687477184184e-07,
"loss": 2.5025,
"step": 1546
},
{
"epoch": 3.5,
"learning_rate": 7.854820629171475e-07,
"loss": 2.7393,
"step": 1548
},
{
"epoch": 3.5,
"learning_rate": 7.830966583258441e-07,
"loss": 2.8933,
"step": 1550
},
{
"epoch": 3.51,
"learning_rate": 7.807125481802342e-07,
"loss": 2.7926,
"step": 1552
},
{
"epoch": 3.51,
"learning_rate": 7.783297467083201e-07,
"loss": 2.8088,
"step": 1554
},
{
"epoch": 3.52,
"learning_rate": 7.759482681302937e-07,
"loss": 2.8894,
"step": 1556
},
{
"epoch": 3.52,
"learning_rate": 7.735681266584513e-07,
"loss": 2.9384,
"step": 1558
},
{
"epoch": 3.53,
"learning_rate": 7.711893364971109e-07,
"loss": 2.7174,
"step": 1560
},
{
"epoch": 3.53,
"learning_rate": 7.688119118425246e-07,
"loss": 3.0065,
"step": 1562
},
{
"epoch": 3.54,
"learning_rate": 7.664358668827966e-07,
"loss": 2.7175,
"step": 1564
},
{
"epoch": 3.54,
"learning_rate": 7.640612157977963e-07,
"loss": 2.9579,
"step": 1566
},
{
"epoch": 3.55,
"learning_rate": 7.616879727590757e-07,
"loss": 2.8526,
"step": 1568
},
{
"epoch": 3.55,
"learning_rate": 7.593161519297833e-07,
"loss": 2.9314,
"step": 1570
},
{
"epoch": 3.55,
"learning_rate": 7.569457674645795e-07,
"loss": 3.2361,
"step": 1572
},
{
"epoch": 3.56,
"learning_rate": 7.54576833509554e-07,
"loss": 2.8873,
"step": 1574
},
{
"epoch": 3.56,
"learning_rate": 7.522093642021385e-07,
"loss": 2.7272,
"step": 1576
},
{
"epoch": 3.57,
"learning_rate": 7.498433736710252e-07,
"loss": 2.9086,
"step": 1578
},
{
"epoch": 3.57,
"learning_rate": 7.474788760360808e-07,
"loss": 2.7368,
"step": 1580
},
{
"epoch": 3.58,
"learning_rate": 7.451158854082625e-07,
"loss": 2.5693,
"step": 1582
},
{
"epoch": 3.58,
"learning_rate": 7.427544158895339e-07,
"loss": 3.0066,
"step": 1584
},
{
"epoch": 3.59,
"learning_rate": 7.403944815727808e-07,
"loss": 2.8588,
"step": 1586
},
{
"epoch": 3.59,
"learning_rate": 7.380360965417274e-07,
"loss": 2.5618,
"step": 1588
},
{
"epoch": 3.6,
"learning_rate": 7.356792748708516e-07,
"loss": 3.1683,
"step": 1590
},
{
"epoch": 3.6,
"learning_rate": 7.333240306253019e-07,
"loss": 2.4461,
"step": 1592
},
{
"epoch": 3.6,
"learning_rate": 7.309703778608134e-07,
"loss": 2.6854,
"step": 1594
},
{
"epoch": 3.61,
"learning_rate": 7.286183306236214e-07,
"loss": 2.6439,
"step": 1596
},
{
"epoch": 3.61,
"learning_rate": 7.262679029503827e-07,
"loss": 2.9089,
"step": 1598
},
{
"epoch": 3.62,
"learning_rate": 7.239191088680859e-07,
"loss": 2.9493,
"step": 1600
},
{
"epoch": 3.62,
"learning_rate": 7.215719623939724e-07,
"loss": 2.6733,
"step": 1602
},
{
"epoch": 3.63,
"learning_rate": 7.192264775354507e-07,
"loss": 3.1419,
"step": 1604
},
{
"epoch": 3.63,
"learning_rate": 7.16882668290012e-07,
"loss": 2.8254,
"step": 1606
},
{
"epoch": 3.64,
"learning_rate": 7.145405486451492e-07,
"loss": 2.821,
"step": 1608
},
{
"epoch": 3.64,
"learning_rate": 7.122001325782707e-07,
"loss": 2.646,
"step": 1610
},
{
"epoch": 3.64,
"learning_rate": 7.098614340566187e-07,
"loss": 2.6237,
"step": 1612
},
{
"epoch": 3.65,
"learning_rate": 7.075244670371853e-07,
"loss": 2.8235,
"step": 1614
},
{
"epoch": 3.65,
"learning_rate": 7.051892454666293e-07,
"loss": 2.4762,
"step": 1616
},
{
"epoch": 3.66,
"learning_rate": 7.028557832811934e-07,
"loss": 2.7275,
"step": 1618
},
{
"epoch": 3.66,
"learning_rate": 7.005240944066193e-07,
"loss": 2.9431,
"step": 1620
},
{
"epoch": 3.67,
"learning_rate": 6.981941927580672e-07,
"loss": 2.8502,
"step": 1622
},
{
"epoch": 3.67,
"learning_rate": 6.958660922400304e-07,
"loss": 2.8384,
"step": 1624
},
{
"epoch": 3.68,
"learning_rate": 6.935398067462544e-07,
"loss": 2.8361,
"step": 1626
},
{
"epoch": 3.68,
"learning_rate": 6.912153501596516e-07,
"loss": 2.5719,
"step": 1628
},
{
"epoch": 3.69,
"learning_rate": 6.888927363522203e-07,
"loss": 2.5443,
"step": 1630
},
{
"epoch": 3.69,
"learning_rate": 6.865719791849627e-07,
"loss": 2.7307,
"step": 1632
},
{
"epoch": 3.69,
"learning_rate": 6.842530925077985e-07,
"loss": 2.9377,
"step": 1634
},
{
"epoch": 3.7,
"learning_rate": 6.819360901594866e-07,
"loss": 3.1469,
"step": 1636
},
{
"epoch": 3.7,
"learning_rate": 6.796209859675391e-07,
"loss": 3.0058,
"step": 1638
},
{
"epoch": 3.71,
"learning_rate": 6.773077937481409e-07,
"loss": 2.7583,
"step": 1640
},
{
"epoch": 3.71,
"learning_rate": 6.749965273060669e-07,
"loss": 3.0511,
"step": 1642
},
{
"epoch": 3.72,
"learning_rate": 6.726872004345983e-07,
"loss": 2.712,
"step": 1644
},
{
"epoch": 3.72,
"learning_rate": 6.70379826915442e-07,
"loss": 2.7067,
"step": 1646
},
{
"epoch": 3.73,
"learning_rate": 6.680744205186471e-07,
"loss": 2.664,
"step": 1648
},
{
"epoch": 3.73,
"learning_rate": 6.657709950025241e-07,
"loss": 3.0128,
"step": 1650
},
{
"epoch": 3.74,
"learning_rate": 6.634695641135603e-07,
"loss": 2.8639,
"step": 1652
},
{
"epoch": 3.74,
"learning_rate": 6.611701415863409e-07,
"loss": 2.5107,
"step": 1654
},
{
"epoch": 3.74,
"learning_rate": 6.588727411434649e-07,
"loss": 2.9211,
"step": 1656
},
{
"epoch": 3.75,
"learning_rate": 6.565773764954639e-07,
"loss": 2.6104,
"step": 1658
},
{
"epoch": 3.75,
"learning_rate": 6.542840613407202e-07,
"loss": 2.9756,
"step": 1660
},
{
"epoch": 3.76,
"learning_rate": 6.519928093653843e-07,
"loss": 2.8468,
"step": 1662
},
{
"epoch": 3.76,
"learning_rate": 6.49703634243296e-07,
"loss": 2.8689,
"step": 1664
},
{
"epoch": 3.77,
"learning_rate": 6.474165496358981e-07,
"loss": 2.8522,
"step": 1666
},
{
"epoch": 3.77,
"learning_rate": 6.451315691921593e-07,
"loss": 2.5796,
"step": 1668
},
{
"epoch": 3.78,
"learning_rate": 6.428487065484908e-07,
"loss": 2.9733,
"step": 1670
},
{
"epoch": 3.78,
"learning_rate": 6.405679753286644e-07,
"loss": 3.1356,
"step": 1672
},
{
"epoch": 3.79,
"learning_rate": 6.382893891437328e-07,
"loss": 2.8461,
"step": 1674
},
{
"epoch": 3.79,
"learning_rate": 6.360129615919465e-07,
"loss": 2.919,
"step": 1676
},
{
"epoch": 3.79,
"learning_rate": 6.33738706258674e-07,
"loss": 2.6785,
"step": 1678
},
{
"epoch": 3.8,
"learning_rate": 6.314666367163209e-07,
"loss": 3.3216,
"step": 1680
},
{
"epoch": 3.8,
"learning_rate": 6.291967665242475e-07,
"loss": 2.9245,
"step": 1682
},
{
"epoch": 3.81,
"learning_rate": 6.269291092286895e-07,
"loss": 2.5513,
"step": 1684
},
{
"epoch": 3.81,
"learning_rate": 6.24663678362675e-07,
"loss": 2.8984,
"step": 1686
},
{
"epoch": 3.82,
"learning_rate": 6.224004874459471e-07,
"loss": 2.9254,
"step": 1688
},
{
"epoch": 3.82,
"learning_rate": 6.201395499848794e-07,
"loss": 2.6489,
"step": 1690
},
{
"epoch": 3.83,
"learning_rate": 6.178808794723985e-07,
"loss": 2.8017,
"step": 1692
},
{
"epoch": 3.83,
"learning_rate": 6.156244893879016e-07,
"loss": 2.8137,
"step": 1694
},
{
"epoch": 3.83,
"learning_rate": 6.133703931971764e-07,
"loss": 2.6998,
"step": 1696
},
{
"epoch": 3.84,
"learning_rate": 6.111186043523222e-07,
"loss": 2.6918,
"step": 1698
},
{
"epoch": 3.84,
"learning_rate": 6.088691362916667e-07,
"loss": 3.1488,
"step": 1700
},
{
"epoch": 3.85,
"learning_rate": 6.066220024396886e-07,
"loss": 2.7749,
"step": 1702
},
{
"epoch": 3.85,
"learning_rate": 6.043772162069365e-07,
"loss": 2.7309,
"step": 1704
},
{
"epoch": 3.86,
"learning_rate": 6.021347909899482e-07,
"loss": 2.8535,
"step": 1706
},
{
"epoch": 3.86,
"learning_rate": 5.998947401711719e-07,
"loss": 2.7431,
"step": 1708
},
{
"epoch": 3.87,
"learning_rate": 5.976570771188849e-07,
"loss": 2.9236,
"step": 1710
},
{
"epoch": 3.87,
"learning_rate": 5.954218151871156e-07,
"loss": 2.7266,
"step": 1712
},
{
"epoch": 3.88,
"learning_rate": 5.931889677155624e-07,
"loss": 2.6797,
"step": 1714
},
{
"epoch": 3.88,
"learning_rate": 5.909585480295146e-07,
"loss": 2.841,
"step": 1716
},
{
"epoch": 3.88,
"learning_rate": 5.887305694397735e-07,
"loss": 2.8735,
"step": 1718
},
{
"epoch": 3.89,
"learning_rate": 5.865050452425709e-07,
"loss": 2.7291,
"step": 1720
},
{
"epoch": 3.89,
"learning_rate": 5.842819887194925e-07,
"loss": 3.0589,
"step": 1722
},
{
"epoch": 3.9,
"learning_rate": 5.820614131373967e-07,
"loss": 2.8343,
"step": 1724
},
{
"epoch": 3.9,
"learning_rate": 5.798433317483361e-07,
"loss": 2.6256,
"step": 1726
},
{
"epoch": 3.91,
"learning_rate": 5.776277577894785e-07,
"loss": 2.83,
"step": 1728
},
{
"epoch": 3.91,
"learning_rate": 5.754147044830271e-07,
"loss": 2.8623,
"step": 1730
},
{
"epoch": 3.92,
"learning_rate": 5.732041850361435e-07,
"loss": 2.9518,
"step": 1732
},
{
"epoch": 3.92,
"learning_rate": 5.709962126408668e-07,
"loss": 2.6937,
"step": 1734
},
{
"epoch": 3.93,
"learning_rate": 5.687908004740353e-07,
"loss": 2.6544,
"step": 1736
},
{
"epoch": 3.93,
"learning_rate": 5.665879616972089e-07,
"loss": 2.5396,
"step": 1738
},
{
"epoch": 3.93,
"learning_rate": 5.643877094565894e-07,
"loss": 2.9101,
"step": 1740
},
{
"epoch": 3.94,
"learning_rate": 5.621900568829437e-07,
"loss": 3.1239,
"step": 1742
},
{
"epoch": 3.94,
"learning_rate": 5.599950170915228e-07,
"loss": 2.9285,
"step": 1744
},
{
"epoch": 3.95,
"learning_rate": 5.578026031819858e-07,
"loss": 2.872,
"step": 1746
},
{
"epoch": 3.95,
"learning_rate": 5.556128282383211e-07,
"loss": 2.8598,
"step": 1748
},
{
"epoch": 3.96,
"learning_rate": 5.534257053287673e-07,
"loss": 2.815,
"step": 1750
},
{
"epoch": 3.96,
"learning_rate": 5.512412475057367e-07,
"loss": 2.9105,
"step": 1752
},
{
"epoch": 3.97,
"learning_rate": 5.490594678057374e-07,
"loss": 3.0166,
"step": 1754
},
{
"epoch": 3.97,
"learning_rate": 5.468803792492937e-07,
"loss": 2.9043,
"step": 1756
},
{
"epoch": 3.98,
"learning_rate": 5.447039948408699e-07,
"loss": 3.0569,
"step": 1758
},
{
"epoch": 3.98,
"learning_rate": 5.425303275687933e-07,
"loss": 2.7908,
"step": 1760
},
{
"epoch": 3.98,
"learning_rate": 5.403593904051739e-07,
"loss": 2.7985,
"step": 1762
},
{
"epoch": 3.99,
"learning_rate": 5.381911963058308e-07,
"loss": 2.8058,
"step": 1764
},
{
"epoch": 3.99,
"learning_rate": 5.360257582102114e-07,
"loss": 2.4029,
"step": 1766
},
{
"epoch": 4.0,
"learning_rate": 5.338630890413165e-07,
"loss": 2.5988,
"step": 1768
},
{
"epoch": 4.0,
"learning_rate": 5.317032017056229e-07,
"loss": 3.0387,
"step": 1770
},
{
"epoch": 4.01,
"learning_rate": 5.295461090930038e-07,
"loss": 2.9505,
"step": 1772
},
{
"epoch": 4.01,
"learning_rate": 5.273918240766564e-07,
"loss": 3.269,
"step": 1774
},
{
"epoch": 4.02,
"learning_rate": 5.252403595130208e-07,
"loss": 2.4454,
"step": 1776
},
{
"epoch": 4.02,
"learning_rate": 5.230917282417058e-07,
"loss": 2.8803,
"step": 1778
},
{
"epoch": 4.02,
"learning_rate": 5.209459430854124e-07,
"loss": 2.9931,
"step": 1780
},
{
"epoch": 4.03,
"learning_rate": 5.18803016849854e-07,
"loss": 2.8475,
"step": 1782
},
{
"epoch": 4.03,
"learning_rate": 5.16662962323685e-07,
"loss": 2.9663,
"step": 1784
},
{
"epoch": 4.04,
"learning_rate": 5.145257922784205e-07,
"loss": 2.4356,
"step": 1786
},
{
"epoch": 4.04,
"learning_rate": 5.123915194683617e-07,
"loss": 2.7208,
"step": 1788
},
{
"epoch": 4.05,
"learning_rate": 5.102601566305192e-07,
"loss": 2.8387,
"step": 1790
},
{
"epoch": 4.05,
"learning_rate": 5.081317164845384e-07,
"loss": 2.6345,
"step": 1792
},
{
"epoch": 4.06,
"learning_rate": 5.060062117326219e-07,
"loss": 2.923,
"step": 1794
},
{
"epoch": 4.06,
"learning_rate": 5.038836550594538e-07,
"loss": 3.0051,
"step": 1796
},
{
"epoch": 4.07,
"learning_rate": 5.017640591321252e-07,
"loss": 2.6653,
"step": 1798
},
{
"epoch": 4.07,
"learning_rate": 4.996474366000574e-07,
"loss": 2.8347,
"step": 1800
},
{
"epoch": 4.07,
"learning_rate": 4.975338000949278e-07,
"loss": 2.3805,
"step": 1802
},
{
"epoch": 4.08,
"learning_rate": 4.95423162230593e-07,
"loss": 2.8202,
"step": 1804
},
{
"epoch": 4.08,
"learning_rate": 4.933155356030142e-07,
"loss": 2.874,
"step": 1806
},
{
"epoch": 4.09,
"learning_rate": 4.912109327901821e-07,
"loss": 2.5629,
"step": 1808
},
{
"epoch": 4.09,
"learning_rate": 4.891093663520413e-07,
"loss": 3.2899,
"step": 1810
},
{
"epoch": 4.1,
"learning_rate": 4.870108488304174e-07,
"loss": 3.0521,
"step": 1812
},
{
"epoch": 4.1,
"learning_rate": 4.849153927489377e-07,
"loss": 2.5282,
"step": 1814
},
{
"epoch": 4.11,
"learning_rate": 4.828230106129623e-07,
"loss": 2.7628,
"step": 1816
},
{
"epoch": 4.11,
"learning_rate": 4.807337149095045e-07,
"loss": 3.2389,
"step": 1818
},
{
"epoch": 4.12,
"learning_rate": 4.786475181071587e-07,
"loss": 3.0311,
"step": 1820
},
{
"epoch": 4.12,
"learning_rate": 4.7656443265602687e-07,
"loss": 2.9986,
"step": 1822
},
{
"epoch": 4.12,
"learning_rate": 4.744844709876401e-07,
"loss": 2.8551,
"step": 1824
},
{
"epoch": 4.13,
"learning_rate": 4.7240764551488985e-07,
"loss": 2.4769,
"step": 1826
},
{
"epoch": 4.13,
"learning_rate": 4.703339686319503e-07,
"loss": 2.8349,
"step": 1828
},
{
"epoch": 4.14,
"learning_rate": 4.6826345271420454e-07,
"loss": 2.6749,
"step": 1830
},
{
"epoch": 4.14,
"learning_rate": 4.661961101181733e-07,
"loss": 2.7625,
"step": 1832
},
{
"epoch": 4.15,
"learning_rate": 4.6413195318143774e-07,
"loss": 3.217,
"step": 1834
},
{
"epoch": 4.15,
"learning_rate": 4.6207099422256835e-07,
"loss": 2.6652,
"step": 1836
},
{
"epoch": 4.16,
"learning_rate": 4.6001324554105026e-07,
"loss": 2.7919,
"step": 1838
},
{
"epoch": 4.16,
"learning_rate": 4.5795871941720996e-07,
"loss": 3.0125,
"step": 1840
},
{
"epoch": 4.17,
"learning_rate": 4.5590742811214335e-07,
"loss": 2.8058,
"step": 1842
},
{
"epoch": 4.17,
"learning_rate": 4.538593838676401e-07,
"loss": 2.6426,
"step": 1844
},
{
"epoch": 4.17,
"learning_rate": 4.518145989061124e-07,
"loss": 2.7242,
"step": 1846
},
{
"epoch": 4.18,
"learning_rate": 4.497730854305217e-07,
"loss": 2.7069,
"step": 1848
},
{
"epoch": 4.18,
"learning_rate": 4.4773485562430546e-07,
"loss": 2.7588,
"step": 1850
},
{
"epoch": 4.19,
"learning_rate": 4.4569992165130466e-07,
"loss": 2.8127,
"step": 1852
},
{
"epoch": 4.19,
"learning_rate": 4.43668295655692e-07,
"loss": 3.2283,
"step": 1854
},
{
"epoch": 4.2,
"learning_rate": 4.4163998976189776e-07,
"loss": 2.7166,
"step": 1856
},
{
"epoch": 4.2,
"learning_rate": 4.396150160745384e-07,
"loss": 2.9462,
"step": 1858
},
{
"epoch": 4.21,
"learning_rate": 4.375933866783458e-07,
"loss": 2.7383,
"step": 1860
},
{
"epoch": 4.21,
"learning_rate": 4.355751136380909e-07,
"loss": 2.7109,
"step": 1862
},
{
"epoch": 4.21,
"learning_rate": 4.335602089985171e-07,
"loss": 3.0711,
"step": 1864
},
{
"epoch": 4.22,
"learning_rate": 4.31548684784264e-07,
"loss": 2.7965,
"step": 1866
},
{
"epoch": 4.22,
"learning_rate": 4.2954055299979784e-07,
"loss": 2.9035,
"step": 1868
},
{
"epoch": 4.23,
"learning_rate": 4.275358256293403e-07,
"loss": 2.644,
"step": 1870
},
{
"epoch": 4.23,
"learning_rate": 4.255345146367938e-07,
"loss": 3.3082,
"step": 1872
},
{
"epoch": 4.24,
"learning_rate": 4.235366319656749e-07,
"loss": 2.8945,
"step": 1874
},
{
"epoch": 4.24,
"learning_rate": 4.2154218953903875e-07,
"loss": 2.9888,
"step": 1876
},
{
"epoch": 4.25,
"learning_rate": 4.1955119925940996e-07,
"loss": 2.8864,
"step": 1878
},
{
"epoch": 4.25,
"learning_rate": 4.1756367300871255e-07,
"loss": 2.535,
"step": 1880
},
{
"epoch": 4.26,
"learning_rate": 4.155796226481951e-07,
"loss": 2.8278,
"step": 1882
},
{
"epoch": 4.26,
"learning_rate": 4.135990600183654e-07,
"loss": 2.6605,
"step": 1884
},
{
"epoch": 4.26,
"learning_rate": 4.1162199693891555e-07,
"loss": 2.5885,
"step": 1886
},
{
"epoch": 4.27,
"learning_rate": 4.0964844520865284e-07,
"loss": 2.8822,
"step": 1888
},
{
"epoch": 4.27,
"learning_rate": 4.0767841660542943e-07,
"loss": 2.6706,
"step": 1890
},
{
"epoch": 4.28,
"learning_rate": 4.0571192288607305e-07,
"loss": 2.6113,
"step": 1892
},
{
"epoch": 4.28,
"learning_rate": 4.037489757863145e-07,
"loss": 2.7017,
"step": 1894
},
{
"epoch": 4.29,
"learning_rate": 4.017895870207193e-07,
"loss": 2.6754,
"step": 1896
},
{
"epoch": 4.29,
"learning_rate": 3.9983376828261764e-07,
"loss": 2.9501,
"step": 1898
},
{
"epoch": 4.3,
"learning_rate": 3.9788153124403343e-07,
"loss": 2.6901,
"step": 1900
},
{
"epoch": 4.3,
"learning_rate": 3.9593288755561737e-07,
"loss": 2.7851,
"step": 1902
},
{
"epoch": 4.31,
"learning_rate": 3.939878488465739e-07,
"loss": 2.9811,
"step": 1904
},
{
"epoch": 4.31,
"learning_rate": 3.920464267245941e-07,
"loss": 2.5464,
"step": 1906
},
{
"epoch": 4.31,
"learning_rate": 3.901086327757859e-07,
"loss": 2.7844,
"step": 1908
},
{
"epoch": 4.32,
"learning_rate": 3.8817447856460427e-07,
"loss": 2.944,
"step": 1910
},
{
"epoch": 4.32,
"learning_rate": 3.8624397563378453e-07,
"loss": 2.8095,
"step": 1912
},
{
"epoch": 4.33,
"learning_rate": 3.843171355042688e-07,
"loss": 2.9404,
"step": 1914
},
{
"epoch": 4.33,
"learning_rate": 3.823939696751427e-07,
"loss": 2.5454,
"step": 1916
},
{
"epoch": 4.34,
"learning_rate": 3.804744896235641e-07,
"loss": 2.7126,
"step": 1918
},
{
"epoch": 4.34,
"learning_rate": 3.7855870680469225e-07,
"loss": 2.9984,
"step": 1920
},
{
"epoch": 4.35,
"learning_rate": 3.7664663265162487e-07,
"loss": 2.7908,
"step": 1922
},
{
"epoch": 4.35,
"learning_rate": 3.7473827857532535e-07,
"loss": 2.5333,
"step": 1924
},
{
"epoch": 4.36,
"learning_rate": 3.728336559645563e-07,
"loss": 3.2184,
"step": 1926
},
{
"epoch": 4.36,
"learning_rate": 3.709327761858118e-07,
"loss": 2.422,
"step": 1928
},
{
"epoch": 4.36,
"learning_rate": 3.69035650583249e-07,
"loss": 3.0345,
"step": 1930
},
{
"epoch": 4.37,
"learning_rate": 3.6714229047862156e-07,
"loss": 2.8207,
"step": 1932
},
{
"epoch": 4.37,
"learning_rate": 3.652527071712101e-07,
"loss": 2.6455,
"step": 1934
},
{
"epoch": 4.38,
"learning_rate": 3.633669119377568e-07,
"loss": 2.9046,
"step": 1936
},
{
"epoch": 4.38,
"learning_rate": 3.614849160323965e-07,
"loss": 2.9388,
"step": 1938
},
{
"epoch": 4.39,
"learning_rate": 3.596067306865902e-07,
"loss": 2.7176,
"step": 1940
},
{
"epoch": 4.39,
"learning_rate": 3.5773236710905917e-07,
"loss": 3.2415,
"step": 1942
},
{
"epoch": 4.4,
"learning_rate": 3.558618364857158e-07,
"loss": 2.7344,
"step": 1944
},
{
"epoch": 4.4,
"learning_rate": 3.5399514997959847e-07,
"loss": 3.1536,
"step": 1946
},
{
"epoch": 4.4,
"learning_rate": 3.521323187308043e-07,
"loss": 2.9705,
"step": 1948
},
{
"epoch": 4.41,
"learning_rate": 3.5027335385642285e-07,
"loss": 2.8474,
"step": 1950
},
{
"epoch": 4.41,
"learning_rate": 3.484182664504697e-07,
"loss": 2.572,
"step": 1952
},
{
"epoch": 4.42,
"learning_rate": 3.465670675838209e-07,
"loss": 3.1516,
"step": 1954
},
{
"epoch": 4.42,
"learning_rate": 3.4471976830414564e-07,
"loss": 3.0114,
"step": 1956
},
{
"epoch": 4.43,
"learning_rate": 3.428763796358406e-07,
"loss": 2.7337,
"step": 1958
},
{
"epoch": 4.43,
"learning_rate": 3.410369125799661e-07,
"loss": 2.9596,
"step": 1960
},
{
"epoch": 4.44,
"learning_rate": 3.392013781141767e-07,
"loss": 2.9242,
"step": 1962
},
{
"epoch": 4.44,
"learning_rate": 3.3736978719265995e-07,
"loss": 2.5161,
"step": 1964
},
{
"epoch": 4.45,
"learning_rate": 3.3554215074606775e-07,
"loss": 2.5524,
"step": 1966
},
{
"epoch": 4.45,
"learning_rate": 3.3371847968145197e-07,
"loss": 2.929,
"step": 1968
},
{
"epoch": 4.45,
"learning_rate": 3.318987848822016e-07,
"loss": 2.6728,
"step": 1970
},
{
"epoch": 4.46,
"learning_rate": 3.300830772079731e-07,
"loss": 3.0084,
"step": 1972
},
{
"epoch": 4.46,
"learning_rate": 3.282713674946309e-07,
"loss": 2.7006,
"step": 1974
},
{
"epoch": 4.47,
"learning_rate": 3.264636665541789e-07,
"loss": 2.726,
"step": 1976
},
{
"epoch": 4.47,
"learning_rate": 3.2465998517469705e-07,
"loss": 2.6391,
"step": 1978
},
{
"epoch": 4.48,
"learning_rate": 3.228603341202788e-07,
"loss": 2.7296,
"step": 1980
},
{
"epoch": 4.48,
"learning_rate": 3.2106472413096286e-07,
"loss": 2.7943,
"step": 1982
},
{
"epoch": 4.49,
"learning_rate": 3.192731659226735e-07,
"loss": 2.7679,
"step": 1984
},
{
"epoch": 4.49,
"learning_rate": 3.1748567018715366e-07,
"loss": 2.8648,
"step": 1986
},
{
"epoch": 4.5,
"learning_rate": 3.1570224759190234e-07,
"loss": 2.8623,
"step": 1988
},
{
"epoch": 4.5,
"learning_rate": 3.1392290878010994e-07,
"loss": 2.87,
"step": 1990
},
{
"epoch": 4.5,
"learning_rate": 3.1214766437059703e-07,
"loss": 2.4638,
"step": 1992
},
{
"epoch": 4.51,
"learning_rate": 3.1037652495774804e-07,
"loss": 3.205,
"step": 1994
},
{
"epoch": 4.51,
"learning_rate": 3.086095011114498e-07,
"loss": 2.5909,
"step": 1996
},
{
"epoch": 4.52,
"learning_rate": 3.0684660337702817e-07,
"loss": 2.9936,
"step": 1998
},
{
"epoch": 4.52,
"learning_rate": 3.050878422751847e-07,
"loss": 2.8594,
"step": 2000
},
{
"epoch": 4.53,
"learning_rate": 3.0333322830193497e-07,
"loss": 2.891,
"step": 2002
},
{
"epoch": 4.53,
"learning_rate": 3.0158277192854433e-07,
"loss": 2.8304,
"step": 2004
},
{
"epoch": 4.54,
"learning_rate": 2.998364836014665e-07,
"loss": 2.9801,
"step": 2006
},
{
"epoch": 4.54,
"learning_rate": 2.98094373742281e-07,
"loss": 3.0548,
"step": 2008
},
{
"epoch": 4.54,
"learning_rate": 2.963564527476302e-07,
"loss": 3.0822,
"step": 2010
},
{
"epoch": 4.55,
"learning_rate": 2.946227309891598e-07,
"loss": 3.1002,
"step": 2012
},
{
"epoch": 4.55,
"learning_rate": 2.9289321881345254e-07,
"loss": 2.5651,
"step": 2014
},
{
"epoch": 4.56,
"learning_rate": 2.91167926541971e-07,
"loss": 2.7928,
"step": 2016
},
{
"epoch": 4.56,
"learning_rate": 2.894468644709941e-07,
"loss": 2.6256,
"step": 2018
},
{
"epoch": 4.57,
"learning_rate": 2.8773004287155343e-07,
"loss": 2.5386,
"step": 2020
},
{
"epoch": 4.57,
"learning_rate": 2.860174719893768e-07,
"loss": 3.026,
"step": 2022
},
{
"epoch": 4.58,
"learning_rate": 2.8430916204482246e-07,
"loss": 2.6773,
"step": 2024
},
{
"epoch": 4.58,
"learning_rate": 2.8260512323282093e-07,
"loss": 2.7538,
"step": 2026
},
{
"epoch": 4.59,
"learning_rate": 2.809053657228131e-07,
"loss": 2.8801,
"step": 2028
},
{
"epoch": 4.59,
"learning_rate": 2.792098996586891e-07,
"loss": 2.6075,
"step": 2030
},
{
"epoch": 4.59,
"learning_rate": 2.7751873515872993e-07,
"loss": 2.4356,
"step": 2032
},
{
"epoch": 4.6,
"learning_rate": 2.7583188231554386e-07,
"loss": 3.1992,
"step": 2034
},
{
"epoch": 4.6,
"learning_rate": 2.741493511960088e-07,
"loss": 2.7167,
"step": 2036
},
{
"epoch": 4.61,
"learning_rate": 2.724711518412108e-07,
"loss": 3.2261,
"step": 2038
},
{
"epoch": 4.61,
"learning_rate": 2.7079729426638466e-07,
"loss": 2.4948,
"step": 2040
},
{
"epoch": 4.62,
"learning_rate": 2.691277884608547e-07,
"loss": 2.6697,
"step": 2042
},
{
"epoch": 4.62,
"learning_rate": 2.6746264438797396e-07,
"loss": 2.9029,
"step": 2044
},
{
"epoch": 4.63,
"learning_rate": 2.658018719850654e-07,
"loss": 2.9799,
"step": 2046
},
{
"epoch": 4.63,
"learning_rate": 2.6414548116336244e-07,
"loss": 2.7396,
"step": 2048
},
{
"epoch": 4.64,
"learning_rate": 2.6249348180795073e-07,
"loss": 2.9322,
"step": 2050
},
{
"epoch": 4.64,
"learning_rate": 2.6084588377770664e-07,
"loss": 2.906,
"step": 2052
},
{
"epoch": 4.64,
"learning_rate": 2.592026969052421e-07,
"loss": 2.7256,
"step": 2054
},
{
"epoch": 4.65,
"learning_rate": 2.575639309968425e-07,
"loss": 3.139,
"step": 2056
},
{
"epoch": 4.65,
"learning_rate": 2.559295958324097e-07,
"loss": 2.8881,
"step": 2058
},
{
"epoch": 4.66,
"learning_rate": 2.542997011654051e-07,
"loss": 2.8097,
"step": 2060
},
{
"epoch": 4.66,
"learning_rate": 2.5267425672278733e-07,
"loss": 2.6047,
"step": 2062
},
{
"epoch": 4.67,
"learning_rate": 2.510532722049591e-07,
"loss": 2.5878,
"step": 2064
},
{
"epoch": 4.67,
"learning_rate": 2.4943675728570567e-07,
"loss": 2.9128,
"step": 2066
},
{
"epoch": 4.68,
"learning_rate": 2.478247216121386e-07,
"loss": 2.8503,
"step": 2068
},
{
"epoch": 4.68,
"learning_rate": 2.462171748046392e-07,
"loss": 3.01,
"step": 2070
},
{
"epoch": 4.69,
"learning_rate": 2.4461412645679746e-07,
"loss": 3.1334,
"step": 2072
},
{
"epoch": 4.69,
"learning_rate": 2.4301558613535967e-07,
"loss": 2.6883,
"step": 2074
},
{
"epoch": 4.69,
"learning_rate": 2.414215633801678e-07,
"loss": 2.5622,
"step": 2076
},
{
"epoch": 4.7,
"learning_rate": 2.398320677041035e-07,
"loss": 2.9577,
"step": 2078
},
{
"epoch": 4.7,
"learning_rate": 2.3824710859303222e-07,
"loss": 3.0543,
"step": 2080
},
{
"epoch": 4.71,
"learning_rate": 2.3666669550574547e-07,
"loss": 2.4341,
"step": 2082
},
{
"epoch": 4.71,
"learning_rate": 2.350908378739047e-07,
"loss": 2.8255,
"step": 2084
},
{
"epoch": 4.72,
"learning_rate": 2.3351954510198534e-07,
"loss": 2.8486,
"step": 2086
},
{
"epoch": 4.72,
"learning_rate": 2.3195282656722026e-07,
"loss": 2.7911,
"step": 2088
},
{
"epoch": 4.73,
"learning_rate": 2.3039069161954383e-07,
"loss": 2.7081,
"step": 2090
},
{
"epoch": 4.73,
"learning_rate": 2.288331495815371e-07,
"loss": 2.803,
"step": 2092
},
{
"epoch": 4.73,
"learning_rate": 2.2728020974837036e-07,
"loss": 2.9866,
"step": 2094
},
{
"epoch": 4.74,
"learning_rate": 2.2573188138774923e-07,
"loss": 2.8164,
"step": 2096
},
{
"epoch": 4.74,
"learning_rate": 2.2418817373985854e-07,
"loss": 2.3785,
"step": 2098
},
{
"epoch": 4.75,
"learning_rate": 2.2264909601730708e-07,
"loss": 2.8835,
"step": 2100
},
{
"epoch": 4.75,
"learning_rate": 2.2111465740507395e-07,
"loss": 2.6537,
"step": 2102
},
{
"epoch": 4.76,
"learning_rate": 2.1958486706045198e-07,
"loss": 2.8952,
"step": 2104
},
{
"epoch": 4.76,
"learning_rate": 2.1805973411299406e-07,
"loss": 2.8878,
"step": 2106
},
{
"epoch": 4.77,
"learning_rate": 2.165392676644582e-07,
"loss": 2.8798,
"step": 2108
},
{
"epoch": 4.77,
"learning_rate": 2.1502347678875365e-07,
"loss": 2.8305,
"step": 2110
},
{
"epoch": 4.78,
"learning_rate": 2.135123705318873e-07,
"loss": 2.7106,
"step": 2112
},
{
"epoch": 4.78,
"learning_rate": 2.1200595791190723e-07,
"loss": 2.7477,
"step": 2114
},
{
"epoch": 4.78,
"learning_rate": 2.1050424791885212e-07,
"loss": 2.839,
"step": 2116
},
{
"epoch": 4.79,
"learning_rate": 2.0900724951469617e-07,
"loss": 3.1958,
"step": 2118
},
{
"epoch": 4.79,
"learning_rate": 2.0751497163329413e-07,
"loss": 2.6208,
"step": 2120
},
{
"epoch": 4.8,
"learning_rate": 2.0602742318033072e-07,
"loss": 3.0725,
"step": 2122
},
{
"epoch": 4.8,
"learning_rate": 2.0454461303326575e-07,
"loss": 2.8657,
"step": 2124
},
{
"epoch": 4.81,
"learning_rate": 2.0306655004128138e-07,
"loss": 2.8774,
"step": 2126
},
{
"epoch": 4.81,
"learning_rate": 2.0159324302522962e-07,
"loss": 2.8357,
"step": 2128
},
{
"epoch": 4.82,
"learning_rate": 2.0012470077757926e-07,
"loss": 2.469,
"step": 2130
},
{
"epoch": 4.82,
"learning_rate": 1.9866093206236457e-07,
"loss": 2.7227,
"step": 2132
},
{
"epoch": 4.83,
"learning_rate": 1.9720194561513116e-07,
"loss": 3.198,
"step": 2134
},
{
"epoch": 4.83,
"learning_rate": 1.9574775014288515e-07,
"loss": 2.5335,
"step": 2136
},
{
"epoch": 4.83,
"learning_rate": 1.9429835432404095e-07,
"loss": 2.5348,
"step": 2138
},
{
"epoch": 4.84,
"learning_rate": 1.9285376680836895e-07,
"loss": 2.9377,
"step": 2140
},
{
"epoch": 4.84,
"learning_rate": 1.914139962169452e-07,
"loss": 2.6431,
"step": 2142
},
{
"epoch": 4.85,
"learning_rate": 1.8997905114209834e-07,
"loss": 2.9976,
"step": 2144
},
{
"epoch": 4.85,
"learning_rate": 1.8854894014735932e-07,
"loss": 2.9322,
"step": 2146
},
{
"epoch": 4.86,
"learning_rate": 1.8712367176740973e-07,
"loss": 2.6202,
"step": 2148
},
{
"epoch": 4.86,
"learning_rate": 1.8570325450803215e-07,
"loss": 2.5657,
"step": 2150
},
{
"epoch": 4.87,
"learning_rate": 1.8428769684605682e-07,
"loss": 2.7867,
"step": 2152
},
{
"epoch": 4.87,
"learning_rate": 1.8287700722931432e-07,
"loss": 2.853,
"step": 2154
},
{
"epoch": 4.88,
"learning_rate": 1.814711940765824e-07,
"loss": 2.8105,
"step": 2156
},
{
"epoch": 4.88,
"learning_rate": 1.8007026577753702e-07,
"loss": 2.6706,
"step": 2158
},
{
"epoch": 4.88,
"learning_rate": 1.7867423069270293e-07,
"loss": 2.5742,
"step": 2160
},
{
"epoch": 4.89,
"learning_rate": 1.772830971534013e-07,
"loss": 2.667,
"step": 2162
},
{
"epoch": 4.89,
"learning_rate": 1.7589687346170312e-07,
"loss": 2.6658,
"step": 2164
},
{
"epoch": 4.9,
"learning_rate": 1.7451556789037757e-07,
"loss": 3.429,
"step": 2166
},
{
"epoch": 4.9,
"learning_rate": 1.7313918868284294e-07,
"loss": 2.7417,
"step": 2168
},
{
"epoch": 4.91,
"learning_rate": 1.717677440531189e-07,
"loss": 2.9242,
"step": 2170
},
{
"epoch": 4.91,
"learning_rate": 1.7040124218577433e-07,
"loss": 2.7466,
"step": 2172
},
{
"epoch": 4.92,
"learning_rate": 1.690396912358827e-07,
"loss": 2.7001,
"step": 2174
},
{
"epoch": 4.92,
"learning_rate": 1.676830993289695e-07,
"loss": 2.9698,
"step": 2176
},
{
"epoch": 4.92,
"learning_rate": 1.6633147456096608e-07,
"loss": 2.9615,
"step": 2178
},
{
"epoch": 4.93,
"learning_rate": 1.649848249981609e-07,
"loss": 2.512,
"step": 2180
},
{
"epoch": 4.93,
"learning_rate": 1.6364315867715085e-07,
"loss": 2.8938,
"step": 2182
},
{
"epoch": 4.94,
"learning_rate": 1.623064836047935e-07,
"loss": 2.6545,
"step": 2184
},
{
"epoch": 4.94,
"learning_rate": 1.6097480775815954e-07,
"loss": 2.8343,
"step": 2186
},
{
"epoch": 4.95,
"learning_rate": 1.5964813908448505e-07,
"loss": 2.6101,
"step": 2188
},
{
"epoch": 4.95,
"learning_rate": 1.5832648550112393e-07,
"loss": 2.6439,
"step": 2190
},
{
"epoch": 4.96,
"learning_rate": 1.5700985489550134e-07,
"loss": 2.6966,
"step": 2192
},
{
"epoch": 4.96,
"learning_rate": 1.5569825512506552e-07,
"loss": 2.8579,
"step": 2194
},
{
"epoch": 4.97,
"learning_rate": 1.5439169401724151e-07,
"loss": 2.6781,
"step": 2196
},
{
"epoch": 4.97,
"learning_rate": 1.5309017936938473e-07,
"loss": 2.7252,
"step": 2198
},
{
"epoch": 4.97,
"learning_rate": 1.517937189487335e-07,
"loss": 2.494,
"step": 2200
},
{
"epoch": 4.98,
"learning_rate": 1.5050232049236421e-07,
"loss": 3.0062,
"step": 2202
},
{
"epoch": 4.98,
"learning_rate": 1.492159917071435e-07,
"loss": 2.6262,
"step": 2204
},
{
"epoch": 4.99,
"learning_rate": 1.479347402696828e-07,
"loss": 2.8857,
"step": 2206
},
{
"epoch": 4.99,
"learning_rate": 1.4665857382629398e-07,
"loss": 2.9944,
"step": 2208
},
{
"epoch": 5.0,
"learning_rate": 1.4538749999294053e-07,
"loss": 2.9556,
"step": 2210
},
{
"epoch": 5.0,
"learning_rate": 1.441215263551958e-07,
"loss": 2.8379,
"step": 2212
},
{
"epoch": 5.01,
"learning_rate": 1.4286066046819511e-07,
"loss": 2.7581,
"step": 2214
},
{
"epoch": 5.01,
"learning_rate": 1.4160490985659123e-07,
"loss": 2.761,
"step": 2216
},
{
"epoch": 5.02,
"learning_rate": 1.4035428201451095e-07,
"loss": 2.5619,
"step": 2218
},
{
"epoch": 5.02,
"learning_rate": 1.3910878440550732e-07,
"loss": 2.8362,
"step": 2220
},
{
"epoch": 5.02,
"learning_rate": 1.378684244625189e-07,
"loss": 2.7421,
"step": 2222
},
{
"epoch": 5.03,
"learning_rate": 1.366332095878221e-07,
"loss": 2.8827,
"step": 2224
},
{
"epoch": 5.03,
"learning_rate": 1.3540314715298907e-07,
"loss": 3.023,
"step": 2226
},
{
"epoch": 5.04,
"learning_rate": 1.341782444988424e-07,
"loss": 2.5154,
"step": 2228
},
{
"epoch": 5.04,
"learning_rate": 1.3295850893541237e-07,
"loss": 2.7627,
"step": 2230
},
{
"epoch": 5.05,
"learning_rate": 1.317439477418929e-07,
"loss": 2.8211,
"step": 2232
},
{
"epoch": 5.05,
"learning_rate": 1.305345681665978e-07,
"loss": 2.4411,
"step": 2234
},
{
"epoch": 5.06,
"learning_rate": 1.2933037742691776e-07,
"loss": 2.781,
"step": 2236
},
{
"epoch": 5.06,
"learning_rate": 1.281313827092776e-07,
"loss": 3.0131,
"step": 2238
},
{
"epoch": 5.07,
"learning_rate": 1.269375911690924e-07,
"loss": 2.7534,
"step": 2240
},
{
"epoch": 5.07,
"learning_rate": 1.257490099307268e-07,
"loss": 2.7227,
"step": 2242
},
{
"epoch": 5.07,
"learning_rate": 1.2456564608744968e-07,
"loss": 2.9133,
"step": 2244
},
{
"epoch": 5.08,
"learning_rate": 1.2338750670139431e-07,
"loss": 2.7381,
"step": 2246
},
{
"epoch": 5.08,
"learning_rate": 1.2221459880351447e-07,
"loss": 2.7767,
"step": 2248
},
{
"epoch": 5.09,
"learning_rate": 1.2104692939354444e-07,
"loss": 2.831,
"step": 2250
},
{
"epoch": 5.09,
"learning_rate": 1.198845054399541e-07,
"loss": 3.0102,
"step": 2252
},
{
"epoch": 5.1,
"learning_rate": 1.1872733387991108e-07,
"loss": 2.7581,
"step": 2254
},
{
"epoch": 5.1,
"learning_rate": 1.1757542161923662e-07,
"loss": 2.8965,
"step": 2256
},
{
"epoch": 5.11,
"learning_rate": 1.164287755323653e-07,
"loss": 3.1573,
"step": 2258
},
{
"epoch": 5.11,
"learning_rate": 1.1528740246230462e-07,
"loss": 2.8608,
"step": 2260
},
{
"epoch": 5.11,
"learning_rate": 1.1415130922059224e-07,
"loss": 2.9055,
"step": 2262
},
{
"epoch": 5.12,
"learning_rate": 1.1302050258725814e-07,
"loss": 2.9471,
"step": 2264
},
{
"epoch": 5.12,
"learning_rate": 1.1189498931078168e-07,
"loss": 2.8742,
"step": 2266
},
{
"epoch": 5.13,
"learning_rate": 1.107747761080523e-07,
"loss": 2.8975,
"step": 2268
},
{
"epoch": 5.13,
"learning_rate": 1.0965986966433038e-07,
"loss": 2.4924,
"step": 2270
},
{
"epoch": 5.14,
"learning_rate": 1.085502766332046e-07,
"loss": 2.5379,
"step": 2272
},
{
"epoch": 5.14,
"learning_rate": 1.0744600363655587e-07,
"loss": 3.0433,
"step": 2274
},
{
"epoch": 5.15,
"learning_rate": 1.0634705726451476e-07,
"loss": 2.8508,
"step": 2276
},
{
"epoch": 5.15,
"learning_rate": 1.052534440754237e-07,
"loss": 2.7688,
"step": 2278
},
{
"epoch": 5.16,
"learning_rate": 1.0416517059579788e-07,
"loss": 2.8261,
"step": 2280
},
{
"epoch": 5.16,
"learning_rate": 1.0308224332028548e-07,
"loss": 2.4478,
"step": 2282
},
{
"epoch": 5.16,
"learning_rate": 1.0200466871162938e-07,
"loss": 3.2098,
"step": 2284
},
{
"epoch": 5.17,
"learning_rate": 1.0093245320062871e-07,
"loss": 2.7841,
"step": 2286
},
{
"epoch": 5.17,
"learning_rate": 9.986560318610026e-08,
"loss": 2.7897,
"step": 2288
},
{
"epoch": 5.18,
"learning_rate": 9.880412503484026e-08,
"loss": 3.0868,
"step": 2290
},
{
"epoch": 5.18,
"learning_rate": 9.774802508158698e-08,
"loss": 2.8863,
"step": 2292
},
{
"epoch": 5.19,
"learning_rate": 9.669730962898182e-08,
"loss": 2.9924,
"step": 2294
},
{
"epoch": 5.19,
"learning_rate": 9.56519849475328e-08,
"loss": 2.3201,
"step": 2296
},
{
"epoch": 5.2,
"learning_rate": 9.461205727557609e-08,
"loss": 3.0813,
"step": 2298
},
{
"epoch": 5.2,
"learning_rate": 9.357753281923975e-08,
"loss": 2.7553,
"step": 2300
},
{
"epoch": 5.21,
"learning_rate": 9.25484177524064e-08,
"loss": 2.6823,
"step": 2302
},
{
"epoch": 5.21,
"learning_rate": 9.152471821667595e-08,
"loss": 2.7055,
"step": 2304
},
{
"epoch": 5.21,
"learning_rate": 9.050644032132915e-08,
"loss": 2.7976,
"step": 2306
},
{
"epoch": 5.22,
"learning_rate": 8.949359014329205e-08,
"loss": 3.0599,
"step": 2308
},
{
"epoch": 5.22,
"learning_rate": 8.848617372709755e-08,
"loss": 2.7271,
"step": 2310
},
{
"epoch": 5.23,
"learning_rate": 8.748419708485199e-08,
"loss": 3.149,
"step": 2312
},
{
"epoch": 5.23,
"learning_rate": 8.64876661961974e-08,
"loss": 2.569,
"step": 2314
},
{
"epoch": 5.24,
"learning_rate": 8.5496587008276e-08,
"loss": 2.8628,
"step": 2316
},
{
"epoch": 5.24,
"learning_rate": 8.451096543569636e-08,
"loss": 2.5182,
"step": 2318
},
{
"epoch": 5.25,
"learning_rate": 8.353080736049522e-08,
"loss": 2.8243,
"step": 2320
},
{
"epoch": 5.25,
"learning_rate": 8.255611863210521e-08,
"loss": 2.7879,
"step": 2322
},
{
"epoch": 5.25,
"learning_rate": 8.158690506731835e-08,
"loss": 2.8732,
"step": 2324
},
{
"epoch": 5.26,
"learning_rate": 8.062317245025174e-08,
"loss": 3.134,
"step": 2326
},
{
"epoch": 5.26,
"learning_rate": 7.966492653231293e-08,
"loss": 2.7235,
"step": 2328
},
{
"epoch": 5.27,
"learning_rate": 7.871217303216571e-08,
"loss": 2.9314,
"step": 2330
},
{
"epoch": 5.27,
"learning_rate": 7.776491763569625e-08,
"loss": 2.4789,
"step": 2332
},
{
"epoch": 5.28,
"learning_rate": 7.68231659959786e-08,
"loss": 3.1469,
"step": 2334
},
{
"epoch": 5.28,
"learning_rate": 7.588692373324124e-08,
"loss": 2.7273,
"step": 2336
},
{
"epoch": 5.29,
"learning_rate": 7.495619643483364e-08,
"loss": 2.7551,
"step": 2338
},
{
"epoch": 5.29,
"learning_rate": 7.403098965519283e-08,
"loss": 2.7504,
"step": 2340
},
{
"epoch": 5.3,
"learning_rate": 7.311130891581029e-08,
"loss": 3.006,
"step": 2342
},
{
"epoch": 5.3,
"learning_rate": 7.219715970519869e-08,
"loss": 2.8902,
"step": 2344
},
{
"epoch": 5.3,
"learning_rate": 7.128854747885959e-08,
"loss": 2.552,
"step": 2346
},
{
"epoch": 5.31,
"learning_rate": 7.038547765925051e-08,
"loss": 2.7034,
"step": 2348
},
{
"epoch": 5.31,
"learning_rate": 6.948795563575327e-08,
"loss": 2.9586,
"step": 2350
},
{
"epoch": 5.32,
"learning_rate": 6.85959867646403e-08,
"loss": 2.9514,
"step": 2352
},
{
"epoch": 5.32,
"learning_rate": 6.770957636904484e-08,
"loss": 2.9875,
"step": 2354
},
{
"epoch": 5.33,
"learning_rate": 6.682872973892717e-08,
"loss": 2.4188,
"step": 2356
},
{
"epoch": 5.33,
"learning_rate": 6.595345213104397e-08,
"loss": 2.9447,
"step": 2358
},
{
"epoch": 5.34,
"learning_rate": 6.508374876891753e-08,
"loss": 2.9847,
"step": 2360
},
{
"epoch": 5.34,
"learning_rate": 6.421962484280275e-08,
"loss": 2.4446,
"step": 2362
},
{
"epoch": 5.35,
"learning_rate": 6.336108550965857e-08,
"loss": 2.738,
"step": 2364
},
{
"epoch": 5.35,
"learning_rate": 6.250813589311499e-08,
"loss": 3.2748,
"step": 2366
},
{
"epoch": 5.35,
"learning_rate": 6.166078108344364e-08,
"loss": 2.7968,
"step": 2368
},
{
"epoch": 5.36,
"learning_rate": 6.081902613752787e-08,
"loss": 2.825,
"step": 2370
},
{
"epoch": 5.36,
"learning_rate": 5.998287607883102e-08,
"loss": 2.7658,
"step": 2372
},
{
"epoch": 5.37,
"learning_rate": 5.915233589736768e-08,
"loss": 2.9618,
"step": 2374
},
{
"epoch": 5.37,
"learning_rate": 5.8327410549673715e-08,
"loss": 2.6213,
"step": 2376
},
{
"epoch": 5.38,
"learning_rate": 5.750810495877623e-08,
"loss": 2.9399,
"step": 2378
},
{
"epoch": 5.38,
"learning_rate": 5.669442401416491e-08,
"loss": 2.5547,
"step": 2380
},
{
"epoch": 5.39,
"learning_rate": 5.588637257176188e-08,
"loss": 2.8335,
"step": 2382
},
{
"epoch": 5.39,
"learning_rate": 5.508395545389377e-08,
"loss": 2.726,
"step": 2384
},
{
"epoch": 5.4,
"learning_rate": 5.4287177449262035e-08,
"loss": 2.9261,
"step": 2386
},
{
"epoch": 5.4,
"learning_rate": 5.349604331291491e-08,
"loss": 2.5964,
"step": 2388
},
{
"epoch": 5.4,
"learning_rate": 5.271055776621891e-08,
"loss": 2.7978,
"step": 2390
},
{
"epoch": 5.41,
"learning_rate": 5.1930725496830754e-08,
"loss": 2.9067,
"step": 2392
},
{
"epoch": 5.41,
"learning_rate": 5.1156551158669234e-08,
"loss": 2.8638,
"step": 2394
},
{
"epoch": 5.42,
"learning_rate": 5.038803937188729e-08,
"loss": 3.2469,
"step": 2396
},
{
"epoch": 5.42,
"learning_rate": 4.9625194722844767e-08,
"loss": 2.9303,
"step": 2398
},
{
"epoch": 5.43,
"learning_rate": 4.886802176408067e-08,
"loss": 3.0958,
"step": 2400
},
{
"epoch": 5.43,
"learning_rate": 4.811652501428687e-08,
"loss": 2.8197,
"step": 2402
},
{
"epoch": 5.44,
"learning_rate": 4.737070895827988e-08,
"loss": 3.0567,
"step": 2404
},
{
"epoch": 5.44,
"learning_rate": 4.6630578046974787e-08,
"loss": 2.8699,
"step": 2406
},
{
"epoch": 5.44,
"learning_rate": 4.589613669735914e-08,
"loss": 2.7792,
"step": 2408
},
{
"epoch": 5.45,
"learning_rate": 4.516738929246511e-08,
"loss": 2.7646,
"step": 2410
},
{
"epoch": 5.45,
"learning_rate": 4.444434018134524e-08,
"loss": 2.9106,
"step": 2412
},
{
"epoch": 5.46,
"learning_rate": 4.3726993679044866e-08,
"loss": 2.5622,
"step": 2414
},
{
"epoch": 5.46,
"learning_rate": 4.301535406657742e-08,
"loss": 2.6251,
"step": 2416
},
{
"epoch": 5.47,
"learning_rate": 4.2309425590898454e-08,
"loss": 2.4432,
"step": 2418
},
{
"epoch": 5.47,
"learning_rate": 4.160921246488003e-08,
"loss": 2.7231,
"step": 2420
},
{
"epoch": 5.48,
"learning_rate": 4.0914718867286255e-08,
"loss": 3.0152,
"step": 2422
},
{
"epoch": 5.48,
"learning_rate": 4.022594894274778e-08,
"loss": 3.1005,
"step": 2424
},
{
"epoch": 5.49,
"learning_rate": 3.954290680173722e-08,
"loss": 2.8143,
"step": 2426
},
{
"epoch": 5.49,
"learning_rate": 3.886559652054489e-08,
"loss": 2.9262,
"step": 2428
},
{
"epoch": 5.49,
"learning_rate": 3.819402214125389e-08,
"loss": 2.4468,
"step": 2430
},
{
"epoch": 5.5,
"learning_rate": 3.752818767171684e-08,
"loss": 2.7465,
"step": 2432
},
{
"epoch": 5.5,
"learning_rate": 3.6868097085530956e-08,
"loss": 2.8041,
"step": 2434
},
{
"epoch": 5.51,
"learning_rate": 3.621375432201512e-08,
"loss": 2.8799,
"step": 2436
},
{
"epoch": 5.51,
"learning_rate": 3.556516328618597e-08,
"loss": 2.7,
"step": 2438
},
{
"epoch": 5.52,
"learning_rate": 3.492232784873494e-08,
"loss": 2.8016,
"step": 2440
},
{
"epoch": 5.52,
"learning_rate": 3.4285251846004725e-08,
"loss": 3.0438,
"step": 2442
},
{
"epoch": 5.53,
"learning_rate": 3.365393907996661e-08,
"loss": 2.7742,
"step": 2444
},
{
"epoch": 5.53,
"learning_rate": 3.3028393318197845e-08,
"loss": 2.8359,
"step": 2446
},
{
"epoch": 5.54,
"learning_rate": 3.240861829385899e-08,
"loss": 2.8595,
"step": 2448
},
{
"epoch": 5.54,
"learning_rate": 3.1794617705672156e-08,
"loss": 2.6778,
"step": 2450
},
{
"epoch": 5.54,
"learning_rate": 3.118639521789768e-08,
"loss": 2.7007,
"step": 2452
},
{
"epoch": 5.55,
"learning_rate": 3.0583954460313924e-08,
"loss": 3.1771,
"step": 2454
},
{
"epoch": 5.55,
"learning_rate": 2.99872990281943e-08,
"loss": 2.4846,
"step": 2456
},
{
"epoch": 5.56,
"learning_rate": 2.9396432482286405e-08,
"loss": 3.0357,
"step": 2458
},
{
"epoch": 5.56,
"learning_rate": 2.881135834879078e-08,
"loss": 2.8547,
"step": 2460
},
{
"epoch": 5.57,
"learning_rate": 2.8232080119339418e-08,
"loss": 2.8275,
"step": 2462
},
{
"epoch": 5.57,
"learning_rate": 2.7658601250975632e-08,
"loss": 3.0354,
"step": 2464
},
{
"epoch": 5.58,
"learning_rate": 2.7090925166132873e-08,
"loss": 2.5147,
"step": 2466
},
{
"epoch": 5.58,
"learning_rate": 2.6529055252614285e-08,
"loss": 2.6707,
"step": 2468
},
{
"epoch": 5.59,
"learning_rate": 2.5972994863573183e-08,
"loss": 2.5903,
"step": 2470
},
{
"epoch": 5.59,
"learning_rate": 2.5422747317491944e-08,
"loss": 2.7884,
"step": 2472
},
{
"epoch": 5.59,
"learning_rate": 2.487831589816314e-08,
"loss": 2.8384,
"step": 2474
},
{
"epoch": 5.6,
"learning_rate": 2.4339703854669657e-08,
"loss": 2.6199,
"step": 2476
},
{
"epoch": 5.6,
"learning_rate": 2.380691440136484e-08,
"loss": 2.6724,
"step": 2478
},
{
"epoch": 5.61,
"learning_rate": 2.3279950717854157e-08,
"loss": 3.0304,
"step": 2480
},
{
"epoch": 5.61,
"learning_rate": 2.275881594897566e-08,
"loss": 2.823,
"step": 2482
},
{
"epoch": 5.62,
"learning_rate": 2.224351320478124e-08,
"loss": 2.9788,
"step": 2484
},
{
"epoch": 5.62,
"learning_rate": 2.1734045560518166e-08,
"loss": 2.9393,
"step": 2486
},
{
"epoch": 5.63,
"learning_rate": 2.12304160566108e-08,
"loss": 2.8637,
"step": 2488
},
{
"epoch": 5.63,
"learning_rate": 2.073262769864226e-08,
"loss": 2.6603,
"step": 2490
},
{
"epoch": 5.63,
"learning_rate": 2.024068345733676e-08,
"loss": 2.7437,
"step": 2492
},
{
"epoch": 5.64,
"learning_rate": 1.9754586268541874e-08,
"loss": 2.748,
"step": 2494
},
{
"epoch": 5.64,
"learning_rate": 1.9274339033210296e-08,
"loss": 2.8572,
"step": 2496
},
{
"epoch": 5.65,
"learning_rate": 1.8799944617383877e-08,
"loss": 2.8819,
"step": 2498
},
{
"epoch": 5.65,
"learning_rate": 1.833140585217485e-08,
"loss": 2.8412,
"step": 2500
},
{
"epoch": 5.66,
"learning_rate": 1.786872553375074e-08,
"loss": 2.6918,
"step": 2502
},
{
"epoch": 5.66,
"learning_rate": 1.7411906423316138e-08,
"loss": 2.7927,
"step": 2504
},
{
"epoch": 5.67,
"learning_rate": 1.6960951247096954e-08,
"loss": 3.0818,
"step": 2506
},
{
"epoch": 5.67,
"learning_rate": 1.6515862696324547e-08,
"loss": 2.8682,
"step": 2508
},
{
"epoch": 5.68,
"learning_rate": 1.607664342721815e-08,
"loss": 2.7692,
"step": 2510
},
{
"epoch": 5.68,
"learning_rate": 1.5643296060971144e-08,
"loss": 2.8995,
"step": 2512
},
{
"epoch": 5.68,
"learning_rate": 1.52158231837336e-08,
"loss": 2.6709,
"step": 2514
},
{
"epoch": 5.69,
"learning_rate": 1.479422734659752e-08,
"loss": 2.6749,
"step": 2516
},
{
"epoch": 5.69,
"learning_rate": 1.4378511065582299e-08,
"loss": 2.7863,
"step": 2518
},
{
"epoch": 5.7,
"learning_rate": 1.3968676821618065e-08,
"loss": 2.8782,
"step": 2520
},
{
"epoch": 5.7,
"learning_rate": 1.35647270605328e-08,
"loss": 2.6534,
"step": 2522
},
{
"epoch": 5.71,
"learning_rate": 1.3166664193036248e-08,
"loss": 2.7203,
"step": 2524
},
{
"epoch": 5.71,
"learning_rate": 1.2774490594706032e-08,
"loss": 2.7822,
"step": 2526
},
{
"epoch": 5.72,
"learning_rate": 1.2388208605974115e-08,
"loss": 2.5871,
"step": 2528
},
{
"epoch": 5.72,
"learning_rate": 1.2007820532111467e-08,
"loss": 2.6158,
"step": 2530
},
{
"epoch": 5.73,
"learning_rate": 1.1633328643215534e-08,
"loss": 3.0137,
"step": 2532
},
{
"epoch": 5.73,
"learning_rate": 1.126473517419635e-08,
"loss": 2.7719,
"step": 2534
},
{
"epoch": 5.73,
"learning_rate": 1.0902042324762662e-08,
"loss": 2.9144,
"step": 2536
},
{
"epoch": 5.74,
"learning_rate": 1.054525225940961e-08,
"loss": 2.5536,
"step": 2538
},
{
"epoch": 5.74,
"learning_rate": 1.0194367107405177e-08,
"loss": 3.0706,
"step": 2540
},
{
"epoch": 5.75,
"learning_rate": 9.849388962777982e-09,
"loss": 2.903,
"step": 2542
},
{
"epoch": 5.75,
"learning_rate": 9.510319884304175e-09,
"loss": 2.9641,
"step": 2544
},
{
"epoch": 5.76,
"learning_rate": 9.177161895496e-09,
"loss": 3.0056,
"step": 2546
},
{
"epoch": 5.76,
"learning_rate": 8.849916984588702e-09,
"loss": 2.5194,
"step": 2548
},
{
"epoch": 5.77,
"learning_rate": 8.528587104529638e-09,
"loss": 2.7959,
"step": 2550
},
{
"epoch": 5.77,
"learning_rate": 8.213174172965742e-09,
"loss": 2.4067,
"step": 2552
},
{
"epoch": 5.78,
"learning_rate": 7.903680072233077e-09,
"loss": 2.6204,
"step": 2554
},
{
"epoch": 5.78,
"learning_rate": 7.600106649344518e-09,
"loss": 2.9532,
"step": 2556
},
{
"epoch": 5.78,
"learning_rate": 7.3024557159793166e-09,
"loss": 2.633,
"step": 2558
},
{
"epoch": 5.79,
"learning_rate": 7.0107290484726635e-09,
"loss": 2.4816,
"step": 2560
},
{
"epoch": 5.79,
"learning_rate": 6.7249283878039185e-09,
"loss": 2.8236,
"step": 2562
},
{
"epoch": 5.8,
"learning_rate": 6.445055439587732e-09,
"loss": 3.11,
"step": 2564
},
{
"epoch": 5.8,
"learning_rate": 6.171111874062496e-09,
"loss": 2.9049,
"step": 2566
},
{
"epoch": 5.81,
"learning_rate": 5.90309932608124e-09,
"loss": 2.9968,
"step": 2568
},
{
"epoch": 5.81,
"learning_rate": 5.641019395101421e-09,
"loss": 2.6047,
"step": 2570
},
{
"epoch": 5.82,
"learning_rate": 5.384873645175813e-09,
"loss": 3.1338,
"step": 2572
},
{
"epoch": 5.82,
"learning_rate": 5.134663604942635e-09,
"loss": 2.5713,
"step": 2574
},
{
"epoch": 5.82,
"learning_rate": 4.89039076761677e-09,
"loss": 2.7235,
"step": 2576
},
{
"epoch": 5.83,
"learning_rate": 4.652056590980891e-09,
"loss": 3.1282,
"step": 2578
},
{
"epoch": 5.83,
"learning_rate": 4.419662497376797e-09,
"loss": 2.8595,
"step": 2580
},
{
"epoch": 5.84,
"learning_rate": 4.193209873696313e-09,
"loss": 2.8237,
"step": 2582
},
{
"epoch": 5.84,
"learning_rate": 3.972700071373958e-09,
"loss": 2.9459,
"step": 2584
},
{
"epoch": 5.85,
"learning_rate": 3.7581344063780666e-09,
"loss": 2.6471,
"step": 2586
},
{
"epoch": 5.85,
"learning_rate": 3.5495141592035702e-09,
"loss": 2.6666,
"step": 2588
},
{
"epoch": 5.86,
"learning_rate": 3.346840574864007e-09,
"loss": 2.7841,
"step": 2590
},
{
"epoch": 5.86,
"learning_rate": 3.150114862883857e-09,
"loss": 2.9788,
"step": 2592
},
{
"epoch": 5.87,
"learning_rate": 2.959338197291994e-09,
"loss": 2.5035,
"step": 2594
},
{
"epoch": 5.87,
"learning_rate": 2.7745117166140255e-09,
"loss": 2.7372,
"step": 2596
},
{
"epoch": 5.87,
"learning_rate": 2.5956365238660736e-09,
"loss": 2.7331,
"step": 2598
},
{
"epoch": 5.88,
"learning_rate": 2.4227136865474506e-09,
"loss": 2.745,
"step": 2600
},
{
"epoch": 5.88,
"learning_rate": 2.2557442366351043e-09,
"loss": 2.6551,
"step": 2602
},
{
"epoch": 5.89,
"learning_rate": 2.09472917057707e-09,
"loss": 2.6462,
"step": 2604
},
{
"epoch": 5.89,
"learning_rate": 1.939669449286252e-09,
"loss": 2.6305,
"step": 2606
},
{
"epoch": 5.9,
"learning_rate": 1.7905659981353183e-09,
"loss": 2.857,
"step": 2608
},
{
"epoch": 5.9,
"learning_rate": 1.6474197069504815e-09,
"loss": 2.8444,
"step": 2610
},
{
"epoch": 5.91,
"learning_rate": 1.5102314300068365e-09,
"loss": 2.5423,
"step": 2612
},
{
"epoch": 5.91,
"learning_rate": 1.3790019860229206e-09,
"loss": 3.0096,
"step": 2614
},
{
"epoch": 5.92,
"learning_rate": 1.2537321581557182e-09,
"loss": 2.4271,
"step": 2616
},
{
"epoch": 5.92,
"learning_rate": 1.1344226939964397e-09,
"loss": 2.6693,
"step": 2618
},
{
"epoch": 5.92,
"learning_rate": 1.0210743055651949e-09,
"loss": 2.638,
"step": 2620
},
{
"epoch": 5.93,
"learning_rate": 9.136876693081052e-10,
"loss": 2.7145,
"step": 2622
},
{
"epoch": 5.93,
"learning_rate": 8.122634260917527e-10,
"loss": 2.7503,
"step": 2624
},
{
"epoch": 5.94,
"learning_rate": 7.168021812005154e-10,
"loss": 3.0314,
"step": 2626
},
{
"epoch": 5.94,
"learning_rate": 6.273045043322378e-10,
"loss": 2.923,
"step": 2628
},
{
"epoch": 5.95,
"learning_rate": 5.437709295955662e-10,
"loss": 2.6863,
"step": 2630
},
{
"epoch": 5.95,
"learning_rate": 4.662019555056185e-10,
"loss": 2.7078,
"step": 2632
},
{
"epoch": 5.96,
"learning_rate": 3.945980449823194e-10,
"loss": 3.1157,
"step": 2634
},
{
"epoch": 5.96,
"learning_rate": 3.289596253467364e-10,
"loss": 2.7896,
"step": 2636
},
{
"epoch": 5.96,
"learning_rate": 2.6928708831874814e-10,
"loss": 2.8468,
"step": 2638
},
{
"epoch": 5.97,
"learning_rate": 2.1558079001493534e-10,
"loss": 2.5971,
"step": 2640
},
{
"epoch": 5.97,
"learning_rate": 1.6784105094602706e-10,
"loss": 2.3665,
"step": 2642
},
{
"epoch": 5.98,
"learning_rate": 1.2606815601567955e-10,
"loss": 2.8667,
"step": 2644
},
{
"epoch": 5.98,
"learning_rate": 9.026235451803365e-11,
"loss": 2.8586,
"step": 2646
},
{
"epoch": 5.99,
"learning_rate": 6.042386013638267e-11,
"loss": 2.7275,
"step": 2648
},
{
"epoch": 5.99,
"learning_rate": 3.6552850942506156e-11,
"loss": 2.9348,
"step": 2650
},
{
"epoch": 6.0,
"learning_rate": 1.8649469394782512e-11,
"loss": 2.5662,
"step": 2652
},
{
"epoch": 6.0,
"step": 2652,
"total_flos": 5.825747791275622e+16,
"train_loss": 3.2421822742099677,
"train_runtime": 19016.0105,
"train_samples_per_second": 8.929,
"train_steps_per_second": 0.139
}
],
"logging_steps": 2,
"max_steps": 2652,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 4000,
"total_flos": 5.825747791275622e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}