PyTorch
llama
DTA_llama2_13b / trainer_state.json
dongsheng's picture
Upload 10 files
36c8831 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.989351992698509,
"global_step": 2050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 6.097560975609757e-07,
"loss": 1.2345,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.2195121951219514e-06,
"loss": 1.3023,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 1.8292682926829268e-06,
"loss": 1.2241,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 2.4390243902439027e-06,
"loss": 1.2505,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 3.0487804878048782e-06,
"loss": 1.1555,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 3.6585365853658537e-06,
"loss": 1.101,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 4.26829268292683e-06,
"loss": 0.9013,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 4.8780487804878055e-06,
"loss": 0.8904,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 5.487804878048781e-06,
"loss": 0.7205,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 6.0975609756097564e-06,
"loss": 0.6704,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 6.707317073170733e-06,
"loss": 0.6029,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 7.317073170731707e-06,
"loss": 0.5434,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 7.926829268292683e-06,
"loss": 0.5544,
"step": 13
},
{
"epoch": 0.03,
"learning_rate": 8.53658536585366e-06,
"loss": 0.5272,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 9.146341463414634e-06,
"loss": 0.504,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 9.756097560975611e-06,
"loss": 0.505,
"step": 16
},
{
"epoch": 0.04,
"learning_rate": 1.0365853658536585e-05,
"loss": 0.5116,
"step": 17
},
{
"epoch": 0.04,
"learning_rate": 1.0975609756097562e-05,
"loss": 0.5009,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 1.1585365853658537e-05,
"loss": 0.4888,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 1.2195121951219513e-05,
"loss": 0.4531,
"step": 20
},
{
"epoch": 0.05,
"learning_rate": 1.2804878048780488e-05,
"loss": 0.4701,
"step": 21
},
{
"epoch": 0.05,
"learning_rate": 1.3414634146341466e-05,
"loss": 0.4221,
"step": 22
},
{
"epoch": 0.06,
"learning_rate": 1.4024390243902441e-05,
"loss": 0.4427,
"step": 23
},
{
"epoch": 0.06,
"learning_rate": 1.4634146341463415e-05,
"loss": 0.4266,
"step": 24
},
{
"epoch": 0.06,
"learning_rate": 1.524390243902439e-05,
"loss": 0.4375,
"step": 25
},
{
"epoch": 0.06,
"learning_rate": 1.5853658536585366e-05,
"loss": 0.4361,
"step": 26
},
{
"epoch": 0.07,
"learning_rate": 1.6463414634146345e-05,
"loss": 0.4482,
"step": 27
},
{
"epoch": 0.07,
"learning_rate": 1.707317073170732e-05,
"loss": 0.4398,
"step": 28
},
{
"epoch": 0.07,
"learning_rate": 1.7682926829268292e-05,
"loss": 0.4464,
"step": 29
},
{
"epoch": 0.07,
"learning_rate": 1.8292682926829268e-05,
"loss": 0.4687,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 1.8902439024390246e-05,
"loss": 0.4459,
"step": 31
},
{
"epoch": 0.08,
"learning_rate": 1.9512195121951222e-05,
"loss": 0.4257,
"step": 32
},
{
"epoch": 0.08,
"learning_rate": 2.0121951219512197e-05,
"loss": 0.3982,
"step": 33
},
{
"epoch": 0.08,
"learning_rate": 2.073170731707317e-05,
"loss": 0.4211,
"step": 34
},
{
"epoch": 0.09,
"learning_rate": 2.134146341463415e-05,
"loss": 0.4319,
"step": 35
},
{
"epoch": 0.09,
"learning_rate": 2.1951219512195124e-05,
"loss": 0.4641,
"step": 36
},
{
"epoch": 0.09,
"learning_rate": 2.25609756097561e-05,
"loss": 0.4335,
"step": 37
},
{
"epoch": 0.09,
"learning_rate": 2.3170731707317075e-05,
"loss": 0.4278,
"step": 38
},
{
"epoch": 0.09,
"learning_rate": 2.378048780487805e-05,
"loss": 0.3997,
"step": 39
},
{
"epoch": 0.1,
"learning_rate": 2.4390243902439026e-05,
"loss": 0.4259,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 2.5e-05,
"loss": 0.4156,
"step": 41
},
{
"epoch": 0.1,
"learning_rate": 2.5609756097560977e-05,
"loss": 0.4356,
"step": 42
},
{
"epoch": 0.1,
"learning_rate": 2.6219512195121952e-05,
"loss": 0.3966,
"step": 43
},
{
"epoch": 0.11,
"learning_rate": 2.682926829268293e-05,
"loss": 0.4271,
"step": 44
},
{
"epoch": 0.11,
"learning_rate": 2.7439024390243906e-05,
"loss": 0.4372,
"step": 45
},
{
"epoch": 0.11,
"learning_rate": 2.8048780487804882e-05,
"loss": 0.4174,
"step": 46
},
{
"epoch": 0.11,
"learning_rate": 2.8658536585365854e-05,
"loss": 0.4342,
"step": 47
},
{
"epoch": 0.12,
"learning_rate": 2.926829268292683e-05,
"loss": 0.401,
"step": 48
},
{
"epoch": 0.12,
"learning_rate": 2.9878048780487805e-05,
"loss": 0.4027,
"step": 49
},
{
"epoch": 0.12,
"learning_rate": 3.048780487804878e-05,
"loss": 0.4319,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 3.109756097560976e-05,
"loss": 0.4345,
"step": 51
},
{
"epoch": 0.13,
"learning_rate": 3.170731707317073e-05,
"loss": 0.4136,
"step": 52
},
{
"epoch": 0.13,
"learning_rate": 3.231707317073171e-05,
"loss": 0.4233,
"step": 53
},
{
"epoch": 0.13,
"learning_rate": 3.292682926829269e-05,
"loss": 0.4089,
"step": 54
},
{
"epoch": 0.13,
"learning_rate": 3.353658536585366e-05,
"loss": 0.4379,
"step": 55
},
{
"epoch": 0.14,
"learning_rate": 3.414634146341464e-05,
"loss": 0.3893,
"step": 56
},
{
"epoch": 0.14,
"learning_rate": 3.475609756097561e-05,
"loss": 0.4188,
"step": 57
},
{
"epoch": 0.14,
"learning_rate": 3.5365853658536584e-05,
"loss": 0.4106,
"step": 58
},
{
"epoch": 0.14,
"learning_rate": 3.597560975609756e-05,
"loss": 0.45,
"step": 59
},
{
"epoch": 0.15,
"learning_rate": 3.6585365853658535e-05,
"loss": 0.3955,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 3.7195121951219514e-05,
"loss": 0.4393,
"step": 61
},
{
"epoch": 0.15,
"learning_rate": 3.780487804878049e-05,
"loss": 0.4256,
"step": 62
},
{
"epoch": 0.15,
"learning_rate": 3.8414634146341465e-05,
"loss": 0.4139,
"step": 63
},
{
"epoch": 0.16,
"learning_rate": 3.9024390243902444e-05,
"loss": 0.4423,
"step": 64
},
{
"epoch": 0.16,
"learning_rate": 3.9634146341463416e-05,
"loss": 0.4259,
"step": 65
},
{
"epoch": 0.16,
"learning_rate": 4.0243902439024395e-05,
"loss": 0.4225,
"step": 66
},
{
"epoch": 0.16,
"learning_rate": 4.085365853658537e-05,
"loss": 0.42,
"step": 67
},
{
"epoch": 0.17,
"learning_rate": 4.146341463414634e-05,
"loss": 0.4104,
"step": 68
},
{
"epoch": 0.17,
"learning_rate": 4.207317073170732e-05,
"loss": 0.4085,
"step": 69
},
{
"epoch": 0.17,
"learning_rate": 4.26829268292683e-05,
"loss": 0.421,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 4.329268292682927e-05,
"loss": 0.3984,
"step": 71
},
{
"epoch": 0.18,
"learning_rate": 4.390243902439025e-05,
"loss": 0.4428,
"step": 72
},
{
"epoch": 0.18,
"learning_rate": 4.451219512195122e-05,
"loss": 0.4468,
"step": 73
},
{
"epoch": 0.18,
"learning_rate": 4.51219512195122e-05,
"loss": 0.4245,
"step": 74
},
{
"epoch": 0.18,
"learning_rate": 4.573170731707318e-05,
"loss": 0.4008,
"step": 75
},
{
"epoch": 0.18,
"learning_rate": 4.634146341463415e-05,
"loss": 0.4013,
"step": 76
},
{
"epoch": 0.19,
"learning_rate": 4.695121951219512e-05,
"loss": 0.4276,
"step": 77
},
{
"epoch": 0.19,
"learning_rate": 4.75609756097561e-05,
"loss": 0.4307,
"step": 78
},
{
"epoch": 0.19,
"learning_rate": 4.817073170731707e-05,
"loss": 0.3836,
"step": 79
},
{
"epoch": 0.19,
"learning_rate": 4.878048780487805e-05,
"loss": 0.4272,
"step": 80
},
{
"epoch": 0.2,
"learning_rate": 4.9390243902439024e-05,
"loss": 0.4091,
"step": 81
},
{
"epoch": 0.2,
"learning_rate": 5e-05,
"loss": 0.3907,
"step": 82
},
{
"epoch": 0.2,
"learning_rate": 4.9999968146329897e-05,
"loss": 0.4449,
"step": 83
},
{
"epoch": 0.2,
"learning_rate": 4.9999872585400745e-05,
"loss": 0.3889,
"step": 84
},
{
"epoch": 0.21,
"learning_rate": 4.999971331745607e-05,
"loss": 0.4582,
"step": 85
},
{
"epoch": 0.21,
"learning_rate": 4.9999490342901726e-05,
"loss": 0.4386,
"step": 86
},
{
"epoch": 0.21,
"learning_rate": 4.9999203662305926e-05,
"loss": 0.4174,
"step": 87
},
{
"epoch": 0.21,
"learning_rate": 4.9998853276399215e-05,
"loss": 0.4124,
"step": 88
},
{
"epoch": 0.22,
"learning_rate": 4.9998439186074476e-05,
"loss": 0.4114,
"step": 89
},
{
"epoch": 0.22,
"learning_rate": 4.999796139238694e-05,
"loss": 0.4208,
"step": 90
},
{
"epoch": 0.22,
"learning_rate": 4.999741989655415e-05,
"loss": 0.4266,
"step": 91
},
{
"epoch": 0.22,
"learning_rate": 4.999681469995601e-05,
"loss": 0.3977,
"step": 92
},
{
"epoch": 0.23,
"learning_rate": 4.999614580413473e-05,
"loss": 0.4004,
"step": 93
},
{
"epoch": 0.23,
"learning_rate": 4.9995413210794864e-05,
"loss": 0.4481,
"step": 94
},
{
"epoch": 0.23,
"learning_rate": 4.9994616921803264e-05,
"loss": 0.4147,
"step": 95
},
{
"epoch": 0.23,
"learning_rate": 4.999375693918911e-05,
"loss": 0.4221,
"step": 96
},
{
"epoch": 0.24,
"learning_rate": 4.99928332651439e-05,
"loss": 0.428,
"step": 97
},
{
"epoch": 0.24,
"learning_rate": 4.999184590202141e-05,
"loss": 0.4283,
"step": 98
},
{
"epoch": 0.24,
"learning_rate": 4.999079485233775e-05,
"loss": 0.4324,
"step": 99
},
{
"epoch": 0.24,
"learning_rate": 4.9989680118771284e-05,
"loss": 0.4293,
"step": 100
},
{
"epoch": 0.25,
"learning_rate": 4.99885017041627e-05,
"loss": 0.4466,
"step": 101
},
{
"epoch": 0.25,
"learning_rate": 4.998725961151493e-05,
"loss": 0.4101,
"step": 102
},
{
"epoch": 0.25,
"learning_rate": 4.9985953843993194e-05,
"loss": 0.3773,
"step": 103
},
{
"epoch": 0.25,
"learning_rate": 4.998458440492497e-05,
"loss": 0.4226,
"step": 104
},
{
"epoch": 0.26,
"learning_rate": 4.9983151297800005e-05,
"loss": 0.4156,
"step": 105
},
{
"epoch": 0.26,
"learning_rate": 4.998165452627025e-05,
"loss": 0.3961,
"step": 106
},
{
"epoch": 0.26,
"learning_rate": 4.9980094094149945e-05,
"loss": 0.4271,
"step": 107
},
{
"epoch": 0.26,
"learning_rate": 4.997847000541551e-05,
"loss": 0.4275,
"step": 108
},
{
"epoch": 0.27,
"learning_rate": 4.997678226420561e-05,
"loss": 0.3846,
"step": 109
},
{
"epoch": 0.27,
"learning_rate": 4.99750308748211e-05,
"loss": 0.4237,
"step": 110
},
{
"epoch": 0.27,
"learning_rate": 4.997321584172504e-05,
"loss": 0.4215,
"step": 111
},
{
"epoch": 0.27,
"learning_rate": 4.9971337169542665e-05,
"loss": 0.3897,
"step": 112
},
{
"epoch": 0.28,
"learning_rate": 4.996939486306138e-05,
"loss": 0.4016,
"step": 113
},
{
"epoch": 0.28,
"learning_rate": 4.996738892723075e-05,
"loss": 0.4399,
"step": 114
},
{
"epoch": 0.28,
"learning_rate": 4.99653193671625e-05,
"loss": 0.4347,
"step": 115
},
{
"epoch": 0.28,
"learning_rate": 4.996318618813046e-05,
"loss": 0.4371,
"step": 116
},
{
"epoch": 0.28,
"learning_rate": 4.996098939557062e-05,
"loss": 0.4298,
"step": 117
},
{
"epoch": 0.29,
"learning_rate": 4.995872899508103e-05,
"loss": 0.4204,
"step": 118
},
{
"epoch": 0.29,
"learning_rate": 4.995640499242187e-05,
"loss": 0.3856,
"step": 119
},
{
"epoch": 0.29,
"learning_rate": 4.995401739351536e-05,
"loss": 0.4044,
"step": 120
},
{
"epoch": 0.29,
"learning_rate": 4.9951566204445834e-05,
"loss": 0.4019,
"step": 121
},
{
"epoch": 0.3,
"learning_rate": 4.9949051431459615e-05,
"loss": 0.4484,
"step": 122
},
{
"epoch": 0.3,
"learning_rate": 4.994647308096509e-05,
"loss": 0.4149,
"step": 123
},
{
"epoch": 0.3,
"learning_rate": 4.9943831159532665e-05,
"loss": 0.4163,
"step": 124
},
{
"epoch": 0.3,
"learning_rate": 4.994112567389471e-05,
"loss": 0.4097,
"step": 125
},
{
"epoch": 0.31,
"learning_rate": 4.9938356630945616e-05,
"loss": 0.4045,
"step": 126
},
{
"epoch": 0.31,
"learning_rate": 4.99355240377417e-05,
"loss": 0.4257,
"step": 127
},
{
"epoch": 0.31,
"learning_rate": 4.993262790150126e-05,
"loss": 0.3949,
"step": 128
},
{
"epoch": 0.31,
"learning_rate": 4.99296682296045e-05,
"loss": 0.4253,
"step": 129
},
{
"epoch": 0.32,
"learning_rate": 4.992664502959351e-05,
"loss": 0.3911,
"step": 130
},
{
"epoch": 0.32,
"learning_rate": 4.992355830917232e-05,
"loss": 0.4163,
"step": 131
},
{
"epoch": 0.32,
"learning_rate": 4.992040807620678e-05,
"loss": 0.3949,
"step": 132
},
{
"epoch": 0.32,
"learning_rate": 4.9917194338724614e-05,
"loss": 0.4146,
"step": 133
},
{
"epoch": 0.33,
"learning_rate": 4.9913917104915374e-05,
"loss": 0.4143,
"step": 134
},
{
"epoch": 0.33,
"learning_rate": 4.9910576383130414e-05,
"loss": 0.4096,
"step": 135
},
{
"epoch": 0.33,
"learning_rate": 4.990717218188286e-05,
"loss": 0.3887,
"step": 136
},
{
"epoch": 0.33,
"learning_rate": 4.990370450984763e-05,
"loss": 0.4135,
"step": 137
},
{
"epoch": 0.34,
"learning_rate": 4.990017337586137e-05,
"loss": 0.426,
"step": 138
},
{
"epoch": 0.34,
"learning_rate": 4.989657878892244e-05,
"loss": 0.4379,
"step": 139
},
{
"epoch": 0.34,
"learning_rate": 4.9892920758190907e-05,
"loss": 0.4185,
"step": 140
},
{
"epoch": 0.34,
"learning_rate": 4.988919929298851e-05,
"loss": 0.4309,
"step": 141
},
{
"epoch": 0.35,
"learning_rate": 4.9885414402798624e-05,
"loss": 0.4489,
"step": 142
},
{
"epoch": 0.35,
"learning_rate": 4.988156609726628e-05,
"loss": 0.3993,
"step": 143
},
{
"epoch": 0.35,
"learning_rate": 4.987765438619806e-05,
"loss": 0.4559,
"step": 144
},
{
"epoch": 0.35,
"learning_rate": 4.987367927956218e-05,
"loss": 0.4005,
"step": 145
},
{
"epoch": 0.36,
"learning_rate": 4.986964078748837e-05,
"loss": 0.3977,
"step": 146
},
{
"epoch": 0.36,
"learning_rate": 4.986553892026789e-05,
"loss": 0.4036,
"step": 147
},
{
"epoch": 0.36,
"learning_rate": 4.9861373688353504e-05,
"loss": 0.4411,
"step": 148
},
{
"epoch": 0.36,
"learning_rate": 4.9857145102359456e-05,
"loss": 0.4303,
"step": 149
},
{
"epoch": 0.37,
"learning_rate": 4.985285317306141e-05,
"loss": 0.4416,
"step": 150
},
{
"epoch": 0.37,
"learning_rate": 4.984849791139646e-05,
"loss": 0.3917,
"step": 151
},
{
"epoch": 0.37,
"learning_rate": 4.984407932846311e-05,
"loss": 0.3887,
"step": 152
},
{
"epoch": 0.37,
"learning_rate": 4.983959743552118e-05,
"loss": 0.4235,
"step": 153
},
{
"epoch": 0.37,
"learning_rate": 4.9835052243991874e-05,
"loss": 0.3951,
"step": 154
},
{
"epoch": 0.38,
"learning_rate": 4.983044376545767e-05,
"loss": 0.3995,
"step": 155
},
{
"epoch": 0.38,
"learning_rate": 4.982577201166232e-05,
"loss": 0.3995,
"step": 156
},
{
"epoch": 0.38,
"learning_rate": 4.982103699451082e-05,
"loss": 0.4131,
"step": 157
},
{
"epoch": 0.38,
"learning_rate": 4.981623872606938e-05,
"loss": 0.4159,
"step": 158
},
{
"epoch": 0.39,
"learning_rate": 4.981137721856541e-05,
"loss": 0.4039,
"step": 159
},
{
"epoch": 0.39,
"learning_rate": 4.980645248438745e-05,
"loss": 0.442,
"step": 160
},
{
"epoch": 0.39,
"learning_rate": 4.980146453608518e-05,
"loss": 0.4113,
"step": 161
},
{
"epoch": 0.39,
"learning_rate": 4.979641338636935e-05,
"loss": 0.4177,
"step": 162
},
{
"epoch": 0.4,
"learning_rate": 4.979129904811176e-05,
"loss": 0.4017,
"step": 163
},
{
"epoch": 0.4,
"learning_rate": 4.9786121534345265e-05,
"loss": 0.4274,
"step": 164
},
{
"epoch": 0.4,
"learning_rate": 4.978088085826368e-05,
"loss": 0.4544,
"step": 165
},
{
"epoch": 0.4,
"learning_rate": 4.977557703322178e-05,
"loss": 0.39,
"step": 166
},
{
"epoch": 0.41,
"learning_rate": 4.977021007273528e-05,
"loss": 0.418,
"step": 167
},
{
"epoch": 0.41,
"learning_rate": 4.976477999048077e-05,
"loss": 0.3923,
"step": 168
},
{
"epoch": 0.41,
"learning_rate": 4.97592868002957e-05,
"loss": 0.4087,
"step": 169
},
{
"epoch": 0.41,
"learning_rate": 4.9753730516178313e-05,
"loss": 0.4061,
"step": 170
},
{
"epoch": 0.42,
"learning_rate": 4.974811115228767e-05,
"loss": 0.3747,
"step": 171
},
{
"epoch": 0.42,
"learning_rate": 4.9742428722943545e-05,
"loss": 0.399,
"step": 172
},
{
"epoch": 0.42,
"learning_rate": 4.973668324262645e-05,
"loss": 0.3833,
"step": 173
},
{
"epoch": 0.42,
"learning_rate": 4.973087472597754e-05,
"loss": 0.4333,
"step": 174
},
{
"epoch": 0.43,
"learning_rate": 4.972500318779863e-05,
"loss": 0.406,
"step": 175
},
{
"epoch": 0.43,
"learning_rate": 4.9719068643052135e-05,
"loss": 0.39,
"step": 176
},
{
"epoch": 0.43,
"learning_rate": 4.9713071106860996e-05,
"loss": 0.4317,
"step": 177
},
{
"epoch": 0.43,
"learning_rate": 4.970701059450872e-05,
"loss": 0.4173,
"step": 178
},
{
"epoch": 0.44,
"learning_rate": 4.9700887121439244e-05,
"loss": 0.3884,
"step": 179
},
{
"epoch": 0.44,
"learning_rate": 4.969470070325699e-05,
"loss": 0.3944,
"step": 180
},
{
"epoch": 0.44,
"learning_rate": 4.968845135572677e-05,
"loss": 0.4076,
"step": 181
},
{
"epoch": 0.44,
"learning_rate": 4.968213909477376e-05,
"loss": 0.4195,
"step": 182
},
{
"epoch": 0.45,
"learning_rate": 4.967576393648344e-05,
"loss": 0.4093,
"step": 183
},
{
"epoch": 0.45,
"learning_rate": 4.9669325897101604e-05,
"loss": 0.3974,
"step": 184
},
{
"epoch": 0.45,
"learning_rate": 4.966282499303424e-05,
"loss": 0.4025,
"step": 185
},
{
"epoch": 0.45,
"learning_rate": 4.965626124084759e-05,
"loss": 0.4058,
"step": 186
},
{
"epoch": 0.46,
"learning_rate": 4.9649634657267995e-05,
"loss": 0.4007,
"step": 187
},
{
"epoch": 0.46,
"learning_rate": 4.964294525918196e-05,
"loss": 0.4218,
"step": 188
},
{
"epoch": 0.46,
"learning_rate": 4.963619306363602e-05,
"loss": 0.4141,
"step": 189
},
{
"epoch": 0.46,
"learning_rate": 4.962937808783675e-05,
"loss": 0.4233,
"step": 190
},
{
"epoch": 0.46,
"learning_rate": 4.9622500349150716e-05,
"loss": 0.3931,
"step": 191
},
{
"epoch": 0.47,
"learning_rate": 4.961555986510442e-05,
"loss": 0.4144,
"step": 192
},
{
"epoch": 0.47,
"learning_rate": 4.960855665338424e-05,
"loss": 0.3957,
"step": 193
},
{
"epoch": 0.47,
"learning_rate": 4.960149073183643e-05,
"loss": 0.3879,
"step": 194
},
{
"epoch": 0.47,
"learning_rate": 4.959436211846703e-05,
"loss": 0.4152,
"step": 195
},
{
"epoch": 0.48,
"learning_rate": 4.958717083144182e-05,
"loss": 0.4143,
"step": 196
},
{
"epoch": 0.48,
"learning_rate": 4.957991688908634e-05,
"loss": 0.3976,
"step": 197
},
{
"epoch": 0.48,
"learning_rate": 4.9572600309885744e-05,
"loss": 0.4072,
"step": 198
},
{
"epoch": 0.48,
"learning_rate": 4.956522111248483e-05,
"loss": 0.3903,
"step": 199
},
{
"epoch": 0.49,
"learning_rate": 4.955777931568797e-05,
"loss": 0.3908,
"step": 200
},
{
"epoch": 0.49,
"learning_rate": 4.955027493845903e-05,
"loss": 0.4284,
"step": 201
},
{
"epoch": 0.49,
"learning_rate": 4.954270799992138e-05,
"loss": 0.4072,
"step": 202
},
{
"epoch": 0.49,
"learning_rate": 4.953507851935779e-05,
"loss": 0.43,
"step": 203
},
{
"epoch": 0.5,
"learning_rate": 4.952738651621043e-05,
"loss": 0.4228,
"step": 204
},
{
"epoch": 0.5,
"learning_rate": 4.951963201008076e-05,
"loss": 0.3991,
"step": 205
},
{
"epoch": 0.5,
"learning_rate": 4.951181502072957e-05,
"loss": 0.4057,
"step": 206
},
{
"epoch": 0.5,
"learning_rate": 4.950393556807682e-05,
"loss": 0.3987,
"step": 207
},
{
"epoch": 0.51,
"learning_rate": 4.949599367220168e-05,
"loss": 0.4142,
"step": 208
},
{
"epoch": 0.51,
"learning_rate": 4.948798935334242e-05,
"loss": 0.3994,
"step": 209
},
{
"epoch": 0.51,
"learning_rate": 4.9479922631896405e-05,
"loss": 0.3989,
"step": 210
},
{
"epoch": 0.51,
"learning_rate": 4.947179352842001e-05,
"loss": 0.4186,
"step": 211
},
{
"epoch": 0.52,
"learning_rate": 4.946360206362858e-05,
"loss": 0.3896,
"step": 212
},
{
"epoch": 0.52,
"learning_rate": 4.9455348258396364e-05,
"loss": 0.4122,
"step": 213
},
{
"epoch": 0.52,
"learning_rate": 4.944703213375648e-05,
"loss": 0.4319,
"step": 214
},
{
"epoch": 0.52,
"learning_rate": 4.9438653710900864e-05,
"loss": 0.3997,
"step": 215
},
{
"epoch": 0.53,
"learning_rate": 4.943021301118019e-05,
"loss": 0.3924,
"step": 216
},
{
"epoch": 0.53,
"learning_rate": 4.942171005610385e-05,
"loss": 0.3952,
"step": 217
},
{
"epoch": 0.53,
"learning_rate": 4.941314486733986e-05,
"loss": 0.4137,
"step": 218
},
{
"epoch": 0.53,
"learning_rate": 4.940451746671484e-05,
"loss": 0.4277,
"step": 219
},
{
"epoch": 0.54,
"learning_rate": 4.9395827876213936e-05,
"loss": 0.4003,
"step": 220
},
{
"epoch": 0.54,
"learning_rate": 4.938707611798078e-05,
"loss": 0.3884,
"step": 221
},
{
"epoch": 0.54,
"learning_rate": 4.937826221431742e-05,
"loss": 0.4003,
"step": 222
},
{
"epoch": 0.54,
"learning_rate": 4.936938618768426e-05,
"loss": 0.4183,
"step": 223
},
{
"epoch": 0.55,
"learning_rate": 4.936044806070004e-05,
"loss": 0.4319,
"step": 224
},
{
"epoch": 0.55,
"learning_rate": 4.935144785614173e-05,
"loss": 0.3968,
"step": 225
},
{
"epoch": 0.55,
"learning_rate": 4.934238559694448e-05,
"loss": 0.3749,
"step": 226
},
{
"epoch": 0.55,
"learning_rate": 4.9333261306201595e-05,
"loss": 0.4044,
"step": 227
},
{
"epoch": 0.55,
"learning_rate": 4.932407500716445e-05,
"loss": 0.4067,
"step": 228
},
{
"epoch": 0.56,
"learning_rate": 4.9314826723242425e-05,
"loss": 0.417,
"step": 229
},
{
"epoch": 0.56,
"learning_rate": 4.9305516478002865e-05,
"loss": 0.4099,
"step": 230
},
{
"epoch": 0.56,
"learning_rate": 4.9296144295171024e-05,
"loss": 0.4201,
"step": 231
},
{
"epoch": 0.56,
"learning_rate": 4.928671019862995e-05,
"loss": 0.3848,
"step": 232
},
{
"epoch": 0.57,
"learning_rate": 4.92772142124205e-05,
"loss": 0.3959,
"step": 233
},
{
"epoch": 0.57,
"learning_rate": 4.9267656360741245e-05,
"loss": 0.3794,
"step": 234
},
{
"epoch": 0.57,
"learning_rate": 4.925803666794838e-05,
"loss": 0.3956,
"step": 235
},
{
"epoch": 0.57,
"learning_rate": 4.924835515855572e-05,
"loss": 0.423,
"step": 236
},
{
"epoch": 0.58,
"learning_rate": 4.92386118572346e-05,
"loss": 0.4015,
"step": 237
},
{
"epoch": 0.58,
"learning_rate": 4.92288067888138e-05,
"loss": 0.4043,
"step": 238
},
{
"epoch": 0.58,
"learning_rate": 4.921893997827951e-05,
"loss": 0.3711,
"step": 239
},
{
"epoch": 0.58,
"learning_rate": 4.920901145077527e-05,
"loss": 0.4248,
"step": 240
},
{
"epoch": 0.59,
"learning_rate": 4.919902123160187e-05,
"loss": 0.4235,
"step": 241
},
{
"epoch": 0.59,
"learning_rate": 4.918896934621734e-05,
"loss": 0.4214,
"step": 242
},
{
"epoch": 0.59,
"learning_rate": 4.9178855820236824e-05,
"loss": 0.3827,
"step": 243
},
{
"epoch": 0.59,
"learning_rate": 4.916868067943256e-05,
"loss": 0.3948,
"step": 244
},
{
"epoch": 0.6,
"learning_rate": 4.915844394973379e-05,
"loss": 0.3697,
"step": 245
},
{
"epoch": 0.6,
"learning_rate": 4.914814565722671e-05,
"loss": 0.4164,
"step": 246
},
{
"epoch": 0.6,
"learning_rate": 4.9137785828154393e-05,
"loss": 0.3942,
"step": 247
},
{
"epoch": 0.6,
"learning_rate": 4.9127364488916716e-05,
"loss": 0.3949,
"step": 248
},
{
"epoch": 0.61,
"learning_rate": 4.9116881666070327e-05,
"loss": 0.3867,
"step": 249
},
{
"epoch": 0.61,
"learning_rate": 4.9106337386328524e-05,
"loss": 0.3842,
"step": 250
},
{
"epoch": 0.61,
"learning_rate": 4.909573167656124e-05,
"loss": 0.3975,
"step": 251
},
{
"epoch": 0.61,
"learning_rate": 4.9085064563794925e-05,
"loss": 0.4215,
"step": 252
},
{
"epoch": 0.62,
"learning_rate": 4.907433607521251e-05,
"loss": 0.3782,
"step": 253
},
{
"epoch": 0.62,
"learning_rate": 4.906354623815336e-05,
"loss": 0.399,
"step": 254
},
{
"epoch": 0.62,
"learning_rate": 4.905269508011312e-05,
"loss": 0.4041,
"step": 255
},
{
"epoch": 0.62,
"learning_rate": 4.904178262874374e-05,
"loss": 0.3899,
"step": 256
},
{
"epoch": 0.63,
"learning_rate": 4.903080891185335e-05,
"loss": 0.3772,
"step": 257
},
{
"epoch": 0.63,
"learning_rate": 4.901977395740619e-05,
"loss": 0.4334,
"step": 258
},
{
"epoch": 0.63,
"learning_rate": 4.9008677793522584e-05,
"loss": 0.383,
"step": 259
},
{
"epoch": 0.63,
"learning_rate": 4.899752044847881e-05,
"loss": 0.4064,
"step": 260
},
{
"epoch": 0.64,
"learning_rate": 4.898630195070705e-05,
"loss": 0.3921,
"step": 261
},
{
"epoch": 0.64,
"learning_rate": 4.8975022328795325e-05,
"loss": 0.415,
"step": 262
},
{
"epoch": 0.64,
"learning_rate": 4.8963681611487445e-05,
"loss": 0.4128,
"step": 263
},
{
"epoch": 0.64,
"learning_rate": 4.895227982768287e-05,
"loss": 0.4232,
"step": 264
},
{
"epoch": 0.64,
"learning_rate": 4.89408170064367e-05,
"loss": 0.3914,
"step": 265
},
{
"epoch": 0.65,
"learning_rate": 4.892929317695957e-05,
"loss": 0.404,
"step": 266
},
{
"epoch": 0.65,
"learning_rate": 4.891770836861757e-05,
"loss": 0.4274,
"step": 267
},
{
"epoch": 0.65,
"learning_rate": 4.8906062610932215e-05,
"loss": 0.4025,
"step": 268
},
{
"epoch": 0.65,
"learning_rate": 4.889435593358029e-05,
"loss": 0.3822,
"step": 269
},
{
"epoch": 0.66,
"learning_rate": 4.888258836639386e-05,
"loss": 0.4048,
"step": 270
},
{
"epoch": 0.66,
"learning_rate": 4.8870759939360136e-05,
"loss": 0.3952,
"step": 271
},
{
"epoch": 0.66,
"learning_rate": 4.885887068262143e-05,
"loss": 0.4112,
"step": 272
},
{
"epoch": 0.66,
"learning_rate": 4.884692062647506e-05,
"loss": 0.4039,
"step": 273
},
{
"epoch": 0.67,
"learning_rate": 4.8834909801373264e-05,
"loss": 0.4157,
"step": 274
},
{
"epoch": 0.67,
"learning_rate": 4.8822838237923166e-05,
"loss": 0.4066,
"step": 275
},
{
"epoch": 0.67,
"learning_rate": 4.881070596688664e-05,
"loss": 0.387,
"step": 276
},
{
"epoch": 0.67,
"learning_rate": 4.8798513019180295e-05,
"loss": 0.407,
"step": 277
},
{
"epoch": 0.68,
"learning_rate": 4.878625942587532e-05,
"loss": 0.4103,
"step": 278
},
{
"epoch": 0.68,
"learning_rate": 4.877394521819747e-05,
"loss": 0.411,
"step": 279
},
{
"epoch": 0.68,
"learning_rate": 4.8761570427526973e-05,
"loss": 0.3986,
"step": 280
},
{
"epoch": 0.68,
"learning_rate": 4.874913508539844e-05,
"loss": 0.3858,
"step": 281
},
{
"epoch": 0.69,
"learning_rate": 4.873663922350073e-05,
"loss": 0.4145,
"step": 282
},
{
"epoch": 0.69,
"learning_rate": 4.8724082873677027e-05,
"loss": 0.4027,
"step": 283
},
{
"epoch": 0.69,
"learning_rate": 4.871146606792455e-05,
"loss": 0.393,
"step": 284
},
{
"epoch": 0.69,
"learning_rate": 4.8698788838394644e-05,
"loss": 0.3802,
"step": 285
},
{
"epoch": 0.7,
"learning_rate": 4.8686051217392606e-05,
"loss": 0.3923,
"step": 286
},
{
"epoch": 0.7,
"learning_rate": 4.867325323737765e-05,
"loss": 0.3985,
"step": 287
},
{
"epoch": 0.7,
"learning_rate": 4.866039493096276e-05,
"loss": 0.3941,
"step": 288
},
{
"epoch": 0.7,
"learning_rate": 4.86474763309147e-05,
"loss": 0.3776,
"step": 289
},
{
"epoch": 0.71,
"learning_rate": 4.863449747015384e-05,
"loss": 0.4265,
"step": 290
},
{
"epoch": 0.71,
"learning_rate": 4.862145838175413e-05,
"loss": 0.4001,
"step": 291
},
{
"epoch": 0.71,
"learning_rate": 4.860835909894301e-05,
"loss": 0.4198,
"step": 292
},
{
"epoch": 0.71,
"learning_rate": 4.859519965510129e-05,
"loss": 0.383,
"step": 293
},
{
"epoch": 0.72,
"learning_rate": 4.858198008376308e-05,
"loss": 0.4056,
"step": 294
},
{
"epoch": 0.72,
"learning_rate": 4.856870041861575e-05,
"loss": 0.4108,
"step": 295
},
{
"epoch": 0.72,
"learning_rate": 4.8555360693499786e-05,
"loss": 0.3703,
"step": 296
},
{
"epoch": 0.72,
"learning_rate": 4.8541960942408716e-05,
"loss": 0.3799,
"step": 297
},
{
"epoch": 0.73,
"learning_rate": 4.852850119948904e-05,
"loss": 0.3736,
"step": 298
},
{
"epoch": 0.73,
"learning_rate": 4.851498149904014e-05,
"loss": 0.3908,
"step": 299
},
{
"epoch": 0.73,
"learning_rate": 4.850140187551417e-05,
"loss": 0.3968,
"step": 300
},
{
"epoch": 0.73,
"learning_rate": 4.8487762363516024e-05,
"loss": 0.3925,
"step": 301
},
{
"epoch": 0.74,
"learning_rate": 4.847406299780316e-05,
"loss": 0.3768,
"step": 302
},
{
"epoch": 0.74,
"learning_rate": 4.8460303813285585e-05,
"loss": 0.4419,
"step": 303
},
{
"epoch": 0.74,
"learning_rate": 4.844648484502575e-05,
"loss": 0.3688,
"step": 304
},
{
"epoch": 0.74,
"learning_rate": 4.843260612823844e-05,
"loss": 0.4208,
"step": 305
},
{
"epoch": 0.74,
"learning_rate": 4.8418667698290696e-05,
"loss": 0.4063,
"step": 306
},
{
"epoch": 0.75,
"learning_rate": 4.840466959070174e-05,
"loss": 0.3719,
"step": 307
},
{
"epoch": 0.75,
"learning_rate": 4.839061184114285e-05,
"loss": 0.3985,
"step": 308
},
{
"epoch": 0.75,
"learning_rate": 4.837649448543731e-05,
"loss": 0.3868,
"step": 309
},
{
"epoch": 0.75,
"learning_rate": 4.8362317559560274e-05,
"loss": 0.3881,
"step": 310
},
{
"epoch": 0.76,
"learning_rate": 4.834808109963873e-05,
"loss": 0.4067,
"step": 311
},
{
"epoch": 0.76,
"learning_rate": 4.833378514195133e-05,
"loss": 0.3883,
"step": 312
},
{
"epoch": 0.76,
"learning_rate": 4.83194297229284e-05,
"loss": 0.3996,
"step": 313
},
{
"epoch": 0.76,
"learning_rate": 4.830501487915174e-05,
"loss": 0.4075,
"step": 314
},
{
"epoch": 0.77,
"learning_rate": 4.8290540647354624e-05,
"loss": 0.3918,
"step": 315
},
{
"epoch": 0.77,
"learning_rate": 4.8276007064421635e-05,
"loss": 0.4206,
"step": 316
},
{
"epoch": 0.77,
"learning_rate": 4.826141416738861e-05,
"loss": 0.3924,
"step": 317
},
{
"epoch": 0.77,
"learning_rate": 4.824676199344253e-05,
"loss": 0.3814,
"step": 318
},
{
"epoch": 0.78,
"learning_rate": 4.8232050579921445e-05,
"loss": 0.3809,
"step": 319
},
{
"epoch": 0.78,
"learning_rate": 4.821727996431435e-05,
"loss": 0.3979,
"step": 320
},
{
"epoch": 0.78,
"learning_rate": 4.8202450184261116e-05,
"loss": 0.4201,
"step": 321
},
{
"epoch": 0.78,
"learning_rate": 4.8187561277552374e-05,
"loss": 0.3785,
"step": 322
},
{
"epoch": 0.79,
"learning_rate": 4.817261328212942e-05,
"loss": 0.3918,
"step": 323
},
{
"epoch": 0.79,
"learning_rate": 4.815760623608415e-05,
"loss": 0.3789,
"step": 324
},
{
"epoch": 0.79,
"learning_rate": 4.8142540177658925e-05,
"loss": 0.3967,
"step": 325
},
{
"epoch": 0.79,
"learning_rate": 4.812741514524647e-05,
"loss": 0.4155,
"step": 326
},
{
"epoch": 0.8,
"learning_rate": 4.811223117738981e-05,
"loss": 0.3727,
"step": 327
},
{
"epoch": 0.8,
"learning_rate": 4.8096988312782174e-05,
"loss": 0.396,
"step": 328
},
{
"epoch": 0.8,
"learning_rate": 4.8081686590266835e-05,
"loss": 0.3694,
"step": 329
},
{
"epoch": 0.8,
"learning_rate": 4.806632604883708e-05,
"loss": 0.3919,
"step": 330
},
{
"epoch": 0.81,
"learning_rate": 4.8050906727636085e-05,
"loss": 0.3757,
"step": 331
},
{
"epoch": 0.81,
"learning_rate": 4.8035428665956806e-05,
"loss": 0.381,
"step": 332
},
{
"epoch": 0.81,
"learning_rate": 4.801989190324188e-05,
"loss": 0.3915,
"step": 333
},
{
"epoch": 0.81,
"learning_rate": 4.800429647908354e-05,
"loss": 0.3995,
"step": 334
},
{
"epoch": 0.82,
"learning_rate": 4.798864243322353e-05,
"loss": 0.4188,
"step": 335
},
{
"epoch": 0.82,
"learning_rate": 4.7972929805552926e-05,
"loss": 0.3832,
"step": 336
},
{
"epoch": 0.82,
"learning_rate": 4.795715863611212e-05,
"loss": 0.3624,
"step": 337
},
{
"epoch": 0.82,
"learning_rate": 4.79413289650907e-05,
"loss": 0.3779,
"step": 338
},
{
"epoch": 0.83,
"learning_rate": 4.7925440832827307e-05,
"loss": 0.425,
"step": 339
},
{
"epoch": 0.83,
"learning_rate": 4.790949427980956e-05,
"loss": 0.3829,
"step": 340
},
{
"epoch": 0.83,
"learning_rate": 4.7893489346673965e-05,
"loss": 0.3877,
"step": 341
},
{
"epoch": 0.83,
"learning_rate": 4.7877426074205786e-05,
"loss": 0.4043,
"step": 342
},
{
"epoch": 0.83,
"learning_rate": 4.786130450333897e-05,
"loss": 0.3687,
"step": 343
},
{
"epoch": 0.84,
"learning_rate": 4.784512467515599e-05,
"loss": 0.3679,
"step": 344
},
{
"epoch": 0.84,
"learning_rate": 4.782888663088781e-05,
"loss": 0.3957,
"step": 345
},
{
"epoch": 0.84,
"learning_rate": 4.781259041191375e-05,
"loss": 0.4215,
"step": 346
},
{
"epoch": 0.84,
"learning_rate": 4.7796236059761346e-05,
"loss": 0.3881,
"step": 347
},
{
"epoch": 0.85,
"learning_rate": 4.777982361610629e-05,
"loss": 0.3882,
"step": 348
},
{
"epoch": 0.85,
"learning_rate": 4.7763353122772305e-05,
"loss": 0.386,
"step": 349
},
{
"epoch": 0.85,
"learning_rate": 4.774682462173105e-05,
"loss": 0.3747,
"step": 350
},
{
"epoch": 0.85,
"learning_rate": 4.773023815510199e-05,
"loss": 0.4025,
"step": 351
},
{
"epoch": 0.86,
"learning_rate": 4.7713593765152316e-05,
"loss": 0.3759,
"step": 352
},
{
"epoch": 0.86,
"learning_rate": 4.7696891494296826e-05,
"loss": 0.3693,
"step": 353
},
{
"epoch": 0.86,
"learning_rate": 4.7680131385097806e-05,
"loss": 0.3718,
"step": 354
},
{
"epoch": 0.86,
"learning_rate": 4.766331348026493e-05,
"loss": 0.3787,
"step": 355
},
{
"epoch": 0.87,
"learning_rate": 4.764643782265516e-05,
"loss": 0.3809,
"step": 356
},
{
"epoch": 0.87,
"learning_rate": 4.762950445527264e-05,
"loss": 0.416,
"step": 357
},
{
"epoch": 0.87,
"learning_rate": 4.7612513421268544e-05,
"loss": 0.3663,
"step": 358
},
{
"epoch": 0.87,
"learning_rate": 4.7595464763941024e-05,
"loss": 0.3872,
"step": 359
},
{
"epoch": 0.88,
"learning_rate": 4.7578358526735065e-05,
"loss": 0.3923,
"step": 360
},
{
"epoch": 0.88,
"learning_rate": 4.756119475324237e-05,
"loss": 0.3853,
"step": 361
},
{
"epoch": 0.88,
"learning_rate": 4.7543973487201286e-05,
"loss": 0.4108,
"step": 362
},
{
"epoch": 0.88,
"learning_rate": 4.752669477249666e-05,
"loss": 0.3972,
"step": 363
},
{
"epoch": 0.89,
"learning_rate": 4.750935865315971e-05,
"loss": 0.3796,
"step": 364
},
{
"epoch": 0.89,
"learning_rate": 4.749196517336798e-05,
"loss": 0.3624,
"step": 365
},
{
"epoch": 0.89,
"learning_rate": 4.747451437744515e-05,
"loss": 0.3902,
"step": 366
},
{
"epoch": 0.89,
"learning_rate": 4.7457006309860976e-05,
"loss": 0.4268,
"step": 367
},
{
"epoch": 0.9,
"learning_rate": 4.7439441015231154e-05,
"loss": 0.3881,
"step": 368
},
{
"epoch": 0.9,
"learning_rate": 4.742181853831721e-05,
"loss": 0.3927,
"step": 369
},
{
"epoch": 0.9,
"learning_rate": 4.740413892402639e-05,
"loss": 0.4028,
"step": 370
},
{
"epoch": 0.9,
"learning_rate": 4.7386402217411555e-05,
"loss": 0.3957,
"step": 371
},
{
"epoch": 0.91,
"learning_rate": 4.7368608463671013e-05,
"loss": 0.3859,
"step": 372
},
{
"epoch": 0.91,
"learning_rate": 4.7350757708148495e-05,
"loss": 0.4055,
"step": 373
},
{
"epoch": 0.91,
"learning_rate": 4.733284999633297e-05,
"loss": 0.4085,
"step": 374
},
{
"epoch": 0.91,
"learning_rate": 4.731488537385853e-05,
"loss": 0.3968,
"step": 375
},
{
"epoch": 0.92,
"learning_rate": 4.729686388650432e-05,
"loss": 0.4205,
"step": 376
},
{
"epoch": 0.92,
"learning_rate": 4.7278785580194365e-05,
"loss": 0.3751,
"step": 377
},
{
"epoch": 0.92,
"learning_rate": 4.7260650500997514e-05,
"loss": 0.3866,
"step": 378
},
{
"epoch": 0.92,
"learning_rate": 4.724245869512727e-05,
"loss": 0.3916,
"step": 379
},
{
"epoch": 0.92,
"learning_rate": 4.722421020894169e-05,
"loss": 0.3858,
"step": 380
},
{
"epoch": 0.93,
"learning_rate": 4.7205905088943286e-05,
"loss": 0.4032,
"step": 381
},
{
"epoch": 0.93,
"learning_rate": 4.7187543381778864e-05,
"loss": 0.3772,
"step": 382
},
{
"epoch": 0.93,
"learning_rate": 4.716912513423945e-05,
"loss": 0.3906,
"step": 383
},
{
"epoch": 0.93,
"learning_rate": 4.715065039326015e-05,
"loss": 0.4172,
"step": 384
},
{
"epoch": 0.94,
"learning_rate": 4.7132119205920026e-05,
"loss": 0.3682,
"step": 385
},
{
"epoch": 0.94,
"learning_rate": 4.7113531619441984e-05,
"loss": 0.3684,
"step": 386
},
{
"epoch": 0.94,
"learning_rate": 4.709488768119266e-05,
"loss": 0.4049,
"step": 387
},
{
"epoch": 0.94,
"learning_rate": 4.707618743868226e-05,
"loss": 0.3852,
"step": 388
},
{
"epoch": 0.95,
"learning_rate": 4.705743093956452e-05,
"loss": 0.4162,
"step": 389
},
{
"epoch": 0.95,
"learning_rate": 4.703861823163649e-05,
"loss": 0.353,
"step": 390
},
{
"epoch": 0.95,
"learning_rate": 4.7019749362838476e-05,
"loss": 0.3958,
"step": 391
},
{
"epoch": 0.95,
"learning_rate": 4.7000824381253905e-05,
"loss": 0.406,
"step": 392
},
{
"epoch": 0.96,
"learning_rate": 4.6981843335109174e-05,
"loss": 0.3851,
"step": 393
},
{
"epoch": 0.96,
"learning_rate": 4.6962806272773564e-05,
"loss": 0.3828,
"step": 394
},
{
"epoch": 0.96,
"learning_rate": 4.69437132427591e-05,
"loss": 0.4331,
"step": 395
},
{
"epoch": 0.96,
"learning_rate": 4.6924564293720434e-05,
"loss": 0.3946,
"step": 396
},
{
"epoch": 0.97,
"learning_rate": 4.6905359474454705e-05,
"loss": 0.3799,
"step": 397
},
{
"epoch": 0.97,
"learning_rate": 4.6886098833901436e-05,
"loss": 0.3543,
"step": 398
},
{
"epoch": 0.97,
"learning_rate": 4.686678242114239e-05,
"loss": 0.3772,
"step": 399
},
{
"epoch": 0.97,
"learning_rate": 4.684741028540146e-05,
"loss": 0.4009,
"step": 400
},
{
"epoch": 0.98,
"learning_rate": 4.6827982476044534e-05,
"loss": 0.3806,
"step": 401
},
{
"epoch": 0.98,
"learning_rate": 4.680849904257938e-05,
"loss": 0.3781,
"step": 402
},
{
"epoch": 0.98,
"learning_rate": 4.678896003465549e-05,
"loss": 0.4264,
"step": 403
},
{
"epoch": 0.98,
"learning_rate": 4.6769365502064025e-05,
"loss": 0.3857,
"step": 404
},
{
"epoch": 0.99,
"learning_rate": 4.674971549473757e-05,
"loss": 0.3797,
"step": 405
},
{
"epoch": 0.99,
"learning_rate": 4.6730010062750134e-05,
"loss": 0.3847,
"step": 406
},
{
"epoch": 0.99,
"learning_rate": 4.671024925631694e-05,
"loss": 0.382,
"step": 407
},
{
"epoch": 0.99,
"learning_rate": 4.669043312579433e-05,
"loss": 0.3778,
"step": 408
},
{
"epoch": 1.0,
"learning_rate": 4.667056172167962e-05,
"loss": 0.3837,
"step": 409
},
{
"epoch": 1.0,
"learning_rate": 4.665063509461097e-05,
"loss": 0.3807,
"step": 410
},
{
"epoch": 1.0,
"eval_loss": 0.5687975287437439,
"eval_runtime": 116.1454,
"eval_samples_per_second": 6.561,
"eval_steps_per_second": 0.413,
"step": 410
},
{
"epoch": 1.0,
"learning_rate": 4.6630653295367286e-05,
"loss": 0.3618,
"step": 411
},
{
"epoch": 1.0,
"learning_rate": 4.6610616374868066e-05,
"loss": 0.2856,
"step": 412
},
{
"epoch": 1.01,
"learning_rate": 4.659052438417326e-05,
"loss": 0.2727,
"step": 413
},
{
"epoch": 1.01,
"learning_rate": 4.6570377374483154e-05,
"loss": 0.2632,
"step": 414
},
{
"epoch": 1.01,
"learning_rate": 4.6550175397138253e-05,
"loss": 0.2758,
"step": 415
},
{
"epoch": 1.01,
"learning_rate": 4.652991850361912e-05,
"loss": 0.2561,
"step": 416
},
{
"epoch": 1.01,
"learning_rate": 4.650960674554627e-05,
"loss": 0.2807,
"step": 417
},
{
"epoch": 1.02,
"learning_rate": 4.648924017468003e-05,
"loss": 0.2686,
"step": 418
},
{
"epoch": 1.02,
"learning_rate": 4.64688188429204e-05,
"loss": 0.2584,
"step": 419
},
{
"epoch": 1.02,
"learning_rate": 4.644834280230692e-05,
"loss": 0.2368,
"step": 420
},
{
"epoch": 1.02,
"learning_rate": 4.6427812105018576e-05,
"loss": 0.2642,
"step": 421
},
{
"epoch": 1.03,
"learning_rate": 4.6407226803373586e-05,
"loss": 0.2476,
"step": 422
},
{
"epoch": 1.03,
"learning_rate": 4.6386586949829356e-05,
"loss": 0.249,
"step": 423
},
{
"epoch": 1.03,
"learning_rate": 4.6365892596982297e-05,
"loss": 0.2541,
"step": 424
},
{
"epoch": 1.03,
"learning_rate": 4.634514379756769e-05,
"loss": 0.2785,
"step": 425
},
{
"epoch": 1.04,
"learning_rate": 4.632434060445956e-05,
"loss": 0.2369,
"step": 426
},
{
"epoch": 1.04,
"learning_rate": 4.630348307067057e-05,
"loss": 0.27,
"step": 427
},
{
"epoch": 1.04,
"learning_rate": 4.6282571249351826e-05,
"loss": 0.2603,
"step": 428
},
{
"epoch": 1.04,
"learning_rate": 4.626160519379279e-05,
"loss": 0.2498,
"step": 429
},
{
"epoch": 1.05,
"learning_rate": 4.624058495742114e-05,
"loss": 0.2654,
"step": 430
},
{
"epoch": 1.05,
"learning_rate": 4.621951059380258e-05,
"loss": 0.2316,
"step": 431
},
{
"epoch": 1.05,
"learning_rate": 4.619838215664082e-05,
"loss": 0.2515,
"step": 432
},
{
"epoch": 1.05,
"learning_rate": 4.6177199699777285e-05,
"loss": 0.2387,
"step": 433
},
{
"epoch": 1.06,
"learning_rate": 4.615596327719111e-05,
"loss": 0.2628,
"step": 434
},
{
"epoch": 1.06,
"learning_rate": 4.613467294299892e-05,
"loss": 0.2586,
"step": 435
},
{
"epoch": 1.06,
"learning_rate": 4.611332875145477e-05,
"loss": 0.2698,
"step": 436
},
{
"epoch": 1.06,
"learning_rate": 4.609193075694989e-05,
"loss": 0.254,
"step": 437
},
{
"epoch": 1.07,
"learning_rate": 4.607047901401267e-05,
"loss": 0.2585,
"step": 438
},
{
"epoch": 1.07,
"learning_rate": 4.604897357730845e-05,
"loss": 0.2311,
"step": 439
},
{
"epoch": 1.07,
"learning_rate": 4.60274145016394e-05,
"loss": 0.2714,
"step": 440
},
{
"epoch": 1.07,
"learning_rate": 4.600580184194436e-05,
"loss": 0.2536,
"step": 441
},
{
"epoch": 1.08,
"learning_rate": 4.598413565329875e-05,
"loss": 0.2485,
"step": 442
},
{
"epoch": 1.08,
"learning_rate": 4.5962415990914375e-05,
"loss": 0.2466,
"step": 443
},
{
"epoch": 1.08,
"learning_rate": 4.59406429101393e-05,
"loss": 0.2465,
"step": 444
},
{
"epoch": 1.08,
"learning_rate": 4.5918816466457746e-05,
"loss": 0.2478,
"step": 445
},
{
"epoch": 1.09,
"learning_rate": 4.5896936715489885e-05,
"loss": 0.2733,
"step": 446
},
{
"epoch": 1.09,
"learning_rate": 4.587500371299176e-05,
"loss": 0.2444,
"step": 447
},
{
"epoch": 1.09,
"learning_rate": 4.585301751485508e-05,
"loss": 0.2629,
"step": 448
},
{
"epoch": 1.09,
"learning_rate": 4.583097817710716e-05,
"loss": 0.2702,
"step": 449
},
{
"epoch": 1.1,
"learning_rate": 4.580888575591068e-05,
"loss": 0.2694,
"step": 450
},
{
"epoch": 1.1,
"learning_rate": 4.5786740307563636e-05,
"loss": 0.2578,
"step": 451
},
{
"epoch": 1.1,
"learning_rate": 4.576454188849911e-05,
"loss": 0.2516,
"step": 452
},
{
"epoch": 1.1,
"learning_rate": 4.574229055528522e-05,
"loss": 0.2685,
"step": 453
},
{
"epoch": 1.1,
"learning_rate": 4.5719986364624866e-05,
"loss": 0.2617,
"step": 454
},
{
"epoch": 1.11,
"learning_rate": 4.569762937335569e-05,
"loss": 0.2532,
"step": 455
},
{
"epoch": 1.11,
"learning_rate": 4.5675219638449876e-05,
"loss": 0.2885,
"step": 456
},
{
"epoch": 1.11,
"learning_rate": 4.5652757217013995e-05,
"loss": 0.2597,
"step": 457
},
{
"epoch": 1.11,
"learning_rate": 4.5630242166288895e-05,
"loss": 0.266,
"step": 458
},
{
"epoch": 1.12,
"learning_rate": 4.5607674543649546e-05,
"loss": 0.254,
"step": 459
},
{
"epoch": 1.12,
"learning_rate": 4.5585054406604864e-05,
"loss": 0.2702,
"step": 460
},
{
"epoch": 1.12,
"learning_rate": 4.556238181279761e-05,
"loss": 0.2475,
"step": 461
},
{
"epoch": 1.12,
"learning_rate": 4.5539656820004194e-05,
"loss": 0.2458,
"step": 462
},
{
"epoch": 1.13,
"learning_rate": 4.551687948613459e-05,
"loss": 0.2492,
"step": 463
},
{
"epoch": 1.13,
"learning_rate": 4.5494049869232125e-05,
"loss": 0.269,
"step": 464
},
{
"epoch": 1.13,
"learning_rate": 4.5471168027473356e-05,
"loss": 0.2646,
"step": 465
},
{
"epoch": 1.13,
"learning_rate": 4.5448234019167945e-05,
"loss": 0.2459,
"step": 466
},
{
"epoch": 1.14,
"learning_rate": 4.5425247902758474e-05,
"loss": 0.2762,
"step": 467
},
{
"epoch": 1.14,
"learning_rate": 4.540220973682032e-05,
"loss": 0.2511,
"step": 468
},
{
"epoch": 1.14,
"learning_rate": 4.537911958006149e-05,
"loss": 0.252,
"step": 469
},
{
"epoch": 1.14,
"learning_rate": 4.5355977491322485e-05,
"loss": 0.2679,
"step": 470
},
{
"epoch": 1.15,
"learning_rate": 4.5332783529576146e-05,
"loss": 0.2551,
"step": 471
},
{
"epoch": 1.15,
"learning_rate": 4.530953775392749e-05,
"loss": 0.2731,
"step": 472
},
{
"epoch": 1.15,
"learning_rate": 4.5286240223613584e-05,
"loss": 0.2612,
"step": 473
},
{
"epoch": 1.15,
"learning_rate": 4.526289099800337e-05,
"loss": 0.2739,
"step": 474
},
{
"epoch": 1.16,
"learning_rate": 4.523949013659753e-05,
"loss": 0.2644,
"step": 475
},
{
"epoch": 1.16,
"learning_rate": 4.521603769902835e-05,
"loss": 0.2811,
"step": 476
},
{
"epoch": 1.16,
"learning_rate": 4.519253374505949e-05,
"loss": 0.2624,
"step": 477
},
{
"epoch": 1.16,
"learning_rate": 4.5168978334585956e-05,
"loss": 0.2552,
"step": 478
},
{
"epoch": 1.17,
"learning_rate": 4.514537152763384e-05,
"loss": 0.27,
"step": 479
},
{
"epoch": 1.17,
"learning_rate": 4.5121713384360215e-05,
"loss": 0.2652,
"step": 480
},
{
"epoch": 1.17,
"learning_rate": 4.5098003965052984e-05,
"loss": 0.2698,
"step": 481
},
{
"epoch": 1.17,
"learning_rate": 4.507424333013069e-05,
"loss": 0.2585,
"step": 482
},
{
"epoch": 1.18,
"learning_rate": 4.505043154014243e-05,
"loss": 0.2573,
"step": 483
},
{
"epoch": 1.18,
"learning_rate": 4.502656865576762e-05,
"loss": 0.2561,
"step": 484
},
{
"epoch": 1.18,
"learning_rate": 4.5002654737815905e-05,
"loss": 0.2629,
"step": 485
},
{
"epoch": 1.18,
"learning_rate": 4.497868984722697e-05,
"loss": 0.2696,
"step": 486
},
{
"epoch": 1.19,
"learning_rate": 4.4954674045070387e-05,
"loss": 0.2727,
"step": 487
},
{
"epoch": 1.19,
"learning_rate": 4.493060739254548e-05,
"loss": 0.2718,
"step": 488
},
{
"epoch": 1.19,
"learning_rate": 4.4906489950981126e-05,
"loss": 0.2537,
"step": 489
},
{
"epoch": 1.19,
"learning_rate": 4.488232178183567e-05,
"loss": 0.2565,
"step": 490
},
{
"epoch": 1.2,
"learning_rate": 4.4858102946696676e-05,
"loss": 0.2554,
"step": 491
},
{
"epoch": 1.2,
"learning_rate": 4.4833833507280884e-05,
"loss": 0.2904,
"step": 492
},
{
"epoch": 1.2,
"learning_rate": 4.4809513525433925e-05,
"loss": 0.262,
"step": 493
},
{
"epoch": 1.2,
"learning_rate": 4.478514306313025e-05,
"loss": 0.2537,
"step": 494
},
{
"epoch": 1.2,
"learning_rate": 4.476072218247297e-05,
"loss": 0.2583,
"step": 495
},
{
"epoch": 1.21,
"learning_rate": 4.4736250945693655e-05,
"loss": 0.2712,
"step": 496
},
{
"epoch": 1.21,
"learning_rate": 4.471172941515219e-05,
"loss": 0.257,
"step": 497
},
{
"epoch": 1.21,
"learning_rate": 4.468715765333664e-05,
"loss": 0.2617,
"step": 498
},
{
"epoch": 1.21,
"learning_rate": 4.466253572286308e-05,
"loss": 0.2528,
"step": 499
},
{
"epoch": 1.22,
"learning_rate": 4.46378636864754e-05,
"loss": 0.2711,
"step": 500
},
{
"epoch": 1.22,
"learning_rate": 4.46131416070452e-05,
"loss": 0.2568,
"step": 501
},
{
"epoch": 1.22,
"learning_rate": 4.458836954757161e-05,
"loss": 0.2702,
"step": 502
},
{
"epoch": 1.22,
"learning_rate": 4.4563547571181086e-05,
"loss": 0.2596,
"step": 503
},
{
"epoch": 1.23,
"learning_rate": 4.4538675741127326e-05,
"loss": 0.2478,
"step": 504
},
{
"epoch": 1.23,
"learning_rate": 4.451375412079106e-05,
"loss": 0.2438,
"step": 505
},
{
"epoch": 1.23,
"learning_rate": 4.4488782773679885e-05,
"loss": 0.2797,
"step": 506
},
{
"epoch": 1.23,
"learning_rate": 4.4463761763428125e-05,
"loss": 0.2355,
"step": 507
},
{
"epoch": 1.24,
"learning_rate": 4.443869115379667e-05,
"loss": 0.2718,
"step": 508
},
{
"epoch": 1.24,
"learning_rate": 4.441357100867278e-05,
"loss": 0.2654,
"step": 509
},
{
"epoch": 1.24,
"learning_rate": 4.4388401392069975e-05,
"loss": 0.2776,
"step": 510
},
{
"epoch": 1.24,
"learning_rate": 4.4363182368127824e-05,
"loss": 0.2631,
"step": 511
},
{
"epoch": 1.25,
"learning_rate": 4.433791400111179e-05,
"loss": 0.2599,
"step": 512
},
{
"epoch": 1.25,
"learning_rate": 4.4312596355413116e-05,
"loss": 0.2629,
"step": 513
},
{
"epoch": 1.25,
"learning_rate": 4.428722949554857e-05,
"loss": 0.25,
"step": 514
},
{
"epoch": 1.25,
"learning_rate": 4.426181348616039e-05,
"loss": 0.2557,
"step": 515
},
{
"epoch": 1.26,
"learning_rate": 4.4236348392016e-05,
"loss": 0.2793,
"step": 516
},
{
"epoch": 1.26,
"learning_rate": 4.421083427800795e-05,
"loss": 0.2641,
"step": 517
},
{
"epoch": 1.26,
"learning_rate": 4.41852712091537e-05,
"loss": 0.2696,
"step": 518
},
{
"epoch": 1.26,
"learning_rate": 4.415965925059544e-05,
"loss": 0.2637,
"step": 519
},
{
"epoch": 1.27,
"learning_rate": 4.413399846759998e-05,
"loss": 0.2772,
"step": 520
},
{
"epoch": 1.27,
"learning_rate": 4.4108288925558505e-05,
"loss": 0.2432,
"step": 521
},
{
"epoch": 1.27,
"learning_rate": 4.40825306899865e-05,
"loss": 0.2657,
"step": 522
},
{
"epoch": 1.27,
"learning_rate": 4.405672382652349e-05,
"loss": 0.2635,
"step": 523
},
{
"epoch": 1.28,
"learning_rate": 4.403086840093297e-05,
"loss": 0.2551,
"step": 524
},
{
"epoch": 1.28,
"learning_rate": 4.400496447910212e-05,
"loss": 0.2555,
"step": 525
},
{
"epoch": 1.28,
"learning_rate": 4.397901212704176e-05,
"loss": 0.2785,
"step": 526
},
{
"epoch": 1.28,
"learning_rate": 4.395301141088611e-05,
"loss": 0.2866,
"step": 527
},
{
"epoch": 1.29,
"learning_rate": 4.3926962396892606e-05,
"loss": 0.256,
"step": 528
},
{
"epoch": 1.29,
"learning_rate": 4.3900865151441796e-05,
"loss": 0.2585,
"step": 529
},
{
"epoch": 1.29,
"learning_rate": 4.387471974103713e-05,
"loss": 0.265,
"step": 530
},
{
"epoch": 1.29,
"learning_rate": 4.384852623230478e-05,
"loss": 0.2445,
"step": 531
},
{
"epoch": 1.29,
"learning_rate": 4.38222846919935e-05,
"loss": 0.2608,
"step": 532
},
{
"epoch": 1.3,
"learning_rate": 4.379599518697444e-05,
"loss": 0.2823,
"step": 533
},
{
"epoch": 1.3,
"learning_rate": 4.3769657784240976e-05,
"loss": 0.2688,
"step": 534
},
{
"epoch": 1.3,
"learning_rate": 4.3743272550908543e-05,
"loss": 0.2572,
"step": 535
},
{
"epoch": 1.3,
"learning_rate": 4.371683955421447e-05,
"loss": 0.2635,
"step": 536
},
{
"epoch": 1.31,
"learning_rate": 4.369035886151778e-05,
"loss": 0.2713,
"step": 537
},
{
"epoch": 1.31,
"learning_rate": 4.366383054029906e-05,
"loss": 0.2651,
"step": 538
},
{
"epoch": 1.31,
"learning_rate": 4.363725465816028e-05,
"loss": 0.2487,
"step": 539
},
{
"epoch": 1.31,
"learning_rate": 4.3610631282824556e-05,
"loss": 0.2513,
"step": 540
},
{
"epoch": 1.32,
"learning_rate": 4.3583960482136085e-05,
"loss": 0.2683,
"step": 541
},
{
"epoch": 1.32,
"learning_rate": 4.3557242324059896e-05,
"loss": 0.2706,
"step": 542
},
{
"epoch": 1.32,
"learning_rate": 4.3530476876681696e-05,
"loss": 0.2741,
"step": 543
},
{
"epoch": 1.32,
"learning_rate": 4.350366420820771e-05,
"loss": 0.238,
"step": 544
},
{
"epoch": 1.33,
"learning_rate": 4.347680438696449e-05,
"loss": 0.2656,
"step": 545
},
{
"epoch": 1.33,
"learning_rate": 4.344989748139873e-05,
"loss": 0.2534,
"step": 546
},
{
"epoch": 1.33,
"learning_rate": 4.342294356007715e-05,
"loss": 0.2832,
"step": 547
},
{
"epoch": 1.33,
"learning_rate": 4.339594269168624e-05,
"loss": 0.259,
"step": 548
},
{
"epoch": 1.34,
"learning_rate": 4.3368894945032146e-05,
"loss": 0.2734,
"step": 549
},
{
"epoch": 1.34,
"learning_rate": 4.334180038904046e-05,
"loss": 0.2805,
"step": 550
},
{
"epoch": 1.34,
"learning_rate": 4.331465909275608e-05,
"loss": 0.2837,
"step": 551
},
{
"epoch": 1.34,
"learning_rate": 4.3287471125342996e-05,
"loss": 0.2763,
"step": 552
},
{
"epoch": 1.35,
"learning_rate": 4.326023655608411e-05,
"loss": 0.2678,
"step": 553
},
{
"epoch": 1.35,
"learning_rate": 4.323295545438112e-05,
"loss": 0.2583,
"step": 554
},
{
"epoch": 1.35,
"learning_rate": 4.3205627889754286e-05,
"loss": 0.2657,
"step": 555
},
{
"epoch": 1.35,
"learning_rate": 4.317825393184226e-05,
"loss": 0.2653,
"step": 556
},
{
"epoch": 1.36,
"learning_rate": 4.315083365040192e-05,
"loss": 0.2566,
"step": 557
},
{
"epoch": 1.36,
"learning_rate": 4.31233671153082e-05,
"loss": 0.2443,
"step": 558
},
{
"epoch": 1.36,
"learning_rate": 4.309585439655389e-05,
"loss": 0.262,
"step": 559
},
{
"epoch": 1.36,
"learning_rate": 4.306829556424948e-05,
"loss": 0.2639,
"step": 560
},
{
"epoch": 1.37,
"learning_rate": 4.304069068862296e-05,
"loss": 0.2558,
"step": 561
},
{
"epoch": 1.37,
"learning_rate": 4.301303984001967e-05,
"loss": 0.2791,
"step": 562
},
{
"epoch": 1.37,
"learning_rate": 4.298534308890209e-05,
"loss": 0.2564,
"step": 563
},
{
"epoch": 1.37,
"learning_rate": 4.295760050584966e-05,
"loss": 0.2749,
"step": 564
},
{
"epoch": 1.38,
"learning_rate": 4.2929812161558636e-05,
"loss": 0.2839,
"step": 565
},
{
"epoch": 1.38,
"learning_rate": 4.290197812684188e-05,
"loss": 0.2432,
"step": 566
},
{
"epoch": 1.38,
"learning_rate": 4.2874098472628675e-05,
"loss": 0.2716,
"step": 567
},
{
"epoch": 1.38,
"learning_rate": 4.284617326996458e-05,
"loss": 0.2628,
"step": 568
},
{
"epoch": 1.38,
"learning_rate": 4.28182025900112e-05,
"loss": 0.2797,
"step": 569
},
{
"epoch": 1.39,
"learning_rate": 4.279018650404604e-05,
"loss": 0.2704,
"step": 570
},
{
"epoch": 1.39,
"learning_rate": 4.276212508346232e-05,
"loss": 0.2701,
"step": 571
},
{
"epoch": 1.39,
"learning_rate": 4.273401839976877e-05,
"loss": 0.2713,
"step": 572
},
{
"epoch": 1.39,
"learning_rate": 4.270586652458948e-05,
"loss": 0.249,
"step": 573
},
{
"epoch": 1.4,
"learning_rate": 4.267766952966369e-05,
"loss": 0.2755,
"step": 574
},
{
"epoch": 1.4,
"learning_rate": 4.264942748684563e-05,
"loss": 0.2882,
"step": 575
},
{
"epoch": 1.4,
"learning_rate": 4.2621140468104295e-05,
"loss": 0.2686,
"step": 576
},
{
"epoch": 1.4,
"learning_rate": 4.2592808545523335e-05,
"loss": 0.2901,
"step": 577
},
{
"epoch": 1.41,
"learning_rate": 4.256443179130081e-05,
"loss": 0.251,
"step": 578
},
{
"epoch": 1.41,
"learning_rate": 4.2536010277748996e-05,
"loss": 0.2522,
"step": 579
},
{
"epoch": 1.41,
"learning_rate": 4.250754407729428e-05,
"loss": 0.2773,
"step": 580
},
{
"epoch": 1.41,
"learning_rate": 4.2479033262476884e-05,
"loss": 0.2574,
"step": 581
},
{
"epoch": 1.42,
"learning_rate": 4.245047790595075e-05,
"loss": 0.2835,
"step": 582
},
{
"epoch": 1.42,
"learning_rate": 4.242187808048329e-05,
"loss": 0.2646,
"step": 583
},
{
"epoch": 1.42,
"learning_rate": 4.239323385895527e-05,
"loss": 0.2786,
"step": 584
},
{
"epoch": 1.42,
"learning_rate": 4.2364545314360585e-05,
"loss": 0.282,
"step": 585
},
{
"epoch": 1.43,
"learning_rate": 4.233581251980604e-05,
"loss": 0.2655,
"step": 586
},
{
"epoch": 1.43,
"learning_rate": 4.2307035548511265e-05,
"loss": 0.259,
"step": 587
},
{
"epoch": 1.43,
"learning_rate": 4.227821447380842e-05,
"loss": 0.2476,
"step": 588
},
{
"epoch": 1.43,
"learning_rate": 4.224934936914206e-05,
"loss": 0.2628,
"step": 589
},
{
"epoch": 1.44,
"learning_rate": 4.222044030806894e-05,
"loss": 0.2629,
"step": 590
},
{
"epoch": 1.44,
"learning_rate": 4.2191487364257854e-05,
"loss": 0.2776,
"step": 591
},
{
"epoch": 1.44,
"learning_rate": 4.216249061148939e-05,
"loss": 0.2549,
"step": 592
},
{
"epoch": 1.44,
"learning_rate": 4.21334501236558e-05,
"loss": 0.2513,
"step": 593
},
{
"epoch": 1.45,
"learning_rate": 4.210436597476076e-05,
"loss": 0.2596,
"step": 594
},
{
"epoch": 1.45,
"learning_rate": 4.207523823891923e-05,
"loss": 0.2767,
"step": 595
},
{
"epoch": 1.45,
"learning_rate": 4.2046066990357235e-05,
"loss": 0.2735,
"step": 596
},
{
"epoch": 1.45,
"learning_rate": 4.201685230341168e-05,
"loss": 0.2487,
"step": 597
},
{
"epoch": 1.46,
"learning_rate": 4.198759425253014e-05,
"loss": 0.2558,
"step": 598
},
{
"epoch": 1.46,
"learning_rate": 4.195829291227076e-05,
"loss": 0.2773,
"step": 599
},
{
"epoch": 1.46,
"learning_rate": 4.192894835730193e-05,
"loss": 0.2716,
"step": 600
},
{
"epoch": 1.46,
"learning_rate": 4.1899560662402206e-05,
"loss": 0.2724,
"step": 601
},
{
"epoch": 1.47,
"learning_rate": 4.1870129902460056e-05,
"loss": 0.27,
"step": 602
},
{
"epoch": 1.47,
"learning_rate": 4.18406561524737e-05,
"loss": 0.2594,
"step": 603
},
{
"epoch": 1.47,
"learning_rate": 4.18111394875509e-05,
"loss": 0.2581,
"step": 604
},
{
"epoch": 1.47,
"learning_rate": 4.178157998290879e-05,
"loss": 0.265,
"step": 605
},
{
"epoch": 1.47,
"learning_rate": 4.175197771387368e-05,
"loss": 0.2653,
"step": 606
},
{
"epoch": 1.48,
"learning_rate": 4.172233275588082e-05,
"loss": 0.2808,
"step": 607
},
{
"epoch": 1.48,
"learning_rate": 4.169264518447428e-05,
"loss": 0.27,
"step": 608
},
{
"epoch": 1.48,
"learning_rate": 4.16629150753067e-05,
"loss": 0.2522,
"step": 609
},
{
"epoch": 1.48,
"learning_rate": 4.163314250413913e-05,
"loss": 0.253,
"step": 610
},
{
"epoch": 1.49,
"learning_rate": 4.160332754684084e-05,
"loss": 0.2572,
"step": 611
},
{
"epoch": 1.49,
"learning_rate": 4.157347027938907e-05,
"loss": 0.2799,
"step": 612
},
{
"epoch": 1.49,
"learning_rate": 4.1543570777868924e-05,
"loss": 0.2816,
"step": 613
},
{
"epoch": 1.49,
"learning_rate": 4.151362911847309e-05,
"loss": 0.2859,
"step": 614
},
{
"epoch": 1.5,
"learning_rate": 4.148364537750172e-05,
"loss": 0.2601,
"step": 615
},
{
"epoch": 1.5,
"learning_rate": 4.1453619631362195e-05,
"loss": 0.2725,
"step": 616
},
{
"epoch": 1.5,
"learning_rate": 4.142355195656892e-05,
"loss": 0.2669,
"step": 617
},
{
"epoch": 1.5,
"learning_rate": 4.1393442429743166e-05,
"loss": 0.2955,
"step": 618
},
{
"epoch": 1.51,
"learning_rate": 4.1363291127612845e-05,
"loss": 0.2655,
"step": 619
},
{
"epoch": 1.51,
"learning_rate": 4.1333098127012326e-05,
"loss": 0.2545,
"step": 620
},
{
"epoch": 1.51,
"learning_rate": 4.130286350488224e-05,
"loss": 0.2724,
"step": 621
},
{
"epoch": 1.51,
"learning_rate": 4.127258733826929e-05,
"loss": 0.2633,
"step": 622
},
{
"epoch": 1.52,
"learning_rate": 4.124226970432602e-05,
"loss": 0.2643,
"step": 623
},
{
"epoch": 1.52,
"learning_rate": 4.121191068031067e-05,
"loss": 0.2817,
"step": 624
},
{
"epoch": 1.52,
"learning_rate": 4.118151034358696e-05,
"loss": 0.263,
"step": 625
},
{
"epoch": 1.52,
"learning_rate": 4.1151068771623866e-05,
"loss": 0.2869,
"step": 626
},
{
"epoch": 1.53,
"learning_rate": 4.112058604199544e-05,
"loss": 0.2666,
"step": 627
},
{
"epoch": 1.53,
"learning_rate": 4.109006223238064e-05,
"loss": 0.2692,
"step": 628
},
{
"epoch": 1.53,
"learning_rate": 4.1059497420563094e-05,
"loss": 0.2615,
"step": 629
},
{
"epoch": 1.53,
"learning_rate": 4.102889168443091e-05,
"loss": 0.2571,
"step": 630
},
{
"epoch": 1.54,
"learning_rate": 4.099824510197649e-05,
"loss": 0.2914,
"step": 631
},
{
"epoch": 1.54,
"learning_rate": 4.0967557751296336e-05,
"loss": 0.2808,
"step": 632
},
{
"epoch": 1.54,
"learning_rate": 4.093682971059081e-05,
"loss": 0.2658,
"step": 633
},
{
"epoch": 1.54,
"learning_rate": 4.0906061058163995e-05,
"loss": 0.2727,
"step": 634
},
{
"epoch": 1.55,
"learning_rate": 4.087525187242345e-05,
"loss": 0.2541,
"step": 635
},
{
"epoch": 1.55,
"learning_rate": 4.0844402231880016e-05,
"loss": 0.2676,
"step": 636
},
{
"epoch": 1.55,
"learning_rate": 4.0813512215147654e-05,
"loss": 0.2555,
"step": 637
},
{
"epoch": 1.55,
"learning_rate": 4.078258190094318e-05,
"loss": 0.2597,
"step": 638
},
{
"epoch": 1.56,
"learning_rate": 4.075161136808612e-05,
"loss": 0.2589,
"step": 639
},
{
"epoch": 1.56,
"learning_rate": 4.0720600695498486e-05,
"loss": 0.2852,
"step": 640
},
{
"epoch": 1.56,
"learning_rate": 4.068954996220457e-05,
"loss": 0.2557,
"step": 641
},
{
"epoch": 1.56,
"learning_rate": 4.0658459247330766e-05,
"loss": 0.2697,
"step": 642
},
{
"epoch": 1.56,
"learning_rate": 4.062732863010534e-05,
"loss": 0.2678,
"step": 643
},
{
"epoch": 1.57,
"learning_rate": 4.0596158189858255e-05,
"loss": 0.2631,
"step": 644
},
{
"epoch": 1.57,
"learning_rate": 4.0564948006020934e-05,
"loss": 0.2559,
"step": 645
},
{
"epoch": 1.57,
"learning_rate": 4.0533698158126085e-05,
"loss": 0.2833,
"step": 646
},
{
"epoch": 1.57,
"learning_rate": 4.050240872580749e-05,
"loss": 0.2542,
"step": 647
},
{
"epoch": 1.58,
"learning_rate": 4.047107978879985e-05,
"loss": 0.28,
"step": 648
},
{
"epoch": 1.58,
"learning_rate": 4.043971142693844e-05,
"loss": 0.2607,
"step": 649
},
{
"epoch": 1.58,
"learning_rate": 4.040830372015909e-05,
"loss": 0.278,
"step": 650
},
{
"epoch": 1.58,
"learning_rate": 4.037685674849786e-05,
"loss": 0.2569,
"step": 651
},
{
"epoch": 1.59,
"learning_rate": 4.034537059209085e-05,
"loss": 0.2844,
"step": 652
},
{
"epoch": 1.59,
"learning_rate": 4.0313845331174036e-05,
"loss": 0.2639,
"step": 653
},
{
"epoch": 1.59,
"learning_rate": 4.0282281046083045e-05,
"loss": 0.2735,
"step": 654
},
{
"epoch": 1.59,
"learning_rate": 4.025067781725294e-05,
"loss": 0.2713,
"step": 655
},
{
"epoch": 1.6,
"learning_rate": 4.021903572521802e-05,
"loss": 0.2515,
"step": 656
},
{
"epoch": 1.6,
"learning_rate": 4.0187354850611636e-05,
"loss": 0.2651,
"step": 657
},
{
"epoch": 1.6,
"learning_rate": 4.015563527416595e-05,
"loss": 0.2788,
"step": 658
},
{
"epoch": 1.6,
"learning_rate": 4.012387707671177e-05,
"loss": 0.2753,
"step": 659
},
{
"epoch": 1.61,
"learning_rate": 4.00920803391783e-05,
"loss": 0.2589,
"step": 660
},
{
"epoch": 1.61,
"learning_rate": 4.0060245142592944e-05,
"loss": 0.2748,
"step": 661
},
{
"epoch": 1.61,
"learning_rate": 4.002837156808116e-05,
"loss": 0.2559,
"step": 662
},
{
"epoch": 1.61,
"learning_rate": 3.999645969686616e-05,
"loss": 0.2563,
"step": 663
},
{
"epoch": 1.62,
"learning_rate": 3.996450961026876e-05,
"loss": 0.251,
"step": 664
},
{
"epoch": 1.62,
"learning_rate": 3.9932521389707155e-05,
"loss": 0.2661,
"step": 665
},
{
"epoch": 1.62,
"learning_rate": 3.990049511669675e-05,
"loss": 0.2563,
"step": 666
},
{
"epoch": 1.62,
"learning_rate": 3.986843087284986e-05,
"loss": 0.2754,
"step": 667
},
{
"epoch": 1.63,
"learning_rate": 3.9836328739875615e-05,
"loss": 0.2591,
"step": 668
},
{
"epoch": 1.63,
"learning_rate": 3.980418879957967e-05,
"loss": 0.2764,
"step": 669
},
{
"epoch": 1.63,
"learning_rate": 3.977201113386402e-05,
"loss": 0.2801,
"step": 670
},
{
"epoch": 1.63,
"learning_rate": 3.9739795824726804e-05,
"loss": 0.2768,
"step": 671
},
{
"epoch": 1.64,
"learning_rate": 3.9707542954262115e-05,
"loss": 0.2933,
"step": 672
},
{
"epoch": 1.64,
"learning_rate": 3.96752526046597e-05,
"loss": 0.2757,
"step": 673
},
{
"epoch": 1.64,
"learning_rate": 3.964292485820487e-05,
"loss": 0.2557,
"step": 674
},
{
"epoch": 1.64,
"learning_rate": 3.9610559797278216e-05,
"loss": 0.2624,
"step": 675
},
{
"epoch": 1.65,
"learning_rate": 3.957815750435542e-05,
"loss": 0.2618,
"step": 676
},
{
"epoch": 1.65,
"learning_rate": 3.954571806200702e-05,
"loss": 0.2689,
"step": 677
},
{
"epoch": 1.65,
"learning_rate": 3.951324155289825e-05,
"loss": 0.2581,
"step": 678
},
{
"epoch": 1.65,
"learning_rate": 3.9480728059788796e-05,
"loss": 0.2589,
"step": 679
},
{
"epoch": 1.66,
"learning_rate": 3.9448177665532574e-05,
"loss": 0.2733,
"step": 680
},
{
"epoch": 1.66,
"learning_rate": 3.941559045307755e-05,
"loss": 0.2653,
"step": 681
},
{
"epoch": 1.66,
"learning_rate": 3.938296650546552e-05,
"loss": 0.2799,
"step": 682
},
{
"epoch": 1.66,
"learning_rate": 3.935030590583186e-05,
"loss": 0.2583,
"step": 683
},
{
"epoch": 1.66,
"learning_rate": 3.931760873740539e-05,
"loss": 0.271,
"step": 684
},
{
"epoch": 1.67,
"learning_rate": 3.9284875083508076e-05,
"loss": 0.2534,
"step": 685
},
{
"epoch": 1.67,
"learning_rate": 3.9252105027554887e-05,
"loss": 0.2576,
"step": 686
},
{
"epoch": 1.67,
"learning_rate": 3.9219298653053546e-05,
"loss": 0.2464,
"step": 687
},
{
"epoch": 1.67,
"learning_rate": 3.918645604360433e-05,
"loss": 0.2738,
"step": 688
},
{
"epoch": 1.68,
"learning_rate": 3.915357728289985e-05,
"loss": 0.2593,
"step": 689
},
{
"epoch": 1.68,
"learning_rate": 3.9120662454724836e-05,
"loss": 0.2795,
"step": 690
},
{
"epoch": 1.68,
"learning_rate": 3.908771164295595e-05,
"loss": 0.2759,
"step": 691
},
{
"epoch": 1.68,
"learning_rate": 3.905472493156151e-05,
"loss": 0.2606,
"step": 692
},
{
"epoch": 1.69,
"learning_rate": 3.9021702404601366e-05,
"loss": 0.2867,
"step": 693
},
{
"epoch": 1.69,
"learning_rate": 3.8988644146226606e-05,
"loss": 0.2693,
"step": 694
},
{
"epoch": 1.69,
"learning_rate": 3.8955550240679364e-05,
"loss": 0.2601,
"step": 695
},
{
"epoch": 1.69,
"learning_rate": 3.8922420772292644e-05,
"loss": 0.2574,
"step": 696
},
{
"epoch": 1.7,
"learning_rate": 3.888925582549006e-05,
"loss": 0.2737,
"step": 697
},
{
"epoch": 1.7,
"learning_rate": 3.8856055484785625e-05,
"loss": 0.2752,
"step": 698
},
{
"epoch": 1.7,
"learning_rate": 3.882281983478355e-05,
"loss": 0.2807,
"step": 699
},
{
"epoch": 1.7,
"learning_rate": 3.878954896017804e-05,
"loss": 0.2779,
"step": 700
},
{
"epoch": 1.71,
"learning_rate": 3.875624294575305e-05,
"loss": 0.2837,
"step": 701
},
{
"epoch": 1.71,
"learning_rate": 3.872290187638208e-05,
"loss": 0.268,
"step": 702
},
{
"epoch": 1.71,
"learning_rate": 3.8689525837027975e-05,
"loss": 0.2621,
"step": 703
},
{
"epoch": 1.71,
"learning_rate": 3.865611491274267e-05,
"loss": 0.2694,
"step": 704
},
{
"epoch": 1.72,
"learning_rate": 3.8622669188667015e-05,
"loss": 0.2759,
"step": 705
},
{
"epoch": 1.72,
"learning_rate": 3.858918875003053e-05,
"loss": 0.2643,
"step": 706
},
{
"epoch": 1.72,
"learning_rate": 3.8555673682151215e-05,
"loss": 0.2663,
"step": 707
},
{
"epoch": 1.72,
"learning_rate": 3.852212407043528e-05,
"loss": 0.2871,
"step": 708
},
{
"epoch": 1.73,
"learning_rate": 3.8488540000377016e-05,
"loss": 0.2718,
"step": 709
},
{
"epoch": 1.73,
"learning_rate": 3.8454921557558476e-05,
"loss": 0.2712,
"step": 710
},
{
"epoch": 1.73,
"learning_rate": 3.842126882764933e-05,
"loss": 0.2579,
"step": 711
},
{
"epoch": 1.73,
"learning_rate": 3.8387581896406606e-05,
"loss": 0.2695,
"step": 712
},
{
"epoch": 1.74,
"learning_rate": 3.835386084967451e-05,
"loss": 0.2619,
"step": 713
},
{
"epoch": 1.74,
"learning_rate": 3.8320105773384144e-05,
"loss": 0.2744,
"step": 714
},
{
"epoch": 1.74,
"learning_rate": 3.828631675355338e-05,
"loss": 0.2606,
"step": 715
},
{
"epoch": 1.74,
"learning_rate": 3.8252493876286546e-05,
"loss": 0.2703,
"step": 716
},
{
"epoch": 1.75,
"learning_rate": 3.8218637227774276e-05,
"loss": 0.2657,
"step": 717
},
{
"epoch": 1.75,
"learning_rate": 3.818474689429323e-05,
"loss": 0.2827,
"step": 718
},
{
"epoch": 1.75,
"learning_rate": 3.8150822962205956e-05,
"loss": 0.263,
"step": 719
},
{
"epoch": 1.75,
"learning_rate": 3.8116865517960585e-05,
"loss": 0.2702,
"step": 720
},
{
"epoch": 1.75,
"learning_rate": 3.808287464809063e-05,
"loss": 0.2659,
"step": 721
},
{
"epoch": 1.76,
"learning_rate": 3.8048850439214844e-05,
"loss": 0.2564,
"step": 722
},
{
"epoch": 1.76,
"learning_rate": 3.801479297803687e-05,
"loss": 0.2758,
"step": 723
},
{
"epoch": 1.76,
"learning_rate": 3.7980702351345146e-05,
"loss": 0.2742,
"step": 724
},
{
"epoch": 1.76,
"learning_rate": 3.7946578646012574e-05,
"loss": 0.2741,
"step": 725
},
{
"epoch": 1.77,
"learning_rate": 3.791242194899639e-05,
"loss": 0.2695,
"step": 726
},
{
"epoch": 1.77,
"learning_rate": 3.7878232347337875e-05,
"loss": 0.2749,
"step": 727
},
{
"epoch": 1.77,
"learning_rate": 3.784400992816219e-05,
"loss": 0.2679,
"step": 728
},
{
"epoch": 1.77,
"learning_rate": 3.78097547786781e-05,
"loss": 0.2617,
"step": 729
},
{
"epoch": 1.78,
"learning_rate": 3.777546698617776e-05,
"loss": 0.2756,
"step": 730
},
{
"epoch": 1.78,
"learning_rate": 3.774114663803657e-05,
"loss": 0.2704,
"step": 731
},
{
"epoch": 1.78,
"learning_rate": 3.7706793821712826e-05,
"loss": 0.2742,
"step": 732
},
{
"epoch": 1.78,
"learning_rate": 3.76724086247476e-05,
"loss": 0.2686,
"step": 733
},
{
"epoch": 1.79,
"learning_rate": 3.763799113476447e-05,
"loss": 0.2548,
"step": 734
},
{
"epoch": 1.79,
"learning_rate": 3.7603541439469315e-05,
"loss": 0.2788,
"step": 735
},
{
"epoch": 1.79,
"learning_rate": 3.756905962665005e-05,
"loss": 0.2525,
"step": 736
},
{
"epoch": 1.79,
"learning_rate": 3.753454578417648e-05,
"loss": 0.2758,
"step": 737
},
{
"epoch": 1.8,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2523,
"step": 738
},
{
"epoch": 1.8,
"learning_rate": 3.746542236215341e-05,
"loss": 0.2652,
"step": 739
},
{
"epoch": 1.8,
"learning_rate": 3.743081295875069e-05,
"loss": 0.2821,
"step": 740
},
{
"epoch": 1.8,
"learning_rate": 3.7396171877986764e-05,
"loss": 0.2833,
"step": 741
},
{
"epoch": 1.81,
"learning_rate": 3.7361499208137254e-05,
"loss": 0.2846,
"step": 742
},
{
"epoch": 1.81,
"learning_rate": 3.732679503755833e-05,
"loss": 0.2651,
"step": 743
},
{
"epoch": 1.81,
"learning_rate": 3.72920594546864e-05,
"loss": 0.2594,
"step": 744
},
{
"epoch": 1.81,
"learning_rate": 3.725729254803791e-05,
"loss": 0.2776,
"step": 745
},
{
"epoch": 1.82,
"learning_rate": 3.722249440620917e-05,
"loss": 0.2637,
"step": 746
},
{
"epoch": 1.82,
"learning_rate": 3.718766511787606e-05,
"loss": 0.2872,
"step": 747
},
{
"epoch": 1.82,
"learning_rate": 3.715280477179382e-05,
"loss": 0.2563,
"step": 748
},
{
"epoch": 1.82,
"learning_rate": 3.7117913456796854e-05,
"loss": 0.2727,
"step": 749
},
{
"epoch": 1.83,
"learning_rate": 3.708299126179847e-05,
"loss": 0.2601,
"step": 750
},
{
"epoch": 1.83,
"learning_rate": 3.7048038275790694e-05,
"loss": 0.2784,
"step": 751
},
{
"epoch": 1.83,
"learning_rate": 3.701305458784397e-05,
"loss": 0.2644,
"step": 752
},
{
"epoch": 1.83,
"learning_rate": 3.697804028710703e-05,
"loss": 0.2705,
"step": 753
},
{
"epoch": 1.84,
"learning_rate": 3.694299546280657e-05,
"loss": 0.2628,
"step": 754
},
{
"epoch": 1.84,
"learning_rate": 3.690792020424712e-05,
"loss": 0.2715,
"step": 755
},
{
"epoch": 1.84,
"learning_rate": 3.687281460081071e-05,
"loss": 0.2728,
"step": 756
},
{
"epoch": 1.84,
"learning_rate": 3.683767874195674e-05,
"loss": 0.2767,
"step": 757
},
{
"epoch": 1.84,
"learning_rate": 3.680251271722169e-05,
"loss": 0.2652,
"step": 758
},
{
"epoch": 1.85,
"learning_rate": 3.676731661621893e-05,
"loss": 0.2726,
"step": 759
},
{
"epoch": 1.85,
"learning_rate": 3.673209052863843e-05,
"loss": 0.2687,
"step": 760
},
{
"epoch": 1.85,
"learning_rate": 3.6696834544246625e-05,
"loss": 0.2561,
"step": 761
},
{
"epoch": 1.85,
"learning_rate": 3.666154875288611e-05,
"loss": 0.2781,
"step": 762
},
{
"epoch": 1.86,
"learning_rate": 3.662623324447544e-05,
"loss": 0.2867,
"step": 763
},
{
"epoch": 1.86,
"learning_rate": 3.65908881090089e-05,
"loss": 0.2711,
"step": 764
},
{
"epoch": 1.86,
"learning_rate": 3.655551343655628e-05,
"loss": 0.2668,
"step": 765
},
{
"epoch": 1.86,
"learning_rate": 3.652010931726262e-05,
"loss": 0.2522,
"step": 766
},
{
"epoch": 1.87,
"learning_rate": 3.648467584134802e-05,
"loss": 0.2658,
"step": 767
},
{
"epoch": 1.87,
"learning_rate": 3.6449213099107373e-05,
"loss": 0.2757,
"step": 768
},
{
"epoch": 1.87,
"learning_rate": 3.641372118091017e-05,
"loss": 0.2865,
"step": 769
},
{
"epoch": 1.87,
"learning_rate": 3.6378200177200224e-05,
"loss": 0.2468,
"step": 770
},
{
"epoch": 1.88,
"learning_rate": 3.634265017849549e-05,
"loss": 0.2828,
"step": 771
},
{
"epoch": 1.88,
"learning_rate": 3.63070712753878e-05,
"loss": 0.2562,
"step": 772
},
{
"epoch": 1.88,
"learning_rate": 3.6271463558542645e-05,
"loss": 0.2701,
"step": 773
},
{
"epoch": 1.88,
"learning_rate": 3.623582711869895e-05,
"loss": 0.2851,
"step": 774
},
{
"epoch": 1.89,
"learning_rate": 3.620016204666882e-05,
"loss": 0.2844,
"step": 775
},
{
"epoch": 1.89,
"learning_rate": 3.616446843333733e-05,
"loss": 0.284,
"step": 776
},
{
"epoch": 1.89,
"learning_rate": 3.612874636966228e-05,
"loss": 0.2673,
"step": 777
},
{
"epoch": 1.89,
"learning_rate": 3.6092995946673994e-05,
"loss": 0.2512,
"step": 778
},
{
"epoch": 1.9,
"learning_rate": 3.6057217255475034e-05,
"loss": 0.2598,
"step": 779
},
{
"epoch": 1.9,
"learning_rate": 3.602141038724001e-05,
"loss": 0.2664,
"step": 780
},
{
"epoch": 1.9,
"learning_rate": 3.598557543321535e-05,
"loss": 0.2745,
"step": 781
},
{
"epoch": 1.9,
"learning_rate": 3.5949712484719014e-05,
"loss": 0.2582,
"step": 782
},
{
"epoch": 1.91,
"learning_rate": 3.5913821633140336e-05,
"loss": 0.2668,
"step": 783
},
{
"epoch": 1.91,
"learning_rate": 3.5877902969939755e-05,
"loss": 0.2593,
"step": 784
},
{
"epoch": 1.91,
"learning_rate": 3.584195658664855e-05,
"loss": 0.2607,
"step": 785
},
{
"epoch": 1.91,
"learning_rate": 3.580598257486867e-05,
"loss": 0.2493,
"step": 786
},
{
"epoch": 1.92,
"learning_rate": 3.5769981026272475e-05,
"loss": 0.272,
"step": 787
},
{
"epoch": 1.92,
"learning_rate": 3.573395203260245e-05,
"loss": 0.2687,
"step": 788
},
{
"epoch": 1.92,
"learning_rate": 3.569789568567107e-05,
"loss": 0.2735,
"step": 789
},
{
"epoch": 1.92,
"learning_rate": 3.56618120773605e-05,
"loss": 0.254,
"step": 790
},
{
"epoch": 1.93,
"learning_rate": 3.5625701299622336e-05,
"loss": 0.2665,
"step": 791
},
{
"epoch": 1.93,
"learning_rate": 3.558956344447748e-05,
"loss": 0.2654,
"step": 792
},
{
"epoch": 1.93,
"learning_rate": 3.555339860401578e-05,
"loss": 0.2718,
"step": 793
},
{
"epoch": 1.93,
"learning_rate": 3.551720687039585e-05,
"loss": 0.2475,
"step": 794
},
{
"epoch": 1.93,
"learning_rate": 3.5480988335844886e-05,
"loss": 0.269,
"step": 795
},
{
"epoch": 1.94,
"learning_rate": 3.544474309265834e-05,
"loss": 0.2577,
"step": 796
},
{
"epoch": 1.94,
"learning_rate": 3.5408471233199716e-05,
"loss": 0.2848,
"step": 797
},
{
"epoch": 1.94,
"learning_rate": 3.5372172849900374e-05,
"loss": 0.2677,
"step": 798
},
{
"epoch": 1.94,
"learning_rate": 3.533584803525926e-05,
"loss": 0.2583,
"step": 799
},
{
"epoch": 1.95,
"learning_rate": 3.529949688184265e-05,
"loss": 0.2596,
"step": 800
},
{
"epoch": 1.95,
"learning_rate": 3.526311948228397e-05,
"loss": 0.2552,
"step": 801
},
{
"epoch": 1.95,
"learning_rate": 3.5226715929283506e-05,
"loss": 0.2709,
"step": 802
},
{
"epoch": 1.95,
"learning_rate": 3.519028631560819e-05,
"loss": 0.2602,
"step": 803
},
{
"epoch": 1.96,
"learning_rate": 3.51538307340914e-05,
"loss": 0.2537,
"step": 804
},
{
"epoch": 1.96,
"learning_rate": 3.511734927763265e-05,
"loss": 0.274,
"step": 805
},
{
"epoch": 1.96,
"learning_rate": 3.508084203919739e-05,
"loss": 0.2442,
"step": 806
},
{
"epoch": 1.96,
"learning_rate": 3.5044309111816796e-05,
"loss": 0.2676,
"step": 807
},
{
"epoch": 1.97,
"learning_rate": 3.50077505885875e-05,
"loss": 0.2875,
"step": 808
},
{
"epoch": 1.97,
"learning_rate": 3.4971166562671324e-05,
"loss": 0.2686,
"step": 809
},
{
"epoch": 1.97,
"learning_rate": 3.493455712729514e-05,
"loss": 0.2753,
"step": 810
},
{
"epoch": 1.97,
"learning_rate": 3.4897922375750514e-05,
"loss": 0.2703,
"step": 811
},
{
"epoch": 1.98,
"learning_rate": 3.4861262401393566e-05,
"loss": 0.2661,
"step": 812
},
{
"epoch": 1.98,
"learning_rate": 3.482457729764466e-05,
"loss": 0.2644,
"step": 813
},
{
"epoch": 1.98,
"learning_rate": 3.478786715798823e-05,
"loss": 0.3001,
"step": 814
},
{
"epoch": 1.98,
"learning_rate": 3.475113207597247e-05,
"loss": 0.269,
"step": 815
},
{
"epoch": 1.99,
"learning_rate": 3.4714372145209166e-05,
"loss": 0.2618,
"step": 816
},
{
"epoch": 1.99,
"learning_rate": 3.467758745937342e-05,
"loss": 0.2592,
"step": 817
},
{
"epoch": 1.99,
"learning_rate": 3.46407781122034e-05,
"loss": 0.2805,
"step": 818
},
{
"epoch": 1.99,
"learning_rate": 3.460394419750013e-05,
"loss": 0.2432,
"step": 819
},
{
"epoch": 2.0,
"learning_rate": 3.456708580912725e-05,
"loss": 0.2524,
"step": 820
},
{
"epoch": 2.0,
"learning_rate": 3.4530203041010745e-05,
"loss": 0.2529,
"step": 821
},
{
"epoch": 2.0,
"eval_loss": 0.5900537967681885,
"eval_runtime": 116.0624,
"eval_samples_per_second": 6.565,
"eval_steps_per_second": 0.414,
"step": 821
},
{
"epoch": 2.0,
"learning_rate": 3.449329598713874e-05,
"loss": 0.2271,
"step": 822
},
{
"epoch": 2.0,
"learning_rate": 3.445636474156125e-05,
"loss": 0.1519,
"step": 823
},
{
"epoch": 2.01,
"learning_rate": 3.4419409398389935e-05,
"loss": 0.1477,
"step": 824
},
{
"epoch": 2.01,
"learning_rate": 3.438243005179784e-05,
"loss": 0.1407,
"step": 825
},
{
"epoch": 2.01,
"learning_rate": 3.434542679601922e-05,
"loss": 0.1235,
"step": 826
},
{
"epoch": 2.01,
"learning_rate": 3.4308399725349226e-05,
"loss": 0.1323,
"step": 827
},
{
"epoch": 2.02,
"learning_rate": 3.42713489341437e-05,
"loss": 0.1445,
"step": 828
},
{
"epoch": 2.02,
"learning_rate": 3.423427451681895e-05,
"loss": 0.1257,
"step": 829
},
{
"epoch": 2.02,
"learning_rate": 3.419717656785146e-05,
"loss": 0.1249,
"step": 830
},
{
"epoch": 2.02,
"learning_rate": 3.416005518177771e-05,
"loss": 0.1279,
"step": 831
},
{
"epoch": 2.02,
"learning_rate": 3.4122910453193885e-05,
"loss": 0.1278,
"step": 832
},
{
"epoch": 2.03,
"learning_rate": 3.408574247675566e-05,
"loss": 0.1207,
"step": 833
},
{
"epoch": 2.03,
"learning_rate": 3.4048551347177945e-05,
"loss": 0.1184,
"step": 834
},
{
"epoch": 2.03,
"learning_rate": 3.401133715923467e-05,
"loss": 0.1308,
"step": 835
},
{
"epoch": 2.03,
"learning_rate": 3.3974100007758514e-05,
"loss": 0.1291,
"step": 836
},
{
"epoch": 2.04,
"learning_rate": 3.3936839987640664e-05,
"loss": 0.1129,
"step": 837
},
{
"epoch": 2.04,
"learning_rate": 3.389955719383058e-05,
"loss": 0.1264,
"step": 838
},
{
"epoch": 2.04,
"learning_rate": 3.3862251721335794e-05,
"loss": 0.1149,
"step": 839
},
{
"epoch": 2.04,
"learning_rate": 3.382492366522158e-05,
"loss": 0.1234,
"step": 840
},
{
"epoch": 2.05,
"learning_rate": 3.378757312061079e-05,
"loss": 0.1245,
"step": 841
},
{
"epoch": 2.05,
"learning_rate": 3.375020018268359e-05,
"loss": 0.1154,
"step": 842
},
{
"epoch": 2.05,
"learning_rate": 3.371280494667719e-05,
"loss": 0.1231,
"step": 843
},
{
"epoch": 2.05,
"learning_rate": 3.367538750788563e-05,
"loss": 0.1224,
"step": 844
},
{
"epoch": 2.06,
"learning_rate": 3.363794796165953e-05,
"loss": 0.1196,
"step": 845
},
{
"epoch": 2.06,
"learning_rate": 3.360048640340585e-05,
"loss": 0.1189,
"step": 846
},
{
"epoch": 2.06,
"learning_rate": 3.3563002928587627e-05,
"loss": 0.1173,
"step": 847
},
{
"epoch": 2.06,
"learning_rate": 3.352549763272379e-05,
"loss": 0.1248,
"step": 848
},
{
"epoch": 2.07,
"learning_rate": 3.348797061138881e-05,
"loss": 0.1255,
"step": 849
},
{
"epoch": 2.07,
"learning_rate": 3.3450421960212566e-05,
"loss": 0.1178,
"step": 850
},
{
"epoch": 2.07,
"learning_rate": 3.3412851774880064e-05,
"loss": 0.121,
"step": 851
},
{
"epoch": 2.07,
"learning_rate": 3.337526015113115e-05,
"loss": 0.1186,
"step": 852
},
{
"epoch": 2.08,
"learning_rate": 3.3337647184760315e-05,
"loss": 0.1191,
"step": 853
},
{
"epoch": 2.08,
"learning_rate": 3.3300012971616467e-05,
"loss": 0.1223,
"step": 854
},
{
"epoch": 2.08,
"learning_rate": 3.3262357607602596e-05,
"loss": 0.1104,
"step": 855
},
{
"epoch": 2.08,
"learning_rate": 3.322468118867564e-05,
"loss": 0.125,
"step": 856
},
{
"epoch": 2.09,
"learning_rate": 3.318698381084619e-05,
"loss": 0.1221,
"step": 857
},
{
"epoch": 2.09,
"learning_rate": 3.314926557017821e-05,
"loss": 0.1181,
"step": 858
},
{
"epoch": 2.09,
"learning_rate": 3.3111526562788864e-05,
"loss": 0.1197,
"step": 859
},
{
"epoch": 2.09,
"learning_rate": 3.3073766884848234e-05,
"loss": 0.1168,
"step": 860
},
{
"epoch": 2.1,
"learning_rate": 3.303598663257904e-05,
"loss": 0.1186,
"step": 861
},
{
"epoch": 2.1,
"learning_rate": 3.299818590225647e-05,
"loss": 0.1192,
"step": 862
},
{
"epoch": 2.1,
"learning_rate": 3.29603647902079e-05,
"loss": 0.1192,
"step": 863
},
{
"epoch": 2.1,
"learning_rate": 3.2922523392812605e-05,
"loss": 0.1285,
"step": 864
},
{
"epoch": 2.11,
"learning_rate": 3.2884661806501574e-05,
"loss": 0.1299,
"step": 865
},
{
"epoch": 2.11,
"learning_rate": 3.284678012775727e-05,
"loss": 0.1322,
"step": 866
},
{
"epoch": 2.11,
"learning_rate": 3.280887845311332e-05,
"loss": 0.1174,
"step": 867
},
{
"epoch": 2.11,
"learning_rate": 3.27709568791543e-05,
"loss": 0.1225,
"step": 868
},
{
"epoch": 2.11,
"learning_rate": 3.273301550251555e-05,
"loss": 0.1194,
"step": 869
},
{
"epoch": 2.12,
"learning_rate": 3.269505441988281e-05,
"loss": 0.1139,
"step": 870
},
{
"epoch": 2.12,
"learning_rate": 3.265707372799208e-05,
"loss": 0.1294,
"step": 871
},
{
"epoch": 2.12,
"learning_rate": 3.2619073523629304e-05,
"loss": 0.1244,
"step": 872
},
{
"epoch": 2.12,
"learning_rate": 3.258105390363016e-05,
"loss": 0.1284,
"step": 873
},
{
"epoch": 2.13,
"learning_rate": 3.2543014964879816e-05,
"loss": 0.1234,
"step": 874
},
{
"epoch": 2.13,
"learning_rate": 3.250495680431264e-05,
"loss": 0.1163,
"step": 875
},
{
"epoch": 2.13,
"learning_rate": 3.246687951891201e-05,
"loss": 0.1269,
"step": 876
},
{
"epoch": 2.13,
"learning_rate": 3.2428783205710026e-05,
"loss": 0.1174,
"step": 877
},
{
"epoch": 2.14,
"learning_rate": 3.2390667961787275e-05,
"loss": 0.1226,
"step": 878
},
{
"epoch": 2.14,
"learning_rate": 3.23525338842726e-05,
"loss": 0.1174,
"step": 879
},
{
"epoch": 2.14,
"learning_rate": 3.231438107034281e-05,
"loss": 0.1212,
"step": 880
},
{
"epoch": 2.14,
"learning_rate": 3.22762096172225e-05,
"loss": 0.1189,
"step": 881
},
{
"epoch": 2.15,
"learning_rate": 3.223801962218372e-05,
"loss": 0.1232,
"step": 882
},
{
"epoch": 2.15,
"learning_rate": 3.21998111825458e-05,
"loss": 0.1271,
"step": 883
},
{
"epoch": 2.15,
"learning_rate": 3.216158439567506e-05,
"loss": 0.1229,
"step": 884
},
{
"epoch": 2.15,
"learning_rate": 3.2123339358984575e-05,
"loss": 0.1144,
"step": 885
},
{
"epoch": 2.16,
"learning_rate": 3.208507616993393e-05,
"loss": 0.1251,
"step": 886
},
{
"epoch": 2.16,
"learning_rate": 3.2046794926028964e-05,
"loss": 0.1176,
"step": 887
},
{
"epoch": 2.16,
"learning_rate": 3.200849572482153e-05,
"loss": 0.1346,
"step": 888
},
{
"epoch": 2.16,
"learning_rate": 3.1970178663909236e-05,
"loss": 0.1159,
"step": 889
},
{
"epoch": 2.17,
"learning_rate": 3.19318438409352e-05,
"loss": 0.1366,
"step": 890
},
{
"epoch": 2.17,
"learning_rate": 3.189349135358781e-05,
"loss": 0.1249,
"step": 891
},
{
"epoch": 2.17,
"learning_rate": 3.1855121299600456e-05,
"loss": 0.1261,
"step": 892
},
{
"epoch": 2.17,
"learning_rate": 3.181673377675131e-05,
"loss": 0.1217,
"step": 893
},
{
"epoch": 2.18,
"learning_rate": 3.1778328882863054e-05,
"loss": 0.1191,
"step": 894
},
{
"epoch": 2.18,
"learning_rate": 3.173990671580263e-05,
"loss": 0.1155,
"step": 895
},
{
"epoch": 2.18,
"learning_rate": 3.1701467373480995e-05,
"loss": 0.1107,
"step": 896
},
{
"epoch": 2.18,
"learning_rate": 3.166301095385288e-05,
"loss": 0.1177,
"step": 897
},
{
"epoch": 2.19,
"learning_rate": 3.162453755491655e-05,
"loss": 0.1212,
"step": 898
},
{
"epoch": 2.19,
"learning_rate": 3.1586047274713494e-05,
"loss": 0.126,
"step": 899
},
{
"epoch": 2.19,
"learning_rate": 3.154754021132827e-05,
"loss": 0.1171,
"step": 900
},
{
"epoch": 2.19,
"learning_rate": 3.1509016462888174e-05,
"loss": 0.1225,
"step": 901
},
{
"epoch": 2.2,
"learning_rate": 3.147047612756302e-05,
"loss": 0.1315,
"step": 902
},
{
"epoch": 2.2,
"learning_rate": 3.143191930356491e-05,
"loss": 0.1207,
"step": 903
},
{
"epoch": 2.2,
"learning_rate": 3.139334608914795e-05,
"loss": 0.1307,
"step": 904
},
{
"epoch": 2.2,
"learning_rate": 3.135475658260801e-05,
"loss": 0.1163,
"step": 905
},
{
"epoch": 2.21,
"learning_rate": 3.131615088228249e-05,
"loss": 0.1162,
"step": 906
},
{
"epoch": 2.21,
"learning_rate": 3.127752908655004e-05,
"loss": 0.1234,
"step": 907
},
{
"epoch": 2.21,
"learning_rate": 3.123889129383034e-05,
"loss": 0.1168,
"step": 908
},
{
"epoch": 2.21,
"learning_rate": 3.1200237602583834e-05,
"loss": 0.1238,
"step": 909
},
{
"epoch": 2.21,
"learning_rate": 3.116156811131148e-05,
"loss": 0.1255,
"step": 910
},
{
"epoch": 2.22,
"learning_rate": 3.112288291855449e-05,
"loss": 0.124,
"step": 911
},
{
"epoch": 2.22,
"learning_rate": 3.108418212289408e-05,
"loss": 0.1267,
"step": 912
},
{
"epoch": 2.22,
"learning_rate": 3.104546582295126e-05,
"loss": 0.124,
"step": 913
},
{
"epoch": 2.22,
"learning_rate": 3.100673411738652e-05,
"loss": 0.1308,
"step": 914
},
{
"epoch": 2.23,
"learning_rate": 3.096798710489962e-05,
"loss": 0.1213,
"step": 915
},
{
"epoch": 2.23,
"learning_rate": 3.092922488422933e-05,
"loss": 0.1277,
"step": 916
},
{
"epoch": 2.23,
"learning_rate": 3.089044755415315e-05,
"loss": 0.1201,
"step": 917
},
{
"epoch": 2.23,
"learning_rate": 3.0851655213487124e-05,
"loss": 0.115,
"step": 918
},
{
"epoch": 2.24,
"learning_rate": 3.0812847961085526e-05,
"loss": 0.1257,
"step": 919
},
{
"epoch": 2.24,
"learning_rate": 3.077402589584061e-05,
"loss": 0.1203,
"step": 920
},
{
"epoch": 2.24,
"learning_rate": 3.0735189116682414e-05,
"loss": 0.1255,
"step": 921
},
{
"epoch": 2.24,
"learning_rate": 3.0696337722578444e-05,
"loss": 0.1215,
"step": 922
},
{
"epoch": 2.25,
"learning_rate": 3.065747181253346e-05,
"loss": 0.1275,
"step": 923
},
{
"epoch": 2.25,
"learning_rate": 3.0618591485589224e-05,
"loss": 0.1346,
"step": 924
},
{
"epoch": 2.25,
"learning_rate": 3.0579696840824206e-05,
"loss": 0.1285,
"step": 925
},
{
"epoch": 2.25,
"learning_rate": 3.05407879773534e-05,
"loss": 0.1261,
"step": 926
},
{
"epoch": 2.26,
"learning_rate": 3.0501864994328e-05,
"loss": 0.1192,
"step": 927
},
{
"epoch": 2.26,
"learning_rate": 3.04629279909352e-05,
"loss": 0.1186,
"step": 928
},
{
"epoch": 2.26,
"learning_rate": 3.0423977066397912e-05,
"loss": 0.1244,
"step": 929
},
{
"epoch": 2.26,
"learning_rate": 3.0385012319974537e-05,
"loss": 0.1248,
"step": 930
},
{
"epoch": 2.27,
"learning_rate": 3.034603385095868e-05,
"loss": 0.1155,
"step": 931
},
{
"epoch": 2.27,
"learning_rate": 3.0307041758678932e-05,
"loss": 0.1267,
"step": 932
},
{
"epoch": 2.27,
"learning_rate": 3.0268036142498596e-05,
"loss": 0.1219,
"step": 933
},
{
"epoch": 2.27,
"learning_rate": 3.022901710181542e-05,
"loss": 0.126,
"step": 934
},
{
"epoch": 2.28,
"learning_rate": 3.018998473606139e-05,
"loss": 0.1219,
"step": 935
},
{
"epoch": 2.28,
"learning_rate": 3.0150939144702423e-05,
"loss": 0.1208,
"step": 936
},
{
"epoch": 2.28,
"learning_rate": 3.011188042723816e-05,
"loss": 0.1234,
"step": 937
},
{
"epoch": 2.28,
"learning_rate": 3.007280868320167e-05,
"loss": 0.1252,
"step": 938
},
{
"epoch": 2.29,
"learning_rate": 3.0033724012159242e-05,
"loss": 0.1185,
"step": 939
},
{
"epoch": 2.29,
"learning_rate": 2.9994626513710084e-05,
"loss": 0.1194,
"step": 940
},
{
"epoch": 2.29,
"learning_rate": 2.99555162874861e-05,
"loss": 0.1238,
"step": 941
},
{
"epoch": 2.29,
"learning_rate": 2.9916393433151634e-05,
"loss": 0.1208,
"step": 942
},
{
"epoch": 2.3,
"learning_rate": 2.9877258050403212e-05,
"loss": 0.1218,
"step": 943
},
{
"epoch": 2.3,
"learning_rate": 2.9838110238969264e-05,
"loss": 0.1254,
"step": 944
},
{
"epoch": 2.3,
"learning_rate": 2.9798950098609923e-05,
"loss": 0.1208,
"step": 945
},
{
"epoch": 2.3,
"learning_rate": 2.975977772911671e-05,
"loss": 0.1211,
"step": 946
},
{
"epoch": 2.3,
"learning_rate": 2.9720593230312337e-05,
"loss": 0.1177,
"step": 947
},
{
"epoch": 2.31,
"learning_rate": 2.9681396702050406e-05,
"loss": 0.1187,
"step": 948
},
{
"epoch": 2.31,
"learning_rate": 2.964218824421518e-05,
"loss": 0.125,
"step": 949
},
{
"epoch": 2.31,
"learning_rate": 2.9602967956721316e-05,
"loss": 0.1174,
"step": 950
},
{
"epoch": 2.31,
"learning_rate": 2.9563735939513636e-05,
"loss": 0.1167,
"step": 951
},
{
"epoch": 2.32,
"learning_rate": 2.9524492292566823e-05,
"loss": 0.1175,
"step": 952
},
{
"epoch": 2.32,
"learning_rate": 2.948523711588522e-05,
"loss": 0.1295,
"step": 953
},
{
"epoch": 2.32,
"learning_rate": 2.9445970509502546e-05,
"loss": 0.1336,
"step": 954
},
{
"epoch": 2.32,
"learning_rate": 2.940669257348163e-05,
"loss": 0.1218,
"step": 955
},
{
"epoch": 2.33,
"learning_rate": 2.9367403407914202e-05,
"loss": 0.1205,
"step": 956
},
{
"epoch": 2.33,
"learning_rate": 2.932810311292058e-05,
"loss": 0.1311,
"step": 957
},
{
"epoch": 2.33,
"learning_rate": 2.9288791788649462e-05,
"loss": 0.1258,
"step": 958
},
{
"epoch": 2.33,
"learning_rate": 2.9249469535277636e-05,
"loss": 0.1255,
"step": 959
},
{
"epoch": 2.34,
"learning_rate": 2.921013645300975e-05,
"loss": 0.1263,
"step": 960
},
{
"epoch": 2.34,
"learning_rate": 2.9170792642078055e-05,
"loss": 0.1219,
"step": 961
},
{
"epoch": 2.34,
"learning_rate": 2.9131438202742124e-05,
"loss": 0.1224,
"step": 962
},
{
"epoch": 2.34,
"learning_rate": 2.909207323528863e-05,
"loss": 0.122,
"step": 963
},
{
"epoch": 2.35,
"learning_rate": 2.9052697840031064e-05,
"loss": 0.1199,
"step": 964
},
{
"epoch": 2.35,
"learning_rate": 2.9013312117309488e-05,
"loss": 0.1206,
"step": 965
},
{
"epoch": 2.35,
"learning_rate": 2.8973916167490307e-05,
"loss": 0.1163,
"step": 966
},
{
"epoch": 2.35,
"learning_rate": 2.8934510090965944e-05,
"loss": 0.1222,
"step": 967
},
{
"epoch": 2.36,
"learning_rate": 2.889509398815467e-05,
"loss": 0.114,
"step": 968
},
{
"epoch": 2.36,
"learning_rate": 2.8855667959500276e-05,
"loss": 0.1218,
"step": 969
},
{
"epoch": 2.36,
"learning_rate": 2.8816232105471863e-05,
"loss": 0.1257,
"step": 970
},
{
"epoch": 2.36,
"learning_rate": 2.8776786526563575e-05,
"loss": 0.1216,
"step": 971
},
{
"epoch": 2.37,
"learning_rate": 2.8737331323294314e-05,
"loss": 0.1285,
"step": 972
},
{
"epoch": 2.37,
"learning_rate": 2.8697866596207524e-05,
"loss": 0.1285,
"step": 973
},
{
"epoch": 2.37,
"learning_rate": 2.8658392445870928e-05,
"loss": 0.1286,
"step": 974
},
{
"epoch": 2.37,
"learning_rate": 2.8618908972876246e-05,
"loss": 0.1267,
"step": 975
},
{
"epoch": 2.38,
"learning_rate": 2.857941627783895e-05,
"loss": 0.1182,
"step": 976
},
{
"epoch": 2.38,
"learning_rate": 2.8539914461398043e-05,
"loss": 0.1193,
"step": 977
},
{
"epoch": 2.38,
"learning_rate": 2.8500403624215734e-05,
"loss": 0.1157,
"step": 978
},
{
"epoch": 2.38,
"learning_rate": 2.846088386697723e-05,
"loss": 0.1269,
"step": 979
},
{
"epoch": 2.39,
"learning_rate": 2.8421355290390506e-05,
"loss": 0.1251,
"step": 980
},
{
"epoch": 2.39,
"learning_rate": 2.838181799518595e-05,
"loss": 0.1176,
"step": 981
},
{
"epoch": 2.39,
"learning_rate": 2.834227208211621e-05,
"loss": 0.1238,
"step": 982
},
{
"epoch": 2.39,
"learning_rate": 2.8302717651955895e-05,
"loss": 0.1226,
"step": 983
},
{
"epoch": 2.39,
"learning_rate": 2.8263154805501297e-05,
"loss": 0.1294,
"step": 984
},
{
"epoch": 2.4,
"learning_rate": 2.822358364357015e-05,
"loss": 0.1201,
"step": 985
},
{
"epoch": 2.4,
"learning_rate": 2.8184004267001425e-05,
"loss": 0.1255,
"step": 986
},
{
"epoch": 2.4,
"learning_rate": 2.8144416776654963e-05,
"loss": 0.1228,
"step": 987
},
{
"epoch": 2.4,
"learning_rate": 2.810482127341133e-05,
"loss": 0.1222,
"step": 988
},
{
"epoch": 2.41,
"learning_rate": 2.8065217858171495e-05,
"loss": 0.118,
"step": 989
},
{
"epoch": 2.41,
"learning_rate": 2.8025606631856578e-05,
"loss": 0.1261,
"step": 990
},
{
"epoch": 2.41,
"learning_rate": 2.7985987695407616e-05,
"loss": 0.1333,
"step": 991
},
{
"epoch": 2.41,
"learning_rate": 2.7946361149785306e-05,
"loss": 0.1165,
"step": 992
},
{
"epoch": 2.42,
"learning_rate": 2.79067270959697e-05,
"loss": 0.1295,
"step": 993
},
{
"epoch": 2.42,
"learning_rate": 2.7867085634960016e-05,
"loss": 0.125,
"step": 994
},
{
"epoch": 2.42,
"learning_rate": 2.782743686777433e-05,
"loss": 0.1143,
"step": 995
},
{
"epoch": 2.42,
"learning_rate": 2.778778089544935e-05,
"loss": 0.1247,
"step": 996
},
{
"epoch": 2.43,
"learning_rate": 2.7748117819040127e-05,
"loss": 0.1219,
"step": 997
},
{
"epoch": 2.43,
"learning_rate": 2.770844773961983e-05,
"loss": 0.1319,
"step": 998
},
{
"epoch": 2.43,
"learning_rate": 2.7668770758279473e-05,
"loss": 0.1291,
"step": 999
},
{
"epoch": 2.43,
"learning_rate": 2.762908697612765e-05,
"loss": 0.118,
"step": 1000
},
{
"epoch": 2.44,
"learning_rate": 2.7589396494290287e-05,
"loss": 0.134,
"step": 1001
},
{
"epoch": 2.44,
"learning_rate": 2.7549699413910384e-05,
"loss": 0.1295,
"step": 1002
},
{
"epoch": 2.44,
"learning_rate": 2.7509995836147766e-05,
"loss": 0.1227,
"step": 1003
},
{
"epoch": 2.44,
"learning_rate": 2.74702858621788e-05,
"loss": 0.118,
"step": 1004
},
{
"epoch": 2.45,
"learning_rate": 2.743056959319616e-05,
"loss": 0.1226,
"step": 1005
},
{
"epoch": 2.45,
"learning_rate": 2.739084713040856e-05,
"loss": 0.1257,
"step": 1006
},
{
"epoch": 2.45,
"learning_rate": 2.7351118575040496e-05,
"loss": 0.1215,
"step": 1007
},
{
"epoch": 2.45,
"learning_rate": 2.7311384028332e-05,
"loss": 0.1232,
"step": 1008
},
{
"epoch": 2.46,
"learning_rate": 2.7271643591538353e-05,
"loss": 0.1208,
"step": 1009
},
{
"epoch": 2.46,
"learning_rate": 2.723189736592986e-05,
"loss": 0.1248,
"step": 1010
},
{
"epoch": 2.46,
"learning_rate": 2.719214545279158e-05,
"loss": 0.119,
"step": 1011
},
{
"epoch": 2.46,
"learning_rate": 2.715238795342305e-05,
"loss": 0.1213,
"step": 1012
},
{
"epoch": 2.47,
"learning_rate": 2.711262496913805e-05,
"loss": 0.122,
"step": 1013
},
{
"epoch": 2.47,
"learning_rate": 2.7072856601264345e-05,
"loss": 0.1218,
"step": 1014
},
{
"epoch": 2.47,
"learning_rate": 2.7033082951143418e-05,
"loss": 0.1178,
"step": 1015
},
{
"epoch": 2.47,
"learning_rate": 2.6993304120130196e-05,
"loss": 0.127,
"step": 1016
},
{
"epoch": 2.48,
"learning_rate": 2.6953520209592824e-05,
"loss": 0.1145,
"step": 1017
},
{
"epoch": 2.48,
"learning_rate": 2.69137313209124e-05,
"loss": 0.1256,
"step": 1018
},
{
"epoch": 2.48,
"learning_rate": 2.6873937555482663e-05,
"loss": 0.1305,
"step": 1019
},
{
"epoch": 2.48,
"learning_rate": 2.6834139014709843e-05,
"loss": 0.1268,
"step": 1020
},
{
"epoch": 2.48,
"learning_rate": 2.6794335800012293e-05,
"loss": 0.1235,
"step": 1021
},
{
"epoch": 2.49,
"learning_rate": 2.6754528012820283e-05,
"loss": 0.125,
"step": 1022
},
{
"epoch": 2.49,
"learning_rate": 2.671471575457576e-05,
"loss": 0.1309,
"step": 1023
},
{
"epoch": 2.49,
"learning_rate": 2.6674899126732045e-05,
"loss": 0.115,
"step": 1024
},
{
"epoch": 2.49,
"learning_rate": 2.663507823075358e-05,
"loss": 0.1269,
"step": 1025
},
{
"epoch": 2.5,
"learning_rate": 2.659525316811571e-05,
"loss": 0.1276,
"step": 1026
},
{
"epoch": 2.5,
"learning_rate": 2.6555424040304398e-05,
"loss": 0.1118,
"step": 1027
},
{
"epoch": 2.5,
"learning_rate": 2.6515590948815933e-05,
"loss": 0.1252,
"step": 1028
},
{
"epoch": 2.5,
"learning_rate": 2.6475753995156743e-05,
"loss": 0.1184,
"step": 1029
},
{
"epoch": 2.51,
"learning_rate": 2.643591328084309e-05,
"loss": 0.1217,
"step": 1030
},
{
"epoch": 2.51,
"learning_rate": 2.6396068907400784e-05,
"loss": 0.1271,
"step": 1031
},
{
"epoch": 2.51,
"learning_rate": 2.635622097636501e-05,
"loss": 0.1175,
"step": 1032
},
{
"epoch": 2.51,
"learning_rate": 2.6316369589279998e-05,
"loss": 0.1184,
"step": 1033
},
{
"epoch": 2.52,
"learning_rate": 2.6276514847698762e-05,
"loss": 0.1197,
"step": 1034
},
{
"epoch": 2.52,
"learning_rate": 2.623665685318291e-05,
"loss": 0.1269,
"step": 1035
},
{
"epoch": 2.52,
"learning_rate": 2.6196795707302302e-05,
"loss": 0.1257,
"step": 1036
},
{
"epoch": 2.52,
"learning_rate": 2.6156931511634834e-05,
"loss": 0.1276,
"step": 1037
},
{
"epoch": 2.53,
"learning_rate": 2.6117064367766197e-05,
"loss": 0.1322,
"step": 1038
},
{
"epoch": 2.53,
"learning_rate": 2.607719437728957e-05,
"loss": 0.1239,
"step": 1039
},
{
"epoch": 2.53,
"learning_rate": 2.603732164180539e-05,
"loss": 0.1169,
"step": 1040
},
{
"epoch": 2.53,
"learning_rate": 2.5997446262921106e-05,
"loss": 0.1144,
"step": 1041
},
{
"epoch": 2.54,
"learning_rate": 2.595756834225089e-05,
"loss": 0.1223,
"step": 1042
},
{
"epoch": 2.54,
"learning_rate": 2.5917687981415373e-05,
"loss": 0.1238,
"step": 1043
},
{
"epoch": 2.54,
"learning_rate": 2.5877805282041455e-05,
"loss": 0.1201,
"step": 1044
},
{
"epoch": 2.54,
"learning_rate": 2.583792034576194e-05,
"loss": 0.1327,
"step": 1045
},
{
"epoch": 2.55,
"learning_rate": 2.579803327421536e-05,
"loss": 0.1336,
"step": 1046
},
{
"epoch": 2.55,
"learning_rate": 2.575814416904569e-05,
"loss": 0.1177,
"step": 1047
},
{
"epoch": 2.55,
"learning_rate": 2.571825313190208e-05,
"loss": 0.1153,
"step": 1048
},
{
"epoch": 2.55,
"learning_rate": 2.5678360264438606e-05,
"loss": 0.1199,
"step": 1049
},
{
"epoch": 2.56,
"learning_rate": 2.5638465668314006e-05,
"loss": 0.1191,
"step": 1050
},
{
"epoch": 2.56,
"learning_rate": 2.5598569445191418e-05,
"loss": 0.1132,
"step": 1051
},
{
"epoch": 2.56,
"learning_rate": 2.5558671696738146e-05,
"loss": 0.1266,
"step": 1052
},
{
"epoch": 2.56,
"learning_rate": 2.5518772524625357e-05,
"loss": 0.1191,
"step": 1053
},
{
"epoch": 2.57,
"learning_rate": 2.5478872030527855e-05,
"loss": 0.1148,
"step": 1054
},
{
"epoch": 2.57,
"learning_rate": 2.5438970316123822e-05,
"loss": 0.1224,
"step": 1055
},
{
"epoch": 2.57,
"learning_rate": 2.539906748309454e-05,
"loss": 0.1136,
"step": 1056
},
{
"epoch": 2.57,
"learning_rate": 2.535916363312414e-05,
"loss": 0.1199,
"step": 1057
},
{
"epoch": 2.57,
"learning_rate": 2.5319258867899348e-05,
"loss": 0.1241,
"step": 1058
},
{
"epoch": 2.58,
"learning_rate": 2.5279353289109227e-05,
"loss": 0.1202,
"step": 1059
},
{
"epoch": 2.58,
"learning_rate": 2.5239446998444898e-05,
"loss": 0.1247,
"step": 1060
},
{
"epoch": 2.58,
"learning_rate": 2.5199540097599318e-05,
"loss": 0.1345,
"step": 1061
},
{
"epoch": 2.58,
"learning_rate": 2.5159632688266982e-05,
"loss": 0.1223,
"step": 1062
},
{
"epoch": 2.59,
"learning_rate": 2.511972487214369e-05,
"loss": 0.1141,
"step": 1063
},
{
"epoch": 2.59,
"learning_rate": 2.5079816750926265e-05,
"loss": 0.1257,
"step": 1064
},
{
"epoch": 2.59,
"learning_rate": 2.5039908426312332e-05,
"loss": 0.1235,
"step": 1065
},
{
"epoch": 2.59,
"learning_rate": 2.5e-05,
"loss": 0.1297,
"step": 1066
},
{
"epoch": 2.6,
"learning_rate": 2.4960091573687677e-05,
"loss": 0.1281,
"step": 1067
},
{
"epoch": 2.6,
"learning_rate": 2.4920183249073744e-05,
"loss": 0.1176,
"step": 1068
},
{
"epoch": 2.6,
"learning_rate": 2.488027512785632e-05,
"loss": 0.1204,
"step": 1069
},
{
"epoch": 2.6,
"learning_rate": 2.4840367311733024e-05,
"loss": 0.1318,
"step": 1070
},
{
"epoch": 2.61,
"learning_rate": 2.4800459902400684e-05,
"loss": 0.1293,
"step": 1071
},
{
"epoch": 2.61,
"learning_rate": 2.4760553001555108e-05,
"loss": 0.1154,
"step": 1072
},
{
"epoch": 2.61,
"learning_rate": 2.472064671089078e-05,
"loss": 0.1178,
"step": 1073
},
{
"epoch": 2.61,
"learning_rate": 2.468074113210066e-05,
"loss": 0.125,
"step": 1074
},
{
"epoch": 2.62,
"learning_rate": 2.4640836366875873e-05,
"loss": 0.1191,
"step": 1075
},
{
"epoch": 2.62,
"learning_rate": 2.4600932516905466e-05,
"loss": 0.1264,
"step": 1076
},
{
"epoch": 2.62,
"learning_rate": 2.4561029683876184e-05,
"loss": 0.1207,
"step": 1077
},
{
"epoch": 2.62,
"learning_rate": 2.4521127969472148e-05,
"loss": 0.1253,
"step": 1078
},
{
"epoch": 2.63,
"learning_rate": 2.4481227475374652e-05,
"loss": 0.1255,
"step": 1079
},
{
"epoch": 2.63,
"learning_rate": 2.4441328303261867e-05,
"loss": 0.1287,
"step": 1080
},
{
"epoch": 2.63,
"learning_rate": 2.440143055480859e-05,
"loss": 0.1176,
"step": 1081
},
{
"epoch": 2.63,
"learning_rate": 2.4361534331686003e-05,
"loss": 0.1223,
"step": 1082
},
{
"epoch": 2.64,
"learning_rate": 2.4321639735561403e-05,
"loss": 0.1321,
"step": 1083
},
{
"epoch": 2.64,
"learning_rate": 2.4281746868097926e-05,
"loss": 0.1268,
"step": 1084
},
{
"epoch": 2.64,
"learning_rate": 2.4241855830954316e-05,
"loss": 0.1229,
"step": 1085
},
{
"epoch": 2.64,
"learning_rate": 2.420196672578465e-05,
"loss": 0.118,
"step": 1086
},
{
"epoch": 2.65,
"learning_rate": 2.4162079654238073e-05,
"loss": 0.135,
"step": 1087
},
{
"epoch": 2.65,
"learning_rate": 2.412219471795855e-05,
"loss": 0.1135,
"step": 1088
},
{
"epoch": 2.65,
"learning_rate": 2.4082312018584626e-05,
"loss": 0.1158,
"step": 1089
},
{
"epoch": 2.65,
"learning_rate": 2.4042431657749117e-05,
"loss": 0.125,
"step": 1090
},
{
"epoch": 2.66,
"learning_rate": 2.40025537370789e-05,
"loss": 0.1245,
"step": 1091
},
{
"epoch": 2.66,
"learning_rate": 2.3962678358194614e-05,
"loss": 0.1259,
"step": 1092
},
{
"epoch": 2.66,
"learning_rate": 2.3922805622710438e-05,
"loss": 0.1157,
"step": 1093
},
{
"epoch": 2.66,
"learning_rate": 2.3882935632233805e-05,
"loss": 0.1228,
"step": 1094
},
{
"epoch": 2.67,
"learning_rate": 2.3843068488365168e-05,
"loss": 0.1255,
"step": 1095
},
{
"epoch": 2.67,
"learning_rate": 2.3803204292697704e-05,
"loss": 0.1207,
"step": 1096
},
{
"epoch": 2.67,
"learning_rate": 2.3763343146817096e-05,
"loss": 0.1201,
"step": 1097
},
{
"epoch": 2.67,
"learning_rate": 2.372348515230124e-05,
"loss": 0.1203,
"step": 1098
},
{
"epoch": 2.67,
"learning_rate": 2.368363041072001e-05,
"loss": 0.1234,
"step": 1099
},
{
"epoch": 2.68,
"learning_rate": 2.364377902363499e-05,
"loss": 0.1252,
"step": 1100
},
{
"epoch": 2.68,
"learning_rate": 2.3603931092599215e-05,
"loss": 0.1239,
"step": 1101
},
{
"epoch": 2.68,
"learning_rate": 2.356408671915692e-05,
"loss": 0.1148,
"step": 1102
},
{
"epoch": 2.68,
"learning_rate": 2.3524246004843263e-05,
"loss": 0.12,
"step": 1103
},
{
"epoch": 2.69,
"learning_rate": 2.3484409051184076e-05,
"loss": 0.12,
"step": 1104
},
{
"epoch": 2.69,
"learning_rate": 2.3444575959695614e-05,
"loss": 0.1235,
"step": 1105
},
{
"epoch": 2.69,
"learning_rate": 2.340474683188429e-05,
"loss": 0.121,
"step": 1106
},
{
"epoch": 2.69,
"learning_rate": 2.3364921769246423e-05,
"loss": 0.1218,
"step": 1107
},
{
"epoch": 2.7,
"learning_rate": 2.332510087326796e-05,
"loss": 0.1246,
"step": 1108
},
{
"epoch": 2.7,
"learning_rate": 2.3285284245424244e-05,
"loss": 0.1243,
"step": 1109
},
{
"epoch": 2.7,
"learning_rate": 2.324547198717972e-05,
"loss": 0.1206,
"step": 1110
},
{
"epoch": 2.7,
"learning_rate": 2.3205664199987716e-05,
"loss": 0.1172,
"step": 1111
},
{
"epoch": 2.71,
"learning_rate": 2.316586098529017e-05,
"loss": 0.111,
"step": 1112
},
{
"epoch": 2.71,
"learning_rate": 2.3126062444517336e-05,
"loss": 0.1272,
"step": 1113
},
{
"epoch": 2.71,
"learning_rate": 2.3086268679087607e-05,
"loss": 0.1196,
"step": 1114
},
{
"epoch": 2.71,
"learning_rate": 2.3046479790407178e-05,
"loss": 0.126,
"step": 1115
},
{
"epoch": 2.72,
"learning_rate": 2.3006695879869807e-05,
"loss": 0.1232,
"step": 1116
},
{
"epoch": 2.72,
"learning_rate": 2.2966917048856588e-05,
"loss": 0.115,
"step": 1117
},
{
"epoch": 2.72,
"learning_rate": 2.292714339873566e-05,
"loss": 0.1258,
"step": 1118
},
{
"epoch": 2.72,
"learning_rate": 2.288737503086195e-05,
"loss": 0.1246,
"step": 1119
},
{
"epoch": 2.73,
"learning_rate": 2.284761204657696e-05,
"loss": 0.1262,
"step": 1120
},
{
"epoch": 2.73,
"learning_rate": 2.280785454720843e-05,
"loss": 0.1126,
"step": 1121
},
{
"epoch": 2.73,
"learning_rate": 2.2768102634070147e-05,
"loss": 0.1244,
"step": 1122
},
{
"epoch": 2.73,
"learning_rate": 2.2728356408461653e-05,
"loss": 0.1271,
"step": 1123
},
{
"epoch": 2.74,
"learning_rate": 2.268861597166801e-05,
"loss": 0.1243,
"step": 1124
},
{
"epoch": 2.74,
"learning_rate": 2.26488814249595e-05,
"loss": 0.1275,
"step": 1125
},
{
"epoch": 2.74,
"learning_rate": 2.2609152869591446e-05,
"loss": 0.1219,
"step": 1126
},
{
"epoch": 2.74,
"learning_rate": 2.2569430406803846e-05,
"loss": 0.1455,
"step": 1127
},
{
"epoch": 2.75,
"learning_rate": 2.2529714137821206e-05,
"loss": 0.12,
"step": 1128
},
{
"epoch": 2.75,
"learning_rate": 2.249000416385224e-05,
"loss": 0.1142,
"step": 1129
},
{
"epoch": 2.75,
"learning_rate": 2.2450300586089622e-05,
"loss": 0.1285,
"step": 1130
},
{
"epoch": 2.75,
"learning_rate": 2.2410603505709715e-05,
"loss": 0.1238,
"step": 1131
},
{
"epoch": 2.76,
"learning_rate": 2.2370913023872355e-05,
"loss": 0.1238,
"step": 1132
},
{
"epoch": 2.76,
"learning_rate": 2.233122924172053e-05,
"loss": 0.1251,
"step": 1133
},
{
"epoch": 2.76,
"learning_rate": 2.229155226038017e-05,
"loss": 0.1346,
"step": 1134
},
{
"epoch": 2.76,
"learning_rate": 2.2251882180959875e-05,
"loss": 0.1255,
"step": 1135
},
{
"epoch": 2.76,
"learning_rate": 2.2212219104550665e-05,
"loss": 0.1289,
"step": 1136
},
{
"epoch": 2.77,
"learning_rate": 2.217256313222567e-05,
"loss": 0.1172,
"step": 1137
},
{
"epoch": 2.77,
"learning_rate": 2.2132914365039993e-05,
"loss": 0.1186,
"step": 1138
},
{
"epoch": 2.77,
"learning_rate": 2.2093272904030307e-05,
"loss": 0.1089,
"step": 1139
},
{
"epoch": 2.77,
"learning_rate": 2.2053638850214704e-05,
"loss": 0.1273,
"step": 1140
},
{
"epoch": 2.78,
"learning_rate": 2.201401230459239e-05,
"loss": 0.1231,
"step": 1141
},
{
"epoch": 2.78,
"learning_rate": 2.197439336814343e-05,
"loss": 0.1146,
"step": 1142
},
{
"epoch": 2.78,
"learning_rate": 2.1934782141828504e-05,
"loss": 0.1198,
"step": 1143
},
{
"epoch": 2.78,
"learning_rate": 2.1895178726588674e-05,
"loss": 0.1205,
"step": 1144
},
{
"epoch": 2.79,
"learning_rate": 2.185558322334504e-05,
"loss": 0.1223,
"step": 1145
},
{
"epoch": 2.79,
"learning_rate": 2.1815995732998584e-05,
"loss": 0.1212,
"step": 1146
},
{
"epoch": 2.79,
"learning_rate": 2.1776416356429856e-05,
"loss": 0.122,
"step": 1147
},
{
"epoch": 2.79,
"learning_rate": 2.173684519449872e-05,
"loss": 0.1261,
"step": 1148
},
{
"epoch": 2.8,
"learning_rate": 2.169728234804411e-05,
"loss": 0.1241,
"step": 1149
},
{
"epoch": 2.8,
"learning_rate": 2.165772791788379e-05,
"loss": 0.1185,
"step": 1150
},
{
"epoch": 2.8,
"learning_rate": 2.1618182004814054e-05,
"loss": 0.1188,
"step": 1151
},
{
"epoch": 2.8,
"learning_rate": 2.1578644709609503e-05,
"loss": 0.1063,
"step": 1152
},
{
"epoch": 2.81,
"learning_rate": 2.1539116133022773e-05,
"loss": 0.1121,
"step": 1153
},
{
"epoch": 2.81,
"learning_rate": 2.1499596375784282e-05,
"loss": 0.1195,
"step": 1154
},
{
"epoch": 2.81,
"learning_rate": 2.146008553860197e-05,
"loss": 0.1131,
"step": 1155
},
{
"epoch": 2.81,
"learning_rate": 2.142058372216105e-05,
"loss": 0.1156,
"step": 1156
},
{
"epoch": 2.82,
"learning_rate": 2.138109102712376e-05,
"loss": 0.1224,
"step": 1157
},
{
"epoch": 2.82,
"learning_rate": 2.1341607554129074e-05,
"loss": 0.1217,
"step": 1158
},
{
"epoch": 2.82,
"learning_rate": 2.1302133403792482e-05,
"loss": 0.1213,
"step": 1159
},
{
"epoch": 2.82,
"learning_rate": 2.1262668676705695e-05,
"loss": 0.1266,
"step": 1160
},
{
"epoch": 2.83,
"learning_rate": 2.1223213473436438e-05,
"loss": 0.1141,
"step": 1161
},
{
"epoch": 2.83,
"learning_rate": 2.1183767894528136e-05,
"loss": 0.1194,
"step": 1162
},
{
"epoch": 2.83,
"learning_rate": 2.1144332040499726e-05,
"loss": 0.1157,
"step": 1163
},
{
"epoch": 2.83,
"learning_rate": 2.1104906011845334e-05,
"loss": 0.1156,
"step": 1164
},
{
"epoch": 2.84,
"learning_rate": 2.1065489909034065e-05,
"loss": 0.1224,
"step": 1165
},
{
"epoch": 2.84,
"learning_rate": 2.1026083832509702e-05,
"loss": 0.1194,
"step": 1166
},
{
"epoch": 2.84,
"learning_rate": 2.0986687882690515e-05,
"loss": 0.11,
"step": 1167
},
{
"epoch": 2.84,
"learning_rate": 2.094730215996894e-05,
"loss": 0.115,
"step": 1168
},
{
"epoch": 2.85,
"learning_rate": 2.090792676471137e-05,
"loss": 0.1267,
"step": 1169
},
{
"epoch": 2.85,
"learning_rate": 2.0868561797257878e-05,
"loss": 0.131,
"step": 1170
},
{
"epoch": 2.85,
"learning_rate": 2.082920735792195e-05,
"loss": 0.123,
"step": 1171
},
{
"epoch": 2.85,
"learning_rate": 2.0789863546990253e-05,
"loss": 0.123,
"step": 1172
},
{
"epoch": 2.85,
"learning_rate": 2.0750530464722373e-05,
"loss": 0.126,
"step": 1173
},
{
"epoch": 2.86,
"learning_rate": 2.071120821135054e-05,
"loss": 0.1151,
"step": 1174
},
{
"epoch": 2.86,
"learning_rate": 2.0671896887079418e-05,
"loss": 0.1242,
"step": 1175
},
{
"epoch": 2.86,
"learning_rate": 2.0632596592085804e-05,
"loss": 0.1138,
"step": 1176
},
{
"epoch": 2.86,
"learning_rate": 2.0593307426518373e-05,
"loss": 0.1184,
"step": 1177
},
{
"epoch": 2.87,
"learning_rate": 2.0554029490497463e-05,
"loss": 0.1216,
"step": 1178
},
{
"epoch": 2.87,
"learning_rate": 2.0514762884114784e-05,
"loss": 0.1114,
"step": 1179
},
{
"epoch": 2.87,
"learning_rate": 2.047550770743318e-05,
"loss": 0.1239,
"step": 1180
},
{
"epoch": 2.87,
"learning_rate": 2.0436264060486366e-05,
"loss": 0.122,
"step": 1181
},
{
"epoch": 2.88,
"learning_rate": 2.0397032043278687e-05,
"loss": 0.1221,
"step": 1182
},
{
"epoch": 2.88,
"learning_rate": 2.035781175578483e-05,
"loss": 0.1218,
"step": 1183
},
{
"epoch": 2.88,
"learning_rate": 2.03186032979496e-05,
"loss": 0.1251,
"step": 1184
},
{
"epoch": 2.88,
"learning_rate": 2.0279406769687666e-05,
"loss": 0.1135,
"step": 1185
},
{
"epoch": 2.89,
"learning_rate": 2.0240222270883288e-05,
"loss": 0.1229,
"step": 1186
},
{
"epoch": 2.89,
"learning_rate": 2.020104990139008e-05,
"loss": 0.1183,
"step": 1187
},
{
"epoch": 2.89,
"learning_rate": 2.016188976103074e-05,
"loss": 0.1207,
"step": 1188
},
{
"epoch": 2.89,
"learning_rate": 2.0122741949596797e-05,
"loss": 0.1142,
"step": 1189
},
{
"epoch": 2.9,
"learning_rate": 2.008360656684837e-05,
"loss": 0.1243,
"step": 1190
},
{
"epoch": 2.9,
"learning_rate": 2.0044483712513908e-05,
"loss": 0.1127,
"step": 1191
},
{
"epoch": 2.9,
"learning_rate": 2.000537348628993e-05,
"loss": 0.113,
"step": 1192
},
{
"epoch": 2.9,
"learning_rate": 1.9966275987840764e-05,
"loss": 0.1221,
"step": 1193
},
{
"epoch": 2.91,
"learning_rate": 1.9927191316798332e-05,
"loss": 0.121,
"step": 1194
},
{
"epoch": 2.91,
"learning_rate": 1.9888119572761845e-05,
"loss": 0.1184,
"step": 1195
},
{
"epoch": 2.91,
"learning_rate": 1.984906085529758e-05,
"loss": 0.1143,
"step": 1196
},
{
"epoch": 2.91,
"learning_rate": 1.9810015263938624e-05,
"loss": 0.1155,
"step": 1197
},
{
"epoch": 2.92,
"learning_rate": 1.977098289818459e-05,
"loss": 0.1211,
"step": 1198
},
{
"epoch": 2.92,
"learning_rate": 1.973196385750141e-05,
"loss": 0.1397,
"step": 1199
},
{
"epoch": 2.92,
"learning_rate": 1.969295824132107e-05,
"loss": 0.1072,
"step": 1200
},
{
"epoch": 2.92,
"learning_rate": 1.965396614904132e-05,
"loss": 0.1223,
"step": 1201
},
{
"epoch": 2.93,
"learning_rate": 1.961498768002547e-05,
"loss": 0.1206,
"step": 1202
},
{
"epoch": 2.93,
"learning_rate": 1.9576022933602097e-05,
"loss": 0.1168,
"step": 1203
},
{
"epoch": 2.93,
"learning_rate": 1.9537072009064814e-05,
"loss": 0.116,
"step": 1204
},
{
"epoch": 2.93,
"learning_rate": 1.949813500567201e-05,
"loss": 0.1186,
"step": 1205
},
{
"epoch": 2.94,
"learning_rate": 1.9459212022646606e-05,
"loss": 0.1121,
"step": 1206
},
{
"epoch": 2.94,
"learning_rate": 1.9420303159175796e-05,
"loss": 0.1251,
"step": 1207
},
{
"epoch": 2.94,
"learning_rate": 1.9381408514410782e-05,
"loss": 0.1256,
"step": 1208
},
{
"epoch": 2.94,
"learning_rate": 1.9342528187466548e-05,
"loss": 0.1354,
"step": 1209
},
{
"epoch": 2.94,
"learning_rate": 1.9303662277421568e-05,
"loss": 0.1258,
"step": 1210
},
{
"epoch": 2.95,
"learning_rate": 1.9264810883317592e-05,
"loss": 0.1149,
"step": 1211
},
{
"epoch": 2.95,
"learning_rate": 1.922597410415939e-05,
"loss": 0.1202,
"step": 1212
},
{
"epoch": 2.95,
"learning_rate": 1.918715203891448e-05,
"loss": 0.1244,
"step": 1213
},
{
"epoch": 2.95,
"learning_rate": 1.9148344786512878e-05,
"loss": 0.1198,
"step": 1214
},
{
"epoch": 2.96,
"learning_rate": 1.9109552445846854e-05,
"loss": 0.1153,
"step": 1215
},
{
"epoch": 2.96,
"learning_rate": 1.907077511577068e-05,
"loss": 0.1194,
"step": 1216
},
{
"epoch": 2.96,
"learning_rate": 1.9032012895100383e-05,
"loss": 0.1181,
"step": 1217
},
{
"epoch": 2.96,
"learning_rate": 1.8993265882613482e-05,
"loss": 0.1173,
"step": 1218
},
{
"epoch": 2.97,
"learning_rate": 1.8954534177048744e-05,
"loss": 0.1196,
"step": 1219
},
{
"epoch": 2.97,
"learning_rate": 1.8915817877105926e-05,
"loss": 0.1218,
"step": 1220
},
{
"epoch": 2.97,
"learning_rate": 1.8877117081445524e-05,
"loss": 0.117,
"step": 1221
},
{
"epoch": 2.97,
"learning_rate": 1.8838431888688527e-05,
"loss": 0.1167,
"step": 1222
},
{
"epoch": 2.98,
"learning_rate": 1.8799762397416158e-05,
"loss": 0.1194,
"step": 1223
},
{
"epoch": 2.98,
"learning_rate": 1.8761108706169655e-05,
"loss": 0.1177,
"step": 1224
},
{
"epoch": 2.98,
"learning_rate": 1.872247091344996e-05,
"loss": 0.1223,
"step": 1225
},
{
"epoch": 2.98,
"learning_rate": 1.8683849117717518e-05,
"loss": 0.1231,
"step": 1226
},
{
"epoch": 2.99,
"learning_rate": 1.8645243417391995e-05,
"loss": 0.1212,
"step": 1227
},
{
"epoch": 2.99,
"learning_rate": 1.8606653910852056e-05,
"loss": 0.1163,
"step": 1228
},
{
"epoch": 2.99,
"learning_rate": 1.856808069643509e-05,
"loss": 0.1265,
"step": 1229
},
{
"epoch": 2.99,
"learning_rate": 1.852952387243698e-05,
"loss": 0.1148,
"step": 1230
},
{
"epoch": 3.0,
"learning_rate": 1.849098353711183e-05,
"loss": 0.12,
"step": 1231
},
{
"epoch": 3.0,
"learning_rate": 1.8452459788671738e-05,
"loss": 0.1195,
"step": 1232
},
{
"epoch": 3.0,
"eval_loss": 0.7090210318565369,
"eval_runtime": 116.3629,
"eval_samples_per_second": 6.548,
"eval_steps_per_second": 0.413,
"step": 1232
},
{
"epoch": 3.0,
"learning_rate": 1.841395272528651e-05,
"loss": 0.0877,
"step": 1233
},
{
"epoch": 3.0,
"learning_rate": 1.8375462445083464e-05,
"loss": 0.0432,
"step": 1234
},
{
"epoch": 3.01,
"learning_rate": 1.8336989046147128e-05,
"loss": 0.0427,
"step": 1235
},
{
"epoch": 3.01,
"learning_rate": 1.8298532626519007e-05,
"loss": 0.0441,
"step": 1236
},
{
"epoch": 3.01,
"learning_rate": 1.826009328419737e-05,
"loss": 0.0398,
"step": 1237
},
{
"epoch": 3.01,
"learning_rate": 1.822167111713695e-05,
"loss": 0.0429,
"step": 1238
},
{
"epoch": 3.02,
"learning_rate": 1.818326622324869e-05,
"loss": 0.0366,
"step": 1239
},
{
"epoch": 3.02,
"learning_rate": 1.814487870039955e-05,
"loss": 0.034,
"step": 1240
},
{
"epoch": 3.02,
"learning_rate": 1.81065086464122e-05,
"loss": 0.0367,
"step": 1241
},
{
"epoch": 3.02,
"learning_rate": 1.80681561590648e-05,
"loss": 0.0336,
"step": 1242
},
{
"epoch": 3.03,
"learning_rate": 1.802982133609077e-05,
"loss": 0.0367,
"step": 1243
},
{
"epoch": 3.03,
"learning_rate": 1.7991504275178473e-05,
"loss": 0.0373,
"step": 1244
},
{
"epoch": 3.03,
"learning_rate": 1.7953205073971035e-05,
"loss": 0.0351,
"step": 1245
},
{
"epoch": 3.03,
"learning_rate": 1.7914923830066074e-05,
"loss": 0.0341,
"step": 1246
},
{
"epoch": 3.03,
"learning_rate": 1.7876660641015437e-05,
"loss": 0.0392,
"step": 1247
},
{
"epoch": 3.04,
"learning_rate": 1.7838415604324943e-05,
"loss": 0.0373,
"step": 1248
},
{
"epoch": 3.04,
"learning_rate": 1.7800188817454208e-05,
"loss": 0.037,
"step": 1249
},
{
"epoch": 3.04,
"learning_rate": 1.7761980377816287e-05,
"loss": 0.0337,
"step": 1250
},
{
"epoch": 3.04,
"learning_rate": 1.772379038277751e-05,
"loss": 0.0368,
"step": 1251
},
{
"epoch": 3.05,
"learning_rate": 1.7685618929657194e-05,
"loss": 0.0413,
"step": 1252
},
{
"epoch": 3.05,
"learning_rate": 1.764746611572742e-05,
"loss": 0.0331,
"step": 1253
},
{
"epoch": 3.05,
"learning_rate": 1.7609332038212728e-05,
"loss": 0.0329,
"step": 1254
},
{
"epoch": 3.05,
"learning_rate": 1.7571216794289984e-05,
"loss": 0.0317,
"step": 1255
},
{
"epoch": 3.06,
"learning_rate": 1.7533120481088e-05,
"loss": 0.035,
"step": 1256
},
{
"epoch": 3.06,
"learning_rate": 1.7495043195687368e-05,
"loss": 0.0352,
"step": 1257
},
{
"epoch": 3.06,
"learning_rate": 1.7456985035120193e-05,
"loss": 0.0373,
"step": 1258
},
{
"epoch": 3.06,
"learning_rate": 1.741894609636985e-05,
"loss": 0.0337,
"step": 1259
},
{
"epoch": 3.07,
"learning_rate": 1.7380926476370702e-05,
"loss": 0.0368,
"step": 1260
},
{
"epoch": 3.07,
"learning_rate": 1.734292627200793e-05,
"loss": 0.0369,
"step": 1261
},
{
"epoch": 3.07,
"learning_rate": 1.7304945580117193e-05,
"loss": 0.0316,
"step": 1262
},
{
"epoch": 3.07,
"learning_rate": 1.7266984497484458e-05,
"loss": 0.0343,
"step": 1263
},
{
"epoch": 3.08,
"learning_rate": 1.7229043120845708e-05,
"loss": 0.035,
"step": 1264
},
{
"epoch": 3.08,
"learning_rate": 1.7191121546886697e-05,
"loss": 0.0375,
"step": 1265
},
{
"epoch": 3.08,
"learning_rate": 1.7153219872242727e-05,
"loss": 0.0331,
"step": 1266
},
{
"epoch": 3.08,
"learning_rate": 1.711533819349842e-05,
"loss": 0.0323,
"step": 1267
},
{
"epoch": 3.09,
"learning_rate": 1.7077476607187397e-05,
"loss": 0.0329,
"step": 1268
},
{
"epoch": 3.09,
"learning_rate": 1.7039635209792105e-05,
"loss": 0.0306,
"step": 1269
},
{
"epoch": 3.09,
"learning_rate": 1.7001814097743528e-05,
"loss": 0.0312,
"step": 1270
},
{
"epoch": 3.09,
"learning_rate": 1.6964013367420966e-05,
"loss": 0.0314,
"step": 1271
},
{
"epoch": 3.1,
"learning_rate": 1.692623311515178e-05,
"loss": 0.0333,
"step": 1272
},
{
"epoch": 3.1,
"learning_rate": 1.6888473437211132e-05,
"loss": 0.0365,
"step": 1273
},
{
"epoch": 3.1,
"learning_rate": 1.685073442982179e-05,
"loss": 0.03,
"step": 1274
},
{
"epoch": 3.1,
"learning_rate": 1.6813016189153814e-05,
"loss": 0.0329,
"step": 1275
},
{
"epoch": 3.11,
"learning_rate": 1.6775318811324364e-05,
"loss": 0.0369,
"step": 1276
},
{
"epoch": 3.11,
"learning_rate": 1.6737642392397414e-05,
"loss": 0.0331,
"step": 1277
},
{
"epoch": 3.11,
"learning_rate": 1.6699987028383546e-05,
"loss": 0.037,
"step": 1278
},
{
"epoch": 3.11,
"learning_rate": 1.6662352815239678e-05,
"loss": 0.0362,
"step": 1279
},
{
"epoch": 3.12,
"learning_rate": 1.6624739848868854e-05,
"loss": 0.0323,
"step": 1280
},
{
"epoch": 3.12,
"learning_rate": 1.6587148225119935e-05,
"loss": 0.0339,
"step": 1281
},
{
"epoch": 3.12,
"learning_rate": 1.6549578039787436e-05,
"loss": 0.0306,
"step": 1282
},
{
"epoch": 3.12,
"learning_rate": 1.65120293886112e-05,
"loss": 0.0311,
"step": 1283
},
{
"epoch": 3.13,
"learning_rate": 1.6474502367276222e-05,
"loss": 0.0361,
"step": 1284
},
{
"epoch": 3.13,
"learning_rate": 1.643699707141237e-05,
"loss": 0.0342,
"step": 1285
},
{
"epoch": 3.13,
"learning_rate": 1.6399513596594158e-05,
"loss": 0.0307,
"step": 1286
},
{
"epoch": 3.13,
"learning_rate": 1.6362052038340475e-05,
"loss": 0.0322,
"step": 1287
},
{
"epoch": 3.13,
"learning_rate": 1.6324612492114378e-05,
"loss": 0.0339,
"step": 1288
},
{
"epoch": 3.14,
"learning_rate": 1.6287195053322816e-05,
"loss": 0.0331,
"step": 1289
},
{
"epoch": 3.14,
"learning_rate": 1.6249799817316415e-05,
"loss": 0.0387,
"step": 1290
},
{
"epoch": 3.14,
"learning_rate": 1.6212426879389205e-05,
"loss": 0.035,
"step": 1291
},
{
"epoch": 3.14,
"learning_rate": 1.617507633477842e-05,
"loss": 0.0325,
"step": 1292
},
{
"epoch": 3.15,
"learning_rate": 1.6137748278664215e-05,
"loss": 0.0334,
"step": 1293
},
{
"epoch": 3.15,
"learning_rate": 1.6100442806169422e-05,
"loss": 0.0318,
"step": 1294
},
{
"epoch": 3.15,
"learning_rate": 1.6063160012359345e-05,
"loss": 0.0325,
"step": 1295
},
{
"epoch": 3.15,
"learning_rate": 1.602589999224149e-05,
"loss": 0.0322,
"step": 1296
},
{
"epoch": 3.16,
"learning_rate": 1.598866284076532e-05,
"loss": 0.0326,
"step": 1297
},
{
"epoch": 3.16,
"learning_rate": 1.5951448652822047e-05,
"loss": 0.0341,
"step": 1298
},
{
"epoch": 3.16,
"learning_rate": 1.5914257523244347e-05,
"loss": 0.0321,
"step": 1299
},
{
"epoch": 3.16,
"learning_rate": 1.5877089546806125e-05,
"loss": 0.0318,
"step": 1300
},
{
"epoch": 3.17,
"learning_rate": 1.5839944818222295e-05,
"loss": 0.0323,
"step": 1301
},
{
"epoch": 3.17,
"learning_rate": 1.5802823432148546e-05,
"loss": 0.0355,
"step": 1302
},
{
"epoch": 3.17,
"learning_rate": 1.5765725483181053e-05,
"loss": 0.0349,
"step": 1303
},
{
"epoch": 3.17,
"learning_rate": 1.5728651065856297e-05,
"loss": 0.0316,
"step": 1304
},
{
"epoch": 3.18,
"learning_rate": 1.569160027465078e-05,
"loss": 0.029,
"step": 1305
},
{
"epoch": 3.18,
"learning_rate": 1.5654573203980784e-05,
"loss": 0.0323,
"step": 1306
},
{
"epoch": 3.18,
"learning_rate": 1.561756994820216e-05,
"loss": 0.0314,
"step": 1307
},
{
"epoch": 3.18,
"learning_rate": 1.5580590601610074e-05,
"loss": 0.0322,
"step": 1308
},
{
"epoch": 3.19,
"learning_rate": 1.5543635258438745e-05,
"loss": 0.0319,
"step": 1309
},
{
"epoch": 3.19,
"learning_rate": 1.5506704012861256e-05,
"loss": 0.0331,
"step": 1310
},
{
"epoch": 3.19,
"learning_rate": 1.546979695898926e-05,
"loss": 0.0312,
"step": 1311
},
{
"epoch": 3.19,
"learning_rate": 1.5432914190872757e-05,
"loss": 0.0378,
"step": 1312
},
{
"epoch": 3.2,
"learning_rate": 1.5396055802499875e-05,
"loss": 0.0304,
"step": 1313
},
{
"epoch": 3.2,
"learning_rate": 1.5359221887796616e-05,
"loss": 0.0327,
"step": 1314
},
{
"epoch": 3.2,
"learning_rate": 1.5322412540626592e-05,
"loss": 0.0338,
"step": 1315
},
{
"epoch": 3.2,
"learning_rate": 1.528562785479084e-05,
"loss": 0.0297,
"step": 1316
},
{
"epoch": 3.21,
"learning_rate": 1.5248867924027534e-05,
"loss": 0.0319,
"step": 1317
},
{
"epoch": 3.21,
"learning_rate": 1.5212132842011779e-05,
"loss": 0.0336,
"step": 1318
},
{
"epoch": 3.21,
"learning_rate": 1.5175422702355343e-05,
"loss": 0.032,
"step": 1319
},
{
"epoch": 3.21,
"learning_rate": 1.5138737598606448e-05,
"loss": 0.0357,
"step": 1320
},
{
"epoch": 3.22,
"learning_rate": 1.5102077624249497e-05,
"loss": 0.0327,
"step": 1321
},
{
"epoch": 3.22,
"learning_rate": 1.506544287270487e-05,
"loss": 0.0356,
"step": 1322
},
{
"epoch": 3.22,
"learning_rate": 1.5028833437328682e-05,
"loss": 0.0289,
"step": 1323
},
{
"epoch": 3.22,
"learning_rate": 1.4992249411412513e-05,
"loss": 0.0334,
"step": 1324
},
{
"epoch": 3.22,
"learning_rate": 1.4955690888183205e-05,
"loss": 0.0309,
"step": 1325
},
{
"epoch": 3.23,
"learning_rate": 1.4919157960802618e-05,
"loss": 0.0363,
"step": 1326
},
{
"epoch": 3.23,
"learning_rate": 1.4882650722367364e-05,
"loss": 0.0311,
"step": 1327
},
{
"epoch": 3.23,
"learning_rate": 1.4846169265908603e-05,
"loss": 0.0286,
"step": 1328
},
{
"epoch": 3.23,
"learning_rate": 1.4809713684391807e-05,
"loss": 0.0344,
"step": 1329
},
{
"epoch": 3.24,
"learning_rate": 1.4773284070716503e-05,
"loss": 0.0339,
"step": 1330
},
{
"epoch": 3.24,
"learning_rate": 1.4736880517716039e-05,
"loss": 0.0309,
"step": 1331
},
{
"epoch": 3.24,
"learning_rate": 1.470050311815736e-05,
"loss": 0.0345,
"step": 1332
},
{
"epoch": 3.24,
"learning_rate": 1.4664151964740752e-05,
"loss": 0.0308,
"step": 1333
},
{
"epoch": 3.25,
"learning_rate": 1.4627827150099627e-05,
"loss": 0.0331,
"step": 1334
},
{
"epoch": 3.25,
"learning_rate": 1.4591528766800283e-05,
"loss": 0.0328,
"step": 1335
},
{
"epoch": 3.25,
"learning_rate": 1.4555256907341667e-05,
"loss": 0.0301,
"step": 1336
},
{
"epoch": 3.25,
"learning_rate": 1.4519011664155118e-05,
"loss": 0.0303,
"step": 1337
},
{
"epoch": 3.26,
"learning_rate": 1.4482793129604148e-05,
"loss": 0.0367,
"step": 1338
},
{
"epoch": 3.26,
"learning_rate": 1.4446601395984233e-05,
"loss": 0.0353,
"step": 1339
},
{
"epoch": 3.26,
"learning_rate": 1.4410436555522522e-05,
"loss": 0.0292,
"step": 1340
},
{
"epoch": 3.26,
"learning_rate": 1.4374298700377665e-05,
"loss": 0.0312,
"step": 1341
},
{
"epoch": 3.27,
"learning_rate": 1.4338187922639507e-05,
"loss": 0.0334,
"step": 1342
},
{
"epoch": 3.27,
"learning_rate": 1.4302104314328935e-05,
"loss": 0.0326,
"step": 1343
},
{
"epoch": 3.27,
"learning_rate": 1.426604796739755e-05,
"loss": 0.032,
"step": 1344
},
{
"epoch": 3.27,
"learning_rate": 1.4230018973727535e-05,
"loss": 0.0307,
"step": 1345
},
{
"epoch": 3.28,
"learning_rate": 1.4194017425131323e-05,
"loss": 0.03,
"step": 1346
},
{
"epoch": 3.28,
"learning_rate": 1.4158043413351455e-05,
"loss": 0.0322,
"step": 1347
},
{
"epoch": 3.28,
"learning_rate": 1.4122097030060249e-05,
"loss": 0.0335,
"step": 1348
},
{
"epoch": 3.28,
"learning_rate": 1.408617836685967e-05,
"loss": 0.0306,
"step": 1349
},
{
"epoch": 3.29,
"learning_rate": 1.405028751528099e-05,
"loss": 0.0299,
"step": 1350
},
{
"epoch": 3.29,
"learning_rate": 1.4014424566784661e-05,
"loss": 0.0294,
"step": 1351
},
{
"epoch": 3.29,
"learning_rate": 1.397858961275999e-05,
"loss": 0.0326,
"step": 1352
},
{
"epoch": 3.29,
"learning_rate": 1.3942782744524973e-05,
"loss": 0.0332,
"step": 1353
},
{
"epoch": 3.3,
"learning_rate": 1.3907004053326006e-05,
"loss": 0.0316,
"step": 1354
},
{
"epoch": 3.3,
"learning_rate": 1.3871253630337722e-05,
"loss": 0.0325,
"step": 1355
},
{
"epoch": 3.3,
"learning_rate": 1.3835531566662673e-05,
"loss": 0.0337,
"step": 1356
},
{
"epoch": 3.3,
"learning_rate": 1.379983795333119e-05,
"loss": 0.0306,
"step": 1357
},
{
"epoch": 3.31,
"learning_rate": 1.3764172881301062e-05,
"loss": 0.0366,
"step": 1358
},
{
"epoch": 3.31,
"learning_rate": 1.3728536441457357e-05,
"loss": 0.0309,
"step": 1359
},
{
"epoch": 3.31,
"learning_rate": 1.3692928724612203e-05,
"loss": 0.0266,
"step": 1360
},
{
"epoch": 3.31,
"learning_rate": 1.3657349821504517e-05,
"loss": 0.0304,
"step": 1361
},
{
"epoch": 3.31,
"learning_rate": 1.3621799822799788e-05,
"loss": 0.0308,
"step": 1362
},
{
"epoch": 3.32,
"learning_rate": 1.3586278819089837e-05,
"loss": 0.0344,
"step": 1363
},
{
"epoch": 3.32,
"learning_rate": 1.3550786900892634e-05,
"loss": 0.0316,
"step": 1364
},
{
"epoch": 3.32,
"learning_rate": 1.3515324158651981e-05,
"loss": 0.0356,
"step": 1365
},
{
"epoch": 3.32,
"learning_rate": 1.3479890682737379e-05,
"loss": 0.0307,
"step": 1366
},
{
"epoch": 3.33,
"learning_rate": 1.3444486563443723e-05,
"loss": 0.0291,
"step": 1367
},
{
"epoch": 3.33,
"learning_rate": 1.3409111890991105e-05,
"loss": 0.0316,
"step": 1368
},
{
"epoch": 3.33,
"learning_rate": 1.3373766755524564e-05,
"loss": 0.0318,
"step": 1369
},
{
"epoch": 3.33,
"learning_rate": 1.3338451247113897e-05,
"loss": 0.0308,
"step": 1370
},
{
"epoch": 3.34,
"learning_rate": 1.330316545575338e-05,
"loss": 0.0324,
"step": 1371
},
{
"epoch": 3.34,
"learning_rate": 1.3267909471361572e-05,
"loss": 0.0329,
"step": 1372
},
{
"epoch": 3.34,
"learning_rate": 1.323268338378108e-05,
"loss": 0.0317,
"step": 1373
},
{
"epoch": 3.34,
"learning_rate": 1.3197487282778315e-05,
"loss": 0.0336,
"step": 1374
},
{
"epoch": 3.35,
"learning_rate": 1.3162321258043261e-05,
"loss": 0.0318,
"step": 1375
},
{
"epoch": 3.35,
"learning_rate": 1.3127185399189295e-05,
"loss": 0.0363,
"step": 1376
},
{
"epoch": 3.35,
"learning_rate": 1.3092079795752887e-05,
"loss": 0.0336,
"step": 1377
},
{
"epoch": 3.35,
"learning_rate": 1.3057004537193423e-05,
"loss": 0.0299,
"step": 1378
},
{
"epoch": 3.36,
"learning_rate": 1.3021959712892979e-05,
"loss": 0.0321,
"step": 1379
},
{
"epoch": 3.36,
"learning_rate": 1.2986945412156038e-05,
"loss": 0.0309,
"step": 1380
},
{
"epoch": 3.36,
"learning_rate": 1.2951961724209317e-05,
"loss": 0.0324,
"step": 1381
},
{
"epoch": 3.36,
"learning_rate": 1.2917008738201537e-05,
"loss": 0.0296,
"step": 1382
},
{
"epoch": 3.37,
"learning_rate": 1.2882086543203154e-05,
"loss": 0.031,
"step": 1383
},
{
"epoch": 3.37,
"learning_rate": 1.284719522820618e-05,
"loss": 0.0281,
"step": 1384
},
{
"epoch": 3.37,
"learning_rate": 1.2812334882123944e-05,
"loss": 0.0302,
"step": 1385
},
{
"epoch": 3.37,
"learning_rate": 1.2777505593790834e-05,
"loss": 0.0317,
"step": 1386
},
{
"epoch": 3.38,
"learning_rate": 1.2742707451962088e-05,
"loss": 0.0308,
"step": 1387
},
{
"epoch": 3.38,
"learning_rate": 1.2707940545313613e-05,
"loss": 0.0312,
"step": 1388
},
{
"epoch": 3.38,
"learning_rate": 1.2673204962441671e-05,
"loss": 0.0294,
"step": 1389
},
{
"epoch": 3.38,
"learning_rate": 1.263850079186274e-05,
"loss": 0.0312,
"step": 1390
},
{
"epoch": 3.39,
"learning_rate": 1.2603828122013246e-05,
"loss": 0.0329,
"step": 1391
},
{
"epoch": 3.39,
"learning_rate": 1.2569187041249315e-05,
"loss": 0.0285,
"step": 1392
},
{
"epoch": 3.39,
"learning_rate": 1.253457763784659e-05,
"loss": 0.0304,
"step": 1393
},
{
"epoch": 3.39,
"learning_rate": 1.2500000000000006e-05,
"loss": 0.0295,
"step": 1394
},
{
"epoch": 3.4,
"learning_rate": 1.246545421582353e-05,
"loss": 0.0317,
"step": 1395
},
{
"epoch": 3.4,
"learning_rate": 1.2430940373349945e-05,
"loss": 0.0315,
"step": 1396
},
{
"epoch": 3.4,
"learning_rate": 1.2396458560530694e-05,
"loss": 0.0333,
"step": 1397
},
{
"epoch": 3.4,
"learning_rate": 1.2362008865235536e-05,
"loss": 0.0306,
"step": 1398
},
{
"epoch": 3.4,
"learning_rate": 1.2327591375252403e-05,
"loss": 0.0301,
"step": 1399
},
{
"epoch": 3.41,
"learning_rate": 1.2293206178287184e-05,
"loss": 0.0311,
"step": 1400
},
{
"epoch": 3.41,
"learning_rate": 1.2258853361963448e-05,
"loss": 0.0328,
"step": 1401
},
{
"epoch": 3.41,
"learning_rate": 1.2224533013822238e-05,
"loss": 0.0303,
"step": 1402
},
{
"epoch": 3.41,
"learning_rate": 1.2190245221321912e-05,
"loss": 0.0355,
"step": 1403
},
{
"epoch": 3.42,
"learning_rate": 1.2155990071837817e-05,
"loss": 0.0314,
"step": 1404
},
{
"epoch": 3.42,
"learning_rate": 1.2121767652662123e-05,
"loss": 0.0331,
"step": 1405
},
{
"epoch": 3.42,
"learning_rate": 1.2087578051003617e-05,
"loss": 0.0345,
"step": 1406
},
{
"epoch": 3.42,
"learning_rate": 1.2053421353987437e-05,
"loss": 0.0276,
"step": 1407
},
{
"epoch": 3.43,
"learning_rate": 1.2019297648654857e-05,
"loss": 0.0291,
"step": 1408
},
{
"epoch": 3.43,
"learning_rate": 1.198520702196313e-05,
"loss": 0.0306,
"step": 1409
},
{
"epoch": 3.43,
"learning_rate": 1.1951149560785167e-05,
"loss": 0.0319,
"step": 1410
},
{
"epoch": 3.43,
"learning_rate": 1.191712535190937e-05,
"loss": 0.0301,
"step": 1411
},
{
"epoch": 3.44,
"learning_rate": 1.188313448203943e-05,
"loss": 0.0246,
"step": 1412
},
{
"epoch": 3.44,
"learning_rate": 1.1849177037794051e-05,
"loss": 0.0307,
"step": 1413
},
{
"epoch": 3.44,
"learning_rate": 1.181525310570677e-05,
"loss": 0.0311,
"step": 1414
},
{
"epoch": 3.44,
"learning_rate": 1.178136277222573e-05,
"loss": 0.0298,
"step": 1415
},
{
"epoch": 3.45,
"learning_rate": 1.1747506123713458e-05,
"loss": 0.0332,
"step": 1416
},
{
"epoch": 3.45,
"learning_rate": 1.1713683246446622e-05,
"loss": 0.0277,
"step": 1417
},
{
"epoch": 3.45,
"learning_rate": 1.1679894226615862e-05,
"loss": 0.0343,
"step": 1418
},
{
"epoch": 3.45,
"learning_rate": 1.1646139150325507e-05,
"loss": 0.0324,
"step": 1419
},
{
"epoch": 3.46,
"learning_rate": 1.16124181035934e-05,
"loss": 0.0311,
"step": 1420
},
{
"epoch": 3.46,
"learning_rate": 1.157873117235067e-05,
"loss": 0.0322,
"step": 1421
},
{
"epoch": 3.46,
"learning_rate": 1.1545078442441526e-05,
"loss": 0.0311,
"step": 1422
},
{
"epoch": 3.46,
"learning_rate": 1.1511459999622981e-05,
"loss": 0.033,
"step": 1423
},
{
"epoch": 3.47,
"learning_rate": 1.147787592956472e-05,
"loss": 0.0335,
"step": 1424
},
{
"epoch": 3.47,
"learning_rate": 1.14443263178488e-05,
"loss": 0.0307,
"step": 1425
},
{
"epoch": 3.47,
"learning_rate": 1.1410811249969475e-05,
"loss": 0.0314,
"step": 1426
},
{
"epoch": 3.47,
"learning_rate": 1.1377330811332988e-05,
"loss": 0.0313,
"step": 1427
},
{
"epoch": 3.48,
"learning_rate": 1.1343885087257337e-05,
"loss": 0.03,
"step": 1428
},
{
"epoch": 3.48,
"learning_rate": 1.1310474162972026e-05,
"loss": 0.0284,
"step": 1429
},
{
"epoch": 3.48,
"learning_rate": 1.1277098123617922e-05,
"loss": 0.032,
"step": 1430
},
{
"epoch": 3.48,
"learning_rate": 1.124375705424696e-05,
"loss": 0.0383,
"step": 1431
},
{
"epoch": 3.49,
"learning_rate": 1.1210451039821965e-05,
"loss": 0.0352,
"step": 1432
},
{
"epoch": 3.49,
"learning_rate": 1.117718016521645e-05,
"loss": 0.0317,
"step": 1433
},
{
"epoch": 3.49,
"learning_rate": 1.1143944515214386e-05,
"loss": 0.0316,
"step": 1434
},
{
"epoch": 3.49,
"learning_rate": 1.1110744174509952e-05,
"loss": 0.0285,
"step": 1435
},
{
"epoch": 3.49,
"learning_rate": 1.1077579227707357e-05,
"loss": 0.0301,
"step": 1436
},
{
"epoch": 3.5,
"learning_rate": 1.104444975932064e-05,
"loss": 0.0307,
"step": 1437
},
{
"epoch": 3.5,
"learning_rate": 1.10113558537734e-05,
"loss": 0.0309,
"step": 1438
},
{
"epoch": 3.5,
"learning_rate": 1.0978297595398632e-05,
"loss": 0.0328,
"step": 1439
},
{
"epoch": 3.5,
"learning_rate": 1.094527506843849e-05,
"loss": 0.0277,
"step": 1440
},
{
"epoch": 3.51,
"learning_rate": 1.0912288357044062e-05,
"loss": 0.03,
"step": 1441
},
{
"epoch": 3.51,
"learning_rate": 1.0879337545275165e-05,
"loss": 0.0302,
"step": 1442
},
{
"epoch": 3.51,
"learning_rate": 1.084642271710016e-05,
"loss": 0.0309,
"step": 1443
},
{
"epoch": 3.51,
"learning_rate": 1.0813543956395675e-05,
"loss": 0.0292,
"step": 1444
},
{
"epoch": 3.52,
"learning_rate": 1.0780701346946453e-05,
"loss": 0.0297,
"step": 1445
},
{
"epoch": 3.52,
"learning_rate": 1.074789497244512e-05,
"loss": 0.0303,
"step": 1446
},
{
"epoch": 3.52,
"learning_rate": 1.0715124916491937e-05,
"loss": 0.0309,
"step": 1447
},
{
"epoch": 3.52,
"learning_rate": 1.0682391262594618e-05,
"loss": 0.0278,
"step": 1448
},
{
"epoch": 3.53,
"learning_rate": 1.0649694094168147e-05,
"loss": 0.0285,
"step": 1449
},
{
"epoch": 3.53,
"learning_rate": 1.0617033494534486e-05,
"loss": 0.0329,
"step": 1450
},
{
"epoch": 3.53,
"learning_rate": 1.0584409546922445e-05,
"loss": 0.0299,
"step": 1451
},
{
"epoch": 3.53,
"learning_rate": 1.0551822334467429e-05,
"loss": 0.0333,
"step": 1452
},
{
"epoch": 3.54,
"learning_rate": 1.0519271940211215e-05,
"loss": 0.0296,
"step": 1453
},
{
"epoch": 3.54,
"learning_rate": 1.0486758447101751e-05,
"loss": 0.0322,
"step": 1454
},
{
"epoch": 3.54,
"learning_rate": 1.0454281937992989e-05,
"loss": 0.031,
"step": 1455
},
{
"epoch": 3.54,
"learning_rate": 1.0421842495644587e-05,
"loss": 0.0293,
"step": 1456
},
{
"epoch": 3.55,
"learning_rate": 1.0389440202721778e-05,
"loss": 0.0341,
"step": 1457
},
{
"epoch": 3.55,
"learning_rate": 1.035707514179513e-05,
"loss": 0.0311,
"step": 1458
},
{
"epoch": 3.55,
"learning_rate": 1.0324747395340309e-05,
"loss": 0.0335,
"step": 1459
},
{
"epoch": 3.55,
"learning_rate": 1.0292457045737895e-05,
"loss": 0.0347,
"step": 1460
},
{
"epoch": 3.56,
"learning_rate": 1.02602041752732e-05,
"loss": 0.0323,
"step": 1461
},
{
"epoch": 3.56,
"learning_rate": 1.0227988866135996e-05,
"loss": 0.0282,
"step": 1462
},
{
"epoch": 3.56,
"learning_rate": 1.0195811200420333e-05,
"loss": 0.0285,
"step": 1463
},
{
"epoch": 3.56,
"learning_rate": 1.0163671260124385e-05,
"loss": 0.0313,
"step": 1464
},
{
"epoch": 3.57,
"learning_rate": 1.0131569127150142e-05,
"loss": 0.0255,
"step": 1465
},
{
"epoch": 3.57,
"learning_rate": 1.0099504883303254e-05,
"loss": 0.0303,
"step": 1466
},
{
"epoch": 3.57,
"learning_rate": 1.0067478610292847e-05,
"loss": 0.0282,
"step": 1467
},
{
"epoch": 3.57,
"learning_rate": 1.0035490389731255e-05,
"loss": 0.0268,
"step": 1468
},
{
"epoch": 3.58,
"learning_rate": 1.0003540303133843e-05,
"loss": 0.0289,
"step": 1469
},
{
"epoch": 3.58,
"learning_rate": 9.971628431918845e-06,
"loss": 0.0339,
"step": 1470
},
{
"epoch": 3.58,
"learning_rate": 9.939754857407063e-06,
"loss": 0.0288,
"step": 1471
},
{
"epoch": 3.58,
"learning_rate": 9.90791966082171e-06,
"loss": 0.0332,
"step": 1472
},
{
"epoch": 3.59,
"learning_rate": 9.876122923288239e-06,
"loss": 0.0286,
"step": 1473
},
{
"epoch": 3.59,
"learning_rate": 9.844364725834057e-06,
"loss": 0.0265,
"step": 1474
},
{
"epoch": 3.59,
"learning_rate": 9.812645149388363e-06,
"loss": 0.0325,
"step": 1475
},
{
"epoch": 3.59,
"learning_rate": 9.780964274781984e-06,
"loss": 0.0295,
"step": 1476
},
{
"epoch": 3.59,
"learning_rate": 9.749322182747072e-06,
"loss": 0.0308,
"step": 1477
},
{
"epoch": 3.6,
"learning_rate": 9.71771895391696e-06,
"loss": 0.0317,
"step": 1478
},
{
"epoch": 3.6,
"learning_rate": 9.686154668825973e-06,
"loss": 0.0275,
"step": 1479
},
{
"epoch": 3.6,
"learning_rate": 9.654629407909163e-06,
"loss": 0.0283,
"step": 1480
},
{
"epoch": 3.6,
"learning_rate": 9.623143251502148e-06,
"loss": 0.0287,
"step": 1481
},
{
"epoch": 3.61,
"learning_rate": 9.591696279840906e-06,
"loss": 0.0304,
"step": 1482
},
{
"epoch": 3.61,
"learning_rate": 9.560288573061563e-06,
"loss": 0.037,
"step": 1483
},
{
"epoch": 3.61,
"learning_rate": 9.52892021120016e-06,
"loss": 0.0308,
"step": 1484
},
{
"epoch": 3.61,
"learning_rate": 9.497591274192508e-06,
"loss": 0.0303,
"step": 1485
},
{
"epoch": 3.62,
"learning_rate": 9.46630184187393e-06,
"loss": 0.0295,
"step": 1486
},
{
"epoch": 3.62,
"learning_rate": 9.435051993979077e-06,
"loss": 0.0314,
"step": 1487
},
{
"epoch": 3.62,
"learning_rate": 9.403841810141747e-06,
"loss": 0.0323,
"step": 1488
},
{
"epoch": 3.62,
"learning_rate": 9.372671369894661e-06,
"loss": 0.0294,
"step": 1489
},
{
"epoch": 3.63,
"learning_rate": 9.341540752669235e-06,
"loss": 0.0316,
"step": 1490
},
{
"epoch": 3.63,
"learning_rate": 9.310450037795435e-06,
"loss": 0.0279,
"step": 1491
},
{
"epoch": 3.63,
"learning_rate": 9.279399304501526e-06,
"loss": 0.0302,
"step": 1492
},
{
"epoch": 3.63,
"learning_rate": 9.248388631913887e-06,
"loss": 0.0302,
"step": 1493
},
{
"epoch": 3.64,
"learning_rate": 9.21741809905682e-06,
"loss": 0.0298,
"step": 1494
},
{
"epoch": 3.64,
"learning_rate": 9.186487784852349e-06,
"loss": 0.029,
"step": 1495
},
{
"epoch": 3.64,
"learning_rate": 9.155597768119978e-06,
"loss": 0.0307,
"step": 1496
},
{
"epoch": 3.64,
"learning_rate": 9.124748127576552e-06,
"loss": 0.0299,
"step": 1497
},
{
"epoch": 3.65,
"learning_rate": 9.09393894183601e-06,
"loss": 0.0307,
"step": 1498
},
{
"epoch": 3.65,
"learning_rate": 9.063170289409192e-06,
"loss": 0.0293,
"step": 1499
},
{
"epoch": 3.65,
"learning_rate": 9.032442248703666e-06,
"loss": 0.0323,
"step": 1500
},
{
"epoch": 3.65,
"learning_rate": 9.001754898023512e-06,
"loss": 0.0289,
"step": 1501
},
{
"epoch": 3.66,
"learning_rate": 8.971108315569094e-06,
"loss": 0.0323,
"step": 1502
},
{
"epoch": 3.66,
"learning_rate": 8.940502579436913e-06,
"loss": 0.0276,
"step": 1503
},
{
"epoch": 3.66,
"learning_rate": 8.90993776761937e-06,
"loss": 0.0294,
"step": 1504
},
{
"epoch": 3.66,
"learning_rate": 8.879413958004566e-06,
"loss": 0.036,
"step": 1505
},
{
"epoch": 3.67,
"learning_rate": 8.848931228376136e-06,
"loss": 0.0305,
"step": 1506
},
{
"epoch": 3.67,
"learning_rate": 8.818489656413043e-06,
"loss": 0.0326,
"step": 1507
},
{
"epoch": 3.67,
"learning_rate": 8.788089319689324e-06,
"loss": 0.0323,
"step": 1508
},
{
"epoch": 3.67,
"learning_rate": 8.757730295673985e-06,
"loss": 0.0318,
"step": 1509
},
{
"epoch": 3.68,
"learning_rate": 8.727412661730724e-06,
"loss": 0.0285,
"step": 1510
},
{
"epoch": 3.68,
"learning_rate": 8.697136495117763e-06,
"loss": 0.0315,
"step": 1511
},
{
"epoch": 3.68,
"learning_rate": 8.666901872987676e-06,
"loss": 0.0302,
"step": 1512
},
{
"epoch": 3.68,
"learning_rate": 8.63670887238716e-06,
"loss": 0.0298,
"step": 1513
},
{
"epoch": 3.68,
"learning_rate": 8.606557570256843e-06,
"loss": 0.0332,
"step": 1514
},
{
"epoch": 3.69,
"learning_rate": 8.576448043431082e-06,
"loss": 0.0327,
"step": 1515
},
{
"epoch": 3.69,
"learning_rate": 8.546380368637812e-06,
"loss": 0.0314,
"step": 1516
},
{
"epoch": 3.69,
"learning_rate": 8.51635462249828e-06,
"loss": 0.0287,
"step": 1517
},
{
"epoch": 3.69,
"learning_rate": 8.486370881526917e-06,
"loss": 0.0282,
"step": 1518
},
{
"epoch": 3.7,
"learning_rate": 8.456429222131082e-06,
"loss": 0.0327,
"step": 1519
},
{
"epoch": 3.7,
"learning_rate": 8.426529720610934e-06,
"loss": 0.0285,
"step": 1520
},
{
"epoch": 3.7,
"learning_rate": 8.396672453159163e-06,
"loss": 0.0258,
"step": 1521
},
{
"epoch": 3.7,
"learning_rate": 8.36685749586087e-06,
"loss": 0.0292,
"step": 1522
},
{
"epoch": 3.71,
"learning_rate": 8.337084924693303e-06,
"loss": 0.0266,
"step": 1523
},
{
"epoch": 3.71,
"learning_rate": 8.307354815525731e-06,
"loss": 0.0299,
"step": 1524
},
{
"epoch": 3.71,
"learning_rate": 8.277667244119187e-06,
"loss": 0.0293,
"step": 1525
},
{
"epoch": 3.71,
"learning_rate": 8.24802228612633e-06,
"loss": 0.0298,
"step": 1526
},
{
"epoch": 3.72,
"learning_rate": 8.218420017091208e-06,
"loss": 0.0274,
"step": 1527
},
{
"epoch": 3.72,
"learning_rate": 8.188860512449107e-06,
"loss": 0.0281,
"step": 1528
},
{
"epoch": 3.72,
"learning_rate": 8.159343847526308e-06,
"loss": 0.026,
"step": 1529
},
{
"epoch": 3.72,
"learning_rate": 8.129870097539951e-06,
"loss": 0.0282,
"step": 1530
},
{
"epoch": 3.73,
"learning_rate": 8.100439337597798e-06,
"loss": 0.0328,
"step": 1531
},
{
"epoch": 3.73,
"learning_rate": 8.071051642698074e-06,
"loss": 0.0301,
"step": 1532
},
{
"epoch": 3.73,
"learning_rate": 8.041707087729244e-06,
"loss": 0.0283,
"step": 1533
},
{
"epoch": 3.73,
"learning_rate": 8.012405747469862e-06,
"loss": 0.032,
"step": 1534
},
{
"epoch": 3.74,
"learning_rate": 7.983147696588339e-06,
"loss": 0.0303,
"step": 1535
},
{
"epoch": 3.74,
"learning_rate": 7.953933009642773e-06,
"loss": 0.0317,
"step": 1536
},
{
"epoch": 3.74,
"learning_rate": 7.924761761080768e-06,
"loss": 0.029,
"step": 1537
},
{
"epoch": 3.74,
"learning_rate": 7.895634025239243e-06,
"loss": 0.034,
"step": 1538
},
{
"epoch": 3.75,
"learning_rate": 7.866549876344201e-06,
"loss": 0.0298,
"step": 1539
},
{
"epoch": 3.75,
"learning_rate": 7.837509388510611e-06,
"loss": 0.03,
"step": 1540
},
{
"epoch": 3.75,
"learning_rate": 7.808512635742157e-06,
"loss": 0.027,
"step": 1541
},
{
"epoch": 3.75,
"learning_rate": 7.779559691931066e-06,
"loss": 0.0287,
"step": 1542
},
{
"epoch": 3.76,
"learning_rate": 7.750650630857947e-06,
"loss": 0.0319,
"step": 1543
},
{
"epoch": 3.76,
"learning_rate": 7.721785526191588e-06,
"loss": 0.0297,
"step": 1544
},
{
"epoch": 3.76,
"learning_rate": 7.692964451488734e-06,
"loss": 0.0292,
"step": 1545
},
{
"epoch": 3.76,
"learning_rate": 7.66418748019396e-06,
"loss": 0.0264,
"step": 1546
},
{
"epoch": 3.77,
"learning_rate": 7.63545468563943e-06,
"loss": 0.0296,
"step": 1547
},
{
"epoch": 3.77,
"learning_rate": 7.606766141044733e-06,
"loss": 0.0279,
"step": 1548
},
{
"epoch": 3.77,
"learning_rate": 7.578121919516712e-06,
"loss": 0.0273,
"step": 1549
},
{
"epoch": 3.77,
"learning_rate": 7.54952209404926e-06,
"loss": 0.0302,
"step": 1550
},
{
"epoch": 3.77,
"learning_rate": 7.520966737523116e-06,
"loss": 0.024,
"step": 1551
},
{
"epoch": 3.78,
"learning_rate": 7.4924559227057265e-06,
"loss": 0.033,
"step": 1552
},
{
"epoch": 3.78,
"learning_rate": 7.463989722251014e-06,
"loss": 0.0283,
"step": 1553
},
{
"epoch": 3.78,
"learning_rate": 7.435568208699203e-06,
"loss": 0.0268,
"step": 1554
},
{
"epoch": 3.78,
"learning_rate": 7.407191454476667e-06,
"loss": 0.0264,
"step": 1555
},
{
"epoch": 3.79,
"learning_rate": 7.37885953189571e-06,
"loss": 0.0283,
"step": 1556
},
{
"epoch": 3.79,
"learning_rate": 7.350572513154377e-06,
"loss": 0.0288,
"step": 1557
},
{
"epoch": 3.79,
"learning_rate": 7.3223304703363135e-06,
"loss": 0.0291,
"step": 1558
},
{
"epoch": 3.79,
"learning_rate": 7.294133475410528e-06,
"loss": 0.0288,
"step": 1559
},
{
"epoch": 3.8,
"learning_rate": 7.265981600231234e-06,
"loss": 0.029,
"step": 1560
},
{
"epoch": 3.8,
"learning_rate": 7.23787491653769e-06,
"loss": 0.0321,
"step": 1561
},
{
"epoch": 3.8,
"learning_rate": 7.209813495953963e-06,
"loss": 0.0284,
"step": 1562
},
{
"epoch": 3.8,
"learning_rate": 7.181797409988802e-06,
"loss": 0.0291,
"step": 1563
},
{
"epoch": 3.81,
"learning_rate": 7.153826730035423e-06,
"loss": 0.0289,
"step": 1564
},
{
"epoch": 3.81,
"learning_rate": 7.125901527371329e-06,
"loss": 0.0286,
"step": 1565
},
{
"epoch": 3.81,
"learning_rate": 7.0980218731581255e-06,
"loss": 0.0292,
"step": 1566
},
{
"epoch": 3.81,
"learning_rate": 7.070187838441369e-06,
"loss": 0.0299,
"step": 1567
},
{
"epoch": 3.82,
"learning_rate": 7.042399494150342e-06,
"loss": 0.0293,
"step": 1568
},
{
"epoch": 3.82,
"learning_rate": 7.0146569110979086e-06,
"loss": 0.0291,
"step": 1569
},
{
"epoch": 3.82,
"learning_rate": 6.986960159980327e-06,
"loss": 0.0306,
"step": 1570
},
{
"epoch": 3.82,
"learning_rate": 6.959309311377038e-06,
"loss": 0.0302,
"step": 1571
},
{
"epoch": 3.83,
"learning_rate": 6.931704435750522e-06,
"loss": 0.0352,
"step": 1572
},
{
"epoch": 3.83,
"learning_rate": 6.904145603446116e-06,
"loss": 0.0274,
"step": 1573
},
{
"epoch": 3.83,
"learning_rate": 6.876632884691803e-06,
"loss": 0.0321,
"step": 1574
},
{
"epoch": 3.83,
"learning_rate": 6.849166349598079e-06,
"loss": 0.0282,
"step": 1575
},
{
"epoch": 3.84,
"learning_rate": 6.821746068157741e-06,
"loss": 0.0289,
"step": 1576
},
{
"epoch": 3.84,
"learning_rate": 6.794372110245717e-06,
"loss": 0.029,
"step": 1577
},
{
"epoch": 3.84,
"learning_rate": 6.767044545618878e-06,
"loss": 0.0286,
"step": 1578
},
{
"epoch": 3.84,
"learning_rate": 6.739763443915895e-06,
"loss": 0.0288,
"step": 1579
},
{
"epoch": 3.85,
"learning_rate": 6.712528874657012e-06,
"loss": 0.0274,
"step": 1580
},
{
"epoch": 3.85,
"learning_rate": 6.685340907243915e-06,
"loss": 0.0312,
"step": 1581
},
{
"epoch": 3.85,
"learning_rate": 6.658199610959537e-06,
"loss": 0.032,
"step": 1582
},
{
"epoch": 3.85,
"learning_rate": 6.6311050549678595e-06,
"loss": 0.0284,
"step": 1583
},
{
"epoch": 3.86,
"learning_rate": 6.604057308313763e-06,
"loss": 0.0303,
"step": 1584
},
{
"epoch": 3.86,
"learning_rate": 6.577056439922857e-06,
"loss": 0.0278,
"step": 1585
},
{
"epoch": 3.86,
"learning_rate": 6.55010251860127e-06,
"loss": 0.0302,
"step": 1586
},
{
"epoch": 3.86,
"learning_rate": 6.523195613035521e-06,
"loss": 0.0285,
"step": 1587
},
{
"epoch": 3.86,
"learning_rate": 6.496335791792293e-06,
"loss": 0.0279,
"step": 1588
},
{
"epoch": 3.87,
"learning_rate": 6.469523123318308e-06,
"loss": 0.0302,
"step": 1589
},
{
"epoch": 3.87,
"learning_rate": 6.442757675940109e-06,
"loss": 0.0319,
"step": 1590
},
{
"epoch": 3.87,
"learning_rate": 6.4160395178639196e-06,
"loss": 0.0297,
"step": 1591
},
{
"epoch": 3.87,
"learning_rate": 6.389368717175448e-06,
"loss": 0.0272,
"step": 1592
},
{
"epoch": 3.88,
"learning_rate": 6.362745341839729e-06,
"loss": 0.0292,
"step": 1593
},
{
"epoch": 3.88,
"learning_rate": 6.336169459700933e-06,
"loss": 0.027,
"step": 1594
},
{
"epoch": 3.88,
"learning_rate": 6.309641138482222e-06,
"loss": 0.0302,
"step": 1595
},
{
"epoch": 3.88,
"learning_rate": 6.283160445785532e-06,
"loss": 0.0258,
"step": 1596
},
{
"epoch": 3.89,
"learning_rate": 6.25672744909146e-06,
"loss": 0.028,
"step": 1597
},
{
"epoch": 3.89,
"learning_rate": 6.230342215759028e-06,
"loss": 0.0255,
"step": 1598
},
{
"epoch": 3.89,
"learning_rate": 6.204004813025568e-06,
"loss": 0.0309,
"step": 1599
},
{
"epoch": 3.89,
"learning_rate": 6.177715308006505e-06,
"loss": 0.0329,
"step": 1600
},
{
"epoch": 3.9,
"learning_rate": 6.151473767695229e-06,
"loss": 0.0294,
"step": 1601
},
{
"epoch": 3.9,
"learning_rate": 6.125280258962873e-06,
"loss": 0.0273,
"step": 1602
},
{
"epoch": 3.9,
"learning_rate": 6.099134848558208e-06,
"loss": 0.0266,
"step": 1603
},
{
"epoch": 3.9,
"learning_rate": 6.073037603107404e-06,
"loss": 0.0281,
"step": 1604
},
{
"epoch": 3.91,
"learning_rate": 6.0469885891139e-06,
"loss": 0.0286,
"step": 1605
},
{
"epoch": 3.91,
"learning_rate": 6.020987872958236e-06,
"loss": 0.0251,
"step": 1606
},
{
"epoch": 3.91,
"learning_rate": 5.995035520897882e-06,
"loss": 0.0266,
"step": 1607
},
{
"epoch": 3.91,
"learning_rate": 5.969131599067044e-06,
"loss": 0.0288,
"step": 1608
},
{
"epoch": 3.92,
"learning_rate": 5.943276173476509e-06,
"loss": 0.0282,
"step": 1609
},
{
"epoch": 3.92,
"learning_rate": 5.91746931001351e-06,
"loss": 0.0272,
"step": 1610
},
{
"epoch": 3.92,
"learning_rate": 5.891711074441495e-06,
"loss": 0.0275,
"step": 1611
},
{
"epoch": 3.92,
"learning_rate": 5.866001532400023e-06,
"loss": 0.0284,
"step": 1612
},
{
"epoch": 3.93,
"learning_rate": 5.84034074940456e-06,
"loss": 0.029,
"step": 1613
},
{
"epoch": 3.93,
"learning_rate": 5.814728790846308e-06,
"loss": 0.0275,
"step": 1614
},
{
"epoch": 3.93,
"learning_rate": 5.789165721992052e-06,
"loss": 0.0286,
"step": 1615
},
{
"epoch": 3.93,
"learning_rate": 5.763651607984008e-06,
"loss": 0.0252,
"step": 1616
},
{
"epoch": 3.94,
"learning_rate": 5.738186513839619e-06,
"loss": 0.0278,
"step": 1617
},
{
"epoch": 3.94,
"learning_rate": 5.712770504451426e-06,
"loss": 0.0271,
"step": 1618
},
{
"epoch": 3.94,
"learning_rate": 5.687403644586891e-06,
"loss": 0.0298,
"step": 1619
},
{
"epoch": 3.94,
"learning_rate": 5.662085998888214e-06,
"loss": 0.0299,
"step": 1620
},
{
"epoch": 3.95,
"learning_rate": 5.636817631872185e-06,
"loss": 0.0254,
"step": 1621
},
{
"epoch": 3.95,
"learning_rate": 5.611598607930032e-06,
"loss": 0.027,
"step": 1622
},
{
"epoch": 3.95,
"learning_rate": 5.586428991327223e-06,
"loss": 0.0297,
"step": 1623
},
{
"epoch": 3.95,
"learning_rate": 5.561308846203333e-06,
"loss": 0.0311,
"step": 1624
},
{
"epoch": 3.95,
"learning_rate": 5.5362382365718775e-06,
"loss": 0.0297,
"step": 1625
},
{
"epoch": 3.96,
"learning_rate": 5.511217226320125e-06,
"loss": 0.0296,
"step": 1626
},
{
"epoch": 3.96,
"learning_rate": 5.486245879208945e-06,
"loss": 0.0279,
"step": 1627
},
{
"epoch": 3.96,
"learning_rate": 5.46132425887268e-06,
"loss": 0.0267,
"step": 1628
},
{
"epoch": 3.96,
"learning_rate": 5.436452428818919e-06,
"loss": 0.0269,
"step": 1629
},
{
"epoch": 3.97,
"learning_rate": 5.411630452428395e-06,
"loss": 0.0305,
"step": 1630
},
{
"epoch": 3.97,
"learning_rate": 5.386858392954799e-06,
"loss": 0.026,
"step": 1631
},
{
"epoch": 3.97,
"learning_rate": 5.362136313524607e-06,
"loss": 0.0271,
"step": 1632
},
{
"epoch": 3.97,
"learning_rate": 5.337464277136925e-06,
"loss": 0.0284,
"step": 1633
},
{
"epoch": 3.98,
"learning_rate": 5.3128423466633634e-06,
"loss": 0.0275,
"step": 1634
},
{
"epoch": 3.98,
"learning_rate": 5.288270584847813e-06,
"loss": 0.0271,
"step": 1635
},
{
"epoch": 3.98,
"learning_rate": 5.263749054306347e-06,
"loss": 0.028,
"step": 1636
},
{
"epoch": 3.98,
"learning_rate": 5.23927781752703e-06,
"loss": 0.0257,
"step": 1637
},
{
"epoch": 3.99,
"learning_rate": 5.214856936869752e-06,
"loss": 0.0269,
"step": 1638
},
{
"epoch": 3.99,
"learning_rate": 5.1904864745660835e-06,
"loss": 0.0233,
"step": 1639
},
{
"epoch": 3.99,
"learning_rate": 5.166166492719124e-06,
"loss": 0.0302,
"step": 1640
},
{
"epoch": 3.99,
"learning_rate": 5.141897053303327e-06,
"loss": 0.0278,
"step": 1641
},
{
"epoch": 4.0,
"learning_rate": 5.117678218164338e-06,
"loss": 0.0327,
"step": 1642
},
{
"epoch": 4.0,
"learning_rate": 5.0935100490188795e-06,
"loss": 0.0293,
"step": 1643
},
{
"epoch": 4.0,
"eval_loss": 0.9072719812393188,
"eval_runtime": 116.1375,
"eval_samples_per_second": 6.561,
"eval_steps_per_second": 0.413,
"step": 1643
},
{
"epoch": 4.0,
"learning_rate": 5.0693926074545315e-06,
"loss": 0.0203,
"step": 1644
},
{
"epoch": 4.0,
"learning_rate": 5.045325954929614e-06,
"loss": 0.0074,
"step": 1645
},
{
"epoch": 4.01,
"learning_rate": 5.0213101527730345e-06,
"loss": 0.0079,
"step": 1646
},
{
"epoch": 4.01,
"learning_rate": 4.9973452621841e-06,
"loss": 0.0079,
"step": 1647
},
{
"epoch": 4.01,
"learning_rate": 4.973431344232377e-06,
"loss": 0.0085,
"step": 1648
},
{
"epoch": 4.01,
"learning_rate": 4.9495684598575735e-06,
"loss": 0.0075,
"step": 1649
},
{
"epoch": 4.02,
"learning_rate": 4.925756669869314e-06,
"loss": 0.0068,
"step": 1650
},
{
"epoch": 4.02,
"learning_rate": 4.9019960349470265e-06,
"loss": 0.0074,
"step": 1651
},
{
"epoch": 4.02,
"learning_rate": 4.878286615639791e-06,
"loss": 0.0053,
"step": 1652
},
{
"epoch": 4.02,
"learning_rate": 4.8546284723661715e-06,
"loss": 0.0074,
"step": 1653
},
{
"epoch": 4.03,
"learning_rate": 4.8310216654140425e-06,
"loss": 0.005,
"step": 1654
},
{
"epoch": 4.03,
"learning_rate": 4.80746625494051e-06,
"loss": 0.006,
"step": 1655
},
{
"epoch": 4.03,
"learning_rate": 4.7839623009716615e-06,
"loss": 0.0054,
"step": 1656
},
{
"epoch": 4.03,
"learning_rate": 4.760509863402468e-06,
"loss": 0.0068,
"step": 1657
},
{
"epoch": 4.04,
"learning_rate": 4.737109001996637e-06,
"loss": 0.0047,
"step": 1658
},
{
"epoch": 4.04,
"learning_rate": 4.7137597763864286e-06,
"loss": 0.0056,
"step": 1659
},
{
"epoch": 4.04,
"learning_rate": 4.690462246072516e-06,
"loss": 0.0059,
"step": 1660
},
{
"epoch": 4.04,
"learning_rate": 4.667216470423858e-06,
"loss": 0.0051,
"step": 1661
},
{
"epoch": 4.05,
"learning_rate": 4.644022508677518e-06,
"loss": 0.0063,
"step": 1662
},
{
"epoch": 4.05,
"learning_rate": 4.620880419938511e-06,
"loss": 0.0059,
"step": 1663
},
{
"epoch": 4.05,
"learning_rate": 4.5977902631796855e-06,
"loss": 0.0067,
"step": 1664
},
{
"epoch": 4.05,
"learning_rate": 4.574752097241533e-06,
"loss": 0.005,
"step": 1665
},
{
"epoch": 4.05,
"learning_rate": 4.551765980832059e-06,
"loss": 0.0048,
"step": 1666
},
{
"epoch": 4.06,
"learning_rate": 4.528831972526645e-06,
"loss": 0.0066,
"step": 1667
},
{
"epoch": 4.06,
"learning_rate": 4.505950130767883e-06,
"loss": 0.0045,
"step": 1668
},
{
"epoch": 4.06,
"learning_rate": 4.483120513865411e-06,
"loss": 0.0046,
"step": 1669
},
{
"epoch": 4.06,
"learning_rate": 4.460343179995807e-06,
"loss": 0.006,
"step": 1670
},
{
"epoch": 4.07,
"learning_rate": 4.4376181872024e-06,
"loss": 0.0047,
"step": 1671
},
{
"epoch": 4.07,
"learning_rate": 4.4149455933951396e-06,
"loss": 0.0059,
"step": 1672
},
{
"epoch": 4.07,
"learning_rate": 4.392325456350454e-06,
"loss": 0.0052,
"step": 1673
},
{
"epoch": 4.07,
"learning_rate": 4.369757833711105e-06,
"loss": 0.0062,
"step": 1674
},
{
"epoch": 4.08,
"learning_rate": 4.347242782986008e-06,
"loss": 0.004,
"step": 1675
},
{
"epoch": 4.08,
"learning_rate": 4.324780361550129e-06,
"loss": 0.006,
"step": 1676
},
{
"epoch": 4.08,
"learning_rate": 4.302370626644314e-06,
"loss": 0.0052,
"step": 1677
},
{
"epoch": 4.08,
"learning_rate": 4.280013635375138e-06,
"loss": 0.0048,
"step": 1678
},
{
"epoch": 4.09,
"learning_rate": 4.2577094447147856e-06,
"loss": 0.0047,
"step": 1679
},
{
"epoch": 4.09,
"learning_rate": 4.235458111500889e-06,
"loss": 0.0046,
"step": 1680
},
{
"epoch": 4.09,
"learning_rate": 4.213259692436367e-06,
"loss": 0.0047,
"step": 1681
},
{
"epoch": 4.09,
"learning_rate": 4.19111424408932e-06,
"loss": 0.0048,
"step": 1682
},
{
"epoch": 4.1,
"learning_rate": 4.169021822892849e-06,
"loss": 0.0045,
"step": 1683
},
{
"epoch": 4.1,
"learning_rate": 4.146982485144921e-06,
"loss": 0.0083,
"step": 1684
},
{
"epoch": 4.1,
"learning_rate": 4.124996287008245e-06,
"loss": 0.0056,
"step": 1685
},
{
"epoch": 4.1,
"learning_rate": 4.103063284510117e-06,
"loss": 0.0061,
"step": 1686
},
{
"epoch": 4.11,
"learning_rate": 4.081183533542262e-06,
"loss": 0.0058,
"step": 1687
},
{
"epoch": 4.11,
"learning_rate": 4.059357089860702e-06,
"loss": 0.0057,
"step": 1688
},
{
"epoch": 4.11,
"learning_rate": 4.037584009085635e-06,
"loss": 0.005,
"step": 1689
},
{
"epoch": 4.11,
"learning_rate": 4.015864346701251e-06,
"loss": 0.0049,
"step": 1690
},
{
"epoch": 4.12,
"learning_rate": 3.994198158055637e-06,
"loss": 0.0048,
"step": 1691
},
{
"epoch": 4.12,
"learning_rate": 3.972585498360606e-06,
"loss": 0.0067,
"step": 1692
},
{
"epoch": 4.12,
"learning_rate": 3.951026422691556e-06,
"loss": 0.0054,
"step": 1693
},
{
"epoch": 4.12,
"learning_rate": 3.929520985987334e-06,
"loss": 0.0043,
"step": 1694
},
{
"epoch": 4.13,
"learning_rate": 3.908069243050122e-06,
"loss": 0.0051,
"step": 1695
},
{
"epoch": 4.13,
"learning_rate": 3.886671248545243e-06,
"loss": 0.0045,
"step": 1696
},
{
"epoch": 4.13,
"learning_rate": 3.865327057001078e-06,
"loss": 0.0054,
"step": 1697
},
{
"epoch": 4.13,
"learning_rate": 3.8440367228088995e-06,
"loss": 0.0051,
"step": 1698
},
{
"epoch": 4.14,
"learning_rate": 3.8228003002227255e-06,
"loss": 0.0048,
"step": 1699
},
{
"epoch": 4.14,
"learning_rate": 3.801617843359187e-06,
"loss": 0.0048,
"step": 1700
},
{
"epoch": 4.14,
"learning_rate": 3.7804894061974183e-06,
"loss": 0.0059,
"step": 1701
},
{
"epoch": 4.14,
"learning_rate": 3.7594150425788675e-06,
"loss": 0.0057,
"step": 1702
},
{
"epoch": 4.14,
"learning_rate": 3.738394806207207e-06,
"loss": 0.0057,
"step": 1703
},
{
"epoch": 4.15,
"learning_rate": 3.7174287506481776e-06,
"loss": 0.0046,
"step": 1704
},
{
"epoch": 4.15,
"learning_rate": 3.6965169293294357e-06,
"loss": 0.0039,
"step": 1705
},
{
"epoch": 4.15,
"learning_rate": 3.67565939554044e-06,
"loss": 0.0045,
"step": 1706
},
{
"epoch": 4.15,
"learning_rate": 3.654856202432319e-06,
"loss": 0.0069,
"step": 1707
},
{
"epoch": 4.16,
"learning_rate": 3.6341074030177114e-06,
"loss": 0.0053,
"step": 1708
},
{
"epoch": 4.16,
"learning_rate": 3.6134130501706417e-06,
"loss": 0.0061,
"step": 1709
},
{
"epoch": 4.16,
"learning_rate": 3.592773196626417e-06,
"loss": 0.0049,
"step": 1710
},
{
"epoch": 4.16,
"learning_rate": 3.5721878949814323e-06,
"loss": 0.0051,
"step": 1711
},
{
"epoch": 4.17,
"learning_rate": 3.5516571976930786e-06,
"loss": 0.0053,
"step": 1712
},
{
"epoch": 4.17,
"learning_rate": 3.531181157079605e-06,
"loss": 0.0045,
"step": 1713
},
{
"epoch": 4.17,
"learning_rate": 3.5107598253199758e-06,
"loss": 0.0048,
"step": 1714
},
{
"epoch": 4.17,
"learning_rate": 3.4903932544537276e-06,
"loss": 0.0044,
"step": 1715
},
{
"epoch": 4.18,
"learning_rate": 3.470081496380881e-06,
"loss": 0.0047,
"step": 1716
},
{
"epoch": 4.18,
"learning_rate": 3.4498246028617536e-06,
"loss": 0.0041,
"step": 1717
},
{
"epoch": 4.18,
"learning_rate": 3.4296226255168485e-06,
"loss": 0.0053,
"step": 1718
},
{
"epoch": 4.18,
"learning_rate": 3.409475615826746e-06,
"loss": 0.0057,
"step": 1719
},
{
"epoch": 4.19,
"learning_rate": 3.3893836251319422e-06,
"loss": 0.0044,
"step": 1720
},
{
"epoch": 4.19,
"learning_rate": 3.3693467046327117e-06,
"loss": 0.005,
"step": 1721
},
{
"epoch": 4.19,
"learning_rate": 3.3493649053890326e-06,
"loss": 0.0045,
"step": 1722
},
{
"epoch": 4.19,
"learning_rate": 3.32943827832039e-06,
"loss": 0.0049,
"step": 1723
},
{
"epoch": 4.2,
"learning_rate": 3.309566874205672e-06,
"loss": 0.0052,
"step": 1724
},
{
"epoch": 4.2,
"learning_rate": 3.289750743683062e-06,
"loss": 0.0046,
"step": 1725
},
{
"epoch": 4.2,
"learning_rate": 3.2699899372498733e-06,
"loss": 0.0048,
"step": 1726
},
{
"epoch": 4.2,
"learning_rate": 3.2502845052624354e-06,
"loss": 0.0055,
"step": 1727
},
{
"epoch": 4.21,
"learning_rate": 3.230634497935983e-06,
"loss": 0.0042,
"step": 1728
},
{
"epoch": 4.21,
"learning_rate": 3.211039965344512e-06,
"loss": 0.0037,
"step": 1729
},
{
"epoch": 4.21,
"learning_rate": 3.1915009574206262e-06,
"loss": 0.0051,
"step": 1730
},
{
"epoch": 4.21,
"learning_rate": 3.17201752395547e-06,
"loss": 0.0051,
"step": 1731
},
{
"epoch": 4.22,
"learning_rate": 3.1525897145985472e-06,
"loss": 0.0041,
"step": 1732
},
{
"epoch": 4.22,
"learning_rate": 3.133217578857611e-06,
"loss": 0.0048,
"step": 1733
},
{
"epoch": 4.22,
"learning_rate": 3.113901166098562e-06,
"loss": 0.0053,
"step": 1734
},
{
"epoch": 4.22,
"learning_rate": 3.0946405255452947e-06,
"loss": 0.0044,
"step": 1735
},
{
"epoch": 4.23,
"learning_rate": 3.075435706279567e-06,
"loss": 0.0044,
"step": 1736
},
{
"epoch": 4.23,
"learning_rate": 3.0562867572409034e-06,
"loss": 0.0051,
"step": 1737
},
{
"epoch": 4.23,
"learning_rate": 3.037193727226445e-06,
"loss": 0.0046,
"step": 1738
},
{
"epoch": 4.23,
"learning_rate": 3.018156664890834e-06,
"loss": 0.0039,
"step": 1739
},
{
"epoch": 4.23,
"learning_rate": 2.9991756187461e-06,
"loss": 0.0054,
"step": 1740
},
{
"epoch": 4.24,
"learning_rate": 2.9802506371615246e-06,
"loss": 0.0043,
"step": 1741
},
{
"epoch": 4.24,
"learning_rate": 2.961381768363511e-06,
"loss": 0.0049,
"step": 1742
},
{
"epoch": 4.24,
"learning_rate": 2.942569060435482e-06,
"loss": 0.0047,
"step": 1743
},
{
"epoch": 4.24,
"learning_rate": 2.9238125613177403e-06,
"loss": 0.0047,
"step": 1744
},
{
"epoch": 4.25,
"learning_rate": 2.905112318807346e-06,
"loss": 0.0047,
"step": 1745
},
{
"epoch": 4.25,
"learning_rate": 2.8864683805580133e-06,
"loss": 0.0054,
"step": 1746
},
{
"epoch": 4.25,
"learning_rate": 2.8678807940799744e-06,
"loss": 0.0037,
"step": 1747
},
{
"epoch": 4.25,
"learning_rate": 2.8493496067398483e-06,
"loss": 0.0062,
"step": 1748
},
{
"epoch": 4.26,
"learning_rate": 2.8308748657605522e-06,
"loss": 0.0042,
"step": 1749
},
{
"epoch": 4.26,
"learning_rate": 2.812456618221143e-06,
"loss": 0.006,
"step": 1750
},
{
"epoch": 4.26,
"learning_rate": 2.794094911056719e-06,
"loss": 0.0043,
"step": 1751
},
{
"epoch": 4.26,
"learning_rate": 2.775789791058306e-06,
"loss": 0.0049,
"step": 1752
},
{
"epoch": 4.27,
"learning_rate": 2.757541304872732e-06,
"loss": 0.0052,
"step": 1753
},
{
"epoch": 4.27,
"learning_rate": 2.7393494990024834e-06,
"loss": 0.0052,
"step": 1754
},
{
"epoch": 4.27,
"learning_rate": 2.7212144198056374e-06,
"loss": 0.0065,
"step": 1755
},
{
"epoch": 4.27,
"learning_rate": 2.7031361134956913e-06,
"loss": 0.0049,
"step": 1756
},
{
"epoch": 4.28,
"learning_rate": 2.6851146261414747e-06,
"loss": 0.0063,
"step": 1757
},
{
"epoch": 4.28,
"learning_rate": 2.667150003667032e-06,
"loss": 0.0043,
"step": 1758
},
{
"epoch": 4.28,
"learning_rate": 2.649242291851503e-06,
"loss": 0.0048,
"step": 1759
},
{
"epoch": 4.28,
"learning_rate": 2.631391536328992e-06,
"loss": 0.005,
"step": 1760
},
{
"epoch": 4.29,
"learning_rate": 2.6135977825884533e-06,
"loss": 0.0066,
"step": 1761
},
{
"epoch": 4.29,
"learning_rate": 2.595861075973613e-06,
"loss": 0.0053,
"step": 1762
},
{
"epoch": 4.29,
"learning_rate": 2.578181461682794e-06,
"loss": 0.0031,
"step": 1763
},
{
"epoch": 4.29,
"learning_rate": 2.5605589847688518e-06,
"loss": 0.0028,
"step": 1764
},
{
"epoch": 4.3,
"learning_rate": 2.5429936901390284e-06,
"loss": 0.004,
"step": 1765
},
{
"epoch": 4.3,
"learning_rate": 2.5254856225548544e-06,
"loss": 0.0034,
"step": 1766
},
{
"epoch": 4.3,
"learning_rate": 2.508034826632022e-06,
"loss": 0.0068,
"step": 1767
},
{
"epoch": 4.3,
"learning_rate": 2.4906413468402916e-06,
"loss": 0.0049,
"step": 1768
},
{
"epoch": 4.31,
"learning_rate": 2.4733052275033448e-06,
"loss": 0.0042,
"step": 1769
},
{
"epoch": 4.31,
"learning_rate": 2.4560265127987147e-06,
"loss": 0.0048,
"step": 1770
},
{
"epoch": 4.31,
"learning_rate": 2.4388052467576308e-06,
"loss": 0.0053,
"step": 1771
},
{
"epoch": 4.31,
"learning_rate": 2.4216414732649432e-06,
"loss": 0.0047,
"step": 1772
},
{
"epoch": 4.32,
"learning_rate": 2.40453523605898e-06,
"loss": 0.0046,
"step": 1773
},
{
"epoch": 4.32,
"learning_rate": 2.3874865787314598e-06,
"loss": 0.0043,
"step": 1774
},
{
"epoch": 4.32,
"learning_rate": 2.3704955447273636e-06,
"loss": 0.0053,
"step": 1775
},
{
"epoch": 4.32,
"learning_rate": 2.3535621773448395e-06,
"loss": 0.005,
"step": 1776
},
{
"epoch": 4.32,
"learning_rate": 2.3366865197350733e-06,
"loss": 0.0045,
"step": 1777
},
{
"epoch": 4.33,
"learning_rate": 2.3198686149022013e-06,
"loss": 0.006,
"step": 1778
},
{
"epoch": 4.33,
"learning_rate": 2.303108505703178e-06,
"loss": 0.0047,
"step": 1779
},
{
"epoch": 4.33,
"learning_rate": 2.2864062348476905e-06,
"loss": 0.0038,
"step": 1780
},
{
"epoch": 4.33,
"learning_rate": 2.2697618448980217e-06,
"loss": 0.0054,
"step": 1781
},
{
"epoch": 4.34,
"learning_rate": 2.2531753782689598e-06,
"loss": 0.0052,
"step": 1782
},
{
"epoch": 4.34,
"learning_rate": 2.2366468772276994e-06,
"loss": 0.0038,
"step": 1783
},
{
"epoch": 4.34,
"learning_rate": 2.2201763838937184e-06,
"loss": 0.0043,
"step": 1784
},
{
"epoch": 4.34,
"learning_rate": 2.2037639402386566e-06,
"loss": 0.0038,
"step": 1785
},
{
"epoch": 4.35,
"learning_rate": 2.1874095880862505e-06,
"loss": 0.0052,
"step": 1786
},
{
"epoch": 4.35,
"learning_rate": 2.1711133691121903e-06,
"loss": 0.0052,
"step": 1787
},
{
"epoch": 4.35,
"learning_rate": 2.1548753248440164e-06,
"loss": 0.0055,
"step": 1788
},
{
"epoch": 4.35,
"learning_rate": 2.138695496661039e-06,
"loss": 0.0044,
"step": 1789
},
{
"epoch": 4.36,
"learning_rate": 2.122573925794219e-06,
"loss": 0.0037,
"step": 1790
},
{
"epoch": 4.36,
"learning_rate": 2.1065106533260383e-06,
"loss": 0.0063,
"step": 1791
},
{
"epoch": 4.36,
"learning_rate": 2.0905057201904445e-06,
"loss": 0.0035,
"step": 1792
},
{
"epoch": 4.36,
"learning_rate": 2.0745591671727018e-06,
"loss": 0.0048,
"step": 1793
},
{
"epoch": 4.37,
"learning_rate": 2.0586710349093013e-06,
"loss": 0.0057,
"step": 1794
},
{
"epoch": 4.37,
"learning_rate": 2.0428413638878764e-06,
"loss": 0.0055,
"step": 1795
},
{
"epoch": 4.37,
"learning_rate": 2.027070194447081e-06,
"loss": 0.0052,
"step": 1796
},
{
"epoch": 4.37,
"learning_rate": 2.0113575667764755e-06,
"loss": 0.0036,
"step": 1797
},
{
"epoch": 4.38,
"learning_rate": 1.995703520916456e-06,
"loss": 0.0041,
"step": 1798
},
{
"epoch": 4.38,
"learning_rate": 1.9801080967581263e-06,
"loss": 0.0054,
"step": 1799
},
{
"epoch": 4.38,
"learning_rate": 1.9645713340431997e-06,
"loss": 0.0044,
"step": 1800
},
{
"epoch": 4.38,
"learning_rate": 1.9490932723639165e-06,
"loss": 0.0047,
"step": 1801
},
{
"epoch": 4.39,
"learning_rate": 1.9336739511629233e-06,
"loss": 0.0049,
"step": 1802
},
{
"epoch": 4.39,
"learning_rate": 1.918313409733169e-06,
"loss": 0.0041,
"step": 1803
},
{
"epoch": 4.39,
"learning_rate": 1.9030116872178316e-06,
"loss": 0.0044,
"step": 1804
},
{
"epoch": 4.39,
"learning_rate": 1.8877688226101919e-06,
"loss": 0.0059,
"step": 1805
},
{
"epoch": 4.4,
"learning_rate": 1.8725848547535368e-06,
"loss": 0.0041,
"step": 1806
},
{
"epoch": 4.4,
"learning_rate": 1.8574598223410872e-06,
"loss": 0.0061,
"step": 1807
},
{
"epoch": 4.4,
"learning_rate": 1.8423937639158534e-06,
"loss": 0.0046,
"step": 1808
},
{
"epoch": 4.4,
"learning_rate": 1.82738671787058e-06,
"loss": 0.0043,
"step": 1809
},
{
"epoch": 4.41,
"learning_rate": 1.8124387224476347e-06,
"loss": 0.0052,
"step": 1810
},
{
"epoch": 4.41,
"learning_rate": 1.7975498157388915e-06,
"loss": 0.0051,
"step": 1811
},
{
"epoch": 4.41,
"learning_rate": 1.7827200356856533e-06,
"loss": 0.0046,
"step": 1812
},
{
"epoch": 4.41,
"learning_rate": 1.7679494200785601e-06,
"loss": 0.0069,
"step": 1813
},
{
"epoch": 4.41,
"learning_rate": 1.7532380065574726e-06,
"loss": 0.005,
"step": 1814
},
{
"epoch": 4.42,
"learning_rate": 1.7385858326113918e-06,
"loss": 0.004,
"step": 1815
},
{
"epoch": 4.42,
"learning_rate": 1.7239929355783668e-06,
"loss": 0.004,
"step": 1816
},
{
"epoch": 4.42,
"learning_rate": 1.709459352645379e-06,
"loss": 0.0052,
"step": 1817
},
{
"epoch": 4.42,
"learning_rate": 1.694985120848258e-06,
"loss": 0.0054,
"step": 1818
},
{
"epoch": 4.43,
"learning_rate": 1.6805702770716053e-06,
"loss": 0.0044,
"step": 1819
},
{
"epoch": 4.43,
"learning_rate": 1.6662148580486702e-06,
"loss": 0.0044,
"step": 1820
},
{
"epoch": 4.43,
"learning_rate": 1.6519189003612767e-06,
"loss": 0.0045,
"step": 1821
},
{
"epoch": 4.43,
"learning_rate": 1.6376824404397251e-06,
"loss": 0.0045,
"step": 1822
},
{
"epoch": 4.44,
"learning_rate": 1.6235055145626953e-06,
"loss": 0.0045,
"step": 1823
},
{
"epoch": 4.44,
"learning_rate": 1.6093881588571501e-06,
"loss": 0.0058,
"step": 1824
},
{
"epoch": 4.44,
"learning_rate": 1.5953304092982624e-06,
"loss": 0.0047,
"step": 1825
},
{
"epoch": 4.44,
"learning_rate": 1.581332301709304e-06,
"loss": 0.005,
"step": 1826
},
{
"epoch": 4.45,
"learning_rate": 1.5673938717615605e-06,
"loss": 0.0065,
"step": 1827
},
{
"epoch": 4.45,
"learning_rate": 1.5535151549742528e-06,
"loss": 0.0043,
"step": 1828
},
{
"epoch": 4.45,
"learning_rate": 1.5396961867144206e-06,
"loss": 0.0044,
"step": 1829
},
{
"epoch": 4.45,
"learning_rate": 1.525937002196845e-06,
"loss": 0.004,
"step": 1830
},
{
"epoch": 4.46,
"learning_rate": 1.512237636483982e-06,
"loss": 0.0058,
"step": 1831
},
{
"epoch": 4.46,
"learning_rate": 1.4985981244858254e-06,
"loss": 0.0032,
"step": 1832
},
{
"epoch": 4.46,
"learning_rate": 1.4850185009598645e-06,
"loss": 0.0058,
"step": 1833
},
{
"epoch": 4.46,
"learning_rate": 1.471498800510962e-06,
"loss": 0.0051,
"step": 1834
},
{
"epoch": 4.47,
"learning_rate": 1.4580390575912872e-06,
"loss": 0.005,
"step": 1835
},
{
"epoch": 4.47,
"learning_rate": 1.4446393065002144e-06,
"loss": 0.0044,
"step": 1836
},
{
"epoch": 4.47,
"learning_rate": 1.431299581384249e-06,
"loss": 0.005,
"step": 1837
},
{
"epoch": 4.47,
"learning_rate": 1.4180199162369207e-06,
"loss": 0.0053,
"step": 1838
},
{
"epoch": 4.48,
"learning_rate": 1.4048003448987213e-06,
"loss": 0.0044,
"step": 1839
},
{
"epoch": 4.48,
"learning_rate": 1.3916409010569926e-06,
"loss": 0.0061,
"step": 1840
},
{
"epoch": 4.48,
"learning_rate": 1.378541618245871e-06,
"loss": 0.0043,
"step": 1841
},
{
"epoch": 4.48,
"learning_rate": 1.365502529846166e-06,
"loss": 0.0038,
"step": 1842
},
{
"epoch": 4.49,
"learning_rate": 1.3525236690853093e-06,
"loss": 0.0038,
"step": 1843
},
{
"epoch": 4.49,
"learning_rate": 1.3396050690372418e-06,
"loss": 0.0042,
"step": 1844
},
{
"epoch": 4.49,
"learning_rate": 1.3267467626223606e-06,
"loss": 0.0058,
"step": 1845
},
{
"epoch": 4.49,
"learning_rate": 1.3139487826073937e-06,
"loss": 0.0041,
"step": 1846
},
{
"epoch": 4.5,
"learning_rate": 1.3012111616053618e-06,
"loss": 0.0047,
"step": 1847
},
{
"epoch": 4.5,
"learning_rate": 1.288533932075453e-06,
"loss": 0.0047,
"step": 1848
},
{
"epoch": 4.5,
"learning_rate": 1.2759171263229813e-06,
"loss": 0.0043,
"step": 1849
},
{
"epoch": 4.5,
"learning_rate": 1.2633607764992671e-06,
"loss": 0.0044,
"step": 1850
},
{
"epoch": 4.51,
"learning_rate": 1.250864914601571e-06,
"loss": 0.0059,
"step": 1851
},
{
"epoch": 4.51,
"learning_rate": 1.2384295724730266e-06,
"loss": 0.0062,
"step": 1852
},
{
"epoch": 4.51,
"learning_rate": 1.2260547818025326e-06,
"loss": 0.0041,
"step": 1853
},
{
"epoch": 4.51,
"learning_rate": 1.2137405741246916e-06,
"loss": 0.0065,
"step": 1854
},
{
"epoch": 4.51,
"learning_rate": 1.201486980819716e-06,
"loss": 0.0047,
"step": 1855
},
{
"epoch": 4.52,
"learning_rate": 1.1892940331133612e-06,
"loss": 0.0041,
"step": 1856
},
{
"epoch": 4.52,
"learning_rate": 1.1771617620768394e-06,
"loss": 0.0051,
"step": 1857
},
{
"epoch": 4.52,
"learning_rate": 1.1650901986267365e-06,
"loss": 0.0042,
"step": 1858
},
{
"epoch": 4.52,
"learning_rate": 1.1530793735249458e-06,
"loss": 0.0048,
"step": 1859
},
{
"epoch": 4.53,
"learning_rate": 1.1411293173785726e-06,
"loss": 0.0042,
"step": 1860
},
{
"epoch": 4.53,
"learning_rate": 1.1292400606398635e-06,
"loss": 0.0034,
"step": 1861
},
{
"epoch": 4.53,
"learning_rate": 1.1174116336061468e-06,
"loss": 0.005,
"step": 1862
},
{
"epoch": 4.53,
"learning_rate": 1.1056440664197144e-06,
"loss": 0.0053,
"step": 1863
},
{
"epoch": 4.54,
"learning_rate": 1.0939373890677923e-06,
"loss": 0.0043,
"step": 1864
},
{
"epoch": 4.54,
"learning_rate": 1.0822916313824316e-06,
"loss": 0.0046,
"step": 1865
},
{
"epoch": 4.54,
"learning_rate": 1.0707068230404404e-06,
"loss": 0.0041,
"step": 1866
},
{
"epoch": 4.54,
"learning_rate": 1.059182993563304e-06,
"loss": 0.0043,
"step": 1867
},
{
"epoch": 4.55,
"learning_rate": 1.0477201723171377e-06,
"loss": 0.0052,
"step": 1868
},
{
"epoch": 4.55,
"learning_rate": 1.036318388512561e-06,
"loss": 0.004,
"step": 1869
},
{
"epoch": 4.55,
"learning_rate": 1.0249776712046744e-06,
"loss": 0.0045,
"step": 1870
},
{
"epoch": 4.55,
"learning_rate": 1.0136980492929605e-06,
"loss": 0.0043,
"step": 1871
},
{
"epoch": 4.56,
"learning_rate": 1.0024795515211988e-06,
"loss": 0.0048,
"step": 1872
},
{
"epoch": 4.56,
"learning_rate": 9.913222064774157e-07,
"loss": 0.0039,
"step": 1873
},
{
"epoch": 4.56,
"learning_rate": 9.802260425938099e-07,
"loss": 0.0051,
"step": 1874
},
{
"epoch": 4.56,
"learning_rate": 9.691910881466564e-07,
"loss": 0.0039,
"step": 1875
},
{
"epoch": 4.57,
"learning_rate": 9.58217371256262e-07,
"loss": 0.0055,
"step": 1876
},
{
"epoch": 4.57,
"learning_rate": 9.473049198868822e-07,
"loss": 0.0047,
"step": 1877
},
{
"epoch": 4.57,
"learning_rate": 9.364537618466451e-07,
"loss": 0.0047,
"step": 1878
},
{
"epoch": 4.57,
"learning_rate": 9.25663924787487e-07,
"loss": 0.0053,
"step": 1879
},
{
"epoch": 4.58,
"learning_rate": 9.149354362050805e-07,
"loss": 0.0037,
"step": 1880
},
{
"epoch": 4.58,
"learning_rate": 9.042683234387645e-07,
"loss": 0.0044,
"step": 1881
},
{
"epoch": 4.58,
"learning_rate": 8.936626136714754e-07,
"loss": 0.0058,
"step": 1882
},
{
"epoch": 4.58,
"learning_rate": 8.831183339296751e-07,
"loss": 0.0045,
"step": 1883
},
{
"epoch": 4.59,
"learning_rate": 8.726355110832862e-07,
"loss": 0.0049,
"step": 1884
},
{
"epoch": 4.59,
"learning_rate": 8.622141718456128e-07,
"loss": 0.0042,
"step": 1885
},
{
"epoch": 4.59,
"learning_rate": 8.51854342773295e-07,
"loss": 0.005,
"step": 1886
},
{
"epoch": 4.59,
"learning_rate": 8.415560502662151e-07,
"loss": 0.008,
"step": 1887
},
{
"epoch": 4.6,
"learning_rate": 8.313193205674391e-07,
"loss": 0.0055,
"step": 1888
},
{
"epoch": 4.6,
"learning_rate": 8.211441797631752e-07,
"loss": 0.004,
"step": 1889
},
{
"epoch": 4.6,
"learning_rate": 8.110306537826601e-07,
"loss": 0.0051,
"step": 1890
},
{
"epoch": 4.6,
"learning_rate": 8.009787683981279e-07,
"loss": 0.0055,
"step": 1891
},
{
"epoch": 4.6,
"learning_rate": 7.909885492247359e-07,
"loss": 0.0037,
"step": 1892
},
{
"epoch": 4.61,
"learning_rate": 7.81060021720495e-07,
"loss": 0.0039,
"step": 1893
},
{
"epoch": 4.61,
"learning_rate": 7.711932111862025e-07,
"loss": 0.0047,
"step": 1894
},
{
"epoch": 4.61,
"learning_rate": 7.613881427654013e-07,
"loss": 0.0039,
"step": 1895
},
{
"epoch": 4.61,
"learning_rate": 7.516448414442739e-07,
"loss": 0.0035,
"step": 1896
},
{
"epoch": 4.62,
"learning_rate": 7.419633320516178e-07,
"loss": 0.0054,
"step": 1897
},
{
"epoch": 4.62,
"learning_rate": 7.32343639258759e-07,
"loss": 0.0055,
"step": 1898
},
{
"epoch": 4.62,
"learning_rate": 7.227857875795025e-07,
"loss": 0.0049,
"step": 1899
},
{
"epoch": 4.62,
"learning_rate": 7.13289801370054e-07,
"loss": 0.0048,
"step": 1900
},
{
"epoch": 4.63,
"learning_rate": 7.038557048289818e-07,
"loss": 0.004,
"step": 1901
},
{
"epoch": 4.63,
"learning_rate": 6.944835219971329e-07,
"loss": 0.0051,
"step": 1902
},
{
"epoch": 4.63,
"learning_rate": 6.851732767575752e-07,
"loss": 0.0037,
"step": 1903
},
{
"epoch": 4.63,
"learning_rate": 6.759249928355554e-07,
"loss": 0.0045,
"step": 1904
},
{
"epoch": 4.64,
"learning_rate": 6.667386937984105e-07,
"loss": 0.0055,
"step": 1905
},
{
"epoch": 4.64,
"learning_rate": 6.576144030555259e-07,
"loss": 0.0039,
"step": 1906
},
{
"epoch": 4.64,
"learning_rate": 6.485521438582748e-07,
"loss": 0.0044,
"step": 1907
},
{
"epoch": 4.64,
"learning_rate": 6.395519392999621e-07,
"loss": 0.0048,
"step": 1908
},
{
"epoch": 4.65,
"learning_rate": 6.30613812315739e-07,
"loss": 0.0053,
"step": 1909
},
{
"epoch": 4.65,
"learning_rate": 6.217377856825885e-07,
"loss": 0.0057,
"step": 1910
},
{
"epoch": 4.65,
"learning_rate": 6.129238820192285e-07,
"loss": 0.0045,
"step": 1911
},
{
"epoch": 4.65,
"learning_rate": 6.041721237860676e-07,
"loss": 0.0047,
"step": 1912
},
{
"epoch": 4.66,
"learning_rate": 5.954825332851632e-07,
"loss": 0.0055,
"step": 1913
},
{
"epoch": 4.66,
"learning_rate": 5.868551326601413e-07,
"loss": 0.0037,
"step": 1914
},
{
"epoch": 4.66,
"learning_rate": 5.782899438961487e-07,
"loss": 0.0054,
"step": 1915
},
{
"epoch": 4.66,
"learning_rate": 5.697869888198065e-07,
"loss": 0.0041,
"step": 1916
},
{
"epoch": 4.67,
"learning_rate": 5.613462890991378e-07,
"loss": 0.0047,
"step": 1917
},
{
"epoch": 4.67,
"learning_rate": 5.529678662435228e-07,
"loss": 0.0028,
"step": 1918
},
{
"epoch": 4.67,
"learning_rate": 5.446517416036412e-07,
"loss": 0.0041,
"step": 1919
},
{
"epoch": 4.67,
"learning_rate": 5.363979363714245e-07,
"loss": 0.0039,
"step": 1920
},
{
"epoch": 4.68,
"learning_rate": 5.282064715799895e-07,
"loss": 0.0046,
"step": 1921
},
{
"epoch": 4.68,
"learning_rate": 5.20077368103597e-07,
"loss": 0.0038,
"step": 1922
},
{
"epoch": 4.68,
"learning_rate": 5.120106466575875e-07,
"loss": 0.0053,
"step": 1923
},
{
"epoch": 4.68,
"learning_rate": 5.040063277983287e-07,
"loss": 0.0039,
"step": 1924
},
{
"epoch": 4.69,
"learning_rate": 4.96064431923185e-07,
"loss": 0.0053,
"step": 1925
},
{
"epoch": 4.69,
"learning_rate": 4.881849792704368e-07,
"loss": 0.0041,
"step": 1926
},
{
"epoch": 4.69,
"learning_rate": 4.803679899192392e-07,
"loss": 0.0042,
"step": 1927
},
{
"epoch": 4.69,
"learning_rate": 4.7261348378958016e-07,
"loss": 0.0048,
"step": 1928
},
{
"epoch": 4.69,
"learning_rate": 4.649214806422164e-07,
"loss": 0.0068,
"step": 1929
},
{
"epoch": 4.7,
"learning_rate": 4.5729200007862683e-07,
"loss": 0.0053,
"step": 1930
},
{
"epoch": 4.7,
"learning_rate": 4.497250615409732e-07,
"loss": 0.004,
"step": 1931
},
{
"epoch": 4.7,
"learning_rate": 4.4222068431203634e-07,
"loss": 0.0038,
"step": 1932
},
{
"epoch": 4.7,
"learning_rate": 4.34778887515172e-07,
"loss": 0.005,
"step": 1933
},
{
"epoch": 4.71,
"learning_rate": 4.2739969011426074e-07,
"loss": 0.0055,
"step": 1934
},
{
"epoch": 4.71,
"learning_rate": 4.2008311091366606e-07,
"loss": 0.0051,
"step": 1935
},
{
"epoch": 4.71,
"learning_rate": 4.128291685581792e-07,
"loss": 0.0055,
"step": 1936
},
{
"epoch": 4.71,
"learning_rate": 4.0563788153297755e-07,
"loss": 0.0043,
"step": 1937
},
{
"epoch": 4.72,
"learning_rate": 3.9850926816357157e-07,
"loss": 0.004,
"step": 1938
},
{
"epoch": 4.72,
"learning_rate": 3.9144334661576074e-07,
"loss": 0.0039,
"step": 1939
},
{
"epoch": 4.72,
"learning_rate": 3.8444013489558337e-07,
"loss": 0.0042,
"step": 1940
},
{
"epoch": 4.72,
"learning_rate": 3.774996508492834e-07,
"loss": 0.0054,
"step": 1941
},
{
"epoch": 4.73,
"learning_rate": 3.70621912163252e-07,
"loss": 0.0062,
"step": 1942
},
{
"epoch": 4.73,
"learning_rate": 3.6380693636398343e-07,
"loss": 0.0051,
"step": 1943
},
{
"epoch": 4.73,
"learning_rate": 3.570547408180441e-07,
"loss": 0.0043,
"step": 1944
},
{
"epoch": 4.73,
"learning_rate": 3.503653427320036e-07,
"loss": 0.0035,
"step": 1945
},
{
"epoch": 4.74,
"learning_rate": 3.4373875915241493e-07,
"loss": 0.0057,
"step": 1946
},
{
"epoch": 4.74,
"learning_rate": 3.371750069657592e-07,
"loss": 0.0045,
"step": 1947
},
{
"epoch": 4.74,
"learning_rate": 3.306741028984012e-07,
"loss": 0.0058,
"step": 1948
},
{
"epoch": 4.74,
"learning_rate": 3.242360635165559e-07,
"loss": 0.0044,
"step": 1949
},
{
"epoch": 4.75,
"learning_rate": 3.1786090522624156e-07,
"loss": 0.0047,
"step": 1950
},
{
"epoch": 4.75,
"learning_rate": 3.1154864427322685e-07,
"loss": 0.0034,
"step": 1951
},
{
"epoch": 4.75,
"learning_rate": 3.052992967430085e-07,
"loss": 0.0035,
"step": 1952
},
{
"epoch": 4.75,
"learning_rate": 2.991128785607589e-07,
"loss": 0.0039,
"step": 1953
},
{
"epoch": 4.76,
"learning_rate": 2.9298940549128964e-07,
"loss": 0.0039,
"step": 1954
},
{
"epoch": 4.76,
"learning_rate": 2.8692889313900186e-07,
"loss": 0.0047,
"step": 1955
},
{
"epoch": 4.76,
"learning_rate": 2.8093135694786667e-07,
"loss": 0.0056,
"step": 1956
},
{
"epoch": 4.76,
"learning_rate": 2.749968122013669e-07,
"loss": 0.0051,
"step": 1957
},
{
"epoch": 4.77,
"learning_rate": 2.6912527402246367e-07,
"loss": 0.0043,
"step": 1958
},
{
"epoch": 4.77,
"learning_rate": 2.633167573735579e-07,
"loss": 0.0054,
"step": 1959
},
{
"epoch": 4.77,
"learning_rate": 2.575712770564592e-07,
"loss": 0.0035,
"step": 1960
},
{
"epoch": 4.77,
"learning_rate": 2.5188884771233656e-07,
"loss": 0.0043,
"step": 1961
},
{
"epoch": 4.78,
"learning_rate": 2.4626948382168726e-07,
"loss": 0.0038,
"step": 1962
},
{
"epoch": 4.78,
"learning_rate": 2.407131997043038e-07,
"loss": 0.0039,
"step": 1963
},
{
"epoch": 4.78,
"learning_rate": 2.3522000951922417e-07,
"loss": 0.0031,
"step": 1964
},
{
"epoch": 4.78,
"learning_rate": 2.2978992726471748e-07,
"loss": 0.0045,
"step": 1965
},
{
"epoch": 4.78,
"learning_rate": 2.244229667782205e-07,
"loss": 0.0081,
"step": 1966
},
{
"epoch": 4.79,
"learning_rate": 2.1911914173632643e-07,
"loss": 0.0046,
"step": 1967
},
{
"epoch": 4.79,
"learning_rate": 2.1387846565474045e-07,
"loss": 0.0048,
"step": 1968
},
{
"epoch": 4.79,
"learning_rate": 2.08700951888241e-07,
"loss": 0.0039,
"step": 1969
},
{
"epoch": 4.79,
"learning_rate": 2.0358661363065746e-07,
"loss": 0.0042,
"step": 1970
},
{
"epoch": 4.8,
"learning_rate": 1.985354639148229e-07,
"loss": 0.0051,
"step": 1971
},
{
"epoch": 4.8,
"learning_rate": 1.9354751561254937e-07,
"loss": 0.004,
"step": 1972
},
{
"epoch": 4.8,
"learning_rate": 1.8862278143459144e-07,
"loss": 0.0043,
"step": 1973
},
{
"epoch": 4.8,
"learning_rate": 1.8376127393062158e-07,
"loss": 0.0051,
"step": 1974
},
{
"epoch": 4.81,
"learning_rate": 1.7896300548918832e-07,
"loss": 0.0058,
"step": 1975
},
{
"epoch": 4.81,
"learning_rate": 1.7422798833768572e-07,
"loss": 0.004,
"step": 1976
},
{
"epoch": 4.81,
"learning_rate": 1.6955623454233128e-07,
"loss": 0.0051,
"step": 1977
},
{
"epoch": 4.81,
"learning_rate": 1.6494775600812417e-07,
"loss": 0.0048,
"step": 1978
},
{
"epoch": 4.82,
"learning_rate": 1.6040256447881763e-07,
"loss": 0.0056,
"step": 1979
},
{
"epoch": 4.82,
"learning_rate": 1.559206715368966e-07,
"loss": 0.0039,
"step": 1980
},
{
"epoch": 4.82,
"learning_rate": 1.5150208860354176e-07,
"loss": 0.0053,
"step": 1981
},
{
"epoch": 4.82,
"learning_rate": 1.4714682693859617e-07,
"loss": 0.0039,
"step": 1982
},
{
"epoch": 4.83,
"learning_rate": 1.428548976405486e-07,
"loss": 0.0058,
"step": 1983
},
{
"epoch": 4.83,
"learning_rate": 1.3862631164649475e-07,
"loss": 0.0039,
"step": 1984
},
{
"epoch": 4.83,
"learning_rate": 1.344610797321122e-07,
"loss": 0.0048,
"step": 1985
},
{
"epoch": 4.83,
"learning_rate": 1.3035921251163263e-07,
"loss": 0.0039,
"step": 1986
},
{
"epoch": 4.84,
"learning_rate": 1.2632072043782252e-07,
"loss": 0.0032,
"step": 1987
},
{
"epoch": 4.84,
"learning_rate": 1.223456138019413e-07,
"loss": 0.0042,
"step": 1988
},
{
"epoch": 4.84,
"learning_rate": 1.1843390273373057e-07,
"loss": 0.0056,
"step": 1989
},
{
"epoch": 4.84,
"learning_rate": 1.1458559720137762e-07,
"loss": 0.0042,
"step": 1990
},
{
"epoch": 4.85,
"learning_rate": 1.1080070701149359e-07,
"loss": 0.004,
"step": 1991
},
{
"epoch": 4.85,
"learning_rate": 1.0707924180909379e-07,
"loss": 0.0047,
"step": 1992
},
{
"epoch": 4.85,
"learning_rate": 1.0342121107755898e-07,
"loss": 0.0057,
"step": 1993
},
{
"epoch": 4.85,
"learning_rate": 9.982662413862975e-08,
"loss": 0.0046,
"step": 1994
},
{
"epoch": 4.86,
"learning_rate": 9.629549015237049e-08,
"loss": 0.0037,
"step": 1995
},
{
"epoch": 4.86,
"learning_rate": 9.282781811714159e-08,
"loss": 0.0055,
"step": 1996
},
{
"epoch": 4.86,
"learning_rate": 8.94236168695911e-08,
"loss": 0.004,
"step": 1997
},
{
"epoch": 4.86,
"learning_rate": 8.608289508462708e-08,
"loss": 0.0036,
"step": 1998
},
{
"epoch": 4.87,
"learning_rate": 8.280566127538691e-08,
"loss": 0.0047,
"step": 1999
},
{
"epoch": 4.87,
"learning_rate": 7.959192379322077e-08,
"loss": 0.0043,
"step": 2000
},
{
"epoch": 4.87,
"learning_rate": 7.644169082768326e-08,
"loss": 0.0043,
"step": 2001
},
{
"epoch": 4.87,
"learning_rate": 7.335497040648898e-08,
"loss": 0.004,
"step": 2002
},
{
"epoch": 4.87,
"learning_rate": 7.033177039550698e-08,
"loss": 0.0037,
"step": 2003
},
{
"epoch": 4.88,
"learning_rate": 6.73720984987386e-08,
"loss": 0.0051,
"step": 2004
},
{
"epoch": 4.88,
"learning_rate": 6.4475962258298e-08,
"loss": 0.0046,
"step": 2005
},
{
"epoch": 4.88,
"learning_rate": 6.164336905438994e-08,
"loss": 0.005,
"step": 2006
},
{
"epoch": 4.88,
"learning_rate": 5.8874326105293196e-08,
"loss": 0.0045,
"step": 2007
},
{
"epoch": 4.89,
"learning_rate": 5.616884046734383e-08,
"loss": 0.0051,
"step": 2008
},
{
"epoch": 4.89,
"learning_rate": 5.352691903491303e-08,
"loss": 0.0046,
"step": 2009
},
{
"epoch": 4.89,
"learning_rate": 5.094856854039043e-08,
"loss": 0.0049,
"step": 2010
},
{
"epoch": 4.89,
"learning_rate": 4.8433795554173046e-08,
"loss": 0.0037,
"step": 2011
},
{
"epoch": 4.9,
"learning_rate": 4.598260648463748e-08,
"loss": 0.0039,
"step": 2012
},
{
"epoch": 4.9,
"learning_rate": 4.359500757813717e-08,
"loss": 0.0041,
"step": 2013
},
{
"epoch": 4.9,
"learning_rate": 4.1271004918971847e-08,
"loss": 0.004,
"step": 2014
},
{
"epoch": 4.9,
"learning_rate": 3.901060442938198e-08,
"loss": 0.0056,
"step": 2015
},
{
"epoch": 4.91,
"learning_rate": 3.68138118695377e-08,
"loss": 0.0049,
"step": 2016
},
{
"epoch": 4.91,
"learning_rate": 3.468063283750267e-08,
"loss": 0.0051,
"step": 2017
},
{
"epoch": 4.91,
"learning_rate": 3.2611072769250795e-08,
"loss": 0.0039,
"step": 2018
},
{
"epoch": 4.91,
"learning_rate": 3.0605136938624544e-08,
"loss": 0.004,
"step": 2019
},
{
"epoch": 4.92,
"learning_rate": 2.866283045734053e-08,
"loss": 0.0045,
"step": 2020
},
{
"epoch": 4.92,
"learning_rate": 2.6784158274964498e-08,
"loss": 0.0063,
"step": 2021
},
{
"epoch": 4.92,
"learning_rate": 2.496912517890304e-08,
"loss": 0.0056,
"step": 2022
},
{
"epoch": 4.92,
"learning_rate": 2.3217735794392458e-08,
"loss": 0.0039,
"step": 2023
},
{
"epoch": 4.93,
"learning_rate": 2.152999458449323e-08,
"loss": 0.004,
"step": 2024
},
{
"epoch": 4.93,
"learning_rate": 1.990590585005947e-08,
"loss": 0.0045,
"step": 2025
},
{
"epoch": 4.93,
"learning_rate": 1.834547372975004e-08,
"loss": 0.0046,
"step": 2026
},
{
"epoch": 4.93,
"learning_rate": 1.6848702200000786e-08,
"loss": 0.0051,
"step": 2027
},
{
"epoch": 4.94,
"learning_rate": 1.5415595075027324e-08,
"loss": 0.0054,
"step": 2028
},
{
"epoch": 4.94,
"learning_rate": 1.4046156006808364e-08,
"loss": 0.0048,
"step": 2029
},
{
"epoch": 4.94,
"learning_rate": 1.2740388485071863e-08,
"loss": 0.0041,
"step": 2030
},
{
"epoch": 4.94,
"learning_rate": 1.149829583730333e-08,
"loss": 0.0048,
"step": 2031
},
{
"epoch": 4.95,
"learning_rate": 1.03198812287153e-08,
"loss": 0.0045,
"step": 2032
},
{
"epoch": 4.95,
"learning_rate": 9.20514766225289e-09,
"loss": 0.0046,
"step": 2033
},
{
"epoch": 4.95,
"learning_rate": 8.154097978591014e-09,
"loss": 0.0039,
"step": 2034
},
{
"epoch": 4.95,
"learning_rate": 7.166734856103863e-09,
"loss": 0.0044,
"step": 2035
},
{
"epoch": 4.96,
"learning_rate": 6.243060810892654e-09,
"loss": 0.0043,
"step": 2036
},
{
"epoch": 4.96,
"learning_rate": 5.3830781967412205e-09,
"loss": 0.006,
"step": 2037
},
{
"epoch": 4.96,
"learning_rate": 4.586789205140995e-09,
"loss": 0.004,
"step": 2038
},
{
"epoch": 4.96,
"learning_rate": 3.854195865271582e-09,
"loss": 0.0048,
"step": 2039
},
{
"epoch": 4.97,
"learning_rate": 3.1853000439951987e-09,
"loss": 0.0054,
"step": 2040
},
{
"epoch": 4.97,
"learning_rate": 2.58010344585391e-09,
"loss": 0.0038,
"step": 2041
},
{
"epoch": 4.97,
"learning_rate": 2.038607613066845e-09,
"loss": 0.0041,
"step": 2042
},
{
"epoch": 4.97,
"learning_rate": 1.5608139255246512e-09,
"loss": 0.0047,
"step": 2043
},
{
"epoch": 4.97,
"learning_rate": 1.1467236007867144e-09,
"loss": 0.0036,
"step": 2044
},
{
"epoch": 4.98,
"learning_rate": 7.963376940728351e-10,
"loss": 0.0051,
"step": 2045
},
{
"epoch": 4.98,
"learning_rate": 5.096570982743298e-10,
"loss": 0.0049,
"step": 2046
},
{
"epoch": 4.98,
"learning_rate": 2.866825439346021e-10,
"loss": 0.004,
"step": 2047
},
{
"epoch": 4.98,
"learning_rate": 1.2741459925746935e-10,
"loss": 0.0057,
"step": 2048
},
{
"epoch": 4.99,
"learning_rate": 3.185367010716256e-11,
"loss": 0.004,
"step": 2049
},
{
"epoch": 4.99,
"learning_rate": 0.0,
"loss": 0.0058,
"step": 2050
},
{
"epoch": 4.99,
"eval_loss": 1.1227930784225464,
"eval_runtime": 115.4648,
"eval_samples_per_second": 6.599,
"eval_steps_per_second": 0.416,
"step": 2050
},
{
"epoch": 4.99,
"step": 2050,
"total_flos": 2.0983989578550477e+19,
"train_loss": 0.1699243627804354,
"train_runtime": 174534.9388,
"train_samples_per_second": 1.506,
"train_steps_per_second": 0.012
}
],
"max_steps": 2050,
"num_train_epochs": 5,
"total_flos": 2.0983989578550477e+19,
"trial_name": null,
"trial_params": null
}