{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.989351992698509, "global_step": 2050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.097560975609757e-07, "loss": 1.2345, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.2195121951219514e-06, "loss": 1.3023, "step": 2 }, { "epoch": 0.01, "learning_rate": 1.8292682926829268e-06, "loss": 1.2241, "step": 3 }, { "epoch": 0.01, "learning_rate": 2.4390243902439027e-06, "loss": 1.2505, "step": 4 }, { "epoch": 0.01, "learning_rate": 3.0487804878048782e-06, "loss": 1.1555, "step": 5 }, { "epoch": 0.01, "learning_rate": 3.6585365853658537e-06, "loss": 1.101, "step": 6 }, { "epoch": 0.02, "learning_rate": 4.26829268292683e-06, "loss": 0.9013, "step": 7 }, { "epoch": 0.02, "learning_rate": 4.8780487804878055e-06, "loss": 0.8904, "step": 8 }, { "epoch": 0.02, "learning_rate": 5.487804878048781e-06, "loss": 0.7205, "step": 9 }, { "epoch": 0.02, "learning_rate": 6.0975609756097564e-06, "loss": 0.6704, "step": 10 }, { "epoch": 0.03, "learning_rate": 6.707317073170733e-06, "loss": 0.6029, "step": 11 }, { "epoch": 0.03, "learning_rate": 7.317073170731707e-06, "loss": 0.5434, "step": 12 }, { "epoch": 0.03, "learning_rate": 7.926829268292683e-06, "loss": 0.5544, "step": 13 }, { "epoch": 0.03, "learning_rate": 8.53658536585366e-06, "loss": 0.5272, "step": 14 }, { "epoch": 0.04, "learning_rate": 9.146341463414634e-06, "loss": 0.504, "step": 15 }, { "epoch": 0.04, "learning_rate": 9.756097560975611e-06, "loss": 0.505, "step": 16 }, { "epoch": 0.04, "learning_rate": 1.0365853658536585e-05, "loss": 0.5116, "step": 17 }, { "epoch": 0.04, "learning_rate": 1.0975609756097562e-05, "loss": 0.5009, "step": 18 }, { "epoch": 0.05, "learning_rate": 1.1585365853658537e-05, "loss": 0.4888, "step": 19 }, { "epoch": 0.05, "learning_rate": 1.2195121951219513e-05, "loss": 0.4531, "step": 20 }, { "epoch": 0.05, "learning_rate": 1.2804878048780488e-05, "loss": 0.4701, "step": 21 }, { "epoch": 0.05, "learning_rate": 1.3414634146341466e-05, "loss": 0.4221, "step": 22 }, { "epoch": 0.06, "learning_rate": 1.4024390243902441e-05, "loss": 0.4427, "step": 23 }, { "epoch": 0.06, "learning_rate": 1.4634146341463415e-05, "loss": 0.4266, "step": 24 }, { "epoch": 0.06, "learning_rate": 1.524390243902439e-05, "loss": 0.4375, "step": 25 }, { "epoch": 0.06, "learning_rate": 1.5853658536585366e-05, "loss": 0.4361, "step": 26 }, { "epoch": 0.07, "learning_rate": 1.6463414634146345e-05, "loss": 0.4482, "step": 27 }, { "epoch": 0.07, "learning_rate": 1.707317073170732e-05, "loss": 0.4398, "step": 28 }, { "epoch": 0.07, "learning_rate": 1.7682926829268292e-05, "loss": 0.4464, "step": 29 }, { "epoch": 0.07, "learning_rate": 1.8292682926829268e-05, "loss": 0.4687, "step": 30 }, { "epoch": 0.08, "learning_rate": 1.8902439024390246e-05, "loss": 0.4459, "step": 31 }, { "epoch": 0.08, "learning_rate": 1.9512195121951222e-05, "loss": 0.4257, "step": 32 }, { "epoch": 0.08, "learning_rate": 2.0121951219512197e-05, "loss": 0.3982, "step": 33 }, { "epoch": 0.08, "learning_rate": 2.073170731707317e-05, "loss": 0.4211, "step": 34 }, { "epoch": 0.09, "learning_rate": 2.134146341463415e-05, "loss": 0.4319, "step": 35 }, { "epoch": 0.09, "learning_rate": 2.1951219512195124e-05, "loss": 0.4641, "step": 36 }, { "epoch": 0.09, "learning_rate": 2.25609756097561e-05, "loss": 0.4335, "step": 37 }, { "epoch": 0.09, "learning_rate": 2.3170731707317075e-05, "loss": 0.4278, "step": 38 }, { "epoch": 0.09, "learning_rate": 2.378048780487805e-05, "loss": 0.3997, "step": 39 }, { "epoch": 0.1, "learning_rate": 2.4390243902439026e-05, "loss": 0.4259, "step": 40 }, { "epoch": 0.1, "learning_rate": 2.5e-05, "loss": 0.4156, "step": 41 }, { "epoch": 0.1, "learning_rate": 2.5609756097560977e-05, "loss": 0.4356, "step": 42 }, { "epoch": 0.1, "learning_rate": 2.6219512195121952e-05, "loss": 0.3966, "step": 43 }, { "epoch": 0.11, "learning_rate": 2.682926829268293e-05, "loss": 0.4271, "step": 44 }, { "epoch": 0.11, "learning_rate": 2.7439024390243906e-05, "loss": 0.4372, "step": 45 }, { "epoch": 0.11, "learning_rate": 2.8048780487804882e-05, "loss": 0.4174, "step": 46 }, { "epoch": 0.11, "learning_rate": 2.8658536585365854e-05, "loss": 0.4342, "step": 47 }, { "epoch": 0.12, "learning_rate": 2.926829268292683e-05, "loss": 0.401, "step": 48 }, { "epoch": 0.12, "learning_rate": 2.9878048780487805e-05, "loss": 0.4027, "step": 49 }, { "epoch": 0.12, "learning_rate": 3.048780487804878e-05, "loss": 0.4319, "step": 50 }, { "epoch": 0.12, "learning_rate": 3.109756097560976e-05, "loss": 0.4345, "step": 51 }, { "epoch": 0.13, "learning_rate": 3.170731707317073e-05, "loss": 0.4136, "step": 52 }, { "epoch": 0.13, "learning_rate": 3.231707317073171e-05, "loss": 0.4233, "step": 53 }, { "epoch": 0.13, "learning_rate": 3.292682926829269e-05, "loss": 0.4089, "step": 54 }, { "epoch": 0.13, "learning_rate": 3.353658536585366e-05, "loss": 0.4379, "step": 55 }, { "epoch": 0.14, "learning_rate": 3.414634146341464e-05, "loss": 0.3893, "step": 56 }, { "epoch": 0.14, "learning_rate": 3.475609756097561e-05, "loss": 0.4188, "step": 57 }, { "epoch": 0.14, "learning_rate": 3.5365853658536584e-05, "loss": 0.4106, "step": 58 }, { "epoch": 0.14, "learning_rate": 3.597560975609756e-05, "loss": 0.45, "step": 59 }, { "epoch": 0.15, "learning_rate": 3.6585365853658535e-05, "loss": 0.3955, "step": 60 }, { "epoch": 0.15, "learning_rate": 3.7195121951219514e-05, "loss": 0.4393, "step": 61 }, { "epoch": 0.15, "learning_rate": 3.780487804878049e-05, "loss": 0.4256, "step": 62 }, { "epoch": 0.15, "learning_rate": 3.8414634146341465e-05, "loss": 0.4139, "step": 63 }, { "epoch": 0.16, "learning_rate": 3.9024390243902444e-05, "loss": 0.4423, "step": 64 }, { "epoch": 0.16, "learning_rate": 3.9634146341463416e-05, "loss": 0.4259, "step": 65 }, { "epoch": 0.16, "learning_rate": 4.0243902439024395e-05, "loss": 0.4225, "step": 66 }, { "epoch": 0.16, "learning_rate": 4.085365853658537e-05, "loss": 0.42, "step": 67 }, { "epoch": 0.17, "learning_rate": 4.146341463414634e-05, "loss": 0.4104, "step": 68 }, { "epoch": 0.17, "learning_rate": 4.207317073170732e-05, "loss": 0.4085, "step": 69 }, { "epoch": 0.17, "learning_rate": 4.26829268292683e-05, "loss": 0.421, "step": 70 }, { "epoch": 0.17, "learning_rate": 4.329268292682927e-05, "loss": 0.3984, "step": 71 }, { "epoch": 0.18, "learning_rate": 4.390243902439025e-05, "loss": 0.4428, "step": 72 }, { "epoch": 0.18, "learning_rate": 4.451219512195122e-05, "loss": 0.4468, "step": 73 }, { "epoch": 0.18, "learning_rate": 4.51219512195122e-05, "loss": 0.4245, "step": 74 }, { "epoch": 0.18, "learning_rate": 4.573170731707318e-05, "loss": 0.4008, "step": 75 }, { "epoch": 0.18, "learning_rate": 4.634146341463415e-05, "loss": 0.4013, "step": 76 }, { "epoch": 0.19, "learning_rate": 4.695121951219512e-05, "loss": 0.4276, "step": 77 }, { "epoch": 0.19, "learning_rate": 4.75609756097561e-05, "loss": 0.4307, "step": 78 }, { "epoch": 0.19, "learning_rate": 4.817073170731707e-05, "loss": 0.3836, "step": 79 }, { "epoch": 0.19, "learning_rate": 4.878048780487805e-05, "loss": 0.4272, "step": 80 }, { "epoch": 0.2, "learning_rate": 4.9390243902439024e-05, "loss": 0.4091, "step": 81 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 0.3907, "step": 82 }, { "epoch": 0.2, "learning_rate": 4.9999968146329897e-05, "loss": 0.4449, "step": 83 }, { "epoch": 0.2, "learning_rate": 4.9999872585400745e-05, "loss": 0.3889, "step": 84 }, { "epoch": 0.21, "learning_rate": 4.999971331745607e-05, "loss": 0.4582, "step": 85 }, { "epoch": 0.21, "learning_rate": 4.9999490342901726e-05, "loss": 0.4386, "step": 86 }, { "epoch": 0.21, "learning_rate": 4.9999203662305926e-05, "loss": 0.4174, "step": 87 }, { "epoch": 0.21, "learning_rate": 4.9998853276399215e-05, "loss": 0.4124, "step": 88 }, { "epoch": 0.22, "learning_rate": 4.9998439186074476e-05, "loss": 0.4114, "step": 89 }, { "epoch": 0.22, "learning_rate": 4.999796139238694e-05, "loss": 0.4208, "step": 90 }, { "epoch": 0.22, "learning_rate": 4.999741989655415e-05, "loss": 0.4266, "step": 91 }, { "epoch": 0.22, "learning_rate": 4.999681469995601e-05, "loss": 0.3977, "step": 92 }, { "epoch": 0.23, "learning_rate": 4.999614580413473e-05, "loss": 0.4004, "step": 93 }, { "epoch": 0.23, "learning_rate": 4.9995413210794864e-05, "loss": 0.4481, "step": 94 }, { "epoch": 0.23, "learning_rate": 4.9994616921803264e-05, "loss": 0.4147, "step": 95 }, { "epoch": 0.23, "learning_rate": 4.999375693918911e-05, "loss": 0.4221, "step": 96 }, { "epoch": 0.24, "learning_rate": 4.99928332651439e-05, "loss": 0.428, "step": 97 }, { "epoch": 0.24, "learning_rate": 4.999184590202141e-05, "loss": 0.4283, "step": 98 }, { "epoch": 0.24, "learning_rate": 4.999079485233775e-05, "loss": 0.4324, "step": 99 }, { "epoch": 0.24, "learning_rate": 4.9989680118771284e-05, "loss": 0.4293, "step": 100 }, { "epoch": 0.25, "learning_rate": 4.99885017041627e-05, "loss": 0.4466, "step": 101 }, { "epoch": 0.25, "learning_rate": 4.998725961151493e-05, "loss": 0.4101, "step": 102 }, { "epoch": 0.25, "learning_rate": 4.9985953843993194e-05, "loss": 0.3773, "step": 103 }, { "epoch": 0.25, "learning_rate": 4.998458440492497e-05, "loss": 0.4226, "step": 104 }, { "epoch": 0.26, "learning_rate": 4.9983151297800005e-05, "loss": 0.4156, "step": 105 }, { "epoch": 0.26, "learning_rate": 4.998165452627025e-05, "loss": 0.3961, "step": 106 }, { "epoch": 0.26, "learning_rate": 4.9980094094149945e-05, "loss": 0.4271, "step": 107 }, { "epoch": 0.26, "learning_rate": 4.997847000541551e-05, "loss": 0.4275, "step": 108 }, { "epoch": 0.27, "learning_rate": 4.997678226420561e-05, "loss": 0.3846, "step": 109 }, { "epoch": 0.27, "learning_rate": 4.99750308748211e-05, "loss": 0.4237, "step": 110 }, { "epoch": 0.27, "learning_rate": 4.997321584172504e-05, "loss": 0.4215, "step": 111 }, { "epoch": 0.27, "learning_rate": 4.9971337169542665e-05, "loss": 0.3897, "step": 112 }, { "epoch": 0.28, "learning_rate": 4.996939486306138e-05, "loss": 0.4016, "step": 113 }, { "epoch": 0.28, "learning_rate": 4.996738892723075e-05, "loss": 0.4399, "step": 114 }, { "epoch": 0.28, "learning_rate": 4.99653193671625e-05, "loss": 0.4347, "step": 115 }, { "epoch": 0.28, "learning_rate": 4.996318618813046e-05, "loss": 0.4371, "step": 116 }, { "epoch": 0.28, "learning_rate": 4.996098939557062e-05, "loss": 0.4298, "step": 117 }, { "epoch": 0.29, "learning_rate": 4.995872899508103e-05, "loss": 0.4204, "step": 118 }, { "epoch": 0.29, "learning_rate": 4.995640499242187e-05, "loss": 0.3856, "step": 119 }, { "epoch": 0.29, "learning_rate": 4.995401739351536e-05, "loss": 0.4044, "step": 120 }, { "epoch": 0.29, "learning_rate": 4.9951566204445834e-05, "loss": 0.4019, "step": 121 }, { "epoch": 0.3, "learning_rate": 4.9949051431459615e-05, "loss": 0.4484, "step": 122 }, { "epoch": 0.3, "learning_rate": 4.994647308096509e-05, "loss": 0.4149, "step": 123 }, { "epoch": 0.3, "learning_rate": 4.9943831159532665e-05, "loss": 0.4163, "step": 124 }, { "epoch": 0.3, "learning_rate": 4.994112567389471e-05, "loss": 0.4097, "step": 125 }, { "epoch": 0.31, "learning_rate": 4.9938356630945616e-05, "loss": 0.4045, "step": 126 }, { "epoch": 0.31, "learning_rate": 4.99355240377417e-05, "loss": 0.4257, "step": 127 }, { "epoch": 0.31, "learning_rate": 4.993262790150126e-05, "loss": 0.3949, "step": 128 }, { "epoch": 0.31, "learning_rate": 4.99296682296045e-05, "loss": 0.4253, "step": 129 }, { "epoch": 0.32, "learning_rate": 4.992664502959351e-05, "loss": 0.3911, "step": 130 }, { "epoch": 0.32, "learning_rate": 4.992355830917232e-05, "loss": 0.4163, "step": 131 }, { "epoch": 0.32, "learning_rate": 4.992040807620678e-05, "loss": 0.3949, "step": 132 }, { "epoch": 0.32, "learning_rate": 4.9917194338724614e-05, "loss": 0.4146, "step": 133 }, { "epoch": 0.33, "learning_rate": 4.9913917104915374e-05, "loss": 0.4143, "step": 134 }, { "epoch": 0.33, "learning_rate": 4.9910576383130414e-05, "loss": 0.4096, "step": 135 }, { "epoch": 0.33, "learning_rate": 4.990717218188286e-05, "loss": 0.3887, "step": 136 }, { "epoch": 0.33, "learning_rate": 4.990370450984763e-05, "loss": 0.4135, "step": 137 }, { "epoch": 0.34, "learning_rate": 4.990017337586137e-05, "loss": 0.426, "step": 138 }, { "epoch": 0.34, "learning_rate": 4.989657878892244e-05, "loss": 0.4379, "step": 139 }, { "epoch": 0.34, "learning_rate": 4.9892920758190907e-05, "loss": 0.4185, "step": 140 }, { "epoch": 0.34, "learning_rate": 4.988919929298851e-05, "loss": 0.4309, "step": 141 }, { "epoch": 0.35, "learning_rate": 4.9885414402798624e-05, "loss": 0.4489, "step": 142 }, { "epoch": 0.35, "learning_rate": 4.988156609726628e-05, "loss": 0.3993, "step": 143 }, { "epoch": 0.35, "learning_rate": 4.987765438619806e-05, "loss": 0.4559, "step": 144 }, { "epoch": 0.35, "learning_rate": 4.987367927956218e-05, "loss": 0.4005, "step": 145 }, { "epoch": 0.36, "learning_rate": 4.986964078748837e-05, "loss": 0.3977, "step": 146 }, { "epoch": 0.36, "learning_rate": 4.986553892026789e-05, "loss": 0.4036, "step": 147 }, { "epoch": 0.36, "learning_rate": 4.9861373688353504e-05, "loss": 0.4411, "step": 148 }, { "epoch": 0.36, "learning_rate": 4.9857145102359456e-05, "loss": 0.4303, "step": 149 }, { "epoch": 0.37, "learning_rate": 4.985285317306141e-05, "loss": 0.4416, "step": 150 }, { "epoch": 0.37, "learning_rate": 4.984849791139646e-05, "loss": 0.3917, "step": 151 }, { "epoch": 0.37, "learning_rate": 4.984407932846311e-05, "loss": 0.3887, "step": 152 }, { "epoch": 0.37, "learning_rate": 4.983959743552118e-05, "loss": 0.4235, "step": 153 }, { "epoch": 0.37, "learning_rate": 4.9835052243991874e-05, "loss": 0.3951, "step": 154 }, { "epoch": 0.38, "learning_rate": 4.983044376545767e-05, "loss": 0.3995, "step": 155 }, { "epoch": 0.38, "learning_rate": 4.982577201166232e-05, "loss": 0.3995, "step": 156 }, { "epoch": 0.38, "learning_rate": 4.982103699451082e-05, "loss": 0.4131, "step": 157 }, { "epoch": 0.38, "learning_rate": 4.981623872606938e-05, "loss": 0.4159, "step": 158 }, { "epoch": 0.39, "learning_rate": 4.981137721856541e-05, "loss": 0.4039, "step": 159 }, { "epoch": 0.39, "learning_rate": 4.980645248438745e-05, "loss": 0.442, "step": 160 }, { "epoch": 0.39, "learning_rate": 4.980146453608518e-05, "loss": 0.4113, "step": 161 }, { "epoch": 0.39, "learning_rate": 4.979641338636935e-05, "loss": 0.4177, "step": 162 }, { "epoch": 0.4, "learning_rate": 4.979129904811176e-05, "loss": 0.4017, "step": 163 }, { "epoch": 0.4, "learning_rate": 4.9786121534345265e-05, "loss": 0.4274, "step": 164 }, { "epoch": 0.4, "learning_rate": 4.978088085826368e-05, "loss": 0.4544, "step": 165 }, { "epoch": 0.4, "learning_rate": 4.977557703322178e-05, "loss": 0.39, "step": 166 }, { "epoch": 0.41, "learning_rate": 4.977021007273528e-05, "loss": 0.418, "step": 167 }, { "epoch": 0.41, "learning_rate": 4.976477999048077e-05, "loss": 0.3923, "step": 168 }, { "epoch": 0.41, "learning_rate": 4.97592868002957e-05, "loss": 0.4087, "step": 169 }, { "epoch": 0.41, "learning_rate": 4.9753730516178313e-05, "loss": 0.4061, "step": 170 }, { "epoch": 0.42, "learning_rate": 4.974811115228767e-05, "loss": 0.3747, "step": 171 }, { "epoch": 0.42, "learning_rate": 4.9742428722943545e-05, "loss": 0.399, "step": 172 }, { "epoch": 0.42, "learning_rate": 4.973668324262645e-05, "loss": 0.3833, "step": 173 }, { "epoch": 0.42, "learning_rate": 4.973087472597754e-05, "loss": 0.4333, "step": 174 }, { "epoch": 0.43, "learning_rate": 4.972500318779863e-05, "loss": 0.406, "step": 175 }, { "epoch": 0.43, "learning_rate": 4.9719068643052135e-05, "loss": 0.39, "step": 176 }, { "epoch": 0.43, "learning_rate": 4.9713071106860996e-05, "loss": 0.4317, "step": 177 }, { "epoch": 0.43, "learning_rate": 4.970701059450872e-05, "loss": 0.4173, "step": 178 }, { "epoch": 0.44, "learning_rate": 4.9700887121439244e-05, "loss": 0.3884, "step": 179 }, { "epoch": 0.44, "learning_rate": 4.969470070325699e-05, "loss": 0.3944, "step": 180 }, { "epoch": 0.44, "learning_rate": 4.968845135572677e-05, "loss": 0.4076, "step": 181 }, { "epoch": 0.44, "learning_rate": 4.968213909477376e-05, "loss": 0.4195, "step": 182 }, { "epoch": 0.45, "learning_rate": 4.967576393648344e-05, "loss": 0.4093, "step": 183 }, { "epoch": 0.45, "learning_rate": 4.9669325897101604e-05, "loss": 0.3974, "step": 184 }, { "epoch": 0.45, "learning_rate": 4.966282499303424e-05, "loss": 0.4025, "step": 185 }, { "epoch": 0.45, "learning_rate": 4.965626124084759e-05, "loss": 0.4058, "step": 186 }, { "epoch": 0.46, "learning_rate": 4.9649634657267995e-05, "loss": 0.4007, "step": 187 }, { "epoch": 0.46, "learning_rate": 4.964294525918196e-05, "loss": 0.4218, "step": 188 }, { "epoch": 0.46, "learning_rate": 4.963619306363602e-05, "loss": 0.4141, "step": 189 }, { "epoch": 0.46, "learning_rate": 4.962937808783675e-05, "loss": 0.4233, "step": 190 }, { "epoch": 0.46, "learning_rate": 4.9622500349150716e-05, "loss": 0.3931, "step": 191 }, { "epoch": 0.47, "learning_rate": 4.961555986510442e-05, "loss": 0.4144, "step": 192 }, { "epoch": 0.47, "learning_rate": 4.960855665338424e-05, "loss": 0.3957, "step": 193 }, { "epoch": 0.47, "learning_rate": 4.960149073183643e-05, "loss": 0.3879, "step": 194 }, { "epoch": 0.47, "learning_rate": 4.959436211846703e-05, "loss": 0.4152, "step": 195 }, { "epoch": 0.48, "learning_rate": 4.958717083144182e-05, "loss": 0.4143, "step": 196 }, { "epoch": 0.48, "learning_rate": 4.957991688908634e-05, "loss": 0.3976, "step": 197 }, { "epoch": 0.48, "learning_rate": 4.9572600309885744e-05, "loss": 0.4072, "step": 198 }, { "epoch": 0.48, "learning_rate": 4.956522111248483e-05, "loss": 0.3903, "step": 199 }, { "epoch": 0.49, "learning_rate": 4.955777931568797e-05, "loss": 0.3908, "step": 200 }, { "epoch": 0.49, "learning_rate": 4.955027493845903e-05, "loss": 0.4284, "step": 201 }, { "epoch": 0.49, "learning_rate": 4.954270799992138e-05, "loss": 0.4072, "step": 202 }, { "epoch": 0.49, "learning_rate": 4.953507851935779e-05, "loss": 0.43, "step": 203 }, { "epoch": 0.5, "learning_rate": 4.952738651621043e-05, "loss": 0.4228, "step": 204 }, { "epoch": 0.5, "learning_rate": 4.951963201008076e-05, "loss": 0.3991, "step": 205 }, { "epoch": 0.5, "learning_rate": 4.951181502072957e-05, "loss": 0.4057, "step": 206 }, { "epoch": 0.5, "learning_rate": 4.950393556807682e-05, "loss": 0.3987, "step": 207 }, { "epoch": 0.51, "learning_rate": 4.949599367220168e-05, "loss": 0.4142, "step": 208 }, { "epoch": 0.51, "learning_rate": 4.948798935334242e-05, "loss": 0.3994, "step": 209 }, { "epoch": 0.51, "learning_rate": 4.9479922631896405e-05, "loss": 0.3989, "step": 210 }, { "epoch": 0.51, "learning_rate": 4.947179352842001e-05, "loss": 0.4186, "step": 211 }, { "epoch": 0.52, "learning_rate": 4.946360206362858e-05, "loss": 0.3896, "step": 212 }, { "epoch": 0.52, "learning_rate": 4.9455348258396364e-05, "loss": 0.4122, "step": 213 }, { "epoch": 0.52, "learning_rate": 4.944703213375648e-05, "loss": 0.4319, "step": 214 }, { "epoch": 0.52, "learning_rate": 4.9438653710900864e-05, "loss": 0.3997, "step": 215 }, { "epoch": 0.53, "learning_rate": 4.943021301118019e-05, "loss": 0.3924, "step": 216 }, { "epoch": 0.53, "learning_rate": 4.942171005610385e-05, "loss": 0.3952, "step": 217 }, { "epoch": 0.53, "learning_rate": 4.941314486733986e-05, "loss": 0.4137, "step": 218 }, { "epoch": 0.53, "learning_rate": 4.940451746671484e-05, "loss": 0.4277, "step": 219 }, { "epoch": 0.54, "learning_rate": 4.9395827876213936e-05, "loss": 0.4003, "step": 220 }, { "epoch": 0.54, "learning_rate": 4.938707611798078e-05, "loss": 0.3884, "step": 221 }, { "epoch": 0.54, "learning_rate": 4.937826221431742e-05, "loss": 0.4003, "step": 222 }, { "epoch": 0.54, "learning_rate": 4.936938618768426e-05, "loss": 0.4183, "step": 223 }, { "epoch": 0.55, "learning_rate": 4.936044806070004e-05, "loss": 0.4319, "step": 224 }, { "epoch": 0.55, "learning_rate": 4.935144785614173e-05, "loss": 0.3968, "step": 225 }, { "epoch": 0.55, "learning_rate": 4.934238559694448e-05, "loss": 0.3749, "step": 226 }, { "epoch": 0.55, "learning_rate": 4.9333261306201595e-05, "loss": 0.4044, "step": 227 }, { "epoch": 0.55, "learning_rate": 4.932407500716445e-05, "loss": 0.4067, "step": 228 }, { "epoch": 0.56, "learning_rate": 4.9314826723242425e-05, "loss": 0.417, "step": 229 }, { "epoch": 0.56, "learning_rate": 4.9305516478002865e-05, "loss": 0.4099, "step": 230 }, { "epoch": 0.56, "learning_rate": 4.9296144295171024e-05, "loss": 0.4201, "step": 231 }, { "epoch": 0.56, "learning_rate": 4.928671019862995e-05, "loss": 0.3848, "step": 232 }, { "epoch": 0.57, "learning_rate": 4.92772142124205e-05, "loss": 0.3959, "step": 233 }, { "epoch": 0.57, "learning_rate": 4.9267656360741245e-05, "loss": 0.3794, "step": 234 }, { "epoch": 0.57, "learning_rate": 4.925803666794838e-05, "loss": 0.3956, "step": 235 }, { "epoch": 0.57, "learning_rate": 4.924835515855572e-05, "loss": 0.423, "step": 236 }, { "epoch": 0.58, "learning_rate": 4.92386118572346e-05, "loss": 0.4015, "step": 237 }, { "epoch": 0.58, "learning_rate": 4.92288067888138e-05, "loss": 0.4043, "step": 238 }, { "epoch": 0.58, "learning_rate": 4.921893997827951e-05, "loss": 0.3711, "step": 239 }, { "epoch": 0.58, "learning_rate": 4.920901145077527e-05, "loss": 0.4248, "step": 240 }, { "epoch": 0.59, "learning_rate": 4.919902123160187e-05, "loss": 0.4235, "step": 241 }, { "epoch": 0.59, "learning_rate": 4.918896934621734e-05, "loss": 0.4214, "step": 242 }, { "epoch": 0.59, "learning_rate": 4.9178855820236824e-05, "loss": 0.3827, "step": 243 }, { "epoch": 0.59, "learning_rate": 4.916868067943256e-05, "loss": 0.3948, "step": 244 }, { "epoch": 0.6, "learning_rate": 4.915844394973379e-05, "loss": 0.3697, "step": 245 }, { "epoch": 0.6, "learning_rate": 4.914814565722671e-05, "loss": 0.4164, "step": 246 }, { "epoch": 0.6, "learning_rate": 4.9137785828154393e-05, "loss": 0.3942, "step": 247 }, { "epoch": 0.6, "learning_rate": 4.9127364488916716e-05, "loss": 0.3949, "step": 248 }, { "epoch": 0.61, "learning_rate": 4.9116881666070327e-05, "loss": 0.3867, "step": 249 }, { "epoch": 0.61, "learning_rate": 4.9106337386328524e-05, "loss": 0.3842, "step": 250 }, { "epoch": 0.61, "learning_rate": 4.909573167656124e-05, "loss": 0.3975, "step": 251 }, { "epoch": 0.61, "learning_rate": 4.9085064563794925e-05, "loss": 0.4215, "step": 252 }, { "epoch": 0.62, "learning_rate": 4.907433607521251e-05, "loss": 0.3782, "step": 253 }, { "epoch": 0.62, "learning_rate": 4.906354623815336e-05, "loss": 0.399, "step": 254 }, { "epoch": 0.62, "learning_rate": 4.905269508011312e-05, "loss": 0.4041, "step": 255 }, { "epoch": 0.62, "learning_rate": 4.904178262874374e-05, "loss": 0.3899, "step": 256 }, { "epoch": 0.63, "learning_rate": 4.903080891185335e-05, "loss": 0.3772, "step": 257 }, { "epoch": 0.63, "learning_rate": 4.901977395740619e-05, "loss": 0.4334, "step": 258 }, { "epoch": 0.63, "learning_rate": 4.9008677793522584e-05, "loss": 0.383, "step": 259 }, { "epoch": 0.63, "learning_rate": 4.899752044847881e-05, "loss": 0.4064, "step": 260 }, { "epoch": 0.64, "learning_rate": 4.898630195070705e-05, "loss": 0.3921, "step": 261 }, { "epoch": 0.64, "learning_rate": 4.8975022328795325e-05, "loss": 0.415, "step": 262 }, { "epoch": 0.64, "learning_rate": 4.8963681611487445e-05, "loss": 0.4128, "step": 263 }, { "epoch": 0.64, "learning_rate": 4.895227982768287e-05, "loss": 0.4232, "step": 264 }, { "epoch": 0.64, "learning_rate": 4.89408170064367e-05, "loss": 0.3914, "step": 265 }, { "epoch": 0.65, "learning_rate": 4.892929317695957e-05, "loss": 0.404, "step": 266 }, { "epoch": 0.65, "learning_rate": 4.891770836861757e-05, "loss": 0.4274, "step": 267 }, { "epoch": 0.65, "learning_rate": 4.8906062610932215e-05, "loss": 0.4025, "step": 268 }, { "epoch": 0.65, "learning_rate": 4.889435593358029e-05, "loss": 0.3822, "step": 269 }, { "epoch": 0.66, "learning_rate": 4.888258836639386e-05, "loss": 0.4048, "step": 270 }, { "epoch": 0.66, "learning_rate": 4.8870759939360136e-05, "loss": 0.3952, "step": 271 }, { "epoch": 0.66, "learning_rate": 4.885887068262143e-05, "loss": 0.4112, "step": 272 }, { "epoch": 0.66, "learning_rate": 4.884692062647506e-05, "loss": 0.4039, "step": 273 }, { "epoch": 0.67, "learning_rate": 4.8834909801373264e-05, "loss": 0.4157, "step": 274 }, { "epoch": 0.67, "learning_rate": 4.8822838237923166e-05, "loss": 0.4066, "step": 275 }, { "epoch": 0.67, "learning_rate": 4.881070596688664e-05, "loss": 0.387, "step": 276 }, { "epoch": 0.67, "learning_rate": 4.8798513019180295e-05, "loss": 0.407, "step": 277 }, { "epoch": 0.68, "learning_rate": 4.878625942587532e-05, "loss": 0.4103, "step": 278 }, { "epoch": 0.68, "learning_rate": 4.877394521819747e-05, "loss": 0.411, "step": 279 }, { "epoch": 0.68, "learning_rate": 4.8761570427526973e-05, "loss": 0.3986, "step": 280 }, { "epoch": 0.68, "learning_rate": 4.874913508539844e-05, "loss": 0.3858, "step": 281 }, { "epoch": 0.69, "learning_rate": 4.873663922350073e-05, "loss": 0.4145, "step": 282 }, { "epoch": 0.69, "learning_rate": 4.8724082873677027e-05, "loss": 0.4027, "step": 283 }, { "epoch": 0.69, "learning_rate": 4.871146606792455e-05, "loss": 0.393, "step": 284 }, { "epoch": 0.69, "learning_rate": 4.8698788838394644e-05, "loss": 0.3802, "step": 285 }, { "epoch": 0.7, "learning_rate": 4.8686051217392606e-05, "loss": 0.3923, "step": 286 }, { "epoch": 0.7, "learning_rate": 4.867325323737765e-05, "loss": 0.3985, "step": 287 }, { "epoch": 0.7, "learning_rate": 4.866039493096276e-05, "loss": 0.3941, "step": 288 }, { "epoch": 0.7, "learning_rate": 4.86474763309147e-05, "loss": 0.3776, "step": 289 }, { "epoch": 0.71, "learning_rate": 4.863449747015384e-05, "loss": 0.4265, "step": 290 }, { "epoch": 0.71, "learning_rate": 4.862145838175413e-05, "loss": 0.4001, "step": 291 }, { "epoch": 0.71, "learning_rate": 4.860835909894301e-05, "loss": 0.4198, "step": 292 }, { "epoch": 0.71, "learning_rate": 4.859519965510129e-05, "loss": 0.383, "step": 293 }, { "epoch": 0.72, "learning_rate": 4.858198008376308e-05, "loss": 0.4056, "step": 294 }, { "epoch": 0.72, "learning_rate": 4.856870041861575e-05, "loss": 0.4108, "step": 295 }, { "epoch": 0.72, "learning_rate": 4.8555360693499786e-05, "loss": 0.3703, "step": 296 }, { "epoch": 0.72, "learning_rate": 4.8541960942408716e-05, "loss": 0.3799, "step": 297 }, { "epoch": 0.73, "learning_rate": 4.852850119948904e-05, "loss": 0.3736, "step": 298 }, { "epoch": 0.73, "learning_rate": 4.851498149904014e-05, "loss": 0.3908, "step": 299 }, { "epoch": 0.73, "learning_rate": 4.850140187551417e-05, "loss": 0.3968, "step": 300 }, { "epoch": 0.73, "learning_rate": 4.8487762363516024e-05, "loss": 0.3925, "step": 301 }, { "epoch": 0.74, "learning_rate": 4.847406299780316e-05, "loss": 0.3768, "step": 302 }, { "epoch": 0.74, "learning_rate": 4.8460303813285585e-05, "loss": 0.4419, "step": 303 }, { "epoch": 0.74, "learning_rate": 4.844648484502575e-05, "loss": 0.3688, "step": 304 }, { "epoch": 0.74, "learning_rate": 4.843260612823844e-05, "loss": 0.4208, "step": 305 }, { "epoch": 0.74, "learning_rate": 4.8418667698290696e-05, "loss": 0.4063, "step": 306 }, { "epoch": 0.75, "learning_rate": 4.840466959070174e-05, "loss": 0.3719, "step": 307 }, { "epoch": 0.75, "learning_rate": 4.839061184114285e-05, "loss": 0.3985, "step": 308 }, { "epoch": 0.75, "learning_rate": 4.837649448543731e-05, "loss": 0.3868, "step": 309 }, { "epoch": 0.75, "learning_rate": 4.8362317559560274e-05, "loss": 0.3881, "step": 310 }, { "epoch": 0.76, "learning_rate": 4.834808109963873e-05, "loss": 0.4067, "step": 311 }, { "epoch": 0.76, "learning_rate": 4.833378514195133e-05, "loss": 0.3883, "step": 312 }, { "epoch": 0.76, "learning_rate": 4.83194297229284e-05, "loss": 0.3996, "step": 313 }, { "epoch": 0.76, "learning_rate": 4.830501487915174e-05, "loss": 0.4075, "step": 314 }, { "epoch": 0.77, "learning_rate": 4.8290540647354624e-05, "loss": 0.3918, "step": 315 }, { "epoch": 0.77, "learning_rate": 4.8276007064421635e-05, "loss": 0.4206, "step": 316 }, { "epoch": 0.77, "learning_rate": 4.826141416738861e-05, "loss": 0.3924, "step": 317 }, { "epoch": 0.77, "learning_rate": 4.824676199344253e-05, "loss": 0.3814, "step": 318 }, { "epoch": 0.78, "learning_rate": 4.8232050579921445e-05, "loss": 0.3809, "step": 319 }, { "epoch": 0.78, "learning_rate": 4.821727996431435e-05, "loss": 0.3979, "step": 320 }, { "epoch": 0.78, "learning_rate": 4.8202450184261116e-05, "loss": 0.4201, "step": 321 }, { "epoch": 0.78, "learning_rate": 4.8187561277552374e-05, "loss": 0.3785, "step": 322 }, { "epoch": 0.79, "learning_rate": 4.817261328212942e-05, "loss": 0.3918, "step": 323 }, { "epoch": 0.79, "learning_rate": 4.815760623608415e-05, "loss": 0.3789, "step": 324 }, { "epoch": 0.79, "learning_rate": 4.8142540177658925e-05, "loss": 0.3967, "step": 325 }, { "epoch": 0.79, "learning_rate": 4.812741514524647e-05, "loss": 0.4155, "step": 326 }, { "epoch": 0.8, "learning_rate": 4.811223117738981e-05, "loss": 0.3727, "step": 327 }, { "epoch": 0.8, "learning_rate": 4.8096988312782174e-05, "loss": 0.396, "step": 328 }, { "epoch": 0.8, "learning_rate": 4.8081686590266835e-05, "loss": 0.3694, "step": 329 }, { "epoch": 0.8, "learning_rate": 4.806632604883708e-05, "loss": 0.3919, "step": 330 }, { "epoch": 0.81, "learning_rate": 4.8050906727636085e-05, "loss": 0.3757, "step": 331 }, { "epoch": 0.81, "learning_rate": 4.8035428665956806e-05, "loss": 0.381, "step": 332 }, { "epoch": 0.81, "learning_rate": 4.801989190324188e-05, "loss": 0.3915, "step": 333 }, { "epoch": 0.81, "learning_rate": 4.800429647908354e-05, "loss": 0.3995, "step": 334 }, { "epoch": 0.82, "learning_rate": 4.798864243322353e-05, "loss": 0.4188, "step": 335 }, { "epoch": 0.82, "learning_rate": 4.7972929805552926e-05, "loss": 0.3832, "step": 336 }, { "epoch": 0.82, "learning_rate": 4.795715863611212e-05, "loss": 0.3624, "step": 337 }, { "epoch": 0.82, "learning_rate": 4.79413289650907e-05, "loss": 0.3779, "step": 338 }, { "epoch": 0.83, "learning_rate": 4.7925440832827307e-05, "loss": 0.425, "step": 339 }, { "epoch": 0.83, "learning_rate": 4.790949427980956e-05, "loss": 0.3829, "step": 340 }, { "epoch": 0.83, "learning_rate": 4.7893489346673965e-05, "loss": 0.3877, "step": 341 }, { "epoch": 0.83, "learning_rate": 4.7877426074205786e-05, "loss": 0.4043, "step": 342 }, { "epoch": 0.83, "learning_rate": 4.786130450333897e-05, "loss": 0.3687, "step": 343 }, { "epoch": 0.84, "learning_rate": 4.784512467515599e-05, "loss": 0.3679, "step": 344 }, { "epoch": 0.84, "learning_rate": 4.782888663088781e-05, "loss": 0.3957, "step": 345 }, { "epoch": 0.84, "learning_rate": 4.781259041191375e-05, "loss": 0.4215, "step": 346 }, { "epoch": 0.84, "learning_rate": 4.7796236059761346e-05, "loss": 0.3881, "step": 347 }, { "epoch": 0.85, "learning_rate": 4.777982361610629e-05, "loss": 0.3882, "step": 348 }, { "epoch": 0.85, "learning_rate": 4.7763353122772305e-05, "loss": 0.386, "step": 349 }, { "epoch": 0.85, "learning_rate": 4.774682462173105e-05, "loss": 0.3747, "step": 350 }, { "epoch": 0.85, "learning_rate": 4.773023815510199e-05, "loss": 0.4025, "step": 351 }, { "epoch": 0.86, "learning_rate": 4.7713593765152316e-05, "loss": 0.3759, "step": 352 }, { "epoch": 0.86, "learning_rate": 4.7696891494296826e-05, "loss": 0.3693, "step": 353 }, { "epoch": 0.86, "learning_rate": 4.7680131385097806e-05, "loss": 0.3718, "step": 354 }, { "epoch": 0.86, "learning_rate": 4.766331348026493e-05, "loss": 0.3787, "step": 355 }, { "epoch": 0.87, "learning_rate": 4.764643782265516e-05, "loss": 0.3809, "step": 356 }, { "epoch": 0.87, "learning_rate": 4.762950445527264e-05, "loss": 0.416, "step": 357 }, { "epoch": 0.87, "learning_rate": 4.7612513421268544e-05, "loss": 0.3663, "step": 358 }, { "epoch": 0.87, "learning_rate": 4.7595464763941024e-05, "loss": 0.3872, "step": 359 }, { "epoch": 0.88, "learning_rate": 4.7578358526735065e-05, "loss": 0.3923, "step": 360 }, { "epoch": 0.88, "learning_rate": 4.756119475324237e-05, "loss": 0.3853, "step": 361 }, { "epoch": 0.88, "learning_rate": 4.7543973487201286e-05, "loss": 0.4108, "step": 362 }, { "epoch": 0.88, "learning_rate": 4.752669477249666e-05, "loss": 0.3972, "step": 363 }, { "epoch": 0.89, "learning_rate": 4.750935865315971e-05, "loss": 0.3796, "step": 364 }, { "epoch": 0.89, "learning_rate": 4.749196517336798e-05, "loss": 0.3624, "step": 365 }, { "epoch": 0.89, "learning_rate": 4.747451437744515e-05, "loss": 0.3902, "step": 366 }, { "epoch": 0.89, "learning_rate": 4.7457006309860976e-05, "loss": 0.4268, "step": 367 }, { "epoch": 0.9, "learning_rate": 4.7439441015231154e-05, "loss": 0.3881, "step": 368 }, { "epoch": 0.9, "learning_rate": 4.742181853831721e-05, "loss": 0.3927, "step": 369 }, { "epoch": 0.9, "learning_rate": 4.740413892402639e-05, "loss": 0.4028, "step": 370 }, { "epoch": 0.9, "learning_rate": 4.7386402217411555e-05, "loss": 0.3957, "step": 371 }, { "epoch": 0.91, "learning_rate": 4.7368608463671013e-05, "loss": 0.3859, "step": 372 }, { "epoch": 0.91, "learning_rate": 4.7350757708148495e-05, "loss": 0.4055, "step": 373 }, { "epoch": 0.91, "learning_rate": 4.733284999633297e-05, "loss": 0.4085, "step": 374 }, { "epoch": 0.91, "learning_rate": 4.731488537385853e-05, "loss": 0.3968, "step": 375 }, { "epoch": 0.92, "learning_rate": 4.729686388650432e-05, "loss": 0.4205, "step": 376 }, { "epoch": 0.92, "learning_rate": 4.7278785580194365e-05, "loss": 0.3751, "step": 377 }, { "epoch": 0.92, "learning_rate": 4.7260650500997514e-05, "loss": 0.3866, "step": 378 }, { "epoch": 0.92, "learning_rate": 4.724245869512727e-05, "loss": 0.3916, "step": 379 }, { "epoch": 0.92, "learning_rate": 4.722421020894169e-05, "loss": 0.3858, "step": 380 }, { "epoch": 0.93, "learning_rate": 4.7205905088943286e-05, "loss": 0.4032, "step": 381 }, { "epoch": 0.93, "learning_rate": 4.7187543381778864e-05, "loss": 0.3772, "step": 382 }, { "epoch": 0.93, "learning_rate": 4.716912513423945e-05, "loss": 0.3906, "step": 383 }, { "epoch": 0.93, "learning_rate": 4.715065039326015e-05, "loss": 0.4172, "step": 384 }, { "epoch": 0.94, "learning_rate": 4.7132119205920026e-05, "loss": 0.3682, "step": 385 }, { "epoch": 0.94, "learning_rate": 4.7113531619441984e-05, "loss": 0.3684, "step": 386 }, { "epoch": 0.94, "learning_rate": 4.709488768119266e-05, "loss": 0.4049, "step": 387 }, { "epoch": 0.94, "learning_rate": 4.707618743868226e-05, "loss": 0.3852, "step": 388 }, { "epoch": 0.95, "learning_rate": 4.705743093956452e-05, "loss": 0.4162, "step": 389 }, { "epoch": 0.95, "learning_rate": 4.703861823163649e-05, "loss": 0.353, "step": 390 }, { "epoch": 0.95, "learning_rate": 4.7019749362838476e-05, "loss": 0.3958, "step": 391 }, { "epoch": 0.95, "learning_rate": 4.7000824381253905e-05, "loss": 0.406, "step": 392 }, { "epoch": 0.96, "learning_rate": 4.6981843335109174e-05, "loss": 0.3851, "step": 393 }, { "epoch": 0.96, "learning_rate": 4.6962806272773564e-05, "loss": 0.3828, "step": 394 }, { "epoch": 0.96, "learning_rate": 4.69437132427591e-05, "loss": 0.4331, "step": 395 }, { "epoch": 0.96, "learning_rate": 4.6924564293720434e-05, "loss": 0.3946, "step": 396 }, { "epoch": 0.97, "learning_rate": 4.6905359474454705e-05, "loss": 0.3799, "step": 397 }, { "epoch": 0.97, "learning_rate": 4.6886098833901436e-05, "loss": 0.3543, "step": 398 }, { "epoch": 0.97, "learning_rate": 4.686678242114239e-05, "loss": 0.3772, "step": 399 }, { "epoch": 0.97, "learning_rate": 4.684741028540146e-05, "loss": 0.4009, "step": 400 }, { "epoch": 0.98, "learning_rate": 4.6827982476044534e-05, "loss": 0.3806, "step": 401 }, { "epoch": 0.98, "learning_rate": 4.680849904257938e-05, "loss": 0.3781, "step": 402 }, { "epoch": 0.98, "learning_rate": 4.678896003465549e-05, "loss": 0.4264, "step": 403 }, { "epoch": 0.98, "learning_rate": 4.6769365502064025e-05, "loss": 0.3857, "step": 404 }, { "epoch": 0.99, "learning_rate": 4.674971549473757e-05, "loss": 0.3797, "step": 405 }, { "epoch": 0.99, "learning_rate": 4.6730010062750134e-05, "loss": 0.3847, "step": 406 }, { "epoch": 0.99, "learning_rate": 4.671024925631694e-05, "loss": 0.382, "step": 407 }, { "epoch": 0.99, "learning_rate": 4.669043312579433e-05, "loss": 0.3778, "step": 408 }, { "epoch": 1.0, "learning_rate": 4.667056172167962e-05, "loss": 0.3837, "step": 409 }, { "epoch": 1.0, "learning_rate": 4.665063509461097e-05, "loss": 0.3807, "step": 410 }, { "epoch": 1.0, "eval_loss": 0.5687975287437439, "eval_runtime": 116.1454, "eval_samples_per_second": 6.561, "eval_steps_per_second": 0.413, "step": 410 }, { "epoch": 1.0, "learning_rate": 4.6630653295367286e-05, "loss": 0.3618, "step": 411 }, { "epoch": 1.0, "learning_rate": 4.6610616374868066e-05, "loss": 0.2856, "step": 412 }, { "epoch": 1.01, "learning_rate": 4.659052438417326e-05, "loss": 0.2727, "step": 413 }, { "epoch": 1.01, "learning_rate": 4.6570377374483154e-05, "loss": 0.2632, "step": 414 }, { "epoch": 1.01, "learning_rate": 4.6550175397138253e-05, "loss": 0.2758, "step": 415 }, { "epoch": 1.01, "learning_rate": 4.652991850361912e-05, "loss": 0.2561, "step": 416 }, { "epoch": 1.01, "learning_rate": 4.650960674554627e-05, "loss": 0.2807, "step": 417 }, { "epoch": 1.02, "learning_rate": 4.648924017468003e-05, "loss": 0.2686, "step": 418 }, { "epoch": 1.02, "learning_rate": 4.64688188429204e-05, "loss": 0.2584, "step": 419 }, { "epoch": 1.02, "learning_rate": 4.644834280230692e-05, "loss": 0.2368, "step": 420 }, { "epoch": 1.02, "learning_rate": 4.6427812105018576e-05, "loss": 0.2642, "step": 421 }, { "epoch": 1.03, "learning_rate": 4.6407226803373586e-05, "loss": 0.2476, "step": 422 }, { "epoch": 1.03, "learning_rate": 4.6386586949829356e-05, "loss": 0.249, "step": 423 }, { "epoch": 1.03, "learning_rate": 4.6365892596982297e-05, "loss": 0.2541, "step": 424 }, { "epoch": 1.03, "learning_rate": 4.634514379756769e-05, "loss": 0.2785, "step": 425 }, { "epoch": 1.04, "learning_rate": 4.632434060445956e-05, "loss": 0.2369, "step": 426 }, { "epoch": 1.04, "learning_rate": 4.630348307067057e-05, "loss": 0.27, "step": 427 }, { "epoch": 1.04, "learning_rate": 4.6282571249351826e-05, "loss": 0.2603, "step": 428 }, { "epoch": 1.04, "learning_rate": 4.626160519379279e-05, "loss": 0.2498, "step": 429 }, { "epoch": 1.05, "learning_rate": 4.624058495742114e-05, "loss": 0.2654, "step": 430 }, { "epoch": 1.05, "learning_rate": 4.621951059380258e-05, "loss": 0.2316, "step": 431 }, { "epoch": 1.05, "learning_rate": 4.619838215664082e-05, "loss": 0.2515, "step": 432 }, { "epoch": 1.05, "learning_rate": 4.6177199699777285e-05, "loss": 0.2387, "step": 433 }, { "epoch": 1.06, "learning_rate": 4.615596327719111e-05, "loss": 0.2628, "step": 434 }, { "epoch": 1.06, "learning_rate": 4.613467294299892e-05, "loss": 0.2586, "step": 435 }, { "epoch": 1.06, "learning_rate": 4.611332875145477e-05, "loss": 0.2698, "step": 436 }, { "epoch": 1.06, "learning_rate": 4.609193075694989e-05, "loss": 0.254, "step": 437 }, { "epoch": 1.07, "learning_rate": 4.607047901401267e-05, "loss": 0.2585, "step": 438 }, { "epoch": 1.07, "learning_rate": 4.604897357730845e-05, "loss": 0.2311, "step": 439 }, { "epoch": 1.07, "learning_rate": 4.60274145016394e-05, "loss": 0.2714, "step": 440 }, { "epoch": 1.07, "learning_rate": 4.600580184194436e-05, "loss": 0.2536, "step": 441 }, { "epoch": 1.08, "learning_rate": 4.598413565329875e-05, "loss": 0.2485, "step": 442 }, { "epoch": 1.08, "learning_rate": 4.5962415990914375e-05, "loss": 0.2466, "step": 443 }, { "epoch": 1.08, "learning_rate": 4.59406429101393e-05, "loss": 0.2465, "step": 444 }, { "epoch": 1.08, "learning_rate": 4.5918816466457746e-05, "loss": 0.2478, "step": 445 }, { "epoch": 1.09, "learning_rate": 4.5896936715489885e-05, "loss": 0.2733, "step": 446 }, { "epoch": 1.09, "learning_rate": 4.587500371299176e-05, "loss": 0.2444, "step": 447 }, { "epoch": 1.09, "learning_rate": 4.585301751485508e-05, "loss": 0.2629, "step": 448 }, { "epoch": 1.09, "learning_rate": 4.583097817710716e-05, "loss": 0.2702, "step": 449 }, { "epoch": 1.1, "learning_rate": 4.580888575591068e-05, "loss": 0.2694, "step": 450 }, { "epoch": 1.1, "learning_rate": 4.5786740307563636e-05, "loss": 0.2578, "step": 451 }, { "epoch": 1.1, "learning_rate": 4.576454188849911e-05, "loss": 0.2516, "step": 452 }, { "epoch": 1.1, "learning_rate": 4.574229055528522e-05, "loss": 0.2685, "step": 453 }, { "epoch": 1.1, "learning_rate": 4.5719986364624866e-05, "loss": 0.2617, "step": 454 }, { "epoch": 1.11, "learning_rate": 4.569762937335569e-05, "loss": 0.2532, "step": 455 }, { "epoch": 1.11, "learning_rate": 4.5675219638449876e-05, "loss": 0.2885, "step": 456 }, { "epoch": 1.11, "learning_rate": 4.5652757217013995e-05, "loss": 0.2597, "step": 457 }, { "epoch": 1.11, "learning_rate": 4.5630242166288895e-05, "loss": 0.266, "step": 458 }, { "epoch": 1.12, "learning_rate": 4.5607674543649546e-05, "loss": 0.254, "step": 459 }, { "epoch": 1.12, "learning_rate": 4.5585054406604864e-05, "loss": 0.2702, "step": 460 }, { "epoch": 1.12, "learning_rate": 4.556238181279761e-05, "loss": 0.2475, "step": 461 }, { "epoch": 1.12, "learning_rate": 4.5539656820004194e-05, "loss": 0.2458, "step": 462 }, { "epoch": 1.13, "learning_rate": 4.551687948613459e-05, "loss": 0.2492, "step": 463 }, { "epoch": 1.13, "learning_rate": 4.5494049869232125e-05, "loss": 0.269, "step": 464 }, { "epoch": 1.13, "learning_rate": 4.5471168027473356e-05, "loss": 0.2646, "step": 465 }, { "epoch": 1.13, "learning_rate": 4.5448234019167945e-05, "loss": 0.2459, "step": 466 }, { "epoch": 1.14, "learning_rate": 4.5425247902758474e-05, "loss": 0.2762, "step": 467 }, { "epoch": 1.14, "learning_rate": 4.540220973682032e-05, "loss": 0.2511, "step": 468 }, { "epoch": 1.14, "learning_rate": 4.537911958006149e-05, "loss": 0.252, "step": 469 }, { "epoch": 1.14, "learning_rate": 4.5355977491322485e-05, "loss": 0.2679, "step": 470 }, { "epoch": 1.15, "learning_rate": 4.5332783529576146e-05, "loss": 0.2551, "step": 471 }, { "epoch": 1.15, "learning_rate": 4.530953775392749e-05, "loss": 0.2731, "step": 472 }, { "epoch": 1.15, "learning_rate": 4.5286240223613584e-05, "loss": 0.2612, "step": 473 }, { "epoch": 1.15, "learning_rate": 4.526289099800337e-05, "loss": 0.2739, "step": 474 }, { "epoch": 1.16, "learning_rate": 4.523949013659753e-05, "loss": 0.2644, "step": 475 }, { "epoch": 1.16, "learning_rate": 4.521603769902835e-05, "loss": 0.2811, "step": 476 }, { "epoch": 1.16, "learning_rate": 4.519253374505949e-05, "loss": 0.2624, "step": 477 }, { "epoch": 1.16, "learning_rate": 4.5168978334585956e-05, "loss": 0.2552, "step": 478 }, { "epoch": 1.17, "learning_rate": 4.514537152763384e-05, "loss": 0.27, "step": 479 }, { "epoch": 1.17, "learning_rate": 4.5121713384360215e-05, "loss": 0.2652, "step": 480 }, { "epoch": 1.17, "learning_rate": 4.5098003965052984e-05, "loss": 0.2698, "step": 481 }, { "epoch": 1.17, "learning_rate": 4.507424333013069e-05, "loss": 0.2585, "step": 482 }, { "epoch": 1.18, "learning_rate": 4.505043154014243e-05, "loss": 0.2573, "step": 483 }, { "epoch": 1.18, "learning_rate": 4.502656865576762e-05, "loss": 0.2561, "step": 484 }, { "epoch": 1.18, "learning_rate": 4.5002654737815905e-05, "loss": 0.2629, "step": 485 }, { "epoch": 1.18, "learning_rate": 4.497868984722697e-05, "loss": 0.2696, "step": 486 }, { "epoch": 1.19, "learning_rate": 4.4954674045070387e-05, "loss": 0.2727, "step": 487 }, { "epoch": 1.19, "learning_rate": 4.493060739254548e-05, "loss": 0.2718, "step": 488 }, { "epoch": 1.19, "learning_rate": 4.4906489950981126e-05, "loss": 0.2537, "step": 489 }, { "epoch": 1.19, "learning_rate": 4.488232178183567e-05, "loss": 0.2565, "step": 490 }, { "epoch": 1.2, "learning_rate": 4.4858102946696676e-05, "loss": 0.2554, "step": 491 }, { "epoch": 1.2, "learning_rate": 4.4833833507280884e-05, "loss": 0.2904, "step": 492 }, { "epoch": 1.2, "learning_rate": 4.4809513525433925e-05, "loss": 0.262, "step": 493 }, { "epoch": 1.2, "learning_rate": 4.478514306313025e-05, "loss": 0.2537, "step": 494 }, { "epoch": 1.2, "learning_rate": 4.476072218247297e-05, "loss": 0.2583, "step": 495 }, { "epoch": 1.21, "learning_rate": 4.4736250945693655e-05, "loss": 0.2712, "step": 496 }, { "epoch": 1.21, "learning_rate": 4.471172941515219e-05, "loss": 0.257, "step": 497 }, { "epoch": 1.21, "learning_rate": 4.468715765333664e-05, "loss": 0.2617, "step": 498 }, { "epoch": 1.21, "learning_rate": 4.466253572286308e-05, "loss": 0.2528, "step": 499 }, { "epoch": 1.22, "learning_rate": 4.46378636864754e-05, "loss": 0.2711, "step": 500 }, { "epoch": 1.22, "learning_rate": 4.46131416070452e-05, "loss": 0.2568, "step": 501 }, { "epoch": 1.22, "learning_rate": 4.458836954757161e-05, "loss": 0.2702, "step": 502 }, { "epoch": 1.22, "learning_rate": 4.4563547571181086e-05, "loss": 0.2596, "step": 503 }, { "epoch": 1.23, "learning_rate": 4.4538675741127326e-05, "loss": 0.2478, "step": 504 }, { "epoch": 1.23, "learning_rate": 4.451375412079106e-05, "loss": 0.2438, "step": 505 }, { "epoch": 1.23, "learning_rate": 4.4488782773679885e-05, "loss": 0.2797, "step": 506 }, { "epoch": 1.23, "learning_rate": 4.4463761763428125e-05, "loss": 0.2355, "step": 507 }, { "epoch": 1.24, "learning_rate": 4.443869115379667e-05, "loss": 0.2718, "step": 508 }, { "epoch": 1.24, "learning_rate": 4.441357100867278e-05, "loss": 0.2654, "step": 509 }, { "epoch": 1.24, "learning_rate": 4.4388401392069975e-05, "loss": 0.2776, "step": 510 }, { "epoch": 1.24, "learning_rate": 4.4363182368127824e-05, "loss": 0.2631, "step": 511 }, { "epoch": 1.25, "learning_rate": 4.433791400111179e-05, "loss": 0.2599, "step": 512 }, { "epoch": 1.25, "learning_rate": 4.4312596355413116e-05, "loss": 0.2629, "step": 513 }, { "epoch": 1.25, "learning_rate": 4.428722949554857e-05, "loss": 0.25, "step": 514 }, { "epoch": 1.25, "learning_rate": 4.426181348616039e-05, "loss": 0.2557, "step": 515 }, { "epoch": 1.26, "learning_rate": 4.4236348392016e-05, "loss": 0.2793, "step": 516 }, { "epoch": 1.26, "learning_rate": 4.421083427800795e-05, "loss": 0.2641, "step": 517 }, { "epoch": 1.26, "learning_rate": 4.41852712091537e-05, "loss": 0.2696, "step": 518 }, { "epoch": 1.26, "learning_rate": 4.415965925059544e-05, "loss": 0.2637, "step": 519 }, { "epoch": 1.27, "learning_rate": 4.413399846759998e-05, "loss": 0.2772, "step": 520 }, { "epoch": 1.27, "learning_rate": 4.4108288925558505e-05, "loss": 0.2432, "step": 521 }, { "epoch": 1.27, "learning_rate": 4.40825306899865e-05, "loss": 0.2657, "step": 522 }, { "epoch": 1.27, "learning_rate": 4.405672382652349e-05, "loss": 0.2635, "step": 523 }, { "epoch": 1.28, "learning_rate": 4.403086840093297e-05, "loss": 0.2551, "step": 524 }, { "epoch": 1.28, "learning_rate": 4.400496447910212e-05, "loss": 0.2555, "step": 525 }, { "epoch": 1.28, "learning_rate": 4.397901212704176e-05, "loss": 0.2785, "step": 526 }, { "epoch": 1.28, "learning_rate": 4.395301141088611e-05, "loss": 0.2866, "step": 527 }, { "epoch": 1.29, "learning_rate": 4.3926962396892606e-05, "loss": 0.256, "step": 528 }, { "epoch": 1.29, "learning_rate": 4.3900865151441796e-05, "loss": 0.2585, "step": 529 }, { "epoch": 1.29, "learning_rate": 4.387471974103713e-05, "loss": 0.265, "step": 530 }, { "epoch": 1.29, "learning_rate": 4.384852623230478e-05, "loss": 0.2445, "step": 531 }, { "epoch": 1.29, "learning_rate": 4.38222846919935e-05, "loss": 0.2608, "step": 532 }, { "epoch": 1.3, "learning_rate": 4.379599518697444e-05, "loss": 0.2823, "step": 533 }, { "epoch": 1.3, "learning_rate": 4.3769657784240976e-05, "loss": 0.2688, "step": 534 }, { "epoch": 1.3, "learning_rate": 4.3743272550908543e-05, "loss": 0.2572, "step": 535 }, { "epoch": 1.3, "learning_rate": 4.371683955421447e-05, "loss": 0.2635, "step": 536 }, { "epoch": 1.31, "learning_rate": 4.369035886151778e-05, "loss": 0.2713, "step": 537 }, { "epoch": 1.31, "learning_rate": 4.366383054029906e-05, "loss": 0.2651, "step": 538 }, { "epoch": 1.31, "learning_rate": 4.363725465816028e-05, "loss": 0.2487, "step": 539 }, { "epoch": 1.31, "learning_rate": 4.3610631282824556e-05, "loss": 0.2513, "step": 540 }, { "epoch": 1.32, "learning_rate": 4.3583960482136085e-05, "loss": 0.2683, "step": 541 }, { "epoch": 1.32, "learning_rate": 4.3557242324059896e-05, "loss": 0.2706, "step": 542 }, { "epoch": 1.32, "learning_rate": 4.3530476876681696e-05, "loss": 0.2741, "step": 543 }, { "epoch": 1.32, "learning_rate": 4.350366420820771e-05, "loss": 0.238, "step": 544 }, { "epoch": 1.33, "learning_rate": 4.347680438696449e-05, "loss": 0.2656, "step": 545 }, { "epoch": 1.33, "learning_rate": 4.344989748139873e-05, "loss": 0.2534, "step": 546 }, { "epoch": 1.33, "learning_rate": 4.342294356007715e-05, "loss": 0.2832, "step": 547 }, { "epoch": 1.33, "learning_rate": 4.339594269168624e-05, "loss": 0.259, "step": 548 }, { "epoch": 1.34, "learning_rate": 4.3368894945032146e-05, "loss": 0.2734, "step": 549 }, { "epoch": 1.34, "learning_rate": 4.334180038904046e-05, "loss": 0.2805, "step": 550 }, { "epoch": 1.34, "learning_rate": 4.331465909275608e-05, "loss": 0.2837, "step": 551 }, { "epoch": 1.34, "learning_rate": 4.3287471125342996e-05, "loss": 0.2763, "step": 552 }, { "epoch": 1.35, "learning_rate": 4.326023655608411e-05, "loss": 0.2678, "step": 553 }, { "epoch": 1.35, "learning_rate": 4.323295545438112e-05, "loss": 0.2583, "step": 554 }, { "epoch": 1.35, "learning_rate": 4.3205627889754286e-05, "loss": 0.2657, "step": 555 }, { "epoch": 1.35, "learning_rate": 4.317825393184226e-05, "loss": 0.2653, "step": 556 }, { "epoch": 1.36, "learning_rate": 4.315083365040192e-05, "loss": 0.2566, "step": 557 }, { "epoch": 1.36, "learning_rate": 4.31233671153082e-05, "loss": 0.2443, "step": 558 }, { "epoch": 1.36, "learning_rate": 4.309585439655389e-05, "loss": 0.262, "step": 559 }, { "epoch": 1.36, "learning_rate": 4.306829556424948e-05, "loss": 0.2639, "step": 560 }, { "epoch": 1.37, "learning_rate": 4.304069068862296e-05, "loss": 0.2558, "step": 561 }, { "epoch": 1.37, "learning_rate": 4.301303984001967e-05, "loss": 0.2791, "step": 562 }, { "epoch": 1.37, "learning_rate": 4.298534308890209e-05, "loss": 0.2564, "step": 563 }, { "epoch": 1.37, "learning_rate": 4.295760050584966e-05, "loss": 0.2749, "step": 564 }, { "epoch": 1.38, "learning_rate": 4.2929812161558636e-05, "loss": 0.2839, "step": 565 }, { "epoch": 1.38, "learning_rate": 4.290197812684188e-05, "loss": 0.2432, "step": 566 }, { "epoch": 1.38, "learning_rate": 4.2874098472628675e-05, "loss": 0.2716, "step": 567 }, { "epoch": 1.38, "learning_rate": 4.284617326996458e-05, "loss": 0.2628, "step": 568 }, { "epoch": 1.38, "learning_rate": 4.28182025900112e-05, "loss": 0.2797, "step": 569 }, { "epoch": 1.39, "learning_rate": 4.279018650404604e-05, "loss": 0.2704, "step": 570 }, { "epoch": 1.39, "learning_rate": 4.276212508346232e-05, "loss": 0.2701, "step": 571 }, { "epoch": 1.39, "learning_rate": 4.273401839976877e-05, "loss": 0.2713, "step": 572 }, { "epoch": 1.39, "learning_rate": 4.270586652458948e-05, "loss": 0.249, "step": 573 }, { "epoch": 1.4, "learning_rate": 4.267766952966369e-05, "loss": 0.2755, "step": 574 }, { "epoch": 1.4, "learning_rate": 4.264942748684563e-05, "loss": 0.2882, "step": 575 }, { "epoch": 1.4, "learning_rate": 4.2621140468104295e-05, "loss": 0.2686, "step": 576 }, { "epoch": 1.4, "learning_rate": 4.2592808545523335e-05, "loss": 0.2901, "step": 577 }, { "epoch": 1.41, "learning_rate": 4.256443179130081e-05, "loss": 0.251, "step": 578 }, { "epoch": 1.41, "learning_rate": 4.2536010277748996e-05, "loss": 0.2522, "step": 579 }, { "epoch": 1.41, "learning_rate": 4.250754407729428e-05, "loss": 0.2773, "step": 580 }, { "epoch": 1.41, "learning_rate": 4.2479033262476884e-05, "loss": 0.2574, "step": 581 }, { "epoch": 1.42, "learning_rate": 4.245047790595075e-05, "loss": 0.2835, "step": 582 }, { "epoch": 1.42, "learning_rate": 4.242187808048329e-05, "loss": 0.2646, "step": 583 }, { "epoch": 1.42, "learning_rate": 4.239323385895527e-05, "loss": 0.2786, "step": 584 }, { "epoch": 1.42, "learning_rate": 4.2364545314360585e-05, "loss": 0.282, "step": 585 }, { "epoch": 1.43, "learning_rate": 4.233581251980604e-05, "loss": 0.2655, "step": 586 }, { "epoch": 1.43, "learning_rate": 4.2307035548511265e-05, "loss": 0.259, "step": 587 }, { "epoch": 1.43, "learning_rate": 4.227821447380842e-05, "loss": 0.2476, "step": 588 }, { "epoch": 1.43, "learning_rate": 4.224934936914206e-05, "loss": 0.2628, "step": 589 }, { "epoch": 1.44, "learning_rate": 4.222044030806894e-05, "loss": 0.2629, "step": 590 }, { "epoch": 1.44, "learning_rate": 4.2191487364257854e-05, "loss": 0.2776, "step": 591 }, { "epoch": 1.44, "learning_rate": 4.216249061148939e-05, "loss": 0.2549, "step": 592 }, { "epoch": 1.44, "learning_rate": 4.21334501236558e-05, "loss": 0.2513, "step": 593 }, { "epoch": 1.45, "learning_rate": 4.210436597476076e-05, "loss": 0.2596, "step": 594 }, { "epoch": 1.45, "learning_rate": 4.207523823891923e-05, "loss": 0.2767, "step": 595 }, { "epoch": 1.45, "learning_rate": 4.2046066990357235e-05, "loss": 0.2735, "step": 596 }, { "epoch": 1.45, "learning_rate": 4.201685230341168e-05, "loss": 0.2487, "step": 597 }, { "epoch": 1.46, "learning_rate": 4.198759425253014e-05, "loss": 0.2558, "step": 598 }, { "epoch": 1.46, "learning_rate": 4.195829291227076e-05, "loss": 0.2773, "step": 599 }, { "epoch": 1.46, "learning_rate": 4.192894835730193e-05, "loss": 0.2716, "step": 600 }, { "epoch": 1.46, "learning_rate": 4.1899560662402206e-05, "loss": 0.2724, "step": 601 }, { "epoch": 1.47, "learning_rate": 4.1870129902460056e-05, "loss": 0.27, "step": 602 }, { "epoch": 1.47, "learning_rate": 4.18406561524737e-05, "loss": 0.2594, "step": 603 }, { "epoch": 1.47, "learning_rate": 4.18111394875509e-05, "loss": 0.2581, "step": 604 }, { "epoch": 1.47, "learning_rate": 4.178157998290879e-05, "loss": 0.265, "step": 605 }, { "epoch": 1.47, "learning_rate": 4.175197771387368e-05, "loss": 0.2653, "step": 606 }, { "epoch": 1.48, "learning_rate": 4.172233275588082e-05, "loss": 0.2808, "step": 607 }, { "epoch": 1.48, "learning_rate": 4.169264518447428e-05, "loss": 0.27, "step": 608 }, { "epoch": 1.48, "learning_rate": 4.16629150753067e-05, "loss": 0.2522, "step": 609 }, { "epoch": 1.48, "learning_rate": 4.163314250413913e-05, "loss": 0.253, "step": 610 }, { "epoch": 1.49, "learning_rate": 4.160332754684084e-05, "loss": 0.2572, "step": 611 }, { "epoch": 1.49, "learning_rate": 4.157347027938907e-05, "loss": 0.2799, "step": 612 }, { "epoch": 1.49, "learning_rate": 4.1543570777868924e-05, "loss": 0.2816, "step": 613 }, { "epoch": 1.49, "learning_rate": 4.151362911847309e-05, "loss": 0.2859, "step": 614 }, { "epoch": 1.5, "learning_rate": 4.148364537750172e-05, "loss": 0.2601, "step": 615 }, { "epoch": 1.5, "learning_rate": 4.1453619631362195e-05, "loss": 0.2725, "step": 616 }, { "epoch": 1.5, "learning_rate": 4.142355195656892e-05, "loss": 0.2669, "step": 617 }, { "epoch": 1.5, "learning_rate": 4.1393442429743166e-05, "loss": 0.2955, "step": 618 }, { "epoch": 1.51, "learning_rate": 4.1363291127612845e-05, "loss": 0.2655, "step": 619 }, { "epoch": 1.51, "learning_rate": 4.1333098127012326e-05, "loss": 0.2545, "step": 620 }, { "epoch": 1.51, "learning_rate": 4.130286350488224e-05, "loss": 0.2724, "step": 621 }, { "epoch": 1.51, "learning_rate": 4.127258733826929e-05, "loss": 0.2633, "step": 622 }, { "epoch": 1.52, "learning_rate": 4.124226970432602e-05, "loss": 0.2643, "step": 623 }, { "epoch": 1.52, "learning_rate": 4.121191068031067e-05, "loss": 0.2817, "step": 624 }, { "epoch": 1.52, "learning_rate": 4.118151034358696e-05, "loss": 0.263, "step": 625 }, { "epoch": 1.52, "learning_rate": 4.1151068771623866e-05, "loss": 0.2869, "step": 626 }, { "epoch": 1.53, "learning_rate": 4.112058604199544e-05, "loss": 0.2666, "step": 627 }, { "epoch": 1.53, "learning_rate": 4.109006223238064e-05, "loss": 0.2692, "step": 628 }, { "epoch": 1.53, "learning_rate": 4.1059497420563094e-05, "loss": 0.2615, "step": 629 }, { "epoch": 1.53, "learning_rate": 4.102889168443091e-05, "loss": 0.2571, "step": 630 }, { "epoch": 1.54, "learning_rate": 4.099824510197649e-05, "loss": 0.2914, "step": 631 }, { "epoch": 1.54, "learning_rate": 4.0967557751296336e-05, "loss": 0.2808, "step": 632 }, { "epoch": 1.54, "learning_rate": 4.093682971059081e-05, "loss": 0.2658, "step": 633 }, { "epoch": 1.54, "learning_rate": 4.0906061058163995e-05, "loss": 0.2727, "step": 634 }, { "epoch": 1.55, "learning_rate": 4.087525187242345e-05, "loss": 0.2541, "step": 635 }, { "epoch": 1.55, "learning_rate": 4.0844402231880016e-05, "loss": 0.2676, "step": 636 }, { "epoch": 1.55, "learning_rate": 4.0813512215147654e-05, "loss": 0.2555, "step": 637 }, { "epoch": 1.55, "learning_rate": 4.078258190094318e-05, "loss": 0.2597, "step": 638 }, { "epoch": 1.56, "learning_rate": 4.075161136808612e-05, "loss": 0.2589, "step": 639 }, { "epoch": 1.56, "learning_rate": 4.0720600695498486e-05, "loss": 0.2852, "step": 640 }, { "epoch": 1.56, "learning_rate": 4.068954996220457e-05, "loss": 0.2557, "step": 641 }, { "epoch": 1.56, "learning_rate": 4.0658459247330766e-05, "loss": 0.2697, "step": 642 }, { "epoch": 1.56, "learning_rate": 4.062732863010534e-05, "loss": 0.2678, "step": 643 }, { "epoch": 1.57, "learning_rate": 4.0596158189858255e-05, "loss": 0.2631, "step": 644 }, { "epoch": 1.57, "learning_rate": 4.0564948006020934e-05, "loss": 0.2559, "step": 645 }, { "epoch": 1.57, "learning_rate": 4.0533698158126085e-05, "loss": 0.2833, "step": 646 }, { "epoch": 1.57, "learning_rate": 4.050240872580749e-05, "loss": 0.2542, "step": 647 }, { "epoch": 1.58, "learning_rate": 4.047107978879985e-05, "loss": 0.28, "step": 648 }, { "epoch": 1.58, "learning_rate": 4.043971142693844e-05, "loss": 0.2607, "step": 649 }, { "epoch": 1.58, "learning_rate": 4.040830372015909e-05, "loss": 0.278, "step": 650 }, { "epoch": 1.58, "learning_rate": 4.037685674849786e-05, "loss": 0.2569, "step": 651 }, { "epoch": 1.59, "learning_rate": 4.034537059209085e-05, "loss": 0.2844, "step": 652 }, { "epoch": 1.59, "learning_rate": 4.0313845331174036e-05, "loss": 0.2639, "step": 653 }, { "epoch": 1.59, "learning_rate": 4.0282281046083045e-05, "loss": 0.2735, "step": 654 }, { "epoch": 1.59, "learning_rate": 4.025067781725294e-05, "loss": 0.2713, "step": 655 }, { "epoch": 1.6, "learning_rate": 4.021903572521802e-05, "loss": 0.2515, "step": 656 }, { "epoch": 1.6, "learning_rate": 4.0187354850611636e-05, "loss": 0.2651, "step": 657 }, { "epoch": 1.6, "learning_rate": 4.015563527416595e-05, "loss": 0.2788, "step": 658 }, { "epoch": 1.6, "learning_rate": 4.012387707671177e-05, "loss": 0.2753, "step": 659 }, { "epoch": 1.61, "learning_rate": 4.00920803391783e-05, "loss": 0.2589, "step": 660 }, { "epoch": 1.61, "learning_rate": 4.0060245142592944e-05, "loss": 0.2748, "step": 661 }, { "epoch": 1.61, "learning_rate": 4.002837156808116e-05, "loss": 0.2559, "step": 662 }, { "epoch": 1.61, "learning_rate": 3.999645969686616e-05, "loss": 0.2563, "step": 663 }, { "epoch": 1.62, "learning_rate": 3.996450961026876e-05, "loss": 0.251, "step": 664 }, { "epoch": 1.62, "learning_rate": 3.9932521389707155e-05, "loss": 0.2661, "step": 665 }, { "epoch": 1.62, "learning_rate": 3.990049511669675e-05, "loss": 0.2563, "step": 666 }, { "epoch": 1.62, "learning_rate": 3.986843087284986e-05, "loss": 0.2754, "step": 667 }, { "epoch": 1.63, "learning_rate": 3.9836328739875615e-05, "loss": 0.2591, "step": 668 }, { "epoch": 1.63, "learning_rate": 3.980418879957967e-05, "loss": 0.2764, "step": 669 }, { "epoch": 1.63, "learning_rate": 3.977201113386402e-05, "loss": 0.2801, "step": 670 }, { "epoch": 1.63, "learning_rate": 3.9739795824726804e-05, "loss": 0.2768, "step": 671 }, { "epoch": 1.64, "learning_rate": 3.9707542954262115e-05, "loss": 0.2933, "step": 672 }, { "epoch": 1.64, "learning_rate": 3.96752526046597e-05, "loss": 0.2757, "step": 673 }, { "epoch": 1.64, "learning_rate": 3.964292485820487e-05, "loss": 0.2557, "step": 674 }, { "epoch": 1.64, "learning_rate": 3.9610559797278216e-05, "loss": 0.2624, "step": 675 }, { "epoch": 1.65, "learning_rate": 3.957815750435542e-05, "loss": 0.2618, "step": 676 }, { "epoch": 1.65, "learning_rate": 3.954571806200702e-05, "loss": 0.2689, "step": 677 }, { "epoch": 1.65, "learning_rate": 3.951324155289825e-05, "loss": 0.2581, "step": 678 }, { "epoch": 1.65, "learning_rate": 3.9480728059788796e-05, "loss": 0.2589, "step": 679 }, { "epoch": 1.66, "learning_rate": 3.9448177665532574e-05, "loss": 0.2733, "step": 680 }, { "epoch": 1.66, "learning_rate": 3.941559045307755e-05, "loss": 0.2653, "step": 681 }, { "epoch": 1.66, "learning_rate": 3.938296650546552e-05, "loss": 0.2799, "step": 682 }, { "epoch": 1.66, "learning_rate": 3.935030590583186e-05, "loss": 0.2583, "step": 683 }, { "epoch": 1.66, "learning_rate": 3.931760873740539e-05, "loss": 0.271, "step": 684 }, { "epoch": 1.67, "learning_rate": 3.9284875083508076e-05, "loss": 0.2534, "step": 685 }, { "epoch": 1.67, "learning_rate": 3.9252105027554887e-05, "loss": 0.2576, "step": 686 }, { "epoch": 1.67, "learning_rate": 3.9219298653053546e-05, "loss": 0.2464, "step": 687 }, { "epoch": 1.67, "learning_rate": 3.918645604360433e-05, "loss": 0.2738, "step": 688 }, { "epoch": 1.68, "learning_rate": 3.915357728289985e-05, "loss": 0.2593, "step": 689 }, { "epoch": 1.68, "learning_rate": 3.9120662454724836e-05, "loss": 0.2795, "step": 690 }, { "epoch": 1.68, "learning_rate": 3.908771164295595e-05, "loss": 0.2759, "step": 691 }, { "epoch": 1.68, "learning_rate": 3.905472493156151e-05, "loss": 0.2606, "step": 692 }, { "epoch": 1.69, "learning_rate": 3.9021702404601366e-05, "loss": 0.2867, "step": 693 }, { "epoch": 1.69, "learning_rate": 3.8988644146226606e-05, "loss": 0.2693, "step": 694 }, { "epoch": 1.69, "learning_rate": 3.8955550240679364e-05, "loss": 0.2601, "step": 695 }, { "epoch": 1.69, "learning_rate": 3.8922420772292644e-05, "loss": 0.2574, "step": 696 }, { "epoch": 1.7, "learning_rate": 3.888925582549006e-05, "loss": 0.2737, "step": 697 }, { "epoch": 1.7, "learning_rate": 3.8856055484785625e-05, "loss": 0.2752, "step": 698 }, { "epoch": 1.7, "learning_rate": 3.882281983478355e-05, "loss": 0.2807, "step": 699 }, { "epoch": 1.7, "learning_rate": 3.878954896017804e-05, "loss": 0.2779, "step": 700 }, { "epoch": 1.71, "learning_rate": 3.875624294575305e-05, "loss": 0.2837, "step": 701 }, { "epoch": 1.71, "learning_rate": 3.872290187638208e-05, "loss": 0.268, "step": 702 }, { "epoch": 1.71, "learning_rate": 3.8689525837027975e-05, "loss": 0.2621, "step": 703 }, { "epoch": 1.71, "learning_rate": 3.865611491274267e-05, "loss": 0.2694, "step": 704 }, { "epoch": 1.72, "learning_rate": 3.8622669188667015e-05, "loss": 0.2759, "step": 705 }, { "epoch": 1.72, "learning_rate": 3.858918875003053e-05, "loss": 0.2643, "step": 706 }, { "epoch": 1.72, "learning_rate": 3.8555673682151215e-05, "loss": 0.2663, "step": 707 }, { "epoch": 1.72, "learning_rate": 3.852212407043528e-05, "loss": 0.2871, "step": 708 }, { "epoch": 1.73, "learning_rate": 3.8488540000377016e-05, "loss": 0.2718, "step": 709 }, { "epoch": 1.73, "learning_rate": 3.8454921557558476e-05, "loss": 0.2712, "step": 710 }, { "epoch": 1.73, "learning_rate": 3.842126882764933e-05, "loss": 0.2579, "step": 711 }, { "epoch": 1.73, "learning_rate": 3.8387581896406606e-05, "loss": 0.2695, "step": 712 }, { "epoch": 1.74, "learning_rate": 3.835386084967451e-05, "loss": 0.2619, "step": 713 }, { "epoch": 1.74, "learning_rate": 3.8320105773384144e-05, "loss": 0.2744, "step": 714 }, { "epoch": 1.74, "learning_rate": 3.828631675355338e-05, "loss": 0.2606, "step": 715 }, { "epoch": 1.74, "learning_rate": 3.8252493876286546e-05, "loss": 0.2703, "step": 716 }, { "epoch": 1.75, "learning_rate": 3.8218637227774276e-05, "loss": 0.2657, "step": 717 }, { "epoch": 1.75, "learning_rate": 3.818474689429323e-05, "loss": 0.2827, "step": 718 }, { "epoch": 1.75, "learning_rate": 3.8150822962205956e-05, "loss": 0.263, "step": 719 }, { "epoch": 1.75, "learning_rate": 3.8116865517960585e-05, "loss": 0.2702, "step": 720 }, { "epoch": 1.75, "learning_rate": 3.808287464809063e-05, "loss": 0.2659, "step": 721 }, { "epoch": 1.76, "learning_rate": 3.8048850439214844e-05, "loss": 0.2564, "step": 722 }, { "epoch": 1.76, "learning_rate": 3.801479297803687e-05, "loss": 0.2758, "step": 723 }, { "epoch": 1.76, "learning_rate": 3.7980702351345146e-05, "loss": 0.2742, "step": 724 }, { "epoch": 1.76, "learning_rate": 3.7946578646012574e-05, "loss": 0.2741, "step": 725 }, { "epoch": 1.77, "learning_rate": 3.791242194899639e-05, "loss": 0.2695, "step": 726 }, { "epoch": 1.77, "learning_rate": 3.7878232347337875e-05, "loss": 0.2749, "step": 727 }, { "epoch": 1.77, "learning_rate": 3.784400992816219e-05, "loss": 0.2679, "step": 728 }, { "epoch": 1.77, "learning_rate": 3.78097547786781e-05, "loss": 0.2617, "step": 729 }, { "epoch": 1.78, "learning_rate": 3.777546698617776e-05, "loss": 0.2756, "step": 730 }, { "epoch": 1.78, "learning_rate": 3.774114663803657e-05, "loss": 0.2704, "step": 731 }, { "epoch": 1.78, "learning_rate": 3.7706793821712826e-05, "loss": 0.2742, "step": 732 }, { "epoch": 1.78, "learning_rate": 3.76724086247476e-05, "loss": 0.2686, "step": 733 }, { "epoch": 1.79, "learning_rate": 3.763799113476447e-05, "loss": 0.2548, "step": 734 }, { "epoch": 1.79, "learning_rate": 3.7603541439469315e-05, "loss": 0.2788, "step": 735 }, { "epoch": 1.79, "learning_rate": 3.756905962665005e-05, "loss": 0.2525, "step": 736 }, { "epoch": 1.79, "learning_rate": 3.753454578417648e-05, "loss": 0.2758, "step": 737 }, { "epoch": 1.8, "learning_rate": 3.7500000000000003e-05, "loss": 0.2523, "step": 738 }, { "epoch": 1.8, "learning_rate": 3.746542236215341e-05, "loss": 0.2652, "step": 739 }, { "epoch": 1.8, "learning_rate": 3.743081295875069e-05, "loss": 0.2821, "step": 740 }, { "epoch": 1.8, "learning_rate": 3.7396171877986764e-05, "loss": 0.2833, "step": 741 }, { "epoch": 1.81, "learning_rate": 3.7361499208137254e-05, "loss": 0.2846, "step": 742 }, { "epoch": 1.81, "learning_rate": 3.732679503755833e-05, "loss": 0.2651, "step": 743 }, { "epoch": 1.81, "learning_rate": 3.72920594546864e-05, "loss": 0.2594, "step": 744 }, { "epoch": 1.81, "learning_rate": 3.725729254803791e-05, "loss": 0.2776, "step": 745 }, { "epoch": 1.82, "learning_rate": 3.722249440620917e-05, "loss": 0.2637, "step": 746 }, { "epoch": 1.82, "learning_rate": 3.718766511787606e-05, "loss": 0.2872, "step": 747 }, { "epoch": 1.82, "learning_rate": 3.715280477179382e-05, "loss": 0.2563, "step": 748 }, { "epoch": 1.82, "learning_rate": 3.7117913456796854e-05, "loss": 0.2727, "step": 749 }, { "epoch": 1.83, "learning_rate": 3.708299126179847e-05, "loss": 0.2601, "step": 750 }, { "epoch": 1.83, "learning_rate": 3.7048038275790694e-05, "loss": 0.2784, "step": 751 }, { "epoch": 1.83, "learning_rate": 3.701305458784397e-05, "loss": 0.2644, "step": 752 }, { "epoch": 1.83, "learning_rate": 3.697804028710703e-05, "loss": 0.2705, "step": 753 }, { "epoch": 1.84, "learning_rate": 3.694299546280657e-05, "loss": 0.2628, "step": 754 }, { "epoch": 1.84, "learning_rate": 3.690792020424712e-05, "loss": 0.2715, "step": 755 }, { "epoch": 1.84, "learning_rate": 3.687281460081071e-05, "loss": 0.2728, "step": 756 }, { "epoch": 1.84, "learning_rate": 3.683767874195674e-05, "loss": 0.2767, "step": 757 }, { "epoch": 1.84, "learning_rate": 3.680251271722169e-05, "loss": 0.2652, "step": 758 }, { "epoch": 1.85, "learning_rate": 3.676731661621893e-05, "loss": 0.2726, "step": 759 }, { "epoch": 1.85, "learning_rate": 3.673209052863843e-05, "loss": 0.2687, "step": 760 }, { "epoch": 1.85, "learning_rate": 3.6696834544246625e-05, "loss": 0.2561, "step": 761 }, { "epoch": 1.85, "learning_rate": 3.666154875288611e-05, "loss": 0.2781, "step": 762 }, { "epoch": 1.86, "learning_rate": 3.662623324447544e-05, "loss": 0.2867, "step": 763 }, { "epoch": 1.86, "learning_rate": 3.65908881090089e-05, "loss": 0.2711, "step": 764 }, { "epoch": 1.86, "learning_rate": 3.655551343655628e-05, "loss": 0.2668, "step": 765 }, { "epoch": 1.86, "learning_rate": 3.652010931726262e-05, "loss": 0.2522, "step": 766 }, { "epoch": 1.87, "learning_rate": 3.648467584134802e-05, "loss": 0.2658, "step": 767 }, { "epoch": 1.87, "learning_rate": 3.6449213099107373e-05, "loss": 0.2757, "step": 768 }, { "epoch": 1.87, "learning_rate": 3.641372118091017e-05, "loss": 0.2865, "step": 769 }, { "epoch": 1.87, "learning_rate": 3.6378200177200224e-05, "loss": 0.2468, "step": 770 }, { "epoch": 1.88, "learning_rate": 3.634265017849549e-05, "loss": 0.2828, "step": 771 }, { "epoch": 1.88, "learning_rate": 3.63070712753878e-05, "loss": 0.2562, "step": 772 }, { "epoch": 1.88, "learning_rate": 3.6271463558542645e-05, "loss": 0.2701, "step": 773 }, { "epoch": 1.88, "learning_rate": 3.623582711869895e-05, "loss": 0.2851, "step": 774 }, { "epoch": 1.89, "learning_rate": 3.620016204666882e-05, "loss": 0.2844, "step": 775 }, { "epoch": 1.89, "learning_rate": 3.616446843333733e-05, "loss": 0.284, "step": 776 }, { "epoch": 1.89, "learning_rate": 3.612874636966228e-05, "loss": 0.2673, "step": 777 }, { "epoch": 1.89, "learning_rate": 3.6092995946673994e-05, "loss": 0.2512, "step": 778 }, { "epoch": 1.9, "learning_rate": 3.6057217255475034e-05, "loss": 0.2598, "step": 779 }, { "epoch": 1.9, "learning_rate": 3.602141038724001e-05, "loss": 0.2664, "step": 780 }, { "epoch": 1.9, "learning_rate": 3.598557543321535e-05, "loss": 0.2745, "step": 781 }, { "epoch": 1.9, "learning_rate": 3.5949712484719014e-05, "loss": 0.2582, "step": 782 }, { "epoch": 1.91, "learning_rate": 3.5913821633140336e-05, "loss": 0.2668, "step": 783 }, { "epoch": 1.91, "learning_rate": 3.5877902969939755e-05, "loss": 0.2593, "step": 784 }, { "epoch": 1.91, "learning_rate": 3.584195658664855e-05, "loss": 0.2607, "step": 785 }, { "epoch": 1.91, "learning_rate": 3.580598257486867e-05, "loss": 0.2493, "step": 786 }, { "epoch": 1.92, "learning_rate": 3.5769981026272475e-05, "loss": 0.272, "step": 787 }, { "epoch": 1.92, "learning_rate": 3.573395203260245e-05, "loss": 0.2687, "step": 788 }, { "epoch": 1.92, "learning_rate": 3.569789568567107e-05, "loss": 0.2735, "step": 789 }, { "epoch": 1.92, "learning_rate": 3.56618120773605e-05, "loss": 0.254, "step": 790 }, { "epoch": 1.93, "learning_rate": 3.5625701299622336e-05, "loss": 0.2665, "step": 791 }, { "epoch": 1.93, "learning_rate": 3.558956344447748e-05, "loss": 0.2654, "step": 792 }, { "epoch": 1.93, "learning_rate": 3.555339860401578e-05, "loss": 0.2718, "step": 793 }, { "epoch": 1.93, "learning_rate": 3.551720687039585e-05, "loss": 0.2475, "step": 794 }, { "epoch": 1.93, "learning_rate": 3.5480988335844886e-05, "loss": 0.269, "step": 795 }, { "epoch": 1.94, "learning_rate": 3.544474309265834e-05, "loss": 0.2577, "step": 796 }, { "epoch": 1.94, "learning_rate": 3.5408471233199716e-05, "loss": 0.2848, "step": 797 }, { "epoch": 1.94, "learning_rate": 3.5372172849900374e-05, "loss": 0.2677, "step": 798 }, { "epoch": 1.94, "learning_rate": 3.533584803525926e-05, "loss": 0.2583, "step": 799 }, { "epoch": 1.95, "learning_rate": 3.529949688184265e-05, "loss": 0.2596, "step": 800 }, { "epoch": 1.95, "learning_rate": 3.526311948228397e-05, "loss": 0.2552, "step": 801 }, { "epoch": 1.95, "learning_rate": 3.5226715929283506e-05, "loss": 0.2709, "step": 802 }, { "epoch": 1.95, "learning_rate": 3.519028631560819e-05, "loss": 0.2602, "step": 803 }, { "epoch": 1.96, "learning_rate": 3.51538307340914e-05, "loss": 0.2537, "step": 804 }, { "epoch": 1.96, "learning_rate": 3.511734927763265e-05, "loss": 0.274, "step": 805 }, { "epoch": 1.96, "learning_rate": 3.508084203919739e-05, "loss": 0.2442, "step": 806 }, { "epoch": 1.96, "learning_rate": 3.5044309111816796e-05, "loss": 0.2676, "step": 807 }, { "epoch": 1.97, "learning_rate": 3.50077505885875e-05, "loss": 0.2875, "step": 808 }, { "epoch": 1.97, "learning_rate": 3.4971166562671324e-05, "loss": 0.2686, "step": 809 }, { "epoch": 1.97, "learning_rate": 3.493455712729514e-05, "loss": 0.2753, "step": 810 }, { "epoch": 1.97, "learning_rate": 3.4897922375750514e-05, "loss": 0.2703, "step": 811 }, { "epoch": 1.98, "learning_rate": 3.4861262401393566e-05, "loss": 0.2661, "step": 812 }, { "epoch": 1.98, "learning_rate": 3.482457729764466e-05, "loss": 0.2644, "step": 813 }, { "epoch": 1.98, "learning_rate": 3.478786715798823e-05, "loss": 0.3001, "step": 814 }, { "epoch": 1.98, "learning_rate": 3.475113207597247e-05, "loss": 0.269, "step": 815 }, { "epoch": 1.99, "learning_rate": 3.4714372145209166e-05, "loss": 0.2618, "step": 816 }, { "epoch": 1.99, "learning_rate": 3.467758745937342e-05, "loss": 0.2592, "step": 817 }, { "epoch": 1.99, "learning_rate": 3.46407781122034e-05, "loss": 0.2805, "step": 818 }, { "epoch": 1.99, "learning_rate": 3.460394419750013e-05, "loss": 0.2432, "step": 819 }, { "epoch": 2.0, "learning_rate": 3.456708580912725e-05, "loss": 0.2524, "step": 820 }, { "epoch": 2.0, "learning_rate": 3.4530203041010745e-05, "loss": 0.2529, "step": 821 }, { "epoch": 2.0, "eval_loss": 0.5900537967681885, "eval_runtime": 116.0624, "eval_samples_per_second": 6.565, "eval_steps_per_second": 0.414, "step": 821 }, { "epoch": 2.0, "learning_rate": 3.449329598713874e-05, "loss": 0.2271, "step": 822 }, { "epoch": 2.0, "learning_rate": 3.445636474156125e-05, "loss": 0.1519, "step": 823 }, { "epoch": 2.01, "learning_rate": 3.4419409398389935e-05, "loss": 0.1477, "step": 824 }, { "epoch": 2.01, "learning_rate": 3.438243005179784e-05, "loss": 0.1407, "step": 825 }, { "epoch": 2.01, "learning_rate": 3.434542679601922e-05, "loss": 0.1235, "step": 826 }, { "epoch": 2.01, "learning_rate": 3.4308399725349226e-05, "loss": 0.1323, "step": 827 }, { "epoch": 2.02, "learning_rate": 3.42713489341437e-05, "loss": 0.1445, "step": 828 }, { "epoch": 2.02, "learning_rate": 3.423427451681895e-05, "loss": 0.1257, "step": 829 }, { "epoch": 2.02, "learning_rate": 3.419717656785146e-05, "loss": 0.1249, "step": 830 }, { "epoch": 2.02, "learning_rate": 3.416005518177771e-05, "loss": 0.1279, "step": 831 }, { "epoch": 2.02, "learning_rate": 3.4122910453193885e-05, "loss": 0.1278, "step": 832 }, { "epoch": 2.03, "learning_rate": 3.408574247675566e-05, "loss": 0.1207, "step": 833 }, { "epoch": 2.03, "learning_rate": 3.4048551347177945e-05, "loss": 0.1184, "step": 834 }, { "epoch": 2.03, "learning_rate": 3.401133715923467e-05, "loss": 0.1308, "step": 835 }, { "epoch": 2.03, "learning_rate": 3.3974100007758514e-05, "loss": 0.1291, "step": 836 }, { "epoch": 2.04, "learning_rate": 3.3936839987640664e-05, "loss": 0.1129, "step": 837 }, { "epoch": 2.04, "learning_rate": 3.389955719383058e-05, "loss": 0.1264, "step": 838 }, { "epoch": 2.04, "learning_rate": 3.3862251721335794e-05, "loss": 0.1149, "step": 839 }, { "epoch": 2.04, "learning_rate": 3.382492366522158e-05, "loss": 0.1234, "step": 840 }, { "epoch": 2.05, "learning_rate": 3.378757312061079e-05, "loss": 0.1245, "step": 841 }, { "epoch": 2.05, "learning_rate": 3.375020018268359e-05, "loss": 0.1154, "step": 842 }, { "epoch": 2.05, "learning_rate": 3.371280494667719e-05, "loss": 0.1231, "step": 843 }, { "epoch": 2.05, "learning_rate": 3.367538750788563e-05, "loss": 0.1224, "step": 844 }, { "epoch": 2.06, "learning_rate": 3.363794796165953e-05, "loss": 0.1196, "step": 845 }, { "epoch": 2.06, "learning_rate": 3.360048640340585e-05, "loss": 0.1189, "step": 846 }, { "epoch": 2.06, "learning_rate": 3.3563002928587627e-05, "loss": 0.1173, "step": 847 }, { "epoch": 2.06, "learning_rate": 3.352549763272379e-05, "loss": 0.1248, "step": 848 }, { "epoch": 2.07, "learning_rate": 3.348797061138881e-05, "loss": 0.1255, "step": 849 }, { "epoch": 2.07, "learning_rate": 3.3450421960212566e-05, "loss": 0.1178, "step": 850 }, { "epoch": 2.07, "learning_rate": 3.3412851774880064e-05, "loss": 0.121, "step": 851 }, { "epoch": 2.07, "learning_rate": 3.337526015113115e-05, "loss": 0.1186, "step": 852 }, { "epoch": 2.08, "learning_rate": 3.3337647184760315e-05, "loss": 0.1191, "step": 853 }, { "epoch": 2.08, "learning_rate": 3.3300012971616467e-05, "loss": 0.1223, "step": 854 }, { "epoch": 2.08, "learning_rate": 3.3262357607602596e-05, "loss": 0.1104, "step": 855 }, { "epoch": 2.08, "learning_rate": 3.322468118867564e-05, "loss": 0.125, "step": 856 }, { "epoch": 2.09, "learning_rate": 3.318698381084619e-05, "loss": 0.1221, "step": 857 }, { "epoch": 2.09, "learning_rate": 3.314926557017821e-05, "loss": 0.1181, "step": 858 }, { "epoch": 2.09, "learning_rate": 3.3111526562788864e-05, "loss": 0.1197, "step": 859 }, { "epoch": 2.09, "learning_rate": 3.3073766884848234e-05, "loss": 0.1168, "step": 860 }, { "epoch": 2.1, "learning_rate": 3.303598663257904e-05, "loss": 0.1186, "step": 861 }, { "epoch": 2.1, "learning_rate": 3.299818590225647e-05, "loss": 0.1192, "step": 862 }, { "epoch": 2.1, "learning_rate": 3.29603647902079e-05, "loss": 0.1192, "step": 863 }, { "epoch": 2.1, "learning_rate": 3.2922523392812605e-05, "loss": 0.1285, "step": 864 }, { "epoch": 2.11, "learning_rate": 3.2884661806501574e-05, "loss": 0.1299, "step": 865 }, { "epoch": 2.11, "learning_rate": 3.284678012775727e-05, "loss": 0.1322, "step": 866 }, { "epoch": 2.11, "learning_rate": 3.280887845311332e-05, "loss": 0.1174, "step": 867 }, { "epoch": 2.11, "learning_rate": 3.27709568791543e-05, "loss": 0.1225, "step": 868 }, { "epoch": 2.11, "learning_rate": 3.273301550251555e-05, "loss": 0.1194, "step": 869 }, { "epoch": 2.12, "learning_rate": 3.269505441988281e-05, "loss": 0.1139, "step": 870 }, { "epoch": 2.12, "learning_rate": 3.265707372799208e-05, "loss": 0.1294, "step": 871 }, { "epoch": 2.12, "learning_rate": 3.2619073523629304e-05, "loss": 0.1244, "step": 872 }, { "epoch": 2.12, "learning_rate": 3.258105390363016e-05, "loss": 0.1284, "step": 873 }, { "epoch": 2.13, "learning_rate": 3.2543014964879816e-05, "loss": 0.1234, "step": 874 }, { "epoch": 2.13, "learning_rate": 3.250495680431264e-05, "loss": 0.1163, "step": 875 }, { "epoch": 2.13, "learning_rate": 3.246687951891201e-05, "loss": 0.1269, "step": 876 }, { "epoch": 2.13, "learning_rate": 3.2428783205710026e-05, "loss": 0.1174, "step": 877 }, { "epoch": 2.14, "learning_rate": 3.2390667961787275e-05, "loss": 0.1226, "step": 878 }, { "epoch": 2.14, "learning_rate": 3.23525338842726e-05, "loss": 0.1174, "step": 879 }, { "epoch": 2.14, "learning_rate": 3.231438107034281e-05, "loss": 0.1212, "step": 880 }, { "epoch": 2.14, "learning_rate": 3.22762096172225e-05, "loss": 0.1189, "step": 881 }, { "epoch": 2.15, "learning_rate": 3.223801962218372e-05, "loss": 0.1232, "step": 882 }, { "epoch": 2.15, "learning_rate": 3.21998111825458e-05, "loss": 0.1271, "step": 883 }, { "epoch": 2.15, "learning_rate": 3.216158439567506e-05, "loss": 0.1229, "step": 884 }, { "epoch": 2.15, "learning_rate": 3.2123339358984575e-05, "loss": 0.1144, "step": 885 }, { "epoch": 2.16, "learning_rate": 3.208507616993393e-05, "loss": 0.1251, "step": 886 }, { "epoch": 2.16, "learning_rate": 3.2046794926028964e-05, "loss": 0.1176, "step": 887 }, { "epoch": 2.16, "learning_rate": 3.200849572482153e-05, "loss": 0.1346, "step": 888 }, { "epoch": 2.16, "learning_rate": 3.1970178663909236e-05, "loss": 0.1159, "step": 889 }, { "epoch": 2.17, "learning_rate": 3.19318438409352e-05, "loss": 0.1366, "step": 890 }, { "epoch": 2.17, "learning_rate": 3.189349135358781e-05, "loss": 0.1249, "step": 891 }, { "epoch": 2.17, "learning_rate": 3.1855121299600456e-05, "loss": 0.1261, "step": 892 }, { "epoch": 2.17, "learning_rate": 3.181673377675131e-05, "loss": 0.1217, "step": 893 }, { "epoch": 2.18, "learning_rate": 3.1778328882863054e-05, "loss": 0.1191, "step": 894 }, { "epoch": 2.18, "learning_rate": 3.173990671580263e-05, "loss": 0.1155, "step": 895 }, { "epoch": 2.18, "learning_rate": 3.1701467373480995e-05, "loss": 0.1107, "step": 896 }, { "epoch": 2.18, "learning_rate": 3.166301095385288e-05, "loss": 0.1177, "step": 897 }, { "epoch": 2.19, "learning_rate": 3.162453755491655e-05, "loss": 0.1212, "step": 898 }, { "epoch": 2.19, "learning_rate": 3.1586047274713494e-05, "loss": 0.126, "step": 899 }, { "epoch": 2.19, "learning_rate": 3.154754021132827e-05, "loss": 0.1171, "step": 900 }, { "epoch": 2.19, "learning_rate": 3.1509016462888174e-05, "loss": 0.1225, "step": 901 }, { "epoch": 2.2, "learning_rate": 3.147047612756302e-05, "loss": 0.1315, "step": 902 }, { "epoch": 2.2, "learning_rate": 3.143191930356491e-05, "loss": 0.1207, "step": 903 }, { "epoch": 2.2, "learning_rate": 3.139334608914795e-05, "loss": 0.1307, "step": 904 }, { "epoch": 2.2, "learning_rate": 3.135475658260801e-05, "loss": 0.1163, "step": 905 }, { "epoch": 2.21, "learning_rate": 3.131615088228249e-05, "loss": 0.1162, "step": 906 }, { "epoch": 2.21, "learning_rate": 3.127752908655004e-05, "loss": 0.1234, "step": 907 }, { "epoch": 2.21, "learning_rate": 3.123889129383034e-05, "loss": 0.1168, "step": 908 }, { "epoch": 2.21, "learning_rate": 3.1200237602583834e-05, "loss": 0.1238, "step": 909 }, { "epoch": 2.21, "learning_rate": 3.116156811131148e-05, "loss": 0.1255, "step": 910 }, { "epoch": 2.22, "learning_rate": 3.112288291855449e-05, "loss": 0.124, "step": 911 }, { "epoch": 2.22, "learning_rate": 3.108418212289408e-05, "loss": 0.1267, "step": 912 }, { "epoch": 2.22, "learning_rate": 3.104546582295126e-05, "loss": 0.124, "step": 913 }, { "epoch": 2.22, "learning_rate": 3.100673411738652e-05, "loss": 0.1308, "step": 914 }, { "epoch": 2.23, "learning_rate": 3.096798710489962e-05, "loss": 0.1213, "step": 915 }, { "epoch": 2.23, "learning_rate": 3.092922488422933e-05, "loss": 0.1277, "step": 916 }, { "epoch": 2.23, "learning_rate": 3.089044755415315e-05, "loss": 0.1201, "step": 917 }, { "epoch": 2.23, "learning_rate": 3.0851655213487124e-05, "loss": 0.115, "step": 918 }, { "epoch": 2.24, "learning_rate": 3.0812847961085526e-05, "loss": 0.1257, "step": 919 }, { "epoch": 2.24, "learning_rate": 3.077402589584061e-05, "loss": 0.1203, "step": 920 }, { "epoch": 2.24, "learning_rate": 3.0735189116682414e-05, "loss": 0.1255, "step": 921 }, { "epoch": 2.24, "learning_rate": 3.0696337722578444e-05, "loss": 0.1215, "step": 922 }, { "epoch": 2.25, "learning_rate": 3.065747181253346e-05, "loss": 0.1275, "step": 923 }, { "epoch": 2.25, "learning_rate": 3.0618591485589224e-05, "loss": 0.1346, "step": 924 }, { "epoch": 2.25, "learning_rate": 3.0579696840824206e-05, "loss": 0.1285, "step": 925 }, { "epoch": 2.25, "learning_rate": 3.05407879773534e-05, "loss": 0.1261, "step": 926 }, { "epoch": 2.26, "learning_rate": 3.0501864994328e-05, "loss": 0.1192, "step": 927 }, { "epoch": 2.26, "learning_rate": 3.04629279909352e-05, "loss": 0.1186, "step": 928 }, { "epoch": 2.26, "learning_rate": 3.0423977066397912e-05, "loss": 0.1244, "step": 929 }, { "epoch": 2.26, "learning_rate": 3.0385012319974537e-05, "loss": 0.1248, "step": 930 }, { "epoch": 2.27, "learning_rate": 3.034603385095868e-05, "loss": 0.1155, "step": 931 }, { "epoch": 2.27, "learning_rate": 3.0307041758678932e-05, "loss": 0.1267, "step": 932 }, { "epoch": 2.27, "learning_rate": 3.0268036142498596e-05, "loss": 0.1219, "step": 933 }, { "epoch": 2.27, "learning_rate": 3.022901710181542e-05, "loss": 0.126, "step": 934 }, { "epoch": 2.28, "learning_rate": 3.018998473606139e-05, "loss": 0.1219, "step": 935 }, { "epoch": 2.28, "learning_rate": 3.0150939144702423e-05, "loss": 0.1208, "step": 936 }, { "epoch": 2.28, "learning_rate": 3.011188042723816e-05, "loss": 0.1234, "step": 937 }, { "epoch": 2.28, "learning_rate": 3.007280868320167e-05, "loss": 0.1252, "step": 938 }, { "epoch": 2.29, "learning_rate": 3.0033724012159242e-05, "loss": 0.1185, "step": 939 }, { "epoch": 2.29, "learning_rate": 2.9994626513710084e-05, "loss": 0.1194, "step": 940 }, { "epoch": 2.29, "learning_rate": 2.99555162874861e-05, "loss": 0.1238, "step": 941 }, { "epoch": 2.29, "learning_rate": 2.9916393433151634e-05, "loss": 0.1208, "step": 942 }, { "epoch": 2.3, "learning_rate": 2.9877258050403212e-05, "loss": 0.1218, "step": 943 }, { "epoch": 2.3, "learning_rate": 2.9838110238969264e-05, "loss": 0.1254, "step": 944 }, { "epoch": 2.3, "learning_rate": 2.9798950098609923e-05, "loss": 0.1208, "step": 945 }, { "epoch": 2.3, "learning_rate": 2.975977772911671e-05, "loss": 0.1211, "step": 946 }, { "epoch": 2.3, "learning_rate": 2.9720593230312337e-05, "loss": 0.1177, "step": 947 }, { "epoch": 2.31, "learning_rate": 2.9681396702050406e-05, "loss": 0.1187, "step": 948 }, { "epoch": 2.31, "learning_rate": 2.964218824421518e-05, "loss": 0.125, "step": 949 }, { "epoch": 2.31, "learning_rate": 2.9602967956721316e-05, "loss": 0.1174, "step": 950 }, { "epoch": 2.31, "learning_rate": 2.9563735939513636e-05, "loss": 0.1167, "step": 951 }, { "epoch": 2.32, "learning_rate": 2.9524492292566823e-05, "loss": 0.1175, "step": 952 }, { "epoch": 2.32, "learning_rate": 2.948523711588522e-05, "loss": 0.1295, "step": 953 }, { "epoch": 2.32, "learning_rate": 2.9445970509502546e-05, "loss": 0.1336, "step": 954 }, { "epoch": 2.32, "learning_rate": 2.940669257348163e-05, "loss": 0.1218, "step": 955 }, { "epoch": 2.33, "learning_rate": 2.9367403407914202e-05, "loss": 0.1205, "step": 956 }, { "epoch": 2.33, "learning_rate": 2.932810311292058e-05, "loss": 0.1311, "step": 957 }, { "epoch": 2.33, "learning_rate": 2.9288791788649462e-05, "loss": 0.1258, "step": 958 }, { "epoch": 2.33, "learning_rate": 2.9249469535277636e-05, "loss": 0.1255, "step": 959 }, { "epoch": 2.34, "learning_rate": 2.921013645300975e-05, "loss": 0.1263, "step": 960 }, { "epoch": 2.34, "learning_rate": 2.9170792642078055e-05, "loss": 0.1219, "step": 961 }, { "epoch": 2.34, "learning_rate": 2.9131438202742124e-05, "loss": 0.1224, "step": 962 }, { "epoch": 2.34, "learning_rate": 2.909207323528863e-05, "loss": 0.122, "step": 963 }, { "epoch": 2.35, "learning_rate": 2.9052697840031064e-05, "loss": 0.1199, "step": 964 }, { "epoch": 2.35, "learning_rate": 2.9013312117309488e-05, "loss": 0.1206, "step": 965 }, { "epoch": 2.35, "learning_rate": 2.8973916167490307e-05, "loss": 0.1163, "step": 966 }, { "epoch": 2.35, "learning_rate": 2.8934510090965944e-05, "loss": 0.1222, "step": 967 }, { "epoch": 2.36, "learning_rate": 2.889509398815467e-05, "loss": 0.114, "step": 968 }, { "epoch": 2.36, "learning_rate": 2.8855667959500276e-05, "loss": 0.1218, "step": 969 }, { "epoch": 2.36, "learning_rate": 2.8816232105471863e-05, "loss": 0.1257, "step": 970 }, { "epoch": 2.36, "learning_rate": 2.8776786526563575e-05, "loss": 0.1216, "step": 971 }, { "epoch": 2.37, "learning_rate": 2.8737331323294314e-05, "loss": 0.1285, "step": 972 }, { "epoch": 2.37, "learning_rate": 2.8697866596207524e-05, "loss": 0.1285, "step": 973 }, { "epoch": 2.37, "learning_rate": 2.8658392445870928e-05, "loss": 0.1286, "step": 974 }, { "epoch": 2.37, "learning_rate": 2.8618908972876246e-05, "loss": 0.1267, "step": 975 }, { "epoch": 2.38, "learning_rate": 2.857941627783895e-05, "loss": 0.1182, "step": 976 }, { "epoch": 2.38, "learning_rate": 2.8539914461398043e-05, "loss": 0.1193, "step": 977 }, { "epoch": 2.38, "learning_rate": 2.8500403624215734e-05, "loss": 0.1157, "step": 978 }, { "epoch": 2.38, "learning_rate": 2.846088386697723e-05, "loss": 0.1269, "step": 979 }, { "epoch": 2.39, "learning_rate": 2.8421355290390506e-05, "loss": 0.1251, "step": 980 }, { "epoch": 2.39, "learning_rate": 2.838181799518595e-05, "loss": 0.1176, "step": 981 }, { "epoch": 2.39, "learning_rate": 2.834227208211621e-05, "loss": 0.1238, "step": 982 }, { "epoch": 2.39, "learning_rate": 2.8302717651955895e-05, "loss": 0.1226, "step": 983 }, { "epoch": 2.39, "learning_rate": 2.8263154805501297e-05, "loss": 0.1294, "step": 984 }, { "epoch": 2.4, "learning_rate": 2.822358364357015e-05, "loss": 0.1201, "step": 985 }, { "epoch": 2.4, "learning_rate": 2.8184004267001425e-05, "loss": 0.1255, "step": 986 }, { "epoch": 2.4, "learning_rate": 2.8144416776654963e-05, "loss": 0.1228, "step": 987 }, { "epoch": 2.4, "learning_rate": 2.810482127341133e-05, "loss": 0.1222, "step": 988 }, { "epoch": 2.41, "learning_rate": 2.8065217858171495e-05, "loss": 0.118, "step": 989 }, { "epoch": 2.41, "learning_rate": 2.8025606631856578e-05, "loss": 0.1261, "step": 990 }, { "epoch": 2.41, "learning_rate": 2.7985987695407616e-05, "loss": 0.1333, "step": 991 }, { "epoch": 2.41, "learning_rate": 2.7946361149785306e-05, "loss": 0.1165, "step": 992 }, { "epoch": 2.42, "learning_rate": 2.79067270959697e-05, "loss": 0.1295, "step": 993 }, { "epoch": 2.42, "learning_rate": 2.7867085634960016e-05, "loss": 0.125, "step": 994 }, { "epoch": 2.42, "learning_rate": 2.782743686777433e-05, "loss": 0.1143, "step": 995 }, { "epoch": 2.42, "learning_rate": 2.778778089544935e-05, "loss": 0.1247, "step": 996 }, { "epoch": 2.43, "learning_rate": 2.7748117819040127e-05, "loss": 0.1219, "step": 997 }, { "epoch": 2.43, "learning_rate": 2.770844773961983e-05, "loss": 0.1319, "step": 998 }, { "epoch": 2.43, "learning_rate": 2.7668770758279473e-05, "loss": 0.1291, "step": 999 }, { "epoch": 2.43, "learning_rate": 2.762908697612765e-05, "loss": 0.118, "step": 1000 }, { "epoch": 2.44, "learning_rate": 2.7589396494290287e-05, "loss": 0.134, "step": 1001 }, { "epoch": 2.44, "learning_rate": 2.7549699413910384e-05, "loss": 0.1295, "step": 1002 }, { "epoch": 2.44, "learning_rate": 2.7509995836147766e-05, "loss": 0.1227, "step": 1003 }, { "epoch": 2.44, "learning_rate": 2.74702858621788e-05, "loss": 0.118, "step": 1004 }, { "epoch": 2.45, "learning_rate": 2.743056959319616e-05, "loss": 0.1226, "step": 1005 }, { "epoch": 2.45, "learning_rate": 2.739084713040856e-05, "loss": 0.1257, "step": 1006 }, { "epoch": 2.45, "learning_rate": 2.7351118575040496e-05, "loss": 0.1215, "step": 1007 }, { "epoch": 2.45, "learning_rate": 2.7311384028332e-05, "loss": 0.1232, "step": 1008 }, { "epoch": 2.46, "learning_rate": 2.7271643591538353e-05, "loss": 0.1208, "step": 1009 }, { "epoch": 2.46, "learning_rate": 2.723189736592986e-05, "loss": 0.1248, "step": 1010 }, { "epoch": 2.46, "learning_rate": 2.719214545279158e-05, "loss": 0.119, "step": 1011 }, { "epoch": 2.46, "learning_rate": 2.715238795342305e-05, "loss": 0.1213, "step": 1012 }, { "epoch": 2.47, "learning_rate": 2.711262496913805e-05, "loss": 0.122, "step": 1013 }, { "epoch": 2.47, "learning_rate": 2.7072856601264345e-05, "loss": 0.1218, "step": 1014 }, { "epoch": 2.47, "learning_rate": 2.7033082951143418e-05, "loss": 0.1178, "step": 1015 }, { "epoch": 2.47, "learning_rate": 2.6993304120130196e-05, "loss": 0.127, "step": 1016 }, { "epoch": 2.48, "learning_rate": 2.6953520209592824e-05, "loss": 0.1145, "step": 1017 }, { "epoch": 2.48, "learning_rate": 2.69137313209124e-05, "loss": 0.1256, "step": 1018 }, { "epoch": 2.48, "learning_rate": 2.6873937555482663e-05, "loss": 0.1305, "step": 1019 }, { "epoch": 2.48, "learning_rate": 2.6834139014709843e-05, "loss": 0.1268, "step": 1020 }, { "epoch": 2.48, "learning_rate": 2.6794335800012293e-05, "loss": 0.1235, "step": 1021 }, { "epoch": 2.49, "learning_rate": 2.6754528012820283e-05, "loss": 0.125, "step": 1022 }, { "epoch": 2.49, "learning_rate": 2.671471575457576e-05, "loss": 0.1309, "step": 1023 }, { "epoch": 2.49, "learning_rate": 2.6674899126732045e-05, "loss": 0.115, "step": 1024 }, { "epoch": 2.49, "learning_rate": 2.663507823075358e-05, "loss": 0.1269, "step": 1025 }, { "epoch": 2.5, "learning_rate": 2.659525316811571e-05, "loss": 0.1276, "step": 1026 }, { "epoch": 2.5, "learning_rate": 2.6555424040304398e-05, "loss": 0.1118, "step": 1027 }, { "epoch": 2.5, "learning_rate": 2.6515590948815933e-05, "loss": 0.1252, "step": 1028 }, { "epoch": 2.5, "learning_rate": 2.6475753995156743e-05, "loss": 0.1184, "step": 1029 }, { "epoch": 2.51, "learning_rate": 2.643591328084309e-05, "loss": 0.1217, "step": 1030 }, { "epoch": 2.51, "learning_rate": 2.6396068907400784e-05, "loss": 0.1271, "step": 1031 }, { "epoch": 2.51, "learning_rate": 2.635622097636501e-05, "loss": 0.1175, "step": 1032 }, { "epoch": 2.51, "learning_rate": 2.6316369589279998e-05, "loss": 0.1184, "step": 1033 }, { "epoch": 2.52, "learning_rate": 2.6276514847698762e-05, "loss": 0.1197, "step": 1034 }, { "epoch": 2.52, "learning_rate": 2.623665685318291e-05, "loss": 0.1269, "step": 1035 }, { "epoch": 2.52, "learning_rate": 2.6196795707302302e-05, "loss": 0.1257, "step": 1036 }, { "epoch": 2.52, "learning_rate": 2.6156931511634834e-05, "loss": 0.1276, "step": 1037 }, { "epoch": 2.53, "learning_rate": 2.6117064367766197e-05, "loss": 0.1322, "step": 1038 }, { "epoch": 2.53, "learning_rate": 2.607719437728957e-05, "loss": 0.1239, "step": 1039 }, { "epoch": 2.53, "learning_rate": 2.603732164180539e-05, "loss": 0.1169, "step": 1040 }, { "epoch": 2.53, "learning_rate": 2.5997446262921106e-05, "loss": 0.1144, "step": 1041 }, { "epoch": 2.54, "learning_rate": 2.595756834225089e-05, "loss": 0.1223, "step": 1042 }, { "epoch": 2.54, "learning_rate": 2.5917687981415373e-05, "loss": 0.1238, "step": 1043 }, { "epoch": 2.54, "learning_rate": 2.5877805282041455e-05, "loss": 0.1201, "step": 1044 }, { "epoch": 2.54, "learning_rate": 2.583792034576194e-05, "loss": 0.1327, "step": 1045 }, { "epoch": 2.55, "learning_rate": 2.579803327421536e-05, "loss": 0.1336, "step": 1046 }, { "epoch": 2.55, "learning_rate": 2.575814416904569e-05, "loss": 0.1177, "step": 1047 }, { "epoch": 2.55, "learning_rate": 2.571825313190208e-05, "loss": 0.1153, "step": 1048 }, { "epoch": 2.55, "learning_rate": 2.5678360264438606e-05, "loss": 0.1199, "step": 1049 }, { "epoch": 2.56, "learning_rate": 2.5638465668314006e-05, "loss": 0.1191, "step": 1050 }, { "epoch": 2.56, "learning_rate": 2.5598569445191418e-05, "loss": 0.1132, "step": 1051 }, { "epoch": 2.56, "learning_rate": 2.5558671696738146e-05, "loss": 0.1266, "step": 1052 }, { "epoch": 2.56, "learning_rate": 2.5518772524625357e-05, "loss": 0.1191, "step": 1053 }, { "epoch": 2.57, "learning_rate": 2.5478872030527855e-05, "loss": 0.1148, "step": 1054 }, { "epoch": 2.57, "learning_rate": 2.5438970316123822e-05, "loss": 0.1224, "step": 1055 }, { "epoch": 2.57, "learning_rate": 2.539906748309454e-05, "loss": 0.1136, "step": 1056 }, { "epoch": 2.57, "learning_rate": 2.535916363312414e-05, "loss": 0.1199, "step": 1057 }, { "epoch": 2.57, "learning_rate": 2.5319258867899348e-05, "loss": 0.1241, "step": 1058 }, { "epoch": 2.58, "learning_rate": 2.5279353289109227e-05, "loss": 0.1202, "step": 1059 }, { "epoch": 2.58, "learning_rate": 2.5239446998444898e-05, "loss": 0.1247, "step": 1060 }, { "epoch": 2.58, "learning_rate": 2.5199540097599318e-05, "loss": 0.1345, "step": 1061 }, { "epoch": 2.58, "learning_rate": 2.5159632688266982e-05, "loss": 0.1223, "step": 1062 }, { "epoch": 2.59, "learning_rate": 2.511972487214369e-05, "loss": 0.1141, "step": 1063 }, { "epoch": 2.59, "learning_rate": 2.5079816750926265e-05, "loss": 0.1257, "step": 1064 }, { "epoch": 2.59, "learning_rate": 2.5039908426312332e-05, "loss": 0.1235, "step": 1065 }, { "epoch": 2.59, "learning_rate": 2.5e-05, "loss": 0.1297, "step": 1066 }, { "epoch": 2.6, "learning_rate": 2.4960091573687677e-05, "loss": 0.1281, "step": 1067 }, { "epoch": 2.6, "learning_rate": 2.4920183249073744e-05, "loss": 0.1176, "step": 1068 }, { "epoch": 2.6, "learning_rate": 2.488027512785632e-05, "loss": 0.1204, "step": 1069 }, { "epoch": 2.6, "learning_rate": 2.4840367311733024e-05, "loss": 0.1318, "step": 1070 }, { "epoch": 2.61, "learning_rate": 2.4800459902400684e-05, "loss": 0.1293, "step": 1071 }, { "epoch": 2.61, "learning_rate": 2.4760553001555108e-05, "loss": 0.1154, "step": 1072 }, { "epoch": 2.61, "learning_rate": 2.472064671089078e-05, "loss": 0.1178, "step": 1073 }, { "epoch": 2.61, "learning_rate": 2.468074113210066e-05, "loss": 0.125, "step": 1074 }, { "epoch": 2.62, "learning_rate": 2.4640836366875873e-05, "loss": 0.1191, "step": 1075 }, { "epoch": 2.62, "learning_rate": 2.4600932516905466e-05, "loss": 0.1264, "step": 1076 }, { "epoch": 2.62, "learning_rate": 2.4561029683876184e-05, "loss": 0.1207, "step": 1077 }, { "epoch": 2.62, "learning_rate": 2.4521127969472148e-05, "loss": 0.1253, "step": 1078 }, { "epoch": 2.63, "learning_rate": 2.4481227475374652e-05, "loss": 0.1255, "step": 1079 }, { "epoch": 2.63, "learning_rate": 2.4441328303261867e-05, "loss": 0.1287, "step": 1080 }, { "epoch": 2.63, "learning_rate": 2.440143055480859e-05, "loss": 0.1176, "step": 1081 }, { "epoch": 2.63, "learning_rate": 2.4361534331686003e-05, "loss": 0.1223, "step": 1082 }, { "epoch": 2.64, "learning_rate": 2.4321639735561403e-05, "loss": 0.1321, "step": 1083 }, { "epoch": 2.64, "learning_rate": 2.4281746868097926e-05, "loss": 0.1268, "step": 1084 }, { "epoch": 2.64, "learning_rate": 2.4241855830954316e-05, "loss": 0.1229, "step": 1085 }, { "epoch": 2.64, "learning_rate": 2.420196672578465e-05, "loss": 0.118, "step": 1086 }, { "epoch": 2.65, "learning_rate": 2.4162079654238073e-05, "loss": 0.135, "step": 1087 }, { "epoch": 2.65, "learning_rate": 2.412219471795855e-05, "loss": 0.1135, "step": 1088 }, { "epoch": 2.65, "learning_rate": 2.4082312018584626e-05, "loss": 0.1158, "step": 1089 }, { "epoch": 2.65, "learning_rate": 2.4042431657749117e-05, "loss": 0.125, "step": 1090 }, { "epoch": 2.66, "learning_rate": 2.40025537370789e-05, "loss": 0.1245, "step": 1091 }, { "epoch": 2.66, "learning_rate": 2.3962678358194614e-05, "loss": 0.1259, "step": 1092 }, { "epoch": 2.66, "learning_rate": 2.3922805622710438e-05, "loss": 0.1157, "step": 1093 }, { "epoch": 2.66, "learning_rate": 2.3882935632233805e-05, "loss": 0.1228, "step": 1094 }, { "epoch": 2.67, "learning_rate": 2.3843068488365168e-05, "loss": 0.1255, "step": 1095 }, { "epoch": 2.67, "learning_rate": 2.3803204292697704e-05, "loss": 0.1207, "step": 1096 }, { "epoch": 2.67, "learning_rate": 2.3763343146817096e-05, "loss": 0.1201, "step": 1097 }, { "epoch": 2.67, "learning_rate": 2.372348515230124e-05, "loss": 0.1203, "step": 1098 }, { "epoch": 2.67, "learning_rate": 2.368363041072001e-05, "loss": 0.1234, "step": 1099 }, { "epoch": 2.68, "learning_rate": 2.364377902363499e-05, "loss": 0.1252, "step": 1100 }, { "epoch": 2.68, "learning_rate": 2.3603931092599215e-05, "loss": 0.1239, "step": 1101 }, { "epoch": 2.68, "learning_rate": 2.356408671915692e-05, "loss": 0.1148, "step": 1102 }, { "epoch": 2.68, "learning_rate": 2.3524246004843263e-05, "loss": 0.12, "step": 1103 }, { "epoch": 2.69, "learning_rate": 2.3484409051184076e-05, "loss": 0.12, "step": 1104 }, { "epoch": 2.69, "learning_rate": 2.3444575959695614e-05, "loss": 0.1235, "step": 1105 }, { "epoch": 2.69, "learning_rate": 2.340474683188429e-05, "loss": 0.121, "step": 1106 }, { "epoch": 2.69, "learning_rate": 2.3364921769246423e-05, "loss": 0.1218, "step": 1107 }, { "epoch": 2.7, "learning_rate": 2.332510087326796e-05, "loss": 0.1246, "step": 1108 }, { "epoch": 2.7, "learning_rate": 2.3285284245424244e-05, "loss": 0.1243, "step": 1109 }, { "epoch": 2.7, "learning_rate": 2.324547198717972e-05, "loss": 0.1206, "step": 1110 }, { "epoch": 2.7, "learning_rate": 2.3205664199987716e-05, "loss": 0.1172, "step": 1111 }, { "epoch": 2.71, "learning_rate": 2.316586098529017e-05, "loss": 0.111, "step": 1112 }, { "epoch": 2.71, "learning_rate": 2.3126062444517336e-05, "loss": 0.1272, "step": 1113 }, { "epoch": 2.71, "learning_rate": 2.3086268679087607e-05, "loss": 0.1196, "step": 1114 }, { "epoch": 2.71, "learning_rate": 2.3046479790407178e-05, "loss": 0.126, "step": 1115 }, { "epoch": 2.72, "learning_rate": 2.3006695879869807e-05, "loss": 0.1232, "step": 1116 }, { "epoch": 2.72, "learning_rate": 2.2966917048856588e-05, "loss": 0.115, "step": 1117 }, { "epoch": 2.72, "learning_rate": 2.292714339873566e-05, "loss": 0.1258, "step": 1118 }, { "epoch": 2.72, "learning_rate": 2.288737503086195e-05, "loss": 0.1246, "step": 1119 }, { "epoch": 2.73, "learning_rate": 2.284761204657696e-05, "loss": 0.1262, "step": 1120 }, { "epoch": 2.73, "learning_rate": 2.280785454720843e-05, "loss": 0.1126, "step": 1121 }, { "epoch": 2.73, "learning_rate": 2.2768102634070147e-05, "loss": 0.1244, "step": 1122 }, { "epoch": 2.73, "learning_rate": 2.2728356408461653e-05, "loss": 0.1271, "step": 1123 }, { "epoch": 2.74, "learning_rate": 2.268861597166801e-05, "loss": 0.1243, "step": 1124 }, { "epoch": 2.74, "learning_rate": 2.26488814249595e-05, "loss": 0.1275, "step": 1125 }, { "epoch": 2.74, "learning_rate": 2.2609152869591446e-05, "loss": 0.1219, "step": 1126 }, { "epoch": 2.74, "learning_rate": 2.2569430406803846e-05, "loss": 0.1455, "step": 1127 }, { "epoch": 2.75, "learning_rate": 2.2529714137821206e-05, "loss": 0.12, "step": 1128 }, { "epoch": 2.75, "learning_rate": 2.249000416385224e-05, "loss": 0.1142, "step": 1129 }, { "epoch": 2.75, "learning_rate": 2.2450300586089622e-05, "loss": 0.1285, "step": 1130 }, { "epoch": 2.75, "learning_rate": 2.2410603505709715e-05, "loss": 0.1238, "step": 1131 }, { "epoch": 2.76, "learning_rate": 2.2370913023872355e-05, "loss": 0.1238, "step": 1132 }, { "epoch": 2.76, "learning_rate": 2.233122924172053e-05, "loss": 0.1251, "step": 1133 }, { "epoch": 2.76, "learning_rate": 2.229155226038017e-05, "loss": 0.1346, "step": 1134 }, { "epoch": 2.76, "learning_rate": 2.2251882180959875e-05, "loss": 0.1255, "step": 1135 }, { "epoch": 2.76, "learning_rate": 2.2212219104550665e-05, "loss": 0.1289, "step": 1136 }, { "epoch": 2.77, "learning_rate": 2.217256313222567e-05, "loss": 0.1172, "step": 1137 }, { "epoch": 2.77, "learning_rate": 2.2132914365039993e-05, "loss": 0.1186, "step": 1138 }, { "epoch": 2.77, "learning_rate": 2.2093272904030307e-05, "loss": 0.1089, "step": 1139 }, { "epoch": 2.77, "learning_rate": 2.2053638850214704e-05, "loss": 0.1273, "step": 1140 }, { "epoch": 2.78, "learning_rate": 2.201401230459239e-05, "loss": 0.1231, "step": 1141 }, { "epoch": 2.78, "learning_rate": 2.197439336814343e-05, "loss": 0.1146, "step": 1142 }, { "epoch": 2.78, "learning_rate": 2.1934782141828504e-05, "loss": 0.1198, "step": 1143 }, { "epoch": 2.78, "learning_rate": 2.1895178726588674e-05, "loss": 0.1205, "step": 1144 }, { "epoch": 2.79, "learning_rate": 2.185558322334504e-05, "loss": 0.1223, "step": 1145 }, { "epoch": 2.79, "learning_rate": 2.1815995732998584e-05, "loss": 0.1212, "step": 1146 }, { "epoch": 2.79, "learning_rate": 2.1776416356429856e-05, "loss": 0.122, "step": 1147 }, { "epoch": 2.79, "learning_rate": 2.173684519449872e-05, "loss": 0.1261, "step": 1148 }, { "epoch": 2.8, "learning_rate": 2.169728234804411e-05, "loss": 0.1241, "step": 1149 }, { "epoch": 2.8, "learning_rate": 2.165772791788379e-05, "loss": 0.1185, "step": 1150 }, { "epoch": 2.8, "learning_rate": 2.1618182004814054e-05, "loss": 0.1188, "step": 1151 }, { "epoch": 2.8, "learning_rate": 2.1578644709609503e-05, "loss": 0.1063, "step": 1152 }, { "epoch": 2.81, "learning_rate": 2.1539116133022773e-05, "loss": 0.1121, "step": 1153 }, { "epoch": 2.81, "learning_rate": 2.1499596375784282e-05, "loss": 0.1195, "step": 1154 }, { "epoch": 2.81, "learning_rate": 2.146008553860197e-05, "loss": 0.1131, "step": 1155 }, { "epoch": 2.81, "learning_rate": 2.142058372216105e-05, "loss": 0.1156, "step": 1156 }, { "epoch": 2.82, "learning_rate": 2.138109102712376e-05, "loss": 0.1224, "step": 1157 }, { "epoch": 2.82, "learning_rate": 2.1341607554129074e-05, "loss": 0.1217, "step": 1158 }, { "epoch": 2.82, "learning_rate": 2.1302133403792482e-05, "loss": 0.1213, "step": 1159 }, { "epoch": 2.82, "learning_rate": 2.1262668676705695e-05, "loss": 0.1266, "step": 1160 }, { "epoch": 2.83, "learning_rate": 2.1223213473436438e-05, "loss": 0.1141, "step": 1161 }, { "epoch": 2.83, "learning_rate": 2.1183767894528136e-05, "loss": 0.1194, "step": 1162 }, { "epoch": 2.83, "learning_rate": 2.1144332040499726e-05, "loss": 0.1157, "step": 1163 }, { "epoch": 2.83, "learning_rate": 2.1104906011845334e-05, "loss": 0.1156, "step": 1164 }, { "epoch": 2.84, "learning_rate": 2.1065489909034065e-05, "loss": 0.1224, "step": 1165 }, { "epoch": 2.84, "learning_rate": 2.1026083832509702e-05, "loss": 0.1194, "step": 1166 }, { "epoch": 2.84, "learning_rate": 2.0986687882690515e-05, "loss": 0.11, "step": 1167 }, { "epoch": 2.84, "learning_rate": 2.094730215996894e-05, "loss": 0.115, "step": 1168 }, { "epoch": 2.85, "learning_rate": 2.090792676471137e-05, "loss": 0.1267, "step": 1169 }, { "epoch": 2.85, "learning_rate": 2.0868561797257878e-05, "loss": 0.131, "step": 1170 }, { "epoch": 2.85, "learning_rate": 2.082920735792195e-05, "loss": 0.123, "step": 1171 }, { "epoch": 2.85, "learning_rate": 2.0789863546990253e-05, "loss": 0.123, "step": 1172 }, { "epoch": 2.85, "learning_rate": 2.0750530464722373e-05, "loss": 0.126, "step": 1173 }, { "epoch": 2.86, "learning_rate": 2.071120821135054e-05, "loss": 0.1151, "step": 1174 }, { "epoch": 2.86, "learning_rate": 2.0671896887079418e-05, "loss": 0.1242, "step": 1175 }, { "epoch": 2.86, "learning_rate": 2.0632596592085804e-05, "loss": 0.1138, "step": 1176 }, { "epoch": 2.86, "learning_rate": 2.0593307426518373e-05, "loss": 0.1184, "step": 1177 }, { "epoch": 2.87, "learning_rate": 2.0554029490497463e-05, "loss": 0.1216, "step": 1178 }, { "epoch": 2.87, "learning_rate": 2.0514762884114784e-05, "loss": 0.1114, "step": 1179 }, { "epoch": 2.87, "learning_rate": 2.047550770743318e-05, "loss": 0.1239, "step": 1180 }, { "epoch": 2.87, "learning_rate": 2.0436264060486366e-05, "loss": 0.122, "step": 1181 }, { "epoch": 2.88, "learning_rate": 2.0397032043278687e-05, "loss": 0.1221, "step": 1182 }, { "epoch": 2.88, "learning_rate": 2.035781175578483e-05, "loss": 0.1218, "step": 1183 }, { "epoch": 2.88, "learning_rate": 2.03186032979496e-05, "loss": 0.1251, "step": 1184 }, { "epoch": 2.88, "learning_rate": 2.0279406769687666e-05, "loss": 0.1135, "step": 1185 }, { "epoch": 2.89, "learning_rate": 2.0240222270883288e-05, "loss": 0.1229, "step": 1186 }, { "epoch": 2.89, "learning_rate": 2.020104990139008e-05, "loss": 0.1183, "step": 1187 }, { "epoch": 2.89, "learning_rate": 2.016188976103074e-05, "loss": 0.1207, "step": 1188 }, { "epoch": 2.89, "learning_rate": 2.0122741949596797e-05, "loss": 0.1142, "step": 1189 }, { "epoch": 2.9, "learning_rate": 2.008360656684837e-05, "loss": 0.1243, "step": 1190 }, { "epoch": 2.9, "learning_rate": 2.0044483712513908e-05, "loss": 0.1127, "step": 1191 }, { "epoch": 2.9, "learning_rate": 2.000537348628993e-05, "loss": 0.113, "step": 1192 }, { "epoch": 2.9, "learning_rate": 1.9966275987840764e-05, "loss": 0.1221, "step": 1193 }, { "epoch": 2.91, "learning_rate": 1.9927191316798332e-05, "loss": 0.121, "step": 1194 }, { "epoch": 2.91, "learning_rate": 1.9888119572761845e-05, "loss": 0.1184, "step": 1195 }, { "epoch": 2.91, "learning_rate": 1.984906085529758e-05, "loss": 0.1143, "step": 1196 }, { "epoch": 2.91, "learning_rate": 1.9810015263938624e-05, "loss": 0.1155, "step": 1197 }, { "epoch": 2.92, "learning_rate": 1.977098289818459e-05, "loss": 0.1211, "step": 1198 }, { "epoch": 2.92, "learning_rate": 1.973196385750141e-05, "loss": 0.1397, "step": 1199 }, { "epoch": 2.92, "learning_rate": 1.969295824132107e-05, "loss": 0.1072, "step": 1200 }, { "epoch": 2.92, "learning_rate": 1.965396614904132e-05, "loss": 0.1223, "step": 1201 }, { "epoch": 2.93, "learning_rate": 1.961498768002547e-05, "loss": 0.1206, "step": 1202 }, { "epoch": 2.93, "learning_rate": 1.9576022933602097e-05, "loss": 0.1168, "step": 1203 }, { "epoch": 2.93, "learning_rate": 1.9537072009064814e-05, "loss": 0.116, "step": 1204 }, { "epoch": 2.93, "learning_rate": 1.949813500567201e-05, "loss": 0.1186, "step": 1205 }, { "epoch": 2.94, "learning_rate": 1.9459212022646606e-05, "loss": 0.1121, "step": 1206 }, { "epoch": 2.94, "learning_rate": 1.9420303159175796e-05, "loss": 0.1251, "step": 1207 }, { "epoch": 2.94, "learning_rate": 1.9381408514410782e-05, "loss": 0.1256, "step": 1208 }, { "epoch": 2.94, "learning_rate": 1.9342528187466548e-05, "loss": 0.1354, "step": 1209 }, { "epoch": 2.94, "learning_rate": 1.9303662277421568e-05, "loss": 0.1258, "step": 1210 }, { "epoch": 2.95, "learning_rate": 1.9264810883317592e-05, "loss": 0.1149, "step": 1211 }, { "epoch": 2.95, "learning_rate": 1.922597410415939e-05, "loss": 0.1202, "step": 1212 }, { "epoch": 2.95, "learning_rate": 1.918715203891448e-05, "loss": 0.1244, "step": 1213 }, { "epoch": 2.95, "learning_rate": 1.9148344786512878e-05, "loss": 0.1198, "step": 1214 }, { "epoch": 2.96, "learning_rate": 1.9109552445846854e-05, "loss": 0.1153, "step": 1215 }, { "epoch": 2.96, "learning_rate": 1.907077511577068e-05, "loss": 0.1194, "step": 1216 }, { "epoch": 2.96, "learning_rate": 1.9032012895100383e-05, "loss": 0.1181, "step": 1217 }, { "epoch": 2.96, "learning_rate": 1.8993265882613482e-05, "loss": 0.1173, "step": 1218 }, { "epoch": 2.97, "learning_rate": 1.8954534177048744e-05, "loss": 0.1196, "step": 1219 }, { "epoch": 2.97, "learning_rate": 1.8915817877105926e-05, "loss": 0.1218, "step": 1220 }, { "epoch": 2.97, "learning_rate": 1.8877117081445524e-05, "loss": 0.117, "step": 1221 }, { "epoch": 2.97, "learning_rate": 1.8838431888688527e-05, "loss": 0.1167, "step": 1222 }, { "epoch": 2.98, "learning_rate": 1.8799762397416158e-05, "loss": 0.1194, "step": 1223 }, { "epoch": 2.98, "learning_rate": 1.8761108706169655e-05, "loss": 0.1177, "step": 1224 }, { "epoch": 2.98, "learning_rate": 1.872247091344996e-05, "loss": 0.1223, "step": 1225 }, { "epoch": 2.98, "learning_rate": 1.8683849117717518e-05, "loss": 0.1231, "step": 1226 }, { "epoch": 2.99, "learning_rate": 1.8645243417391995e-05, "loss": 0.1212, "step": 1227 }, { "epoch": 2.99, "learning_rate": 1.8606653910852056e-05, "loss": 0.1163, "step": 1228 }, { "epoch": 2.99, "learning_rate": 1.856808069643509e-05, "loss": 0.1265, "step": 1229 }, { "epoch": 2.99, "learning_rate": 1.852952387243698e-05, "loss": 0.1148, "step": 1230 }, { "epoch": 3.0, "learning_rate": 1.849098353711183e-05, "loss": 0.12, "step": 1231 }, { "epoch": 3.0, "learning_rate": 1.8452459788671738e-05, "loss": 0.1195, "step": 1232 }, { "epoch": 3.0, "eval_loss": 0.7090210318565369, "eval_runtime": 116.3629, "eval_samples_per_second": 6.548, "eval_steps_per_second": 0.413, "step": 1232 }, { "epoch": 3.0, "learning_rate": 1.841395272528651e-05, "loss": 0.0877, "step": 1233 }, { "epoch": 3.0, "learning_rate": 1.8375462445083464e-05, "loss": 0.0432, "step": 1234 }, { "epoch": 3.01, "learning_rate": 1.8336989046147128e-05, "loss": 0.0427, "step": 1235 }, { "epoch": 3.01, "learning_rate": 1.8298532626519007e-05, "loss": 0.0441, "step": 1236 }, { "epoch": 3.01, "learning_rate": 1.826009328419737e-05, "loss": 0.0398, "step": 1237 }, { "epoch": 3.01, "learning_rate": 1.822167111713695e-05, "loss": 0.0429, "step": 1238 }, { "epoch": 3.02, "learning_rate": 1.818326622324869e-05, "loss": 0.0366, "step": 1239 }, { "epoch": 3.02, "learning_rate": 1.814487870039955e-05, "loss": 0.034, "step": 1240 }, { "epoch": 3.02, "learning_rate": 1.81065086464122e-05, "loss": 0.0367, "step": 1241 }, { "epoch": 3.02, "learning_rate": 1.80681561590648e-05, "loss": 0.0336, "step": 1242 }, { "epoch": 3.03, "learning_rate": 1.802982133609077e-05, "loss": 0.0367, "step": 1243 }, { "epoch": 3.03, "learning_rate": 1.7991504275178473e-05, "loss": 0.0373, "step": 1244 }, { "epoch": 3.03, "learning_rate": 1.7953205073971035e-05, "loss": 0.0351, "step": 1245 }, { "epoch": 3.03, "learning_rate": 1.7914923830066074e-05, "loss": 0.0341, "step": 1246 }, { "epoch": 3.03, "learning_rate": 1.7876660641015437e-05, "loss": 0.0392, "step": 1247 }, { "epoch": 3.04, "learning_rate": 1.7838415604324943e-05, "loss": 0.0373, "step": 1248 }, { "epoch": 3.04, "learning_rate": 1.7800188817454208e-05, "loss": 0.037, "step": 1249 }, { "epoch": 3.04, "learning_rate": 1.7761980377816287e-05, "loss": 0.0337, "step": 1250 }, { "epoch": 3.04, "learning_rate": 1.772379038277751e-05, "loss": 0.0368, "step": 1251 }, { "epoch": 3.05, "learning_rate": 1.7685618929657194e-05, "loss": 0.0413, "step": 1252 }, { "epoch": 3.05, "learning_rate": 1.764746611572742e-05, "loss": 0.0331, "step": 1253 }, { "epoch": 3.05, "learning_rate": 1.7609332038212728e-05, "loss": 0.0329, "step": 1254 }, { "epoch": 3.05, "learning_rate": 1.7571216794289984e-05, "loss": 0.0317, "step": 1255 }, { "epoch": 3.06, "learning_rate": 1.7533120481088e-05, "loss": 0.035, "step": 1256 }, { "epoch": 3.06, "learning_rate": 1.7495043195687368e-05, "loss": 0.0352, "step": 1257 }, { "epoch": 3.06, "learning_rate": 1.7456985035120193e-05, "loss": 0.0373, "step": 1258 }, { "epoch": 3.06, "learning_rate": 1.741894609636985e-05, "loss": 0.0337, "step": 1259 }, { "epoch": 3.07, "learning_rate": 1.7380926476370702e-05, "loss": 0.0368, "step": 1260 }, { "epoch": 3.07, "learning_rate": 1.734292627200793e-05, "loss": 0.0369, "step": 1261 }, { "epoch": 3.07, "learning_rate": 1.7304945580117193e-05, "loss": 0.0316, "step": 1262 }, { "epoch": 3.07, "learning_rate": 1.7266984497484458e-05, "loss": 0.0343, "step": 1263 }, { "epoch": 3.08, "learning_rate": 1.7229043120845708e-05, "loss": 0.035, "step": 1264 }, { "epoch": 3.08, "learning_rate": 1.7191121546886697e-05, "loss": 0.0375, "step": 1265 }, { "epoch": 3.08, "learning_rate": 1.7153219872242727e-05, "loss": 0.0331, "step": 1266 }, { "epoch": 3.08, "learning_rate": 1.711533819349842e-05, "loss": 0.0323, "step": 1267 }, { "epoch": 3.09, "learning_rate": 1.7077476607187397e-05, "loss": 0.0329, "step": 1268 }, { "epoch": 3.09, "learning_rate": 1.7039635209792105e-05, "loss": 0.0306, "step": 1269 }, { "epoch": 3.09, "learning_rate": 1.7001814097743528e-05, "loss": 0.0312, "step": 1270 }, { "epoch": 3.09, "learning_rate": 1.6964013367420966e-05, "loss": 0.0314, "step": 1271 }, { "epoch": 3.1, "learning_rate": 1.692623311515178e-05, "loss": 0.0333, "step": 1272 }, { "epoch": 3.1, "learning_rate": 1.6888473437211132e-05, "loss": 0.0365, "step": 1273 }, { "epoch": 3.1, "learning_rate": 1.685073442982179e-05, "loss": 0.03, "step": 1274 }, { "epoch": 3.1, "learning_rate": 1.6813016189153814e-05, "loss": 0.0329, "step": 1275 }, { "epoch": 3.11, "learning_rate": 1.6775318811324364e-05, "loss": 0.0369, "step": 1276 }, { "epoch": 3.11, "learning_rate": 1.6737642392397414e-05, "loss": 0.0331, "step": 1277 }, { "epoch": 3.11, "learning_rate": 1.6699987028383546e-05, "loss": 0.037, "step": 1278 }, { "epoch": 3.11, "learning_rate": 1.6662352815239678e-05, "loss": 0.0362, "step": 1279 }, { "epoch": 3.12, "learning_rate": 1.6624739848868854e-05, "loss": 0.0323, "step": 1280 }, { "epoch": 3.12, "learning_rate": 1.6587148225119935e-05, "loss": 0.0339, "step": 1281 }, { "epoch": 3.12, "learning_rate": 1.6549578039787436e-05, "loss": 0.0306, "step": 1282 }, { "epoch": 3.12, "learning_rate": 1.65120293886112e-05, "loss": 0.0311, "step": 1283 }, { "epoch": 3.13, "learning_rate": 1.6474502367276222e-05, "loss": 0.0361, "step": 1284 }, { "epoch": 3.13, "learning_rate": 1.643699707141237e-05, "loss": 0.0342, "step": 1285 }, { "epoch": 3.13, "learning_rate": 1.6399513596594158e-05, "loss": 0.0307, "step": 1286 }, { "epoch": 3.13, "learning_rate": 1.6362052038340475e-05, "loss": 0.0322, "step": 1287 }, { "epoch": 3.13, "learning_rate": 1.6324612492114378e-05, "loss": 0.0339, "step": 1288 }, { "epoch": 3.14, "learning_rate": 1.6287195053322816e-05, "loss": 0.0331, "step": 1289 }, { "epoch": 3.14, "learning_rate": 1.6249799817316415e-05, "loss": 0.0387, "step": 1290 }, { "epoch": 3.14, "learning_rate": 1.6212426879389205e-05, "loss": 0.035, "step": 1291 }, { "epoch": 3.14, "learning_rate": 1.617507633477842e-05, "loss": 0.0325, "step": 1292 }, { "epoch": 3.15, "learning_rate": 1.6137748278664215e-05, "loss": 0.0334, "step": 1293 }, { "epoch": 3.15, "learning_rate": 1.6100442806169422e-05, "loss": 0.0318, "step": 1294 }, { "epoch": 3.15, "learning_rate": 1.6063160012359345e-05, "loss": 0.0325, "step": 1295 }, { "epoch": 3.15, "learning_rate": 1.602589999224149e-05, "loss": 0.0322, "step": 1296 }, { "epoch": 3.16, "learning_rate": 1.598866284076532e-05, "loss": 0.0326, "step": 1297 }, { "epoch": 3.16, "learning_rate": 1.5951448652822047e-05, "loss": 0.0341, "step": 1298 }, { "epoch": 3.16, "learning_rate": 1.5914257523244347e-05, "loss": 0.0321, "step": 1299 }, { "epoch": 3.16, "learning_rate": 1.5877089546806125e-05, "loss": 0.0318, "step": 1300 }, { "epoch": 3.17, "learning_rate": 1.5839944818222295e-05, "loss": 0.0323, "step": 1301 }, { "epoch": 3.17, "learning_rate": 1.5802823432148546e-05, "loss": 0.0355, "step": 1302 }, { "epoch": 3.17, "learning_rate": 1.5765725483181053e-05, "loss": 0.0349, "step": 1303 }, { "epoch": 3.17, "learning_rate": 1.5728651065856297e-05, "loss": 0.0316, "step": 1304 }, { "epoch": 3.18, "learning_rate": 1.569160027465078e-05, "loss": 0.029, "step": 1305 }, { "epoch": 3.18, "learning_rate": 1.5654573203980784e-05, "loss": 0.0323, "step": 1306 }, { "epoch": 3.18, "learning_rate": 1.561756994820216e-05, "loss": 0.0314, "step": 1307 }, { "epoch": 3.18, "learning_rate": 1.5580590601610074e-05, "loss": 0.0322, "step": 1308 }, { "epoch": 3.19, "learning_rate": 1.5543635258438745e-05, "loss": 0.0319, "step": 1309 }, { "epoch": 3.19, "learning_rate": 1.5506704012861256e-05, "loss": 0.0331, "step": 1310 }, { "epoch": 3.19, "learning_rate": 1.546979695898926e-05, "loss": 0.0312, "step": 1311 }, { "epoch": 3.19, "learning_rate": 1.5432914190872757e-05, "loss": 0.0378, "step": 1312 }, { "epoch": 3.2, "learning_rate": 1.5396055802499875e-05, "loss": 0.0304, "step": 1313 }, { "epoch": 3.2, "learning_rate": 1.5359221887796616e-05, "loss": 0.0327, "step": 1314 }, { "epoch": 3.2, "learning_rate": 1.5322412540626592e-05, "loss": 0.0338, "step": 1315 }, { "epoch": 3.2, "learning_rate": 1.528562785479084e-05, "loss": 0.0297, "step": 1316 }, { "epoch": 3.21, "learning_rate": 1.5248867924027534e-05, "loss": 0.0319, "step": 1317 }, { "epoch": 3.21, "learning_rate": 1.5212132842011779e-05, "loss": 0.0336, "step": 1318 }, { "epoch": 3.21, "learning_rate": 1.5175422702355343e-05, "loss": 0.032, "step": 1319 }, { "epoch": 3.21, "learning_rate": 1.5138737598606448e-05, "loss": 0.0357, "step": 1320 }, { "epoch": 3.22, "learning_rate": 1.5102077624249497e-05, "loss": 0.0327, "step": 1321 }, { "epoch": 3.22, "learning_rate": 1.506544287270487e-05, "loss": 0.0356, "step": 1322 }, { "epoch": 3.22, "learning_rate": 1.5028833437328682e-05, "loss": 0.0289, "step": 1323 }, { "epoch": 3.22, "learning_rate": 1.4992249411412513e-05, "loss": 0.0334, "step": 1324 }, { "epoch": 3.22, "learning_rate": 1.4955690888183205e-05, "loss": 0.0309, "step": 1325 }, { "epoch": 3.23, "learning_rate": 1.4919157960802618e-05, "loss": 0.0363, "step": 1326 }, { "epoch": 3.23, "learning_rate": 1.4882650722367364e-05, "loss": 0.0311, "step": 1327 }, { "epoch": 3.23, "learning_rate": 1.4846169265908603e-05, "loss": 0.0286, "step": 1328 }, { "epoch": 3.23, "learning_rate": 1.4809713684391807e-05, "loss": 0.0344, "step": 1329 }, { "epoch": 3.24, "learning_rate": 1.4773284070716503e-05, "loss": 0.0339, "step": 1330 }, { "epoch": 3.24, "learning_rate": 1.4736880517716039e-05, "loss": 0.0309, "step": 1331 }, { "epoch": 3.24, "learning_rate": 1.470050311815736e-05, "loss": 0.0345, "step": 1332 }, { "epoch": 3.24, "learning_rate": 1.4664151964740752e-05, "loss": 0.0308, "step": 1333 }, { "epoch": 3.25, "learning_rate": 1.4627827150099627e-05, "loss": 0.0331, "step": 1334 }, { "epoch": 3.25, "learning_rate": 1.4591528766800283e-05, "loss": 0.0328, "step": 1335 }, { "epoch": 3.25, "learning_rate": 1.4555256907341667e-05, "loss": 0.0301, "step": 1336 }, { "epoch": 3.25, "learning_rate": 1.4519011664155118e-05, "loss": 0.0303, "step": 1337 }, { "epoch": 3.26, "learning_rate": 1.4482793129604148e-05, "loss": 0.0367, "step": 1338 }, { "epoch": 3.26, "learning_rate": 1.4446601395984233e-05, "loss": 0.0353, "step": 1339 }, { "epoch": 3.26, "learning_rate": 1.4410436555522522e-05, "loss": 0.0292, "step": 1340 }, { "epoch": 3.26, "learning_rate": 1.4374298700377665e-05, "loss": 0.0312, "step": 1341 }, { "epoch": 3.27, "learning_rate": 1.4338187922639507e-05, "loss": 0.0334, "step": 1342 }, { "epoch": 3.27, "learning_rate": 1.4302104314328935e-05, "loss": 0.0326, "step": 1343 }, { "epoch": 3.27, "learning_rate": 1.426604796739755e-05, "loss": 0.032, "step": 1344 }, { "epoch": 3.27, "learning_rate": 1.4230018973727535e-05, "loss": 0.0307, "step": 1345 }, { "epoch": 3.28, "learning_rate": 1.4194017425131323e-05, "loss": 0.03, "step": 1346 }, { "epoch": 3.28, "learning_rate": 1.4158043413351455e-05, "loss": 0.0322, "step": 1347 }, { "epoch": 3.28, "learning_rate": 1.4122097030060249e-05, "loss": 0.0335, "step": 1348 }, { "epoch": 3.28, "learning_rate": 1.408617836685967e-05, "loss": 0.0306, "step": 1349 }, { "epoch": 3.29, "learning_rate": 1.405028751528099e-05, "loss": 0.0299, "step": 1350 }, { "epoch": 3.29, "learning_rate": 1.4014424566784661e-05, "loss": 0.0294, "step": 1351 }, { "epoch": 3.29, "learning_rate": 1.397858961275999e-05, "loss": 0.0326, "step": 1352 }, { "epoch": 3.29, "learning_rate": 1.3942782744524973e-05, "loss": 0.0332, "step": 1353 }, { "epoch": 3.3, "learning_rate": 1.3907004053326006e-05, "loss": 0.0316, "step": 1354 }, { "epoch": 3.3, "learning_rate": 1.3871253630337722e-05, "loss": 0.0325, "step": 1355 }, { "epoch": 3.3, "learning_rate": 1.3835531566662673e-05, "loss": 0.0337, "step": 1356 }, { "epoch": 3.3, "learning_rate": 1.379983795333119e-05, "loss": 0.0306, "step": 1357 }, { "epoch": 3.31, "learning_rate": 1.3764172881301062e-05, "loss": 0.0366, "step": 1358 }, { "epoch": 3.31, "learning_rate": 1.3728536441457357e-05, "loss": 0.0309, "step": 1359 }, { "epoch": 3.31, "learning_rate": 1.3692928724612203e-05, "loss": 0.0266, "step": 1360 }, { "epoch": 3.31, "learning_rate": 1.3657349821504517e-05, "loss": 0.0304, "step": 1361 }, { "epoch": 3.31, "learning_rate": 1.3621799822799788e-05, "loss": 0.0308, "step": 1362 }, { "epoch": 3.32, "learning_rate": 1.3586278819089837e-05, "loss": 0.0344, "step": 1363 }, { "epoch": 3.32, "learning_rate": 1.3550786900892634e-05, "loss": 0.0316, "step": 1364 }, { "epoch": 3.32, "learning_rate": 1.3515324158651981e-05, "loss": 0.0356, "step": 1365 }, { "epoch": 3.32, "learning_rate": 1.3479890682737379e-05, "loss": 0.0307, "step": 1366 }, { "epoch": 3.33, "learning_rate": 1.3444486563443723e-05, "loss": 0.0291, "step": 1367 }, { "epoch": 3.33, "learning_rate": 1.3409111890991105e-05, "loss": 0.0316, "step": 1368 }, { "epoch": 3.33, "learning_rate": 1.3373766755524564e-05, "loss": 0.0318, "step": 1369 }, { "epoch": 3.33, "learning_rate": 1.3338451247113897e-05, "loss": 0.0308, "step": 1370 }, { "epoch": 3.34, "learning_rate": 1.330316545575338e-05, "loss": 0.0324, "step": 1371 }, { "epoch": 3.34, "learning_rate": 1.3267909471361572e-05, "loss": 0.0329, "step": 1372 }, { "epoch": 3.34, "learning_rate": 1.323268338378108e-05, "loss": 0.0317, "step": 1373 }, { "epoch": 3.34, "learning_rate": 1.3197487282778315e-05, "loss": 0.0336, "step": 1374 }, { "epoch": 3.35, "learning_rate": 1.3162321258043261e-05, "loss": 0.0318, "step": 1375 }, { "epoch": 3.35, "learning_rate": 1.3127185399189295e-05, "loss": 0.0363, "step": 1376 }, { "epoch": 3.35, "learning_rate": 1.3092079795752887e-05, "loss": 0.0336, "step": 1377 }, { "epoch": 3.35, "learning_rate": 1.3057004537193423e-05, "loss": 0.0299, "step": 1378 }, { "epoch": 3.36, "learning_rate": 1.3021959712892979e-05, "loss": 0.0321, "step": 1379 }, { "epoch": 3.36, "learning_rate": 1.2986945412156038e-05, "loss": 0.0309, "step": 1380 }, { "epoch": 3.36, "learning_rate": 1.2951961724209317e-05, "loss": 0.0324, "step": 1381 }, { "epoch": 3.36, "learning_rate": 1.2917008738201537e-05, "loss": 0.0296, "step": 1382 }, { "epoch": 3.37, "learning_rate": 1.2882086543203154e-05, "loss": 0.031, "step": 1383 }, { "epoch": 3.37, "learning_rate": 1.284719522820618e-05, "loss": 0.0281, "step": 1384 }, { "epoch": 3.37, "learning_rate": 1.2812334882123944e-05, "loss": 0.0302, "step": 1385 }, { "epoch": 3.37, "learning_rate": 1.2777505593790834e-05, "loss": 0.0317, "step": 1386 }, { "epoch": 3.38, "learning_rate": 1.2742707451962088e-05, "loss": 0.0308, "step": 1387 }, { "epoch": 3.38, "learning_rate": 1.2707940545313613e-05, "loss": 0.0312, "step": 1388 }, { "epoch": 3.38, "learning_rate": 1.2673204962441671e-05, "loss": 0.0294, "step": 1389 }, { "epoch": 3.38, "learning_rate": 1.263850079186274e-05, "loss": 0.0312, "step": 1390 }, { "epoch": 3.39, "learning_rate": 1.2603828122013246e-05, "loss": 0.0329, "step": 1391 }, { "epoch": 3.39, "learning_rate": 1.2569187041249315e-05, "loss": 0.0285, "step": 1392 }, { "epoch": 3.39, "learning_rate": 1.253457763784659e-05, "loss": 0.0304, "step": 1393 }, { "epoch": 3.39, "learning_rate": 1.2500000000000006e-05, "loss": 0.0295, "step": 1394 }, { "epoch": 3.4, "learning_rate": 1.246545421582353e-05, "loss": 0.0317, "step": 1395 }, { "epoch": 3.4, "learning_rate": 1.2430940373349945e-05, "loss": 0.0315, "step": 1396 }, { "epoch": 3.4, "learning_rate": 1.2396458560530694e-05, "loss": 0.0333, "step": 1397 }, { "epoch": 3.4, "learning_rate": 1.2362008865235536e-05, "loss": 0.0306, "step": 1398 }, { "epoch": 3.4, "learning_rate": 1.2327591375252403e-05, "loss": 0.0301, "step": 1399 }, { "epoch": 3.41, "learning_rate": 1.2293206178287184e-05, "loss": 0.0311, "step": 1400 }, { "epoch": 3.41, "learning_rate": 1.2258853361963448e-05, "loss": 0.0328, "step": 1401 }, { "epoch": 3.41, "learning_rate": 1.2224533013822238e-05, "loss": 0.0303, "step": 1402 }, { "epoch": 3.41, "learning_rate": 1.2190245221321912e-05, "loss": 0.0355, "step": 1403 }, { "epoch": 3.42, "learning_rate": 1.2155990071837817e-05, "loss": 0.0314, "step": 1404 }, { "epoch": 3.42, "learning_rate": 1.2121767652662123e-05, "loss": 0.0331, "step": 1405 }, { "epoch": 3.42, "learning_rate": 1.2087578051003617e-05, "loss": 0.0345, "step": 1406 }, { "epoch": 3.42, "learning_rate": 1.2053421353987437e-05, "loss": 0.0276, "step": 1407 }, { "epoch": 3.43, "learning_rate": 1.2019297648654857e-05, "loss": 0.0291, "step": 1408 }, { "epoch": 3.43, "learning_rate": 1.198520702196313e-05, "loss": 0.0306, "step": 1409 }, { "epoch": 3.43, "learning_rate": 1.1951149560785167e-05, "loss": 0.0319, "step": 1410 }, { "epoch": 3.43, "learning_rate": 1.191712535190937e-05, "loss": 0.0301, "step": 1411 }, { "epoch": 3.44, "learning_rate": 1.188313448203943e-05, "loss": 0.0246, "step": 1412 }, { "epoch": 3.44, "learning_rate": 1.1849177037794051e-05, "loss": 0.0307, "step": 1413 }, { "epoch": 3.44, "learning_rate": 1.181525310570677e-05, "loss": 0.0311, "step": 1414 }, { "epoch": 3.44, "learning_rate": 1.178136277222573e-05, "loss": 0.0298, "step": 1415 }, { "epoch": 3.45, "learning_rate": 1.1747506123713458e-05, "loss": 0.0332, "step": 1416 }, { "epoch": 3.45, "learning_rate": 1.1713683246446622e-05, "loss": 0.0277, "step": 1417 }, { "epoch": 3.45, "learning_rate": 1.1679894226615862e-05, "loss": 0.0343, "step": 1418 }, { "epoch": 3.45, "learning_rate": 1.1646139150325507e-05, "loss": 0.0324, "step": 1419 }, { "epoch": 3.46, "learning_rate": 1.16124181035934e-05, "loss": 0.0311, "step": 1420 }, { "epoch": 3.46, "learning_rate": 1.157873117235067e-05, "loss": 0.0322, "step": 1421 }, { "epoch": 3.46, "learning_rate": 1.1545078442441526e-05, "loss": 0.0311, "step": 1422 }, { "epoch": 3.46, "learning_rate": 1.1511459999622981e-05, "loss": 0.033, "step": 1423 }, { "epoch": 3.47, "learning_rate": 1.147787592956472e-05, "loss": 0.0335, "step": 1424 }, { "epoch": 3.47, "learning_rate": 1.14443263178488e-05, "loss": 0.0307, "step": 1425 }, { "epoch": 3.47, "learning_rate": 1.1410811249969475e-05, "loss": 0.0314, "step": 1426 }, { "epoch": 3.47, "learning_rate": 1.1377330811332988e-05, "loss": 0.0313, "step": 1427 }, { "epoch": 3.48, "learning_rate": 1.1343885087257337e-05, "loss": 0.03, "step": 1428 }, { "epoch": 3.48, "learning_rate": 1.1310474162972026e-05, "loss": 0.0284, "step": 1429 }, { "epoch": 3.48, "learning_rate": 1.1277098123617922e-05, "loss": 0.032, "step": 1430 }, { "epoch": 3.48, "learning_rate": 1.124375705424696e-05, "loss": 0.0383, "step": 1431 }, { "epoch": 3.49, "learning_rate": 1.1210451039821965e-05, "loss": 0.0352, "step": 1432 }, { "epoch": 3.49, "learning_rate": 1.117718016521645e-05, "loss": 0.0317, "step": 1433 }, { "epoch": 3.49, "learning_rate": 1.1143944515214386e-05, "loss": 0.0316, "step": 1434 }, { "epoch": 3.49, "learning_rate": 1.1110744174509952e-05, "loss": 0.0285, "step": 1435 }, { "epoch": 3.49, "learning_rate": 1.1077579227707357e-05, "loss": 0.0301, "step": 1436 }, { "epoch": 3.5, "learning_rate": 1.104444975932064e-05, "loss": 0.0307, "step": 1437 }, { "epoch": 3.5, "learning_rate": 1.10113558537734e-05, "loss": 0.0309, "step": 1438 }, { "epoch": 3.5, "learning_rate": 1.0978297595398632e-05, "loss": 0.0328, "step": 1439 }, { "epoch": 3.5, "learning_rate": 1.094527506843849e-05, "loss": 0.0277, "step": 1440 }, { "epoch": 3.51, "learning_rate": 1.0912288357044062e-05, "loss": 0.03, "step": 1441 }, { "epoch": 3.51, "learning_rate": 1.0879337545275165e-05, "loss": 0.0302, "step": 1442 }, { "epoch": 3.51, "learning_rate": 1.084642271710016e-05, "loss": 0.0309, "step": 1443 }, { "epoch": 3.51, "learning_rate": 1.0813543956395675e-05, "loss": 0.0292, "step": 1444 }, { "epoch": 3.52, "learning_rate": 1.0780701346946453e-05, "loss": 0.0297, "step": 1445 }, { "epoch": 3.52, "learning_rate": 1.074789497244512e-05, "loss": 0.0303, "step": 1446 }, { "epoch": 3.52, "learning_rate": 1.0715124916491937e-05, "loss": 0.0309, "step": 1447 }, { "epoch": 3.52, "learning_rate": 1.0682391262594618e-05, "loss": 0.0278, "step": 1448 }, { "epoch": 3.53, "learning_rate": 1.0649694094168147e-05, "loss": 0.0285, "step": 1449 }, { "epoch": 3.53, "learning_rate": 1.0617033494534486e-05, "loss": 0.0329, "step": 1450 }, { "epoch": 3.53, "learning_rate": 1.0584409546922445e-05, "loss": 0.0299, "step": 1451 }, { "epoch": 3.53, "learning_rate": 1.0551822334467429e-05, "loss": 0.0333, "step": 1452 }, { "epoch": 3.54, "learning_rate": 1.0519271940211215e-05, "loss": 0.0296, "step": 1453 }, { "epoch": 3.54, "learning_rate": 1.0486758447101751e-05, "loss": 0.0322, "step": 1454 }, { "epoch": 3.54, "learning_rate": 1.0454281937992989e-05, "loss": 0.031, "step": 1455 }, { "epoch": 3.54, "learning_rate": 1.0421842495644587e-05, "loss": 0.0293, "step": 1456 }, { "epoch": 3.55, "learning_rate": 1.0389440202721778e-05, "loss": 0.0341, "step": 1457 }, { "epoch": 3.55, "learning_rate": 1.035707514179513e-05, "loss": 0.0311, "step": 1458 }, { "epoch": 3.55, "learning_rate": 1.0324747395340309e-05, "loss": 0.0335, "step": 1459 }, { "epoch": 3.55, "learning_rate": 1.0292457045737895e-05, "loss": 0.0347, "step": 1460 }, { "epoch": 3.56, "learning_rate": 1.02602041752732e-05, "loss": 0.0323, "step": 1461 }, { "epoch": 3.56, "learning_rate": 1.0227988866135996e-05, "loss": 0.0282, "step": 1462 }, { "epoch": 3.56, "learning_rate": 1.0195811200420333e-05, "loss": 0.0285, "step": 1463 }, { "epoch": 3.56, "learning_rate": 1.0163671260124385e-05, "loss": 0.0313, "step": 1464 }, { "epoch": 3.57, "learning_rate": 1.0131569127150142e-05, "loss": 0.0255, "step": 1465 }, { "epoch": 3.57, "learning_rate": 1.0099504883303254e-05, "loss": 0.0303, "step": 1466 }, { "epoch": 3.57, "learning_rate": 1.0067478610292847e-05, "loss": 0.0282, "step": 1467 }, { "epoch": 3.57, "learning_rate": 1.0035490389731255e-05, "loss": 0.0268, "step": 1468 }, { "epoch": 3.58, "learning_rate": 1.0003540303133843e-05, "loss": 0.0289, "step": 1469 }, { "epoch": 3.58, "learning_rate": 9.971628431918845e-06, "loss": 0.0339, "step": 1470 }, { "epoch": 3.58, "learning_rate": 9.939754857407063e-06, "loss": 0.0288, "step": 1471 }, { "epoch": 3.58, "learning_rate": 9.90791966082171e-06, "loss": 0.0332, "step": 1472 }, { "epoch": 3.59, "learning_rate": 9.876122923288239e-06, "loss": 0.0286, "step": 1473 }, { "epoch": 3.59, "learning_rate": 9.844364725834057e-06, "loss": 0.0265, "step": 1474 }, { "epoch": 3.59, "learning_rate": 9.812645149388363e-06, "loss": 0.0325, "step": 1475 }, { "epoch": 3.59, "learning_rate": 9.780964274781984e-06, "loss": 0.0295, "step": 1476 }, { "epoch": 3.59, "learning_rate": 9.749322182747072e-06, "loss": 0.0308, "step": 1477 }, { "epoch": 3.6, "learning_rate": 9.71771895391696e-06, "loss": 0.0317, "step": 1478 }, { "epoch": 3.6, "learning_rate": 9.686154668825973e-06, "loss": 0.0275, "step": 1479 }, { "epoch": 3.6, "learning_rate": 9.654629407909163e-06, "loss": 0.0283, "step": 1480 }, { "epoch": 3.6, "learning_rate": 9.623143251502148e-06, "loss": 0.0287, "step": 1481 }, { "epoch": 3.61, "learning_rate": 9.591696279840906e-06, "loss": 0.0304, "step": 1482 }, { "epoch": 3.61, "learning_rate": 9.560288573061563e-06, "loss": 0.037, "step": 1483 }, { "epoch": 3.61, "learning_rate": 9.52892021120016e-06, "loss": 0.0308, "step": 1484 }, { "epoch": 3.61, "learning_rate": 9.497591274192508e-06, "loss": 0.0303, "step": 1485 }, { "epoch": 3.62, "learning_rate": 9.46630184187393e-06, "loss": 0.0295, "step": 1486 }, { "epoch": 3.62, "learning_rate": 9.435051993979077e-06, "loss": 0.0314, "step": 1487 }, { "epoch": 3.62, "learning_rate": 9.403841810141747e-06, "loss": 0.0323, "step": 1488 }, { "epoch": 3.62, "learning_rate": 9.372671369894661e-06, "loss": 0.0294, "step": 1489 }, { "epoch": 3.63, "learning_rate": 9.341540752669235e-06, "loss": 0.0316, "step": 1490 }, { "epoch": 3.63, "learning_rate": 9.310450037795435e-06, "loss": 0.0279, "step": 1491 }, { "epoch": 3.63, "learning_rate": 9.279399304501526e-06, "loss": 0.0302, "step": 1492 }, { "epoch": 3.63, "learning_rate": 9.248388631913887e-06, "loss": 0.0302, "step": 1493 }, { "epoch": 3.64, "learning_rate": 9.21741809905682e-06, "loss": 0.0298, "step": 1494 }, { "epoch": 3.64, "learning_rate": 9.186487784852349e-06, "loss": 0.029, "step": 1495 }, { "epoch": 3.64, "learning_rate": 9.155597768119978e-06, "loss": 0.0307, "step": 1496 }, { "epoch": 3.64, "learning_rate": 9.124748127576552e-06, "loss": 0.0299, "step": 1497 }, { "epoch": 3.65, "learning_rate": 9.09393894183601e-06, "loss": 0.0307, "step": 1498 }, { "epoch": 3.65, "learning_rate": 9.063170289409192e-06, "loss": 0.0293, "step": 1499 }, { "epoch": 3.65, "learning_rate": 9.032442248703666e-06, "loss": 0.0323, "step": 1500 }, { "epoch": 3.65, "learning_rate": 9.001754898023512e-06, "loss": 0.0289, "step": 1501 }, { "epoch": 3.66, "learning_rate": 8.971108315569094e-06, "loss": 0.0323, "step": 1502 }, { "epoch": 3.66, "learning_rate": 8.940502579436913e-06, "loss": 0.0276, "step": 1503 }, { "epoch": 3.66, "learning_rate": 8.90993776761937e-06, "loss": 0.0294, "step": 1504 }, { "epoch": 3.66, "learning_rate": 8.879413958004566e-06, "loss": 0.036, "step": 1505 }, { "epoch": 3.67, "learning_rate": 8.848931228376136e-06, "loss": 0.0305, "step": 1506 }, { "epoch": 3.67, "learning_rate": 8.818489656413043e-06, "loss": 0.0326, "step": 1507 }, { "epoch": 3.67, "learning_rate": 8.788089319689324e-06, "loss": 0.0323, "step": 1508 }, { "epoch": 3.67, "learning_rate": 8.757730295673985e-06, "loss": 0.0318, "step": 1509 }, { "epoch": 3.68, "learning_rate": 8.727412661730724e-06, "loss": 0.0285, "step": 1510 }, { "epoch": 3.68, "learning_rate": 8.697136495117763e-06, "loss": 0.0315, "step": 1511 }, { "epoch": 3.68, "learning_rate": 8.666901872987676e-06, "loss": 0.0302, "step": 1512 }, { "epoch": 3.68, "learning_rate": 8.63670887238716e-06, "loss": 0.0298, "step": 1513 }, { "epoch": 3.68, "learning_rate": 8.606557570256843e-06, "loss": 0.0332, "step": 1514 }, { "epoch": 3.69, "learning_rate": 8.576448043431082e-06, "loss": 0.0327, "step": 1515 }, { "epoch": 3.69, "learning_rate": 8.546380368637812e-06, "loss": 0.0314, "step": 1516 }, { "epoch": 3.69, "learning_rate": 8.51635462249828e-06, "loss": 0.0287, "step": 1517 }, { "epoch": 3.69, "learning_rate": 8.486370881526917e-06, "loss": 0.0282, "step": 1518 }, { "epoch": 3.7, "learning_rate": 8.456429222131082e-06, "loss": 0.0327, "step": 1519 }, { "epoch": 3.7, "learning_rate": 8.426529720610934e-06, "loss": 0.0285, "step": 1520 }, { "epoch": 3.7, "learning_rate": 8.396672453159163e-06, "loss": 0.0258, "step": 1521 }, { "epoch": 3.7, "learning_rate": 8.36685749586087e-06, "loss": 0.0292, "step": 1522 }, { "epoch": 3.71, "learning_rate": 8.337084924693303e-06, "loss": 0.0266, "step": 1523 }, { "epoch": 3.71, "learning_rate": 8.307354815525731e-06, "loss": 0.0299, "step": 1524 }, { "epoch": 3.71, "learning_rate": 8.277667244119187e-06, "loss": 0.0293, "step": 1525 }, { "epoch": 3.71, "learning_rate": 8.24802228612633e-06, "loss": 0.0298, "step": 1526 }, { "epoch": 3.72, "learning_rate": 8.218420017091208e-06, "loss": 0.0274, "step": 1527 }, { "epoch": 3.72, "learning_rate": 8.188860512449107e-06, "loss": 0.0281, "step": 1528 }, { "epoch": 3.72, "learning_rate": 8.159343847526308e-06, "loss": 0.026, "step": 1529 }, { "epoch": 3.72, "learning_rate": 8.129870097539951e-06, "loss": 0.0282, "step": 1530 }, { "epoch": 3.73, "learning_rate": 8.100439337597798e-06, "loss": 0.0328, "step": 1531 }, { "epoch": 3.73, "learning_rate": 8.071051642698074e-06, "loss": 0.0301, "step": 1532 }, { "epoch": 3.73, "learning_rate": 8.041707087729244e-06, "loss": 0.0283, "step": 1533 }, { "epoch": 3.73, "learning_rate": 8.012405747469862e-06, "loss": 0.032, "step": 1534 }, { "epoch": 3.74, "learning_rate": 7.983147696588339e-06, "loss": 0.0303, "step": 1535 }, { "epoch": 3.74, "learning_rate": 7.953933009642773e-06, "loss": 0.0317, "step": 1536 }, { "epoch": 3.74, "learning_rate": 7.924761761080768e-06, "loss": 0.029, "step": 1537 }, { "epoch": 3.74, "learning_rate": 7.895634025239243e-06, "loss": 0.034, "step": 1538 }, { "epoch": 3.75, "learning_rate": 7.866549876344201e-06, "loss": 0.0298, "step": 1539 }, { "epoch": 3.75, "learning_rate": 7.837509388510611e-06, "loss": 0.03, "step": 1540 }, { "epoch": 3.75, "learning_rate": 7.808512635742157e-06, "loss": 0.027, "step": 1541 }, { "epoch": 3.75, "learning_rate": 7.779559691931066e-06, "loss": 0.0287, "step": 1542 }, { "epoch": 3.76, "learning_rate": 7.750650630857947e-06, "loss": 0.0319, "step": 1543 }, { "epoch": 3.76, "learning_rate": 7.721785526191588e-06, "loss": 0.0297, "step": 1544 }, { "epoch": 3.76, "learning_rate": 7.692964451488734e-06, "loss": 0.0292, "step": 1545 }, { "epoch": 3.76, "learning_rate": 7.66418748019396e-06, "loss": 0.0264, "step": 1546 }, { "epoch": 3.77, "learning_rate": 7.63545468563943e-06, "loss": 0.0296, "step": 1547 }, { "epoch": 3.77, "learning_rate": 7.606766141044733e-06, "loss": 0.0279, "step": 1548 }, { "epoch": 3.77, "learning_rate": 7.578121919516712e-06, "loss": 0.0273, "step": 1549 }, { "epoch": 3.77, "learning_rate": 7.54952209404926e-06, "loss": 0.0302, "step": 1550 }, { "epoch": 3.77, "learning_rate": 7.520966737523116e-06, "loss": 0.024, "step": 1551 }, { "epoch": 3.78, "learning_rate": 7.4924559227057265e-06, "loss": 0.033, "step": 1552 }, { "epoch": 3.78, "learning_rate": 7.463989722251014e-06, "loss": 0.0283, "step": 1553 }, { "epoch": 3.78, "learning_rate": 7.435568208699203e-06, "loss": 0.0268, "step": 1554 }, { "epoch": 3.78, "learning_rate": 7.407191454476667e-06, "loss": 0.0264, "step": 1555 }, { "epoch": 3.79, "learning_rate": 7.37885953189571e-06, "loss": 0.0283, "step": 1556 }, { "epoch": 3.79, "learning_rate": 7.350572513154377e-06, "loss": 0.0288, "step": 1557 }, { "epoch": 3.79, "learning_rate": 7.3223304703363135e-06, "loss": 0.0291, "step": 1558 }, { "epoch": 3.79, "learning_rate": 7.294133475410528e-06, "loss": 0.0288, "step": 1559 }, { "epoch": 3.8, "learning_rate": 7.265981600231234e-06, "loss": 0.029, "step": 1560 }, { "epoch": 3.8, "learning_rate": 7.23787491653769e-06, "loss": 0.0321, "step": 1561 }, { "epoch": 3.8, "learning_rate": 7.209813495953963e-06, "loss": 0.0284, "step": 1562 }, { "epoch": 3.8, "learning_rate": 7.181797409988802e-06, "loss": 0.0291, "step": 1563 }, { "epoch": 3.81, "learning_rate": 7.153826730035423e-06, "loss": 0.0289, "step": 1564 }, { "epoch": 3.81, "learning_rate": 7.125901527371329e-06, "loss": 0.0286, "step": 1565 }, { "epoch": 3.81, "learning_rate": 7.0980218731581255e-06, "loss": 0.0292, "step": 1566 }, { "epoch": 3.81, "learning_rate": 7.070187838441369e-06, "loss": 0.0299, "step": 1567 }, { "epoch": 3.82, "learning_rate": 7.042399494150342e-06, "loss": 0.0293, "step": 1568 }, { "epoch": 3.82, "learning_rate": 7.0146569110979086e-06, "loss": 0.0291, "step": 1569 }, { "epoch": 3.82, "learning_rate": 6.986960159980327e-06, "loss": 0.0306, "step": 1570 }, { "epoch": 3.82, "learning_rate": 6.959309311377038e-06, "loss": 0.0302, "step": 1571 }, { "epoch": 3.83, "learning_rate": 6.931704435750522e-06, "loss": 0.0352, "step": 1572 }, { "epoch": 3.83, "learning_rate": 6.904145603446116e-06, "loss": 0.0274, "step": 1573 }, { "epoch": 3.83, "learning_rate": 6.876632884691803e-06, "loss": 0.0321, "step": 1574 }, { "epoch": 3.83, "learning_rate": 6.849166349598079e-06, "loss": 0.0282, "step": 1575 }, { "epoch": 3.84, "learning_rate": 6.821746068157741e-06, "loss": 0.0289, "step": 1576 }, { "epoch": 3.84, "learning_rate": 6.794372110245717e-06, "loss": 0.029, "step": 1577 }, { "epoch": 3.84, "learning_rate": 6.767044545618878e-06, "loss": 0.0286, "step": 1578 }, { "epoch": 3.84, "learning_rate": 6.739763443915895e-06, "loss": 0.0288, "step": 1579 }, { "epoch": 3.85, "learning_rate": 6.712528874657012e-06, "loss": 0.0274, "step": 1580 }, { "epoch": 3.85, "learning_rate": 6.685340907243915e-06, "loss": 0.0312, "step": 1581 }, { "epoch": 3.85, "learning_rate": 6.658199610959537e-06, "loss": 0.032, "step": 1582 }, { "epoch": 3.85, "learning_rate": 6.6311050549678595e-06, "loss": 0.0284, "step": 1583 }, { "epoch": 3.86, "learning_rate": 6.604057308313763e-06, "loss": 0.0303, "step": 1584 }, { "epoch": 3.86, "learning_rate": 6.577056439922857e-06, "loss": 0.0278, "step": 1585 }, { "epoch": 3.86, "learning_rate": 6.55010251860127e-06, "loss": 0.0302, "step": 1586 }, { "epoch": 3.86, "learning_rate": 6.523195613035521e-06, "loss": 0.0285, "step": 1587 }, { "epoch": 3.86, "learning_rate": 6.496335791792293e-06, "loss": 0.0279, "step": 1588 }, { "epoch": 3.87, "learning_rate": 6.469523123318308e-06, "loss": 0.0302, "step": 1589 }, { "epoch": 3.87, "learning_rate": 6.442757675940109e-06, "loss": 0.0319, "step": 1590 }, { "epoch": 3.87, "learning_rate": 6.4160395178639196e-06, "loss": 0.0297, "step": 1591 }, { "epoch": 3.87, "learning_rate": 6.389368717175448e-06, "loss": 0.0272, "step": 1592 }, { "epoch": 3.88, "learning_rate": 6.362745341839729e-06, "loss": 0.0292, "step": 1593 }, { "epoch": 3.88, "learning_rate": 6.336169459700933e-06, "loss": 0.027, "step": 1594 }, { "epoch": 3.88, "learning_rate": 6.309641138482222e-06, "loss": 0.0302, "step": 1595 }, { "epoch": 3.88, "learning_rate": 6.283160445785532e-06, "loss": 0.0258, "step": 1596 }, { "epoch": 3.89, "learning_rate": 6.25672744909146e-06, "loss": 0.028, "step": 1597 }, { "epoch": 3.89, "learning_rate": 6.230342215759028e-06, "loss": 0.0255, "step": 1598 }, { "epoch": 3.89, "learning_rate": 6.204004813025568e-06, "loss": 0.0309, "step": 1599 }, { "epoch": 3.89, "learning_rate": 6.177715308006505e-06, "loss": 0.0329, "step": 1600 }, { "epoch": 3.9, "learning_rate": 6.151473767695229e-06, "loss": 0.0294, "step": 1601 }, { "epoch": 3.9, "learning_rate": 6.125280258962873e-06, "loss": 0.0273, "step": 1602 }, { "epoch": 3.9, "learning_rate": 6.099134848558208e-06, "loss": 0.0266, "step": 1603 }, { "epoch": 3.9, "learning_rate": 6.073037603107404e-06, "loss": 0.0281, "step": 1604 }, { "epoch": 3.91, "learning_rate": 6.0469885891139e-06, "loss": 0.0286, "step": 1605 }, { "epoch": 3.91, "learning_rate": 6.020987872958236e-06, "loss": 0.0251, "step": 1606 }, { "epoch": 3.91, "learning_rate": 5.995035520897882e-06, "loss": 0.0266, "step": 1607 }, { "epoch": 3.91, "learning_rate": 5.969131599067044e-06, "loss": 0.0288, "step": 1608 }, { "epoch": 3.92, "learning_rate": 5.943276173476509e-06, "loss": 0.0282, "step": 1609 }, { "epoch": 3.92, "learning_rate": 5.91746931001351e-06, "loss": 0.0272, "step": 1610 }, { "epoch": 3.92, "learning_rate": 5.891711074441495e-06, "loss": 0.0275, "step": 1611 }, { "epoch": 3.92, "learning_rate": 5.866001532400023e-06, "loss": 0.0284, "step": 1612 }, { "epoch": 3.93, "learning_rate": 5.84034074940456e-06, "loss": 0.029, "step": 1613 }, { "epoch": 3.93, "learning_rate": 5.814728790846308e-06, "loss": 0.0275, "step": 1614 }, { "epoch": 3.93, "learning_rate": 5.789165721992052e-06, "loss": 0.0286, "step": 1615 }, { "epoch": 3.93, "learning_rate": 5.763651607984008e-06, "loss": 0.0252, "step": 1616 }, { "epoch": 3.94, "learning_rate": 5.738186513839619e-06, "loss": 0.0278, "step": 1617 }, { "epoch": 3.94, "learning_rate": 5.712770504451426e-06, "loss": 0.0271, "step": 1618 }, { "epoch": 3.94, "learning_rate": 5.687403644586891e-06, "loss": 0.0298, "step": 1619 }, { "epoch": 3.94, "learning_rate": 5.662085998888214e-06, "loss": 0.0299, "step": 1620 }, { "epoch": 3.95, "learning_rate": 5.636817631872185e-06, "loss": 0.0254, "step": 1621 }, { "epoch": 3.95, "learning_rate": 5.611598607930032e-06, "loss": 0.027, "step": 1622 }, { "epoch": 3.95, "learning_rate": 5.586428991327223e-06, "loss": 0.0297, "step": 1623 }, { "epoch": 3.95, "learning_rate": 5.561308846203333e-06, "loss": 0.0311, "step": 1624 }, { "epoch": 3.95, "learning_rate": 5.5362382365718775e-06, "loss": 0.0297, "step": 1625 }, { "epoch": 3.96, "learning_rate": 5.511217226320125e-06, "loss": 0.0296, "step": 1626 }, { "epoch": 3.96, "learning_rate": 5.486245879208945e-06, "loss": 0.0279, "step": 1627 }, { "epoch": 3.96, "learning_rate": 5.46132425887268e-06, "loss": 0.0267, "step": 1628 }, { "epoch": 3.96, "learning_rate": 5.436452428818919e-06, "loss": 0.0269, "step": 1629 }, { "epoch": 3.97, "learning_rate": 5.411630452428395e-06, "loss": 0.0305, "step": 1630 }, { "epoch": 3.97, "learning_rate": 5.386858392954799e-06, "loss": 0.026, "step": 1631 }, { "epoch": 3.97, "learning_rate": 5.362136313524607e-06, "loss": 0.0271, "step": 1632 }, { "epoch": 3.97, "learning_rate": 5.337464277136925e-06, "loss": 0.0284, "step": 1633 }, { "epoch": 3.98, "learning_rate": 5.3128423466633634e-06, "loss": 0.0275, "step": 1634 }, { "epoch": 3.98, "learning_rate": 5.288270584847813e-06, "loss": 0.0271, "step": 1635 }, { "epoch": 3.98, "learning_rate": 5.263749054306347e-06, "loss": 0.028, "step": 1636 }, { "epoch": 3.98, "learning_rate": 5.23927781752703e-06, "loss": 0.0257, "step": 1637 }, { "epoch": 3.99, "learning_rate": 5.214856936869752e-06, "loss": 0.0269, "step": 1638 }, { "epoch": 3.99, "learning_rate": 5.1904864745660835e-06, "loss": 0.0233, "step": 1639 }, { "epoch": 3.99, "learning_rate": 5.166166492719124e-06, "loss": 0.0302, "step": 1640 }, { "epoch": 3.99, "learning_rate": 5.141897053303327e-06, "loss": 0.0278, "step": 1641 }, { "epoch": 4.0, "learning_rate": 5.117678218164338e-06, "loss": 0.0327, "step": 1642 }, { "epoch": 4.0, "learning_rate": 5.0935100490188795e-06, "loss": 0.0293, "step": 1643 }, { "epoch": 4.0, "eval_loss": 0.9072719812393188, "eval_runtime": 116.1375, "eval_samples_per_second": 6.561, "eval_steps_per_second": 0.413, "step": 1643 }, { "epoch": 4.0, "learning_rate": 5.0693926074545315e-06, "loss": 0.0203, "step": 1644 }, { "epoch": 4.0, "learning_rate": 5.045325954929614e-06, "loss": 0.0074, "step": 1645 }, { "epoch": 4.01, "learning_rate": 5.0213101527730345e-06, "loss": 0.0079, "step": 1646 }, { "epoch": 4.01, "learning_rate": 4.9973452621841e-06, "loss": 0.0079, "step": 1647 }, { "epoch": 4.01, "learning_rate": 4.973431344232377e-06, "loss": 0.0085, "step": 1648 }, { "epoch": 4.01, "learning_rate": 4.9495684598575735e-06, "loss": 0.0075, "step": 1649 }, { "epoch": 4.02, "learning_rate": 4.925756669869314e-06, "loss": 0.0068, "step": 1650 }, { "epoch": 4.02, "learning_rate": 4.9019960349470265e-06, "loss": 0.0074, "step": 1651 }, { "epoch": 4.02, "learning_rate": 4.878286615639791e-06, "loss": 0.0053, "step": 1652 }, { "epoch": 4.02, "learning_rate": 4.8546284723661715e-06, "loss": 0.0074, "step": 1653 }, { "epoch": 4.03, "learning_rate": 4.8310216654140425e-06, "loss": 0.005, "step": 1654 }, { "epoch": 4.03, "learning_rate": 4.80746625494051e-06, "loss": 0.006, "step": 1655 }, { "epoch": 4.03, "learning_rate": 4.7839623009716615e-06, "loss": 0.0054, "step": 1656 }, { "epoch": 4.03, "learning_rate": 4.760509863402468e-06, "loss": 0.0068, "step": 1657 }, { "epoch": 4.04, "learning_rate": 4.737109001996637e-06, "loss": 0.0047, "step": 1658 }, { "epoch": 4.04, "learning_rate": 4.7137597763864286e-06, "loss": 0.0056, "step": 1659 }, { "epoch": 4.04, "learning_rate": 4.690462246072516e-06, "loss": 0.0059, "step": 1660 }, { "epoch": 4.04, "learning_rate": 4.667216470423858e-06, "loss": 0.0051, "step": 1661 }, { "epoch": 4.05, "learning_rate": 4.644022508677518e-06, "loss": 0.0063, "step": 1662 }, { "epoch": 4.05, "learning_rate": 4.620880419938511e-06, "loss": 0.0059, "step": 1663 }, { "epoch": 4.05, "learning_rate": 4.5977902631796855e-06, "loss": 0.0067, "step": 1664 }, { "epoch": 4.05, "learning_rate": 4.574752097241533e-06, "loss": 0.005, "step": 1665 }, { "epoch": 4.05, "learning_rate": 4.551765980832059e-06, "loss": 0.0048, "step": 1666 }, { "epoch": 4.06, "learning_rate": 4.528831972526645e-06, "loss": 0.0066, "step": 1667 }, { "epoch": 4.06, "learning_rate": 4.505950130767883e-06, "loss": 0.0045, "step": 1668 }, { "epoch": 4.06, "learning_rate": 4.483120513865411e-06, "loss": 0.0046, "step": 1669 }, { "epoch": 4.06, "learning_rate": 4.460343179995807e-06, "loss": 0.006, "step": 1670 }, { "epoch": 4.07, "learning_rate": 4.4376181872024e-06, "loss": 0.0047, "step": 1671 }, { "epoch": 4.07, "learning_rate": 4.4149455933951396e-06, "loss": 0.0059, "step": 1672 }, { "epoch": 4.07, "learning_rate": 4.392325456350454e-06, "loss": 0.0052, "step": 1673 }, { "epoch": 4.07, "learning_rate": 4.369757833711105e-06, "loss": 0.0062, "step": 1674 }, { "epoch": 4.08, "learning_rate": 4.347242782986008e-06, "loss": 0.004, "step": 1675 }, { "epoch": 4.08, "learning_rate": 4.324780361550129e-06, "loss": 0.006, "step": 1676 }, { "epoch": 4.08, "learning_rate": 4.302370626644314e-06, "loss": 0.0052, "step": 1677 }, { "epoch": 4.08, "learning_rate": 4.280013635375138e-06, "loss": 0.0048, "step": 1678 }, { "epoch": 4.09, "learning_rate": 4.2577094447147856e-06, "loss": 0.0047, "step": 1679 }, { "epoch": 4.09, "learning_rate": 4.235458111500889e-06, "loss": 0.0046, "step": 1680 }, { "epoch": 4.09, "learning_rate": 4.213259692436367e-06, "loss": 0.0047, "step": 1681 }, { "epoch": 4.09, "learning_rate": 4.19111424408932e-06, "loss": 0.0048, "step": 1682 }, { "epoch": 4.1, "learning_rate": 4.169021822892849e-06, "loss": 0.0045, "step": 1683 }, { "epoch": 4.1, "learning_rate": 4.146982485144921e-06, "loss": 0.0083, "step": 1684 }, { "epoch": 4.1, "learning_rate": 4.124996287008245e-06, "loss": 0.0056, "step": 1685 }, { "epoch": 4.1, "learning_rate": 4.103063284510117e-06, "loss": 0.0061, "step": 1686 }, { "epoch": 4.11, "learning_rate": 4.081183533542262e-06, "loss": 0.0058, "step": 1687 }, { "epoch": 4.11, "learning_rate": 4.059357089860702e-06, "loss": 0.0057, "step": 1688 }, { "epoch": 4.11, "learning_rate": 4.037584009085635e-06, "loss": 0.005, "step": 1689 }, { "epoch": 4.11, "learning_rate": 4.015864346701251e-06, "loss": 0.0049, "step": 1690 }, { "epoch": 4.12, "learning_rate": 3.994198158055637e-06, "loss": 0.0048, "step": 1691 }, { "epoch": 4.12, "learning_rate": 3.972585498360606e-06, "loss": 0.0067, "step": 1692 }, { "epoch": 4.12, "learning_rate": 3.951026422691556e-06, "loss": 0.0054, "step": 1693 }, { "epoch": 4.12, "learning_rate": 3.929520985987334e-06, "loss": 0.0043, "step": 1694 }, { "epoch": 4.13, "learning_rate": 3.908069243050122e-06, "loss": 0.0051, "step": 1695 }, { "epoch": 4.13, "learning_rate": 3.886671248545243e-06, "loss": 0.0045, "step": 1696 }, { "epoch": 4.13, "learning_rate": 3.865327057001078e-06, "loss": 0.0054, "step": 1697 }, { "epoch": 4.13, "learning_rate": 3.8440367228088995e-06, "loss": 0.0051, "step": 1698 }, { "epoch": 4.14, "learning_rate": 3.8228003002227255e-06, "loss": 0.0048, "step": 1699 }, { "epoch": 4.14, "learning_rate": 3.801617843359187e-06, "loss": 0.0048, "step": 1700 }, { "epoch": 4.14, "learning_rate": 3.7804894061974183e-06, "loss": 0.0059, "step": 1701 }, { "epoch": 4.14, "learning_rate": 3.7594150425788675e-06, "loss": 0.0057, "step": 1702 }, { "epoch": 4.14, "learning_rate": 3.738394806207207e-06, "loss": 0.0057, "step": 1703 }, { "epoch": 4.15, "learning_rate": 3.7174287506481776e-06, "loss": 0.0046, "step": 1704 }, { "epoch": 4.15, "learning_rate": 3.6965169293294357e-06, "loss": 0.0039, "step": 1705 }, { "epoch": 4.15, "learning_rate": 3.67565939554044e-06, "loss": 0.0045, "step": 1706 }, { "epoch": 4.15, "learning_rate": 3.654856202432319e-06, "loss": 0.0069, "step": 1707 }, { "epoch": 4.16, "learning_rate": 3.6341074030177114e-06, "loss": 0.0053, "step": 1708 }, { "epoch": 4.16, "learning_rate": 3.6134130501706417e-06, "loss": 0.0061, "step": 1709 }, { "epoch": 4.16, "learning_rate": 3.592773196626417e-06, "loss": 0.0049, "step": 1710 }, { "epoch": 4.16, "learning_rate": 3.5721878949814323e-06, "loss": 0.0051, "step": 1711 }, { "epoch": 4.17, "learning_rate": 3.5516571976930786e-06, "loss": 0.0053, "step": 1712 }, { "epoch": 4.17, "learning_rate": 3.531181157079605e-06, "loss": 0.0045, "step": 1713 }, { "epoch": 4.17, "learning_rate": 3.5107598253199758e-06, "loss": 0.0048, "step": 1714 }, { "epoch": 4.17, "learning_rate": 3.4903932544537276e-06, "loss": 0.0044, "step": 1715 }, { "epoch": 4.18, "learning_rate": 3.470081496380881e-06, "loss": 0.0047, "step": 1716 }, { "epoch": 4.18, "learning_rate": 3.4498246028617536e-06, "loss": 0.0041, "step": 1717 }, { "epoch": 4.18, "learning_rate": 3.4296226255168485e-06, "loss": 0.0053, "step": 1718 }, { "epoch": 4.18, "learning_rate": 3.409475615826746e-06, "loss": 0.0057, "step": 1719 }, { "epoch": 4.19, "learning_rate": 3.3893836251319422e-06, "loss": 0.0044, "step": 1720 }, { "epoch": 4.19, "learning_rate": 3.3693467046327117e-06, "loss": 0.005, "step": 1721 }, { "epoch": 4.19, "learning_rate": 3.3493649053890326e-06, "loss": 0.0045, "step": 1722 }, { "epoch": 4.19, "learning_rate": 3.32943827832039e-06, "loss": 0.0049, "step": 1723 }, { "epoch": 4.2, "learning_rate": 3.309566874205672e-06, "loss": 0.0052, "step": 1724 }, { "epoch": 4.2, "learning_rate": 3.289750743683062e-06, "loss": 0.0046, "step": 1725 }, { "epoch": 4.2, "learning_rate": 3.2699899372498733e-06, "loss": 0.0048, "step": 1726 }, { "epoch": 4.2, "learning_rate": 3.2502845052624354e-06, "loss": 0.0055, "step": 1727 }, { "epoch": 4.21, "learning_rate": 3.230634497935983e-06, "loss": 0.0042, "step": 1728 }, { "epoch": 4.21, "learning_rate": 3.211039965344512e-06, "loss": 0.0037, "step": 1729 }, { "epoch": 4.21, "learning_rate": 3.1915009574206262e-06, "loss": 0.0051, "step": 1730 }, { "epoch": 4.21, "learning_rate": 3.17201752395547e-06, "loss": 0.0051, "step": 1731 }, { "epoch": 4.22, "learning_rate": 3.1525897145985472e-06, "loss": 0.0041, "step": 1732 }, { "epoch": 4.22, "learning_rate": 3.133217578857611e-06, "loss": 0.0048, "step": 1733 }, { "epoch": 4.22, "learning_rate": 3.113901166098562e-06, "loss": 0.0053, "step": 1734 }, { "epoch": 4.22, "learning_rate": 3.0946405255452947e-06, "loss": 0.0044, "step": 1735 }, { "epoch": 4.23, "learning_rate": 3.075435706279567e-06, "loss": 0.0044, "step": 1736 }, { "epoch": 4.23, "learning_rate": 3.0562867572409034e-06, "loss": 0.0051, "step": 1737 }, { "epoch": 4.23, "learning_rate": 3.037193727226445e-06, "loss": 0.0046, "step": 1738 }, { "epoch": 4.23, "learning_rate": 3.018156664890834e-06, "loss": 0.0039, "step": 1739 }, { "epoch": 4.23, "learning_rate": 2.9991756187461e-06, "loss": 0.0054, "step": 1740 }, { "epoch": 4.24, "learning_rate": 2.9802506371615246e-06, "loss": 0.0043, "step": 1741 }, { "epoch": 4.24, "learning_rate": 2.961381768363511e-06, "loss": 0.0049, "step": 1742 }, { "epoch": 4.24, "learning_rate": 2.942569060435482e-06, "loss": 0.0047, "step": 1743 }, { "epoch": 4.24, "learning_rate": 2.9238125613177403e-06, "loss": 0.0047, "step": 1744 }, { "epoch": 4.25, "learning_rate": 2.905112318807346e-06, "loss": 0.0047, "step": 1745 }, { "epoch": 4.25, "learning_rate": 2.8864683805580133e-06, "loss": 0.0054, "step": 1746 }, { "epoch": 4.25, "learning_rate": 2.8678807940799744e-06, "loss": 0.0037, "step": 1747 }, { "epoch": 4.25, "learning_rate": 2.8493496067398483e-06, "loss": 0.0062, "step": 1748 }, { "epoch": 4.26, "learning_rate": 2.8308748657605522e-06, "loss": 0.0042, "step": 1749 }, { "epoch": 4.26, "learning_rate": 2.812456618221143e-06, "loss": 0.006, "step": 1750 }, { "epoch": 4.26, "learning_rate": 2.794094911056719e-06, "loss": 0.0043, "step": 1751 }, { "epoch": 4.26, "learning_rate": 2.775789791058306e-06, "loss": 0.0049, "step": 1752 }, { "epoch": 4.27, "learning_rate": 2.757541304872732e-06, "loss": 0.0052, "step": 1753 }, { "epoch": 4.27, "learning_rate": 2.7393494990024834e-06, "loss": 0.0052, "step": 1754 }, { "epoch": 4.27, "learning_rate": 2.7212144198056374e-06, "loss": 0.0065, "step": 1755 }, { "epoch": 4.27, "learning_rate": 2.7031361134956913e-06, "loss": 0.0049, "step": 1756 }, { "epoch": 4.28, "learning_rate": 2.6851146261414747e-06, "loss": 0.0063, "step": 1757 }, { "epoch": 4.28, "learning_rate": 2.667150003667032e-06, "loss": 0.0043, "step": 1758 }, { "epoch": 4.28, "learning_rate": 2.649242291851503e-06, "loss": 0.0048, "step": 1759 }, { "epoch": 4.28, "learning_rate": 2.631391536328992e-06, "loss": 0.005, "step": 1760 }, { "epoch": 4.29, "learning_rate": 2.6135977825884533e-06, "loss": 0.0066, "step": 1761 }, { "epoch": 4.29, "learning_rate": 2.595861075973613e-06, "loss": 0.0053, "step": 1762 }, { "epoch": 4.29, "learning_rate": 2.578181461682794e-06, "loss": 0.0031, "step": 1763 }, { "epoch": 4.29, "learning_rate": 2.5605589847688518e-06, "loss": 0.0028, "step": 1764 }, { "epoch": 4.3, "learning_rate": 2.5429936901390284e-06, "loss": 0.004, "step": 1765 }, { "epoch": 4.3, "learning_rate": 2.5254856225548544e-06, "loss": 0.0034, "step": 1766 }, { "epoch": 4.3, "learning_rate": 2.508034826632022e-06, "loss": 0.0068, "step": 1767 }, { "epoch": 4.3, "learning_rate": 2.4906413468402916e-06, "loss": 0.0049, "step": 1768 }, { "epoch": 4.31, "learning_rate": 2.4733052275033448e-06, "loss": 0.0042, "step": 1769 }, { "epoch": 4.31, "learning_rate": 2.4560265127987147e-06, "loss": 0.0048, "step": 1770 }, { "epoch": 4.31, "learning_rate": 2.4388052467576308e-06, "loss": 0.0053, "step": 1771 }, { "epoch": 4.31, "learning_rate": 2.4216414732649432e-06, "loss": 0.0047, "step": 1772 }, { "epoch": 4.32, "learning_rate": 2.40453523605898e-06, "loss": 0.0046, "step": 1773 }, { "epoch": 4.32, "learning_rate": 2.3874865787314598e-06, "loss": 0.0043, "step": 1774 }, { "epoch": 4.32, "learning_rate": 2.3704955447273636e-06, "loss": 0.0053, "step": 1775 }, { "epoch": 4.32, "learning_rate": 2.3535621773448395e-06, "loss": 0.005, "step": 1776 }, { "epoch": 4.32, "learning_rate": 2.3366865197350733e-06, "loss": 0.0045, "step": 1777 }, { "epoch": 4.33, "learning_rate": 2.3198686149022013e-06, "loss": 0.006, "step": 1778 }, { "epoch": 4.33, "learning_rate": 2.303108505703178e-06, "loss": 0.0047, "step": 1779 }, { "epoch": 4.33, "learning_rate": 2.2864062348476905e-06, "loss": 0.0038, "step": 1780 }, { "epoch": 4.33, "learning_rate": 2.2697618448980217e-06, "loss": 0.0054, "step": 1781 }, { "epoch": 4.34, "learning_rate": 2.2531753782689598e-06, "loss": 0.0052, "step": 1782 }, { "epoch": 4.34, "learning_rate": 2.2366468772276994e-06, "loss": 0.0038, "step": 1783 }, { "epoch": 4.34, "learning_rate": 2.2201763838937184e-06, "loss": 0.0043, "step": 1784 }, { "epoch": 4.34, "learning_rate": 2.2037639402386566e-06, "loss": 0.0038, "step": 1785 }, { "epoch": 4.35, "learning_rate": 2.1874095880862505e-06, "loss": 0.0052, "step": 1786 }, { "epoch": 4.35, "learning_rate": 2.1711133691121903e-06, "loss": 0.0052, "step": 1787 }, { "epoch": 4.35, "learning_rate": 2.1548753248440164e-06, "loss": 0.0055, "step": 1788 }, { "epoch": 4.35, "learning_rate": 2.138695496661039e-06, "loss": 0.0044, "step": 1789 }, { "epoch": 4.36, "learning_rate": 2.122573925794219e-06, "loss": 0.0037, "step": 1790 }, { "epoch": 4.36, "learning_rate": 2.1065106533260383e-06, "loss": 0.0063, "step": 1791 }, { "epoch": 4.36, "learning_rate": 2.0905057201904445e-06, "loss": 0.0035, "step": 1792 }, { "epoch": 4.36, "learning_rate": 2.0745591671727018e-06, "loss": 0.0048, "step": 1793 }, { "epoch": 4.37, "learning_rate": 2.0586710349093013e-06, "loss": 0.0057, "step": 1794 }, { "epoch": 4.37, "learning_rate": 2.0428413638878764e-06, "loss": 0.0055, "step": 1795 }, { "epoch": 4.37, "learning_rate": 2.027070194447081e-06, "loss": 0.0052, "step": 1796 }, { "epoch": 4.37, "learning_rate": 2.0113575667764755e-06, "loss": 0.0036, "step": 1797 }, { "epoch": 4.38, "learning_rate": 1.995703520916456e-06, "loss": 0.0041, "step": 1798 }, { "epoch": 4.38, "learning_rate": 1.9801080967581263e-06, "loss": 0.0054, "step": 1799 }, { "epoch": 4.38, "learning_rate": 1.9645713340431997e-06, "loss": 0.0044, "step": 1800 }, { "epoch": 4.38, "learning_rate": 1.9490932723639165e-06, "loss": 0.0047, "step": 1801 }, { "epoch": 4.39, "learning_rate": 1.9336739511629233e-06, "loss": 0.0049, "step": 1802 }, { "epoch": 4.39, "learning_rate": 1.918313409733169e-06, "loss": 0.0041, "step": 1803 }, { "epoch": 4.39, "learning_rate": 1.9030116872178316e-06, "loss": 0.0044, "step": 1804 }, { "epoch": 4.39, "learning_rate": 1.8877688226101919e-06, "loss": 0.0059, "step": 1805 }, { "epoch": 4.4, "learning_rate": 1.8725848547535368e-06, "loss": 0.0041, "step": 1806 }, { "epoch": 4.4, "learning_rate": 1.8574598223410872e-06, "loss": 0.0061, "step": 1807 }, { "epoch": 4.4, "learning_rate": 1.8423937639158534e-06, "loss": 0.0046, "step": 1808 }, { "epoch": 4.4, "learning_rate": 1.82738671787058e-06, "loss": 0.0043, "step": 1809 }, { "epoch": 4.41, "learning_rate": 1.8124387224476347e-06, "loss": 0.0052, "step": 1810 }, { "epoch": 4.41, "learning_rate": 1.7975498157388915e-06, "loss": 0.0051, "step": 1811 }, { "epoch": 4.41, "learning_rate": 1.7827200356856533e-06, "loss": 0.0046, "step": 1812 }, { "epoch": 4.41, "learning_rate": 1.7679494200785601e-06, "loss": 0.0069, "step": 1813 }, { "epoch": 4.41, "learning_rate": 1.7532380065574726e-06, "loss": 0.005, "step": 1814 }, { "epoch": 4.42, "learning_rate": 1.7385858326113918e-06, "loss": 0.004, "step": 1815 }, { "epoch": 4.42, "learning_rate": 1.7239929355783668e-06, "loss": 0.004, "step": 1816 }, { "epoch": 4.42, "learning_rate": 1.709459352645379e-06, "loss": 0.0052, "step": 1817 }, { "epoch": 4.42, "learning_rate": 1.694985120848258e-06, "loss": 0.0054, "step": 1818 }, { "epoch": 4.43, "learning_rate": 1.6805702770716053e-06, "loss": 0.0044, "step": 1819 }, { "epoch": 4.43, "learning_rate": 1.6662148580486702e-06, "loss": 0.0044, "step": 1820 }, { "epoch": 4.43, "learning_rate": 1.6519189003612767e-06, "loss": 0.0045, "step": 1821 }, { "epoch": 4.43, "learning_rate": 1.6376824404397251e-06, "loss": 0.0045, "step": 1822 }, { "epoch": 4.44, "learning_rate": 1.6235055145626953e-06, "loss": 0.0045, "step": 1823 }, { "epoch": 4.44, "learning_rate": 1.6093881588571501e-06, "loss": 0.0058, "step": 1824 }, { "epoch": 4.44, "learning_rate": 1.5953304092982624e-06, "loss": 0.0047, "step": 1825 }, { "epoch": 4.44, "learning_rate": 1.581332301709304e-06, "loss": 0.005, "step": 1826 }, { "epoch": 4.45, "learning_rate": 1.5673938717615605e-06, "loss": 0.0065, "step": 1827 }, { "epoch": 4.45, "learning_rate": 1.5535151549742528e-06, "loss": 0.0043, "step": 1828 }, { "epoch": 4.45, "learning_rate": 1.5396961867144206e-06, "loss": 0.0044, "step": 1829 }, { "epoch": 4.45, "learning_rate": 1.525937002196845e-06, "loss": 0.004, "step": 1830 }, { "epoch": 4.46, "learning_rate": 1.512237636483982e-06, "loss": 0.0058, "step": 1831 }, { "epoch": 4.46, "learning_rate": 1.4985981244858254e-06, "loss": 0.0032, "step": 1832 }, { "epoch": 4.46, "learning_rate": 1.4850185009598645e-06, "loss": 0.0058, "step": 1833 }, { "epoch": 4.46, "learning_rate": 1.471498800510962e-06, "loss": 0.0051, "step": 1834 }, { "epoch": 4.47, "learning_rate": 1.4580390575912872e-06, "loss": 0.005, "step": 1835 }, { "epoch": 4.47, "learning_rate": 1.4446393065002144e-06, "loss": 0.0044, "step": 1836 }, { "epoch": 4.47, "learning_rate": 1.431299581384249e-06, "loss": 0.005, "step": 1837 }, { "epoch": 4.47, "learning_rate": 1.4180199162369207e-06, "loss": 0.0053, "step": 1838 }, { "epoch": 4.48, "learning_rate": 1.4048003448987213e-06, "loss": 0.0044, "step": 1839 }, { "epoch": 4.48, "learning_rate": 1.3916409010569926e-06, "loss": 0.0061, "step": 1840 }, { "epoch": 4.48, "learning_rate": 1.378541618245871e-06, "loss": 0.0043, "step": 1841 }, { "epoch": 4.48, "learning_rate": 1.365502529846166e-06, "loss": 0.0038, "step": 1842 }, { "epoch": 4.49, "learning_rate": 1.3525236690853093e-06, "loss": 0.0038, "step": 1843 }, { "epoch": 4.49, "learning_rate": 1.3396050690372418e-06, "loss": 0.0042, "step": 1844 }, { "epoch": 4.49, "learning_rate": 1.3267467626223606e-06, "loss": 0.0058, "step": 1845 }, { "epoch": 4.49, "learning_rate": 1.3139487826073937e-06, "loss": 0.0041, "step": 1846 }, { "epoch": 4.5, "learning_rate": 1.3012111616053618e-06, "loss": 0.0047, "step": 1847 }, { "epoch": 4.5, "learning_rate": 1.288533932075453e-06, "loss": 0.0047, "step": 1848 }, { "epoch": 4.5, "learning_rate": 1.2759171263229813e-06, "loss": 0.0043, "step": 1849 }, { "epoch": 4.5, "learning_rate": 1.2633607764992671e-06, "loss": 0.0044, "step": 1850 }, { "epoch": 4.51, "learning_rate": 1.250864914601571e-06, "loss": 0.0059, "step": 1851 }, { "epoch": 4.51, "learning_rate": 1.2384295724730266e-06, "loss": 0.0062, "step": 1852 }, { "epoch": 4.51, "learning_rate": 1.2260547818025326e-06, "loss": 0.0041, "step": 1853 }, { "epoch": 4.51, "learning_rate": 1.2137405741246916e-06, "loss": 0.0065, "step": 1854 }, { "epoch": 4.51, "learning_rate": 1.201486980819716e-06, "loss": 0.0047, "step": 1855 }, { "epoch": 4.52, "learning_rate": 1.1892940331133612e-06, "loss": 0.0041, "step": 1856 }, { "epoch": 4.52, "learning_rate": 1.1771617620768394e-06, "loss": 0.0051, "step": 1857 }, { "epoch": 4.52, "learning_rate": 1.1650901986267365e-06, "loss": 0.0042, "step": 1858 }, { "epoch": 4.52, "learning_rate": 1.1530793735249458e-06, "loss": 0.0048, "step": 1859 }, { "epoch": 4.53, "learning_rate": 1.1411293173785726e-06, "loss": 0.0042, "step": 1860 }, { "epoch": 4.53, "learning_rate": 1.1292400606398635e-06, "loss": 0.0034, "step": 1861 }, { "epoch": 4.53, "learning_rate": 1.1174116336061468e-06, "loss": 0.005, "step": 1862 }, { "epoch": 4.53, "learning_rate": 1.1056440664197144e-06, "loss": 0.0053, "step": 1863 }, { "epoch": 4.54, "learning_rate": 1.0939373890677923e-06, "loss": 0.0043, "step": 1864 }, { "epoch": 4.54, "learning_rate": 1.0822916313824316e-06, "loss": 0.0046, "step": 1865 }, { "epoch": 4.54, "learning_rate": 1.0707068230404404e-06, "loss": 0.0041, "step": 1866 }, { "epoch": 4.54, "learning_rate": 1.059182993563304e-06, "loss": 0.0043, "step": 1867 }, { "epoch": 4.55, "learning_rate": 1.0477201723171377e-06, "loss": 0.0052, "step": 1868 }, { "epoch": 4.55, "learning_rate": 1.036318388512561e-06, "loss": 0.004, "step": 1869 }, { "epoch": 4.55, "learning_rate": 1.0249776712046744e-06, "loss": 0.0045, "step": 1870 }, { "epoch": 4.55, "learning_rate": 1.0136980492929605e-06, "loss": 0.0043, "step": 1871 }, { "epoch": 4.56, "learning_rate": 1.0024795515211988e-06, "loss": 0.0048, "step": 1872 }, { "epoch": 4.56, "learning_rate": 9.913222064774157e-07, "loss": 0.0039, "step": 1873 }, { "epoch": 4.56, "learning_rate": 9.802260425938099e-07, "loss": 0.0051, "step": 1874 }, { "epoch": 4.56, "learning_rate": 9.691910881466564e-07, "loss": 0.0039, "step": 1875 }, { "epoch": 4.57, "learning_rate": 9.58217371256262e-07, "loss": 0.0055, "step": 1876 }, { "epoch": 4.57, "learning_rate": 9.473049198868822e-07, "loss": 0.0047, "step": 1877 }, { "epoch": 4.57, "learning_rate": 9.364537618466451e-07, "loss": 0.0047, "step": 1878 }, { "epoch": 4.57, "learning_rate": 9.25663924787487e-07, "loss": 0.0053, "step": 1879 }, { "epoch": 4.58, "learning_rate": 9.149354362050805e-07, "loss": 0.0037, "step": 1880 }, { "epoch": 4.58, "learning_rate": 9.042683234387645e-07, "loss": 0.0044, "step": 1881 }, { "epoch": 4.58, "learning_rate": 8.936626136714754e-07, "loss": 0.0058, "step": 1882 }, { "epoch": 4.58, "learning_rate": 8.831183339296751e-07, "loss": 0.0045, "step": 1883 }, { "epoch": 4.59, "learning_rate": 8.726355110832862e-07, "loss": 0.0049, "step": 1884 }, { "epoch": 4.59, "learning_rate": 8.622141718456128e-07, "loss": 0.0042, "step": 1885 }, { "epoch": 4.59, "learning_rate": 8.51854342773295e-07, "loss": 0.005, "step": 1886 }, { "epoch": 4.59, "learning_rate": 8.415560502662151e-07, "loss": 0.008, "step": 1887 }, { "epoch": 4.6, "learning_rate": 8.313193205674391e-07, "loss": 0.0055, "step": 1888 }, { "epoch": 4.6, "learning_rate": 8.211441797631752e-07, "loss": 0.004, "step": 1889 }, { "epoch": 4.6, "learning_rate": 8.110306537826601e-07, "loss": 0.0051, "step": 1890 }, { "epoch": 4.6, "learning_rate": 8.009787683981279e-07, "loss": 0.0055, "step": 1891 }, { "epoch": 4.6, "learning_rate": 7.909885492247359e-07, "loss": 0.0037, "step": 1892 }, { "epoch": 4.61, "learning_rate": 7.81060021720495e-07, "loss": 0.0039, "step": 1893 }, { "epoch": 4.61, "learning_rate": 7.711932111862025e-07, "loss": 0.0047, "step": 1894 }, { "epoch": 4.61, "learning_rate": 7.613881427654013e-07, "loss": 0.0039, "step": 1895 }, { "epoch": 4.61, "learning_rate": 7.516448414442739e-07, "loss": 0.0035, "step": 1896 }, { "epoch": 4.62, "learning_rate": 7.419633320516178e-07, "loss": 0.0054, "step": 1897 }, { "epoch": 4.62, "learning_rate": 7.32343639258759e-07, "loss": 0.0055, "step": 1898 }, { "epoch": 4.62, "learning_rate": 7.227857875795025e-07, "loss": 0.0049, "step": 1899 }, { "epoch": 4.62, "learning_rate": 7.13289801370054e-07, "loss": 0.0048, "step": 1900 }, { "epoch": 4.63, "learning_rate": 7.038557048289818e-07, "loss": 0.004, "step": 1901 }, { "epoch": 4.63, "learning_rate": 6.944835219971329e-07, "loss": 0.0051, "step": 1902 }, { "epoch": 4.63, "learning_rate": 6.851732767575752e-07, "loss": 0.0037, "step": 1903 }, { "epoch": 4.63, "learning_rate": 6.759249928355554e-07, "loss": 0.0045, "step": 1904 }, { "epoch": 4.64, "learning_rate": 6.667386937984105e-07, "loss": 0.0055, "step": 1905 }, { "epoch": 4.64, "learning_rate": 6.576144030555259e-07, "loss": 0.0039, "step": 1906 }, { "epoch": 4.64, "learning_rate": 6.485521438582748e-07, "loss": 0.0044, "step": 1907 }, { "epoch": 4.64, "learning_rate": 6.395519392999621e-07, "loss": 0.0048, "step": 1908 }, { "epoch": 4.65, "learning_rate": 6.30613812315739e-07, "loss": 0.0053, "step": 1909 }, { "epoch": 4.65, "learning_rate": 6.217377856825885e-07, "loss": 0.0057, "step": 1910 }, { "epoch": 4.65, "learning_rate": 6.129238820192285e-07, "loss": 0.0045, "step": 1911 }, { "epoch": 4.65, "learning_rate": 6.041721237860676e-07, "loss": 0.0047, "step": 1912 }, { "epoch": 4.66, "learning_rate": 5.954825332851632e-07, "loss": 0.0055, "step": 1913 }, { "epoch": 4.66, "learning_rate": 5.868551326601413e-07, "loss": 0.0037, "step": 1914 }, { "epoch": 4.66, "learning_rate": 5.782899438961487e-07, "loss": 0.0054, "step": 1915 }, { "epoch": 4.66, "learning_rate": 5.697869888198065e-07, "loss": 0.0041, "step": 1916 }, { "epoch": 4.67, "learning_rate": 5.613462890991378e-07, "loss": 0.0047, "step": 1917 }, { "epoch": 4.67, "learning_rate": 5.529678662435228e-07, "loss": 0.0028, "step": 1918 }, { "epoch": 4.67, "learning_rate": 5.446517416036412e-07, "loss": 0.0041, "step": 1919 }, { "epoch": 4.67, "learning_rate": 5.363979363714245e-07, "loss": 0.0039, "step": 1920 }, { "epoch": 4.68, "learning_rate": 5.282064715799895e-07, "loss": 0.0046, "step": 1921 }, { "epoch": 4.68, "learning_rate": 5.20077368103597e-07, "loss": 0.0038, "step": 1922 }, { "epoch": 4.68, "learning_rate": 5.120106466575875e-07, "loss": 0.0053, "step": 1923 }, { "epoch": 4.68, "learning_rate": 5.040063277983287e-07, "loss": 0.0039, "step": 1924 }, { "epoch": 4.69, "learning_rate": 4.96064431923185e-07, "loss": 0.0053, "step": 1925 }, { "epoch": 4.69, "learning_rate": 4.881849792704368e-07, "loss": 0.0041, "step": 1926 }, { "epoch": 4.69, "learning_rate": 4.803679899192392e-07, "loss": 0.0042, "step": 1927 }, { "epoch": 4.69, "learning_rate": 4.7261348378958016e-07, "loss": 0.0048, "step": 1928 }, { "epoch": 4.69, "learning_rate": 4.649214806422164e-07, "loss": 0.0068, "step": 1929 }, { "epoch": 4.7, "learning_rate": 4.5729200007862683e-07, "loss": 0.0053, "step": 1930 }, { "epoch": 4.7, "learning_rate": 4.497250615409732e-07, "loss": 0.004, "step": 1931 }, { "epoch": 4.7, "learning_rate": 4.4222068431203634e-07, "loss": 0.0038, "step": 1932 }, { "epoch": 4.7, "learning_rate": 4.34778887515172e-07, "loss": 0.005, "step": 1933 }, { "epoch": 4.71, "learning_rate": 4.2739969011426074e-07, "loss": 0.0055, "step": 1934 }, { "epoch": 4.71, "learning_rate": 4.2008311091366606e-07, "loss": 0.0051, "step": 1935 }, { "epoch": 4.71, "learning_rate": 4.128291685581792e-07, "loss": 0.0055, "step": 1936 }, { "epoch": 4.71, "learning_rate": 4.0563788153297755e-07, "loss": 0.0043, "step": 1937 }, { "epoch": 4.72, "learning_rate": 3.9850926816357157e-07, "loss": 0.004, "step": 1938 }, { "epoch": 4.72, "learning_rate": 3.9144334661576074e-07, "loss": 0.0039, "step": 1939 }, { "epoch": 4.72, "learning_rate": 3.8444013489558337e-07, "loss": 0.0042, "step": 1940 }, { "epoch": 4.72, "learning_rate": 3.774996508492834e-07, "loss": 0.0054, "step": 1941 }, { "epoch": 4.73, "learning_rate": 3.70621912163252e-07, "loss": 0.0062, "step": 1942 }, { "epoch": 4.73, "learning_rate": 3.6380693636398343e-07, "loss": 0.0051, "step": 1943 }, { "epoch": 4.73, "learning_rate": 3.570547408180441e-07, "loss": 0.0043, "step": 1944 }, { "epoch": 4.73, "learning_rate": 3.503653427320036e-07, "loss": 0.0035, "step": 1945 }, { "epoch": 4.74, "learning_rate": 3.4373875915241493e-07, "loss": 0.0057, "step": 1946 }, { "epoch": 4.74, "learning_rate": 3.371750069657592e-07, "loss": 0.0045, "step": 1947 }, { "epoch": 4.74, "learning_rate": 3.306741028984012e-07, "loss": 0.0058, "step": 1948 }, { "epoch": 4.74, "learning_rate": 3.242360635165559e-07, "loss": 0.0044, "step": 1949 }, { "epoch": 4.75, "learning_rate": 3.1786090522624156e-07, "loss": 0.0047, "step": 1950 }, { "epoch": 4.75, "learning_rate": 3.1154864427322685e-07, "loss": 0.0034, "step": 1951 }, { "epoch": 4.75, "learning_rate": 3.052992967430085e-07, "loss": 0.0035, "step": 1952 }, { "epoch": 4.75, "learning_rate": 2.991128785607589e-07, "loss": 0.0039, "step": 1953 }, { "epoch": 4.76, "learning_rate": 2.9298940549128964e-07, "loss": 0.0039, "step": 1954 }, { "epoch": 4.76, "learning_rate": 2.8692889313900186e-07, "loss": 0.0047, "step": 1955 }, { "epoch": 4.76, "learning_rate": 2.8093135694786667e-07, "loss": 0.0056, "step": 1956 }, { "epoch": 4.76, "learning_rate": 2.749968122013669e-07, "loss": 0.0051, "step": 1957 }, { "epoch": 4.77, "learning_rate": 2.6912527402246367e-07, "loss": 0.0043, "step": 1958 }, { "epoch": 4.77, "learning_rate": 2.633167573735579e-07, "loss": 0.0054, "step": 1959 }, { "epoch": 4.77, "learning_rate": 2.575712770564592e-07, "loss": 0.0035, "step": 1960 }, { "epoch": 4.77, "learning_rate": 2.5188884771233656e-07, "loss": 0.0043, "step": 1961 }, { "epoch": 4.78, "learning_rate": 2.4626948382168726e-07, "loss": 0.0038, "step": 1962 }, { "epoch": 4.78, "learning_rate": 2.407131997043038e-07, "loss": 0.0039, "step": 1963 }, { "epoch": 4.78, "learning_rate": 2.3522000951922417e-07, "loss": 0.0031, "step": 1964 }, { "epoch": 4.78, "learning_rate": 2.2978992726471748e-07, "loss": 0.0045, "step": 1965 }, { "epoch": 4.78, "learning_rate": 2.244229667782205e-07, "loss": 0.0081, "step": 1966 }, { "epoch": 4.79, "learning_rate": 2.1911914173632643e-07, "loss": 0.0046, "step": 1967 }, { "epoch": 4.79, "learning_rate": 2.1387846565474045e-07, "loss": 0.0048, "step": 1968 }, { "epoch": 4.79, "learning_rate": 2.08700951888241e-07, "loss": 0.0039, "step": 1969 }, { "epoch": 4.79, "learning_rate": 2.0358661363065746e-07, "loss": 0.0042, "step": 1970 }, { "epoch": 4.8, "learning_rate": 1.985354639148229e-07, "loss": 0.0051, "step": 1971 }, { "epoch": 4.8, "learning_rate": 1.9354751561254937e-07, "loss": 0.004, "step": 1972 }, { "epoch": 4.8, "learning_rate": 1.8862278143459144e-07, "loss": 0.0043, "step": 1973 }, { "epoch": 4.8, "learning_rate": 1.8376127393062158e-07, "loss": 0.0051, "step": 1974 }, { "epoch": 4.81, "learning_rate": 1.7896300548918832e-07, "loss": 0.0058, "step": 1975 }, { "epoch": 4.81, "learning_rate": 1.7422798833768572e-07, "loss": 0.004, "step": 1976 }, { "epoch": 4.81, "learning_rate": 1.6955623454233128e-07, "loss": 0.0051, "step": 1977 }, { "epoch": 4.81, "learning_rate": 1.6494775600812417e-07, "loss": 0.0048, "step": 1978 }, { "epoch": 4.82, "learning_rate": 1.6040256447881763e-07, "loss": 0.0056, "step": 1979 }, { "epoch": 4.82, "learning_rate": 1.559206715368966e-07, "loss": 0.0039, "step": 1980 }, { "epoch": 4.82, "learning_rate": 1.5150208860354176e-07, "loss": 0.0053, "step": 1981 }, { "epoch": 4.82, "learning_rate": 1.4714682693859617e-07, "loss": 0.0039, "step": 1982 }, { "epoch": 4.83, "learning_rate": 1.428548976405486e-07, "loss": 0.0058, "step": 1983 }, { "epoch": 4.83, "learning_rate": 1.3862631164649475e-07, "loss": 0.0039, "step": 1984 }, { "epoch": 4.83, "learning_rate": 1.344610797321122e-07, "loss": 0.0048, "step": 1985 }, { "epoch": 4.83, "learning_rate": 1.3035921251163263e-07, "loss": 0.0039, "step": 1986 }, { "epoch": 4.84, "learning_rate": 1.2632072043782252e-07, "loss": 0.0032, "step": 1987 }, { "epoch": 4.84, "learning_rate": 1.223456138019413e-07, "loss": 0.0042, "step": 1988 }, { "epoch": 4.84, "learning_rate": 1.1843390273373057e-07, "loss": 0.0056, "step": 1989 }, { "epoch": 4.84, "learning_rate": 1.1458559720137762e-07, "loss": 0.0042, "step": 1990 }, { "epoch": 4.85, "learning_rate": 1.1080070701149359e-07, "loss": 0.004, "step": 1991 }, { "epoch": 4.85, "learning_rate": 1.0707924180909379e-07, "loss": 0.0047, "step": 1992 }, { "epoch": 4.85, "learning_rate": 1.0342121107755898e-07, "loss": 0.0057, "step": 1993 }, { "epoch": 4.85, "learning_rate": 9.982662413862975e-08, "loss": 0.0046, "step": 1994 }, { "epoch": 4.86, "learning_rate": 9.629549015237049e-08, "loss": 0.0037, "step": 1995 }, { "epoch": 4.86, "learning_rate": 9.282781811714159e-08, "loss": 0.0055, "step": 1996 }, { "epoch": 4.86, "learning_rate": 8.94236168695911e-08, "loss": 0.004, "step": 1997 }, { "epoch": 4.86, "learning_rate": 8.608289508462708e-08, "loss": 0.0036, "step": 1998 }, { "epoch": 4.87, "learning_rate": 8.280566127538691e-08, "loss": 0.0047, "step": 1999 }, { "epoch": 4.87, "learning_rate": 7.959192379322077e-08, "loss": 0.0043, "step": 2000 }, { "epoch": 4.87, "learning_rate": 7.644169082768326e-08, "loss": 0.0043, "step": 2001 }, { "epoch": 4.87, "learning_rate": 7.335497040648898e-08, "loss": 0.004, "step": 2002 }, { "epoch": 4.87, "learning_rate": 7.033177039550698e-08, "loss": 0.0037, "step": 2003 }, { "epoch": 4.88, "learning_rate": 6.73720984987386e-08, "loss": 0.0051, "step": 2004 }, { "epoch": 4.88, "learning_rate": 6.4475962258298e-08, "loss": 0.0046, "step": 2005 }, { "epoch": 4.88, "learning_rate": 6.164336905438994e-08, "loss": 0.005, "step": 2006 }, { "epoch": 4.88, "learning_rate": 5.8874326105293196e-08, "loss": 0.0045, "step": 2007 }, { "epoch": 4.89, "learning_rate": 5.616884046734383e-08, "loss": 0.0051, "step": 2008 }, { "epoch": 4.89, "learning_rate": 5.352691903491303e-08, "loss": 0.0046, "step": 2009 }, { "epoch": 4.89, "learning_rate": 5.094856854039043e-08, "loss": 0.0049, "step": 2010 }, { "epoch": 4.89, "learning_rate": 4.8433795554173046e-08, "loss": 0.0037, "step": 2011 }, { "epoch": 4.9, "learning_rate": 4.598260648463748e-08, "loss": 0.0039, "step": 2012 }, { "epoch": 4.9, "learning_rate": 4.359500757813717e-08, "loss": 0.0041, "step": 2013 }, { "epoch": 4.9, "learning_rate": 4.1271004918971847e-08, "loss": 0.004, "step": 2014 }, { "epoch": 4.9, "learning_rate": 3.901060442938198e-08, "loss": 0.0056, "step": 2015 }, { "epoch": 4.91, "learning_rate": 3.68138118695377e-08, "loss": 0.0049, "step": 2016 }, { "epoch": 4.91, "learning_rate": 3.468063283750267e-08, "loss": 0.0051, "step": 2017 }, { "epoch": 4.91, "learning_rate": 3.2611072769250795e-08, "loss": 0.0039, "step": 2018 }, { "epoch": 4.91, "learning_rate": 3.0605136938624544e-08, "loss": 0.004, "step": 2019 }, { "epoch": 4.92, "learning_rate": 2.866283045734053e-08, "loss": 0.0045, "step": 2020 }, { "epoch": 4.92, "learning_rate": 2.6784158274964498e-08, "loss": 0.0063, "step": 2021 }, { "epoch": 4.92, "learning_rate": 2.496912517890304e-08, "loss": 0.0056, "step": 2022 }, { "epoch": 4.92, "learning_rate": 2.3217735794392458e-08, "loss": 0.0039, "step": 2023 }, { "epoch": 4.93, "learning_rate": 2.152999458449323e-08, "loss": 0.004, "step": 2024 }, { "epoch": 4.93, "learning_rate": 1.990590585005947e-08, "loss": 0.0045, "step": 2025 }, { "epoch": 4.93, "learning_rate": 1.834547372975004e-08, "loss": 0.0046, "step": 2026 }, { "epoch": 4.93, "learning_rate": 1.6848702200000786e-08, "loss": 0.0051, "step": 2027 }, { "epoch": 4.94, "learning_rate": 1.5415595075027324e-08, "loss": 0.0054, "step": 2028 }, { "epoch": 4.94, "learning_rate": 1.4046156006808364e-08, "loss": 0.0048, "step": 2029 }, { "epoch": 4.94, "learning_rate": 1.2740388485071863e-08, "loss": 0.0041, "step": 2030 }, { "epoch": 4.94, "learning_rate": 1.149829583730333e-08, "loss": 0.0048, "step": 2031 }, { "epoch": 4.95, "learning_rate": 1.03198812287153e-08, "loss": 0.0045, "step": 2032 }, { "epoch": 4.95, "learning_rate": 9.20514766225289e-09, "loss": 0.0046, "step": 2033 }, { "epoch": 4.95, "learning_rate": 8.154097978591014e-09, "loss": 0.0039, "step": 2034 }, { "epoch": 4.95, "learning_rate": 7.166734856103863e-09, "loss": 0.0044, "step": 2035 }, { "epoch": 4.96, "learning_rate": 6.243060810892654e-09, "loss": 0.0043, "step": 2036 }, { "epoch": 4.96, "learning_rate": 5.3830781967412205e-09, "loss": 0.006, "step": 2037 }, { "epoch": 4.96, "learning_rate": 4.586789205140995e-09, "loss": 0.004, "step": 2038 }, { "epoch": 4.96, "learning_rate": 3.854195865271582e-09, "loss": 0.0048, "step": 2039 }, { "epoch": 4.97, "learning_rate": 3.1853000439951987e-09, "loss": 0.0054, "step": 2040 }, { "epoch": 4.97, "learning_rate": 2.58010344585391e-09, "loss": 0.0038, "step": 2041 }, { "epoch": 4.97, "learning_rate": 2.038607613066845e-09, "loss": 0.0041, "step": 2042 }, { "epoch": 4.97, "learning_rate": 1.5608139255246512e-09, "loss": 0.0047, "step": 2043 }, { "epoch": 4.97, "learning_rate": 1.1467236007867144e-09, "loss": 0.0036, "step": 2044 }, { "epoch": 4.98, "learning_rate": 7.963376940728351e-10, "loss": 0.0051, "step": 2045 }, { "epoch": 4.98, "learning_rate": 5.096570982743298e-10, "loss": 0.0049, "step": 2046 }, { "epoch": 4.98, "learning_rate": 2.866825439346021e-10, "loss": 0.004, "step": 2047 }, { "epoch": 4.98, "learning_rate": 1.2741459925746935e-10, "loss": 0.0057, "step": 2048 }, { "epoch": 4.99, "learning_rate": 3.185367010716256e-11, "loss": 0.004, "step": 2049 }, { "epoch": 4.99, "learning_rate": 0.0, "loss": 0.0058, "step": 2050 }, { "epoch": 4.99, "eval_loss": 1.1227930784225464, "eval_runtime": 115.4648, "eval_samples_per_second": 6.599, "eval_steps_per_second": 0.416, "step": 2050 }, { "epoch": 4.99, "step": 2050, "total_flos": 2.0983989578550477e+19, "train_loss": 0.1699243627804354, "train_runtime": 174534.9388, "train_samples_per_second": 1.506, "train_steps_per_second": 0.012 } ], "max_steps": 2050, "num_train_epochs": 5, "total_flos": 2.0983989578550477e+19, "trial_name": null, "trial_params": null }