{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 4095, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6260162601626018e-07, "loss": 6.6992, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.2520325203252037e-07, "loss": 6.1932, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.878048780487805e-07, "loss": 5.3841, "step": 3 }, { "epoch": 0.0, "learning_rate": 6.504065040650407e-07, "loss": 5.2712, "step": 4 }, { "epoch": 0.0, "learning_rate": 8.130081300813009e-07, "loss": 5.0076, "step": 5 }, { "epoch": 0.0, "learning_rate": 9.75609756097561e-07, "loss": 4.9959, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.1382113821138213e-06, "loss": 4.7726, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.3008130081300815e-06, "loss": 4.7841, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.4634146341463414e-06, "loss": 4.7834, "step": 9 }, { "epoch": 0.01, "learning_rate": 1.6260162601626018e-06, "loss": 4.7161, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.788617886178862e-06, "loss": 4.6349, "step": 11 }, { "epoch": 0.01, "learning_rate": 1.951219512195122e-06, "loss": 4.6362, "step": 12 }, { "epoch": 0.01, "learning_rate": 2.1138211382113824e-06, "loss": 4.5521, "step": 13 }, { "epoch": 0.01, "learning_rate": 2.2764227642276426e-06, "loss": 4.5048, "step": 14 }, { "epoch": 0.01, "learning_rate": 2.4390243902439027e-06, "loss": 4.2868, "step": 15 }, { "epoch": 0.01, "learning_rate": 2.601626016260163e-06, "loss": 4.3228, "step": 16 }, { "epoch": 0.01, "learning_rate": 2.764227642276423e-06, "loss": 4.1938, "step": 17 }, { "epoch": 0.01, "learning_rate": 2.926829268292683e-06, "loss": 4.1673, "step": 18 }, { "epoch": 0.01, "learning_rate": 3.0894308943089435e-06, "loss": 4.1621, "step": 19 }, { "epoch": 0.01, "learning_rate": 3.2520325203252037e-06, "loss": 4.0656, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.414634146341464e-06, "loss": 4.1216, "step": 21 }, { "epoch": 0.02, "learning_rate": 3.577235772357724e-06, "loss": 4.0245, "step": 22 }, { "epoch": 0.02, "learning_rate": 3.7398373983739838e-06, "loss": 3.9729, "step": 23 }, { "epoch": 0.02, "learning_rate": 3.902439024390244e-06, "loss": 3.998, "step": 24 }, { "epoch": 0.02, "learning_rate": 4.0650406504065046e-06, "loss": 4.0535, "step": 25 }, { "epoch": 0.02, "learning_rate": 4.227642276422765e-06, "loss": 4.0039, "step": 26 }, { "epoch": 0.02, "learning_rate": 4.390243902439025e-06, "loss": 4.019, "step": 27 }, { "epoch": 0.02, "learning_rate": 4.552845528455285e-06, "loss": 3.6912, "step": 28 }, { "epoch": 0.02, "learning_rate": 4.715447154471545e-06, "loss": 3.7932, "step": 29 }, { "epoch": 0.02, "learning_rate": 4.8780487804878055e-06, "loss": 3.7252, "step": 30 }, { "epoch": 0.02, "learning_rate": 5.040650406504065e-06, "loss": 3.7714, "step": 31 }, { "epoch": 0.02, "learning_rate": 5.203252032520326e-06, "loss": 3.7869, "step": 32 }, { "epoch": 0.02, "learning_rate": 5.365853658536586e-06, "loss": 3.8037, "step": 33 }, { "epoch": 0.02, "learning_rate": 5.528455284552846e-06, "loss": 3.8603, "step": 34 }, { "epoch": 0.03, "learning_rate": 5.691056910569106e-06, "loss": 3.6487, "step": 35 }, { "epoch": 0.03, "learning_rate": 5.853658536585366e-06, "loss": 3.6666, "step": 36 }, { "epoch": 0.03, "learning_rate": 6.016260162601627e-06, "loss": 3.6814, "step": 37 }, { "epoch": 0.03, "learning_rate": 6.178861788617887e-06, "loss": 3.7174, "step": 38 }, { "epoch": 0.03, "learning_rate": 6.341463414634147e-06, "loss": 3.6394, "step": 39 }, { "epoch": 0.03, "learning_rate": 6.504065040650407e-06, "loss": 3.6138, "step": 40 }, { "epoch": 0.03, "learning_rate": 6.666666666666667e-06, "loss": 3.6356, "step": 41 }, { "epoch": 0.03, "learning_rate": 6.829268292682928e-06, "loss": 3.6149, "step": 42 }, { "epoch": 0.03, "learning_rate": 6.991869918699188e-06, "loss": 3.6127, "step": 43 }, { "epoch": 0.03, "learning_rate": 7.154471544715448e-06, "loss": 3.5481, "step": 44 }, { "epoch": 0.03, "learning_rate": 7.317073170731707e-06, "loss": 3.5133, "step": 45 }, { "epoch": 0.03, "learning_rate": 7.4796747967479676e-06, "loss": 3.5558, "step": 46 }, { "epoch": 0.03, "learning_rate": 7.64227642276423e-06, "loss": 3.5729, "step": 47 }, { "epoch": 0.04, "learning_rate": 7.804878048780489e-06, "loss": 3.5241, "step": 48 }, { "epoch": 0.04, "learning_rate": 7.967479674796748e-06, "loss": 3.4811, "step": 49 }, { "epoch": 0.04, "learning_rate": 8.130081300813009e-06, "loss": 3.4876, "step": 50 }, { "epoch": 0.04, "learning_rate": 8.292682926829268e-06, "loss": 3.4186, "step": 51 }, { "epoch": 0.04, "learning_rate": 8.45528455284553e-06, "loss": 3.4558, "step": 52 }, { "epoch": 0.04, "learning_rate": 8.617886178861789e-06, "loss": 3.3733, "step": 53 }, { "epoch": 0.04, "learning_rate": 8.78048780487805e-06, "loss": 3.4063, "step": 54 }, { "epoch": 0.04, "learning_rate": 8.94308943089431e-06, "loss": 3.3896, "step": 55 }, { "epoch": 0.04, "learning_rate": 9.10569105691057e-06, "loss": 3.2754, "step": 56 }, { "epoch": 0.04, "learning_rate": 9.268292682926831e-06, "loss": 3.2887, "step": 57 }, { "epoch": 0.04, "learning_rate": 9.43089430894309e-06, "loss": 3.3858, "step": 58 }, { "epoch": 0.04, "learning_rate": 9.59349593495935e-06, "loss": 3.3795, "step": 59 }, { "epoch": 0.04, "learning_rate": 9.756097560975611e-06, "loss": 3.3417, "step": 60 }, { "epoch": 0.04, "learning_rate": 9.91869918699187e-06, "loss": 3.1951, "step": 61 }, { "epoch": 0.05, "learning_rate": 1.008130081300813e-05, "loss": 3.2293, "step": 62 }, { "epoch": 0.05, "learning_rate": 1.024390243902439e-05, "loss": 3.4115, "step": 63 }, { "epoch": 0.05, "learning_rate": 1.0406504065040652e-05, "loss": 3.3561, "step": 64 }, { "epoch": 0.05, "learning_rate": 1.0569105691056911e-05, "loss": 3.241, "step": 65 }, { "epoch": 0.05, "learning_rate": 1.0731707317073172e-05, "loss": 3.3011, "step": 66 }, { "epoch": 0.05, "learning_rate": 1.0894308943089431e-05, "loss": 3.3404, "step": 67 }, { "epoch": 0.05, "learning_rate": 1.1056910569105692e-05, "loss": 3.1462, "step": 68 }, { "epoch": 0.05, "learning_rate": 1.1219512195121953e-05, "loss": 3.4653, "step": 69 }, { "epoch": 0.05, "learning_rate": 1.1382113821138213e-05, "loss": 3.3504, "step": 70 }, { "epoch": 0.05, "learning_rate": 1.1544715447154474e-05, "loss": 3.2062, "step": 71 }, { "epoch": 0.05, "learning_rate": 1.1707317073170731e-05, "loss": 3.3346, "step": 72 }, { "epoch": 0.05, "learning_rate": 1.1869918699186992e-05, "loss": 3.2838, "step": 73 }, { "epoch": 0.05, "learning_rate": 1.2032520325203254e-05, "loss": 3.1758, "step": 74 }, { "epoch": 0.05, "learning_rate": 1.2195121951219513e-05, "loss": 3.1911, "step": 75 }, { "epoch": 0.06, "learning_rate": 1.2357723577235774e-05, "loss": 3.1299, "step": 76 }, { "epoch": 0.06, "learning_rate": 1.2520325203252033e-05, "loss": 3.0991, "step": 77 }, { "epoch": 0.06, "learning_rate": 1.2682926829268294e-05, "loss": 3.2234, "step": 78 }, { "epoch": 0.06, "learning_rate": 1.2845528455284555e-05, "loss": 3.1587, "step": 79 }, { "epoch": 0.06, "learning_rate": 1.3008130081300815e-05, "loss": 3.1245, "step": 80 }, { "epoch": 0.06, "learning_rate": 1.3170731707317076e-05, "loss": 3.0742, "step": 81 }, { "epoch": 0.06, "learning_rate": 1.3333333333333333e-05, "loss": 3.2437, "step": 82 }, { "epoch": 0.06, "learning_rate": 1.3495934959349594e-05, "loss": 3.2226, "step": 83 }, { "epoch": 0.06, "learning_rate": 1.3658536585365855e-05, "loss": 3.0976, "step": 84 }, { "epoch": 0.06, "learning_rate": 1.3821138211382115e-05, "loss": 3.1464, "step": 85 }, { "epoch": 0.06, "learning_rate": 1.3983739837398376e-05, "loss": 3.0499, "step": 86 }, { "epoch": 0.06, "learning_rate": 1.4146341463414635e-05, "loss": 3.1848, "step": 87 }, { "epoch": 0.06, "learning_rate": 1.4308943089430896e-05, "loss": 3.1426, "step": 88 }, { "epoch": 0.07, "learning_rate": 1.4471544715447157e-05, "loss": 3.1442, "step": 89 }, { "epoch": 0.07, "learning_rate": 1.4634146341463415e-05, "loss": 2.9993, "step": 90 }, { "epoch": 0.07, "learning_rate": 1.4796747967479676e-05, "loss": 3.0216, "step": 91 }, { "epoch": 0.07, "learning_rate": 1.4959349593495935e-05, "loss": 2.9955, "step": 92 }, { "epoch": 0.07, "learning_rate": 1.5121951219512196e-05, "loss": 3.1186, "step": 93 }, { "epoch": 0.07, "learning_rate": 1.528455284552846e-05, "loss": 3.0444, "step": 94 }, { "epoch": 0.07, "learning_rate": 1.5447154471544717e-05, "loss": 2.904, "step": 95 }, { "epoch": 0.07, "learning_rate": 1.5609756097560978e-05, "loss": 3.0089, "step": 96 }, { "epoch": 0.07, "learning_rate": 1.5772357723577235e-05, "loss": 3.161, "step": 97 }, { "epoch": 0.07, "learning_rate": 1.5934959349593496e-05, "loss": 3.0699, "step": 98 }, { "epoch": 0.07, "learning_rate": 1.6097560975609757e-05, "loss": 3.025, "step": 99 }, { "epoch": 0.07, "learning_rate": 1.6260162601626018e-05, "loss": 2.9744, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.642276422764228e-05, "loss": 2.9456, "step": 101 }, { "epoch": 0.07, "learning_rate": 1.6585365853658537e-05, "loss": 3.0036, "step": 102 }, { "epoch": 0.08, "learning_rate": 1.6747967479674798e-05, "loss": 3.2183, "step": 103 }, { "epoch": 0.08, "learning_rate": 1.691056910569106e-05, "loss": 3.125, "step": 104 }, { "epoch": 0.08, "learning_rate": 1.7073170731707317e-05, "loss": 3.0519, "step": 105 }, { "epoch": 0.08, "learning_rate": 1.7235772357723578e-05, "loss": 3.2332, "step": 106 }, { "epoch": 0.08, "learning_rate": 1.739837398373984e-05, "loss": 2.9392, "step": 107 }, { "epoch": 0.08, "learning_rate": 1.75609756097561e-05, "loss": 3.0026, "step": 108 }, { "epoch": 0.08, "learning_rate": 1.772357723577236e-05, "loss": 3.0309, "step": 109 }, { "epoch": 0.08, "learning_rate": 1.788617886178862e-05, "loss": 3.2763, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.804878048780488e-05, "loss": 2.9774, "step": 111 }, { "epoch": 0.08, "learning_rate": 1.821138211382114e-05, "loss": 3.0089, "step": 112 }, { "epoch": 0.08, "learning_rate": 1.83739837398374e-05, "loss": 3.1183, "step": 113 }, { "epoch": 0.08, "learning_rate": 1.8536585365853663e-05, "loss": 3.2371, "step": 114 }, { "epoch": 0.08, "learning_rate": 1.869918699186992e-05, "loss": 2.9699, "step": 115 }, { "epoch": 0.08, "learning_rate": 1.886178861788618e-05, "loss": 3.0926, "step": 116 }, { "epoch": 0.09, "learning_rate": 1.902439024390244e-05, "loss": 3.0821, "step": 117 }, { "epoch": 0.09, "learning_rate": 1.91869918699187e-05, "loss": 2.8519, "step": 118 }, { "epoch": 0.09, "learning_rate": 1.934959349593496e-05, "loss": 3.0601, "step": 119 }, { "epoch": 0.09, "learning_rate": 1.9512195121951222e-05, "loss": 2.9155, "step": 120 }, { "epoch": 0.09, "learning_rate": 1.9674796747967483e-05, "loss": 2.9966, "step": 121 }, { "epoch": 0.09, "learning_rate": 1.983739837398374e-05, "loss": 3.0809, "step": 122 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 3.0559, "step": 123 }, { "epoch": 0.09, "learning_rate": 1.9999996872111618e-05, "loss": 2.8504, "step": 124 }, { "epoch": 0.09, "learning_rate": 1.9999987488448416e-05, "loss": 2.9786, "step": 125 }, { "epoch": 0.09, "learning_rate": 1.9999971849016274e-05, "loss": 3.0621, "step": 126 }, { "epoch": 0.09, "learning_rate": 1.999994995382497e-05, "loss": 3.0205, "step": 127 }, { "epoch": 0.09, "learning_rate": 1.99999218028882e-05, "loss": 2.8792, "step": 128 }, { "epoch": 0.09, "learning_rate": 1.9999887396223584e-05, "loss": 3.0367, "step": 129 }, { "epoch": 0.1, "learning_rate": 1.9999846733852636e-05, "loss": 3.08, "step": 130 }, { "epoch": 0.1, "learning_rate": 1.9999799815800798e-05, "loss": 3.1082, "step": 131 }, { "epoch": 0.1, "learning_rate": 1.999974664209742e-05, "loss": 3.0766, "step": 132 }, { "epoch": 0.1, "learning_rate": 1.9999687212775766e-05, "loss": 2.9957, "step": 133 }, { "epoch": 0.1, "learning_rate": 1.999962152787301e-05, "loss": 2.9507, "step": 134 }, { "epoch": 0.1, "learning_rate": 1.9999549587430252e-05, "loss": 2.9199, "step": 135 }, { "epoch": 0.1, "learning_rate": 1.999947139149249e-05, "loss": 2.893, "step": 136 }, { "epoch": 0.1, "learning_rate": 1.9999386940108643e-05, "loss": 3.0152, "step": 137 }, { "epoch": 0.1, "learning_rate": 1.999929623333154e-05, "loss": 2.9554, "step": 138 }, { "epoch": 0.1, "learning_rate": 1.9999199271217927e-05, "loss": 2.9035, "step": 139 }, { "epoch": 0.1, "learning_rate": 1.9999096053828465e-05, "loss": 3.0563, "step": 140 }, { "epoch": 0.1, "learning_rate": 1.9998986581227718e-05, "loss": 3.0524, "step": 141 }, { "epoch": 0.1, "learning_rate": 1.999887085348417e-05, "loss": 2.8906, "step": 142 }, { "epoch": 0.1, "learning_rate": 1.9998748870670225e-05, "loss": 2.9259, "step": 143 }, { "epoch": 0.11, "learning_rate": 1.9998620632862184e-05, "loss": 3.0438, "step": 144 }, { "epoch": 0.11, "learning_rate": 1.9998486140140273e-05, "loss": 2.9044, "step": 145 }, { "epoch": 0.11, "learning_rate": 1.999834539258863e-05, "loss": 2.8014, "step": 146 }, { "epoch": 0.11, "learning_rate": 1.99981983902953e-05, "loss": 2.9316, "step": 147 }, { "epoch": 0.11, "learning_rate": 1.999804513335225e-05, "loss": 2.9433, "step": 148 }, { "epoch": 0.11, "learning_rate": 1.9997885621855346e-05, "loss": 2.7814, "step": 149 }, { "epoch": 0.11, "learning_rate": 1.999771985590438e-05, "loss": 2.8892, "step": 150 }, { "epoch": 0.11, "learning_rate": 1.9997547835603052e-05, "loss": 2.9199, "step": 151 }, { "epoch": 0.11, "learning_rate": 1.999736956105897e-05, "loss": 2.7877, "step": 152 }, { "epoch": 0.11, "learning_rate": 1.9997185032383663e-05, "loss": 2.9946, "step": 153 }, { "epoch": 0.11, "learning_rate": 1.9996994249692565e-05, "loss": 2.8637, "step": 154 }, { "epoch": 0.11, "learning_rate": 1.9996797213105026e-05, "loss": 2.9238, "step": 155 }, { "epoch": 0.11, "learning_rate": 1.9996593922744308e-05, "loss": 2.7352, "step": 156 }, { "epoch": 0.12, "learning_rate": 1.999638437873759e-05, "loss": 2.9629, "step": 157 }, { "epoch": 0.12, "learning_rate": 1.9996168581215945e-05, "loss": 2.8777, "step": 158 }, { "epoch": 0.12, "learning_rate": 1.9995946530314384e-05, "loss": 2.8627, "step": 159 }, { "epoch": 0.12, "learning_rate": 1.9995718226171812e-05, "loss": 2.792, "step": 160 }, { "epoch": 0.12, "learning_rate": 1.9995483668931052e-05, "loss": 2.9102, "step": 161 }, { "epoch": 0.12, "learning_rate": 1.9995242858738834e-05, "loss": 2.6865, "step": 162 }, { "epoch": 0.12, "learning_rate": 1.9994995795745808e-05, "loss": 2.8572, "step": 163 }, { "epoch": 0.12, "learning_rate": 1.999474248010653e-05, "loss": 2.8355, "step": 164 }, { "epoch": 0.12, "learning_rate": 1.999448291197947e-05, "loss": 2.8978, "step": 165 }, { "epoch": 0.12, "learning_rate": 1.9994217091527e-05, "loss": 2.9618, "step": 166 }, { "epoch": 0.12, "learning_rate": 1.9993945018915423e-05, "loss": 2.8053, "step": 167 }, { "epoch": 0.12, "learning_rate": 1.9993666694314938e-05, "loss": 2.948, "step": 168 }, { "epoch": 0.12, "learning_rate": 1.9993382117899652e-05, "loss": 2.8396, "step": 169 }, { "epoch": 0.12, "learning_rate": 1.9993091289847595e-05, "loss": 2.8319, "step": 170 }, { "epoch": 0.13, "learning_rate": 1.9992794210340707e-05, "loss": 2.7599, "step": 171 }, { "epoch": 0.13, "learning_rate": 1.9992490879564825e-05, "loss": 2.866, "step": 172 }, { "epoch": 0.13, "learning_rate": 1.9992181297709712e-05, "loss": 2.7847, "step": 173 }, { "epoch": 0.13, "learning_rate": 1.9991865464969035e-05, "loss": 2.9144, "step": 174 }, { "epoch": 0.13, "learning_rate": 1.9991543381540368e-05, "loss": 2.9882, "step": 175 }, { "epoch": 0.13, "learning_rate": 1.99912150476252e-05, "loss": 2.7822, "step": 176 }, { "epoch": 0.13, "learning_rate": 1.9990880463428938e-05, "loss": 2.8177, "step": 177 }, { "epoch": 0.13, "learning_rate": 1.9990539629160875e-05, "loss": 2.9798, "step": 178 }, { "epoch": 0.13, "learning_rate": 1.9990192545034245e-05, "loss": 2.9726, "step": 179 }, { "epoch": 0.13, "learning_rate": 1.9989839211266164e-05, "loss": 2.7583, "step": 180 }, { "epoch": 0.13, "learning_rate": 1.998947962807768e-05, "loss": 2.8343, "step": 181 }, { "epoch": 0.13, "learning_rate": 1.9989113795693732e-05, "loss": 2.7647, "step": 182 }, { "epoch": 0.13, "learning_rate": 1.998874171434318e-05, "loss": 2.7466, "step": 183 }, { "epoch": 0.13, "learning_rate": 1.998836338425879e-05, "loss": 2.8908, "step": 184 }, { "epoch": 0.14, "learning_rate": 1.9987978805677237e-05, "loss": 2.7695, "step": 185 }, { "epoch": 0.14, "learning_rate": 1.99875879788391e-05, "loss": 2.7966, "step": 186 }, { "epoch": 0.14, "learning_rate": 1.998719090398888e-05, "loss": 2.7963, "step": 187 }, { "epoch": 0.14, "learning_rate": 1.998678758137497e-05, "loss": 2.6633, "step": 188 }, { "epoch": 0.14, "learning_rate": 1.9986378011249684e-05, "loss": 2.8187, "step": 189 }, { "epoch": 0.14, "learning_rate": 1.998596219386924e-05, "loss": 2.7977, "step": 190 }, { "epoch": 0.14, "learning_rate": 1.9985540129493763e-05, "loss": 2.866, "step": 191 }, { "epoch": 0.14, "learning_rate": 1.998511181838729e-05, "loss": 2.8588, "step": 192 }, { "epoch": 0.14, "learning_rate": 1.9984677260817754e-05, "loss": 2.7316, "step": 193 }, { "epoch": 0.14, "learning_rate": 1.998423645705702e-05, "loss": 2.7966, "step": 194 }, { "epoch": 0.14, "learning_rate": 1.9983789407380828e-05, "loss": 2.8674, "step": 195 }, { "epoch": 0.14, "learning_rate": 1.9983336112068853e-05, "loss": 2.8855, "step": 196 }, { "epoch": 0.14, "learning_rate": 1.9982876571404664e-05, "loss": 2.876, "step": 197 }, { "epoch": 0.15, "learning_rate": 1.9982410785675735e-05, "loss": 2.9647, "step": 198 }, { "epoch": 0.15, "learning_rate": 1.998193875517346e-05, "loss": 2.88, "step": 199 }, { "epoch": 0.15, "learning_rate": 1.9981460480193124e-05, "loss": 2.7533, "step": 200 }, { "epoch": 0.15, "learning_rate": 1.9980975961033925e-05, "loss": 2.7098, "step": 201 }, { "epoch": 0.15, "learning_rate": 1.9980485197998975e-05, "loss": 2.8053, "step": 202 }, { "epoch": 0.15, "learning_rate": 1.9979988191395272e-05, "loss": 2.8201, "step": 203 }, { "epoch": 0.15, "learning_rate": 1.997948494153374e-05, "loss": 2.813, "step": 204 }, { "epoch": 0.15, "learning_rate": 1.9978975448729203e-05, "loss": 2.7009, "step": 205 }, { "epoch": 0.15, "learning_rate": 1.9978459713300383e-05, "loss": 2.7588, "step": 206 }, { "epoch": 0.15, "learning_rate": 1.9977937735569915e-05, "loss": 2.8489, "step": 207 }, { "epoch": 0.15, "learning_rate": 1.9977409515864336e-05, "loss": 2.8269, "step": 208 }, { "epoch": 0.15, "learning_rate": 1.997687505451409e-05, "loss": 2.9165, "step": 209 }, { "epoch": 0.15, "learning_rate": 1.9976334351853522e-05, "loss": 2.8468, "step": 210 }, { "epoch": 0.15, "learning_rate": 1.9975787408220882e-05, "loss": 2.7336, "step": 211 }, { "epoch": 0.16, "learning_rate": 1.997523422395833e-05, "loss": 2.8496, "step": 212 }, { "epoch": 0.16, "learning_rate": 1.9974674799411927e-05, "loss": 2.6769, "step": 213 }, { "epoch": 0.16, "learning_rate": 1.997410913493163e-05, "loss": 2.7647, "step": 214 }, { "epoch": 0.16, "learning_rate": 1.997353723087131e-05, "loss": 2.7654, "step": 215 }, { "epoch": 0.16, "learning_rate": 1.9972959087588734e-05, "loss": 2.7806, "step": 216 }, { "epoch": 0.16, "learning_rate": 1.9972374705445584e-05, "loss": 2.744, "step": 217 }, { "epoch": 0.16, "learning_rate": 1.9971784084807426e-05, "loss": 2.783, "step": 218 }, { "epoch": 0.16, "learning_rate": 1.9971187226043746e-05, "loss": 2.8252, "step": 219 }, { "epoch": 0.16, "learning_rate": 1.997058412952792e-05, "loss": 2.8938, "step": 220 }, { "epoch": 0.16, "learning_rate": 1.996997479563724e-05, "loss": 2.7479, "step": 221 }, { "epoch": 0.16, "learning_rate": 1.9969359224752884e-05, "loss": 2.6406, "step": 222 }, { "epoch": 0.16, "learning_rate": 1.9968737417259945e-05, "loss": 2.6211, "step": 223 }, { "epoch": 0.16, "learning_rate": 1.9968109373547406e-05, "loss": 2.7438, "step": 224 }, { "epoch": 0.16, "learning_rate": 1.996747509400816e-05, "loss": 2.6309, "step": 225 }, { "epoch": 0.17, "learning_rate": 1.9966834579039003e-05, "loss": 2.7878, "step": 226 }, { "epoch": 0.17, "learning_rate": 1.996618782904062e-05, "loss": 2.7786, "step": 227 }, { "epoch": 0.17, "learning_rate": 1.99655348444176e-05, "loss": 2.6937, "step": 228 }, { "epoch": 0.17, "learning_rate": 1.9964875625578447e-05, "loss": 2.774, "step": 229 }, { "epoch": 0.17, "learning_rate": 1.9964210172935544e-05, "loss": 2.5835, "step": 230 }, { "epoch": 0.17, "learning_rate": 1.996353848690519e-05, "loss": 2.6658, "step": 231 }, { "epoch": 0.17, "learning_rate": 1.9962860567907573e-05, "loss": 2.8612, "step": 232 }, { "epoch": 0.17, "learning_rate": 1.9962176416366785e-05, "loss": 2.7317, "step": 233 }, { "epoch": 0.17, "learning_rate": 1.9961486032710813e-05, "loss": 2.7484, "step": 234 }, { "epoch": 0.17, "learning_rate": 1.9960789417371555e-05, "loss": 2.7338, "step": 235 }, { "epoch": 0.17, "learning_rate": 1.9960086570784787e-05, "loss": 2.7731, "step": 236 }, { "epoch": 0.17, "learning_rate": 1.9959377493390198e-05, "loss": 2.8201, "step": 237 }, { "epoch": 0.17, "learning_rate": 1.9958662185631372e-05, "loss": 2.6629, "step": 238 }, { "epoch": 0.18, "learning_rate": 1.995794064795579e-05, "loss": 2.762, "step": 239 }, { "epoch": 0.18, "learning_rate": 1.9957212880814826e-05, "loss": 2.8015, "step": 240 }, { "epoch": 0.18, "learning_rate": 1.9956478884663762e-05, "loss": 2.7798, "step": 241 }, { "epoch": 0.18, "learning_rate": 1.995573865996176e-05, "loss": 2.7122, "step": 242 }, { "epoch": 0.18, "learning_rate": 1.9954992207171898e-05, "loss": 2.672, "step": 243 }, { "epoch": 0.18, "learning_rate": 1.9954239526761136e-05, "loss": 2.8468, "step": 244 }, { "epoch": 0.18, "learning_rate": 1.9953480619200332e-05, "loss": 2.716, "step": 245 }, { "epoch": 0.18, "learning_rate": 1.9952715484964242e-05, "loss": 2.5839, "step": 246 }, { "epoch": 0.18, "learning_rate": 1.995194412453152e-05, "loss": 2.7894, "step": 247 }, { "epoch": 0.18, "learning_rate": 1.995116653838471e-05, "loss": 2.7336, "step": 248 }, { "epoch": 0.18, "learning_rate": 1.9950382727010254e-05, "loss": 2.6646, "step": 249 }, { "epoch": 0.18, "learning_rate": 1.9949592690898484e-05, "loss": 2.8071, "step": 250 }, { "epoch": 0.18, "learning_rate": 1.994879643054363e-05, "loss": 2.6957, "step": 251 }, { "epoch": 0.18, "learning_rate": 1.9947993946443814e-05, "loss": 2.7763, "step": 252 }, { "epoch": 0.19, "learning_rate": 1.9947185239101057e-05, "loss": 2.7006, "step": 253 }, { "epoch": 0.19, "learning_rate": 1.9946370309021262e-05, "loss": 2.6627, "step": 254 }, { "epoch": 0.19, "learning_rate": 1.9945549156714236e-05, "loss": 2.7601, "step": 255 }, { "epoch": 0.19, "learning_rate": 1.994472178269367e-05, "loss": 2.647, "step": 256 }, { "epoch": 0.19, "learning_rate": 1.994388818747715e-05, "loss": 2.8232, "step": 257 }, { "epoch": 0.19, "learning_rate": 1.9943048371586158e-05, "loss": 2.7891, "step": 258 }, { "epoch": 0.19, "learning_rate": 1.9942202335546063e-05, "loss": 2.78, "step": 259 }, { "epoch": 0.19, "learning_rate": 1.9941350079886125e-05, "loss": 2.817, "step": 260 }, { "epoch": 0.19, "learning_rate": 1.99404916051395e-05, "loss": 2.7752, "step": 261 }, { "epoch": 0.19, "learning_rate": 1.993962691184322e-05, "loss": 2.5869, "step": 262 }, { "epoch": 0.19, "learning_rate": 1.9938756000538234e-05, "loss": 2.7367, "step": 263 }, { "epoch": 0.19, "learning_rate": 1.993787887176935e-05, "loss": 2.675, "step": 264 }, { "epoch": 0.19, "learning_rate": 1.9936995526085288e-05, "loss": 2.6234, "step": 265 }, { "epoch": 0.19, "learning_rate": 1.993610596403865e-05, "loss": 2.5741, "step": 266 }, { "epoch": 0.2, "learning_rate": 1.993521018618592e-05, "loss": 2.7328, "step": 267 }, { "epoch": 0.2, "learning_rate": 1.9934308193087484e-05, "loss": 2.6042, "step": 268 }, { "epoch": 0.2, "learning_rate": 1.99333999853076e-05, "loss": 2.6703, "step": 269 }, { "epoch": 0.2, "learning_rate": 1.9932485563414436e-05, "loss": 2.6837, "step": 270 }, { "epoch": 0.2, "learning_rate": 1.993156492798002e-05, "loss": 2.6386, "step": 271 }, { "epoch": 0.2, "learning_rate": 1.9930638079580286e-05, "loss": 2.5694, "step": 272 }, { "epoch": 0.2, "learning_rate": 1.9929705018795055e-05, "loss": 2.6468, "step": 273 }, { "epoch": 0.2, "learning_rate": 1.992876574620802e-05, "loss": 2.6777, "step": 274 }, { "epoch": 0.2, "learning_rate": 1.9927820262406772e-05, "loss": 2.6371, "step": 275 }, { "epoch": 0.2, "learning_rate": 1.9926868567982788e-05, "loss": 2.7301, "step": 276 }, { "epoch": 0.2, "learning_rate": 1.9925910663531424e-05, "loss": 2.8484, "step": 277 }, { "epoch": 0.2, "learning_rate": 1.9924946549651923e-05, "loss": 2.6616, "step": 278 }, { "epoch": 0.2, "learning_rate": 1.9923976226947417e-05, "loss": 2.6752, "step": 279 }, { "epoch": 0.21, "learning_rate": 1.9922999696024912e-05, "loss": 2.6676, "step": 280 }, { "epoch": 0.21, "learning_rate": 1.992201695749531e-05, "loss": 2.6193, "step": 281 }, { "epoch": 0.21, "learning_rate": 1.9921028011973388e-05, "loss": 2.7586, "step": 282 }, { "epoch": 0.21, "learning_rate": 1.9920032860077807e-05, "loss": 2.6874, "step": 283 }, { "epoch": 0.21, "learning_rate": 1.9919031502431113e-05, "loss": 2.6812, "step": 284 }, { "epoch": 0.21, "learning_rate": 1.9918023939659735e-05, "loss": 2.7353, "step": 285 }, { "epoch": 0.21, "learning_rate": 1.9917010172393976e-05, "loss": 2.6612, "step": 286 }, { "epoch": 0.21, "learning_rate": 1.991599020126803e-05, "loss": 2.5527, "step": 287 }, { "epoch": 0.21, "learning_rate": 1.991496402691997e-05, "loss": 2.7122, "step": 288 }, { "epoch": 0.21, "learning_rate": 1.9913931649991746e-05, "loss": 2.7246, "step": 289 }, { "epoch": 0.21, "learning_rate": 1.991289307112919e-05, "loss": 2.638, "step": 290 }, { "epoch": 0.21, "learning_rate": 1.991184829098201e-05, "loss": 2.5151, "step": 291 }, { "epoch": 0.21, "learning_rate": 1.9910797310203805e-05, "loss": 2.6953, "step": 292 }, { "epoch": 0.21, "learning_rate": 1.9909740129452035e-05, "loss": 2.6927, "step": 293 }, { "epoch": 0.22, "learning_rate": 1.990867674938806e-05, "loss": 2.7006, "step": 294 }, { "epoch": 0.22, "learning_rate": 1.9907607170677096e-05, "loss": 2.6107, "step": 295 }, { "epoch": 0.22, "learning_rate": 1.9906531393988256e-05, "loss": 2.81, "step": 296 }, { "epoch": 0.22, "learning_rate": 1.990544941999452e-05, "loss": 2.7974, "step": 297 }, { "epoch": 0.22, "learning_rate": 1.990436124937274e-05, "loss": 2.7551, "step": 298 }, { "epoch": 0.22, "learning_rate": 1.990326688280366e-05, "loss": 2.6229, "step": 299 }, { "epoch": 0.22, "learning_rate": 1.9902166320971888e-05, "loss": 2.8121, "step": 300 }, { "epoch": 0.22, "learning_rate": 1.990105956456591e-05, "loss": 2.6795, "step": 301 }, { "epoch": 0.22, "learning_rate": 1.989994661427809e-05, "loss": 2.7049, "step": 302 }, { "epoch": 0.22, "learning_rate": 1.9898827470804663e-05, "loss": 2.7219, "step": 303 }, { "epoch": 0.22, "learning_rate": 1.989770213484574e-05, "loss": 2.7126, "step": 304 }, { "epoch": 0.22, "learning_rate": 1.9896570607105308e-05, "loss": 2.6802, "step": 305 }, { "epoch": 0.22, "learning_rate": 1.9895432888291228e-05, "loss": 2.5975, "step": 306 }, { "epoch": 0.22, "learning_rate": 1.9894288979115224e-05, "loss": 2.6219, "step": 307 }, { "epoch": 0.23, "learning_rate": 1.9893138880292904e-05, "loss": 2.633, "step": 308 }, { "epoch": 0.23, "learning_rate": 1.9891982592543748e-05, "loss": 2.6176, "step": 309 }, { "epoch": 0.23, "learning_rate": 1.989082011659109e-05, "loss": 2.6134, "step": 310 }, { "epoch": 0.23, "learning_rate": 1.988965145316217e-05, "loss": 2.6985, "step": 311 }, { "epoch": 0.23, "learning_rate": 1.9888476602988063e-05, "loss": 2.522, "step": 312 }, { "epoch": 0.23, "learning_rate": 1.9887295566803733e-05, "loss": 2.6772, "step": 313 }, { "epoch": 0.23, "learning_rate": 1.988610834534801e-05, "loss": 2.6133, "step": 314 }, { "epoch": 0.23, "learning_rate": 1.988491493936359e-05, "loss": 2.6023, "step": 315 }, { "epoch": 0.23, "learning_rate": 1.9883715349597045e-05, "loss": 2.7036, "step": 316 }, { "epoch": 0.23, "learning_rate": 1.988250957679881e-05, "loss": 2.6411, "step": 317 }, { "epoch": 0.23, "learning_rate": 1.9881297621723194e-05, "loss": 2.6206, "step": 318 }, { "epoch": 0.23, "learning_rate": 1.9880079485128365e-05, "loss": 2.5999, "step": 319 }, { "epoch": 0.23, "learning_rate": 1.987885516777636e-05, "loss": 2.6495, "step": 320 }, { "epoch": 0.24, "learning_rate": 1.9877624670433086e-05, "loss": 2.5045, "step": 321 }, { "epoch": 0.24, "learning_rate": 1.9876387993868317e-05, "loss": 2.5735, "step": 322 }, { "epoch": 0.24, "learning_rate": 1.987514513885569e-05, "loss": 2.7171, "step": 323 }, { "epoch": 0.24, "learning_rate": 1.9873896106172705e-05, "loss": 2.4999, "step": 324 }, { "epoch": 0.24, "learning_rate": 1.987264089660073e-05, "loss": 2.7714, "step": 325 }, { "epoch": 0.24, "learning_rate": 1.9871379510925e-05, "loss": 2.4512, "step": 326 }, { "epoch": 0.24, "learning_rate": 1.9870111949934597e-05, "loss": 2.7868, "step": 327 }, { "epoch": 0.24, "learning_rate": 1.9868838214422493e-05, "loss": 2.5259, "step": 328 }, { "epoch": 0.24, "learning_rate": 1.9867558305185504e-05, "loss": 2.5852, "step": 329 }, { "epoch": 0.24, "learning_rate": 1.9866272223024314e-05, "loss": 2.6597, "step": 330 }, { "epoch": 0.24, "learning_rate": 1.986497996874346e-05, "loss": 2.67, "step": 331 }, { "epoch": 0.24, "learning_rate": 1.9863681543151355e-05, "loss": 2.7521, "step": 332 }, { "epoch": 0.24, "learning_rate": 1.9862376947060263e-05, "loss": 2.7244, "step": 333 }, { "epoch": 0.24, "learning_rate": 1.986106618128631e-05, "loss": 2.7155, "step": 334 }, { "epoch": 0.25, "learning_rate": 1.9859749246649482e-05, "loss": 2.6271, "step": 335 }, { "epoch": 0.25, "learning_rate": 1.9858426143973623e-05, "loss": 2.4743, "step": 336 }, { "epoch": 0.25, "learning_rate": 1.985709687408644e-05, "loss": 2.6998, "step": 337 }, { "epoch": 0.25, "learning_rate": 1.9855761437819486e-05, "loss": 2.6932, "step": 338 }, { "epoch": 0.25, "learning_rate": 1.985441983600819e-05, "loss": 2.638, "step": 339 }, { "epoch": 0.25, "learning_rate": 1.9853072069491825e-05, "loss": 2.7572, "step": 340 }, { "epoch": 0.25, "learning_rate": 1.985171813911352e-05, "loss": 2.5822, "step": 341 }, { "epoch": 0.25, "learning_rate": 1.9850358045720268e-05, "loss": 2.7331, "step": 342 }, { "epoch": 0.25, "learning_rate": 1.984899179016291e-05, "loss": 2.6356, "step": 343 }, { "epoch": 0.25, "learning_rate": 1.9847619373296146e-05, "loss": 2.4603, "step": 344 }, { "epoch": 0.25, "learning_rate": 1.984624079597853e-05, "loss": 2.5767, "step": 345 }, { "epoch": 0.25, "learning_rate": 1.9844856059072467e-05, "loss": 2.6661, "step": 346 }, { "epoch": 0.25, "learning_rate": 1.984346516344422e-05, "loss": 2.6491, "step": 347 }, { "epoch": 0.25, "learning_rate": 1.9842068109963904e-05, "loss": 2.6934, "step": 348 }, { "epoch": 0.26, "learning_rate": 1.9840664899505482e-05, "loss": 2.6159, "step": 349 }, { "epoch": 0.26, "learning_rate": 1.983925553294677e-05, "loss": 2.6397, "step": 350 }, { "epoch": 0.26, "learning_rate": 1.983784001116944e-05, "loss": 2.6072, "step": 351 }, { "epoch": 0.26, "learning_rate": 1.9836418335059005e-05, "loss": 2.6071, "step": 352 }, { "epoch": 0.26, "learning_rate": 1.9834990505504836e-05, "loss": 2.5743, "step": 353 }, { "epoch": 0.26, "learning_rate": 1.9833556523400156e-05, "loss": 2.7744, "step": 354 }, { "epoch": 0.26, "learning_rate": 1.9832116389642027e-05, "loss": 2.6362, "step": 355 }, { "epoch": 0.26, "learning_rate": 1.9830670105131366e-05, "loss": 2.6796, "step": 356 }, { "epoch": 0.26, "learning_rate": 1.9829217670772936e-05, "loss": 2.5944, "step": 357 }, { "epoch": 0.26, "learning_rate": 1.982775908747535e-05, "loss": 2.6778, "step": 358 }, { "epoch": 0.26, "learning_rate": 1.9826294356151063e-05, "loss": 2.6686, "step": 359 }, { "epoch": 0.26, "learning_rate": 1.9824823477716374e-05, "loss": 2.6367, "step": 360 }, { "epoch": 0.26, "learning_rate": 1.9823346453091442e-05, "loss": 2.6479, "step": 361 }, { "epoch": 0.27, "learning_rate": 1.982186328320025e-05, "loss": 2.6475, "step": 362 }, { "epoch": 0.27, "learning_rate": 1.9820373968970642e-05, "loss": 2.7361, "step": 363 }, { "epoch": 0.27, "learning_rate": 1.98188785113343e-05, "loss": 2.5261, "step": 364 }, { "epoch": 0.27, "learning_rate": 1.9817376911226747e-05, "loss": 2.5383, "step": 365 }, { "epoch": 0.27, "learning_rate": 1.981586916958735e-05, "loss": 2.6479, "step": 366 }, { "epoch": 0.27, "learning_rate": 1.981435528735932e-05, "loss": 2.6417, "step": 367 }, { "epoch": 0.27, "learning_rate": 1.9812835265489708e-05, "loss": 2.5823, "step": 368 }, { "epoch": 0.27, "learning_rate": 1.98113091049294e-05, "loss": 2.5912, "step": 369 }, { "epoch": 0.27, "learning_rate": 1.9809776806633137e-05, "loss": 2.5271, "step": 370 }, { "epoch": 0.27, "learning_rate": 1.9808238371559488e-05, "loss": 2.5565, "step": 371 }, { "epoch": 0.27, "learning_rate": 1.980669380067086e-05, "loss": 2.5699, "step": 372 }, { "epoch": 0.27, "learning_rate": 1.9805143094933507e-05, "loss": 2.5061, "step": 373 }, { "epoch": 0.27, "learning_rate": 1.980358625531751e-05, "loss": 2.4867, "step": 374 }, { "epoch": 0.27, "learning_rate": 1.9802023282796797e-05, "loss": 2.5736, "step": 375 }, { "epoch": 0.28, "learning_rate": 1.9800454178349125e-05, "loss": 2.526, "step": 376 }, { "epoch": 0.28, "learning_rate": 1.97988789429561e-05, "loss": 2.6284, "step": 377 }, { "epoch": 0.28, "learning_rate": 1.9797297577603144e-05, "loss": 2.6049, "step": 378 }, { "epoch": 0.28, "learning_rate": 1.979571008327953e-05, "loss": 2.5661, "step": 379 }, { "epoch": 0.28, "learning_rate": 1.9794116460978353e-05, "loss": 2.5393, "step": 380 }, { "epoch": 0.28, "learning_rate": 1.9792516711696555e-05, "loss": 2.789, "step": 381 }, { "epoch": 0.28, "learning_rate": 1.97909108364349e-05, "loss": 2.6351, "step": 382 }, { "epoch": 0.28, "learning_rate": 1.9789298836197985e-05, "loss": 2.6968, "step": 383 }, { "epoch": 0.28, "learning_rate": 1.9787680711994244e-05, "loss": 2.6954, "step": 384 }, { "epoch": 0.28, "learning_rate": 1.978605646483594e-05, "loss": 2.6803, "step": 385 }, { "epoch": 0.28, "learning_rate": 1.9784426095739163e-05, "loss": 2.5473, "step": 386 }, { "epoch": 0.28, "learning_rate": 1.978278960572384e-05, "loss": 2.5859, "step": 387 }, { "epoch": 0.28, "learning_rate": 1.9781146995813717e-05, "loss": 2.5262, "step": 388 }, { "epoch": 0.28, "learning_rate": 1.977949826703638e-05, "loss": 2.5889, "step": 389 }, { "epoch": 0.29, "learning_rate": 1.977784342042323e-05, "loss": 2.585, "step": 390 }, { "epoch": 0.29, "learning_rate": 1.9776182457009515e-05, "loss": 2.4717, "step": 391 }, { "epoch": 0.29, "learning_rate": 1.977451537783428e-05, "loss": 2.4883, "step": 392 }, { "epoch": 0.29, "learning_rate": 1.977284218394042e-05, "loss": 2.6667, "step": 393 }, { "epoch": 0.29, "learning_rate": 1.9771162876374654e-05, "loss": 2.51, "step": 394 }, { "epoch": 0.29, "learning_rate": 1.9769477456187508e-05, "loss": 2.504, "step": 395 }, { "epoch": 0.29, "learning_rate": 1.9767785924433354e-05, "loss": 2.7233, "step": 396 }, { "epoch": 0.29, "learning_rate": 1.9766088282170365e-05, "loss": 2.4685, "step": 397 }, { "epoch": 0.29, "learning_rate": 1.976438453046056e-05, "loss": 2.5925, "step": 398 }, { "epoch": 0.29, "learning_rate": 1.9762674670369757e-05, "loss": 2.4488, "step": 399 }, { "epoch": 0.29, "learning_rate": 1.9760958702967613e-05, "loss": 2.5392, "step": 400 }, { "epoch": 0.29, "learning_rate": 1.9759236629327598e-05, "loss": 2.6027, "step": 401 }, { "epoch": 0.29, "learning_rate": 1.9757508450527006e-05, "loss": 2.438, "step": 402 }, { "epoch": 0.3, "learning_rate": 1.975577416764694e-05, "loss": 2.5648, "step": 403 }, { "epoch": 0.3, "learning_rate": 1.975403378177233e-05, "loss": 2.5854, "step": 404 }, { "epoch": 0.3, "learning_rate": 1.9752287293991927e-05, "loss": 2.7238, "step": 405 }, { "epoch": 0.3, "learning_rate": 1.975053470539829e-05, "loss": 2.5599, "step": 406 }, { "epoch": 0.3, "learning_rate": 1.9748776017087806e-05, "loss": 2.5141, "step": 407 }, { "epoch": 0.3, "learning_rate": 1.9747011230160664e-05, "loss": 2.4388, "step": 408 }, { "epoch": 0.3, "learning_rate": 1.974524034572088e-05, "loss": 2.6178, "step": 409 }, { "epoch": 0.3, "learning_rate": 1.9743463364876275e-05, "loss": 2.6006, "step": 410 }, { "epoch": 0.3, "learning_rate": 1.9741680288738495e-05, "loss": 2.471, "step": 411 }, { "epoch": 0.3, "learning_rate": 1.9739891118422988e-05, "loss": 2.6248, "step": 412 }, { "epoch": 0.3, "learning_rate": 1.9738095855049016e-05, "loss": 2.4377, "step": 413 }, { "epoch": 0.3, "learning_rate": 1.9736294499739667e-05, "loss": 2.3969, "step": 414 }, { "epoch": 0.3, "learning_rate": 1.9734487053621817e-05, "loss": 2.6171, "step": 415 }, { "epoch": 0.3, "learning_rate": 1.9732673517826167e-05, "loss": 2.4592, "step": 416 }, { "epoch": 0.31, "learning_rate": 1.973085389348723e-05, "loss": 2.4991, "step": 417 }, { "epoch": 0.31, "learning_rate": 1.9729028181743316e-05, "loss": 2.5383, "step": 418 }, { "epoch": 0.31, "learning_rate": 1.972719638373655e-05, "loss": 2.5731, "step": 419 }, { "epoch": 0.31, "learning_rate": 1.9725358500612865e-05, "loss": 2.5564, "step": 420 }, { "epoch": 0.31, "learning_rate": 1.9723514533522005e-05, "loss": 2.549, "step": 421 }, { "epoch": 0.31, "learning_rate": 1.9721664483617506e-05, "loss": 2.4814, "step": 422 }, { "epoch": 0.31, "learning_rate": 1.9719808352056728e-05, "loss": 2.4536, "step": 423 }, { "epoch": 0.31, "learning_rate": 1.9717946140000813e-05, "loss": 2.4552, "step": 424 }, { "epoch": 0.31, "learning_rate": 1.9716077848614727e-05, "loss": 2.5155, "step": 425 }, { "epoch": 0.31, "learning_rate": 1.9714203479067232e-05, "loss": 2.5304, "step": 426 }, { "epoch": 0.31, "learning_rate": 1.971232303253089e-05, "loss": 2.6303, "step": 427 }, { "epoch": 0.31, "learning_rate": 1.9710436510182065e-05, "loss": 2.5382, "step": 428 }, { "epoch": 0.31, "learning_rate": 1.9708543913200925e-05, "loss": 2.4324, "step": 429 }, { "epoch": 0.32, "learning_rate": 1.9706645242771435e-05, "loss": 2.5141, "step": 430 }, { "epoch": 0.32, "learning_rate": 1.9704740500081365e-05, "loss": 2.5933, "step": 431 }, { "epoch": 0.32, "learning_rate": 1.9702829686322272e-05, "loss": 2.5178, "step": 432 }, { "epoch": 0.32, "learning_rate": 1.970091280268952e-05, "loss": 2.5429, "step": 433 }, { "epoch": 0.32, "learning_rate": 1.9698989850382277e-05, "loss": 2.4242, "step": 434 }, { "epoch": 0.32, "learning_rate": 1.9697060830603495e-05, "loss": 2.4442, "step": 435 }, { "epoch": 0.32, "learning_rate": 1.9695125744559917e-05, "loss": 2.5202, "step": 436 }, { "epoch": 0.32, "learning_rate": 1.9693184593462104e-05, "loss": 2.4873, "step": 437 }, { "epoch": 0.32, "learning_rate": 1.9691237378524387e-05, "loss": 2.4928, "step": 438 }, { "epoch": 0.32, "learning_rate": 1.96892841009649e-05, "loss": 2.5916, "step": 439 }, { "epoch": 0.32, "learning_rate": 1.9687324762005574e-05, "loss": 2.4623, "step": 440 }, { "epoch": 0.32, "learning_rate": 1.9685359362872124e-05, "loss": 2.5356, "step": 441 }, { "epoch": 0.32, "learning_rate": 1.968338790479407e-05, "loss": 2.6751, "step": 442 }, { "epoch": 0.32, "learning_rate": 1.9681410389004697e-05, "loss": 2.5555, "step": 443 }, { "epoch": 0.33, "learning_rate": 1.9679426816741102e-05, "loss": 2.4999, "step": 444 }, { "epoch": 0.33, "learning_rate": 1.9677437189244166e-05, "loss": 2.5405, "step": 445 }, { "epoch": 0.33, "learning_rate": 1.9675441507758552e-05, "loss": 2.75, "step": 446 }, { "epoch": 0.33, "learning_rate": 1.9673439773532714e-05, "loss": 2.5285, "step": 447 }, { "epoch": 0.33, "learning_rate": 1.9671431987818894e-05, "loss": 2.4952, "step": 448 }, { "epoch": 0.33, "learning_rate": 1.966941815187312e-05, "loss": 2.5047, "step": 449 }, { "epoch": 0.33, "learning_rate": 1.9667398266955195e-05, "loss": 2.5418, "step": 450 }, { "epoch": 0.33, "learning_rate": 1.9665372334328722e-05, "loss": 2.5912, "step": 451 }, { "epoch": 0.33, "learning_rate": 1.9663340355261073e-05, "loss": 2.5031, "step": 452 }, { "epoch": 0.33, "learning_rate": 1.9661302331023413e-05, "loss": 2.5444, "step": 453 }, { "epoch": 0.33, "learning_rate": 1.9659258262890683e-05, "loss": 2.5517, "step": 454 }, { "epoch": 0.33, "learning_rate": 1.965720815214161e-05, "loss": 2.4743, "step": 455 }, { "epoch": 0.33, "learning_rate": 1.965515200005869e-05, "loss": 2.4388, "step": 456 }, { "epoch": 0.33, "learning_rate": 1.965308980792821e-05, "loss": 2.5791, "step": 457 }, { "epoch": 0.34, "learning_rate": 1.9651021577040233e-05, "loss": 2.4656, "step": 458 }, { "epoch": 0.34, "learning_rate": 1.9648947308688594e-05, "loss": 2.4668, "step": 459 }, { "epoch": 0.34, "learning_rate": 1.9646867004170915e-05, "loss": 2.5433, "step": 460 }, { "epoch": 0.34, "learning_rate": 1.964478066478858e-05, "loss": 2.5101, "step": 461 }, { "epoch": 0.34, "learning_rate": 1.9642688291846762e-05, "loss": 2.5288, "step": 462 }, { "epoch": 0.34, "learning_rate": 1.9640589886654404e-05, "loss": 2.4915, "step": 463 }, { "epoch": 0.34, "learning_rate": 1.9638485450524218e-05, "loss": 2.4754, "step": 464 }, { "epoch": 0.34, "learning_rate": 1.963637498477269e-05, "loss": 2.3609, "step": 465 }, { "epoch": 0.34, "learning_rate": 1.963425849072009e-05, "loss": 2.5629, "step": 466 }, { "epoch": 0.34, "learning_rate": 1.9632135969690437e-05, "loss": 2.5145, "step": 467 }, { "epoch": 0.34, "learning_rate": 1.9630007423011543e-05, "loss": 2.4569, "step": 468 }, { "epoch": 0.34, "learning_rate": 1.9627872852014974e-05, "loss": 2.5672, "step": 469 }, { "epoch": 0.34, "learning_rate": 1.962573225803607e-05, "loss": 2.6019, "step": 470 }, { "epoch": 0.35, "learning_rate": 1.962358564241394e-05, "loss": 2.5329, "step": 471 }, { "epoch": 0.35, "learning_rate": 1.962143300649146e-05, "loss": 2.4783, "step": 472 }, { "epoch": 0.35, "learning_rate": 1.9619274351615268e-05, "loss": 2.6436, "step": 473 }, { "epoch": 0.35, "learning_rate": 1.961710967913577e-05, "loss": 2.4735, "step": 474 }, { "epoch": 0.35, "learning_rate": 1.961493899040714e-05, "loss": 2.4856, "step": 475 }, { "epoch": 0.35, "learning_rate": 1.9612762286787313e-05, "loss": 2.613, "step": 476 }, { "epoch": 0.35, "learning_rate": 1.9610579569637982e-05, "loss": 2.563, "step": 477 }, { "epoch": 0.35, "learning_rate": 1.960839084032461e-05, "loss": 2.4421, "step": 478 }, { "epoch": 0.35, "learning_rate": 1.9606196100216413e-05, "loss": 2.5109, "step": 479 }, { "epoch": 0.35, "learning_rate": 1.960399535068638e-05, "loss": 2.547, "step": 480 }, { "epoch": 0.35, "learning_rate": 1.9601788593111236e-05, "loss": 2.5538, "step": 481 }, { "epoch": 0.35, "learning_rate": 1.9599575828871496e-05, "loss": 2.5287, "step": 482 }, { "epoch": 0.35, "learning_rate": 1.9597357059351404e-05, "loss": 2.4993, "step": 483 }, { "epoch": 0.35, "learning_rate": 1.9595132285938976e-05, "loss": 2.6549, "step": 484 }, { "epoch": 0.36, "learning_rate": 1.9592901510025984e-05, "loss": 2.4188, "step": 485 }, { "epoch": 0.36, "learning_rate": 1.9590664733007947e-05, "loss": 2.4034, "step": 486 }, { "epoch": 0.36, "learning_rate": 1.9588421956284146e-05, "loss": 2.5648, "step": 487 }, { "epoch": 0.36, "learning_rate": 1.958617318125761e-05, "loss": 2.575, "step": 488 }, { "epoch": 0.36, "learning_rate": 1.958391840933512e-05, "loss": 2.4924, "step": 489 }, { "epoch": 0.36, "learning_rate": 1.9581657641927217e-05, "loss": 2.6006, "step": 490 }, { "epoch": 0.36, "learning_rate": 1.9579390880448187e-05, "loss": 2.5661, "step": 491 }, { "epoch": 0.36, "learning_rate": 1.957711812631606e-05, "loss": 2.3598, "step": 492 }, { "epoch": 0.36, "learning_rate": 1.957483938095262e-05, "loss": 2.4307, "step": 493 }, { "epoch": 0.36, "learning_rate": 1.9572554645783405e-05, "loss": 2.4586, "step": 494 }, { "epoch": 0.36, "learning_rate": 1.9570263922237686e-05, "loss": 2.4745, "step": 495 }, { "epoch": 0.36, "learning_rate": 1.95679672117485e-05, "loss": 2.4487, "step": 496 }, { "epoch": 0.36, "learning_rate": 1.9565664515752605e-05, "loss": 2.5729, "step": 497 }, { "epoch": 0.36, "learning_rate": 1.9563355835690526e-05, "loss": 2.5093, "step": 498 }, { "epoch": 0.37, "learning_rate": 1.9561041173006517e-05, "loss": 2.5777, "step": 499 }, { "epoch": 0.37, "learning_rate": 1.955872052914858e-05, "loss": 2.6505, "step": 500 }, { "epoch": 0.37, "learning_rate": 1.955639390556846e-05, "loss": 2.5492, "step": 501 }, { "epoch": 0.37, "learning_rate": 1.955406130372164e-05, "loss": 2.5102, "step": 502 }, { "epoch": 0.37, "learning_rate": 1.955172272506734e-05, "loss": 2.6323, "step": 503 }, { "epoch": 0.37, "learning_rate": 1.954937817106853e-05, "loss": 2.4196, "step": 504 }, { "epoch": 0.37, "learning_rate": 1.9547027643191902e-05, "loss": 2.4757, "step": 505 }, { "epoch": 0.37, "learning_rate": 1.9544671142907896e-05, "loss": 2.6914, "step": 506 }, { "epoch": 0.37, "learning_rate": 1.954230867169069e-05, "loss": 2.5187, "step": 507 }, { "epoch": 0.37, "learning_rate": 1.9539940231018196e-05, "loss": 2.6078, "step": 508 }, { "epoch": 0.37, "learning_rate": 1.9537565822372045e-05, "loss": 2.3803, "step": 509 }, { "epoch": 0.37, "learning_rate": 1.9535185447237626e-05, "loss": 2.4574, "step": 510 }, { "epoch": 0.37, "learning_rate": 1.9532799107104044e-05, "loss": 2.3897, "step": 511 }, { "epoch": 0.38, "learning_rate": 1.9530406803464142e-05, "loss": 2.6311, "step": 512 }, { "epoch": 0.38, "learning_rate": 1.9528008537814488e-05, "loss": 2.4248, "step": 513 }, { "epoch": 0.38, "learning_rate": 1.9525604311655387e-05, "loss": 2.4499, "step": 514 }, { "epoch": 0.38, "learning_rate": 1.952319412649087e-05, "loss": 2.6316, "step": 515 }, { "epoch": 0.38, "learning_rate": 1.952077798382869e-05, "loss": 2.5454, "step": 516 }, { "epoch": 0.38, "learning_rate": 1.9518355885180333e-05, "loss": 2.6925, "step": 517 }, { "epoch": 0.38, "learning_rate": 1.951592783206101e-05, "loss": 2.4289, "step": 518 }, { "epoch": 0.38, "learning_rate": 1.9513493825989664e-05, "loss": 2.6821, "step": 519 }, { "epoch": 0.38, "learning_rate": 1.951105386848895e-05, "loss": 2.3664, "step": 520 }, { "epoch": 0.38, "learning_rate": 1.9508607961085247e-05, "loss": 2.3593, "step": 521 }, { "epoch": 0.38, "learning_rate": 1.9506156105308665e-05, "loss": 2.5153, "step": 522 }, { "epoch": 0.38, "learning_rate": 1.9503698302693026e-05, "loss": 2.4713, "step": 523 }, { "epoch": 0.38, "learning_rate": 1.9501234554775883e-05, "loss": 2.5411, "step": 524 }, { "epoch": 0.38, "learning_rate": 1.9498764863098494e-05, "loss": 2.5943, "step": 525 }, { "epoch": 0.39, "learning_rate": 1.949628922920585e-05, "loss": 2.347, "step": 526 }, { "epoch": 0.39, "learning_rate": 1.9493807654646647e-05, "loss": 2.5883, "step": 527 }, { "epoch": 0.39, "learning_rate": 1.9491320140973305e-05, "loss": 2.5823, "step": 528 }, { "epoch": 0.39, "learning_rate": 1.948882668974196e-05, "loss": 2.4344, "step": 529 }, { "epoch": 0.39, "learning_rate": 1.9486327302512454e-05, "loss": 2.4228, "step": 530 }, { "epoch": 0.39, "learning_rate": 1.9483821980848346e-05, "loss": 2.3727, "step": 531 }, { "epoch": 0.39, "learning_rate": 1.948131072631692e-05, "loss": 2.5978, "step": 532 }, { "epoch": 0.39, "learning_rate": 1.947879354048915e-05, "loss": 2.4934, "step": 533 }, { "epoch": 0.39, "learning_rate": 1.9476270424939736e-05, "loss": 2.5568, "step": 534 }, { "epoch": 0.39, "learning_rate": 1.9473741381247082e-05, "loss": 2.5402, "step": 535 }, { "epoch": 0.39, "learning_rate": 1.9471206410993302e-05, "loss": 2.5137, "step": 536 }, { "epoch": 0.39, "learning_rate": 1.9468665515764216e-05, "loss": 2.49, "step": 537 }, { "epoch": 0.39, "learning_rate": 1.946611869714935e-05, "loss": 2.4821, "step": 538 }, { "epoch": 0.39, "learning_rate": 1.946356595674194e-05, "loss": 2.3614, "step": 539 }, { "epoch": 0.4, "learning_rate": 1.9461007296138925e-05, "loss": 2.5611, "step": 540 }, { "epoch": 0.4, "learning_rate": 1.9458442716940936e-05, "loss": 2.586, "step": 541 }, { "epoch": 0.4, "learning_rate": 1.945587222075233e-05, "loss": 2.4724, "step": 542 }, { "epoch": 0.4, "learning_rate": 1.9453295809181144e-05, "loss": 2.3778, "step": 543 }, { "epoch": 0.4, "learning_rate": 1.945071348383912e-05, "loss": 2.4674, "step": 544 }, { "epoch": 0.4, "learning_rate": 1.944812524634171e-05, "loss": 2.4791, "step": 545 }, { "epoch": 0.4, "learning_rate": 1.944553109830806e-05, "loss": 2.4108, "step": 546 }, { "epoch": 0.4, "learning_rate": 1.9442931041361003e-05, "loss": 2.5884, "step": 547 }, { "epoch": 0.4, "learning_rate": 1.944032507712708e-05, "loss": 2.4285, "step": 548 }, { "epoch": 0.4, "learning_rate": 1.9437713207236525e-05, "loss": 2.6394, "step": 549 }, { "epoch": 0.4, "learning_rate": 1.9435095433323265e-05, "loss": 2.4135, "step": 550 }, { "epoch": 0.4, "learning_rate": 1.9432471757024924e-05, "loss": 2.3487, "step": 551 }, { "epoch": 0.4, "learning_rate": 1.942984217998281e-05, "loss": 2.3766, "step": 552 }, { "epoch": 0.41, "learning_rate": 1.9427206703841928e-05, "loss": 2.3588, "step": 553 }, { "epoch": 0.41, "learning_rate": 1.9424565330250975e-05, "loss": 2.5763, "step": 554 }, { "epoch": 0.41, "learning_rate": 1.9421918060862333e-05, "loss": 2.4055, "step": 555 }, { "epoch": 0.41, "learning_rate": 1.9419264897332078e-05, "loss": 2.5733, "step": 556 }, { "epoch": 0.41, "learning_rate": 1.9416605841319974e-05, "loss": 2.434, "step": 557 }, { "epoch": 0.41, "learning_rate": 1.9413940894489456e-05, "loss": 2.5239, "step": 558 }, { "epoch": 0.41, "learning_rate": 1.9411270058507665e-05, "loss": 2.4267, "step": 559 }, { "epoch": 0.41, "learning_rate": 1.940859333504541e-05, "loss": 2.4515, "step": 560 }, { "epoch": 0.41, "learning_rate": 1.940591072577719e-05, "loss": 2.4215, "step": 561 }, { "epoch": 0.41, "learning_rate": 1.9403222232381193e-05, "loss": 2.6581, "step": 562 }, { "epoch": 0.41, "learning_rate": 1.940052785653927e-05, "loss": 2.4265, "step": 563 }, { "epoch": 0.41, "learning_rate": 1.9397827599936967e-05, "loss": 2.4221, "step": 564 }, { "epoch": 0.41, "learning_rate": 1.9395121464263506e-05, "loss": 2.4876, "step": 565 }, { "epoch": 0.41, "learning_rate": 1.9392409451211783e-05, "loss": 2.5289, "step": 566 }, { "epoch": 0.42, "learning_rate": 1.9389691562478375e-05, "loss": 2.4377, "step": 567 }, { "epoch": 0.42, "learning_rate": 1.9386967799763527e-05, "loss": 2.3645, "step": 568 }, { "epoch": 0.42, "learning_rate": 1.9384238164771175e-05, "loss": 2.5307, "step": 569 }, { "epoch": 0.42, "learning_rate": 1.9381502659208903e-05, "loss": 2.4716, "step": 570 }, { "epoch": 0.42, "learning_rate": 1.9378761284787996e-05, "loss": 2.5626, "step": 571 }, { "epoch": 0.42, "learning_rate": 1.9376014043223387e-05, "loss": 2.3941, "step": 572 }, { "epoch": 0.42, "learning_rate": 1.9373260936233692e-05, "loss": 2.4647, "step": 573 }, { "epoch": 0.42, "learning_rate": 1.9370501965541194e-05, "loss": 2.4575, "step": 574 }, { "epoch": 0.42, "learning_rate": 1.9367737132871845e-05, "loss": 2.3579, "step": 575 }, { "epoch": 0.42, "learning_rate": 1.936496643995526e-05, "loss": 2.6493, "step": 576 }, { "epoch": 0.42, "learning_rate": 1.9362189888524723e-05, "loss": 2.3263, "step": 577 }, { "epoch": 0.42, "learning_rate": 1.9359407480317184e-05, "loss": 2.4835, "step": 578 }, { "epoch": 0.42, "learning_rate": 1.9356619217073252e-05, "loss": 2.3059, "step": 579 }, { "epoch": 0.42, "learning_rate": 1.935382510053721e-05, "loss": 2.6022, "step": 580 }, { "epoch": 0.43, "learning_rate": 1.9351025132456986e-05, "loss": 2.5637, "step": 581 }, { "epoch": 0.43, "learning_rate": 1.9348219314584182e-05, "loss": 2.5236, "step": 582 }, { "epoch": 0.43, "learning_rate": 1.9345407648674052e-05, "loss": 2.4125, "step": 583 }, { "epoch": 0.43, "learning_rate": 1.934259013648552e-05, "loss": 2.6042, "step": 584 }, { "epoch": 0.43, "learning_rate": 1.9339766779781145e-05, "loss": 2.5621, "step": 585 }, { "epoch": 0.43, "learning_rate": 1.9336937580327167e-05, "loss": 2.5174, "step": 586 }, { "epoch": 0.43, "learning_rate": 1.9334102539893468e-05, "loss": 2.4226, "step": 587 }, { "epoch": 0.43, "learning_rate": 1.9331261660253582e-05, "loss": 2.4724, "step": 588 }, { "epoch": 0.43, "learning_rate": 1.9328414943184707e-05, "loss": 2.5412, "step": 589 }, { "epoch": 0.43, "learning_rate": 1.9325562390467673e-05, "loss": 2.5002, "step": 590 }, { "epoch": 0.43, "learning_rate": 1.9322704003886988e-05, "loss": 2.3018, "step": 591 }, { "epoch": 0.43, "learning_rate": 1.9319839785230785e-05, "loss": 2.4467, "step": 592 }, { "epoch": 0.43, "learning_rate": 1.9316969736290857e-05, "loss": 2.4687, "step": 593 }, { "epoch": 0.44, "learning_rate": 1.931409385886265e-05, "loss": 2.3975, "step": 594 }, { "epoch": 0.44, "learning_rate": 1.9311212154745237e-05, "loss": 2.5123, "step": 595 }, { "epoch": 0.44, "learning_rate": 1.9308324625741353e-05, "loss": 2.4771, "step": 596 }, { "epoch": 0.44, "learning_rate": 1.9305431273657373e-05, "loss": 2.342, "step": 597 }, { "epoch": 0.44, "learning_rate": 1.9302532100303315e-05, "loss": 2.4464, "step": 598 }, { "epoch": 0.44, "learning_rate": 1.9299627107492833e-05, "loss": 2.6239, "step": 599 }, { "epoch": 0.44, "learning_rate": 1.929671629704323e-05, "loss": 2.3372, "step": 600 }, { "epoch": 0.44, "learning_rate": 1.9293799670775435e-05, "loss": 2.4446, "step": 601 }, { "epoch": 0.44, "learning_rate": 1.929087723051404e-05, "loss": 2.4306, "step": 602 }, { "epoch": 0.44, "learning_rate": 1.928794897808724e-05, "loss": 2.4588, "step": 603 }, { "epoch": 0.44, "learning_rate": 1.9285014915326897e-05, "loss": 2.3786, "step": 604 }, { "epoch": 0.44, "learning_rate": 1.9282075044068493e-05, "loss": 2.6653, "step": 605 }, { "epoch": 0.44, "learning_rate": 1.9279129366151143e-05, "loss": 2.403, "step": 606 }, { "epoch": 0.44, "learning_rate": 1.9276177883417597e-05, "loss": 2.3863, "step": 607 }, { "epoch": 0.45, "learning_rate": 1.9273220597714242e-05, "loss": 2.5074, "step": 608 }, { "epoch": 0.45, "learning_rate": 1.9270257510891083e-05, "loss": 2.573, "step": 609 }, { "epoch": 0.45, "learning_rate": 1.9267288624801766e-05, "loss": 2.4761, "step": 610 }, { "epoch": 0.45, "learning_rate": 1.926431394130356e-05, "loss": 2.4371, "step": 611 }, { "epoch": 0.45, "learning_rate": 1.9261333462257357e-05, "loss": 2.4203, "step": 612 }, { "epoch": 0.45, "learning_rate": 1.925834718952768e-05, "loss": 2.552, "step": 613 }, { "epoch": 0.45, "learning_rate": 1.9255355124982675e-05, "loss": 2.5227, "step": 614 }, { "epoch": 0.45, "learning_rate": 1.9252357270494108e-05, "loss": 2.3633, "step": 615 }, { "epoch": 0.45, "learning_rate": 1.9249353627937376e-05, "loss": 2.5291, "step": 616 }, { "epoch": 0.45, "learning_rate": 1.9246344199191486e-05, "loss": 2.5205, "step": 617 }, { "epoch": 0.45, "learning_rate": 1.9243328986139067e-05, "loss": 2.4704, "step": 618 }, { "epoch": 0.45, "learning_rate": 1.9240307990666376e-05, "loss": 2.3497, "step": 619 }, { "epoch": 0.45, "learning_rate": 1.9237281214663276e-05, "loss": 2.4405, "step": 620 }, { "epoch": 0.45, "learning_rate": 1.923424866002325e-05, "loss": 2.4367, "step": 621 }, { "epoch": 0.46, "learning_rate": 1.92312103286434e-05, "loss": 2.5084, "step": 622 }, { "epoch": 0.46, "learning_rate": 1.9228166222424432e-05, "loss": 2.4694, "step": 623 }, { "epoch": 0.46, "learning_rate": 1.9225116343270677e-05, "loss": 2.2804, "step": 624 }, { "epoch": 0.46, "learning_rate": 1.922206069309007e-05, "loss": 2.3963, "step": 625 }, { "epoch": 0.46, "learning_rate": 1.9218999273794155e-05, "loss": 2.4393, "step": 626 }, { "epoch": 0.46, "learning_rate": 1.9215932087298093e-05, "loss": 2.3904, "step": 627 }, { "epoch": 0.46, "learning_rate": 1.9212859135520638e-05, "loss": 2.5819, "step": 628 }, { "epoch": 0.46, "learning_rate": 1.920978042038417e-05, "loss": 2.6074, "step": 629 }, { "epoch": 0.46, "learning_rate": 1.9206695943814657e-05, "loss": 2.4288, "step": 630 }, { "epoch": 0.46, "learning_rate": 1.9203605707741686e-05, "loss": 2.4525, "step": 631 }, { "epoch": 0.46, "learning_rate": 1.9200509714098433e-05, "loss": 2.5001, "step": 632 }, { "epoch": 0.46, "learning_rate": 1.9197407964821684e-05, "loss": 2.3938, "step": 633 }, { "epoch": 0.46, "learning_rate": 1.9194300461851826e-05, "loss": 2.3328, "step": 634 }, { "epoch": 0.47, "learning_rate": 1.9191187207132845e-05, "loss": 2.4014, "step": 635 }, { "epoch": 0.47, "learning_rate": 1.9188068202612317e-05, "loss": 2.6514, "step": 636 }, { "epoch": 0.47, "learning_rate": 1.9184943450241428e-05, "loss": 2.3562, "step": 637 }, { "epoch": 0.47, "learning_rate": 1.9181812951974952e-05, "loss": 2.3258, "step": 638 }, { "epoch": 0.47, "learning_rate": 1.917867670977126e-05, "loss": 2.3104, "step": 639 }, { "epoch": 0.47, "learning_rate": 1.9175534725592308e-05, "loss": 2.4481, "step": 640 }, { "epoch": 0.47, "learning_rate": 1.917238700140366e-05, "loss": 2.4389, "step": 641 }, { "epoch": 0.47, "learning_rate": 1.9169233539174458e-05, "loss": 2.4681, "step": 642 }, { "epoch": 0.47, "learning_rate": 1.9166074340877437e-05, "loss": 2.2781, "step": 643 }, { "epoch": 0.47, "learning_rate": 1.916290940848892e-05, "loss": 2.5167, "step": 644 }, { "epoch": 0.47, "learning_rate": 1.9159738743988824e-05, "loss": 2.5443, "step": 645 }, { "epoch": 0.47, "learning_rate": 1.9156562349360638e-05, "loss": 2.5709, "step": 646 }, { "epoch": 0.47, "learning_rate": 1.915338022659145e-05, "loss": 2.4134, "step": 647 }, { "epoch": 0.47, "learning_rate": 1.9150192377671923e-05, "loss": 2.3681, "step": 648 }, { "epoch": 0.48, "learning_rate": 1.9146998804596305e-05, "loss": 2.4501, "step": 649 }, { "epoch": 0.48, "learning_rate": 1.9143799509362418e-05, "loss": 2.5365, "step": 650 }, { "epoch": 0.48, "learning_rate": 1.9140594493971676e-05, "loss": 2.2341, "step": 651 }, { "epoch": 0.48, "learning_rate": 1.9137383760429064e-05, "loss": 2.4181, "step": 652 }, { "epoch": 0.48, "learning_rate": 1.9134167310743144e-05, "loss": 2.4591, "step": 653 }, { "epoch": 0.48, "learning_rate": 1.9130945146926054e-05, "loss": 2.5213, "step": 654 }, { "epoch": 0.48, "learning_rate": 1.912771727099351e-05, "loss": 2.2571, "step": 655 }, { "epoch": 0.48, "learning_rate": 1.91244836849648e-05, "loss": 2.4807, "step": 656 }, { "epoch": 0.48, "learning_rate": 1.912124439086278e-05, "loss": 2.3541, "step": 657 }, { "epoch": 0.48, "learning_rate": 1.9117999390713886e-05, "loss": 2.2656, "step": 658 }, { "epoch": 0.48, "learning_rate": 1.911474868654811e-05, "loss": 2.5892, "step": 659 }, { "epoch": 0.48, "learning_rate": 1.9111492280399022e-05, "loss": 2.4934, "step": 660 }, { "epoch": 0.48, "learning_rate": 1.910823017430376e-05, "loss": 2.3208, "step": 661 }, { "epoch": 0.48, "learning_rate": 1.9104962370303025e-05, "loss": 2.2884, "step": 662 }, { "epoch": 0.49, "learning_rate": 1.910168887044108e-05, "loss": 2.395, "step": 663 }, { "epoch": 0.49, "learning_rate": 1.9098409676765753e-05, "loss": 2.3268, "step": 664 }, { "epoch": 0.49, "learning_rate": 1.9095124791328437e-05, "loss": 2.4269, "step": 665 }, { "epoch": 0.49, "learning_rate": 1.9091834216184078e-05, "loss": 2.4878, "step": 666 }, { "epoch": 0.49, "learning_rate": 1.908853795339119e-05, "loss": 2.3698, "step": 667 }, { "epoch": 0.49, "learning_rate": 1.9085236005011843e-05, "loss": 2.3982, "step": 668 }, { "epoch": 0.49, "learning_rate": 1.908192837311166e-05, "loss": 2.4681, "step": 669 }, { "epoch": 0.49, "learning_rate": 1.9078615059759823e-05, "loss": 2.3549, "step": 670 }, { "epoch": 0.49, "learning_rate": 1.907529606702906e-05, "loss": 2.2933, "step": 671 }, { "epoch": 0.49, "learning_rate": 1.9071971396995673e-05, "loss": 2.4705, "step": 672 }, { "epoch": 0.49, "learning_rate": 1.9068641051739484e-05, "loss": 2.4329, "step": 673 }, { "epoch": 0.49, "learning_rate": 1.9065305033343898e-05, "loss": 2.3599, "step": 674 }, { "epoch": 0.49, "learning_rate": 1.9061963343895848e-05, "loss": 2.3928, "step": 675 }, { "epoch": 0.5, "learning_rate": 1.9058615985485814e-05, "loss": 2.5059, "step": 676 }, { "epoch": 0.5, "learning_rate": 1.9055262960207838e-05, "loss": 2.2812, "step": 677 }, { "epoch": 0.5, "learning_rate": 1.9051904270159492e-05, "loss": 2.3581, "step": 678 }, { "epoch": 0.5, "learning_rate": 1.9048539917441903e-05, "loss": 2.4873, "step": 679 }, { "epoch": 0.5, "learning_rate": 1.9045169904159726e-05, "loss": 2.4902, "step": 680 }, { "epoch": 0.5, "learning_rate": 1.9041794232421177e-05, "loss": 2.286, "step": 681 }, { "epoch": 0.5, "learning_rate": 1.9038412904337992e-05, "loss": 2.495, "step": 682 }, { "epoch": 0.5, "learning_rate": 1.9035025922025458e-05, "loss": 2.419, "step": 683 }, { "epoch": 0.5, "learning_rate": 1.9031633287602396e-05, "loss": 2.4048, "step": 684 }, { "epoch": 0.5, "learning_rate": 1.9028235003191164e-05, "loss": 2.4529, "step": 685 }, { "epoch": 0.5, "learning_rate": 1.9024831070917646e-05, "loss": 2.3126, "step": 686 }, { "epoch": 0.5, "learning_rate": 1.9021421492911272e-05, "loss": 2.3057, "step": 687 }, { "epoch": 0.5, "learning_rate": 1.9018006271304998e-05, "loss": 2.3468, "step": 688 }, { "epoch": 0.5, "learning_rate": 1.901458540823531e-05, "loss": 2.5489, "step": 689 }, { "epoch": 0.51, "learning_rate": 1.9011158905842218e-05, "loss": 2.3734, "step": 690 }, { "epoch": 0.51, "learning_rate": 1.9007726766269273e-05, "loss": 2.4706, "step": 691 }, { "epoch": 0.51, "learning_rate": 1.9004288991663543e-05, "loss": 2.4531, "step": 692 }, { "epoch": 0.51, "learning_rate": 1.9000845584175617e-05, "loss": 2.5838, "step": 693 }, { "epoch": 0.51, "learning_rate": 1.8997396545959624e-05, "loss": 2.3801, "step": 694 }, { "epoch": 0.51, "learning_rate": 1.8993941879173196e-05, "loss": 2.3923, "step": 695 }, { "epoch": 0.51, "learning_rate": 1.8990481585977503e-05, "loss": 2.4567, "step": 696 }, { "epoch": 0.51, "learning_rate": 1.8987015668537217e-05, "loss": 2.309, "step": 697 }, { "epoch": 0.51, "learning_rate": 1.8983544129020552e-05, "loss": 2.5455, "step": 698 }, { "epoch": 0.51, "learning_rate": 1.8980066969599216e-05, "loss": 2.5861, "step": 699 }, { "epoch": 0.51, "learning_rate": 1.8976584192448447e-05, "loss": 2.2674, "step": 700 }, { "epoch": 0.51, "learning_rate": 1.897309579974699e-05, "loss": 2.2908, "step": 701 }, { "epoch": 0.51, "learning_rate": 1.8969601793677107e-05, "loss": 2.4696, "step": 702 }, { "epoch": 0.52, "learning_rate": 1.8966102176424566e-05, "loss": 2.5122, "step": 703 }, { "epoch": 0.52, "learning_rate": 1.8962596950178652e-05, "loss": 2.3301, "step": 704 }, { "epoch": 0.52, "learning_rate": 1.8959086117132162e-05, "loss": 2.6086, "step": 705 }, { "epoch": 0.52, "learning_rate": 1.895556967948139e-05, "loss": 2.3805, "step": 706 }, { "epoch": 0.52, "learning_rate": 1.895204763942614e-05, "loss": 2.3905, "step": 707 }, { "epoch": 0.52, "learning_rate": 1.8948519999169723e-05, "loss": 2.4049, "step": 708 }, { "epoch": 0.52, "learning_rate": 1.8944986760918947e-05, "loss": 2.353, "step": 709 }, { "epoch": 0.52, "learning_rate": 1.8941447926884136e-05, "loss": 2.3569, "step": 710 }, { "epoch": 0.52, "learning_rate": 1.8937903499279104e-05, "loss": 2.3866, "step": 711 }, { "epoch": 0.52, "learning_rate": 1.8934353480321157e-05, "loss": 2.5054, "step": 712 }, { "epoch": 0.52, "learning_rate": 1.8930797872231114e-05, "loss": 2.4123, "step": 713 }, { "epoch": 0.52, "learning_rate": 1.8927236677233286e-05, "loss": 2.4945, "step": 714 }, { "epoch": 0.52, "learning_rate": 1.8923669897555476e-05, "loss": 2.3268, "step": 715 }, { "epoch": 0.52, "learning_rate": 1.8920097535428978e-05, "loss": 2.3509, "step": 716 }, { "epoch": 0.53, "learning_rate": 1.8916519593088586e-05, "loss": 2.238, "step": 717 }, { "epoch": 0.53, "learning_rate": 1.8912936072772576e-05, "loss": 2.2985, "step": 718 }, { "epoch": 0.53, "learning_rate": 1.8909346976722725e-05, "loss": 2.3621, "step": 719 }, { "epoch": 0.53, "learning_rate": 1.8905752307184287e-05, "loss": 2.4062, "step": 720 }, { "epoch": 0.53, "learning_rate": 1.8902152066406006e-05, "loss": 2.5898, "step": 721 }, { "epoch": 0.53, "learning_rate": 1.8898546256640115e-05, "loss": 2.2905, "step": 722 }, { "epoch": 0.53, "learning_rate": 1.889493488014233e-05, "loss": 2.2991, "step": 723 }, { "epoch": 0.53, "learning_rate": 1.8891317939171846e-05, "loss": 2.4012, "step": 724 }, { "epoch": 0.53, "learning_rate": 1.8887695435991333e-05, "loss": 2.4986, "step": 725 }, { "epoch": 0.53, "learning_rate": 1.888406737286696e-05, "loss": 2.4915, "step": 726 }, { "epoch": 0.53, "learning_rate": 1.8880433752068353e-05, "loss": 2.5593, "step": 727 }, { "epoch": 0.53, "learning_rate": 1.887679457586863e-05, "loss": 2.3988, "step": 728 }, { "epoch": 0.53, "learning_rate": 1.8873149846544377e-05, "loss": 2.5834, "step": 729 }, { "epoch": 0.53, "learning_rate": 1.8869499566375655e-05, "loss": 2.5279, "step": 730 }, { "epoch": 0.54, "learning_rate": 1.8865843737645994e-05, "loss": 2.5368, "step": 731 }, { "epoch": 0.54, "learning_rate": 1.8862182362642406e-05, "loss": 2.3786, "step": 732 }, { "epoch": 0.54, "learning_rate": 1.885851544365536e-05, "loss": 2.4294, "step": 733 }, { "epoch": 0.54, "learning_rate": 1.88548429829788e-05, "loss": 2.4478, "step": 734 }, { "epoch": 0.54, "learning_rate": 1.8851164982910135e-05, "loss": 2.3478, "step": 735 }, { "epoch": 0.54, "learning_rate": 1.884748144575024e-05, "loss": 2.5018, "step": 736 }, { "epoch": 0.54, "learning_rate": 1.8843792373803458e-05, "loss": 2.3998, "step": 737 }, { "epoch": 0.54, "learning_rate": 1.8840097769377585e-05, "loss": 2.4296, "step": 738 }, { "epoch": 0.54, "learning_rate": 1.8836397634783882e-05, "loss": 2.3852, "step": 739 }, { "epoch": 0.54, "learning_rate": 1.8832691972337074e-05, "loss": 2.3854, "step": 740 }, { "epoch": 0.54, "learning_rate": 1.882898078435534e-05, "loss": 2.2861, "step": 741 }, { "epoch": 0.54, "learning_rate": 1.8825264073160315e-05, "loss": 2.3343, "step": 742 }, { "epoch": 0.54, "learning_rate": 1.882154184107709e-05, "loss": 2.2303, "step": 743 }, { "epoch": 0.55, "learning_rate": 1.8817814090434218e-05, "loss": 2.4297, "step": 744 }, { "epoch": 0.55, "learning_rate": 1.8814080823563686e-05, "loss": 2.3234, "step": 745 }, { "epoch": 0.55, "learning_rate": 1.8810342042800944e-05, "loss": 2.4396, "step": 746 }, { "epoch": 0.55, "learning_rate": 1.8806597750484895e-05, "loss": 2.5135, "step": 747 }, { "epoch": 0.55, "learning_rate": 1.8802847948957883e-05, "loss": 2.4925, "step": 748 }, { "epoch": 0.55, "learning_rate": 1.87990926405657e-05, "loss": 2.386, "step": 749 }, { "epoch": 0.55, "learning_rate": 1.879533182765758e-05, "loss": 2.3512, "step": 750 }, { "epoch": 0.55, "learning_rate": 1.879156551258621e-05, "loss": 2.4587, "step": 751 }, { "epoch": 0.55, "learning_rate": 1.8787793697707704e-05, "loss": 2.2643, "step": 752 }, { "epoch": 0.55, "learning_rate": 1.8784016385381633e-05, "loss": 2.3187, "step": 753 }, { "epoch": 0.55, "learning_rate": 1.8780233577970994e-05, "loss": 2.3855, "step": 754 }, { "epoch": 0.55, "learning_rate": 1.877644527784223e-05, "loss": 2.4072, "step": 755 }, { "epoch": 0.55, "learning_rate": 1.8772651487365217e-05, "loss": 2.369, "step": 756 }, { "epoch": 0.55, "learning_rate": 1.8768852208913264e-05, "loss": 2.3621, "step": 757 }, { "epoch": 0.56, "learning_rate": 1.8765047444863116e-05, "loss": 2.2807, "step": 758 }, { "epoch": 0.56, "learning_rate": 1.8761237197594945e-05, "loss": 2.3193, "step": 759 }, { "epoch": 0.56, "learning_rate": 1.8757421469492363e-05, "loss": 2.526, "step": 760 }, { "epoch": 0.56, "learning_rate": 1.87536002629424e-05, "loss": 2.4426, "step": 761 }, { "epoch": 0.56, "learning_rate": 1.874977358033552e-05, "loss": 2.3998, "step": 762 }, { "epoch": 0.56, "learning_rate": 1.8745941424065607e-05, "loss": 2.3716, "step": 763 }, { "epoch": 0.56, "learning_rate": 1.8742103796529973e-05, "loss": 2.3747, "step": 764 }, { "epoch": 0.56, "learning_rate": 1.8738260700129354e-05, "loss": 2.5506, "step": 765 }, { "epoch": 0.56, "learning_rate": 1.8734412137267904e-05, "loss": 2.2591, "step": 766 }, { "epoch": 0.56, "learning_rate": 1.87305581103532e-05, "loss": 2.5552, "step": 767 }, { "epoch": 0.56, "learning_rate": 1.872669862179623e-05, "loss": 2.5304, "step": 768 }, { "epoch": 0.56, "learning_rate": 1.872283367401141e-05, "loss": 2.2561, "step": 769 }, { "epoch": 0.56, "learning_rate": 1.8718963269416566e-05, "loss": 2.3876, "step": 770 }, { "epoch": 0.56, "learning_rate": 1.871508741043293e-05, "loss": 2.3563, "step": 771 }, { "epoch": 0.57, "learning_rate": 1.8711206099485156e-05, "loss": 2.6489, "step": 772 }, { "epoch": 0.57, "learning_rate": 1.8707319339001303e-05, "loss": 2.3821, "step": 773 }, { "epoch": 0.57, "learning_rate": 1.870342713141285e-05, "loss": 2.4189, "step": 774 }, { "epoch": 0.57, "learning_rate": 1.869952947915467e-05, "loss": 2.4408, "step": 775 }, { "epoch": 0.57, "learning_rate": 1.869562638466504e-05, "loss": 2.3201, "step": 776 }, { "epoch": 0.57, "learning_rate": 1.869171785038566e-05, "loss": 2.592, "step": 777 }, { "epoch": 0.57, "learning_rate": 1.8687803878761618e-05, "loss": 2.4062, "step": 778 }, { "epoch": 0.57, "learning_rate": 1.86838844722414e-05, "loss": 2.3496, "step": 779 }, { "epoch": 0.57, "learning_rate": 1.867995963327691e-05, "loss": 2.3744, "step": 780 }, { "epoch": 0.57, "learning_rate": 1.8676029364323438e-05, "loss": 2.5148, "step": 781 }, { "epoch": 0.57, "learning_rate": 1.867209366783967e-05, "loss": 2.3995, "step": 782 }, { "epoch": 0.57, "learning_rate": 1.8668152546287686e-05, "loss": 2.3907, "step": 783 }, { "epoch": 0.57, "learning_rate": 1.8664206002132968e-05, "loss": 2.2736, "step": 784 }, { "epoch": 0.58, "learning_rate": 1.866025403784439e-05, "loss": 2.3709, "step": 785 }, { "epoch": 0.58, "learning_rate": 1.8656296655894205e-05, "loss": 2.2857, "step": 786 }, { "epoch": 0.58, "learning_rate": 1.8652333858758063e-05, "loss": 2.3721, "step": 787 }, { "epoch": 0.58, "learning_rate": 1.8648365648915007e-05, "loss": 2.4531, "step": 788 }, { "epoch": 0.58, "learning_rate": 1.8644392028847457e-05, "loss": 2.3687, "step": 789 }, { "epoch": 0.58, "learning_rate": 1.8640413001041225e-05, "loss": 2.3617, "step": 790 }, { "epoch": 0.58, "learning_rate": 1.8636428567985495e-05, "loss": 2.4352, "step": 791 }, { "epoch": 0.58, "learning_rate": 1.8632438732172846e-05, "loss": 2.4544, "step": 792 }, { "epoch": 0.58, "learning_rate": 1.8628443496099225e-05, "loss": 2.429, "step": 793 }, { "epoch": 0.58, "learning_rate": 1.8624442862263964e-05, "loss": 2.3028, "step": 794 }, { "epoch": 0.58, "learning_rate": 1.8620436833169773e-05, "loss": 2.3124, "step": 795 }, { "epoch": 0.58, "learning_rate": 1.861642541132273e-05, "loss": 2.5781, "step": 796 }, { "epoch": 0.58, "learning_rate": 1.8612408599232297e-05, "loss": 2.4167, "step": 797 }, { "epoch": 0.58, "learning_rate": 1.8608386399411296e-05, "loss": 2.3404, "step": 798 }, { "epoch": 0.59, "learning_rate": 1.860435881437593e-05, "loss": 2.3699, "step": 799 }, { "epoch": 0.59, "learning_rate": 1.8600325846645762e-05, "loss": 2.2765, "step": 800 }, { "epoch": 0.59, "learning_rate": 1.859628749874373e-05, "loss": 2.2946, "step": 801 }, { "epoch": 0.59, "learning_rate": 1.8592243773196132e-05, "loss": 2.4592, "step": 802 }, { "epoch": 0.59, "learning_rate": 1.8588194672532633e-05, "loss": 2.3868, "step": 803 }, { "epoch": 0.59, "learning_rate": 1.8584140199286263e-05, "loss": 2.3496, "step": 804 }, { "epoch": 0.59, "learning_rate": 1.8580080355993406e-05, "loss": 2.3899, "step": 805 }, { "epoch": 0.59, "learning_rate": 1.857601514519381e-05, "loss": 2.3205, "step": 806 }, { "epoch": 0.59, "learning_rate": 1.8571944569430582e-05, "loss": 2.4205, "step": 807 }, { "epoch": 0.59, "learning_rate": 1.8567868631250184e-05, "loss": 2.5212, "step": 808 }, { "epoch": 0.59, "learning_rate": 1.8563787333202427e-05, "loss": 2.4576, "step": 809 }, { "epoch": 0.59, "learning_rate": 1.8559700677840482e-05, "loss": 2.3444, "step": 810 }, { "epoch": 0.59, "learning_rate": 1.855560866772087e-05, "loss": 2.505, "step": 811 }, { "epoch": 0.59, "learning_rate": 1.8551511305403466e-05, "loss": 2.3303, "step": 812 }, { "epoch": 0.6, "learning_rate": 1.8547408593451483e-05, "loss": 2.4057, "step": 813 }, { "epoch": 0.6, "learning_rate": 1.8543300534431483e-05, "loss": 2.4554, "step": 814 }, { "epoch": 0.6, "learning_rate": 1.8539187130913382e-05, "loss": 2.5356, "step": 815 }, { "epoch": 0.6, "learning_rate": 1.8535068385470434e-05, "loss": 2.3317, "step": 816 }, { "epoch": 0.6, "learning_rate": 1.853094430067923e-05, "loss": 2.5133, "step": 817 }, { "epoch": 0.6, "learning_rate": 1.8526814879119706e-05, "loss": 2.4516, "step": 818 }, { "epoch": 0.6, "learning_rate": 1.852268012337514e-05, "loss": 2.5189, "step": 819 }, { "epoch": 0.6, "learning_rate": 1.8518540036032136e-05, "loss": 2.5467, "step": 820 }, { "epoch": 0.6, "learning_rate": 1.8514394619680645e-05, "loss": 2.2705, "step": 821 }, { "epoch": 0.6, "learning_rate": 1.851024387691395e-05, "loss": 2.4621, "step": 822 }, { "epoch": 0.6, "learning_rate": 1.8506087810328655e-05, "loss": 2.1786, "step": 823 }, { "epoch": 0.6, "learning_rate": 1.8501926422524707e-05, "loss": 2.4214, "step": 824 }, { "epoch": 0.6, "learning_rate": 1.8497759716105376e-05, "loss": 2.2414, "step": 825 }, { "epoch": 0.61, "learning_rate": 1.8493587693677263e-05, "loss": 2.2612, "step": 826 }, { "epoch": 0.61, "learning_rate": 1.8489410357850292e-05, "loss": 2.5085, "step": 827 }, { "epoch": 0.61, "learning_rate": 1.8485227711237707e-05, "loss": 2.3897, "step": 828 }, { "epoch": 0.61, "learning_rate": 1.848103975645608e-05, "loss": 2.5107, "step": 829 }, { "epoch": 0.61, "learning_rate": 1.847684649612531e-05, "loss": 2.422, "step": 830 }, { "epoch": 0.61, "learning_rate": 1.847264793286859e-05, "loss": 2.3306, "step": 831 }, { "epoch": 0.61, "learning_rate": 1.8468444069312463e-05, "loss": 2.2896, "step": 832 }, { "epoch": 0.61, "learning_rate": 1.8464234908086765e-05, "loss": 2.3358, "step": 833 }, { "epoch": 0.61, "learning_rate": 1.8460020451824658e-05, "loss": 2.3432, "step": 834 }, { "epoch": 0.61, "learning_rate": 1.8455800703162606e-05, "loss": 2.4044, "step": 835 }, { "epoch": 0.61, "learning_rate": 1.845157566474039e-05, "loss": 2.3948, "step": 836 }, { "epoch": 0.61, "learning_rate": 1.8447345339201105e-05, "loss": 2.3661, "step": 837 }, { "epoch": 0.61, "learning_rate": 1.8443109729191143e-05, "loss": 2.4071, "step": 838 }, { "epoch": 0.61, "learning_rate": 1.8438868837360214e-05, "loss": 2.3883, "step": 839 }, { "epoch": 0.62, "learning_rate": 1.8434622666361316e-05, "loss": 2.26, "step": 840 }, { "epoch": 0.62, "learning_rate": 1.8430371218850765e-05, "loss": 2.413, "step": 841 }, { "epoch": 0.62, "learning_rate": 1.8426114497488167e-05, "loss": 2.3044, "step": 842 }, { "epoch": 0.62, "learning_rate": 1.842185250493644e-05, "loss": 2.3011, "step": 843 }, { "epoch": 0.62, "learning_rate": 1.8417585243861782e-05, "loss": 2.3683, "step": 844 }, { "epoch": 0.62, "learning_rate": 1.8413312716933703e-05, "loss": 2.4512, "step": 845 }, { "epoch": 0.62, "learning_rate": 1.8409034926824995e-05, "loss": 2.3245, "step": 846 }, { "epoch": 0.62, "learning_rate": 1.840475187621175e-05, "loss": 2.2821, "step": 847 }, { "epoch": 0.62, "learning_rate": 1.8400463567773357e-05, "loss": 2.4122, "step": 848 }, { "epoch": 0.62, "learning_rate": 1.8396170004192474e-05, "loss": 2.3337, "step": 849 }, { "epoch": 0.62, "learning_rate": 1.8391871188155064e-05, "loss": 2.3029, "step": 850 }, { "epoch": 0.62, "learning_rate": 1.8387567122350368e-05, "loss": 2.2992, "step": 851 }, { "epoch": 0.62, "learning_rate": 1.838325780947092e-05, "loss": 2.4422, "step": 852 }, { "epoch": 0.62, "learning_rate": 1.8378943252212522e-05, "loss": 2.3424, "step": 853 }, { "epoch": 0.63, "learning_rate": 1.8374623453274267e-05, "loss": 2.3488, "step": 854 }, { "epoch": 0.63, "learning_rate": 1.8370298415358527e-05, "loss": 2.3475, "step": 855 }, { "epoch": 0.63, "learning_rate": 1.8365968141170945e-05, "loss": 2.2964, "step": 856 }, { "epoch": 0.63, "learning_rate": 1.836163263342045e-05, "loss": 2.5362, "step": 857 }, { "epoch": 0.63, "learning_rate": 1.8357291894819236e-05, "loss": 2.299, "step": 858 }, { "epoch": 0.63, "learning_rate": 1.835294592808277e-05, "loss": 2.3787, "step": 859 }, { "epoch": 0.63, "learning_rate": 1.8348594735929793e-05, "loss": 2.3275, "step": 860 }, { "epoch": 0.63, "learning_rate": 1.8344238321082316e-05, "loss": 2.268, "step": 861 }, { "epoch": 0.63, "learning_rate": 1.8339876686265614e-05, "loss": 2.3295, "step": 862 }, { "epoch": 0.63, "learning_rate": 1.8335509834208223e-05, "loss": 2.2822, "step": 863 }, { "epoch": 0.63, "learning_rate": 1.8331137767641958e-05, "loss": 2.2473, "step": 864 }, { "epoch": 0.63, "learning_rate": 1.8326760489301876e-05, "loss": 2.3785, "step": 865 }, { "epoch": 0.63, "learning_rate": 1.8322378001926315e-05, "loss": 2.3104, "step": 866 }, { "epoch": 0.64, "learning_rate": 1.831799030825685e-05, "loss": 2.501, "step": 867 }, { "epoch": 0.64, "learning_rate": 1.8313597411038332e-05, "loss": 2.4027, "step": 868 }, { "epoch": 0.64, "learning_rate": 1.830919931301886e-05, "loss": 2.3679, "step": 869 }, { "epoch": 0.64, "learning_rate": 1.830479601694978e-05, "loss": 2.3215, "step": 870 }, { "epoch": 0.64, "learning_rate": 1.83003875255857e-05, "loss": 2.3996, "step": 871 }, { "epoch": 0.64, "learning_rate": 1.829597384168447e-05, "loss": 2.4721, "step": 872 }, { "epoch": 0.64, "learning_rate": 1.82915549680072e-05, "loss": 2.3132, "step": 873 }, { "epoch": 0.64, "learning_rate": 1.8287130907318232e-05, "loss": 2.2498, "step": 874 }, { "epoch": 0.64, "learning_rate": 1.828270166238516e-05, "loss": 2.4102, "step": 875 }, { "epoch": 0.64, "learning_rate": 1.8278267235978823e-05, "loss": 2.4074, "step": 876 }, { "epoch": 0.64, "learning_rate": 1.82738276308733e-05, "loss": 2.3018, "step": 877 }, { "epoch": 0.64, "learning_rate": 1.8269382849845902e-05, "loss": 2.2373, "step": 878 }, { "epoch": 0.64, "learning_rate": 1.8264932895677195e-05, "loss": 2.3402, "step": 879 }, { "epoch": 0.64, "learning_rate": 1.8260477771150965e-05, "loss": 2.2351, "step": 880 }, { "epoch": 0.65, "learning_rate": 1.8256017479054238e-05, "loss": 2.4078, "step": 881 }, { "epoch": 0.65, "learning_rate": 1.8251552022177273e-05, "loss": 2.3121, "step": 882 }, { "epoch": 0.65, "learning_rate": 1.8247081403313564e-05, "loss": 2.2243, "step": 883 }, { "epoch": 0.65, "learning_rate": 1.8242605625259827e-05, "loss": 2.4712, "step": 884 }, { "epoch": 0.65, "learning_rate": 1.823812469081601e-05, "loss": 2.1628, "step": 885 }, { "epoch": 0.65, "learning_rate": 1.8233638602785286e-05, "loss": 2.5631, "step": 886 }, { "epoch": 0.65, "learning_rate": 1.822914736397405e-05, "loss": 2.348, "step": 887 }, { "epoch": 0.65, "learning_rate": 1.822465097719192e-05, "loss": 2.364, "step": 888 }, { "epoch": 0.65, "learning_rate": 1.822014944525174e-05, "loss": 2.3382, "step": 889 }, { "epoch": 0.65, "learning_rate": 1.8215642770969565e-05, "loss": 2.3821, "step": 890 }, { "epoch": 0.65, "learning_rate": 1.821113095716467e-05, "loss": 2.4523, "step": 891 }, { "epoch": 0.65, "learning_rate": 1.8206614006659544e-05, "loss": 2.3252, "step": 892 }, { "epoch": 0.65, "learning_rate": 1.820209192227989e-05, "loss": 2.3003, "step": 893 }, { "epoch": 0.65, "learning_rate": 1.8197564706854624e-05, "loss": 2.4071, "step": 894 }, { "epoch": 0.66, "learning_rate": 1.819303236321587e-05, "loss": 2.2777, "step": 895 }, { "epoch": 0.66, "learning_rate": 1.818849489419896e-05, "loss": 2.3571, "step": 896 }, { "epoch": 0.66, "learning_rate": 1.818395230264244e-05, "loss": 2.3167, "step": 897 }, { "epoch": 0.66, "learning_rate": 1.8179404591388048e-05, "loss": 2.3055, "step": 898 }, { "epoch": 0.66, "learning_rate": 1.8174851763280732e-05, "loss": 2.5026, "step": 899 }, { "epoch": 0.66, "learning_rate": 1.817029382116864e-05, "loss": 2.3758, "step": 900 }, { "epoch": 0.66, "learning_rate": 1.8165730767903116e-05, "loss": 2.3465, "step": 901 }, { "epoch": 0.66, "learning_rate": 1.8161162606338707e-05, "loss": 2.2096, "step": 902 }, { "epoch": 0.66, "learning_rate": 1.8156589339333154e-05, "loss": 2.4479, "step": 903 }, { "epoch": 0.66, "learning_rate": 1.8152010969747383e-05, "loss": 2.2401, "step": 904 }, { "epoch": 0.66, "learning_rate": 1.8147427500445533e-05, "loss": 2.3302, "step": 905 }, { "epoch": 0.66, "learning_rate": 1.814283893429491e-05, "loss": 2.3238, "step": 906 }, { "epoch": 0.66, "learning_rate": 1.8138245274166023e-05, "loss": 2.3889, "step": 907 }, { "epoch": 0.67, "learning_rate": 1.8133646522932562e-05, "loss": 2.1599, "step": 908 }, { "epoch": 0.67, "learning_rate": 1.8129042683471404e-05, "loss": 2.3042, "step": 909 }, { "epoch": 0.67, "learning_rate": 1.8124433758662605e-05, "loss": 2.4519, "step": 910 }, { "epoch": 0.67, "learning_rate": 1.8119819751389406e-05, "loss": 2.262, "step": 911 }, { "epoch": 0.67, "learning_rate": 1.8115200664538234e-05, "loss": 2.3065, "step": 912 }, { "epoch": 0.67, "learning_rate": 1.8110576500998677e-05, "loss": 2.4604, "step": 913 }, { "epoch": 0.67, "learning_rate": 1.8105947263663512e-05, "loss": 2.3899, "step": 914 }, { "epoch": 0.67, "learning_rate": 1.810131295542869e-05, "loss": 2.2899, "step": 915 }, { "epoch": 0.67, "learning_rate": 1.809667357919333e-05, "loss": 2.2756, "step": 916 }, { "epoch": 0.67, "learning_rate": 1.8092029137859713e-05, "loss": 2.4008, "step": 917 }, { "epoch": 0.67, "learning_rate": 1.808737963433331e-05, "loss": 2.3452, "step": 918 }, { "epoch": 0.67, "learning_rate": 1.808272507152274e-05, "loss": 2.2741, "step": 919 }, { "epoch": 0.67, "learning_rate": 1.80780654523398e-05, "loss": 2.4579, "step": 920 }, { "epoch": 0.67, "learning_rate": 1.8073400779699434e-05, "loss": 2.3023, "step": 921 }, { "epoch": 0.68, "learning_rate": 1.806873105651976e-05, "loss": 2.4444, "step": 922 }, { "epoch": 0.68, "learning_rate": 1.806405628572206e-05, "loss": 2.5183, "step": 923 }, { "epoch": 0.68, "learning_rate": 1.8059376470230757e-05, "loss": 2.4337, "step": 924 }, { "epoch": 0.68, "learning_rate": 1.8054691612973445e-05, "loss": 2.2421, "step": 925 }, { "epoch": 0.68, "learning_rate": 1.8050001716880864e-05, "loss": 2.3986, "step": 926 }, { "epoch": 0.68, "learning_rate": 1.804530678488691e-05, "loss": 2.2987, "step": 927 }, { "epoch": 0.68, "learning_rate": 1.8040606819928626e-05, "loss": 2.4683, "step": 928 }, { "epoch": 0.68, "learning_rate": 1.80359018249462e-05, "loss": 2.2399, "step": 929 }, { "epoch": 0.68, "learning_rate": 1.8031191802882984e-05, "loss": 2.5001, "step": 930 }, { "epoch": 0.68, "learning_rate": 1.802647675668545e-05, "loss": 2.3216, "step": 931 }, { "epoch": 0.68, "learning_rate": 1.8021756689303233e-05, "loss": 2.422, "step": 932 }, { "epoch": 0.68, "learning_rate": 1.8017031603689105e-05, "loss": 2.39, "step": 933 }, { "epoch": 0.68, "learning_rate": 1.8012301502798964e-05, "loss": 2.5044, "step": 934 }, { "epoch": 0.68, "learning_rate": 1.8007566389591863e-05, "loss": 2.3298, "step": 935 }, { "epoch": 0.69, "learning_rate": 1.8002826267029977e-05, "loss": 2.3226, "step": 936 }, { "epoch": 0.69, "learning_rate": 1.799808113807863e-05, "loss": 2.247, "step": 937 }, { "epoch": 0.69, "learning_rate": 1.7993331005706262e-05, "loss": 2.3394, "step": 938 }, { "epoch": 0.69, "learning_rate": 1.798857587288445e-05, "loss": 2.4328, "step": 939 }, { "epoch": 0.69, "learning_rate": 1.79838157425879e-05, "loss": 2.4086, "step": 940 }, { "epoch": 0.69, "learning_rate": 1.7979050617794446e-05, "loss": 2.4056, "step": 941 }, { "epoch": 0.69, "learning_rate": 1.7974280501485037e-05, "loss": 2.164, "step": 942 }, { "epoch": 0.69, "learning_rate": 1.796950539664376e-05, "loss": 2.321, "step": 943 }, { "epoch": 0.69, "learning_rate": 1.7964725306257806e-05, "loss": 2.3819, "step": 944 }, { "epoch": 0.69, "learning_rate": 1.79599402333175e-05, "loss": 2.3067, "step": 945 }, { "epoch": 0.69, "learning_rate": 1.795515018081627e-05, "loss": 2.2387, "step": 946 }, { "epoch": 0.69, "learning_rate": 1.795035515175067e-05, "loss": 2.3855, "step": 947 }, { "epoch": 0.69, "learning_rate": 1.794555514912036e-05, "loss": 2.2766, "step": 948 }, { "epoch": 0.7, "learning_rate": 1.794075017592812e-05, "loss": 2.3551, "step": 949 }, { "epoch": 0.7, "learning_rate": 1.793594023517983e-05, "loss": 2.4187, "step": 950 }, { "epoch": 0.7, "learning_rate": 1.793112532988448e-05, "loss": 2.2766, "step": 951 }, { "epoch": 0.7, "learning_rate": 1.792630546305417e-05, "loss": 2.2786, "step": 952 }, { "epoch": 0.7, "learning_rate": 1.7921480637704102e-05, "loss": 2.3138, "step": 953 }, { "epoch": 0.7, "learning_rate": 1.7916650856852577e-05, "loss": 2.3869, "step": 954 }, { "epoch": 0.7, "learning_rate": 1.7911816123521e-05, "loss": 2.3823, "step": 955 }, { "epoch": 0.7, "learning_rate": 1.790697644073387e-05, "loss": 2.3718, "step": 956 }, { "epoch": 0.7, "learning_rate": 1.7902131811518784e-05, "loss": 2.2878, "step": 957 }, { "epoch": 0.7, "learning_rate": 1.789728223890644e-05, "loss": 2.3622, "step": 958 }, { "epoch": 0.7, "learning_rate": 1.7892427725930613e-05, "loss": 2.3822, "step": 959 }, { "epoch": 0.7, "learning_rate": 1.788756827562818e-05, "loss": 2.3517, "step": 960 }, { "epoch": 0.7, "learning_rate": 1.7882703891039113e-05, "loss": 2.322, "step": 961 }, { "epoch": 0.7, "learning_rate": 1.7877834575206457e-05, "loss": 2.3786, "step": 962 }, { "epoch": 0.71, "learning_rate": 1.7872960331176347e-05, "loss": 2.357, "step": 963 }, { "epoch": 0.71, "learning_rate": 1.7868081161997996e-05, "loss": 2.3387, "step": 964 }, { "epoch": 0.71, "learning_rate": 1.786319707072371e-05, "loss": 2.4842, "step": 965 }, { "epoch": 0.71, "learning_rate": 1.785830806040887e-05, "loss": 2.4513, "step": 966 }, { "epoch": 0.71, "learning_rate": 1.7853414134111925e-05, "loss": 2.4132, "step": 967 }, { "epoch": 0.71, "learning_rate": 1.784851529489441e-05, "loss": 2.3647, "step": 968 }, { "epoch": 0.71, "learning_rate": 1.7843611545820926e-05, "loss": 2.3782, "step": 969 }, { "epoch": 0.71, "learning_rate": 1.7838702889959155e-05, "loss": 2.1994, "step": 970 }, { "epoch": 0.71, "learning_rate": 1.7833789330379836e-05, "loss": 2.2992, "step": 971 }, { "epoch": 0.71, "learning_rate": 1.7828870870156783e-05, "loss": 2.248, "step": 972 }, { "epoch": 0.71, "learning_rate": 1.782394751236688e-05, "loss": 2.3748, "step": 973 }, { "epoch": 0.71, "learning_rate": 1.7819019260090068e-05, "loss": 2.4563, "step": 974 }, { "epoch": 0.71, "learning_rate": 1.781408611640935e-05, "loss": 2.2777, "step": 975 }, { "epoch": 0.72, "learning_rate": 1.7809148084410787e-05, "loss": 2.3792, "step": 976 }, { "epoch": 0.72, "learning_rate": 1.780420516718351e-05, "loss": 2.3391, "step": 977 }, { "epoch": 0.72, "learning_rate": 1.7799257367819687e-05, "loss": 2.3647, "step": 978 }, { "epoch": 0.72, "learning_rate": 1.779430468941456e-05, "loss": 2.2768, "step": 979 }, { "epoch": 0.72, "learning_rate": 1.778934713506641e-05, "loss": 2.2855, "step": 980 }, { "epoch": 0.72, "learning_rate": 1.7784384707876576e-05, "loss": 2.3283, "step": 981 }, { "epoch": 0.72, "learning_rate": 1.7779417410949437e-05, "loss": 2.4277, "step": 982 }, { "epoch": 0.72, "learning_rate": 1.7774445247392425e-05, "loss": 2.4909, "step": 983 }, { "epoch": 0.72, "learning_rate": 1.7769468220316016e-05, "loss": 2.5645, "step": 984 }, { "epoch": 0.72, "learning_rate": 1.7764486332833723e-05, "loss": 2.2951, "step": 985 }, { "epoch": 0.72, "learning_rate": 1.775949958806211e-05, "loss": 2.4835, "step": 986 }, { "epoch": 0.72, "learning_rate": 1.7754507989120762e-05, "loss": 2.3311, "step": 987 }, { "epoch": 0.72, "learning_rate": 1.7749511539132327e-05, "loss": 2.3233, "step": 988 }, { "epoch": 0.72, "learning_rate": 1.774451024122246e-05, "loss": 2.2146, "step": 989 }, { "epoch": 0.73, "learning_rate": 1.7739504098519872e-05, "loss": 2.2038, "step": 990 }, { "epoch": 0.73, "learning_rate": 1.7734493114156282e-05, "loss": 2.3547, "step": 991 }, { "epoch": 0.73, "learning_rate": 1.772947729126646e-05, "loss": 2.2451, "step": 992 }, { "epoch": 0.73, "learning_rate": 1.7724456632988188e-05, "loss": 2.3215, "step": 993 }, { "epoch": 0.73, "learning_rate": 1.7719431142462278e-05, "loss": 2.3456, "step": 994 }, { "epoch": 0.73, "learning_rate": 1.7714400822832566e-05, "loss": 2.2376, "step": 995 }, { "epoch": 0.73, "learning_rate": 1.7709365677245906e-05, "loss": 2.2372, "step": 996 }, { "epoch": 0.73, "learning_rate": 1.7704325708852174e-05, "loss": 2.3128, "step": 997 }, { "epoch": 0.73, "learning_rate": 1.7699280920804262e-05, "loss": 2.4086, "step": 998 }, { "epoch": 0.73, "learning_rate": 1.769423131625808e-05, "loss": 2.3642, "step": 999 }, { "epoch": 0.73, "learning_rate": 1.768917689837254e-05, "loss": 2.3051, "step": 1000 }, { "epoch": 0.73, "learning_rate": 1.7684117670309578e-05, "loss": 2.4119, "step": 1001 }, { "epoch": 0.73, "learning_rate": 1.7679053635234134e-05, "loss": 2.3175, "step": 1002 }, { "epoch": 0.73, "learning_rate": 1.7673984796314153e-05, "loss": 2.25, "step": 1003 }, { "epoch": 0.74, "learning_rate": 1.766891115672059e-05, "loss": 2.3232, "step": 1004 }, { "epoch": 0.74, "learning_rate": 1.7663832719627404e-05, "loss": 2.299, "step": 1005 }, { "epoch": 0.74, "learning_rate": 1.765874948821154e-05, "loss": 2.2132, "step": 1006 }, { "epoch": 0.74, "learning_rate": 1.7653661465652968e-05, "loss": 2.4761, "step": 1007 }, { "epoch": 0.74, "learning_rate": 1.7648568655134633e-05, "loss": 2.1626, "step": 1008 }, { "epoch": 0.74, "learning_rate": 1.7643471059842487e-05, "loss": 2.4126, "step": 1009 }, { "epoch": 0.74, "learning_rate": 1.7638368682965467e-05, "loss": 2.2639, "step": 1010 }, { "epoch": 0.74, "learning_rate": 1.763326152769551e-05, "loss": 2.4132, "step": 1011 }, { "epoch": 0.74, "learning_rate": 1.762814959722754e-05, "loss": 2.4289, "step": 1012 }, { "epoch": 0.74, "learning_rate": 1.7623032894759467e-05, "loss": 2.33, "step": 1013 }, { "epoch": 0.74, "learning_rate": 1.761791142349218e-05, "loss": 2.4521, "step": 1014 }, { "epoch": 0.74, "learning_rate": 1.7612785186629562e-05, "loss": 2.2248, "step": 1015 }, { "epoch": 0.74, "learning_rate": 1.760765418737847e-05, "loss": 2.4976, "step": 1016 }, { "epoch": 0.75, "learning_rate": 1.7602518428948742e-05, "loss": 2.269, "step": 1017 }, { "epoch": 0.75, "learning_rate": 1.7597377914553195e-05, "loss": 2.4473, "step": 1018 }, { "epoch": 0.75, "learning_rate": 1.7592232647407622e-05, "loss": 2.2331, "step": 1019 }, { "epoch": 0.75, "learning_rate": 1.7587082630730786e-05, "loss": 2.3906, "step": 1020 }, { "epoch": 0.75, "learning_rate": 1.7581927867744423e-05, "loss": 2.3841, "step": 1021 }, { "epoch": 0.75, "learning_rate": 1.7576768361673233e-05, "loss": 2.2499, "step": 1022 }, { "epoch": 0.75, "learning_rate": 1.7571604115744895e-05, "loss": 2.3493, "step": 1023 }, { "epoch": 0.75, "learning_rate": 1.756643513319004e-05, "loss": 2.3448, "step": 1024 }, { "epoch": 0.75, "learning_rate": 1.756126141724227e-05, "loss": 2.2946, "step": 1025 }, { "epoch": 0.75, "learning_rate": 1.755608297113815e-05, "loss": 2.2119, "step": 1026 }, { "epoch": 0.75, "learning_rate": 1.7550899798117193e-05, "loss": 2.2764, "step": 1027 }, { "epoch": 0.75, "learning_rate": 1.7545711901421885e-05, "loss": 2.3754, "step": 1028 }, { "epoch": 0.75, "learning_rate": 1.7540519284297652e-05, "loss": 2.3153, "step": 1029 }, { "epoch": 0.75, "learning_rate": 1.753532194999288e-05, "loss": 2.2118, "step": 1030 }, { "epoch": 0.76, "learning_rate": 1.7530119901758908e-05, "loss": 2.3643, "step": 1031 }, { "epoch": 0.76, "learning_rate": 1.7524913142850017e-05, "loss": 2.5127, "step": 1032 }, { "epoch": 0.76, "learning_rate": 1.7519701676523447e-05, "loss": 2.2929, "step": 1033 }, { "epoch": 0.76, "learning_rate": 1.7514485506039365e-05, "loss": 2.4184, "step": 1034 }, { "epoch": 0.76, "learning_rate": 1.7509264634660896e-05, "loss": 2.328, "step": 1035 }, { "epoch": 0.76, "learning_rate": 1.75040390656541e-05, "loss": 2.3623, "step": 1036 }, { "epoch": 0.76, "learning_rate": 1.7498808802287982e-05, "loss": 2.3543, "step": 1037 }, { "epoch": 0.76, "learning_rate": 1.7493573847834465e-05, "loss": 2.3647, "step": 1038 }, { "epoch": 0.76, "learning_rate": 1.748833420556843e-05, "loss": 2.2679, "step": 1039 }, { "epoch": 0.76, "learning_rate": 1.748308987876768e-05, "loss": 2.3725, "step": 1040 }, { "epoch": 0.76, "learning_rate": 1.7477840870712946e-05, "loss": 2.2202, "step": 1041 }, { "epoch": 0.76, "learning_rate": 1.7472587184687884e-05, "loss": 2.4845, "step": 1042 }, { "epoch": 0.76, "learning_rate": 1.7467328823979097e-05, "loss": 2.2717, "step": 1043 }, { "epoch": 0.76, "learning_rate": 1.7462065791876087e-05, "loss": 2.2343, "step": 1044 }, { "epoch": 0.77, "learning_rate": 1.7456798091671296e-05, "loss": 2.3648, "step": 1045 }, { "epoch": 0.77, "learning_rate": 1.745152572666007e-05, "loss": 2.4841, "step": 1046 }, { "epoch": 0.77, "learning_rate": 1.7446248700140694e-05, "loss": 2.3688, "step": 1047 }, { "epoch": 0.77, "learning_rate": 1.744096701541435e-05, "loss": 2.3127, "step": 1048 }, { "epoch": 0.77, "learning_rate": 1.743568067578515e-05, "loss": 2.3042, "step": 1049 }, { "epoch": 0.77, "learning_rate": 1.74303896845601e-05, "loss": 2.4282, "step": 1050 }, { "epoch": 0.77, "learning_rate": 1.7425094045049133e-05, "loss": 2.2121, "step": 1051 }, { "epoch": 0.77, "learning_rate": 1.7419793760565086e-05, "loss": 2.2236, "step": 1052 }, { "epoch": 0.77, "learning_rate": 1.7414488834423687e-05, "loss": 2.2611, "step": 1053 }, { "epoch": 0.77, "learning_rate": 1.7409179269943588e-05, "loss": 2.4, "step": 1054 }, { "epoch": 0.77, "learning_rate": 1.7403865070446336e-05, "loss": 2.3109, "step": 1055 }, { "epoch": 0.77, "learning_rate": 1.7398546239256367e-05, "loss": 2.4186, "step": 1056 }, { "epoch": 0.77, "learning_rate": 1.7393222779701033e-05, "loss": 2.2396, "step": 1057 }, { "epoch": 0.78, "learning_rate": 1.738789469511056e-05, "loss": 2.3932, "step": 1058 }, { "epoch": 0.78, "learning_rate": 1.738256198881809e-05, "loss": 2.2449, "step": 1059 }, { "epoch": 0.78, "learning_rate": 1.7377224664159634e-05, "loss": 2.4071, "step": 1060 }, { "epoch": 0.78, "learning_rate": 1.7371882724474117e-05, "loss": 2.363, "step": 1061 }, { "epoch": 0.78, "learning_rate": 1.7366536173103325e-05, "loss": 2.2802, "step": 1062 }, { "epoch": 0.78, "learning_rate": 1.7361185013391945e-05, "loss": 2.1735, "step": 1063 }, { "epoch": 0.78, "learning_rate": 1.7355829248687545e-05, "loss": 2.2472, "step": 1064 }, { "epoch": 0.78, "learning_rate": 1.7350468882340572e-05, "loss": 2.2161, "step": 1065 }, { "epoch": 0.78, "learning_rate": 1.7345103917704348e-05, "loss": 2.4062, "step": 1066 }, { "epoch": 0.78, "learning_rate": 1.7339734358135077e-05, "loss": 2.3308, "step": 1067 }, { "epoch": 0.78, "learning_rate": 1.7334360206991842e-05, "loss": 2.2815, "step": 1068 }, { "epoch": 0.78, "learning_rate": 1.7328981467636578e-05, "loss": 2.2556, "step": 1069 }, { "epoch": 0.78, "learning_rate": 1.7323598143434116e-05, "loss": 2.3126, "step": 1070 }, { "epoch": 0.78, "learning_rate": 1.7318210237752137e-05, "loss": 2.3414, "step": 1071 }, { "epoch": 0.79, "learning_rate": 1.73128177539612e-05, "loss": 2.1497, "step": 1072 }, { "epoch": 0.79, "learning_rate": 1.730742069543472e-05, "loss": 2.2433, "step": 1073 }, { "epoch": 0.79, "learning_rate": 1.7302019065548973e-05, "loss": 2.1999, "step": 1074 }, { "epoch": 0.79, "learning_rate": 1.72966128676831e-05, "loss": 2.2173, "step": 1075 }, { "epoch": 0.79, "learning_rate": 1.7291202105219104e-05, "loss": 2.4452, "step": 1076 }, { "epoch": 0.79, "learning_rate": 1.7285786781541825e-05, "loss": 2.4684, "step": 1077 }, { "epoch": 0.79, "learning_rate": 1.728036690003898e-05, "loss": 2.2787, "step": 1078 }, { "epoch": 0.79, "learning_rate": 1.727494246410112e-05, "loss": 2.2406, "step": 1079 }, { "epoch": 0.79, "learning_rate": 1.7269513477121652e-05, "loss": 2.2275, "step": 1080 }, { "epoch": 0.79, "learning_rate": 1.7264079942496832e-05, "loss": 2.1472, "step": 1081 }, { "epoch": 0.79, "learning_rate": 1.7258641863625753e-05, "loss": 2.2652, "step": 1082 }, { "epoch": 0.79, "learning_rate": 1.7253199243910357e-05, "loss": 2.4352, "step": 1083 }, { "epoch": 0.79, "learning_rate": 1.724775208675543e-05, "loss": 2.1753, "step": 1084 }, { "epoch": 0.79, "learning_rate": 1.7242300395568587e-05, "loss": 2.2049, "step": 1085 }, { "epoch": 0.8, "learning_rate": 1.7236844173760286e-05, "loss": 2.2104, "step": 1086 }, { "epoch": 0.8, "learning_rate": 1.7231383424743813e-05, "loss": 2.2963, "step": 1087 }, { "epoch": 0.8, "learning_rate": 1.72259181519353e-05, "loss": 2.3207, "step": 1088 }, { "epoch": 0.8, "learning_rate": 1.7220448358753693e-05, "loss": 2.17, "step": 1089 }, { "epoch": 0.8, "learning_rate": 1.7214974048620775e-05, "loss": 2.2773, "step": 1090 }, { "epoch": 0.8, "learning_rate": 1.7209495224961154e-05, "loss": 2.3997, "step": 1091 }, { "epoch": 0.8, "learning_rate": 1.7204011891202256e-05, "loss": 2.1418, "step": 1092 }, { "epoch": 0.8, "learning_rate": 1.719852405077433e-05, "loss": 2.2799, "step": 1093 }, { "epoch": 0.8, "learning_rate": 1.719303170711045e-05, "loss": 2.2014, "step": 1094 }, { "epoch": 0.8, "learning_rate": 1.718753486364651e-05, "loss": 2.3305, "step": 1095 }, { "epoch": 0.8, "learning_rate": 1.71820335238212e-05, "loss": 2.1294, "step": 1096 }, { "epoch": 0.8, "learning_rate": 1.7176527691076043e-05, "loss": 2.2983, "step": 1097 }, { "epoch": 0.8, "learning_rate": 1.7171017368855363e-05, "loss": 2.2272, "step": 1098 }, { "epoch": 0.81, "learning_rate": 1.7165502560606295e-05, "loss": 2.2577, "step": 1099 }, { "epoch": 0.81, "learning_rate": 1.7159983269778783e-05, "loss": 2.1866, "step": 1100 }, { "epoch": 0.81, "learning_rate": 1.7154459499825564e-05, "loss": 2.2267, "step": 1101 }, { "epoch": 0.81, "learning_rate": 1.7148931254202193e-05, "loss": 2.2693, "step": 1102 }, { "epoch": 0.81, "learning_rate": 1.7143398536367012e-05, "loss": 2.176, "step": 1103 }, { "epoch": 0.81, "learning_rate": 1.7137861349781172e-05, "loss": 2.3531, "step": 1104 }, { "epoch": 0.81, "learning_rate": 1.713231969790861e-05, "loss": 2.4743, "step": 1105 }, { "epoch": 0.81, "learning_rate": 1.7126773584216055e-05, "loss": 2.1873, "step": 1106 }, { "epoch": 0.81, "learning_rate": 1.7121223012173037e-05, "loss": 2.2729, "step": 1107 }, { "epoch": 0.81, "learning_rate": 1.711566798525187e-05, "loss": 2.3232, "step": 1108 }, { "epoch": 0.81, "learning_rate": 1.7110108506927656e-05, "loss": 2.2392, "step": 1109 }, { "epoch": 0.81, "learning_rate": 1.7104544580678276e-05, "loss": 2.192, "step": 1110 }, { "epoch": 0.81, "learning_rate": 1.7098976209984402e-05, "loss": 2.2668, "step": 1111 }, { "epoch": 0.81, "learning_rate": 1.7093403398329478e-05, "loss": 2.3462, "step": 1112 }, { "epoch": 0.82, "learning_rate": 1.7087826149199735e-05, "loss": 2.318, "step": 1113 }, { "epoch": 0.82, "learning_rate": 1.7082244466084176e-05, "loss": 2.3003, "step": 1114 }, { "epoch": 0.82, "learning_rate": 1.7076658352474574e-05, "loss": 2.2631, "step": 1115 }, { "epoch": 0.82, "learning_rate": 1.7071067811865477e-05, "loss": 2.303, "step": 1116 }, { "epoch": 0.82, "learning_rate": 1.7065472847754204e-05, "loss": 2.205, "step": 1117 }, { "epoch": 0.82, "learning_rate": 1.7059873463640843e-05, "loss": 2.1577, "step": 1118 }, { "epoch": 0.82, "learning_rate": 1.7054269663028232e-05, "loss": 2.2287, "step": 1119 }, { "epoch": 0.82, "learning_rate": 1.7048661449422002e-05, "loss": 2.3649, "step": 1120 }, { "epoch": 0.82, "learning_rate": 1.7043048826330507e-05, "loss": 2.2138, "step": 1121 }, { "epoch": 0.82, "learning_rate": 1.703743179726489e-05, "loss": 2.3231, "step": 1122 }, { "epoch": 0.82, "learning_rate": 1.703181036573904e-05, "loss": 2.3334, "step": 1123 }, { "epoch": 0.82, "learning_rate": 1.7026184535269595e-05, "loss": 2.349, "step": 1124 }, { "epoch": 0.82, "learning_rate": 1.7020554309375947e-05, "loss": 2.2974, "step": 1125 }, { "epoch": 0.82, "learning_rate": 1.7014919691580244e-05, "loss": 2.2629, "step": 1126 }, { "epoch": 0.83, "learning_rate": 1.7009280685407374e-05, "loss": 2.3961, "step": 1127 }, { "epoch": 0.83, "learning_rate": 1.700363729438498e-05, "loss": 2.2158, "step": 1128 }, { "epoch": 0.83, "learning_rate": 1.699798952204343e-05, "loss": 2.1395, "step": 1129 }, { "epoch": 0.83, "learning_rate": 1.699233737191585e-05, "loss": 2.2966, "step": 1130 }, { "epoch": 0.83, "learning_rate": 1.6986680847538107e-05, "loss": 2.3401, "step": 1131 }, { "epoch": 0.83, "learning_rate": 1.6981019952448784e-05, "loss": 2.2369, "step": 1132 }, { "epoch": 0.83, "learning_rate": 1.6975354690189213e-05, "loss": 2.365, "step": 1133 }, { "epoch": 0.83, "learning_rate": 1.6969685064303462e-05, "loss": 2.3542, "step": 1134 }, { "epoch": 0.83, "learning_rate": 1.6964011078338315e-05, "loss": 2.3757, "step": 1135 }, { "epoch": 0.83, "learning_rate": 1.69583327358433e-05, "loss": 2.33, "step": 1136 }, { "epoch": 0.83, "learning_rate": 1.6952650040370652e-05, "loss": 2.2754, "step": 1137 }, { "epoch": 0.83, "learning_rate": 1.694696299547534e-05, "loss": 2.2326, "step": 1138 }, { "epoch": 0.83, "learning_rate": 1.694127160471506e-05, "loss": 2.3447, "step": 1139 }, { "epoch": 0.84, "learning_rate": 1.693557587165021e-05, "loss": 2.2098, "step": 1140 }, { "epoch": 0.84, "learning_rate": 1.692987579984392e-05, "loss": 2.5174, "step": 1141 }, { "epoch": 0.84, "learning_rate": 1.692417139286202e-05, "loss": 2.1169, "step": 1142 }, { "epoch": 0.84, "learning_rate": 1.6918462654273063e-05, "loss": 2.2452, "step": 1143 }, { "epoch": 0.84, "learning_rate": 1.6912749587648317e-05, "loss": 2.3718, "step": 1144 }, { "epoch": 0.84, "learning_rate": 1.6907032196561737e-05, "loss": 2.2209, "step": 1145 }, { "epoch": 0.84, "learning_rate": 1.690131048459e-05, "loss": 2.3466, "step": 1146 }, { "epoch": 0.84, "learning_rate": 1.6895584455312482e-05, "loss": 2.3028, "step": 1147 }, { "epoch": 0.84, "learning_rate": 1.688985411231126e-05, "loss": 2.2076, "step": 1148 }, { "epoch": 0.84, "learning_rate": 1.6884119459171104e-05, "loss": 2.3019, "step": 1149 }, { "epoch": 0.84, "learning_rate": 1.687838049947949e-05, "loss": 2.3408, "step": 1150 }, { "epoch": 0.84, "learning_rate": 1.687263723682658e-05, "loss": 2.2311, "step": 1151 }, { "epoch": 0.84, "learning_rate": 1.6866889674805233e-05, "loss": 2.307, "step": 1152 }, { "epoch": 0.84, "learning_rate": 1.6861137817010994e-05, "loss": 2.3244, "step": 1153 }, { "epoch": 0.85, "learning_rate": 1.6855381667042094e-05, "loss": 2.3116, "step": 1154 }, { "epoch": 0.85, "learning_rate": 1.684962122849946e-05, "loss": 2.2271, "step": 1155 }, { "epoch": 0.85, "learning_rate": 1.6843856504986687e-05, "loss": 2.3207, "step": 1156 }, { "epoch": 0.85, "learning_rate": 1.683808750011006e-05, "loss": 2.3404, "step": 1157 }, { "epoch": 0.85, "learning_rate": 1.6832314217478538e-05, "loss": 2.2897, "step": 1158 }, { "epoch": 0.85, "learning_rate": 1.6826536660703762e-05, "loss": 2.2533, "step": 1159 }, { "epoch": 0.85, "learning_rate": 1.6820754833400034e-05, "loss": 2.3676, "step": 1160 }, { "epoch": 0.85, "learning_rate": 1.681496873918434e-05, "loss": 2.2242, "step": 1161 }, { "epoch": 0.85, "learning_rate": 1.680917838167634e-05, "loss": 2.2626, "step": 1162 }, { "epoch": 0.85, "learning_rate": 1.680338376449834e-05, "loss": 2.1068, "step": 1163 }, { "epoch": 0.85, "learning_rate": 1.679758489127533e-05, "loss": 2.3763, "step": 1164 }, { "epoch": 0.85, "learning_rate": 1.6791781765634957e-05, "loss": 2.2915, "step": 1165 }, { "epoch": 0.85, "learning_rate": 1.6785974391207517e-05, "loss": 2.2077, "step": 1166 }, { "epoch": 0.85, "learning_rate": 1.6780162771625987e-05, "loss": 2.1716, "step": 1167 }, { "epoch": 0.86, "learning_rate": 1.6774346910525977e-05, "loss": 2.277, "step": 1168 }, { "epoch": 0.86, "learning_rate": 1.676852681154576e-05, "loss": 2.3896, "step": 1169 }, { "epoch": 0.86, "learning_rate": 1.676270247832627e-05, "loss": 2.2978, "step": 1170 }, { "epoch": 0.86, "learning_rate": 1.675687391451107e-05, "loss": 2.1976, "step": 1171 }, { "epoch": 0.86, "learning_rate": 1.675104112374638e-05, "loss": 2.171, "step": 1172 }, { "epoch": 0.86, "learning_rate": 1.6745204109681064e-05, "loss": 2.2354, "step": 1173 }, { "epoch": 0.86, "learning_rate": 1.6739362875966638e-05, "loss": 2.2331, "step": 1174 }, { "epoch": 0.86, "learning_rate": 1.6733517426257233e-05, "loss": 2.3208, "step": 1175 }, { "epoch": 0.86, "learning_rate": 1.6727667764209638e-05, "loss": 2.372, "step": 1176 }, { "epoch": 0.86, "learning_rate": 1.672181389348327e-05, "loss": 2.404, "step": 1177 }, { "epoch": 0.86, "learning_rate": 1.6715955817740184e-05, "loss": 2.1898, "step": 1178 }, { "epoch": 0.86, "learning_rate": 1.6710093540645056e-05, "loss": 2.2745, "step": 1179 }, { "epoch": 0.86, "learning_rate": 1.67042270658652e-05, "loss": 2.3975, "step": 1180 }, { "epoch": 0.87, "learning_rate": 1.6698356397070545e-05, "loss": 2.2029, "step": 1181 }, { "epoch": 0.87, "learning_rate": 1.669248153793366e-05, "loss": 2.2058, "step": 1182 }, { "epoch": 0.87, "learning_rate": 1.6686602492129716e-05, "loss": 2.3163, "step": 1183 }, { "epoch": 0.87, "learning_rate": 1.668071926333652e-05, "loss": 2.358, "step": 1184 }, { "epoch": 0.87, "learning_rate": 1.6674831855234486e-05, "loss": 2.2915, "step": 1185 }, { "epoch": 0.87, "learning_rate": 1.6668940271506645e-05, "loss": 2.2717, "step": 1186 }, { "epoch": 0.87, "learning_rate": 1.666304451583864e-05, "loss": 2.1996, "step": 1187 }, { "epoch": 0.87, "learning_rate": 1.6657144591918726e-05, "loss": 2.2237, "step": 1188 }, { "epoch": 0.87, "learning_rate": 1.665124050343776e-05, "loss": 2.35, "step": 1189 }, { "epoch": 0.87, "learning_rate": 1.6645332254089212e-05, "loss": 2.2623, "step": 1190 }, { "epoch": 0.87, "learning_rate": 1.6639419847569147e-05, "loss": 2.3562, "step": 1191 }, { "epoch": 0.87, "learning_rate": 1.6633503287576238e-05, "loss": 2.2678, "step": 1192 }, { "epoch": 0.87, "learning_rate": 1.662758257781175e-05, "loss": 2.3663, "step": 1193 }, { "epoch": 0.87, "learning_rate": 1.662165772197955e-05, "loss": 2.2061, "step": 1194 }, { "epoch": 0.88, "learning_rate": 1.661572872378609e-05, "loss": 2.4011, "step": 1195 }, { "epoch": 0.88, "learning_rate": 1.660979558694043e-05, "loss": 2.2728, "step": 1196 }, { "epoch": 0.88, "learning_rate": 1.6603858315154194e-05, "loss": 2.1966, "step": 1197 }, { "epoch": 0.88, "learning_rate": 1.6597916912141618e-05, "loss": 2.2464, "step": 1198 }, { "epoch": 0.88, "learning_rate": 1.6591971381619504e-05, "loss": 2.1848, "step": 1199 }, { "epoch": 0.88, "learning_rate": 1.6586021727307247e-05, "loss": 2.2772, "step": 1200 }, { "epoch": 0.88, "learning_rate": 1.6580067952926815e-05, "loss": 2.3021, "step": 1201 }, { "epoch": 0.88, "learning_rate": 1.6574110062202757e-05, "loss": 2.3481, "step": 1202 }, { "epoch": 0.88, "learning_rate": 1.65681480588622e-05, "loss": 2.3361, "step": 1203 }, { "epoch": 0.88, "learning_rate": 1.6562181946634837e-05, "loss": 2.088, "step": 1204 }, { "epoch": 0.88, "learning_rate": 1.6556211729252934e-05, "loss": 2.2724, "step": 1205 }, { "epoch": 0.88, "learning_rate": 1.655023741045133e-05, "loss": 2.3172, "step": 1206 }, { "epoch": 0.88, "learning_rate": 1.654425899396742e-05, "loss": 2.2684, "step": 1207 }, { "epoch": 0.88, "learning_rate": 1.653827648354117e-05, "loss": 2.3235, "step": 1208 }, { "epoch": 0.89, "learning_rate": 1.6532289882915104e-05, "loss": 2.3773, "step": 1209 }, { "epoch": 0.89, "learning_rate": 1.652629919583431e-05, "loss": 2.2092, "step": 1210 }, { "epoch": 0.89, "learning_rate": 1.652030442604642e-05, "loss": 2.1443, "step": 1211 }, { "epoch": 0.89, "learning_rate": 1.651430557730164e-05, "loss": 2.2354, "step": 1212 }, { "epoch": 0.89, "learning_rate": 1.650830265335271e-05, "loss": 2.3744, "step": 1213 }, { "epoch": 0.89, "learning_rate": 1.6502295657954918e-05, "loss": 2.3107, "step": 1214 }, { "epoch": 0.89, "learning_rate": 1.6496284594866115e-05, "loss": 2.2893, "step": 1215 }, { "epoch": 0.89, "learning_rate": 1.6490269467846687e-05, "loss": 2.3433, "step": 1216 }, { "epoch": 0.89, "learning_rate": 1.648425028065956e-05, "loss": 2.1119, "step": 1217 }, { "epoch": 0.89, "learning_rate": 1.6478227037070205e-05, "loss": 2.1543, "step": 1218 }, { "epoch": 0.89, "learning_rate": 1.647219974084663e-05, "loss": 2.1826, "step": 1219 }, { "epoch": 0.89, "learning_rate": 1.646616839575937e-05, "loss": 2.1976, "step": 1220 }, { "epoch": 0.89, "learning_rate": 1.6460133005581512e-05, "loss": 2.1855, "step": 1221 }, { "epoch": 0.9, "learning_rate": 1.645409357408865e-05, "loss": 2.2682, "step": 1222 }, { "epoch": 0.9, "learning_rate": 1.6448050105058925e-05, "loss": 2.2008, "step": 1223 }, { "epoch": 0.9, "learning_rate": 1.6442002602272994e-05, "loss": 2.3235, "step": 1224 }, { "epoch": 0.9, "learning_rate": 1.643595106951404e-05, "loss": 2.2005, "step": 1225 }, { "epoch": 0.9, "learning_rate": 1.6429895510567764e-05, "loss": 2.3424, "step": 1226 }, { "epoch": 0.9, "learning_rate": 1.6423835929222393e-05, "loss": 2.3074, "step": 1227 }, { "epoch": 0.9, "learning_rate": 1.641777232926866e-05, "loss": 2.1662, "step": 1228 }, { "epoch": 0.9, "learning_rate": 1.6411704714499825e-05, "loss": 2.1913, "step": 1229 }, { "epoch": 0.9, "learning_rate": 1.6405633088711646e-05, "loss": 2.2452, "step": 1230 }, { "epoch": 0.9, "learning_rate": 1.63995574557024e-05, "loss": 2.2227, "step": 1231 }, { "epoch": 0.9, "learning_rate": 1.6393477819272864e-05, "loss": 2.262, "step": 1232 }, { "epoch": 0.9, "learning_rate": 1.6387394183226327e-05, "loss": 2.2131, "step": 1233 }, { "epoch": 0.9, "learning_rate": 1.6381306551368577e-05, "loss": 2.1104, "step": 1234 }, { "epoch": 0.9, "learning_rate": 1.637521492750789e-05, "loss": 2.194, "step": 1235 }, { "epoch": 0.91, "learning_rate": 1.6369119315455067e-05, "loss": 2.4575, "step": 1236 }, { "epoch": 0.91, "learning_rate": 1.6363019719023373e-05, "loss": 2.2734, "step": 1237 }, { "epoch": 0.91, "learning_rate": 1.6356916142028583e-05, "loss": 2.0836, "step": 1238 }, { "epoch": 0.91, "learning_rate": 1.6350808588288964e-05, "loss": 2.3387, "step": 1239 }, { "epoch": 0.91, "learning_rate": 1.6344697061625257e-05, "loss": 2.1806, "step": 1240 }, { "epoch": 0.91, "learning_rate": 1.63385815658607e-05, "loss": 2.1569, "step": 1241 }, { "epoch": 0.91, "learning_rate": 1.633246210482101e-05, "loss": 2.0888, "step": 1242 }, { "epoch": 0.91, "learning_rate": 1.632633868233439e-05, "loss": 2.2373, "step": 1243 }, { "epoch": 0.91, "learning_rate": 1.6320211302231513e-05, "loss": 2.2469, "step": 1244 }, { "epoch": 0.91, "learning_rate": 1.6314079968345527e-05, "loss": 2.1256, "step": 1245 }, { "epoch": 0.91, "learning_rate": 1.6307944684512065e-05, "loss": 2.287, "step": 1246 }, { "epoch": 0.91, "learning_rate": 1.6301805454569217e-05, "loss": 2.2958, "step": 1247 }, { "epoch": 0.91, "learning_rate": 1.6295662282357555e-05, "loss": 2.2586, "step": 1248 }, { "epoch": 0.92, "learning_rate": 1.6289515171720106e-05, "loss": 2.227, "step": 1249 }, { "epoch": 0.92, "learning_rate": 1.6283364126502364e-05, "loss": 2.2245, "step": 1250 }, { "epoch": 0.92, "learning_rate": 1.6277209150552285e-05, "loss": 2.2099, "step": 1251 }, { "epoch": 0.92, "learning_rate": 1.6271050247720292e-05, "loss": 2.0895, "step": 1252 }, { "epoch": 0.92, "learning_rate": 1.626488742185925e-05, "loss": 2.135, "step": 1253 }, { "epoch": 0.92, "learning_rate": 1.6258720676824487e-05, "loss": 2.197, "step": 1254 }, { "epoch": 0.92, "learning_rate": 1.6252550016473782e-05, "loss": 2.1923, "step": 1255 }, { "epoch": 0.92, "learning_rate": 1.6246375444667363e-05, "loss": 2.2247, "step": 1256 }, { "epoch": 0.92, "learning_rate": 1.62401969652679e-05, "loss": 2.5215, "step": 1257 }, { "epoch": 0.92, "learning_rate": 1.6234014582140516e-05, "loss": 2.3785, "step": 1258 }, { "epoch": 0.92, "learning_rate": 1.6227828299152776e-05, "loss": 2.2564, "step": 1259 }, { "epoch": 0.92, "learning_rate": 1.6221638120174668e-05, "loss": 2.174, "step": 1260 }, { "epoch": 0.92, "learning_rate": 1.6215444049078642e-05, "loss": 2.1919, "step": 1261 }, { "epoch": 0.92, "learning_rate": 1.6209246089739563e-05, "loss": 2.3086, "step": 1262 }, { "epoch": 0.93, "learning_rate": 1.620304424603474e-05, "loss": 2.293, "step": 1263 }, { "epoch": 0.93, "learning_rate": 1.6196838521843905e-05, "loss": 2.41, "step": 1264 }, { "epoch": 0.93, "learning_rate": 1.6190628921049224e-05, "loss": 2.2768, "step": 1265 }, { "epoch": 0.93, "learning_rate": 1.6184415447535284e-05, "loss": 2.249, "step": 1266 }, { "epoch": 0.93, "learning_rate": 1.617819810518909e-05, "loss": 2.2055, "step": 1267 }, { "epoch": 0.93, "learning_rate": 1.6171976897900082e-05, "loss": 2.171, "step": 1268 }, { "epoch": 0.93, "learning_rate": 1.61657518295601e-05, "loss": 2.1101, "step": 1269 }, { "epoch": 0.93, "learning_rate": 1.6159522904063415e-05, "loss": 2.226, "step": 1270 }, { "epoch": 0.93, "learning_rate": 1.6153290125306692e-05, "loss": 2.1055, "step": 1271 }, { "epoch": 0.93, "learning_rate": 1.6147053497189032e-05, "loss": 2.3485, "step": 1272 }, { "epoch": 0.93, "learning_rate": 1.6140813023611923e-05, "loss": 2.377, "step": 1273 }, { "epoch": 0.93, "learning_rate": 1.6134568708479267e-05, "loss": 2.3283, "step": 1274 }, { "epoch": 0.93, "learning_rate": 1.6128320555697365e-05, "loss": 2.3706, "step": 1275 }, { "epoch": 0.93, "learning_rate": 1.612206856917493e-05, "loss": 2.1726, "step": 1276 }, { "epoch": 0.94, "learning_rate": 1.611581275282306e-05, "loss": 2.1804, "step": 1277 }, { "epoch": 0.94, "learning_rate": 1.6109553110555256e-05, "loss": 2.334, "step": 1278 }, { "epoch": 0.94, "learning_rate": 1.6103289646287405e-05, "loss": 2.2959, "step": 1279 }, { "epoch": 0.94, "learning_rate": 1.6097022363937798e-05, "loss": 2.1994, "step": 1280 }, { "epoch": 0.94, "learning_rate": 1.60907512674271e-05, "loss": 2.2781, "step": 1281 }, { "epoch": 0.94, "learning_rate": 1.6084476360678375e-05, "loss": 2.2532, "step": 1282 }, { "epoch": 0.94, "learning_rate": 1.607819764761706e-05, "loss": 2.204, "step": 1283 }, { "epoch": 0.94, "learning_rate": 1.6071915132170986e-05, "loss": 2.2288, "step": 1284 }, { "epoch": 0.94, "learning_rate": 1.606562881827034e-05, "loss": 2.1814, "step": 1285 }, { "epoch": 0.94, "learning_rate": 1.6059338709847713e-05, "loss": 2.1005, "step": 1286 }, { "epoch": 0.94, "learning_rate": 1.6053044810838048e-05, "loss": 2.2224, "step": 1287 }, { "epoch": 0.94, "learning_rate": 1.604674712517867e-05, "loss": 2.4642, "step": 1288 }, { "epoch": 0.94, "learning_rate": 1.6040445656809276e-05, "loss": 2.4288, "step": 1289 }, { "epoch": 0.95, "learning_rate": 1.6034140409671916e-05, "loss": 2.2149, "step": 1290 }, { "epoch": 0.95, "learning_rate": 1.602783138771102e-05, "loss": 2.3135, "step": 1291 }, { "epoch": 0.95, "learning_rate": 1.602151859487336e-05, "loss": 2.3349, "step": 1292 }, { "epoch": 0.95, "learning_rate": 1.601520203510809e-05, "loss": 2.4464, "step": 1293 }, { "epoch": 0.95, "learning_rate": 1.6008881712366702e-05, "loss": 2.2407, "step": 1294 }, { "epoch": 0.95, "learning_rate": 1.600255763060305e-05, "loss": 2.2536, "step": 1295 }, { "epoch": 0.95, "learning_rate": 1.599622979377334e-05, "loss": 2.3734, "step": 1296 }, { "epoch": 0.95, "learning_rate": 1.5989898205836123e-05, "loss": 2.29, "step": 1297 }, { "epoch": 0.95, "learning_rate": 1.59835628707523e-05, "loss": 2.4459, "step": 1298 }, { "epoch": 0.95, "learning_rate": 1.597722379248512e-05, "loss": 2.3016, "step": 1299 }, { "epoch": 0.95, "learning_rate": 1.597088097500016e-05, "loss": 2.3029, "step": 1300 }, { "epoch": 0.95, "learning_rate": 1.596453442226535e-05, "loss": 2.3509, "step": 1301 }, { "epoch": 0.95, "learning_rate": 1.595818413825095e-05, "loss": 2.125, "step": 1302 }, { "epoch": 0.95, "learning_rate": 1.595183012692956e-05, "loss": 2.1624, "step": 1303 }, { "epoch": 0.96, "learning_rate": 1.59454723922761e-05, "loss": 2.3965, "step": 1304 }, { "epoch": 0.96, "learning_rate": 1.593911093826784e-05, "loss": 2.245, "step": 1305 }, { "epoch": 0.96, "learning_rate": 1.593274576888435e-05, "loss": 2.217, "step": 1306 }, { "epoch": 0.96, "learning_rate": 1.5926376888107545e-05, "loss": 2.3843, "step": 1307 }, { "epoch": 0.96, "learning_rate": 1.5920004299921652e-05, "loss": 2.2262, "step": 1308 }, { "epoch": 0.96, "learning_rate": 1.591362800831322e-05, "loss": 2.4262, "step": 1309 }, { "epoch": 0.96, "learning_rate": 1.5907248017271117e-05, "loss": 2.2693, "step": 1310 }, { "epoch": 0.96, "learning_rate": 1.590086433078652e-05, "loss": 2.193, "step": 1311 }, { "epoch": 0.96, "learning_rate": 1.589447695285292e-05, "loss": 2.3975, "step": 1312 }, { "epoch": 0.96, "learning_rate": 1.5888085887466123e-05, "loss": 2.2839, "step": 1313 }, { "epoch": 0.96, "learning_rate": 1.5881691138624236e-05, "loss": 2.2144, "step": 1314 }, { "epoch": 0.96, "learning_rate": 1.5875292710327666e-05, "loss": 2.3209, "step": 1315 }, { "epoch": 0.96, "learning_rate": 1.5868890606579134e-05, "loss": 2.1229, "step": 1316 }, { "epoch": 0.96, "learning_rate": 1.5862484831383643e-05, "loss": 2.2964, "step": 1317 }, { "epoch": 0.97, "learning_rate": 1.585607538874851e-05, "loss": 2.1936, "step": 1318 }, { "epoch": 0.97, "learning_rate": 1.5849662282683342e-05, "loss": 2.1328, "step": 1319 }, { "epoch": 0.97, "learning_rate": 1.5843245517200033e-05, "loss": 2.3721, "step": 1320 }, { "epoch": 0.97, "learning_rate": 1.5836825096312764e-05, "loss": 2.2131, "step": 1321 }, { "epoch": 0.97, "learning_rate": 1.5830401024038012e-05, "loss": 2.228, "step": 1322 }, { "epoch": 0.97, "learning_rate": 1.5823973304394526e-05, "loss": 2.248, "step": 1323 }, { "epoch": 0.97, "learning_rate": 1.5817541941403352e-05, "loss": 2.2678, "step": 1324 }, { "epoch": 0.97, "learning_rate": 1.5811106939087805e-05, "loss": 2.2304, "step": 1325 }, { "epoch": 0.97, "learning_rate": 1.5804668301473473e-05, "loss": 2.226, "step": 1326 }, { "epoch": 0.97, "learning_rate": 1.5798226032588236e-05, "loss": 2.1736, "step": 1327 }, { "epoch": 0.97, "learning_rate": 1.579178013646222e-05, "loss": 2.2927, "step": 1328 }, { "epoch": 0.97, "learning_rate": 1.5785330617127844e-05, "loss": 2.2141, "step": 1329 }, { "epoch": 0.97, "learning_rate": 1.5778877478619778e-05, "loss": 2.2414, "step": 1330 }, { "epoch": 0.98, "learning_rate": 1.577242072497496e-05, "loss": 2.2388, "step": 1331 }, { "epoch": 0.98, "learning_rate": 1.57659603602326e-05, "loss": 2.2863, "step": 1332 }, { "epoch": 0.98, "learning_rate": 1.5759496388434147e-05, "loss": 2.2894, "step": 1333 }, { "epoch": 0.98, "learning_rate": 1.5753028813623325e-05, "loss": 2.2824, "step": 1334 }, { "epoch": 0.98, "learning_rate": 1.5746557639846095e-05, "loss": 2.1936, "step": 1335 }, { "epoch": 0.98, "learning_rate": 1.5740082871150692e-05, "loss": 2.0511, "step": 1336 }, { "epoch": 0.98, "learning_rate": 1.5733604511587577e-05, "loss": 2.3932, "step": 1337 }, { "epoch": 0.98, "learning_rate": 1.5727122565209474e-05, "loss": 2.1522, "step": 1338 }, { "epoch": 0.98, "learning_rate": 1.5720637036071336e-05, "loss": 2.3741, "step": 1339 }, { "epoch": 0.98, "learning_rate": 1.5714147928230374e-05, "loss": 2.2943, "step": 1340 }, { "epoch": 0.98, "learning_rate": 1.5707655245746022e-05, "loss": 2.3844, "step": 1341 }, { "epoch": 0.98, "learning_rate": 1.5701158992679957e-05, "loss": 2.1784, "step": 1342 }, { "epoch": 0.98, "learning_rate": 1.5694659173096097e-05, "loss": 2.3915, "step": 1343 }, { "epoch": 0.98, "learning_rate": 1.5688155791060577e-05, "loss": 2.3351, "step": 1344 }, { "epoch": 0.99, "learning_rate": 1.5681648850641776e-05, "loss": 2.1259, "step": 1345 }, { "epoch": 0.99, "learning_rate": 1.567513835591028e-05, "loss": 2.1554, "step": 1346 }, { "epoch": 0.99, "learning_rate": 1.5668624310938914e-05, "loss": 2.3963, "step": 1347 }, { "epoch": 0.99, "learning_rate": 1.566210671980272e-05, "loss": 2.2937, "step": 1348 }, { "epoch": 0.99, "learning_rate": 1.5655585586578954e-05, "loss": 2.1801, "step": 1349 }, { "epoch": 0.99, "learning_rate": 1.56490609153471e-05, "loss": 2.2343, "step": 1350 }, { "epoch": 0.99, "learning_rate": 1.5642532710188838e-05, "loss": 2.3284, "step": 1351 }, { "epoch": 0.99, "learning_rate": 1.5636000975188067e-05, "loss": 2.2948, "step": 1352 }, { "epoch": 0.99, "learning_rate": 1.5629465714430906e-05, "loss": 2.3504, "step": 1353 }, { "epoch": 0.99, "learning_rate": 1.5622926932005653e-05, "loss": 2.3583, "step": 1354 }, { "epoch": 0.99, "learning_rate": 1.5616384632002834e-05, "loss": 2.2422, "step": 1355 }, { "epoch": 0.99, "learning_rate": 1.5609838818515162e-05, "loss": 2.2529, "step": 1356 }, { "epoch": 0.99, "learning_rate": 1.5603289495637555e-05, "loss": 2.3579, "step": 1357 }, { "epoch": 0.99, "learning_rate": 1.559673666746712e-05, "loss": 2.3048, "step": 1358 }, { "epoch": 1.0, "learning_rate": 1.559018033810316e-05, "loss": 2.1925, "step": 1359 }, { "epoch": 1.0, "learning_rate": 1.5583620511647174e-05, "loss": 2.364, "step": 1360 }, { "epoch": 1.0, "learning_rate": 1.5577057192202837e-05, "loss": 2.1969, "step": 1361 }, { "epoch": 1.0, "learning_rate": 1.5570490383876015e-05, "loss": 2.4151, "step": 1362 }, { "epoch": 1.0, "learning_rate": 1.556392009077476e-05, "loss": 2.3037, "step": 1363 }, { "epoch": 1.0, "learning_rate": 1.5557346317009297e-05, "loss": 2.1572, "step": 1364 }, { "epoch": 1.0, "learning_rate": 1.5550769066692036e-05, "loss": 2.2334, "step": 1365 }, { "epoch": 1.0, "learning_rate": 1.5544188343937555e-05, "loss": 2.2497, "step": 1366 }, { "epoch": 1.0, "learning_rate": 1.553760415286261e-05, "loss": 2.0167, "step": 1367 }, { "epoch": 1.0, "learning_rate": 1.553101649758612e-05, "loss": 1.854, "step": 1368 }, { "epoch": 1.0, "learning_rate": 1.552442538222918e-05, "loss": 1.9719, "step": 1369 }, { "epoch": 1.0, "learning_rate": 1.5517830810915043e-05, "loss": 1.901, "step": 1370 }, { "epoch": 1.0, "learning_rate": 1.5511232787769124e-05, "loss": 2.0184, "step": 1371 }, { "epoch": 1.01, "learning_rate": 1.5504631316919e-05, "loss": 1.9248, "step": 1372 }, { "epoch": 1.01, "learning_rate": 1.54980264024944e-05, "loss": 2.1254, "step": 1373 }, { "epoch": 1.01, "learning_rate": 1.5491418048627214e-05, "loss": 2.1749, "step": 1374 }, { "epoch": 1.01, "learning_rate": 1.5484806259451485e-05, "loss": 2.0648, "step": 1375 }, { "epoch": 1.01, "learning_rate": 1.5478191039103397e-05, "loss": 1.9985, "step": 1376 }, { "epoch": 1.01, "learning_rate": 1.5471572391721282e-05, "loss": 2.0514, "step": 1377 }, { "epoch": 1.01, "learning_rate": 1.5464950321445624e-05, "loss": 1.8766, "step": 1378 }, { "epoch": 1.01, "learning_rate": 1.5458324832419037e-05, "loss": 1.9221, "step": 1379 }, { "epoch": 1.01, "learning_rate": 1.545169592878628e-05, "loss": 2.091, "step": 1380 }, { "epoch": 1.01, "learning_rate": 1.5445063614694244e-05, "loss": 2.308, "step": 1381 }, { "epoch": 1.01, "learning_rate": 1.5438427894291966e-05, "loss": 2.1183, "step": 1382 }, { "epoch": 1.01, "learning_rate": 1.5431788771730597e-05, "loss": 1.8545, "step": 1383 }, { "epoch": 1.01, "learning_rate": 1.5425146251163424e-05, "loss": 2.0399, "step": 1384 }, { "epoch": 1.01, "learning_rate": 1.5418500336745864e-05, "loss": 2.0819, "step": 1385 }, { "epoch": 1.02, "learning_rate": 1.541185103263545e-05, "loss": 2.0888, "step": 1386 }, { "epoch": 1.02, "learning_rate": 1.5405198342991837e-05, "loss": 1.8876, "step": 1387 }, { "epoch": 1.02, "learning_rate": 1.53985422719768e-05, "loss": 2.0306, "step": 1388 }, { "epoch": 1.02, "learning_rate": 1.539188282375423e-05, "loss": 1.965, "step": 1389 }, { "epoch": 1.02, "learning_rate": 1.5385220002490125e-05, "loss": 2.011, "step": 1390 }, { "epoch": 1.02, "learning_rate": 1.5378553812352603e-05, "loss": 1.9263, "step": 1391 }, { "epoch": 1.02, "learning_rate": 1.537188425751188e-05, "loss": 2.0104, "step": 1392 }, { "epoch": 1.02, "learning_rate": 1.5365211342140284e-05, "loss": 1.8525, "step": 1393 }, { "epoch": 1.02, "learning_rate": 1.5358535070412237e-05, "loss": 1.8773, "step": 1394 }, { "epoch": 1.02, "learning_rate": 1.5351855446504268e-05, "loss": 2.2428, "step": 1395 }, { "epoch": 1.02, "learning_rate": 1.5345172474595003e-05, "loss": 2.0498, "step": 1396 }, { "epoch": 1.02, "learning_rate": 1.5338486158865157e-05, "loss": 2.0223, "step": 1397 }, { "epoch": 1.02, "learning_rate": 1.533179650349754e-05, "loss": 2.0261, "step": 1398 }, { "epoch": 1.02, "learning_rate": 1.532510351267705e-05, "loss": 2.0597, "step": 1399 }, { "epoch": 1.03, "learning_rate": 1.5318407190590678e-05, "loss": 2.1006, "step": 1400 }, { "epoch": 1.03, "learning_rate": 1.531170754142749e-05, "loss": 1.8787, "step": 1401 }, { "epoch": 1.03, "learning_rate": 1.530500456937863e-05, "loss": 2.0091, "step": 1402 }, { "epoch": 1.03, "learning_rate": 1.529829827863734e-05, "loss": 2.2702, "step": 1403 }, { "epoch": 1.03, "learning_rate": 1.529158867339892e-05, "loss": 2.1124, "step": 1404 }, { "epoch": 1.03, "learning_rate": 1.5284875757860752e-05, "loss": 2.0846, "step": 1405 }, { "epoch": 1.03, "learning_rate": 1.5278159536222278e-05, "loss": 1.8908, "step": 1406 }, { "epoch": 1.03, "learning_rate": 1.5271440012685027e-05, "loss": 2.0986, "step": 1407 }, { "epoch": 1.03, "learning_rate": 1.5264717191452576e-05, "loss": 2.1667, "step": 1408 }, { "epoch": 1.03, "learning_rate": 1.5257991076730574e-05, "loss": 2.1615, "step": 1409 }, { "epoch": 1.03, "learning_rate": 1.5251261672726727e-05, "loss": 2.04, "step": 1410 }, { "epoch": 1.03, "learning_rate": 1.5244528983650801e-05, "loss": 2.1472, "step": 1411 }, { "epoch": 1.03, "learning_rate": 1.5237793013714616e-05, "loss": 2.1107, "step": 1412 }, { "epoch": 1.04, "learning_rate": 1.5231053767132047e-05, "loss": 1.8154, "step": 1413 }, { "epoch": 1.04, "learning_rate": 1.5224311248119011e-05, "loss": 1.7919, "step": 1414 }, { "epoch": 1.04, "learning_rate": 1.5217565460893482e-05, "loss": 1.9107, "step": 1415 }, { "epoch": 1.04, "learning_rate": 1.5210816409675473e-05, "loss": 2.0086, "step": 1416 }, { "epoch": 1.04, "learning_rate": 1.5204064098687035e-05, "loss": 1.9323, "step": 1417 }, { "epoch": 1.04, "learning_rate": 1.5197308532152265e-05, "loss": 2.0034, "step": 1418 }, { "epoch": 1.04, "learning_rate": 1.5190549714297303e-05, "loss": 2.033, "step": 1419 }, { "epoch": 1.04, "learning_rate": 1.5183787649350304e-05, "loss": 2.2468, "step": 1420 }, { "epoch": 1.04, "learning_rate": 1.5177022341541473e-05, "loss": 2.0201, "step": 1421 }, { "epoch": 1.04, "learning_rate": 1.5170253795103025e-05, "loss": 1.7521, "step": 1422 }, { "epoch": 1.04, "learning_rate": 1.5163482014269223e-05, "loss": 1.9762, "step": 1423 }, { "epoch": 1.04, "learning_rate": 1.5156707003276335e-05, "loss": 1.9839, "step": 1424 }, { "epoch": 1.04, "learning_rate": 1.5149928766362658e-05, "loss": 2.1429, "step": 1425 }, { "epoch": 1.04, "learning_rate": 1.5143147307768504e-05, "loss": 2.0575, "step": 1426 }, { "epoch": 1.05, "learning_rate": 1.5136362631736207e-05, "loss": 2.2776, "step": 1427 }, { "epoch": 1.05, "learning_rate": 1.5129574742510107e-05, "loss": 1.9977, "step": 1428 }, { "epoch": 1.05, "learning_rate": 1.5122783644336554e-05, "loss": 1.9566, "step": 1429 }, { "epoch": 1.05, "learning_rate": 1.5115989341463908e-05, "loss": 2.0496, "step": 1430 }, { "epoch": 1.05, "learning_rate": 1.5109191838142537e-05, "loss": 1.9799, "step": 1431 }, { "epoch": 1.05, "learning_rate": 1.51023911386248e-05, "loss": 2.0757, "step": 1432 }, { "epoch": 1.05, "learning_rate": 1.5095587247165069e-05, "loss": 2.0658, "step": 1433 }, { "epoch": 1.05, "learning_rate": 1.5088780168019704e-05, "loss": 2.0555, "step": 1434 }, { "epoch": 1.05, "learning_rate": 1.5081969905447062e-05, "loss": 2.0196, "step": 1435 }, { "epoch": 1.05, "learning_rate": 1.5075156463707491e-05, "loss": 2.2649, "step": 1436 }, { "epoch": 1.05, "learning_rate": 1.506833984706333e-05, "loss": 1.8341, "step": 1437 }, { "epoch": 1.05, "learning_rate": 1.50615200597789e-05, "loss": 2.0076, "step": 1438 }, { "epoch": 1.05, "learning_rate": 1.5054697106120508e-05, "loss": 2.0674, "step": 1439 }, { "epoch": 1.05, "learning_rate": 1.5047870990356443e-05, "loss": 2.207, "step": 1440 }, { "epoch": 1.06, "learning_rate": 1.5041041716756972e-05, "loss": 1.9774, "step": 1441 }, { "epoch": 1.06, "learning_rate": 1.5034209289594328e-05, "loss": 1.921, "step": 1442 }, { "epoch": 1.06, "learning_rate": 1.5027373713142735e-05, "loss": 1.9557, "step": 1443 }, { "epoch": 1.06, "learning_rate": 1.5020534991678371e-05, "loss": 2.0816, "step": 1444 }, { "epoch": 1.06, "learning_rate": 1.5013693129479388e-05, "loss": 1.8598, "step": 1445 }, { "epoch": 1.06, "learning_rate": 1.5006848130825909e-05, "loss": 2.1936, "step": 1446 }, { "epoch": 1.06, "learning_rate": 1.5000000000000002e-05, "loss": 2.0569, "step": 1447 }, { "epoch": 1.06, "learning_rate": 1.499314874128571e-05, "loss": 2.0055, "step": 1448 }, { "epoch": 1.06, "learning_rate": 1.4986294358969029e-05, "loss": 2.25, "step": 1449 }, { "epoch": 1.06, "learning_rate": 1.4979436857337904e-05, "loss": 2.1686, "step": 1450 }, { "epoch": 1.06, "learning_rate": 1.4972576240682236e-05, "loss": 1.8496, "step": 1451 }, { "epoch": 1.06, "learning_rate": 1.4965712513293874e-05, "loss": 2.2, "step": 1452 }, { "epoch": 1.06, "learning_rate": 1.4958845679466614e-05, "loss": 2.0723, "step": 1453 }, { "epoch": 1.07, "learning_rate": 1.4951975743496194e-05, "loss": 1.9499, "step": 1454 }, { "epoch": 1.07, "learning_rate": 1.4945102709680291e-05, "loss": 1.9238, "step": 1455 }, { "epoch": 1.07, "learning_rate": 1.493822658231852e-05, "loss": 2.0574, "step": 1456 }, { "epoch": 1.07, "learning_rate": 1.4931347365712437e-05, "loss": 1.9431, "step": 1457 }, { "epoch": 1.07, "learning_rate": 1.492446506416552e-05, "loss": 2.0143, "step": 1458 }, { "epoch": 1.07, "learning_rate": 1.4917579681983192e-05, "loss": 2.0567, "step": 1459 }, { "epoch": 1.07, "learning_rate": 1.4910691223472789e-05, "loss": 2.0548, "step": 1460 }, { "epoch": 1.07, "learning_rate": 1.4903799692943575e-05, "loss": 2.0205, "step": 1461 }, { "epoch": 1.07, "learning_rate": 1.4896905094706742e-05, "loss": 1.9838, "step": 1462 }, { "epoch": 1.07, "learning_rate": 1.4890007433075395e-05, "loss": 2.254, "step": 1463 }, { "epoch": 1.07, "learning_rate": 1.4883106712364558e-05, "loss": 1.9087, "step": 1464 }, { "epoch": 1.07, "learning_rate": 1.4876202936891165e-05, "loss": 1.949, "step": 1465 }, { "epoch": 1.07, "learning_rate": 1.4869296110974065e-05, "loss": 1.9828, "step": 1466 }, { "epoch": 1.07, "learning_rate": 1.4862386238934017e-05, "loss": 2.2267, "step": 1467 }, { "epoch": 1.08, "learning_rate": 1.485547332509368e-05, "loss": 1.9, "step": 1468 }, { "epoch": 1.08, "learning_rate": 1.484855737377762e-05, "loss": 2.0714, "step": 1469 }, { "epoch": 1.08, "learning_rate": 1.4841638389312298e-05, "loss": 2.0314, "step": 1470 }, { "epoch": 1.08, "learning_rate": 1.4834716376026083e-05, "loss": 2.0343, "step": 1471 }, { "epoch": 1.08, "learning_rate": 1.4827791338249224e-05, "loss": 2.0662, "step": 1472 }, { "epoch": 1.08, "learning_rate": 1.4820863280313874e-05, "loss": 2.0106, "step": 1473 }, { "epoch": 1.08, "learning_rate": 1.4813932206554076e-05, "loss": 2.0329, "step": 1474 }, { "epoch": 1.08, "learning_rate": 1.4806998121305749e-05, "loss": 1.8883, "step": 1475 }, { "epoch": 1.08, "learning_rate": 1.4800061028906703e-05, "loss": 2.1193, "step": 1476 }, { "epoch": 1.08, "learning_rate": 1.4793120933696629e-05, "loss": 2.0579, "step": 1477 }, { "epoch": 1.08, "learning_rate": 1.4786177840017095e-05, "loss": 1.9221, "step": 1478 }, { "epoch": 1.08, "learning_rate": 1.4779231752211546e-05, "loss": 1.9376, "step": 1479 }, { "epoch": 1.08, "learning_rate": 1.4772282674625298e-05, "loss": 2.1942, "step": 1480 }, { "epoch": 1.08, "learning_rate": 1.476533061160554e-05, "loss": 2.0417, "step": 1481 }, { "epoch": 1.09, "learning_rate": 1.475837556750133e-05, "loss": 1.9637, "step": 1482 }, { "epoch": 1.09, "learning_rate": 1.4751417546663582e-05, "loss": 1.9155, "step": 1483 }, { "epoch": 1.09, "learning_rate": 1.4744456553445086e-05, "loss": 2.1915, "step": 1484 }, { "epoch": 1.09, "learning_rate": 1.473749259220048e-05, "loss": 2.1631, "step": 1485 }, { "epoch": 1.09, "learning_rate": 1.4730525667286261e-05, "loss": 1.9371, "step": 1486 }, { "epoch": 1.09, "learning_rate": 1.4723555783060783e-05, "loss": 2.0779, "step": 1487 }, { "epoch": 1.09, "learning_rate": 1.4716582943884254e-05, "loss": 2.1144, "step": 1488 }, { "epoch": 1.09, "learning_rate": 1.4709607154118719e-05, "loss": 2.0099, "step": 1489 }, { "epoch": 1.09, "learning_rate": 1.4702628418128084e-05, "loss": 1.9823, "step": 1490 }, { "epoch": 1.09, "learning_rate": 1.4695646740278084e-05, "loss": 2.0644, "step": 1491 }, { "epoch": 1.09, "learning_rate": 1.4688662124936306e-05, "loss": 2.0888, "step": 1492 }, { "epoch": 1.09, "learning_rate": 1.468167457647217e-05, "loss": 1.9328, "step": 1493 }, { "epoch": 1.09, "learning_rate": 1.4674684099256923e-05, "loss": 2.105, "step": 1494 }, { "epoch": 1.1, "learning_rate": 1.4667690697663657e-05, "loss": 2.0413, "step": 1495 }, { "epoch": 1.1, "learning_rate": 1.4660694376067287e-05, "loss": 2.2034, "step": 1496 }, { "epoch": 1.1, "learning_rate": 1.4653695138844556e-05, "loss": 1.9819, "step": 1497 }, { "epoch": 1.1, "learning_rate": 1.4646692990374032e-05, "loss": 2.0201, "step": 1498 }, { "epoch": 1.1, "learning_rate": 1.4639687935036097e-05, "loss": 1.9295, "step": 1499 }, { "epoch": 1.1, "learning_rate": 1.4632679977212965e-05, "loss": 2.0466, "step": 1500 }, { "epoch": 1.1, "learning_rate": 1.4625669121288653e-05, "loss": 1.9956, "step": 1501 }, { "epoch": 1.1, "learning_rate": 1.4618655371648997e-05, "loss": 2.0138, "step": 1502 }, { "epoch": 1.1, "learning_rate": 1.461163873268164e-05, "loss": 2.0505, "step": 1503 }, { "epoch": 1.1, "learning_rate": 1.4604619208776042e-05, "loss": 2.2131, "step": 1504 }, { "epoch": 1.1, "learning_rate": 1.459759680432345e-05, "loss": 2.0451, "step": 1505 }, { "epoch": 1.1, "learning_rate": 1.4590571523716932e-05, "loss": 2.0999, "step": 1506 }, { "epoch": 1.1, "learning_rate": 1.4583543371351346e-05, "loss": 2.013, "step": 1507 }, { "epoch": 1.1, "learning_rate": 1.4576512351623342e-05, "loss": 2.1241, "step": 1508 }, { "epoch": 1.11, "learning_rate": 1.4569478468931371e-05, "loss": 2.0168, "step": 1509 }, { "epoch": 1.11, "learning_rate": 1.4562441727675676e-05, "loss": 1.9461, "step": 1510 }, { "epoch": 1.11, "learning_rate": 1.4555402132258282e-05, "loss": 1.9999, "step": 1511 }, { "epoch": 1.11, "learning_rate": 1.4548359687083005e-05, "loss": 1.9828, "step": 1512 }, { "epoch": 1.11, "learning_rate": 1.454131439655544e-05, "loss": 1.9901, "step": 1513 }, { "epoch": 1.11, "learning_rate": 1.4534266265082962e-05, "loss": 1.9578, "step": 1514 }, { "epoch": 1.11, "learning_rate": 1.4527215297074729e-05, "loss": 1.9444, "step": 1515 }, { "epoch": 1.11, "learning_rate": 1.4520161496941667e-05, "loss": 2.0248, "step": 1516 }, { "epoch": 1.11, "learning_rate": 1.4513104869096473e-05, "loss": 2.1589, "step": 1517 }, { "epoch": 1.11, "learning_rate": 1.4506045417953623e-05, "loss": 2.0011, "step": 1518 }, { "epoch": 1.11, "learning_rate": 1.4498983147929342e-05, "loss": 2.0625, "step": 1519 }, { "epoch": 1.11, "learning_rate": 1.4491918063441636e-05, "loss": 1.9709, "step": 1520 }, { "epoch": 1.11, "learning_rate": 1.4484850168910264e-05, "loss": 1.9783, "step": 1521 }, { "epoch": 1.12, "learning_rate": 1.447777946875674e-05, "loss": 1.9571, "step": 1522 }, { "epoch": 1.12, "learning_rate": 1.447070596740434e-05, "loss": 1.9665, "step": 1523 }, { "epoch": 1.12, "learning_rate": 1.4463629669278083e-05, "loss": 2.0286, "step": 1524 }, { "epoch": 1.12, "learning_rate": 1.4456550578804746e-05, "loss": 1.9934, "step": 1525 }, { "epoch": 1.12, "learning_rate": 1.4449468700412854e-05, "loss": 1.867, "step": 1526 }, { "epoch": 1.12, "learning_rate": 1.4442384038532664e-05, "loss": 1.9427, "step": 1527 }, { "epoch": 1.12, "learning_rate": 1.4435296597596187e-05, "loss": 2.1461, "step": 1528 }, { "epoch": 1.12, "learning_rate": 1.4428206382037168e-05, "loss": 2.024, "step": 1529 }, { "epoch": 1.12, "learning_rate": 1.4421113396291087e-05, "loss": 2.216, "step": 1530 }, { "epoch": 1.12, "learning_rate": 1.4414017644795157e-05, "loss": 2.1286, "step": 1531 }, { "epoch": 1.12, "learning_rate": 1.4406919131988324e-05, "loss": 2.0417, "step": 1532 }, { "epoch": 1.12, "learning_rate": 1.4399817862311255e-05, "loss": 1.9494, "step": 1533 }, { "epoch": 1.12, "learning_rate": 1.4392713840206352e-05, "loss": 1.8219, "step": 1534 }, { "epoch": 1.12, "learning_rate": 1.4385607070117722e-05, "loss": 1.894, "step": 1535 }, { "epoch": 1.13, "learning_rate": 1.4378497556491212e-05, "loss": 2.0844, "step": 1536 }, { "epoch": 1.13, "learning_rate": 1.4371385303774372e-05, "loss": 1.9836, "step": 1537 }, { "epoch": 1.13, "learning_rate": 1.436427031641647e-05, "loss": 2.0817, "step": 1538 }, { "epoch": 1.13, "learning_rate": 1.4357152598868478e-05, "loss": 2.0174, "step": 1539 }, { "epoch": 1.13, "learning_rate": 1.4350032155583087e-05, "loss": 2.0674, "step": 1540 }, { "epoch": 1.13, "learning_rate": 1.4342908991014682e-05, "loss": 1.9489, "step": 1541 }, { "epoch": 1.13, "learning_rate": 1.4335783109619356e-05, "loss": 1.98, "step": 1542 }, { "epoch": 1.13, "learning_rate": 1.4328654515854911e-05, "loss": 1.9882, "step": 1543 }, { "epoch": 1.13, "learning_rate": 1.4321523214180823e-05, "loss": 2.242, "step": 1544 }, { "epoch": 1.13, "learning_rate": 1.4314389209058287e-05, "loss": 1.9897, "step": 1545 }, { "epoch": 1.13, "learning_rate": 1.4307252504950167e-05, "loss": 1.9199, "step": 1546 }, { "epoch": 1.13, "learning_rate": 1.4300113106321034e-05, "loss": 2.0647, "step": 1547 }, { "epoch": 1.13, "learning_rate": 1.4292971017637131e-05, "loss": 2.0033, "step": 1548 }, { "epoch": 1.13, "learning_rate": 1.428582624336639e-05, "loss": 2.0394, "step": 1549 }, { "epoch": 1.14, "learning_rate": 1.4278678787978424e-05, "loss": 2.0319, "step": 1550 }, { "epoch": 1.14, "learning_rate": 1.4271528655944522e-05, "loss": 1.8497, "step": 1551 }, { "epoch": 1.14, "learning_rate": 1.4264375851737644e-05, "loss": 1.8076, "step": 1552 }, { "epoch": 1.14, "learning_rate": 1.4257220379832426e-05, "loss": 1.9954, "step": 1553 }, { "epoch": 1.14, "learning_rate": 1.4250062244705174e-05, "loss": 1.8501, "step": 1554 }, { "epoch": 1.14, "learning_rate": 1.4242901450833855e-05, "loss": 1.9792, "step": 1555 }, { "epoch": 1.14, "learning_rate": 1.42357380026981e-05, "loss": 2.0795, "step": 1556 }, { "epoch": 1.14, "learning_rate": 1.4228571904779209e-05, "loss": 2.072, "step": 1557 }, { "epoch": 1.14, "learning_rate": 1.4221403161560126e-05, "loss": 2.2759, "step": 1558 }, { "epoch": 1.14, "learning_rate": 1.4214231777525461e-05, "loss": 1.9332, "step": 1559 }, { "epoch": 1.14, "learning_rate": 1.4207057757161465e-05, "loss": 2.2355, "step": 1560 }, { "epoch": 1.14, "learning_rate": 1.4199881104956053e-05, "loss": 1.8494, "step": 1561 }, { "epoch": 1.14, "learning_rate": 1.4192701825398775e-05, "loss": 2.0487, "step": 1562 }, { "epoch": 1.15, "learning_rate": 1.418551992298083e-05, "loss": 1.9971, "step": 1563 }, { "epoch": 1.15, "learning_rate": 1.4178335402195052e-05, "loss": 2.1512, "step": 1564 }, { "epoch": 1.15, "learning_rate": 1.417114826753592e-05, "loss": 2.063, "step": 1565 }, { "epoch": 1.15, "learning_rate": 1.4163958523499543e-05, "loss": 2.0669, "step": 1566 }, { "epoch": 1.15, "learning_rate": 1.4156766174583667e-05, "loss": 2.2459, "step": 1567 }, { "epoch": 1.15, "learning_rate": 1.4149571225287662e-05, "loss": 2.0185, "step": 1568 }, { "epoch": 1.15, "learning_rate": 1.4142373680112528e-05, "loss": 1.9533, "step": 1569 }, { "epoch": 1.15, "learning_rate": 1.4135173543560891e-05, "loss": 1.9463, "step": 1570 }, { "epoch": 1.15, "learning_rate": 1.4127970820136999e-05, "loss": 1.995, "step": 1571 }, { "epoch": 1.15, "learning_rate": 1.4120765514346703e-05, "loss": 2.0532, "step": 1572 }, { "epoch": 1.15, "learning_rate": 1.4113557630697492e-05, "loss": 1.7927, "step": 1573 }, { "epoch": 1.15, "learning_rate": 1.4106347173698454e-05, "loss": 1.932, "step": 1574 }, { "epoch": 1.15, "learning_rate": 1.4099134147860287e-05, "loss": 1.9373, "step": 1575 }, { "epoch": 1.15, "learning_rate": 1.4091918557695304e-05, "loss": 2.0992, "step": 1576 }, { "epoch": 1.16, "learning_rate": 1.4084700407717413e-05, "loss": 2.0015, "step": 1577 }, { "epoch": 1.16, "learning_rate": 1.4077479702442132e-05, "loss": 2.1714, "step": 1578 }, { "epoch": 1.16, "learning_rate": 1.4070256446386573e-05, "loss": 2.0097, "step": 1579 }, { "epoch": 1.16, "learning_rate": 1.4063030644069436e-05, "loss": 2.1564, "step": 1580 }, { "epoch": 1.16, "learning_rate": 1.4055802300011027e-05, "loss": 2.1757, "step": 1581 }, { "epoch": 1.16, "learning_rate": 1.4048571418733237e-05, "loss": 1.8546, "step": 1582 }, { "epoch": 1.16, "learning_rate": 1.4041338004759544e-05, "loss": 1.9997, "step": 1583 }, { "epoch": 1.16, "learning_rate": 1.4034102062615009e-05, "loss": 1.9687, "step": 1584 }, { "epoch": 1.16, "learning_rate": 1.4026863596826279e-05, "loss": 1.9946, "step": 1585 }, { "epoch": 1.16, "learning_rate": 1.4019622611921572e-05, "loss": 1.8729, "step": 1586 }, { "epoch": 1.16, "learning_rate": 1.4012379112430692e-05, "loss": 2.0041, "step": 1587 }, { "epoch": 1.16, "learning_rate": 1.4005133102885002e-05, "loss": 2.0897, "step": 1588 }, { "epoch": 1.16, "learning_rate": 1.399788458781745e-05, "loss": 2.0248, "step": 1589 }, { "epoch": 1.16, "learning_rate": 1.3990633571762547e-05, "loss": 2.0561, "step": 1590 }, { "epoch": 1.17, "learning_rate": 1.3983380059256363e-05, "loss": 2.0885, "step": 1591 }, { "epoch": 1.17, "learning_rate": 1.3976124054836536e-05, "loss": 1.9138, "step": 1592 }, { "epoch": 1.17, "learning_rate": 1.3968865563042255e-05, "loss": 2.2166, "step": 1593 }, { "epoch": 1.17, "learning_rate": 1.3961604588414277e-05, "loss": 1.9118, "step": 1594 }, { "epoch": 1.17, "learning_rate": 1.3954341135494904e-05, "loss": 1.9892, "step": 1595 }, { "epoch": 1.17, "learning_rate": 1.3947075208827986e-05, "loss": 1.9537, "step": 1596 }, { "epoch": 1.17, "learning_rate": 1.393980681295893e-05, "loss": 2.0164, "step": 1597 }, { "epoch": 1.17, "learning_rate": 1.3932535952434681e-05, "loss": 1.8532, "step": 1598 }, { "epoch": 1.17, "learning_rate": 1.3925262631803722e-05, "loss": 2.0411, "step": 1599 }, { "epoch": 1.17, "learning_rate": 1.3917986855616088e-05, "loss": 2.06, "step": 1600 }, { "epoch": 1.17, "learning_rate": 1.3910708628423338e-05, "loss": 1.8678, "step": 1601 }, { "epoch": 1.17, "learning_rate": 1.3903427954778566e-05, "loss": 2.0284, "step": 1602 }, { "epoch": 1.17, "learning_rate": 1.3896144839236401e-05, "loss": 1.9297, "step": 1603 }, { "epoch": 1.18, "learning_rate": 1.3888859286352998e-05, "loss": 2.0506, "step": 1604 }, { "epoch": 1.18, "learning_rate": 1.3881571300686037e-05, "loss": 1.9055, "step": 1605 }, { "epoch": 1.18, "learning_rate": 1.3874280886794717e-05, "loss": 1.8695, "step": 1606 }, { "epoch": 1.18, "learning_rate": 1.386698804923976e-05, "loss": 2.1046, "step": 1607 }, { "epoch": 1.18, "learning_rate": 1.3859692792583403e-05, "loss": 2.006, "step": 1608 }, { "epoch": 1.18, "learning_rate": 1.3852395121389397e-05, "loss": 1.9384, "step": 1609 }, { "epoch": 1.18, "learning_rate": 1.3845095040222996e-05, "loss": 2.0869, "step": 1610 }, { "epoch": 1.18, "learning_rate": 1.383779255365097e-05, "loss": 2.0683, "step": 1611 }, { "epoch": 1.18, "learning_rate": 1.3830487666241598e-05, "loss": 1.8499, "step": 1612 }, { "epoch": 1.18, "learning_rate": 1.3823180382564645e-05, "loss": 2.1217, "step": 1613 }, { "epoch": 1.18, "learning_rate": 1.3815870707191393e-05, "loss": 2.118, "step": 1614 }, { "epoch": 1.18, "learning_rate": 1.3808558644694605e-05, "loss": 2.0188, "step": 1615 }, { "epoch": 1.18, "learning_rate": 1.3801244199648549e-05, "loss": 1.8827, "step": 1616 }, { "epoch": 1.18, "learning_rate": 1.3793927376628977e-05, "loss": 1.942, "step": 1617 }, { "epoch": 1.19, "learning_rate": 1.3786608180213129e-05, "loss": 2.0242, "step": 1618 }, { "epoch": 1.19, "learning_rate": 1.377928661497973e-05, "loss": 1.9969, "step": 1619 }, { "epoch": 1.19, "learning_rate": 1.3771962685508991e-05, "loss": 2.047, "step": 1620 }, { "epoch": 1.19, "learning_rate": 1.3764636396382595e-05, "loss": 1.9177, "step": 1621 }, { "epoch": 1.19, "learning_rate": 1.3757307752183708e-05, "loss": 1.9539, "step": 1622 }, { "epoch": 1.19, "learning_rate": 1.3749976757496969e-05, "loss": 2.1443, "step": 1623 }, { "epoch": 1.19, "learning_rate": 1.3742643416908477e-05, "loss": 1.9493, "step": 1624 }, { "epoch": 1.19, "learning_rate": 1.3735307735005811e-05, "loss": 2.0444, "step": 1625 }, { "epoch": 1.19, "learning_rate": 1.3727969716378012e-05, "loss": 1.8985, "step": 1626 }, { "epoch": 1.19, "learning_rate": 1.3720629365615573e-05, "loss": 1.8107, "step": 1627 }, { "epoch": 1.19, "learning_rate": 1.3713286687310461e-05, "loss": 2.1825, "step": 1628 }, { "epoch": 1.19, "learning_rate": 1.3705941686056086e-05, "loss": 2.187, "step": 1629 }, { "epoch": 1.19, "learning_rate": 1.369859436644732e-05, "loss": 2.009, "step": 1630 }, { "epoch": 1.19, "learning_rate": 1.3691244733080484e-05, "loss": 3.0687, "step": 1631 }, { "epoch": 1.2, "learning_rate": 1.3683892790553344e-05, "loss": 1.9666, "step": 1632 }, { "epoch": 1.2, "learning_rate": 1.3676538543465105e-05, "loss": 2.2578, "step": 1633 }, { "epoch": 1.2, "learning_rate": 1.3669181996416427e-05, "loss": 1.9028, "step": 1634 }, { "epoch": 1.2, "learning_rate": 1.3661823154009397e-05, "loss": 1.8256, "step": 1635 }, { "epoch": 1.2, "learning_rate": 1.3654462020847548e-05, "loss": 2.1917, "step": 1636 }, { "epoch": 1.2, "learning_rate": 1.3647098601535831e-05, "loss": 2.0386, "step": 1637 }, { "epoch": 1.2, "learning_rate": 1.3639732900680646e-05, "loss": 1.9319, "step": 1638 }, { "epoch": 1.2, "learning_rate": 1.363236492288981e-05, "loss": 1.8646, "step": 1639 }, { "epoch": 1.2, "learning_rate": 1.3624994672772562e-05, "loss": 2.104, "step": 1640 }, { "epoch": 1.2, "learning_rate": 1.3617622154939565e-05, "loss": 1.8404, "step": 1641 }, { "epoch": 1.2, "learning_rate": 1.3610247374002904e-05, "loss": 1.8821, "step": 1642 }, { "epoch": 1.2, "learning_rate": 1.3602870334576079e-05, "loss": 1.797, "step": 1643 }, { "epoch": 1.2, "learning_rate": 1.3595491041273999e-05, "loss": 2.1744, "step": 1644 }, { "epoch": 1.21, "learning_rate": 1.3588109498712985e-05, "loss": 1.9962, "step": 1645 }, { "epoch": 1.21, "learning_rate": 1.3580725711510763e-05, "loss": 2.0217, "step": 1646 }, { "epoch": 1.21, "learning_rate": 1.3573339684286472e-05, "loss": 2.0986, "step": 1647 }, { "epoch": 1.21, "learning_rate": 1.3565951421660644e-05, "loss": 2.045, "step": 1648 }, { "epoch": 1.21, "learning_rate": 1.3558560928255202e-05, "loss": 2.0644, "step": 1649 }, { "epoch": 1.21, "learning_rate": 1.3551168208693486e-05, "loss": 2.0669, "step": 1650 }, { "epoch": 1.21, "learning_rate": 1.3543773267600209e-05, "loss": 1.9238, "step": 1651 }, { "epoch": 1.21, "learning_rate": 1.3536376109601483e-05, "loss": 2.0555, "step": 1652 }, { "epoch": 1.21, "learning_rate": 1.3528976739324807e-05, "loss": 1.8676, "step": 1653 }, { "epoch": 1.21, "learning_rate": 1.3521575161399056e-05, "loss": 1.9345, "step": 1654 }, { "epoch": 1.21, "learning_rate": 1.35141713804545e-05, "loss": 1.8823, "step": 1655 }, { "epoch": 1.21, "learning_rate": 1.3506765401122774e-05, "loss": 1.7788, "step": 1656 }, { "epoch": 1.21, "learning_rate": 1.3499357228036893e-05, "loss": 2.1428, "step": 1657 }, { "epoch": 1.21, "learning_rate": 1.3491946865831246e-05, "loss": 2.1481, "step": 1658 }, { "epoch": 1.22, "learning_rate": 1.3484534319141592e-05, "loss": 1.9213, "step": 1659 }, { "epoch": 1.22, "learning_rate": 1.347711959260505e-05, "loss": 2.0192, "step": 1660 }, { "epoch": 1.22, "learning_rate": 1.3469702690860114e-05, "loss": 2.0507, "step": 1661 }, { "epoch": 1.22, "learning_rate": 1.3462283618546623e-05, "loss": 2.0154, "step": 1662 }, { "epoch": 1.22, "learning_rate": 1.3454862380305796e-05, "loss": 2.0336, "step": 1663 }, { "epoch": 1.22, "learning_rate": 1.3447438980780181e-05, "loss": 1.8933, "step": 1664 }, { "epoch": 1.22, "learning_rate": 1.3440013424613699e-05, "loss": 1.9731, "step": 1665 }, { "epoch": 1.22, "learning_rate": 1.3432585716451607e-05, "loss": 2.0061, "step": 1666 }, { "epoch": 1.22, "learning_rate": 1.3425155860940517e-05, "loss": 1.9448, "step": 1667 }, { "epoch": 1.22, "learning_rate": 1.341772386272838e-05, "loss": 2.1518, "step": 1668 }, { "epoch": 1.22, "learning_rate": 1.3410289726464491e-05, "loss": 2.0441, "step": 1669 }, { "epoch": 1.22, "learning_rate": 1.3402853456799474e-05, "loss": 1.9829, "step": 1670 }, { "epoch": 1.22, "learning_rate": 1.3395415058385297e-05, "loss": 1.99, "step": 1671 }, { "epoch": 1.22, "learning_rate": 1.3387974535875254e-05, "loss": 1.9576, "step": 1672 }, { "epoch": 1.23, "learning_rate": 1.3380531893923971e-05, "loss": 2.1519, "step": 1673 }, { "epoch": 1.23, "learning_rate": 1.33730871371874e-05, "loss": 1.9035, "step": 1674 }, { "epoch": 1.23, "learning_rate": 1.336564027032281e-05, "loss": 1.9106, "step": 1675 }, { "epoch": 1.23, "learning_rate": 1.33581912979888e-05, "loss": 2.0887, "step": 1676 }, { "epoch": 1.23, "learning_rate": 1.3350740224845277e-05, "loss": 1.9558, "step": 1677 }, { "epoch": 1.23, "learning_rate": 1.3343287055553472e-05, "loss": 2.0347, "step": 1678 }, { "epoch": 1.23, "learning_rate": 1.3335831794775914e-05, "loss": 1.8899, "step": 1679 }, { "epoch": 1.23, "learning_rate": 1.3328374447176448e-05, "loss": 2.1024, "step": 1680 }, { "epoch": 1.23, "learning_rate": 1.3320915017420233e-05, "loss": 1.906, "step": 1681 }, { "epoch": 1.23, "learning_rate": 1.3313453510173711e-05, "loss": 1.9418, "step": 1682 }, { "epoch": 1.23, "learning_rate": 1.3305989930104639e-05, "loss": 2.1334, "step": 1683 }, { "epoch": 1.23, "learning_rate": 1.3298524281882069e-05, "loss": 2.1325, "step": 1684 }, { "epoch": 1.23, "learning_rate": 1.3291056570176337e-05, "loss": 1.9628, "step": 1685 }, { "epoch": 1.24, "learning_rate": 1.3283586799659083e-05, "loss": 2.1124, "step": 1686 }, { "epoch": 1.24, "learning_rate": 1.3276114975003227e-05, "loss": 2.0169, "step": 1687 }, { "epoch": 1.24, "learning_rate": 1.3268641100882973e-05, "loss": 1.9871, "step": 1688 }, { "epoch": 1.24, "learning_rate": 1.3261165181973814e-05, "loss": 1.8791, "step": 1689 }, { "epoch": 1.24, "learning_rate": 1.3253687222952513e-05, "loss": 1.8355, "step": 1690 }, { "epoch": 1.24, "learning_rate": 1.3246207228497122e-05, "loss": 1.911, "step": 1691 }, { "epoch": 1.24, "learning_rate": 1.3238725203286951e-05, "loss": 1.8923, "step": 1692 }, { "epoch": 1.24, "learning_rate": 1.3231241152002587e-05, "loss": 1.8182, "step": 1693 }, { "epoch": 1.24, "learning_rate": 1.3223755079325895e-05, "loss": 1.9728, "step": 1694 }, { "epoch": 1.24, "learning_rate": 1.3216266989939987e-05, "loss": 1.8828, "step": 1695 }, { "epoch": 1.24, "learning_rate": 1.3208776888529244e-05, "loss": 1.8747, "step": 1696 }, { "epoch": 1.24, "learning_rate": 1.3201284779779314e-05, "loss": 2.125, "step": 1697 }, { "epoch": 1.24, "learning_rate": 1.3193790668377082e-05, "loss": 1.8086, "step": 1698 }, { "epoch": 1.24, "learning_rate": 1.3186294559010705e-05, "loss": 2.1279, "step": 1699 }, { "epoch": 1.25, "learning_rate": 1.317879645636958e-05, "loss": 2.1698, "step": 1700 }, { "epoch": 1.25, "learning_rate": 1.3171296365144351e-05, "loss": 1.9497, "step": 1701 }, { "epoch": 1.25, "learning_rate": 1.3163794290026908e-05, "loss": 2.15, "step": 1702 }, { "epoch": 1.25, "learning_rate": 1.3156290235710384e-05, "loss": 1.8961, "step": 1703 }, { "epoch": 1.25, "learning_rate": 1.3148784206889144e-05, "loss": 2.001, "step": 1704 }, { "epoch": 1.25, "learning_rate": 1.3141276208258798e-05, "loss": 1.967, "step": 1705 }, { "epoch": 1.25, "learning_rate": 1.3133766244516175e-05, "loss": 1.9199, "step": 1706 }, { "epoch": 1.25, "learning_rate": 1.3126254320359344e-05, "loss": 1.9158, "step": 1707 }, { "epoch": 1.25, "learning_rate": 1.3118740440487598e-05, "loss": 1.9241, "step": 1708 }, { "epoch": 1.25, "learning_rate": 1.3111224609601453e-05, "loss": 1.9824, "step": 1709 }, { "epoch": 1.25, "learning_rate": 1.3103706832402643e-05, "loss": 2.0926, "step": 1710 }, { "epoch": 1.25, "learning_rate": 1.3096187113594126e-05, "loss": 1.8518, "step": 1711 }, { "epoch": 1.25, "learning_rate": 1.3088665457880061e-05, "loss": 1.9418, "step": 1712 }, { "epoch": 1.25, "learning_rate": 1.3081141869965834e-05, "loss": 2.0463, "step": 1713 }, { "epoch": 1.26, "learning_rate": 1.3073616354558036e-05, "loss": 2.1483, "step": 1714 }, { "epoch": 1.26, "learning_rate": 1.3066088916364458e-05, "loss": 1.9279, "step": 1715 }, { "epoch": 1.26, "learning_rate": 1.3058559560094097e-05, "loss": 2.1172, "step": 1716 }, { "epoch": 1.26, "learning_rate": 1.3051028290457153e-05, "loss": 2.0232, "step": 1717 }, { "epoch": 1.26, "learning_rate": 1.3043495112165019e-05, "loss": 1.7542, "step": 1718 }, { "epoch": 1.26, "learning_rate": 1.303596002993028e-05, "loss": 1.8701, "step": 1719 }, { "epoch": 1.26, "learning_rate": 1.3028423048466719e-05, "loss": 2.0007, "step": 1720 }, { "epoch": 1.26, "learning_rate": 1.3020884172489299e-05, "loss": 2.031, "step": 1721 }, { "epoch": 1.26, "learning_rate": 1.3013343406714181e-05, "loss": 1.899, "step": 1722 }, { "epoch": 1.26, "learning_rate": 1.3005800755858688e-05, "loss": 2.1647, "step": 1723 }, { "epoch": 1.26, "learning_rate": 1.2998256224641346e-05, "loss": 2.1139, "step": 1724 }, { "epoch": 1.26, "learning_rate": 1.2990709817781839e-05, "loss": 2.2365, "step": 1725 }, { "epoch": 1.26, "learning_rate": 1.2983161540001029e-05, "loss": 2.1204, "step": 1726 }, { "epoch": 1.27, "learning_rate": 1.2975611396020952e-05, "loss": 2.0411, "step": 1727 }, { "epoch": 1.27, "learning_rate": 1.2968059390564813e-05, "loss": 2.0152, "step": 1728 }, { "epoch": 1.27, "learning_rate": 1.2960505528356973e-05, "loss": 1.9725, "step": 1729 }, { "epoch": 1.27, "learning_rate": 1.2952949814122961e-05, "loss": 1.9302, "step": 1730 }, { "epoch": 1.27, "learning_rate": 1.2945392252589466e-05, "loss": 1.962, "step": 1731 }, { "epoch": 1.27, "learning_rate": 1.2937832848484324e-05, "loss": 1.896, "step": 1732 }, { "epoch": 1.27, "learning_rate": 1.2930271606536539e-05, "loss": 1.8969, "step": 1733 }, { "epoch": 1.27, "learning_rate": 1.2922708531476245e-05, "loss": 1.9996, "step": 1734 }, { "epoch": 1.27, "learning_rate": 1.2915143628034736e-05, "loss": 1.9669, "step": 1735 }, { "epoch": 1.27, "learning_rate": 1.2907576900944449e-05, "loss": 2.1259, "step": 1736 }, { "epoch": 1.27, "learning_rate": 1.290000835493896e-05, "loss": 2.1306, "step": 1737 }, { "epoch": 1.27, "learning_rate": 1.289243799475298e-05, "loss": 2.2128, "step": 1738 }, { "epoch": 1.27, "learning_rate": 1.2884865825122357e-05, "loss": 2.0517, "step": 1739 }, { "epoch": 1.27, "learning_rate": 1.287729185078407e-05, "loss": 1.9975, "step": 1740 }, { "epoch": 1.28, "learning_rate": 1.2869716076476237e-05, "loss": 1.8725, "step": 1741 }, { "epoch": 1.28, "learning_rate": 1.2862138506938085e-05, "loss": 2.004, "step": 1742 }, { "epoch": 1.28, "learning_rate": 1.285455914690997e-05, "loss": 1.9994, "step": 1743 }, { "epoch": 1.28, "learning_rate": 1.2846978001133378e-05, "loss": 1.896, "step": 1744 }, { "epoch": 1.28, "learning_rate": 1.2839395074350903e-05, "loss": 1.967, "step": 1745 }, { "epoch": 1.28, "learning_rate": 1.2831810371306247e-05, "loss": 1.793, "step": 1746 }, { "epoch": 1.28, "learning_rate": 1.2824223896744243e-05, "loss": 2.0395, "step": 1747 }, { "epoch": 1.28, "learning_rate": 1.2816635655410812e-05, "loss": 2.1212, "step": 1748 }, { "epoch": 1.28, "learning_rate": 1.2809045652052992e-05, "loss": 1.9114, "step": 1749 }, { "epoch": 1.28, "learning_rate": 1.2801453891418918e-05, "loss": 1.9512, "step": 1750 }, { "epoch": 1.28, "learning_rate": 1.2793860378257825e-05, "loss": 2.0541, "step": 1751 }, { "epoch": 1.28, "learning_rate": 1.2786265117320047e-05, "loss": 2.0075, "step": 1752 }, { "epoch": 1.28, "learning_rate": 1.2778668113357009e-05, "loss": 1.9889, "step": 1753 }, { "epoch": 1.28, "learning_rate": 1.2771069371121225e-05, "loss": 2.0162, "step": 1754 }, { "epoch": 1.29, "learning_rate": 1.2763468895366304e-05, "loss": 2.05, "step": 1755 }, { "epoch": 1.29, "learning_rate": 1.2755866690846927e-05, "loss": 2.0668, "step": 1756 }, { "epoch": 1.29, "learning_rate": 1.2748262762318872e-05, "loss": 1.9893, "step": 1757 }, { "epoch": 1.29, "learning_rate": 1.274065711453898e-05, "loss": 1.9822, "step": 1758 }, { "epoch": 1.29, "learning_rate": 1.2733049752265177e-05, "loss": 1.9605, "step": 1759 }, { "epoch": 1.29, "learning_rate": 1.2725440680256457e-05, "loss": 2.0432, "step": 1760 }, { "epoch": 1.29, "learning_rate": 1.2717829903272889e-05, "loss": 1.8067, "step": 1761 }, { "epoch": 1.29, "learning_rate": 1.2710217426075601e-05, "loss": 2.0946, "step": 1762 }, { "epoch": 1.29, "learning_rate": 1.2702603253426795e-05, "loss": 2.0684, "step": 1763 }, { "epoch": 1.29, "learning_rate": 1.2694987390089723e-05, "loss": 2.0349, "step": 1764 }, { "epoch": 1.29, "learning_rate": 1.2687369840828695e-05, "loss": 1.8545, "step": 1765 }, { "epoch": 1.29, "learning_rate": 1.2679750610409091e-05, "loss": 1.9331, "step": 1766 }, { "epoch": 1.29, "learning_rate": 1.2672129703597321e-05, "loss": 2.1279, "step": 1767 }, { "epoch": 1.3, "learning_rate": 1.2664507125160856e-05, "loss": 2.0524, "step": 1768 }, { "epoch": 1.3, "learning_rate": 1.265688287986822e-05, "loss": 1.9099, "step": 1769 }, { "epoch": 1.3, "learning_rate": 1.2649256972488956e-05, "loss": 2.1007, "step": 1770 }, { "epoch": 1.3, "learning_rate": 1.2641629407793676e-05, "loss": 1.91, "step": 1771 }, { "epoch": 1.3, "learning_rate": 1.2634000190554005e-05, "loss": 2.1023, "step": 1772 }, { "epoch": 1.3, "learning_rate": 1.262636932554261e-05, "loss": 2.035, "step": 1773 }, { "epoch": 1.3, "learning_rate": 1.2618736817533199e-05, "loss": 1.8452, "step": 1774 }, { "epoch": 1.3, "learning_rate": 1.2611102671300488e-05, "loss": 2.0989, "step": 1775 }, { "epoch": 1.3, "learning_rate": 1.2603466891620232e-05, "loss": 2.1537, "step": 1776 }, { "epoch": 1.3, "learning_rate": 1.259582948326921e-05, "loss": 2.078, "step": 1777 }, { "epoch": 1.3, "learning_rate": 1.2588190451025209e-05, "loss": 2.0113, "step": 1778 }, { "epoch": 1.3, "learning_rate": 1.2580549799667034e-05, "loss": 2.0501, "step": 1779 }, { "epoch": 1.3, "learning_rate": 1.2572907533974517e-05, "loss": 1.8365, "step": 1780 }, { "epoch": 1.3, "learning_rate": 1.2565263658728477e-05, "loss": 2.0272, "step": 1781 }, { "epoch": 1.31, "learning_rate": 1.2557618178710756e-05, "loss": 1.9429, "step": 1782 }, { "epoch": 1.31, "learning_rate": 1.2549971098704196e-05, "loss": 1.9319, "step": 1783 }, { "epoch": 1.31, "learning_rate": 1.2542322423492638e-05, "loss": 1.9946, "step": 1784 }, { "epoch": 1.31, "learning_rate": 1.2534672157860928e-05, "loss": 2.2356, "step": 1785 }, { "epoch": 1.31, "learning_rate": 1.2527020306594895e-05, "loss": 2.1346, "step": 1786 }, { "epoch": 1.31, "learning_rate": 1.2519366874481366e-05, "loss": 2.1373, "step": 1787 }, { "epoch": 1.31, "learning_rate": 1.2511711866308167e-05, "loss": 2.01, "step": 1788 }, { "epoch": 1.31, "learning_rate": 1.2504055286864087e-05, "loss": 2.0068, "step": 1789 }, { "epoch": 1.31, "learning_rate": 1.2496397140938918e-05, "loss": 2.1036, "step": 1790 }, { "epoch": 1.31, "learning_rate": 1.2488737433323429e-05, "loss": 2.0034, "step": 1791 }, { "epoch": 1.31, "learning_rate": 1.2481076168809352e-05, "loss": 1.9399, "step": 1792 }, { "epoch": 1.31, "learning_rate": 1.247341335218941e-05, "loss": 1.9491, "step": 1793 }, { "epoch": 1.31, "learning_rate": 1.246574898825729e-05, "loss": 1.9579, "step": 1794 }, { "epoch": 1.32, "learning_rate": 1.2458083081807645e-05, "loss": 2.0759, "step": 1795 }, { "epoch": 1.32, "learning_rate": 1.2450415637636097e-05, "loss": 2.0005, "step": 1796 }, { "epoch": 1.32, "learning_rate": 1.2442746660539226e-05, "loss": 1.9983, "step": 1797 }, { "epoch": 1.32, "learning_rate": 1.2435076155314572e-05, "loss": 2.1149, "step": 1798 }, { "epoch": 1.32, "learning_rate": 1.2427404126760634e-05, "loss": 2.0736, "step": 1799 }, { "epoch": 1.32, "learning_rate": 1.2419730579676861e-05, "loss": 2.1671, "step": 1800 }, { "epoch": 1.32, "learning_rate": 1.241205551886365e-05, "loss": 2.1298, "step": 1801 }, { "epoch": 1.32, "learning_rate": 1.2404378949122355e-05, "loss": 1.8711, "step": 1802 }, { "epoch": 1.32, "learning_rate": 1.2396700875255263e-05, "loss": 1.9345, "step": 1803 }, { "epoch": 1.32, "learning_rate": 1.23890213020656e-05, "loss": 1.9835, "step": 1804 }, { "epoch": 1.32, "learning_rate": 1.2381340234357544e-05, "loss": 1.9551, "step": 1805 }, { "epoch": 1.32, "learning_rate": 1.2373657676936194e-05, "loss": 2.0346, "step": 1806 }, { "epoch": 1.32, "learning_rate": 1.2365973634607588e-05, "loss": 1.8884, "step": 1807 }, { "epoch": 1.32, "learning_rate": 1.2358288112178694e-05, "loss": 1.8989, "step": 1808 }, { "epoch": 1.33, "learning_rate": 1.2350601114457397e-05, "loss": 1.9809, "step": 1809 }, { "epoch": 1.33, "learning_rate": 1.2342912646252518e-05, "loss": 1.8844, "step": 1810 }, { "epoch": 1.33, "learning_rate": 1.233522271237379e-05, "loss": 1.8547, "step": 1811 }, { "epoch": 1.33, "learning_rate": 1.2327531317631858e-05, "loss": 2.1425, "step": 1812 }, { "epoch": 1.33, "learning_rate": 1.2319838466838295e-05, "loss": 1.9771, "step": 1813 }, { "epoch": 1.33, "learning_rate": 1.2312144164805568e-05, "loss": 2.047, "step": 1814 }, { "epoch": 1.33, "learning_rate": 1.2304448416347066e-05, "loss": 2.0401, "step": 1815 }, { "epoch": 1.33, "learning_rate": 1.229675122627708e-05, "loss": 2.0045, "step": 1816 }, { "epoch": 1.33, "learning_rate": 1.2289052599410793e-05, "loss": 2.0183, "step": 1817 }, { "epoch": 1.33, "learning_rate": 1.2281352540564302e-05, "loss": 2.016, "step": 1818 }, { "epoch": 1.33, "learning_rate": 1.227365105455459e-05, "loss": 1.9788, "step": 1819 }, { "epoch": 1.33, "learning_rate": 1.2265948146199527e-05, "loss": 1.9991, "step": 1820 }, { "epoch": 1.33, "learning_rate": 1.225824382031789e-05, "loss": 1.9932, "step": 1821 }, { "epoch": 1.33, "learning_rate": 1.2250538081729326e-05, "loss": 1.805, "step": 1822 }, { "epoch": 1.34, "learning_rate": 1.224283093525438e-05, "loss": 1.9359, "step": 1823 }, { "epoch": 1.34, "learning_rate": 1.2235122385714467e-05, "loss": 2.0468, "step": 1824 }, { "epoch": 1.34, "learning_rate": 1.222741243793188e-05, "loss": 1.9435, "step": 1825 }, { "epoch": 1.34, "learning_rate": 1.22197010967298e-05, "loss": 2.0433, "step": 1826 }, { "epoch": 1.34, "learning_rate": 1.2211988366932262e-05, "loss": 2.0987, "step": 1827 }, { "epoch": 1.34, "learning_rate": 1.2204274253364178e-05, "loss": 2.1046, "step": 1828 }, { "epoch": 1.34, "learning_rate": 1.2196558760851327e-05, "loss": 2.0157, "step": 1829 }, { "epoch": 1.34, "learning_rate": 1.2188841894220351e-05, "loss": 1.9361, "step": 1830 }, { "epoch": 1.34, "learning_rate": 1.2181123658298745e-05, "loss": 2.0065, "step": 1831 }, { "epoch": 1.34, "learning_rate": 1.217340405791487e-05, "loss": 2.1047, "step": 1832 }, { "epoch": 1.34, "learning_rate": 1.2165683097897931e-05, "loss": 2.0334, "step": 1833 }, { "epoch": 1.34, "learning_rate": 1.2157960783077994e-05, "loss": 1.9468, "step": 1834 }, { "epoch": 1.34, "learning_rate": 1.215023711828596e-05, "loss": 1.8983, "step": 1835 }, { "epoch": 1.35, "learning_rate": 1.2142512108353583e-05, "loss": 2.1194, "step": 1836 }, { "epoch": 1.35, "learning_rate": 1.2134785758113458e-05, "loss": 2.119, "step": 1837 }, { "epoch": 1.35, "learning_rate": 1.2127058072399021e-05, "loss": 2.0391, "step": 1838 }, { "epoch": 1.35, "learning_rate": 1.2119329056044533e-05, "loss": 1.9531, "step": 1839 }, { "epoch": 1.35, "learning_rate": 1.21115987138851e-05, "loss": 1.975, "step": 1840 }, { "epoch": 1.35, "learning_rate": 1.2103867050756646e-05, "loss": 2.1578, "step": 1841 }, { "epoch": 1.35, "learning_rate": 1.209613407149593e-05, "loss": 1.9508, "step": 1842 }, { "epoch": 1.35, "learning_rate": 1.2088399780940533e-05, "loss": 1.7991, "step": 1843 }, { "epoch": 1.35, "learning_rate": 1.208066418392885e-05, "loss": 2.1528, "step": 1844 }, { "epoch": 1.35, "learning_rate": 1.2072927285300099e-05, "loss": 2.0821, "step": 1845 }, { "epoch": 1.35, "learning_rate": 1.2065189089894313e-05, "loss": 2.0108, "step": 1846 }, { "epoch": 1.35, "learning_rate": 1.2057449602552333e-05, "loss": 1.799, "step": 1847 }, { "epoch": 1.35, "learning_rate": 1.2049708828115811e-05, "loss": 1.9412, "step": 1848 }, { "epoch": 1.35, "learning_rate": 1.2041966771427203e-05, "loss": 1.8505, "step": 1849 }, { "epoch": 1.36, "learning_rate": 1.2034223437329764e-05, "loss": 2.1811, "step": 1850 }, { "epoch": 1.36, "learning_rate": 1.2026478830667551e-05, "loss": 2.0273, "step": 1851 }, { "epoch": 1.36, "learning_rate": 1.2018732956285422e-05, "loss": 1.8773, "step": 1852 }, { "epoch": 1.36, "learning_rate": 1.2010985819029014e-05, "loss": 1.8032, "step": 1853 }, { "epoch": 1.36, "learning_rate": 1.2003237423744773e-05, "loss": 2.1094, "step": 1854 }, { "epoch": 1.36, "learning_rate": 1.1995487775279917e-05, "loss": 2.0714, "step": 1855 }, { "epoch": 1.36, "learning_rate": 1.1987736878482453e-05, "loss": 1.8671, "step": 1856 }, { "epoch": 1.36, "learning_rate": 1.1979984738201172e-05, "loss": 2.0279, "step": 1857 }, { "epoch": 1.36, "learning_rate": 1.1972231359285635e-05, "loss": 2.2438, "step": 1858 }, { "epoch": 1.36, "learning_rate": 1.1964476746586186e-05, "loss": 2.1528, "step": 1859 }, { "epoch": 1.36, "learning_rate": 1.195672090495394e-05, "loss": 2.1754, "step": 1860 }, { "epoch": 1.36, "learning_rate": 1.1948963839240772e-05, "loss": 2.0578, "step": 1861 }, { "epoch": 1.36, "learning_rate": 1.1941205554299334e-05, "loss": 1.8997, "step": 1862 }, { "epoch": 1.36, "learning_rate": 1.1933446054983035e-05, "loss": 2.0028, "step": 1863 }, { "epoch": 1.37, "learning_rate": 1.1925685346146044e-05, "loss": 1.8885, "step": 1864 }, { "epoch": 1.37, "learning_rate": 1.1917923432643289e-05, "loss": 2.038, "step": 1865 }, { "epoch": 1.37, "learning_rate": 1.1910160319330446e-05, "loss": 1.8932, "step": 1866 }, { "epoch": 1.37, "learning_rate": 1.1902396011063948e-05, "loss": 1.9233, "step": 1867 }, { "epoch": 1.37, "learning_rate": 1.1894630512700976e-05, "loss": 1.9372, "step": 1868 }, { "epoch": 1.37, "learning_rate": 1.1886863829099442e-05, "loss": 1.8323, "step": 1869 }, { "epoch": 1.37, "learning_rate": 1.187909596511802e-05, "loss": 2.0883, "step": 1870 }, { "epoch": 1.37, "learning_rate": 1.1871326925616111e-05, "loss": 1.9053, "step": 1871 }, { "epoch": 1.37, "learning_rate": 1.1863556715453847e-05, "loss": 1.7928, "step": 1872 }, { "epoch": 1.37, "learning_rate": 1.1855785339492108e-05, "loss": 2.1635, "step": 1873 }, { "epoch": 1.37, "learning_rate": 1.1848012802592483e-05, "loss": 2.1588, "step": 1874 }, { "epoch": 1.37, "learning_rate": 1.1840239109617302e-05, "loss": 1.8459, "step": 1875 }, { "epoch": 1.37, "learning_rate": 1.1832464265429614e-05, "loss": 1.9853, "step": 1876 }, { "epoch": 1.38, "learning_rate": 1.1824688274893185e-05, "loss": 1.8969, "step": 1877 }, { "epoch": 1.38, "learning_rate": 1.1816911142872507e-05, "loss": 2.1751, "step": 1878 }, { "epoch": 1.38, "learning_rate": 1.1809132874232775e-05, "loss": 2.0232, "step": 1879 }, { "epoch": 1.38, "learning_rate": 1.1801353473839902e-05, "loss": 1.8647, "step": 1880 }, { "epoch": 1.38, "learning_rate": 1.179357294656051e-05, "loss": 2.2059, "step": 1881 }, { "epoch": 1.38, "learning_rate": 1.1785791297261917e-05, "loss": 2.0779, "step": 1882 }, { "epoch": 1.38, "learning_rate": 1.1778008530812155e-05, "loss": 2.0199, "step": 1883 }, { "epoch": 1.38, "learning_rate": 1.1770224652079943e-05, "loss": 2.0617, "step": 1884 }, { "epoch": 1.38, "learning_rate": 1.1762439665934709e-05, "loss": 2.0258, "step": 1885 }, { "epoch": 1.38, "learning_rate": 1.175465357724656e-05, "loss": 2.0644, "step": 1886 }, { "epoch": 1.38, "learning_rate": 1.1746866390886304e-05, "loss": 1.937, "step": 1887 }, { "epoch": 1.38, "learning_rate": 1.1739078111725429e-05, "loss": 1.9512, "step": 1888 }, { "epoch": 1.38, "learning_rate": 1.1731288744636106e-05, "loss": 1.9324, "step": 1889 }, { "epoch": 1.38, "learning_rate": 1.1723498294491196e-05, "loss": 1.9389, "step": 1890 }, { "epoch": 1.39, "learning_rate": 1.1715706766164223e-05, "loss": 1.9946, "step": 1891 }, { "epoch": 1.39, "learning_rate": 1.1707914164529398e-05, "loss": 1.9227, "step": 1892 }, { "epoch": 1.39, "learning_rate": 1.1700120494461595e-05, "loss": 2.01, "step": 1893 }, { "epoch": 1.39, "learning_rate": 1.1692325760836366e-05, "loss": 2.0732, "step": 1894 }, { "epoch": 1.39, "learning_rate": 1.1684529968529916e-05, "loss": 2.1251, "step": 1895 }, { "epoch": 1.39, "learning_rate": 1.1676733122419124e-05, "loss": 2.0848, "step": 1896 }, { "epoch": 1.39, "learning_rate": 1.1668935227381517e-05, "loss": 1.9832, "step": 1897 }, { "epoch": 1.39, "learning_rate": 1.1661136288295288e-05, "loss": 2.0907, "step": 1898 }, { "epoch": 1.39, "learning_rate": 1.165333631003928e-05, "loss": 1.9153, "step": 1899 }, { "epoch": 1.39, "learning_rate": 1.1645535297492982e-05, "loss": 2.0165, "step": 1900 }, { "epoch": 1.39, "learning_rate": 1.1637733255536539e-05, "loss": 1.9364, "step": 1901 }, { "epoch": 1.39, "learning_rate": 1.1629930189050724e-05, "loss": 1.9598, "step": 1902 }, { "epoch": 1.39, "learning_rate": 1.1622126102916971e-05, "loss": 1.9687, "step": 1903 }, { "epoch": 1.39, "learning_rate": 1.1614321002017342e-05, "loss": 2.0158, "step": 1904 }, { "epoch": 1.4, "learning_rate": 1.1606514891234526e-05, "loss": 2.1448, "step": 1905 }, { "epoch": 1.4, "learning_rate": 1.1598707775451853e-05, "loss": 2.117, "step": 1906 }, { "epoch": 1.4, "learning_rate": 1.159089965955329e-05, "loss": 1.8543, "step": 1907 }, { "epoch": 1.4, "learning_rate": 1.158309054842341e-05, "loss": 1.8398, "step": 1908 }, { "epoch": 1.4, "learning_rate": 1.1575280446947425e-05, "loss": 2.1176, "step": 1909 }, { "epoch": 1.4, "learning_rate": 1.1567469360011155e-05, "loss": 2.0414, "step": 1910 }, { "epoch": 1.4, "learning_rate": 1.1559657292501044e-05, "loss": 2.0022, "step": 1911 }, { "epoch": 1.4, "learning_rate": 1.1551844249304147e-05, "loss": 2.0445, "step": 1912 }, { "epoch": 1.4, "learning_rate": 1.154403023530813e-05, "loss": 1.9602, "step": 1913 }, { "epoch": 1.4, "learning_rate": 1.1536215255401265e-05, "loss": 2.1525, "step": 1914 }, { "epoch": 1.4, "learning_rate": 1.1528399314472429e-05, "loss": 1.9582, "step": 1915 }, { "epoch": 1.4, "learning_rate": 1.15205824174111e-05, "loss": 2.1383, "step": 1916 }, { "epoch": 1.4, "learning_rate": 1.151276456910735e-05, "loss": 1.937, "step": 1917 }, { "epoch": 1.41, "learning_rate": 1.1504945774451862e-05, "loss": 1.8241, "step": 1918 }, { "epoch": 1.41, "learning_rate": 1.1497126038335891e-05, "loss": 1.9676, "step": 1919 }, { "epoch": 1.41, "learning_rate": 1.1489305365651286e-05, "loss": 2.2148, "step": 1920 }, { "epoch": 1.41, "learning_rate": 1.1481483761290496e-05, "loss": 1.9866, "step": 1921 }, { "epoch": 1.41, "learning_rate": 1.1473661230146531e-05, "loss": 1.984, "step": 1922 }, { "epoch": 1.41, "learning_rate": 1.1465837777113e-05, "loss": 1.9511, "step": 1923 }, { "epoch": 1.41, "learning_rate": 1.1458013407084075e-05, "loss": 1.8605, "step": 1924 }, { "epoch": 1.41, "learning_rate": 1.1450188124954512e-05, "loss": 1.8295, "step": 1925 }, { "epoch": 1.41, "learning_rate": 1.1442361935619628e-05, "loss": 1.8875, "step": 1926 }, { "epoch": 1.41, "learning_rate": 1.1434534843975318e-05, "loss": 1.897, "step": 1927 }, { "epoch": 1.41, "learning_rate": 1.1426706854918028e-05, "loss": 1.9223, "step": 1928 }, { "epoch": 1.41, "learning_rate": 1.1418877973344781e-05, "loss": 1.981, "step": 1929 }, { "epoch": 1.41, "learning_rate": 1.1411048204153147e-05, "loss": 1.9218, "step": 1930 }, { "epoch": 1.41, "learning_rate": 1.1403217552241254e-05, "loss": 1.9843, "step": 1931 }, { "epoch": 1.42, "learning_rate": 1.1395386022507786e-05, "loss": 2.0351, "step": 1932 }, { "epoch": 1.42, "learning_rate": 1.1387553619851969e-05, "loss": 2.0117, "step": 1933 }, { "epoch": 1.42, "learning_rate": 1.1379720349173586e-05, "loss": 1.7959, "step": 1934 }, { "epoch": 1.42, "learning_rate": 1.1371886215372952e-05, "loss": 1.8886, "step": 1935 }, { "epoch": 1.42, "learning_rate": 1.136405122335092e-05, "loss": 1.9286, "step": 1936 }, { "epoch": 1.42, "learning_rate": 1.1356215378008898e-05, "loss": 1.9627, "step": 1937 }, { "epoch": 1.42, "learning_rate": 1.1348378684248806e-05, "loss": 1.9015, "step": 1938 }, { "epoch": 1.42, "learning_rate": 1.134054114697311e-05, "loss": 1.8669, "step": 1939 }, { "epoch": 1.42, "learning_rate": 1.13327027710848e-05, "loss": 1.9101, "step": 1940 }, { "epoch": 1.42, "learning_rate": 1.1324863561487383e-05, "loss": 2.1009, "step": 1941 }, { "epoch": 1.42, "learning_rate": 1.1317023523084898e-05, "loss": 1.9558, "step": 1942 }, { "epoch": 1.42, "learning_rate": 1.1309182660781899e-05, "loss": 2.0208, "step": 1943 }, { "epoch": 1.42, "learning_rate": 1.1301340979483448e-05, "loss": 2.1556, "step": 1944 }, { "epoch": 1.42, "learning_rate": 1.1293498484095134e-05, "loss": 1.997, "step": 1945 }, { "epoch": 1.43, "learning_rate": 1.128565517952304e-05, "loss": 2.2433, "step": 1946 }, { "epoch": 1.43, "learning_rate": 1.1277811070673765e-05, "loss": 1.9879, "step": 1947 }, { "epoch": 1.43, "learning_rate": 1.1269966162454412e-05, "loss": 1.883, "step": 1948 }, { "epoch": 1.43, "learning_rate": 1.1262120459772574e-05, "loss": 1.8826, "step": 1949 }, { "epoch": 1.43, "learning_rate": 1.1254273967536352e-05, "loss": 1.9187, "step": 1950 }, { "epoch": 1.43, "learning_rate": 1.1246426690654335e-05, "loss": 2.0955, "step": 1951 }, { "epoch": 1.43, "learning_rate": 1.1238578634035604e-05, "loss": 2.0329, "step": 1952 }, { "epoch": 1.43, "learning_rate": 1.1230729802589727e-05, "loss": 1.9525, "step": 1953 }, { "epoch": 1.43, "learning_rate": 1.122288020122676e-05, "loss": 1.8602, "step": 1954 }, { "epoch": 1.43, "learning_rate": 1.1215029834857236e-05, "loss": 1.9588, "step": 1955 }, { "epoch": 1.43, "learning_rate": 1.1207178708392173e-05, "loss": 2.1249, "step": 1956 }, { "epoch": 1.43, "learning_rate": 1.1199326826743055e-05, "loss": 1.9736, "step": 1957 }, { "epoch": 1.43, "learning_rate": 1.119147419482185e-05, "loss": 1.9684, "step": 1958 }, { "epoch": 1.44, "learning_rate": 1.1183620817540985e-05, "loss": 2.0251, "step": 1959 }, { "epoch": 1.44, "learning_rate": 1.1175766699813355e-05, "loss": 2.0288, "step": 1960 }, { "epoch": 1.44, "learning_rate": 1.1167911846552326e-05, "loss": 1.9625, "step": 1961 }, { "epoch": 1.44, "learning_rate": 1.1160056262671718e-05, "loss": 1.9486, "step": 1962 }, { "epoch": 1.44, "learning_rate": 1.1152199953085808e-05, "loss": 1.9257, "step": 1963 }, { "epoch": 1.44, "learning_rate": 1.1144342922709326e-05, "loss": 1.947, "step": 1964 }, { "epoch": 1.44, "learning_rate": 1.113648517645746e-05, "loss": 2.0194, "step": 1965 }, { "epoch": 1.44, "learning_rate": 1.1128626719245835e-05, "loss": 2.1919, "step": 1966 }, { "epoch": 1.44, "learning_rate": 1.1120767555990528e-05, "loss": 1.7747, "step": 1967 }, { "epoch": 1.44, "learning_rate": 1.1112907691608059e-05, "loss": 1.995, "step": 1968 }, { "epoch": 1.44, "learning_rate": 1.1105047131015378e-05, "loss": 1.884, "step": 1969 }, { "epoch": 1.44, "learning_rate": 1.1097185879129884e-05, "loss": 1.9481, "step": 1970 }, { "epoch": 1.44, "learning_rate": 1.1089323940869392e-05, "loss": 1.955, "step": 1971 }, { "epoch": 1.44, "learning_rate": 1.108146132115216e-05, "loss": 2.1263, "step": 1972 }, { "epoch": 1.45, "learning_rate": 1.107359802489687e-05, "loss": 2.1154, "step": 1973 }, { "epoch": 1.45, "learning_rate": 1.1065734057022622e-05, "loss": 1.9521, "step": 1974 }, { "epoch": 1.45, "learning_rate": 1.1057869422448934e-05, "loss": 1.9798, "step": 1975 }, { "epoch": 1.45, "learning_rate": 1.1050004126095755e-05, "loss": 1.9428, "step": 1976 }, { "epoch": 1.45, "learning_rate": 1.104213817288343e-05, "loss": 1.934, "step": 1977 }, { "epoch": 1.45, "learning_rate": 1.1034271567732728e-05, "loss": 1.9923, "step": 1978 }, { "epoch": 1.45, "learning_rate": 1.1026404315564822e-05, "loss": 1.9848, "step": 1979 }, { "epoch": 1.45, "learning_rate": 1.1018536421301287e-05, "loss": 2.042, "step": 1980 }, { "epoch": 1.45, "learning_rate": 1.1010667889864106e-05, "loss": 2.0386, "step": 1981 }, { "epoch": 1.45, "learning_rate": 1.1002798726175654e-05, "loss": 2.0582, "step": 1982 }, { "epoch": 1.45, "learning_rate": 1.0994928935158703e-05, "loss": 1.8663, "step": 1983 }, { "epoch": 1.45, "learning_rate": 1.0987058521736422e-05, "loss": 2.0507, "step": 1984 }, { "epoch": 1.45, "learning_rate": 1.097918749083236e-05, "loss": 1.855, "step": 1985 }, { "epoch": 1.45, "learning_rate": 1.0971315847370463e-05, "loss": 1.9139, "step": 1986 }, { "epoch": 1.46, "learning_rate": 1.0963443596275057e-05, "loss": 1.8852, "step": 1987 }, { "epoch": 1.46, "learning_rate": 1.0955570742470842e-05, "loss": 1.8363, "step": 1988 }, { "epoch": 1.46, "learning_rate": 1.0947697290882903e-05, "loss": 1.9261, "step": 1989 }, { "epoch": 1.46, "learning_rate": 1.0939823246436693e-05, "loss": 1.91, "step": 1990 }, { "epoch": 1.46, "learning_rate": 1.0931948614058042e-05, "loss": 2.0014, "step": 1991 }, { "epoch": 1.46, "learning_rate": 1.092407339867314e-05, "loss": 1.9382, "step": 1992 }, { "epoch": 1.46, "learning_rate": 1.0916197605208547e-05, "loss": 1.9901, "step": 1993 }, { "epoch": 1.46, "learning_rate": 1.0908321238591185e-05, "loss": 1.891, "step": 1994 }, { "epoch": 1.46, "learning_rate": 1.0900444303748333e-05, "loss": 1.9424, "step": 1995 }, { "epoch": 1.46, "learning_rate": 1.0892566805607624e-05, "loss": 1.9157, "step": 1996 }, { "epoch": 1.46, "learning_rate": 1.0884688749097048e-05, "loss": 1.9645, "step": 1997 }, { "epoch": 1.46, "learning_rate": 1.087681013914494e-05, "loss": 1.9274, "step": 1998 }, { "epoch": 1.46, "learning_rate": 1.086893098067998e-05, "loss": 1.9154, "step": 1999 }, { "epoch": 1.47, "learning_rate": 1.0861051278631194e-05, "loss": 1.854, "step": 2000 }, { "epoch": 1.47, "learning_rate": 1.0853171037927952e-05, "loss": 2.0462, "step": 2001 }, { "epoch": 1.47, "learning_rate": 1.084529026349995e-05, "loss": 2.1246, "step": 2002 }, { "epoch": 1.47, "learning_rate": 1.0837408960277233e-05, "loss": 1.9904, "step": 2003 }, { "epoch": 1.47, "learning_rate": 1.0829527133190163e-05, "loss": 1.9713, "step": 2004 }, { "epoch": 1.47, "learning_rate": 1.0821644787169436e-05, "loss": 1.941, "step": 2005 }, { "epoch": 1.47, "learning_rate": 1.0813761927146072e-05, "loss": 1.9422, "step": 2006 }, { "epoch": 1.47, "learning_rate": 1.080587855805141e-05, "loss": 1.9034, "step": 2007 }, { "epoch": 1.47, "learning_rate": 1.079799468481711e-05, "loss": 2.0222, "step": 2008 }, { "epoch": 1.47, "learning_rate": 1.0790110312375152e-05, "loss": 2.0904, "step": 2009 }, { "epoch": 1.47, "learning_rate": 1.0782225445657818e-05, "loss": 2.1518, "step": 2010 }, { "epoch": 1.47, "learning_rate": 1.0774340089597711e-05, "loss": 1.9044, "step": 2011 }, { "epoch": 1.47, "learning_rate": 1.0766454249127725e-05, "loss": 1.9909, "step": 2012 }, { "epoch": 1.47, "learning_rate": 1.0758567929181074e-05, "loss": 2.0306, "step": 2013 }, { "epoch": 1.48, "learning_rate": 1.0750681134691259e-05, "loss": 1.9337, "step": 2014 }, { "epoch": 1.48, "learning_rate": 1.074279387059208e-05, "loss": 1.8912, "step": 2015 }, { "epoch": 1.48, "learning_rate": 1.0734906141817638e-05, "loss": 1.9519, "step": 2016 }, { "epoch": 1.48, "learning_rate": 1.0727017953302323e-05, "loss": 1.7415, "step": 2017 }, { "epoch": 1.48, "learning_rate": 1.07191293099808e-05, "loss": 1.776, "step": 2018 }, { "epoch": 1.48, "learning_rate": 1.0711240216788036e-05, "loss": 1.9651, "step": 2019 }, { "epoch": 1.48, "learning_rate": 1.0703350678659275e-05, "loss": 1.8408, "step": 2020 }, { "epoch": 1.48, "learning_rate": 1.0695460700530022e-05, "loss": 1.9509, "step": 2021 }, { "epoch": 1.48, "learning_rate": 1.0687570287336082e-05, "loss": 2.0783, "step": 2022 }, { "epoch": 1.48, "learning_rate": 1.067967944401352e-05, "loss": 1.9682, "step": 2023 }, { "epoch": 1.48, "learning_rate": 1.067178817549867e-05, "loss": 1.9994, "step": 2024 }, { "epoch": 1.48, "learning_rate": 1.0663896486728134e-05, "loss": 2.0783, "step": 2025 }, { "epoch": 1.48, "learning_rate": 1.0656004382638774e-05, "loss": 2.0178, "step": 2026 }, { "epoch": 1.48, "learning_rate": 1.0648111868167716e-05, "loss": 2.0805, "step": 2027 }, { "epoch": 1.49, "learning_rate": 1.0640218948252345e-05, "loss": 1.7947, "step": 2028 }, { "epoch": 1.49, "learning_rate": 1.0632325627830289e-05, "loss": 2.1146, "step": 2029 }, { "epoch": 1.49, "learning_rate": 1.0624431911839434e-05, "loss": 2.1877, "step": 2030 }, { "epoch": 1.49, "learning_rate": 1.0616537805217918e-05, "loss": 2.057, "step": 2031 }, { "epoch": 1.49, "learning_rate": 1.060864331290411e-05, "loss": 1.98, "step": 2032 }, { "epoch": 1.49, "learning_rate": 1.0600748439836631e-05, "loss": 1.9619, "step": 2033 }, { "epoch": 1.49, "learning_rate": 1.0592853190954345e-05, "loss": 1.8674, "step": 2034 }, { "epoch": 1.49, "learning_rate": 1.0584957571196331e-05, "loss": 1.9113, "step": 2035 }, { "epoch": 1.49, "learning_rate": 1.0577061585501922e-05, "loss": 1.996, "step": 2036 }, { "epoch": 1.49, "learning_rate": 1.0569165238810666e-05, "loss": 1.7819, "step": 2037 }, { "epoch": 1.49, "learning_rate": 1.0561268536062342e-05, "loss": 1.9495, "step": 2038 }, { "epoch": 1.49, "learning_rate": 1.055337148219695e-05, "loss": 1.8352, "step": 2039 }, { "epoch": 1.49, "learning_rate": 1.0545474082154711e-05, "loss": 1.8979, "step": 2040 }, { "epoch": 1.5, "learning_rate": 1.0537576340876063e-05, "loss": 1.7999, "step": 2041 }, { "epoch": 1.5, "learning_rate": 1.0529678263301659e-05, "loss": 2.2351, "step": 2042 }, { "epoch": 1.5, "learning_rate": 1.0521779854372353e-05, "loss": 1.9396, "step": 2043 }, { "epoch": 1.5, "learning_rate": 1.0513881119029223e-05, "loss": 1.9128, "step": 2044 }, { "epoch": 1.5, "learning_rate": 1.0505982062213536e-05, "loss": 1.9682, "step": 2045 }, { "epoch": 1.5, "learning_rate": 1.0498082688866762e-05, "loss": 1.8335, "step": 2046 }, { "epoch": 1.5, "learning_rate": 1.0490183003930579e-05, "loss": 1.9524, "step": 2047 }, { "epoch": 1.5, "learning_rate": 1.0482283012346848e-05, "loss": 1.9833, "step": 2048 }, { "epoch": 1.5, "learning_rate": 1.0474382719057633e-05, "loss": 2.1011, "step": 2049 }, { "epoch": 1.5, "learning_rate": 1.0466482129005177e-05, "loss": 1.9917, "step": 2050 }, { "epoch": 1.5, "learning_rate": 1.0458581247131918e-05, "loss": 2.117, "step": 2051 }, { "epoch": 1.5, "learning_rate": 1.0450680078380465e-05, "loss": 2.0558, "step": 2052 }, { "epoch": 1.5, "learning_rate": 1.0442778627693617e-05, "loss": 1.842, "step": 2053 }, { "epoch": 1.5, "learning_rate": 1.0434876900014341e-05, "loss": 1.9437, "step": 2054 }, { "epoch": 1.51, "learning_rate": 1.0426974900285784e-05, "loss": 1.9603, "step": 2055 }, { "epoch": 1.51, "learning_rate": 1.0419072633451264e-05, "loss": 1.9584, "step": 2056 }, { "epoch": 1.51, "learning_rate": 1.0411170104454256e-05, "loss": 1.8934, "step": 2057 }, { "epoch": 1.51, "learning_rate": 1.040326731823841e-05, "loss": 1.9609, "step": 2058 }, { "epoch": 1.51, "learning_rate": 1.0395364279747536e-05, "loss": 2.1649, "step": 2059 }, { "epoch": 1.51, "learning_rate": 1.0387460993925588e-05, "loss": 1.8018, "step": 2060 }, { "epoch": 1.51, "learning_rate": 1.0379557465716696e-05, "loss": 1.8574, "step": 2061 }, { "epoch": 1.51, "learning_rate": 1.0371653700065124e-05, "loss": 1.9446, "step": 2062 }, { "epoch": 1.51, "learning_rate": 1.0363749701915291e-05, "loss": 2.1915, "step": 2063 }, { "epoch": 1.51, "learning_rate": 1.035584547621177e-05, "loss": 2.0961, "step": 2064 }, { "epoch": 1.51, "learning_rate": 1.0347941027899258e-05, "loss": 2.0349, "step": 2065 }, { "epoch": 1.51, "learning_rate": 1.0340036361922609e-05, "loss": 1.8688, "step": 2066 }, { "epoch": 1.51, "learning_rate": 1.0332131483226805e-05, "loss": 2.0881, "step": 2067 }, { "epoch": 1.52, "learning_rate": 1.0324226396756954e-05, "loss": 1.9843, "step": 2068 }, { "epoch": 1.52, "learning_rate": 1.0316321107458307e-05, "loss": 2.1201, "step": 2069 }, { "epoch": 1.52, "learning_rate": 1.030841562027624e-05, "loss": 1.8094, "step": 2070 }, { "epoch": 1.52, "learning_rate": 1.0300509940156243e-05, "loss": 2.0308, "step": 2071 }, { "epoch": 1.52, "learning_rate": 1.0292604072043936e-05, "loss": 1.8089, "step": 2072 }, { "epoch": 1.52, "learning_rate": 1.0284698020885054e-05, "loss": 1.9271, "step": 2073 }, { "epoch": 1.52, "learning_rate": 1.0276791791625444e-05, "loss": 2.1594, "step": 2074 }, { "epoch": 1.52, "learning_rate": 1.0268885389211073e-05, "loss": 2.2345, "step": 2075 }, { "epoch": 1.52, "learning_rate": 1.0260978818588e-05, "loss": 1.8087, "step": 2076 }, { "epoch": 1.52, "learning_rate": 1.0253072084702404e-05, "loss": 1.9609, "step": 2077 }, { "epoch": 1.52, "learning_rate": 1.0245165192500564e-05, "loss": 2.0407, "step": 2078 }, { "epoch": 1.52, "learning_rate": 1.0237258146928849e-05, "loss": 1.8403, "step": 2079 }, { "epoch": 1.52, "learning_rate": 1.0229350952933737e-05, "loss": 1.9326, "step": 2080 }, { "epoch": 1.52, "learning_rate": 1.0221443615461784e-05, "loss": 2.0596, "step": 2081 }, { "epoch": 1.53, "learning_rate": 1.0213536139459651e-05, "loss": 1.8542, "step": 2082 }, { "epoch": 1.53, "learning_rate": 1.0205628529874077e-05, "loss": 2.0601, "step": 2083 }, { "epoch": 1.53, "learning_rate": 1.0197720791651884e-05, "loss": 1.8914, "step": 2084 }, { "epoch": 1.53, "learning_rate": 1.0189812929739976e-05, "loss": 2.0366, "step": 2085 }, { "epoch": 1.53, "learning_rate": 1.018190494908534e-05, "loss": 2.2147, "step": 2086 }, { "epoch": 1.53, "learning_rate": 1.0173996854635024e-05, "loss": 1.9756, "step": 2087 }, { "epoch": 1.53, "learning_rate": 1.016608865133616e-05, "loss": 1.8391, "step": 2088 }, { "epoch": 1.53, "learning_rate": 1.0158180344135948e-05, "loss": 1.9483, "step": 2089 }, { "epoch": 1.53, "learning_rate": 1.0150271937981644e-05, "loss": 1.8839, "step": 2090 }, { "epoch": 1.53, "learning_rate": 1.0142363437820566e-05, "loss": 1.9491, "step": 2091 }, { "epoch": 1.53, "learning_rate": 1.0134454848600102e-05, "loss": 1.9713, "step": 2092 }, { "epoch": 1.53, "learning_rate": 1.0126546175267683e-05, "loss": 1.8647, "step": 2093 }, { "epoch": 1.53, "learning_rate": 1.0118637422770803e-05, "loss": 2.0348, "step": 2094 }, { "epoch": 1.53, "learning_rate": 1.0110728596057e-05, "loss": 2.0685, "step": 2095 }, { "epoch": 1.54, "learning_rate": 1.0102819700073859e-05, "loss": 2.0859, "step": 2096 }, { "epoch": 1.54, "learning_rate": 1.0094910739769007e-05, "loss": 1.9422, "step": 2097 }, { "epoch": 1.54, "learning_rate": 1.008700172009012e-05, "loss": 1.9316, "step": 2098 }, { "epoch": 1.54, "learning_rate": 1.0079092645984894e-05, "loss": 2.0572, "step": 2099 }, { "epoch": 1.54, "learning_rate": 1.0071183522401078e-05, "loss": 1.784, "step": 2100 }, { "epoch": 1.54, "learning_rate": 1.0063274354286436e-05, "loss": 2.1043, "step": 2101 }, { "epoch": 1.54, "learning_rate": 1.0055365146588772e-05, "loss": 2.0185, "step": 2102 }, { "epoch": 1.54, "learning_rate": 1.004745590425591e-05, "loss": 2.0662, "step": 2103 }, { "epoch": 1.54, "learning_rate": 1.0039546632235689e-05, "loss": 1.9971, "step": 2104 }, { "epoch": 1.54, "learning_rate": 1.0031637335475981e-05, "loss": 1.9029, "step": 2105 }, { "epoch": 1.54, "learning_rate": 1.0023728018924663e-05, "loss": 2.2556, "step": 2106 }, { "epoch": 1.54, "learning_rate": 1.0015818687529624e-05, "loss": 2.0451, "step": 2107 }, { "epoch": 1.54, "learning_rate": 1.0007909346238769e-05, "loss": 2.1451, "step": 2108 }, { "epoch": 1.55, "learning_rate": 1e-05, "loss": 2.0563, "step": 2109 }, { "epoch": 1.55, "learning_rate": 9.992090653761233e-06, "loss": 2.1319, "step": 2110 }, { "epoch": 1.55, "learning_rate": 9.98418131247038e-06, "loss": 2.0872, "step": 2111 }, { "epoch": 1.55, "learning_rate": 9.976271981075339e-06, "loss": 2.1106, "step": 2112 }, { "epoch": 1.55, "learning_rate": 9.968362664524022e-06, "loss": 2.0471, "step": 2113 }, { "epoch": 1.55, "learning_rate": 9.960453367764311e-06, "loss": 1.9748, "step": 2114 }, { "epoch": 1.55, "learning_rate": 9.952544095744092e-06, "loss": 2.1032, "step": 2115 }, { "epoch": 1.55, "learning_rate": 9.944634853411228e-06, "loss": 1.7613, "step": 2116 }, { "epoch": 1.55, "learning_rate": 9.93672564571357e-06, "loss": 1.7576, "step": 2117 }, { "epoch": 1.55, "learning_rate": 9.928816477598927e-06, "loss": 2.0499, "step": 2118 }, { "epoch": 1.55, "learning_rate": 9.920907354015107e-06, "loss": 2.0096, "step": 2119 }, { "epoch": 1.55, "learning_rate": 9.912998279909885e-06, "loss": 2.0999, "step": 2120 }, { "epoch": 1.55, "learning_rate": 9.905089260230994e-06, "loss": 1.8091, "step": 2121 }, { "epoch": 1.55, "learning_rate": 9.897180299926145e-06, "loss": 1.8778, "step": 2122 }, { "epoch": 1.56, "learning_rate": 9.889271403943002e-06, "loss": 1.9164, "step": 2123 }, { "epoch": 1.56, "learning_rate": 9.881362577229199e-06, "loss": 1.8887, "step": 2124 }, { "epoch": 1.56, "learning_rate": 9.87345382473232e-06, "loss": 1.9295, "step": 2125 }, { "epoch": 1.56, "learning_rate": 9.865545151399903e-06, "loss": 1.7956, "step": 2126 }, { "epoch": 1.56, "learning_rate": 9.857636562179439e-06, "loss": 1.8524, "step": 2127 }, { "epoch": 1.56, "learning_rate": 9.84972806201836e-06, "loss": 1.8627, "step": 2128 }, { "epoch": 1.56, "learning_rate": 9.841819655864052e-06, "loss": 1.9908, "step": 2129 }, { "epoch": 1.56, "learning_rate": 9.833911348663838e-06, "loss": 1.9659, "step": 2130 }, { "epoch": 1.56, "learning_rate": 9.826003145364976e-06, "loss": 1.85, "step": 2131 }, { "epoch": 1.56, "learning_rate": 9.818095050914664e-06, "loss": 2.1859, "step": 2132 }, { "epoch": 1.56, "learning_rate": 9.810187070260029e-06, "loss": 1.9548, "step": 2133 }, { "epoch": 1.56, "learning_rate": 9.80227920834812e-06, "loss": 2.0063, "step": 2134 }, { "epoch": 1.56, "learning_rate": 9.794371470125927e-06, "loss": 1.9029, "step": 2135 }, { "epoch": 1.56, "learning_rate": 9.786463860540352e-06, "loss": 2.0958, "step": 2136 }, { "epoch": 1.57, "learning_rate": 9.778556384538217e-06, "loss": 1.9452, "step": 2137 }, { "epoch": 1.57, "learning_rate": 9.770649047066267e-06, "loss": 1.8272, "step": 2138 }, { "epoch": 1.57, "learning_rate": 9.762741853071153e-06, "loss": 1.8262, "step": 2139 }, { "epoch": 1.57, "learning_rate": 9.75483480749944e-06, "loss": 2.029, "step": 2140 }, { "epoch": 1.57, "learning_rate": 9.7469279152976e-06, "loss": 1.8722, "step": 2141 }, { "epoch": 1.57, "learning_rate": 9.739021181412003e-06, "loss": 1.8791, "step": 2142 }, { "epoch": 1.57, "learning_rate": 9.73111461078893e-06, "loss": 1.9643, "step": 2143 }, { "epoch": 1.57, "learning_rate": 9.723208208374556e-06, "loss": 1.8888, "step": 2144 }, { "epoch": 1.57, "learning_rate": 9.715301979114946e-06, "loss": 2.0345, "step": 2145 }, { "epoch": 1.57, "learning_rate": 9.707395927956065e-06, "loss": 2.0765, "step": 2146 }, { "epoch": 1.57, "learning_rate": 9.699490059843759e-06, "loss": 1.9925, "step": 2147 }, { "epoch": 1.57, "learning_rate": 9.691584379723766e-06, "loss": 1.8501, "step": 2148 }, { "epoch": 1.57, "learning_rate": 9.683678892541696e-06, "loss": 2.1321, "step": 2149 }, { "epoch": 1.58, "learning_rate": 9.67577360324305e-06, "loss": 2.008, "step": 2150 }, { "epoch": 1.58, "learning_rate": 9.6678685167732e-06, "loss": 1.9132, "step": 2151 }, { "epoch": 1.58, "learning_rate": 9.659963638077393e-06, "loss": 1.9132, "step": 2152 }, { "epoch": 1.58, "learning_rate": 9.652058972100744e-06, "loss": 1.7593, "step": 2153 }, { "epoch": 1.58, "learning_rate": 9.644154523788233e-06, "loss": 2.1163, "step": 2154 }, { "epoch": 1.58, "learning_rate": 9.63625029808471e-06, "loss": 1.9922, "step": 2155 }, { "epoch": 1.58, "learning_rate": 9.62834629993488e-06, "loss": 1.9556, "step": 2156 }, { "epoch": 1.58, "learning_rate": 9.620442534283308e-06, "loss": 1.9432, "step": 2157 }, { "epoch": 1.58, "learning_rate": 9.612539006074415e-06, "loss": 1.8931, "step": 2158 }, { "epoch": 1.58, "learning_rate": 9.60463572025247e-06, "loss": 2.0941, "step": 2159 }, { "epoch": 1.58, "learning_rate": 9.596732681761591e-06, "loss": 1.9717, "step": 2160 }, { "epoch": 1.58, "learning_rate": 9.588829895545746e-06, "loss": 1.9893, "step": 2161 }, { "epoch": 1.58, "learning_rate": 9.580927366548738e-06, "loss": 1.9246, "step": 2162 }, { "epoch": 1.58, "learning_rate": 9.573025099714218e-06, "loss": 1.7698, "step": 2163 }, { "epoch": 1.59, "learning_rate": 9.565123099985665e-06, "loss": 1.9538, "step": 2164 }, { "epoch": 1.59, "learning_rate": 9.55722137230639e-06, "loss": 1.9194, "step": 2165 }, { "epoch": 1.59, "learning_rate": 9.54931992161954e-06, "loss": 2.0387, "step": 2166 }, { "epoch": 1.59, "learning_rate": 9.541418752868087e-06, "loss": 2.0579, "step": 2167 }, { "epoch": 1.59, "learning_rate": 9.533517870994824e-06, "loss": 1.9273, "step": 2168 }, { "epoch": 1.59, "learning_rate": 9.52561728094237e-06, "loss": 1.9456, "step": 2169 }, { "epoch": 1.59, "learning_rate": 9.517716987653153e-06, "loss": 1.8423, "step": 2170 }, { "epoch": 1.59, "learning_rate": 9.509816996069425e-06, "loss": 2.1197, "step": 2171 }, { "epoch": 1.59, "learning_rate": 9.501917311133241e-06, "loss": 1.7319, "step": 2172 }, { "epoch": 1.59, "learning_rate": 9.494017937786469e-06, "loss": 1.9672, "step": 2173 }, { "epoch": 1.59, "learning_rate": 9.486118880970779e-06, "loss": 1.8874, "step": 2174 }, { "epoch": 1.59, "learning_rate": 9.478220145627645e-06, "loss": 2.1378, "step": 2175 }, { "epoch": 1.59, "learning_rate": 9.470321736698343e-06, "loss": 1.9759, "step": 2176 }, { "epoch": 1.59, "learning_rate": 9.462423659123937e-06, "loss": 2.1137, "step": 2177 }, { "epoch": 1.6, "learning_rate": 9.454525917845289e-06, "loss": 1.9526, "step": 2178 }, { "epoch": 1.6, "learning_rate": 9.446628517803055e-06, "loss": 2.0217, "step": 2179 }, { "epoch": 1.6, "learning_rate": 9.438731463937663e-06, "loss": 1.9708, "step": 2180 }, { "epoch": 1.6, "learning_rate": 9.430834761189339e-06, "loss": 1.9421, "step": 2181 }, { "epoch": 1.6, "learning_rate": 9.422938414498081e-06, "loss": 2.0818, "step": 2182 }, { "epoch": 1.6, "learning_rate": 9.41504242880367e-06, "loss": 1.8742, "step": 2183 }, { "epoch": 1.6, "learning_rate": 9.407146809045658e-06, "loss": 2.1136, "step": 2184 }, { "epoch": 1.6, "learning_rate": 9.39925156016337e-06, "loss": 1.8925, "step": 2185 }, { "epoch": 1.6, "learning_rate": 9.391356687095892e-06, "loss": 1.967, "step": 2186 }, { "epoch": 1.6, "learning_rate": 9.383462194782086e-06, "loss": 2.0952, "step": 2187 }, { "epoch": 1.6, "learning_rate": 9.375568088160568e-06, "loss": 1.9055, "step": 2188 }, { "epoch": 1.6, "learning_rate": 9.367674372169713e-06, "loss": 2.0037, "step": 2189 }, { "epoch": 1.6, "learning_rate": 9.359781051747658e-06, "loss": 1.838, "step": 2190 }, { "epoch": 1.61, "learning_rate": 9.351888131832286e-06, "loss": 2.0626, "step": 2191 }, { "epoch": 1.61, "learning_rate": 9.343995617361226e-06, "loss": 1.9625, "step": 2192 }, { "epoch": 1.61, "learning_rate": 9.33610351327187e-06, "loss": 2.0175, "step": 2193 }, { "epoch": 1.61, "learning_rate": 9.328211824501332e-06, "loss": 1.8092, "step": 2194 }, { "epoch": 1.61, "learning_rate": 9.320320555986485e-06, "loss": 2.0023, "step": 2195 }, { "epoch": 1.61, "learning_rate": 9.312429712663922e-06, "loss": 2.0542, "step": 2196 }, { "epoch": 1.61, "learning_rate": 9.304539299469983e-06, "loss": 1.8677, "step": 2197 }, { "epoch": 1.61, "learning_rate": 9.296649321340731e-06, "loss": 1.8742, "step": 2198 }, { "epoch": 1.61, "learning_rate": 9.288759783211967e-06, "loss": 2.0663, "step": 2199 }, { "epoch": 1.61, "learning_rate": 9.280870690019201e-06, "loss": 1.8203, "step": 2200 }, { "epoch": 1.61, "learning_rate": 9.27298204669768e-06, "loss": 1.8642, "step": 2201 }, { "epoch": 1.61, "learning_rate": 9.265093858182364e-06, "loss": 1.9759, "step": 2202 }, { "epoch": 1.61, "learning_rate": 9.257206129407922e-06, "loss": 1.9931, "step": 2203 }, { "epoch": 1.61, "learning_rate": 9.249318865308744e-06, "loss": 1.9923, "step": 2204 }, { "epoch": 1.62, "learning_rate": 9.241432070818929e-06, "loss": 1.872, "step": 2205 }, { "epoch": 1.62, "learning_rate": 9.233545750872275e-06, "loss": 1.9022, "step": 2206 }, { "epoch": 1.62, "learning_rate": 9.22565991040229e-06, "loss": 1.8603, "step": 2207 }, { "epoch": 1.62, "learning_rate": 9.21777455434218e-06, "loss": 1.8683, "step": 2208 }, { "epoch": 1.62, "learning_rate": 9.209889687624846e-06, "loss": 1.8674, "step": 2209 }, { "epoch": 1.62, "learning_rate": 9.202005315182892e-06, "loss": 2.0004, "step": 2210 }, { "epoch": 1.62, "learning_rate": 9.194121441948596e-06, "loss": 1.9488, "step": 2211 }, { "epoch": 1.62, "learning_rate": 9.186238072853933e-06, "loss": 1.9808, "step": 2212 }, { "epoch": 1.62, "learning_rate": 9.178355212830569e-06, "loss": 1.9449, "step": 2213 }, { "epoch": 1.62, "learning_rate": 9.170472866809842e-06, "loss": 2.002, "step": 2214 }, { "epoch": 1.62, "learning_rate": 9.162591039722769e-06, "loss": 1.9752, "step": 2215 }, { "epoch": 1.62, "learning_rate": 9.154709736500052e-06, "loss": 1.9761, "step": 2216 }, { "epoch": 1.62, "learning_rate": 9.146828962072051e-06, "loss": 1.8711, "step": 2217 }, { "epoch": 1.62, "learning_rate": 9.13894872136881e-06, "loss": 1.7005, "step": 2218 }, { "epoch": 1.63, "learning_rate": 9.131069019320025e-06, "loss": 1.8639, "step": 2219 }, { "epoch": 1.63, "learning_rate": 9.123189860855064e-06, "loss": 2.046, "step": 2220 }, { "epoch": 1.63, "learning_rate": 9.115311250902955e-06, "loss": 1.88, "step": 2221 }, { "epoch": 1.63, "learning_rate": 9.107433194392376e-06, "loss": 1.9383, "step": 2222 }, { "epoch": 1.63, "learning_rate": 9.099555696251667e-06, "loss": 1.8342, "step": 2223 }, { "epoch": 1.63, "learning_rate": 9.091678761408817e-06, "loss": 1.993, "step": 2224 }, { "epoch": 1.63, "learning_rate": 9.083802394791453e-06, "loss": 1.7722, "step": 2225 }, { "epoch": 1.63, "learning_rate": 9.075926601326865e-06, "loss": 1.9173, "step": 2226 }, { "epoch": 1.63, "learning_rate": 9.068051385941963e-06, "loss": 1.8676, "step": 2227 }, { "epoch": 1.63, "learning_rate": 9.06017675356331e-06, "loss": 2.0454, "step": 2228 }, { "epoch": 1.63, "learning_rate": 9.0523027091171e-06, "loss": 2.1003, "step": 2229 }, { "epoch": 1.63, "learning_rate": 9.044429257529162e-06, "loss": 2.0903, "step": 2230 }, { "epoch": 1.63, "learning_rate": 9.036556403724945e-06, "loss": 1.9295, "step": 2231 }, { "epoch": 1.64, "learning_rate": 9.02868415262954e-06, "loss": 2.048, "step": 2232 }, { "epoch": 1.64, "learning_rate": 9.020812509167642e-06, "loss": 1.9442, "step": 2233 }, { "epoch": 1.64, "learning_rate": 9.012941478263581e-06, "loss": 1.9626, "step": 2234 }, { "epoch": 1.64, "learning_rate": 9.005071064841299e-06, "loss": 1.9458, "step": 2235 }, { "epoch": 1.64, "learning_rate": 8.99720127382435e-06, "loss": 2.1064, "step": 2236 }, { "epoch": 1.64, "learning_rate": 8.989332110135897e-06, "loss": 1.8264, "step": 2237 }, { "epoch": 1.64, "learning_rate": 8.981463578698714e-06, "loss": 1.9699, "step": 2238 }, { "epoch": 1.64, "learning_rate": 8.97359568443518e-06, "loss": 1.9333, "step": 2239 }, { "epoch": 1.64, "learning_rate": 8.965728432267274e-06, "loss": 1.8401, "step": 2240 }, { "epoch": 1.64, "learning_rate": 8.957861827116576e-06, "loss": 2.1629, "step": 2241 }, { "epoch": 1.64, "learning_rate": 8.949995873904251e-06, "loss": 1.9079, "step": 2242 }, { "epoch": 1.64, "learning_rate": 8.942130577551068e-06, "loss": 1.8813, "step": 2243 }, { "epoch": 1.64, "learning_rate": 8.934265942977383e-06, "loss": 1.9514, "step": 2244 }, { "epoch": 1.64, "learning_rate": 8.926401975103132e-06, "loss": 1.9048, "step": 2245 }, { "epoch": 1.65, "learning_rate": 8.918538678847842e-06, "loss": 1.8287, "step": 2246 }, { "epoch": 1.65, "learning_rate": 8.910676059130611e-06, "loss": 1.9546, "step": 2247 }, { "epoch": 1.65, "learning_rate": 8.90281412087012e-06, "loss": 1.8126, "step": 2248 }, { "epoch": 1.65, "learning_rate": 8.894952868984625e-06, "loss": 1.969, "step": 2249 }, { "epoch": 1.65, "learning_rate": 8.887092308391945e-06, "loss": 2.1077, "step": 2250 }, { "epoch": 1.65, "learning_rate": 8.879232444009476e-06, "loss": 2.0025, "step": 2251 }, { "epoch": 1.65, "learning_rate": 8.871373280754168e-06, "loss": 1.8192, "step": 2252 }, { "epoch": 1.65, "learning_rate": 8.863514823542542e-06, "loss": 1.9833, "step": 2253 }, { "epoch": 1.65, "learning_rate": 8.855657077290675e-06, "loss": 2.0523, "step": 2254 }, { "epoch": 1.65, "learning_rate": 8.847800046914194e-06, "loss": 2.0201, "step": 2255 }, { "epoch": 1.65, "learning_rate": 8.839943737328282e-06, "loss": 2.0359, "step": 2256 }, { "epoch": 1.65, "learning_rate": 8.832088153447676e-06, "loss": 1.9916, "step": 2257 }, { "epoch": 1.65, "learning_rate": 8.824233300186648e-06, "loss": 1.904, "step": 2258 }, { "epoch": 1.65, "learning_rate": 8.81637918245902e-06, "loss": 1.991, "step": 2259 }, { "epoch": 1.66, "learning_rate": 8.808525805178155e-06, "loss": 2.0035, "step": 2260 }, { "epoch": 1.66, "learning_rate": 8.800673173256947e-06, "loss": 1.9341, "step": 2261 }, { "epoch": 1.66, "learning_rate": 8.79282129160783e-06, "loss": 1.9724, "step": 2262 }, { "epoch": 1.66, "learning_rate": 8.784970165142767e-06, "loss": 1.8786, "step": 2263 }, { "epoch": 1.66, "learning_rate": 8.777119798773243e-06, "loss": 1.9171, "step": 2264 }, { "epoch": 1.66, "learning_rate": 8.769270197410277e-06, "loss": 1.897, "step": 2265 }, { "epoch": 1.66, "learning_rate": 8.7614213659644e-06, "loss": 1.8873, "step": 2266 }, { "epoch": 1.66, "learning_rate": 8.753573309345668e-06, "loss": 2.1181, "step": 2267 }, { "epoch": 1.66, "learning_rate": 8.74572603246365e-06, "loss": 1.8491, "step": 2268 }, { "epoch": 1.66, "learning_rate": 8.737879540227428e-06, "loss": 1.8739, "step": 2269 }, { "epoch": 1.66, "learning_rate": 8.730033837545592e-06, "loss": 2.056, "step": 2270 }, { "epoch": 1.66, "learning_rate": 8.722188929326237e-06, "loss": 2.0599, "step": 2271 }, { "epoch": 1.66, "learning_rate": 8.714344820476966e-06, "loss": 1.9299, "step": 2272 }, { "epoch": 1.67, "learning_rate": 8.706501515904871e-06, "loss": 2.0232, "step": 2273 }, { "epoch": 1.67, "learning_rate": 8.698659020516556e-06, "loss": 2.091, "step": 2274 }, { "epoch": 1.67, "learning_rate": 8.690817339218106e-06, "loss": 1.8528, "step": 2275 }, { "epoch": 1.67, "learning_rate": 8.682976476915103e-06, "loss": 1.8427, "step": 2276 }, { "epoch": 1.67, "learning_rate": 8.67513643851262e-06, "loss": 2.1508, "step": 2277 }, { "epoch": 1.67, "learning_rate": 8.667297228915204e-06, "loss": 1.8332, "step": 2278 }, { "epoch": 1.67, "learning_rate": 8.659458853026892e-06, "loss": 2.0438, "step": 2279 }, { "epoch": 1.67, "learning_rate": 8.651621315751197e-06, "loss": 1.899, "step": 2280 }, { "epoch": 1.67, "learning_rate": 8.643784621991104e-06, "loss": 1.9613, "step": 2281 }, { "epoch": 1.67, "learning_rate": 8.635948776649081e-06, "loss": 1.8474, "step": 2282 }, { "epoch": 1.67, "learning_rate": 8.628113784627053e-06, "loss": 1.9904, "step": 2283 }, { "epoch": 1.67, "learning_rate": 8.620279650826417e-06, "loss": 1.8017, "step": 2284 }, { "epoch": 1.67, "learning_rate": 8.61244638014803e-06, "loss": 1.9721, "step": 2285 }, { "epoch": 1.67, "learning_rate": 8.604613977492212e-06, "loss": 1.8136, "step": 2286 }, { "epoch": 1.68, "learning_rate": 8.596782447758746e-06, "loss": 1.8204, "step": 2287 }, { "epoch": 1.68, "learning_rate": 8.588951795846858e-06, "loss": 2.0672, "step": 2288 }, { "epoch": 1.68, "learning_rate": 8.581122026655222e-06, "loss": 1.9497, "step": 2289 }, { "epoch": 1.68, "learning_rate": 8.573293145081974e-06, "loss": 1.9809, "step": 2290 }, { "epoch": 1.68, "learning_rate": 8.565465156024687e-06, "loss": 2.083, "step": 2291 }, { "epoch": 1.68, "learning_rate": 8.557638064380375e-06, "loss": 2.0217, "step": 2292 }, { "epoch": 1.68, "learning_rate": 8.549811875045493e-06, "loss": 2.0076, "step": 2293 }, { "epoch": 1.68, "learning_rate": 8.541986592915927e-06, "loss": 2.0611, "step": 2294 }, { "epoch": 1.68, "learning_rate": 8.534162222887003e-06, "loss": 1.9626, "step": 2295 }, { "epoch": 1.68, "learning_rate": 8.526338769853472e-06, "loss": 1.8355, "step": 2296 }, { "epoch": 1.68, "learning_rate": 8.518516238709507e-06, "loss": 1.8935, "step": 2297 }, { "epoch": 1.68, "learning_rate": 8.510694634348715e-06, "loss": 1.9936, "step": 2298 }, { "epoch": 1.68, "learning_rate": 8.502873961664112e-06, "loss": 1.9059, "step": 2299 }, { "epoch": 1.68, "learning_rate": 8.495054225548138e-06, "loss": 1.9056, "step": 2300 }, { "epoch": 1.69, "learning_rate": 8.487235430892648e-06, "loss": 1.8835, "step": 2301 }, { "epoch": 1.69, "learning_rate": 8.479417582588902e-06, "loss": 1.9299, "step": 2302 }, { "epoch": 1.69, "learning_rate": 8.471600685527576e-06, "loss": 2.2025, "step": 2303 }, { "epoch": 1.69, "learning_rate": 8.46378474459874e-06, "loss": 1.8655, "step": 2304 }, { "epoch": 1.69, "learning_rate": 8.455969764691875e-06, "loss": 1.953, "step": 2305 }, { "epoch": 1.69, "learning_rate": 8.448155750695855e-06, "loss": 1.7958, "step": 2306 }, { "epoch": 1.69, "learning_rate": 8.440342707498961e-06, "loss": 2.0604, "step": 2307 }, { "epoch": 1.69, "learning_rate": 8.432530639988848e-06, "loss": 1.9824, "step": 2308 }, { "epoch": 1.69, "learning_rate": 8.424719553052578e-06, "loss": 1.9984, "step": 2309 }, { "epoch": 1.69, "learning_rate": 8.416909451576592e-06, "loss": 1.8437, "step": 2310 }, { "epoch": 1.69, "learning_rate": 8.409100340446711e-06, "loss": 1.8982, "step": 2311 }, { "epoch": 1.69, "learning_rate": 8.401292224548148e-06, "loss": 1.9799, "step": 2312 }, { "epoch": 1.69, "learning_rate": 8.393485108765477e-06, "loss": 2.0426, "step": 2313 }, { "epoch": 1.7, "learning_rate": 8.385678997982662e-06, "loss": 1.9316, "step": 2314 }, { "epoch": 1.7, "learning_rate": 8.377873897083029e-06, "loss": 1.9405, "step": 2315 }, { "epoch": 1.7, "learning_rate": 8.370069810949276e-06, "loss": 1.9504, "step": 2316 }, { "epoch": 1.7, "learning_rate": 8.362266744463466e-06, "loss": 1.8681, "step": 2317 }, { "epoch": 1.7, "learning_rate": 8.354464702507018e-06, "loss": 1.8898, "step": 2318 }, { "epoch": 1.7, "learning_rate": 8.346663689960724e-06, "loss": 1.8727, "step": 2319 }, { "epoch": 1.7, "learning_rate": 8.338863711704714e-06, "loss": 1.9647, "step": 2320 }, { "epoch": 1.7, "learning_rate": 8.331064772618486e-06, "loss": 1.8778, "step": 2321 }, { "epoch": 1.7, "learning_rate": 8.323266877580881e-06, "loss": 1.7946, "step": 2322 }, { "epoch": 1.7, "learning_rate": 8.315470031470086e-06, "loss": 1.8861, "step": 2323 }, { "epoch": 1.7, "learning_rate": 8.307674239163638e-06, "loss": 1.8508, "step": 2324 }, { "epoch": 1.7, "learning_rate": 8.299879505538407e-06, "loss": 1.8801, "step": 2325 }, { "epoch": 1.7, "learning_rate": 8.292085835470607e-06, "loss": 1.993, "step": 2326 }, { "epoch": 1.7, "learning_rate": 8.28429323383578e-06, "loss": 2.0137, "step": 2327 }, { "epoch": 1.71, "learning_rate": 8.276501705508808e-06, "loss": 1.8557, "step": 2328 }, { "epoch": 1.71, "learning_rate": 8.268711255363896e-06, "loss": 1.9753, "step": 2329 }, { "epoch": 1.71, "learning_rate": 8.260921888274573e-06, "loss": 1.852, "step": 2330 }, { "epoch": 1.71, "learning_rate": 8.253133609113699e-06, "loss": 1.9339, "step": 2331 }, { "epoch": 1.71, "learning_rate": 8.245346422753442e-06, "loss": 1.8643, "step": 2332 }, { "epoch": 1.71, "learning_rate": 8.237560334065293e-06, "loss": 1.9861, "step": 2333 }, { "epoch": 1.71, "learning_rate": 8.22977534792006e-06, "loss": 1.947, "step": 2334 }, { "epoch": 1.71, "learning_rate": 8.22199146918785e-06, "loss": 1.9934, "step": 2335 }, { "epoch": 1.71, "learning_rate": 8.214208702738086e-06, "loss": 1.9143, "step": 2336 }, { "epoch": 1.71, "learning_rate": 8.206427053439497e-06, "loss": 1.9831, "step": 2337 }, { "epoch": 1.71, "learning_rate": 8.198646526160101e-06, "loss": 1.9475, "step": 2338 }, { "epoch": 1.71, "learning_rate": 8.190867125767228e-06, "loss": 1.8805, "step": 2339 }, { "epoch": 1.71, "learning_rate": 8.183088857127496e-06, "loss": 1.9839, "step": 2340 }, { "epoch": 1.72, "learning_rate": 8.175311725106816e-06, "loss": 1.9572, "step": 2341 }, { "epoch": 1.72, "learning_rate": 8.167535734570389e-06, "loss": 1.8524, "step": 2342 }, { "epoch": 1.72, "learning_rate": 8.159760890382703e-06, "loss": 1.9617, "step": 2343 }, { "epoch": 1.72, "learning_rate": 8.151987197407519e-06, "loss": 1.9363, "step": 2344 }, { "epoch": 1.72, "learning_rate": 8.144214660507897e-06, "loss": 2.0463, "step": 2345 }, { "epoch": 1.72, "learning_rate": 8.136443284546153e-06, "loss": 1.9526, "step": 2346 }, { "epoch": 1.72, "learning_rate": 8.12867307438389e-06, "loss": 2.0147, "step": 2347 }, { "epoch": 1.72, "learning_rate": 8.12090403488198e-06, "loss": 1.8798, "step": 2348 }, { "epoch": 1.72, "learning_rate": 8.113136170900558e-06, "loss": 1.9744, "step": 2349 }, { "epoch": 1.72, "learning_rate": 8.105369487299031e-06, "loss": 1.9221, "step": 2350 }, { "epoch": 1.72, "learning_rate": 8.097603988936057e-06, "loss": 2.0026, "step": 2351 }, { "epoch": 1.72, "learning_rate": 8.089839680669557e-06, "loss": 2.0195, "step": 2352 }, { "epoch": 1.72, "learning_rate": 8.082076567356715e-06, "loss": 1.9312, "step": 2353 }, { "epoch": 1.72, "learning_rate": 8.074314653853958e-06, "loss": 2.0437, "step": 2354 }, { "epoch": 1.73, "learning_rate": 8.066553945016968e-06, "loss": 2.0397, "step": 2355 }, { "epoch": 1.73, "learning_rate": 8.05879444570067e-06, "loss": 2.0087, "step": 2356 }, { "epoch": 1.73, "learning_rate": 8.05103616075923e-06, "loss": 1.8879, "step": 2357 }, { "epoch": 1.73, "learning_rate": 8.043279095046064e-06, "loss": 2.152, "step": 2358 }, { "epoch": 1.73, "learning_rate": 8.035523253413815e-06, "loss": 1.902, "step": 2359 }, { "epoch": 1.73, "learning_rate": 8.027768640714367e-06, "loss": 2.0188, "step": 2360 }, { "epoch": 1.73, "learning_rate": 8.02001526179883e-06, "loss": 1.943, "step": 2361 }, { "epoch": 1.73, "learning_rate": 8.012263121517549e-06, "loss": 1.845, "step": 2362 }, { "epoch": 1.73, "learning_rate": 8.004512224720084e-06, "loss": 1.9079, "step": 2363 }, { "epoch": 1.73, "learning_rate": 7.996762576255229e-06, "loss": 1.931, "step": 2364 }, { "epoch": 1.73, "learning_rate": 7.989014180970987e-06, "loss": 1.9708, "step": 2365 }, { "epoch": 1.73, "learning_rate": 7.981267043714585e-06, "loss": 1.9667, "step": 2366 }, { "epoch": 1.73, "learning_rate": 7.973521169332452e-06, "loss": 2.1859, "step": 2367 }, { "epoch": 1.73, "learning_rate": 7.965776562670241e-06, "loss": 1.8038, "step": 2368 }, { "epoch": 1.74, "learning_rate": 7.9580332285728e-06, "loss": 1.9625, "step": 2369 }, { "epoch": 1.74, "learning_rate": 7.950291171884192e-06, "loss": 1.8718, "step": 2370 }, { "epoch": 1.74, "learning_rate": 7.94255039744767e-06, "loss": 2.0683, "step": 2371 }, { "epoch": 1.74, "learning_rate": 7.934810910105688e-06, "loss": 1.8368, "step": 2372 }, { "epoch": 1.74, "learning_rate": 7.927072714699903e-06, "loss": 2.0543, "step": 2373 }, { "epoch": 1.74, "learning_rate": 7.919335816071155e-06, "loss": 1.8971, "step": 2374 }, { "epoch": 1.74, "learning_rate": 7.91160021905947e-06, "loss": 1.8165, "step": 2375 }, { "epoch": 1.74, "learning_rate": 7.903865928504073e-06, "loss": 2.0222, "step": 2376 }, { "epoch": 1.74, "learning_rate": 7.896132949243356e-06, "loss": 1.9575, "step": 2377 }, { "epoch": 1.74, "learning_rate": 7.888401286114902e-06, "loss": 2.0074, "step": 2378 }, { "epoch": 1.74, "learning_rate": 7.880670943955467e-06, "loss": 1.9286, "step": 2379 }, { "epoch": 1.74, "learning_rate": 7.872941927600979e-06, "loss": 1.881, "step": 2380 }, { "epoch": 1.74, "learning_rate": 7.865214241886544e-06, "loss": 1.9571, "step": 2381 }, { "epoch": 1.75, "learning_rate": 7.857487891646422e-06, "loss": 1.9944, "step": 2382 }, { "epoch": 1.75, "learning_rate": 7.849762881714046e-06, "loss": 1.9213, "step": 2383 }, { "epoch": 1.75, "learning_rate": 7.842039216922013e-06, "loss": 1.8979, "step": 2384 }, { "epoch": 1.75, "learning_rate": 7.834316902102072e-06, "loss": 1.8685, "step": 2385 }, { "epoch": 1.75, "learning_rate": 7.826595942085132e-06, "loss": 1.9829, "step": 2386 }, { "epoch": 1.75, "learning_rate": 7.818876341701256e-06, "loss": 1.9006, "step": 2387 }, { "epoch": 1.75, "learning_rate": 7.81115810577965e-06, "loss": 1.869, "step": 2388 }, { "epoch": 1.75, "learning_rate": 7.803441239148675e-06, "loss": 2.067, "step": 2389 }, { "epoch": 1.75, "learning_rate": 7.795725746635823e-06, "loss": 1.9055, "step": 2390 }, { "epoch": 1.75, "learning_rate": 7.78801163306774e-06, "loss": 1.9184, "step": 2391 }, { "epoch": 1.75, "learning_rate": 7.780298903270202e-06, "loss": 2.1107, "step": 2392 }, { "epoch": 1.75, "learning_rate": 7.77258756206812e-06, "loss": 1.9195, "step": 2393 }, { "epoch": 1.75, "learning_rate": 7.764877614285533e-06, "loss": 2.0606, "step": 2394 }, { "epoch": 1.75, "learning_rate": 7.757169064745622e-06, "loss": 2.0096, "step": 2395 }, { "epoch": 1.76, "learning_rate": 7.749461918270674e-06, "loss": 1.9803, "step": 2396 }, { "epoch": 1.76, "learning_rate": 7.741756179682116e-06, "loss": 1.9538, "step": 2397 }, { "epoch": 1.76, "learning_rate": 7.734051853800478e-06, "loss": 1.8188, "step": 2398 }, { "epoch": 1.76, "learning_rate": 7.726348945445417e-06, "loss": 1.886, "step": 2399 }, { "epoch": 1.76, "learning_rate": 7.7186474594357e-06, "loss": 2.076, "step": 2400 }, { "epoch": 1.76, "learning_rate": 7.710947400589209e-06, "loss": 2.0474, "step": 2401 }, { "epoch": 1.76, "learning_rate": 7.703248773722924e-06, "loss": 1.9531, "step": 2402 }, { "epoch": 1.76, "learning_rate": 7.695551583652935e-06, "loss": 1.9405, "step": 2403 }, { "epoch": 1.76, "learning_rate": 7.687855835194435e-06, "loss": 1.8898, "step": 2404 }, { "epoch": 1.76, "learning_rate": 7.68016153316171e-06, "loss": 2.0219, "step": 2405 }, { "epoch": 1.76, "learning_rate": 7.672468682368145e-06, "loss": 1.9202, "step": 2406 }, { "epoch": 1.76, "learning_rate": 7.664777287626214e-06, "loss": 1.8002, "step": 2407 }, { "epoch": 1.76, "learning_rate": 7.657087353747484e-06, "loss": 1.8881, "step": 2408 }, { "epoch": 1.76, "learning_rate": 7.649398885542603e-06, "loss": 1.8142, "step": 2409 }, { "epoch": 1.77, "learning_rate": 7.641711887821307e-06, "loss": 1.773, "step": 2410 }, { "epoch": 1.77, "learning_rate": 7.634026365392413e-06, "loss": 1.8807, "step": 2411 }, { "epoch": 1.77, "learning_rate": 7.626342323063811e-06, "loss": 1.9416, "step": 2412 }, { "epoch": 1.77, "learning_rate": 7.618659765642461e-06, "loss": 1.8958, "step": 2413 }, { "epoch": 1.77, "learning_rate": 7.6109786979344025e-06, "loss": 1.9272, "step": 2414 }, { "epoch": 1.77, "learning_rate": 7.603299124744743e-06, "loss": 1.9075, "step": 2415 }, { "epoch": 1.77, "learning_rate": 7.595621050877647e-06, "loss": 1.7432, "step": 2416 }, { "epoch": 1.77, "learning_rate": 7.587944481136351e-06, "loss": 1.9196, "step": 2417 }, { "epoch": 1.77, "learning_rate": 7.580269420323143e-06, "loss": 1.9371, "step": 2418 }, { "epoch": 1.77, "learning_rate": 7.572595873239368e-06, "loss": 1.8582, "step": 2419 }, { "epoch": 1.77, "learning_rate": 7.564923844685431e-06, "loss": 2.165, "step": 2420 }, { "epoch": 1.77, "learning_rate": 7.557253339460777e-06, "loss": 2.1001, "step": 2421 }, { "epoch": 1.77, "learning_rate": 7.549584362363907e-06, "loss": 1.7851, "step": 2422 }, { "epoch": 1.78, "learning_rate": 7.541916918192356e-06, "loss": 1.8861, "step": 2423 }, { "epoch": 1.78, "learning_rate": 7.53425101174271e-06, "loss": 2.0358, "step": 2424 }, { "epoch": 1.78, "learning_rate": 7.526586647810591e-06, "loss": 1.9882, "step": 2425 }, { "epoch": 1.78, "learning_rate": 7.51892383119065e-06, "loss": 2.0535, "step": 2426 }, { "epoch": 1.78, "learning_rate": 7.511262566676574e-06, "loss": 1.937, "step": 2427 }, { "epoch": 1.78, "learning_rate": 7.5036028590610846e-06, "loss": 1.967, "step": 2428 }, { "epoch": 1.78, "learning_rate": 7.495944713135918e-06, "loss": 1.9268, "step": 2429 }, { "epoch": 1.78, "learning_rate": 7.488288133691839e-06, "loss": 1.9293, "step": 2430 }, { "epoch": 1.78, "learning_rate": 7.480633125518636e-06, "loss": 1.9313, "step": 2431 }, { "epoch": 1.78, "learning_rate": 7.472979693405109e-06, "loss": 1.9471, "step": 2432 }, { "epoch": 1.78, "learning_rate": 7.465327842139074e-06, "loss": 1.7287, "step": 2433 }, { "epoch": 1.78, "learning_rate": 7.457677576507364e-06, "loss": 2.0216, "step": 2434 }, { "epoch": 1.78, "learning_rate": 7.450028901295807e-06, "loss": 1.8743, "step": 2435 }, { "epoch": 1.78, "learning_rate": 7.442381821289248e-06, "loss": 1.9628, "step": 2436 }, { "epoch": 1.79, "learning_rate": 7.434736341271527e-06, "loss": 1.9448, "step": 2437 }, { "epoch": 1.79, "learning_rate": 7.427092466025485e-06, "loss": 1.8487, "step": 2438 }, { "epoch": 1.79, "learning_rate": 7.419450200332965e-06, "loss": 2.0061, "step": 2439 }, { "epoch": 1.79, "learning_rate": 7.411809548974792e-06, "loss": 2.1054, "step": 2440 }, { "epoch": 1.79, "learning_rate": 7.404170516730791e-06, "loss": 1.9556, "step": 2441 }, { "epoch": 1.79, "learning_rate": 7.396533108379767e-06, "loss": 1.8281, "step": 2442 }, { "epoch": 1.79, "learning_rate": 7.388897328699517e-06, "loss": 1.949, "step": 2443 }, { "epoch": 1.79, "learning_rate": 7.381263182466807e-06, "loss": 1.7641, "step": 2444 }, { "epoch": 1.79, "learning_rate": 7.373630674457393e-06, "loss": 1.9978, "step": 2445 }, { "epoch": 1.79, "learning_rate": 7.365999809446001e-06, "loss": 1.9094, "step": 2446 }, { "epoch": 1.79, "learning_rate": 7.358370592206328e-06, "loss": 1.9923, "step": 2447 }, { "epoch": 1.79, "learning_rate": 7.350743027511045e-06, "loss": 1.7159, "step": 2448 }, { "epoch": 1.79, "learning_rate": 7.343117120131784e-06, "loss": 1.9369, "step": 2449 }, { "epoch": 1.79, "learning_rate": 7.335492874839145e-06, "loss": 1.9586, "step": 2450 }, { "epoch": 1.8, "learning_rate": 7.327870296402682e-06, "loss": 2.0066, "step": 2451 }, { "epoch": 1.8, "learning_rate": 7.320249389590912e-06, "loss": 1.8748, "step": 2452 }, { "epoch": 1.8, "learning_rate": 7.3126301591713055e-06, "loss": 1.9203, "step": 2453 }, { "epoch": 1.8, "learning_rate": 7.30501260991028e-06, "loss": 1.8077, "step": 2454 }, { "epoch": 1.8, "learning_rate": 7.297396746573208e-06, "loss": 1.7239, "step": 2455 }, { "epoch": 1.8, "learning_rate": 7.289782573924398e-06, "loss": 1.8336, "step": 2456 }, { "epoch": 1.8, "learning_rate": 7.28217009672711e-06, "loss": 2.0061, "step": 2457 }, { "epoch": 1.8, "learning_rate": 7.2745593197435434e-06, "loss": 2.0223, "step": 2458 }, { "epoch": 1.8, "learning_rate": 7.266950247734828e-06, "loss": 1.9211, "step": 2459 }, { "epoch": 1.8, "learning_rate": 7.259342885461023e-06, "loss": 1.9353, "step": 2460 }, { "epoch": 1.8, "learning_rate": 7.25173723768113e-06, "loss": 2.0923, "step": 2461 }, { "epoch": 1.8, "learning_rate": 7.244133309153074e-06, "loss": 1.8014, "step": 2462 }, { "epoch": 1.8, "learning_rate": 7.236531104633699e-06, "loss": 1.9205, "step": 2463 }, { "epoch": 1.81, "learning_rate": 7.228930628878777e-06, "loss": 1.8921, "step": 2464 }, { "epoch": 1.81, "learning_rate": 7.221331886642995e-06, "loss": 2.0303, "step": 2465 }, { "epoch": 1.81, "learning_rate": 7.213734882679955e-06, "loss": 1.9701, "step": 2466 }, { "epoch": 1.81, "learning_rate": 7.206139621742179e-06, "loss": 1.9208, "step": 2467 }, { "epoch": 1.81, "learning_rate": 7.198546108581084e-06, "loss": 1.908, "step": 2468 }, { "epoch": 1.81, "learning_rate": 7.19095434794701e-06, "loss": 1.8297, "step": 2469 }, { "epoch": 1.81, "learning_rate": 7.183364344589188e-06, "loss": 1.7978, "step": 2470 }, { "epoch": 1.81, "learning_rate": 7.175776103255756e-06, "loss": 1.9786, "step": 2471 }, { "epoch": 1.81, "learning_rate": 7.168189628693752e-06, "loss": 1.818, "step": 2472 }, { "epoch": 1.81, "learning_rate": 7.1606049256491e-06, "loss": 1.9279, "step": 2473 }, { "epoch": 1.81, "learning_rate": 7.153021998866625e-06, "loss": 1.8004, "step": 2474 }, { "epoch": 1.81, "learning_rate": 7.145440853090033e-06, "loss": 1.949, "step": 2475 }, { "epoch": 1.81, "learning_rate": 7.137861493061921e-06, "loss": 1.9085, "step": 2476 }, { "epoch": 1.81, "learning_rate": 7.130283923523767e-06, "loss": 2.0164, "step": 2477 }, { "epoch": 1.82, "learning_rate": 7.122708149215931e-06, "loss": 2.0716, "step": 2478 }, { "epoch": 1.82, "learning_rate": 7.115134174877647e-06, "loss": 1.8126, "step": 2479 }, { "epoch": 1.82, "learning_rate": 7.107562005247023e-06, "loss": 2.004, "step": 2480 }, { "epoch": 1.82, "learning_rate": 7.099991645061044e-06, "loss": 2.0045, "step": 2481 }, { "epoch": 1.82, "learning_rate": 7.092423099055552e-06, "loss": 2.1053, "step": 2482 }, { "epoch": 1.82, "learning_rate": 7.084856371965268e-06, "loss": 1.8484, "step": 2483 }, { "epoch": 1.82, "learning_rate": 7.0772914685237585e-06, "loss": 1.9047, "step": 2484 }, { "epoch": 1.82, "learning_rate": 7.0697283934634645e-06, "loss": 1.8623, "step": 2485 }, { "epoch": 1.82, "learning_rate": 7.062167151515675e-06, "loss": 1.9764, "step": 2486 }, { "epoch": 1.82, "learning_rate": 7.054607747410535e-06, "loss": 1.9952, "step": 2487 }, { "epoch": 1.82, "learning_rate": 7.04705018587704e-06, "loss": 1.9396, "step": 2488 }, { "epoch": 1.82, "learning_rate": 7.039494471643028e-06, "loss": 1.9924, "step": 2489 }, { "epoch": 1.82, "learning_rate": 7.031940609435192e-06, "loss": 1.8608, "step": 2490 }, { "epoch": 1.82, "learning_rate": 7.024388603979051e-06, "loss": 1.9848, "step": 2491 }, { "epoch": 1.83, "learning_rate": 7.016838459998976e-06, "loss": 2.0666, "step": 2492 }, { "epoch": 1.83, "learning_rate": 7.009290182218166e-06, "loss": 1.8346, "step": 2493 }, { "epoch": 1.83, "learning_rate": 7.001743775358656e-06, "loss": 1.8992, "step": 2494 }, { "epoch": 1.83, "learning_rate": 6.994199244141313e-06, "loss": 2.0246, "step": 2495 }, { "epoch": 1.83, "learning_rate": 6.986656593285824e-06, "loss": 1.9626, "step": 2496 }, { "epoch": 1.83, "learning_rate": 6.979115827510703e-06, "loss": 1.9435, "step": 2497 }, { "epoch": 1.83, "learning_rate": 6.971576951533285e-06, "loss": 1.8545, "step": 2498 }, { "epoch": 1.83, "learning_rate": 6.964039970069722e-06, "loss": 1.8332, "step": 2499 }, { "epoch": 1.83, "learning_rate": 6.956504887834985e-06, "loss": 2.0006, "step": 2500 }, { "epoch": 1.83, "learning_rate": 6.948971709542849e-06, "loss": 1.9084, "step": 2501 }, { "epoch": 1.83, "learning_rate": 6.941440439905904e-06, "loss": 1.9441, "step": 2502 }, { "epoch": 1.83, "learning_rate": 6.933911083635543e-06, "loss": 2.0522, "step": 2503 }, { "epoch": 1.83, "learning_rate": 6.926383645441963e-06, "loss": 1.9646, "step": 2504 }, { "epoch": 1.84, "learning_rate": 6.918858130034167e-06, "loss": 2.0036, "step": 2505 }, { "epoch": 1.84, "learning_rate": 6.911334542119944e-06, "loss": 1.9673, "step": 2506 }, { "epoch": 1.84, "learning_rate": 6.903812886405881e-06, "loss": 1.8863, "step": 2507 }, { "epoch": 1.84, "learning_rate": 6.896293167597361e-06, "loss": 1.8382, "step": 2508 }, { "epoch": 1.84, "learning_rate": 6.88877539039855e-06, "loss": 1.9152, "step": 2509 }, { "epoch": 1.84, "learning_rate": 6.881259559512403e-06, "loss": 1.9937, "step": 2510 }, { "epoch": 1.84, "learning_rate": 6.8737456796406594e-06, "loss": 1.912, "step": 2511 }, { "epoch": 1.84, "learning_rate": 6.866233755483829e-06, "loss": 1.9297, "step": 2512 }, { "epoch": 1.84, "learning_rate": 6.858723791741205e-06, "loss": 1.8413, "step": 2513 }, { "epoch": 1.84, "learning_rate": 6.851215793110857e-06, "loss": 2.0182, "step": 2514 }, { "epoch": 1.84, "learning_rate": 6.843709764289618e-06, "loss": 1.895, "step": 2515 }, { "epoch": 1.84, "learning_rate": 6.836205709973094e-06, "loss": 1.8064, "step": 2516 }, { "epoch": 1.84, "learning_rate": 6.828703634855651e-06, "loss": 1.8805, "step": 2517 }, { "epoch": 1.84, "learning_rate": 6.821203543630421e-06, "loss": 1.8982, "step": 2518 }, { "epoch": 1.85, "learning_rate": 6.813705440989296e-06, "loss": 1.8484, "step": 2519 }, { "epoch": 1.85, "learning_rate": 6.806209331622919e-06, "loss": 1.9376, "step": 2520 }, { "epoch": 1.85, "learning_rate": 6.798715220220693e-06, "loss": 1.8098, "step": 2521 }, { "epoch": 1.85, "learning_rate": 6.79122311147076e-06, "loss": 1.9542, "step": 2522 }, { "epoch": 1.85, "learning_rate": 6.783733010060018e-06, "loss": 1.9468, "step": 2523 }, { "epoch": 1.85, "learning_rate": 6.7762449206741085e-06, "loss": 1.9319, "step": 2524 }, { "epoch": 1.85, "learning_rate": 6.768758847997414e-06, "loss": 1.9438, "step": 2525 }, { "epoch": 1.85, "learning_rate": 6.761274796713053e-06, "loss": 2.0704, "step": 2526 }, { "epoch": 1.85, "learning_rate": 6.753792771502883e-06, "loss": 1.7, "step": 2527 }, { "epoch": 1.85, "learning_rate": 6.746312777047488e-06, "loss": 2.0677, "step": 2528 }, { "epoch": 1.85, "learning_rate": 6.738834818026187e-06, "loss": 2.1531, "step": 2529 }, { "epoch": 1.85, "learning_rate": 6.731358899117028e-06, "loss": 2.1697, "step": 2530 }, { "epoch": 1.85, "learning_rate": 6.723885024996775e-06, "loss": 1.8811, "step": 2531 }, { "epoch": 1.85, "learning_rate": 6.716413200340917e-06, "loss": 2.0345, "step": 2532 }, { "epoch": 1.86, "learning_rate": 6.708943429823664e-06, "loss": 1.986, "step": 2533 }, { "epoch": 1.86, "learning_rate": 6.701475718117934e-06, "loss": 1.8257, "step": 2534 }, { "epoch": 1.86, "learning_rate": 6.694010069895363e-06, "loss": 1.8845, "step": 2535 }, { "epoch": 1.86, "learning_rate": 6.686546489826296e-06, "loss": 1.9018, "step": 2536 }, { "epoch": 1.86, "learning_rate": 6.679084982579773e-06, "loss": 1.8979, "step": 2537 }, { "epoch": 1.86, "learning_rate": 6.671625552823553e-06, "loss": 1.99, "step": 2538 }, { "epoch": 1.86, "learning_rate": 6.6641682052240904e-06, "loss": 1.8132, "step": 2539 }, { "epoch": 1.86, "learning_rate": 6.656712944446532e-06, "loss": 1.8196, "step": 2540 }, { "epoch": 1.86, "learning_rate": 6.649259775154725e-06, "loss": 1.7018, "step": 2541 }, { "epoch": 1.86, "learning_rate": 6.641808702011202e-06, "loss": 1.9748, "step": 2542 }, { "epoch": 1.86, "learning_rate": 6.634359729677191e-06, "loss": 1.8792, "step": 2543 }, { "epoch": 1.86, "learning_rate": 6.626912862812605e-06, "loss": 1.9554, "step": 2544 }, { "epoch": 1.86, "learning_rate": 6.6194681060760306e-06, "loss": 1.8558, "step": 2545 }, { "epoch": 1.87, "learning_rate": 6.612025464124747e-06, "loss": 1.98, "step": 2546 }, { "epoch": 1.87, "learning_rate": 6.604584941614706e-06, "loss": 1.9356, "step": 2547 }, { "epoch": 1.87, "learning_rate": 6.597146543200528e-06, "loss": 1.9671, "step": 2548 }, { "epoch": 1.87, "learning_rate": 6.589710273535512e-06, "loss": 1.8356, "step": 2549 }, { "epoch": 1.87, "learning_rate": 6.5822761372716195e-06, "loss": 1.8221, "step": 2550 }, { "epoch": 1.87, "learning_rate": 6.574844139059482e-06, "loss": 2.0812, "step": 2551 }, { "epoch": 1.87, "learning_rate": 6.5674142835483965e-06, "loss": 1.9424, "step": 2552 }, { "epoch": 1.87, "learning_rate": 6.559986575386307e-06, "loss": 1.8166, "step": 2553 }, { "epoch": 1.87, "learning_rate": 6.552561019219823e-06, "loss": 1.976, "step": 2554 }, { "epoch": 1.87, "learning_rate": 6.545137619694211e-06, "loss": 1.6952, "step": 2555 }, { "epoch": 1.87, "learning_rate": 6.537716381453379e-06, "loss": 1.9596, "step": 2556 }, { "epoch": 1.87, "learning_rate": 6.53029730913989e-06, "loss": 1.772, "step": 2557 }, { "epoch": 1.87, "learning_rate": 6.522880407394953e-06, "loss": 1.9874, "step": 2558 }, { "epoch": 1.87, "learning_rate": 6.515465680858412e-06, "loss": 1.9313, "step": 2559 }, { "epoch": 1.88, "learning_rate": 6.508053134168757e-06, "loss": 1.9836, "step": 2560 }, { "epoch": 1.88, "learning_rate": 6.500642771963111e-06, "loss": 1.8999, "step": 2561 }, { "epoch": 1.88, "learning_rate": 6.493234598877229e-06, "loss": 1.9676, "step": 2562 }, { "epoch": 1.88, "learning_rate": 6.485828619545502e-06, "loss": 1.8408, "step": 2563 }, { "epoch": 1.88, "learning_rate": 6.4784248386009455e-06, "loss": 1.7406, "step": 2564 }, { "epoch": 1.88, "learning_rate": 6.471023260675196e-06, "loss": 1.9101, "step": 2565 }, { "epoch": 1.88, "learning_rate": 6.463623890398518e-06, "loss": 1.965, "step": 2566 }, { "epoch": 1.88, "learning_rate": 6.456226732399796e-06, "loss": 1.9409, "step": 2567 }, { "epoch": 1.88, "learning_rate": 6.448831791306518e-06, "loss": 1.9194, "step": 2568 }, { "epoch": 1.88, "learning_rate": 6.441439071744802e-06, "loss": 1.9376, "step": 2569 }, { "epoch": 1.88, "learning_rate": 6.4340485783393624e-06, "loss": 2.0948, "step": 2570 }, { "epoch": 1.88, "learning_rate": 6.4266603157135295e-06, "loss": 1.8465, "step": 2571 }, { "epoch": 1.88, "learning_rate": 6.419274288489239e-06, "loss": 1.9124, "step": 2572 }, { "epoch": 1.88, "learning_rate": 6.411890501287019e-06, "loss": 2.0736, "step": 2573 }, { "epoch": 1.89, "learning_rate": 6.4045089587260054e-06, "loss": 1.7862, "step": 2574 }, { "epoch": 1.89, "learning_rate": 6.3971296654239235e-06, "loss": 1.9222, "step": 2575 }, { "epoch": 1.89, "learning_rate": 6.3897526259970965e-06, "loss": 1.9921, "step": 2576 }, { "epoch": 1.89, "learning_rate": 6.3823778450604376e-06, "loss": 2.0457, "step": 2577 }, { "epoch": 1.89, "learning_rate": 6.375005327227441e-06, "loss": 1.993, "step": 2578 }, { "epoch": 1.89, "learning_rate": 6.367635077110194e-06, "loss": 1.8474, "step": 2579 }, { "epoch": 1.89, "learning_rate": 6.360267099319355e-06, "loss": 2.2291, "step": 2580 }, { "epoch": 1.89, "learning_rate": 6.352901398464169e-06, "loss": 1.8838, "step": 2581 }, { "epoch": 1.89, "learning_rate": 6.345537979152456e-06, "loss": 2.0056, "step": 2582 }, { "epoch": 1.89, "learning_rate": 6.338176845990608e-06, "loss": 2.0822, "step": 2583 }, { "epoch": 1.89, "learning_rate": 6.330818003583578e-06, "loss": 1.9442, "step": 2584 }, { "epoch": 1.89, "learning_rate": 6.323461456534898e-06, "loss": 1.8625, "step": 2585 }, { "epoch": 1.89, "learning_rate": 6.316107209446662e-06, "loss": 2.0205, "step": 2586 }, { "epoch": 1.9, "learning_rate": 6.308755266919518e-06, "loss": 1.8977, "step": 2587 }, { "epoch": 1.9, "learning_rate": 6.301405633552682e-06, "loss": 2.0887, "step": 2588 }, { "epoch": 1.9, "learning_rate": 6.294058313943916e-06, "loss": 1.9408, "step": 2589 }, { "epoch": 1.9, "learning_rate": 6.286713312689543e-06, "loss": 2.0368, "step": 2590 }, { "epoch": 1.9, "learning_rate": 6.27937063438443e-06, "loss": 1.8638, "step": 2591 }, { "epoch": 1.9, "learning_rate": 6.2720302836219925e-06, "loss": 2.0367, "step": 2592 }, { "epoch": 1.9, "learning_rate": 6.264692264994191e-06, "loss": 2.0028, "step": 2593 }, { "epoch": 1.9, "learning_rate": 6.2573565830915255e-06, "loss": 1.721, "step": 2594 }, { "epoch": 1.9, "learning_rate": 6.250023242503031e-06, "loss": 1.8173, "step": 2595 }, { "epoch": 1.9, "learning_rate": 6.242692247816291e-06, "loss": 1.9534, "step": 2596 }, { "epoch": 1.9, "learning_rate": 6.235363603617404e-06, "loss": 2.0201, "step": 2597 }, { "epoch": 1.9, "learning_rate": 6.228037314491013e-06, "loss": 1.8981, "step": 2598 }, { "epoch": 1.9, "learning_rate": 6.220713385020273e-06, "loss": 1.9846, "step": 2599 }, { "epoch": 1.9, "learning_rate": 6.2133918197868756e-06, "loss": 1.8068, "step": 2600 }, { "epoch": 1.91, "learning_rate": 6.206072623371027e-06, "loss": 2.0093, "step": 2601 }, { "epoch": 1.91, "learning_rate": 6.198755800351455e-06, "loss": 2.0138, "step": 2602 }, { "epoch": 1.91, "learning_rate": 6.191441355305397e-06, "loss": 1.9389, "step": 2603 }, { "epoch": 1.91, "learning_rate": 6.18412929280861e-06, "loss": 2.0297, "step": 2604 }, { "epoch": 1.91, "learning_rate": 6.1768196174353565e-06, "loss": 2.0404, "step": 2605 }, { "epoch": 1.91, "learning_rate": 6.169512333758405e-06, "loss": 2.0163, "step": 2606 }, { "epoch": 1.91, "learning_rate": 6.162207446349031e-06, "loss": 2.018, "step": 2607 }, { "epoch": 1.91, "learning_rate": 6.154904959777007e-06, "loss": 1.9028, "step": 2608 }, { "epoch": 1.91, "learning_rate": 6.147604878610606e-06, "loss": 1.8916, "step": 2609 }, { "epoch": 1.91, "learning_rate": 6.140307207416598e-06, "loss": 2.0167, "step": 2610 }, { "epoch": 1.91, "learning_rate": 6.133011950760239e-06, "loss": 1.8719, "step": 2611 }, { "epoch": 1.91, "learning_rate": 6.125719113205284e-06, "loss": 1.8648, "step": 2612 }, { "epoch": 1.91, "learning_rate": 6.118428699313965e-06, "loss": 1.9272, "step": 2613 }, { "epoch": 1.92, "learning_rate": 6.111140713647006e-06, "loss": 1.8593, "step": 2614 }, { "epoch": 1.92, "learning_rate": 6.103855160763603e-06, "loss": 2.0759, "step": 2615 }, { "epoch": 1.92, "learning_rate": 6.09657204522144e-06, "loss": 1.8346, "step": 2616 }, { "epoch": 1.92, "learning_rate": 6.089291371576668e-06, "loss": 1.9523, "step": 2617 }, { "epoch": 1.92, "learning_rate": 6.082013144383915e-06, "loss": 2.0419, "step": 2618 }, { "epoch": 1.92, "learning_rate": 6.074737368196279e-06, "loss": 1.962, "step": 2619 }, { "epoch": 1.92, "learning_rate": 6.067464047565322e-06, "loss": 2.0088, "step": 2620 }, { "epoch": 1.92, "learning_rate": 6.060193187041073e-06, "loss": 1.9056, "step": 2621 }, { "epoch": 1.92, "learning_rate": 6.0529247911720145e-06, "loss": 2.0207, "step": 2622 }, { "epoch": 1.92, "learning_rate": 6.045658864505097e-06, "loss": 1.9661, "step": 2623 }, { "epoch": 1.92, "learning_rate": 6.038395411585725e-06, "loss": 1.9479, "step": 2624 }, { "epoch": 1.92, "learning_rate": 6.031134436957747e-06, "loss": 1.8967, "step": 2625 }, { "epoch": 1.92, "learning_rate": 6.023875945163468e-06, "loss": 1.8649, "step": 2626 }, { "epoch": 1.92, "learning_rate": 6.016619940743639e-06, "loss": 1.8876, "step": 2627 }, { "epoch": 1.93, "learning_rate": 6.009366428237453e-06, "loss": 1.9102, "step": 2628 }, { "epoch": 1.93, "learning_rate": 6.002115412182552e-06, "loss": 2.0944, "step": 2629 }, { "epoch": 1.93, "learning_rate": 5.994866897115004e-06, "loss": 1.7032, "step": 2630 }, { "epoch": 1.93, "learning_rate": 5.987620887569314e-06, "loss": 1.845, "step": 2631 }, { "epoch": 1.93, "learning_rate": 5.980377388078431e-06, "loss": 1.8453, "step": 2632 }, { "epoch": 1.93, "learning_rate": 5.973136403173725e-06, "loss": 1.7782, "step": 2633 }, { "epoch": 1.93, "learning_rate": 5.965897937384992e-06, "loss": 2.0298, "step": 2634 }, { "epoch": 1.93, "learning_rate": 5.958661995240459e-06, "loss": 2.0466, "step": 2635 }, { "epoch": 1.93, "learning_rate": 5.951428581266765e-06, "loss": 1.9249, "step": 2636 }, { "epoch": 1.93, "learning_rate": 5.944197699988975e-06, "loss": 1.7704, "step": 2637 }, { "epoch": 1.93, "learning_rate": 5.936969355930568e-06, "loss": 1.9077, "step": 2638 }, { "epoch": 1.93, "learning_rate": 5.9297435536134315e-06, "loss": 1.8728, "step": 2639 }, { "epoch": 1.93, "learning_rate": 5.92252029755787e-06, "loss": 1.87, "step": 2640 }, { "epoch": 1.93, "learning_rate": 5.9152995922825864e-06, "loss": 1.6842, "step": 2641 }, { "epoch": 1.94, "learning_rate": 5.908081442304696e-06, "loss": 2.0912, "step": 2642 }, { "epoch": 1.94, "learning_rate": 5.900865852139715e-06, "loss": 1.9839, "step": 2643 }, { "epoch": 1.94, "learning_rate": 5.893652826301548e-06, "loss": 2.0752, "step": 2644 }, { "epoch": 1.94, "learning_rate": 5.886442369302512e-06, "loss": 1.8337, "step": 2645 }, { "epoch": 1.94, "learning_rate": 5.879234485653302e-06, "loss": 1.9167, "step": 2646 }, { "epoch": 1.94, "learning_rate": 5.872029179863008e-06, "loss": 1.904, "step": 2647 }, { "epoch": 1.94, "learning_rate": 5.86482645643911e-06, "loss": 2.0123, "step": 2648 }, { "epoch": 1.94, "learning_rate": 5.857626319887475e-06, "loss": 1.7904, "step": 2649 }, { "epoch": 1.94, "learning_rate": 5.85042877471234e-06, "loss": 1.8928, "step": 2650 }, { "epoch": 1.94, "learning_rate": 5.843233825416335e-06, "loss": 1.9301, "step": 2651 }, { "epoch": 1.94, "learning_rate": 5.836041476500458e-06, "loss": 1.8794, "step": 2652 }, { "epoch": 1.94, "learning_rate": 5.828851732464085e-06, "loss": 1.7514, "step": 2653 }, { "epoch": 1.94, "learning_rate": 5.821664597804951e-06, "loss": 1.7675, "step": 2654 }, { "epoch": 1.95, "learning_rate": 5.814480077019173e-06, "loss": 1.9201, "step": 2655 }, { "epoch": 1.95, "learning_rate": 5.8072981746012255e-06, "loss": 1.9643, "step": 2656 }, { "epoch": 1.95, "learning_rate": 5.800118895043947e-06, "loss": 1.9739, "step": 2657 }, { "epoch": 1.95, "learning_rate": 5.792942242838537e-06, "loss": 1.8166, "step": 2658 }, { "epoch": 1.95, "learning_rate": 5.785768222474544e-06, "loss": 2.0354, "step": 2659 }, { "epoch": 1.95, "learning_rate": 5.77859683843988e-06, "loss": 1.9715, "step": 2660 }, { "epoch": 1.95, "learning_rate": 5.7714280952207955e-06, "loss": 1.9435, "step": 2661 }, { "epoch": 1.95, "learning_rate": 5.764261997301901e-06, "loss": 1.9809, "step": 2662 }, { "epoch": 1.95, "learning_rate": 5.757098549166147e-06, "loss": 1.8413, "step": 2663 }, { "epoch": 1.95, "learning_rate": 5.749937755294831e-06, "loss": 2.0487, "step": 2664 }, { "epoch": 1.95, "learning_rate": 5.7427796201675775e-06, "loss": 1.7961, "step": 2665 }, { "epoch": 1.95, "learning_rate": 5.73562414826236e-06, "loss": 1.944, "step": 2666 }, { "epoch": 1.95, "learning_rate": 5.728471344055482e-06, "loss": 1.905, "step": 2667 }, { "epoch": 1.95, "learning_rate": 5.721321212021577e-06, "loss": 2.1058, "step": 2668 }, { "epoch": 1.96, "learning_rate": 5.714173756633614e-06, "loss": 1.8395, "step": 2669 }, { "epoch": 1.96, "learning_rate": 5.707028982362873e-06, "loss": 2.124, "step": 2670 }, { "epoch": 1.96, "learning_rate": 5.699886893678969e-06, "loss": 1.9064, "step": 2671 }, { "epoch": 1.96, "learning_rate": 5.692747495049833e-06, "loss": 1.9475, "step": 2672 }, { "epoch": 1.96, "learning_rate": 5.685610790941713e-06, "loss": 1.8889, "step": 2673 }, { "epoch": 1.96, "learning_rate": 5.678476785819178e-06, "loss": 1.8909, "step": 2674 }, { "epoch": 1.96, "learning_rate": 5.671345484145092e-06, "loss": 2.0267, "step": 2675 }, { "epoch": 1.96, "learning_rate": 5.664216890380647e-06, "loss": 1.8997, "step": 2676 }, { "epoch": 1.96, "learning_rate": 5.657091008985324e-06, "loss": 2.0375, "step": 2677 }, { "epoch": 1.96, "learning_rate": 5.649967844416919e-06, "loss": 1.8598, "step": 2678 }, { "epoch": 1.96, "learning_rate": 5.642847401131526e-06, "loss": 1.95, "step": 2679 }, { "epoch": 1.96, "learning_rate": 5.635729683583533e-06, "loss": 2.0295, "step": 2680 }, { "epoch": 1.96, "learning_rate": 5.6286146962256315e-06, "loss": 1.8579, "step": 2681 }, { "epoch": 1.96, "learning_rate": 5.621502443508791e-06, "loss": 1.9461, "step": 2682 }, { "epoch": 1.97, "learning_rate": 5.61439292988228e-06, "loss": 1.8395, "step": 2683 }, { "epoch": 1.97, "learning_rate": 5.607286159793652e-06, "loss": 1.8553, "step": 2684 }, { "epoch": 1.97, "learning_rate": 5.6001821376887454e-06, "loss": 1.8446, "step": 2685 }, { "epoch": 1.97, "learning_rate": 5.59308086801168e-06, "loss": 2.0834, "step": 2686 }, { "epoch": 1.97, "learning_rate": 5.585982355204844e-06, "loss": 2.1339, "step": 2687 }, { "epoch": 1.97, "learning_rate": 5.578886603708914e-06, "loss": 2.0683, "step": 2688 }, { "epoch": 1.97, "learning_rate": 5.5717936179628314e-06, "loss": 2.0613, "step": 2689 }, { "epoch": 1.97, "learning_rate": 5.5647034024038125e-06, "loss": 1.9425, "step": 2690 }, { "epoch": 1.97, "learning_rate": 5.557615961467338e-06, "loss": 1.8332, "step": 2691 }, { "epoch": 1.97, "learning_rate": 5.550531299587153e-06, "loss": 2.0134, "step": 2692 }, { "epoch": 1.97, "learning_rate": 5.543449421195257e-06, "loss": 1.8629, "step": 2693 }, { "epoch": 1.97, "learning_rate": 5.536370330721921e-06, "loss": 1.8604, "step": 2694 }, { "epoch": 1.97, "learning_rate": 5.5292940325956625e-06, "loss": 2.0295, "step": 2695 }, { "epoch": 1.98, "learning_rate": 5.5222205312432605e-06, "loss": 1.8703, "step": 2696 }, { "epoch": 1.98, "learning_rate": 5.515149831089739e-06, "loss": 1.9645, "step": 2697 }, { "epoch": 1.98, "learning_rate": 5.508081936558366e-06, "loss": 1.9738, "step": 2698 }, { "epoch": 1.98, "learning_rate": 5.5010168520706596e-06, "loss": 1.9702, "step": 2699 }, { "epoch": 1.98, "learning_rate": 5.493954582046381e-06, "loss": 1.8369, "step": 2700 }, { "epoch": 1.98, "learning_rate": 5.4868951309035265e-06, "loss": 1.9803, "step": 2701 }, { "epoch": 1.98, "learning_rate": 5.479838503058338e-06, "loss": 1.9841, "step": 2702 }, { "epoch": 1.98, "learning_rate": 5.4727847029252735e-06, "loss": 1.9044, "step": 2703 }, { "epoch": 1.98, "learning_rate": 5.465733734917039e-06, "loss": 1.8034, "step": 2704 }, { "epoch": 1.98, "learning_rate": 5.458685603444562e-06, "loss": 1.9754, "step": 2705 }, { "epoch": 1.98, "learning_rate": 5.451640312916995e-06, "loss": 1.9127, "step": 2706 }, { "epoch": 1.98, "learning_rate": 5.444597867741721e-06, "loss": 1.9579, "step": 2707 }, { "epoch": 1.98, "learning_rate": 5.437558272324326e-06, "loss": 1.9732, "step": 2708 }, { "epoch": 1.98, "learning_rate": 5.430521531068634e-06, "loss": 2.0431, "step": 2709 }, { "epoch": 1.99, "learning_rate": 5.423487648376663e-06, "loss": 1.9692, "step": 2710 }, { "epoch": 1.99, "learning_rate": 5.416456628648659e-06, "loss": 2.2089, "step": 2711 }, { "epoch": 1.99, "learning_rate": 5.409428476283068e-06, "loss": 1.931, "step": 2712 }, { "epoch": 1.99, "learning_rate": 5.402403195676549e-06, "loss": 1.8944, "step": 2713 }, { "epoch": 1.99, "learning_rate": 5.3953807912239635e-06, "loss": 1.9839, "step": 2714 }, { "epoch": 1.99, "learning_rate": 5.388361267318362e-06, "loss": 1.8873, "step": 2715 }, { "epoch": 1.99, "learning_rate": 5.381344628351006e-06, "loss": 1.8285, "step": 2716 }, { "epoch": 1.99, "learning_rate": 5.374330878711349e-06, "loss": 1.8286, "step": 2717 }, { "epoch": 1.99, "learning_rate": 5.3673200227870345e-06, "loss": 1.9026, "step": 2718 }, { "epoch": 1.99, "learning_rate": 5.360312064963904e-06, "loss": 1.8152, "step": 2719 }, { "epoch": 1.99, "learning_rate": 5.353307009625971e-06, "loss": 1.8173, "step": 2720 }, { "epoch": 1.99, "learning_rate": 5.346304861155445e-06, "loss": 2.0129, "step": 2721 }, { "epoch": 1.99, "learning_rate": 5.339305623932716e-06, "loss": 1.9534, "step": 2722 }, { "epoch": 1.99, "learning_rate": 5.332309302336346e-06, "loss": 1.9361, "step": 2723 }, { "epoch": 2.0, "learning_rate": 5.32531590074308e-06, "loss": 2.1157, "step": 2724 }, { "epoch": 2.0, "learning_rate": 5.318325423527837e-06, "loss": 1.8209, "step": 2725 }, { "epoch": 2.0, "learning_rate": 5.311337875063697e-06, "loss": 1.9333, "step": 2726 }, { "epoch": 2.0, "learning_rate": 5.304353259721917e-06, "loss": 1.9302, "step": 2727 }, { "epoch": 2.0, "learning_rate": 5.297371581871918e-06, "loss": 1.9259, "step": 2728 }, { "epoch": 2.0, "learning_rate": 5.290392845881281e-06, "loss": 2.072, "step": 2729 }, { "epoch": 2.0, "learning_rate": 5.2834170561157514e-06, "loss": 1.8643, "step": 2730 }, { "epoch": 2.0, "learning_rate": 5.276444216939219e-06, "loss": 1.6796, "step": 2731 }, { "epoch": 2.0, "learning_rate": 5.269474332713742e-06, "loss": 1.5868, "step": 2732 }, { "epoch": 2.0, "learning_rate": 5.262507407799522e-06, "loss": 1.6698, "step": 2733 }, { "epoch": 2.0, "learning_rate": 5.2555434465549135e-06, "loss": 1.5823, "step": 2734 }, { "epoch": 2.0, "learning_rate": 5.248582453336419e-06, "loss": 1.5859, "step": 2735 }, { "epoch": 2.0, "learning_rate": 5.241624432498673e-06, "loss": 1.6478, "step": 2736 }, { "epoch": 2.01, "learning_rate": 5.23466938839446e-06, "loss": 1.3928, "step": 2737 }, { "epoch": 2.01, "learning_rate": 5.227717325374706e-06, "loss": 1.5575, "step": 2738 }, { "epoch": 2.01, "learning_rate": 5.220768247788458e-06, "loss": 1.391, "step": 2739 }, { "epoch": 2.01, "learning_rate": 5.2138221599829084e-06, "loss": 1.4858, "step": 2740 }, { "epoch": 2.01, "learning_rate": 5.206879066303376e-06, "loss": 1.4969, "step": 2741 }, { "epoch": 2.01, "learning_rate": 5.1999389710933015e-06, "loss": 1.6263, "step": 2742 }, { "epoch": 2.01, "learning_rate": 5.193001878694255e-06, "loss": 1.4258, "step": 2743 }, { "epoch": 2.01, "learning_rate": 5.186067793445926e-06, "loss": 1.3421, "step": 2744 }, { "epoch": 2.01, "learning_rate": 5.179136719686124e-06, "loss": 1.4294, "step": 2745 }, { "epoch": 2.01, "learning_rate": 5.172208661750776e-06, "loss": 1.6069, "step": 2746 }, { "epoch": 2.01, "learning_rate": 5.165283623973923e-06, "loss": 1.4161, "step": 2747 }, { "epoch": 2.01, "learning_rate": 5.158361610687704e-06, "loss": 1.317, "step": 2748 }, { "epoch": 2.01, "learning_rate": 5.151442626222382e-06, "loss": 1.4937, "step": 2749 }, { "epoch": 2.01, "learning_rate": 5.144526674906321e-06, "loss": 1.4021, "step": 2750 }, { "epoch": 2.02, "learning_rate": 5.137613761065983e-06, "loss": 1.4844, "step": 2751 }, { "epoch": 2.02, "learning_rate": 5.130703889025936e-06, "loss": 1.5272, "step": 2752 }, { "epoch": 2.02, "learning_rate": 5.123797063108842e-06, "loss": 1.5956, "step": 2753 }, { "epoch": 2.02, "learning_rate": 5.116893287635448e-06, "loss": 1.5819, "step": 2754 }, { "epoch": 2.02, "learning_rate": 5.109992566924609e-06, "loss": 1.5294, "step": 2755 }, { "epoch": 2.02, "learning_rate": 5.10309490529326e-06, "loss": 1.347, "step": 2756 }, { "epoch": 2.02, "learning_rate": 5.096200307056426e-06, "loss": 1.5037, "step": 2757 }, { "epoch": 2.02, "learning_rate": 5.089308776527216e-06, "loss": 1.5012, "step": 2758 }, { "epoch": 2.02, "learning_rate": 5.082420318016812e-06, "loss": 1.4531, "step": 2759 }, { "epoch": 2.02, "learning_rate": 5.075534935834481e-06, "loss": 1.4419, "step": 2760 }, { "epoch": 2.02, "learning_rate": 5.068652634287565e-06, "loss": 1.5382, "step": 2761 }, { "epoch": 2.02, "learning_rate": 5.061773417681481e-06, "loss": 1.3771, "step": 2762 }, { "epoch": 2.02, "learning_rate": 5.054897290319713e-06, "loss": 1.339, "step": 2763 }, { "epoch": 2.02, "learning_rate": 5.04802425650381e-06, "loss": 1.4389, "step": 2764 }, { "epoch": 2.03, "learning_rate": 5.041154320533387e-06, "loss": 1.4504, "step": 2765 }, { "epoch": 2.03, "learning_rate": 5.034287486706126e-06, "loss": 1.4, "step": 2766 }, { "epoch": 2.03, "learning_rate": 5.027423759317764e-06, "loss": 1.3624, "step": 2767 }, { "epoch": 2.03, "learning_rate": 5.0205631426621004e-06, "loss": 1.6452, "step": 2768 }, { "epoch": 2.03, "learning_rate": 5.013705641030978e-06, "loss": 1.4434, "step": 2769 }, { "epoch": 2.03, "learning_rate": 5.006851258714295e-06, "loss": 1.3969, "step": 2770 }, { "epoch": 2.03, "learning_rate": 5.000000000000003e-06, "loss": 1.3858, "step": 2771 }, { "epoch": 2.03, "learning_rate": 4.9931518691740954e-06, "loss": 1.4051, "step": 2772 }, { "epoch": 2.03, "learning_rate": 4.986306870520612e-06, "loss": 1.4525, "step": 2773 }, { "epoch": 2.03, "learning_rate": 4.979465008321633e-06, "loss": 1.5104, "step": 2774 }, { "epoch": 2.03, "learning_rate": 4.972626286857268e-06, "loss": 1.5013, "step": 2775 }, { "epoch": 2.03, "learning_rate": 4.965790710405673e-06, "loss": 1.4496, "step": 2776 }, { "epoch": 2.03, "learning_rate": 4.958958283243031e-06, "loss": 1.4835, "step": 2777 }, { "epoch": 2.04, "learning_rate": 4.952129009643557e-06, "loss": 1.4421, "step": 2778 }, { "epoch": 2.04, "learning_rate": 4.945302893879491e-06, "loss": 1.5278, "step": 2779 }, { "epoch": 2.04, "learning_rate": 4.938479940221103e-06, "loss": 1.5338, "step": 2780 }, { "epoch": 2.04, "learning_rate": 4.931660152936673e-06, "loss": 1.4015, "step": 2781 }, { "epoch": 2.04, "learning_rate": 4.9248435362925095e-06, "loss": 1.3663, "step": 2782 }, { "epoch": 2.04, "learning_rate": 4.918030094552939e-06, "loss": 1.4822, "step": 2783 }, { "epoch": 2.04, "learning_rate": 4.911219831980299e-06, "loss": 1.5146, "step": 2784 }, { "epoch": 2.04, "learning_rate": 4.904412752834934e-06, "loss": 1.4829, "step": 2785 }, { "epoch": 2.04, "learning_rate": 4.897608861375206e-06, "loss": 1.3409, "step": 2786 }, { "epoch": 2.04, "learning_rate": 4.8908081618574685e-06, "loss": 1.4136, "step": 2787 }, { "epoch": 2.04, "learning_rate": 4.8840106585360944e-06, "loss": 1.3387, "step": 2788 }, { "epoch": 2.04, "learning_rate": 4.8772163556634485e-06, "loss": 1.4491, "step": 2789 }, { "epoch": 2.04, "learning_rate": 4.870425257489895e-06, "loss": 1.5658, "step": 2790 }, { "epoch": 2.04, "learning_rate": 4.863637368263796e-06, "loss": 1.4593, "step": 2791 }, { "epoch": 2.05, "learning_rate": 4.856852692231498e-06, "loss": 1.4642, "step": 2792 }, { "epoch": 2.05, "learning_rate": 4.850071233637345e-06, "loss": 1.4495, "step": 2793 }, { "epoch": 2.05, "learning_rate": 4.843292996723669e-06, "loss": 1.322, "step": 2794 }, { "epoch": 2.05, "learning_rate": 4.836517985730779e-06, "loss": 1.4871, "step": 2795 }, { "epoch": 2.05, "learning_rate": 4.829746204896978e-06, "loss": 1.5241, "step": 2796 }, { "epoch": 2.05, "learning_rate": 4.822977658458532e-06, "loss": 1.324, "step": 2797 }, { "epoch": 2.05, "learning_rate": 4.816212350649696e-06, "loss": 1.4949, "step": 2798 }, { "epoch": 2.05, "learning_rate": 4.809450285702697e-06, "loss": 1.4088, "step": 2799 }, { "epoch": 2.05, "learning_rate": 4.802691467847735e-06, "loss": 1.4962, "step": 2800 }, { "epoch": 2.05, "learning_rate": 4.795935901312968e-06, "loss": 1.4404, "step": 2801 }, { "epoch": 2.05, "learning_rate": 4.7891835903245345e-06, "loss": 1.5537, "step": 2802 }, { "epoch": 2.05, "learning_rate": 4.782434539106522e-06, "loss": 1.4311, "step": 2803 }, { "epoch": 2.05, "learning_rate": 4.775688751880992e-06, "loss": 1.3634, "step": 2804 }, { "epoch": 2.05, "learning_rate": 4.768946232867956e-06, "loss": 1.3982, "step": 2805 }, { "epoch": 2.06, "learning_rate": 4.762206986285384e-06, "loss": 1.4922, "step": 2806 }, { "epoch": 2.06, "learning_rate": 4.755471016349203e-06, "loss": 1.3329, "step": 2807 }, { "epoch": 2.06, "learning_rate": 4.748738327273277e-06, "loss": 1.5245, "step": 2808 }, { "epoch": 2.06, "learning_rate": 4.74200892326943e-06, "loss": 1.3428, "step": 2809 }, { "epoch": 2.06, "learning_rate": 4.735282808547427e-06, "loss": 1.5304, "step": 2810 }, { "epoch": 2.06, "learning_rate": 4.728559987314975e-06, "loss": 1.5592, "step": 2811 }, { "epoch": 2.06, "learning_rate": 4.721840463777721e-06, "loss": 1.5607, "step": 2812 }, { "epoch": 2.06, "learning_rate": 4.715124242139253e-06, "loss": 1.416, "step": 2813 }, { "epoch": 2.06, "learning_rate": 4.708411326601081e-06, "loss": 1.5386, "step": 2814 }, { "epoch": 2.06, "learning_rate": 4.701701721362664e-06, "loss": 1.499, "step": 2815 }, { "epoch": 2.06, "learning_rate": 4.694995430621372e-06, "loss": 1.4923, "step": 2816 }, { "epoch": 2.06, "learning_rate": 4.6882924585725155e-06, "loss": 1.5808, "step": 2817 }, { "epoch": 2.06, "learning_rate": 4.681592809409324e-06, "loss": 1.4905, "step": 2818 }, { "epoch": 2.07, "learning_rate": 4.674896487322953e-06, "loss": 1.4361, "step": 2819 }, { "epoch": 2.07, "learning_rate": 4.668203496502464e-06, "loss": 1.3649, "step": 2820 }, { "epoch": 2.07, "learning_rate": 4.661513841134846e-06, "loss": 1.4361, "step": 2821 }, { "epoch": 2.07, "learning_rate": 4.654827525404998e-06, "loss": 1.3581, "step": 2822 }, { "epoch": 2.07, "learning_rate": 4.648144553495732e-06, "loss": 1.3742, "step": 2823 }, { "epoch": 2.07, "learning_rate": 4.641464929587766e-06, "loss": 1.5172, "step": 2824 }, { "epoch": 2.07, "learning_rate": 4.634788657859719e-06, "loss": 1.4427, "step": 2825 }, { "epoch": 2.07, "learning_rate": 4.628115742488119e-06, "loss": 1.5462, "step": 2826 }, { "epoch": 2.07, "learning_rate": 4.621446187647397e-06, "loss": 1.221, "step": 2827 }, { "epoch": 2.07, "learning_rate": 4.614779997509874e-06, "loss": 1.438, "step": 2828 }, { "epoch": 2.07, "learning_rate": 4.608117176245773e-06, "loss": 1.3826, "step": 2829 }, { "epoch": 2.07, "learning_rate": 4.601457728023202e-06, "loss": 1.4774, "step": 2830 }, { "epoch": 2.07, "learning_rate": 4.594801657008169e-06, "loss": 1.3127, "step": 2831 }, { "epoch": 2.07, "learning_rate": 4.588148967364555e-06, "loss": 1.4634, "step": 2832 }, { "epoch": 2.08, "learning_rate": 4.581499663254139e-06, "loss": 1.46, "step": 2833 }, { "epoch": 2.08, "learning_rate": 4.574853748836577e-06, "loss": 1.4667, "step": 2834 }, { "epoch": 2.08, "learning_rate": 4.568211228269408e-06, "loss": 1.3803, "step": 2835 }, { "epoch": 2.08, "learning_rate": 4.561572105708038e-06, "loss": 1.4859, "step": 2836 }, { "epoch": 2.08, "learning_rate": 4.554936385305757e-06, "loss": 1.5286, "step": 2837 }, { "epoch": 2.08, "learning_rate": 4.548304071213723e-06, "loss": 1.4016, "step": 2838 }, { "epoch": 2.08, "learning_rate": 4.541675167580964e-06, "loss": 1.5032, "step": 2839 }, { "epoch": 2.08, "learning_rate": 4.5350496785543795e-06, "loss": 1.4688, "step": 2840 }, { "epoch": 2.08, "learning_rate": 4.528427608278718e-06, "loss": 1.4781, "step": 2841 }, { "epoch": 2.08, "learning_rate": 4.521808960896605e-06, "loss": 1.5066, "step": 2842 }, { "epoch": 2.08, "learning_rate": 4.515193740548515e-06, "loss": 1.4222, "step": 2843 }, { "epoch": 2.08, "learning_rate": 4.508581951372783e-06, "loss": 1.3882, "step": 2844 }, { "epoch": 2.08, "learning_rate": 4.501973597505603e-06, "loss": 1.3917, "step": 2845 }, { "epoch": 2.08, "learning_rate": 4.495368683081005e-06, "loss": 1.4505, "step": 2846 }, { "epoch": 2.09, "learning_rate": 4.488767212230884e-06, "loss": 1.469, "step": 2847 }, { "epoch": 2.09, "learning_rate": 4.4821691890849625e-06, "loss": 1.5225, "step": 2848 }, { "epoch": 2.09, "learning_rate": 4.4755746177708225e-06, "loss": 1.4039, "step": 2849 }, { "epoch": 2.09, "learning_rate": 4.468983502413882e-06, "loss": 1.3781, "step": 2850 }, { "epoch": 2.09, "learning_rate": 4.462395847137392e-06, "loss": 1.4826, "step": 2851 }, { "epoch": 2.09, "learning_rate": 4.455811656062449e-06, "loss": 1.3966, "step": 2852 }, { "epoch": 2.09, "learning_rate": 4.4492309333079685e-06, "loss": 1.3697, "step": 2853 }, { "epoch": 2.09, "learning_rate": 4.442653682990706e-06, "loss": 1.347, "step": 2854 }, { "epoch": 2.09, "learning_rate": 4.436079909225243e-06, "loss": 1.2763, "step": 2855 }, { "epoch": 2.09, "learning_rate": 4.429509616123987e-06, "loss": 1.4211, "step": 2856 }, { "epoch": 2.09, "learning_rate": 4.4229428077971685e-06, "loss": 1.4595, "step": 2857 }, { "epoch": 2.09, "learning_rate": 4.416379488352829e-06, "loss": 1.3735, "step": 2858 }, { "epoch": 2.09, "learning_rate": 4.409819661896839e-06, "loss": 1.4696, "step": 2859 }, { "epoch": 2.1, "learning_rate": 4.403263332532881e-06, "loss": 1.3135, "step": 2860 }, { "epoch": 2.1, "learning_rate": 4.396710504362445e-06, "loss": 1.6113, "step": 2861 }, { "epoch": 2.1, "learning_rate": 4.3901611814848396e-06, "loss": 1.4026, "step": 2862 }, { "epoch": 2.1, "learning_rate": 4.3836153679971714e-06, "loss": 1.3635, "step": 2863 }, { "epoch": 2.1, "learning_rate": 4.377073067994352e-06, "loss": 1.4018, "step": 2864 }, { "epoch": 2.1, "learning_rate": 4.3705342855691e-06, "loss": 1.3714, "step": 2865 }, { "epoch": 2.1, "learning_rate": 4.3639990248119335e-06, "loss": 1.4018, "step": 2866 }, { "epoch": 2.1, "learning_rate": 4.357467289811165e-06, "loss": 1.4338, "step": 2867 }, { "epoch": 2.1, "learning_rate": 4.350939084652906e-06, "loss": 1.587, "step": 2868 }, { "epoch": 2.1, "learning_rate": 4.344414413421047e-06, "loss": 1.5651, "step": 2869 }, { "epoch": 2.1, "learning_rate": 4.337893280197284e-06, "loss": 1.5649, "step": 2870 }, { "epoch": 2.1, "learning_rate": 4.331375689061089e-06, "loss": 1.5662, "step": 2871 }, { "epoch": 2.1, "learning_rate": 4.324861644089723e-06, "loss": 1.5068, "step": 2872 }, { "epoch": 2.1, "learning_rate": 4.318351149358231e-06, "loss": 1.2324, "step": 2873 }, { "epoch": 2.11, "learning_rate": 4.311844208939424e-06, "loss": 1.4472, "step": 2874 }, { "epoch": 2.11, "learning_rate": 4.305340826903904e-06, "loss": 1.42, "step": 2875 }, { "epoch": 2.11, "learning_rate": 4.298841007320042e-06, "loss": 1.5312, "step": 2876 }, { "epoch": 2.11, "learning_rate": 4.2923447542539785e-06, "loss": 1.3997, "step": 2877 }, { "epoch": 2.11, "learning_rate": 4.28585207176963e-06, "loss": 1.4412, "step": 2878 }, { "epoch": 2.11, "learning_rate": 4.2793629639286645e-06, "loss": 1.4521, "step": 2879 }, { "epoch": 2.11, "learning_rate": 4.272877434790531e-06, "loss": 1.37, "step": 2880 }, { "epoch": 2.11, "learning_rate": 4.266395488412425e-06, "loss": 1.4831, "step": 2881 }, { "epoch": 2.11, "learning_rate": 4.2599171288493106e-06, "loss": 1.4118, "step": 2882 }, { "epoch": 2.11, "learning_rate": 4.253442360153905e-06, "loss": 1.3566, "step": 2883 }, { "epoch": 2.11, "learning_rate": 4.246971186376679e-06, "loss": 1.5171, "step": 2884 }, { "epoch": 2.11, "learning_rate": 4.240503611565859e-06, "loss": 1.4484, "step": 2885 }, { "epoch": 2.11, "learning_rate": 4.234039639767406e-06, "loss": 1.3874, "step": 2886 }, { "epoch": 2.12, "learning_rate": 4.22757927502504e-06, "loss": 1.4007, "step": 2887 }, { "epoch": 2.12, "learning_rate": 4.221122521380225e-06, "loss": 1.4288, "step": 2888 }, { "epoch": 2.12, "learning_rate": 4.214669382872157e-06, "loss": 1.456, "step": 2889 }, { "epoch": 2.12, "learning_rate": 4.2082198635377824e-06, "loss": 1.4497, "step": 2890 }, { "epoch": 2.12, "learning_rate": 4.201773967411768e-06, "loss": 1.3964, "step": 2891 }, { "epoch": 2.12, "learning_rate": 4.195331698526526e-06, "loss": 1.4486, "step": 2892 }, { "epoch": 2.12, "learning_rate": 4.1888930609122e-06, "loss": 1.3776, "step": 2893 }, { "epoch": 2.12, "learning_rate": 4.182458058596651e-06, "loss": 1.6099, "step": 2894 }, { "epoch": 2.12, "learning_rate": 4.176026695605476e-06, "loss": 1.3681, "step": 2895 }, { "epoch": 2.12, "learning_rate": 4.169598975961996e-06, "loss": 1.4946, "step": 2896 }, { "epoch": 2.12, "learning_rate": 4.1631749036872405e-06, "loss": 1.4434, "step": 2897 }, { "epoch": 2.12, "learning_rate": 4.1567544827999705e-06, "loss": 1.4204, "step": 2898 }, { "epoch": 2.12, "learning_rate": 4.150337717316658e-06, "loss": 1.3883, "step": 2899 }, { "epoch": 2.12, "learning_rate": 4.143924611251489e-06, "loss": 1.4487, "step": 2900 }, { "epoch": 2.13, "learning_rate": 4.13751516861636e-06, "loss": 1.4599, "step": 2901 }, { "epoch": 2.13, "learning_rate": 4.131109393420873e-06, "loss": 1.3922, "step": 2902 }, { "epoch": 2.13, "learning_rate": 4.124707289672336e-06, "loss": 1.5499, "step": 2903 }, { "epoch": 2.13, "learning_rate": 4.118308861375766e-06, "loss": 1.5548, "step": 2904 }, { "epoch": 2.13, "learning_rate": 4.111914112533877e-06, "loss": 1.4657, "step": 2905 }, { "epoch": 2.13, "learning_rate": 4.1055230471470815e-06, "loss": 1.4989, "step": 2906 }, { "epoch": 2.13, "learning_rate": 4.099135669213483e-06, "loss": 1.5535, "step": 2907 }, { "epoch": 2.13, "learning_rate": 4.092751982728887e-06, "loss": 1.3538, "step": 2908 }, { "epoch": 2.13, "learning_rate": 4.086371991686785e-06, "loss": 1.469, "step": 2909 }, { "epoch": 2.13, "learning_rate": 4.079995700078352e-06, "loss": 1.377, "step": 2910 }, { "epoch": 2.13, "learning_rate": 4.073623111892458e-06, "loss": 1.4008, "step": 2911 }, { "epoch": 2.13, "learning_rate": 4.067254231115655e-06, "loss": 1.3639, "step": 2912 }, { "epoch": 2.13, "learning_rate": 4.060889061732165e-06, "loss": 1.6068, "step": 2913 }, { "epoch": 2.13, "learning_rate": 4.0545276077238995e-06, "loss": 1.4065, "step": 2914 }, { "epoch": 2.14, "learning_rate": 4.0481698730704426e-06, "loss": 1.5202, "step": 2915 }, { "epoch": 2.14, "learning_rate": 4.04181586174905e-06, "loss": 1.3377, "step": 2916 }, { "epoch": 2.14, "learning_rate": 4.035465577734652e-06, "loss": 1.4739, "step": 2917 }, { "epoch": 2.14, "learning_rate": 4.029119024999845e-06, "loss": 1.334, "step": 2918 }, { "epoch": 2.14, "learning_rate": 4.022776207514885e-06, "loss": 1.4442, "step": 2919 }, { "epoch": 2.14, "learning_rate": 4.016437129247701e-06, "loss": 1.3185, "step": 2920 }, { "epoch": 2.14, "learning_rate": 4.0101017941638775e-06, "loss": 1.5016, "step": 2921 }, { "epoch": 2.14, "learning_rate": 4.00377020622666e-06, "loss": 1.4425, "step": 2922 }, { "epoch": 2.14, "learning_rate": 3.9974423693969526e-06, "loss": 1.5263, "step": 2923 }, { "epoch": 2.14, "learning_rate": 3.991118287633303e-06, "loss": 1.5865, "step": 2924 }, { "epoch": 2.14, "learning_rate": 3.984797964891914e-06, "loss": 1.426, "step": 2925 }, { "epoch": 2.14, "learning_rate": 3.978481405126642e-06, "loss": 1.3626, "step": 2926 }, { "epoch": 2.14, "learning_rate": 3.972168612288985e-06, "loss": 1.4045, "step": 2927 }, { "epoch": 2.15, "learning_rate": 3.9658595903280836e-06, "loss": 1.3457, "step": 2928 }, { "epoch": 2.15, "learning_rate": 3.959554343190728e-06, "loss": 1.3318, "step": 2929 }, { "epoch": 2.15, "learning_rate": 3.9532528748213304e-06, "loss": 1.3578, "step": 2930 }, { "epoch": 2.15, "learning_rate": 3.946955189161954e-06, "loss": 1.5046, "step": 2931 }, { "epoch": 2.15, "learning_rate": 3.94066129015229e-06, "loss": 1.5705, "step": 2932 }, { "epoch": 2.15, "learning_rate": 3.93437118172966e-06, "loss": 1.3338, "step": 2933 }, { "epoch": 2.15, "learning_rate": 3.928084867829021e-06, "loss": 1.4029, "step": 2934 }, { "epoch": 2.15, "learning_rate": 3.9218023523829406e-06, "loss": 1.549, "step": 2935 }, { "epoch": 2.15, "learning_rate": 3.915523639321626e-06, "loss": 1.387, "step": 2936 }, { "epoch": 2.15, "learning_rate": 3.9092487325729e-06, "loss": 1.5359, "step": 2937 }, { "epoch": 2.15, "learning_rate": 3.902977636062203e-06, "loss": 1.4262, "step": 2938 }, { "epoch": 2.15, "learning_rate": 3.896710353712597e-06, "loss": 1.531, "step": 2939 }, { "epoch": 2.15, "learning_rate": 3.890446889444751e-06, "loss": 1.519, "step": 2940 }, { "epoch": 2.15, "learning_rate": 3.8841872471769435e-06, "loss": 1.2397, "step": 2941 }, { "epoch": 2.16, "learning_rate": 3.877931430825072e-06, "loss": 1.4029, "step": 2942 }, { "epoch": 2.16, "learning_rate": 3.871679444302635e-06, "loss": 1.2701, "step": 2943 }, { "epoch": 2.16, "learning_rate": 3.865431291520736e-06, "loss": 1.5357, "step": 2944 }, { "epoch": 2.16, "learning_rate": 3.859186976388083e-06, "loss": 1.4804, "step": 2945 }, { "epoch": 2.16, "learning_rate": 3.8529465028109725e-06, "loss": 1.4444, "step": 2946 }, { "epoch": 2.16, "learning_rate": 3.84670987469331e-06, "loss": 1.5076, "step": 2947 }, { "epoch": 2.16, "learning_rate": 3.84047709593659e-06, "loss": 1.3503, "step": 2948 }, { "epoch": 2.16, "learning_rate": 3.834248170439901e-06, "loss": 1.3871, "step": 2949 }, { "epoch": 2.16, "learning_rate": 3.828023102099919e-06, "loss": 1.3443, "step": 2950 }, { "epoch": 2.16, "learning_rate": 3.821801894810911e-06, "loss": 1.4837, "step": 2951 }, { "epoch": 2.16, "learning_rate": 3.81558455246472e-06, "loss": 1.5693, "step": 2952 }, { "epoch": 2.16, "learning_rate": 3.8093710789507765e-06, "loss": 1.4491, "step": 2953 }, { "epoch": 2.16, "learning_rate": 3.8031614781560943e-06, "loss": 1.2329, "step": 2954 }, { "epoch": 2.16, "learning_rate": 3.7969557539652636e-06, "loss": 1.317, "step": 2955 }, { "epoch": 2.17, "learning_rate": 3.7907539102604394e-06, "loss": 1.447, "step": 2956 }, { "epoch": 2.17, "learning_rate": 3.7845559509213637e-06, "loss": 1.3505, "step": 2957 }, { "epoch": 2.17, "learning_rate": 3.7783618798253354e-06, "loss": 1.5664, "step": 2958 }, { "epoch": 2.17, "learning_rate": 3.7721717008472294e-06, "loss": 1.3269, "step": 2959 }, { "epoch": 2.17, "learning_rate": 3.7659854178594847e-06, "loss": 1.3059, "step": 2960 }, { "epoch": 2.17, "learning_rate": 3.7598030347321e-06, "loss": 1.4379, "step": 2961 }, { "epoch": 2.17, "learning_rate": 3.7536245553326423e-06, "loss": 1.3868, "step": 2962 }, { "epoch": 2.17, "learning_rate": 3.7474499835262214e-06, "loss": 1.4763, "step": 2963 }, { "epoch": 2.17, "learning_rate": 3.741279323175515e-06, "loss": 1.4802, "step": 2964 }, { "epoch": 2.17, "learning_rate": 3.7351125781407516e-06, "loss": 1.4563, "step": 2965 }, { "epoch": 2.17, "learning_rate": 3.728949752279709e-06, "loss": 1.4392, "step": 2966 }, { "epoch": 2.17, "learning_rate": 3.7227908494477174e-06, "loss": 1.3827, "step": 2967 }, { "epoch": 2.17, "learning_rate": 3.716635873497639e-06, "loss": 1.6514, "step": 2968 }, { "epoch": 2.18, "learning_rate": 3.7104848282798977e-06, "loss": 1.4347, "step": 2969 }, { "epoch": 2.18, "learning_rate": 3.7043377176424467e-06, "loss": 1.469, "step": 2970 }, { "epoch": 2.18, "learning_rate": 3.6981945454307845e-06, "loss": 1.3238, "step": 2971 }, { "epoch": 2.18, "learning_rate": 3.6920553154879367e-06, "loss": 1.4513, "step": 2972 }, { "epoch": 2.18, "learning_rate": 3.685920031654476e-06, "loss": 1.4041, "step": 2973 }, { "epoch": 2.18, "learning_rate": 3.6797886977684917e-06, "loss": 1.4054, "step": 2974 }, { "epoch": 2.18, "learning_rate": 3.6736613176656123e-06, "loss": 1.4371, "step": 2975 }, { "epoch": 2.18, "learning_rate": 3.66753789517899e-06, "loss": 1.4348, "step": 2976 }, { "epoch": 2.18, "learning_rate": 3.6614184341393013e-06, "loss": 1.4215, "step": 2977 }, { "epoch": 2.18, "learning_rate": 3.6553029383747475e-06, "loss": 1.352, "step": 2978 }, { "epoch": 2.18, "learning_rate": 3.6491914117110405e-06, "loss": 1.4091, "step": 2979 }, { "epoch": 2.18, "learning_rate": 3.6430838579714177e-06, "loss": 1.4422, "step": 2980 }, { "epoch": 2.18, "learning_rate": 3.6369802809766287e-06, "loss": 1.3298, "step": 2981 }, { "epoch": 2.18, "learning_rate": 3.630880684544934e-06, "loss": 1.5374, "step": 2982 }, { "epoch": 2.19, "learning_rate": 3.6247850724921063e-06, "loss": 1.4576, "step": 2983 }, { "epoch": 2.19, "learning_rate": 3.618693448631427e-06, "loss": 1.5869, "step": 2984 }, { "epoch": 2.19, "learning_rate": 3.6126058167736742e-06, "loss": 1.5668, "step": 2985 }, { "epoch": 2.19, "learning_rate": 3.60652218072714e-06, "loss": 1.5002, "step": 2986 }, { "epoch": 2.19, "learning_rate": 3.6004425442976055e-06, "loss": 1.5165, "step": 2987 }, { "epoch": 2.19, "learning_rate": 3.594366911288358e-06, "loss": 1.346, "step": 2988 }, { "epoch": 2.19, "learning_rate": 3.5882952855001786e-06, "loss": 1.4773, "step": 2989 }, { "epoch": 2.19, "learning_rate": 3.5822276707313443e-06, "loss": 1.5019, "step": 2990 }, { "epoch": 2.19, "learning_rate": 3.5761640707776115e-06, "loss": 1.5769, "step": 2991 }, { "epoch": 2.19, "learning_rate": 3.570104489432238e-06, "loss": 1.4673, "step": 2992 }, { "epoch": 2.19, "learning_rate": 3.5640489304859614e-06, "loss": 1.3702, "step": 2993 }, { "epoch": 2.19, "learning_rate": 3.5579973977270057e-06, "loss": 1.3157, "step": 2994 }, { "epoch": 2.19, "learning_rate": 3.551949894941076e-06, "loss": 1.4245, "step": 2995 }, { "epoch": 2.19, "learning_rate": 3.5459064259113496e-06, "loss": 1.6121, "step": 2996 }, { "epoch": 2.2, "learning_rate": 3.5398669944184894e-06, "loss": 1.4292, "step": 2997 }, { "epoch": 2.2, "learning_rate": 3.5338316042406283e-06, "loss": 1.5306, "step": 2998 }, { "epoch": 2.2, "learning_rate": 3.527800259153371e-06, "loss": 1.3143, "step": 2999 }, { "epoch": 2.2, "learning_rate": 3.521772962929798e-06, "loss": 1.5451, "step": 3000 }, { "epoch": 2.2, "learning_rate": 3.515749719340442e-06, "loss": 1.234, "step": 3001 }, { "epoch": 2.2, "learning_rate": 3.5097305321533183e-06, "loss": 1.3499, "step": 3002 }, { "epoch": 2.2, "learning_rate": 3.5037154051338883e-06, "loss": 1.5044, "step": 3003 }, { "epoch": 2.2, "learning_rate": 3.4977043420450853e-06, "loss": 1.5075, "step": 3004 }, { "epoch": 2.2, "learning_rate": 3.491697346647295e-06, "loss": 1.5248, "step": 3005 }, { "epoch": 2.2, "learning_rate": 3.4856944226983637e-06, "loss": 1.4058, "step": 3006 }, { "epoch": 2.2, "learning_rate": 3.4796955739535798e-06, "loss": 1.3089, "step": 3007 }, { "epoch": 2.2, "learning_rate": 3.473700804165694e-06, "loss": 1.4396, "step": 3008 }, { "epoch": 2.2, "learning_rate": 3.467710117084897e-06, "loss": 1.3992, "step": 3009 }, { "epoch": 2.21, "learning_rate": 3.4617235164588327e-06, "loss": 1.4114, "step": 3010 }, { "epoch": 2.21, "learning_rate": 3.455741006032586e-06, "loss": 1.2773, "step": 3011 }, { "epoch": 2.21, "learning_rate": 3.4497625895486755e-06, "loss": 1.4213, "step": 3012 }, { "epoch": 2.21, "learning_rate": 3.443788270747068e-06, "loss": 1.3645, "step": 3013 }, { "epoch": 2.21, "learning_rate": 3.437818053365165e-06, "loss": 1.3765, "step": 3014 }, { "epoch": 2.21, "learning_rate": 3.4318519411378006e-06, "loss": 1.4434, "step": 3015 }, { "epoch": 2.21, "learning_rate": 3.4258899377972455e-06, "loss": 1.3956, "step": 3016 }, { "epoch": 2.21, "learning_rate": 3.419932047073188e-06, "loss": 1.5954, "step": 3017 }, { "epoch": 2.21, "learning_rate": 3.4139782726927597e-06, "loss": 1.4192, "step": 3018 }, { "epoch": 2.21, "learning_rate": 3.4080286183805012e-06, "loss": 1.4563, "step": 3019 }, { "epoch": 2.21, "learning_rate": 3.4020830878583867e-06, "loss": 1.4641, "step": 3020 }, { "epoch": 2.21, "learning_rate": 3.3961416848458073e-06, "loss": 1.3394, "step": 3021 }, { "epoch": 2.21, "learning_rate": 3.3902044130595735e-06, "loss": 1.4026, "step": 3022 }, { "epoch": 2.21, "learning_rate": 3.3842712762139117e-06, "loss": 1.5368, "step": 3023 }, { "epoch": 2.22, "learning_rate": 3.3783422780204535e-06, "loss": 1.5661, "step": 3024 }, { "epoch": 2.22, "learning_rate": 3.372417422188252e-06, "loss": 1.3673, "step": 3025 }, { "epoch": 2.22, "learning_rate": 3.3664967124237645e-06, "loss": 1.4634, "step": 3026 }, { "epoch": 2.22, "learning_rate": 3.3605801524308535e-06, "loss": 1.4996, "step": 3027 }, { "epoch": 2.22, "learning_rate": 3.3546677459107925e-06, "loss": 1.459, "step": 3028 }, { "epoch": 2.22, "learning_rate": 3.3487594965622415e-06, "loss": 1.4421, "step": 3029 }, { "epoch": 2.22, "learning_rate": 3.342855408081276e-06, "loss": 1.4564, "step": 3030 }, { "epoch": 2.22, "learning_rate": 3.3369554841613592e-06, "loss": 1.4018, "step": 3031 }, { "epoch": 2.22, "learning_rate": 3.3310597284933545e-06, "loss": 1.3843, "step": 3032 }, { "epoch": 2.22, "learning_rate": 3.325168144765515e-06, "loss": 1.5225, "step": 3033 }, { "epoch": 2.22, "learning_rate": 3.319280736663484e-06, "loss": 1.3446, "step": 3034 }, { "epoch": 2.22, "learning_rate": 3.3133975078702864e-06, "loss": 1.4236, "step": 3035 }, { "epoch": 2.22, "learning_rate": 3.307518462066344e-06, "loss": 1.4579, "step": 3036 }, { "epoch": 2.22, "learning_rate": 3.301643602929455e-06, "loss": 1.585, "step": 3037 }, { "epoch": 2.23, "learning_rate": 3.295772934134802e-06, "loss": 1.3726, "step": 3038 }, { "epoch": 2.23, "learning_rate": 3.2899064593549477e-06, "loss": 1.4694, "step": 3039 }, { "epoch": 2.23, "learning_rate": 3.284044182259819e-06, "loss": 1.405, "step": 3040 }, { "epoch": 2.23, "learning_rate": 3.2781861065167308e-06, "loss": 1.5583, "step": 3041 }, { "epoch": 2.23, "learning_rate": 3.272332235790363e-06, "loss": 1.4544, "step": 3042 }, { "epoch": 2.23, "learning_rate": 3.2664825737427685e-06, "loss": 1.4449, "step": 3043 }, { "epoch": 2.23, "learning_rate": 3.260637124033367e-06, "loss": 1.4123, "step": 3044 }, { "epoch": 2.23, "learning_rate": 3.254795890318935e-06, "loss": 1.305, "step": 3045 }, { "epoch": 2.23, "learning_rate": 3.2489588762536216e-06, "loss": 1.5217, "step": 3046 }, { "epoch": 2.23, "learning_rate": 3.2431260854889324e-06, "loss": 1.5001, "step": 3047 }, { "epoch": 2.23, "learning_rate": 3.2372975216737335e-06, "loss": 1.4252, "step": 3048 }, { "epoch": 2.23, "learning_rate": 3.2314731884542392e-06, "loss": 1.4473, "step": 3049 }, { "epoch": 2.23, "learning_rate": 3.2256530894740245e-06, "loss": 1.5132, "step": 3050 }, { "epoch": 2.24, "learning_rate": 3.2198372283740176e-06, "loss": 1.4356, "step": 3051 }, { "epoch": 2.24, "learning_rate": 3.214025608792484e-06, "loss": 1.2954, "step": 3052 }, { "epoch": 2.24, "learning_rate": 3.208218234365047e-06, "loss": 1.4812, "step": 3053 }, { "epoch": 2.24, "learning_rate": 3.2024151087246704e-06, "loss": 1.55, "step": 3054 }, { "epoch": 2.24, "learning_rate": 3.1966162355016597e-06, "loss": 1.4537, "step": 3055 }, { "epoch": 2.24, "learning_rate": 3.190821618323664e-06, "loss": 1.3828, "step": 3056 }, { "epoch": 2.24, "learning_rate": 3.1850312608156596e-06, "loss": 1.5226, "step": 3057 }, { "epoch": 2.24, "learning_rate": 3.179245166599968e-06, "loss": 1.5649, "step": 3058 }, { "epoch": 2.24, "learning_rate": 3.173463339296242e-06, "loss": 1.4478, "step": 3059 }, { "epoch": 2.24, "learning_rate": 3.1676857825214623e-06, "loss": 1.3898, "step": 3060 }, { "epoch": 2.24, "learning_rate": 3.161912499889943e-06, "loss": 1.4077, "step": 3061 }, { "epoch": 2.24, "learning_rate": 3.156143495013315e-06, "loss": 1.4176, "step": 3062 }, { "epoch": 2.24, "learning_rate": 3.150378771500542e-06, "loss": 1.3231, "step": 3063 }, { "epoch": 2.24, "learning_rate": 3.1446183329579082e-06, "loss": 1.2293, "step": 3064 }, { "epoch": 2.25, "learning_rate": 3.138862182989011e-06, "loss": 1.4242, "step": 3065 }, { "epoch": 2.25, "learning_rate": 3.1331103251947703e-06, "loss": 1.5719, "step": 3066 }, { "epoch": 2.25, "learning_rate": 3.127362763173425e-06, "loss": 1.2977, "step": 3067 }, { "epoch": 2.25, "learning_rate": 3.121619500520514e-06, "loss": 1.5322, "step": 3068 }, { "epoch": 2.25, "learning_rate": 3.1158805408288995e-06, "loss": 1.3478, "step": 3069 }, { "epoch": 2.25, "learning_rate": 3.1101458876887434e-06, "loss": 1.4312, "step": 3070 }, { "epoch": 2.25, "learning_rate": 3.104415544687519e-06, "loss": 1.4848, "step": 3071 }, { "epoch": 2.25, "learning_rate": 3.098689515410004e-06, "loss": 1.3563, "step": 3072 }, { "epoch": 2.25, "learning_rate": 3.092967803438267e-06, "loss": 1.3842, "step": 3073 }, { "epoch": 2.25, "learning_rate": 3.0872504123516866e-06, "loss": 1.4542, "step": 3074 }, { "epoch": 2.25, "learning_rate": 3.081537345726936e-06, "loss": 1.3572, "step": 3075 }, { "epoch": 2.25, "learning_rate": 3.0758286071379816e-06, "loss": 1.4058, "step": 3076 }, { "epoch": 2.25, "learning_rate": 3.070124200156086e-06, "loss": 1.4306, "step": 3077 }, { "epoch": 2.25, "learning_rate": 3.0644241283497934e-06, "loss": 1.4093, "step": 3078 }, { "epoch": 2.26, "learning_rate": 3.0587283952849455e-06, "loss": 1.4861, "step": 3079 }, { "epoch": 2.26, "learning_rate": 3.0530370045246626e-06, "loss": 1.3378, "step": 3080 }, { "epoch": 2.26, "learning_rate": 3.047349959629352e-06, "loss": 1.4176, "step": 3081 }, { "epoch": 2.26, "learning_rate": 3.0416672641567035e-06, "loss": 1.5065, "step": 3082 }, { "epoch": 2.26, "learning_rate": 3.035988921661688e-06, "loss": 1.4675, "step": 3083 }, { "epoch": 2.26, "learning_rate": 3.0303149356965424e-06, "loss": 1.5791, "step": 3084 }, { "epoch": 2.26, "learning_rate": 3.0246453098107886e-06, "loss": 1.4597, "step": 3085 }, { "epoch": 2.26, "learning_rate": 3.0189800475512198e-06, "loss": 1.5407, "step": 3086 }, { "epoch": 2.26, "learning_rate": 3.0133191524618956e-06, "loss": 1.3408, "step": 3087 }, { "epoch": 2.26, "learning_rate": 3.007662628084147e-06, "loss": 1.4091, "step": 3088 }, { "epoch": 2.26, "learning_rate": 3.002010477956573e-06, "loss": 1.6106, "step": 3089 }, { "epoch": 2.26, "learning_rate": 2.9963627056150234e-06, "loss": 1.3976, "step": 3090 }, { "epoch": 2.26, "learning_rate": 2.990719314592625e-06, "loss": 1.4169, "step": 3091 }, { "epoch": 2.27, "learning_rate": 2.9850803084197554e-06, "loss": 1.4988, "step": 3092 }, { "epoch": 2.27, "learning_rate": 2.979445690624051e-06, "loss": 1.4075, "step": 3093 }, { "epoch": 2.27, "learning_rate": 2.9738154647304075e-06, "loss": 1.4311, "step": 3094 }, { "epoch": 2.27, "learning_rate": 2.9681896342609638e-06, "loss": 1.308, "step": 3095 }, { "epoch": 2.27, "learning_rate": 2.962568202735111e-06, "loss": 1.3797, "step": 3096 }, { "epoch": 2.27, "learning_rate": 2.9569511736694945e-06, "loss": 1.4157, "step": 3097 }, { "epoch": 2.27, "learning_rate": 2.9513385505780025e-06, "loss": 1.385, "step": 3098 }, { "epoch": 2.27, "learning_rate": 2.945730336971767e-06, "loss": 1.5028, "step": 3099 }, { "epoch": 2.27, "learning_rate": 2.9401265363591637e-06, "loss": 1.54, "step": 3100 }, { "epoch": 2.27, "learning_rate": 2.9345271522457986e-06, "loss": 1.4789, "step": 3101 }, { "epoch": 2.27, "learning_rate": 2.9289321881345257e-06, "loss": 1.4726, "step": 3102 }, { "epoch": 2.27, "learning_rate": 2.9233416475254285e-06, "loss": 1.439, "step": 3103 }, { "epoch": 2.27, "learning_rate": 2.9177555339158258e-06, "loss": 1.4019, "step": 3104 }, { "epoch": 2.27, "learning_rate": 2.9121738508002675e-06, "loss": 1.4719, "step": 3105 }, { "epoch": 2.28, "learning_rate": 2.9065966016705237e-06, "loss": 1.4482, "step": 3106 }, { "epoch": 2.28, "learning_rate": 2.9010237900156015e-06, "loss": 1.356, "step": 3107 }, { "epoch": 2.28, "learning_rate": 2.8954554193217254e-06, "loss": 1.4886, "step": 3108 }, { "epoch": 2.28, "learning_rate": 2.8898914930723443e-06, "loss": 1.5043, "step": 3109 }, { "epoch": 2.28, "learning_rate": 2.884332014748131e-06, "loss": 1.4499, "step": 3110 }, { "epoch": 2.28, "learning_rate": 2.8787769878269667e-06, "loss": 1.3761, "step": 3111 }, { "epoch": 2.28, "learning_rate": 2.8732264157839486e-06, "loss": 1.3875, "step": 3112 }, { "epoch": 2.28, "learning_rate": 2.8676803020913944e-06, "loss": 1.3934, "step": 3113 }, { "epoch": 2.28, "learning_rate": 2.8621386502188296e-06, "loss": 1.5985, "step": 3114 }, { "epoch": 2.28, "learning_rate": 2.8566014636329875e-06, "loss": 1.3038, "step": 3115 }, { "epoch": 2.28, "learning_rate": 2.8510687457978116e-06, "loss": 1.5702, "step": 3116 }, { "epoch": 2.28, "learning_rate": 2.8455405001744397e-06, "loss": 1.4758, "step": 3117 }, { "epoch": 2.28, "learning_rate": 2.8400167302212224e-06, "loss": 1.4479, "step": 3118 }, { "epoch": 2.28, "learning_rate": 2.8344974393937063e-06, "loss": 1.5561, "step": 3119 }, { "epoch": 2.29, "learning_rate": 2.828982631144639e-06, "loss": 1.5119, "step": 3120 }, { "epoch": 2.29, "learning_rate": 2.823472308923958e-06, "loss": 1.4117, "step": 3121 }, { "epoch": 2.29, "learning_rate": 2.8179664761788026e-06, "loss": 1.5925, "step": 3122 }, { "epoch": 2.29, "learning_rate": 2.812465136353494e-06, "loss": 1.4391, "step": 3123 }, { "epoch": 2.29, "learning_rate": 2.8069682928895482e-06, "loss": 1.4762, "step": 3124 }, { "epoch": 2.29, "learning_rate": 2.801475949225669e-06, "loss": 1.4816, "step": 3125 }, { "epoch": 2.29, "learning_rate": 2.795988108797748e-06, "loss": 1.3739, "step": 3126 }, { "epoch": 2.29, "learning_rate": 2.790504775038848e-06, "loss": 1.436, "step": 3127 }, { "epoch": 2.29, "learning_rate": 2.7850259513792266e-06, "loss": 1.4312, "step": 3128 }, { "epoch": 2.29, "learning_rate": 2.7795516412463077e-06, "loss": 1.3994, "step": 3129 }, { "epoch": 2.29, "learning_rate": 2.7740818480647002e-06, "loss": 1.349, "step": 3130 }, { "epoch": 2.29, "learning_rate": 2.7686165752561857e-06, "loss": 1.4645, "step": 3131 }, { "epoch": 2.29, "learning_rate": 2.7631558262397164e-06, "loss": 1.4199, "step": 3132 }, { "epoch": 2.3, "learning_rate": 2.7576996044314173e-06, "loss": 1.3236, "step": 3133 }, { "epoch": 2.3, "learning_rate": 2.7522479132445733e-06, "loss": 1.4971, "step": 3134 }, { "epoch": 2.3, "learning_rate": 2.7468007560896435e-06, "loss": 1.4933, "step": 3135 }, { "epoch": 2.3, "learning_rate": 2.741358136374249e-06, "loss": 1.6224, "step": 3136 }, { "epoch": 2.3, "learning_rate": 2.7359200575031695e-06, "loss": 1.5013, "step": 3137 }, { "epoch": 2.3, "learning_rate": 2.7304865228783507e-06, "loss": 1.3957, "step": 3138 }, { "epoch": 2.3, "learning_rate": 2.7250575358988817e-06, "loss": 1.4995, "step": 3139 }, { "epoch": 2.3, "learning_rate": 2.719633099961022e-06, "loss": 1.5579, "step": 3140 }, { "epoch": 2.3, "learning_rate": 2.714213218458178e-06, "loss": 1.3048, "step": 3141 }, { "epoch": 2.3, "learning_rate": 2.7087978947809013e-06, "loss": 1.4581, "step": 3142 }, { "epoch": 2.3, "learning_rate": 2.7033871323169014e-06, "loss": 1.4905, "step": 3143 }, { "epoch": 2.3, "learning_rate": 2.6979809344510323e-06, "loss": 1.4899, "step": 3144 }, { "epoch": 2.3, "learning_rate": 2.692579304565286e-06, "loss": 1.4803, "step": 3145 }, { "epoch": 2.3, "learning_rate": 2.687182246038803e-06, "loss": 1.3931, "step": 3146 }, { "epoch": 2.31, "learning_rate": 2.681789762247864e-06, "loss": 1.4169, "step": 3147 }, { "epoch": 2.31, "learning_rate": 2.6764018565658856e-06, "loss": 1.2108, "step": 3148 }, { "epoch": 2.31, "learning_rate": 2.6710185323634252e-06, "loss": 1.4917, "step": 3149 }, { "epoch": 2.31, "learning_rate": 2.6656397930081635e-06, "loss": 1.4135, "step": 3150 }, { "epoch": 2.31, "learning_rate": 2.660265641864923e-06, "loss": 1.4562, "step": 3151 }, { "epoch": 2.31, "learning_rate": 2.6548960822956528e-06, "loss": 1.4637, "step": 3152 }, { "epoch": 2.31, "learning_rate": 2.6495311176594286e-06, "loss": 1.3451, "step": 3153 }, { "epoch": 2.31, "learning_rate": 2.6441707513124572e-06, "loss": 1.3571, "step": 3154 }, { "epoch": 2.31, "learning_rate": 2.6388149866080557e-06, "loss": 1.4734, "step": 3155 }, { "epoch": 2.31, "learning_rate": 2.6334638268966773e-06, "loss": 1.4821, "step": 3156 }, { "epoch": 2.31, "learning_rate": 2.628117275525889e-06, "loss": 1.3195, "step": 3157 }, { "epoch": 2.31, "learning_rate": 2.6227753358403673e-06, "loss": 1.4383, "step": 3158 }, { "epoch": 2.31, "learning_rate": 2.6174380111819144e-06, "loss": 1.2645, "step": 3159 }, { "epoch": 2.32, "learning_rate": 2.612105304889442e-06, "loss": 1.5201, "step": 3160 }, { "epoch": 2.32, "learning_rate": 2.606777220298974e-06, "loss": 1.4916, "step": 3161 }, { "epoch": 2.32, "learning_rate": 2.6014537607436365e-06, "loss": 1.5157, "step": 3162 }, { "epoch": 2.32, "learning_rate": 2.5961349295536686e-06, "loss": 1.3421, "step": 3163 }, { "epoch": 2.32, "learning_rate": 2.5908207300564127e-06, "loss": 1.5337, "step": 3164 }, { "epoch": 2.32, "learning_rate": 2.5855111655763134e-06, "loss": 1.4229, "step": 3165 }, { "epoch": 2.32, "learning_rate": 2.5802062394349194e-06, "loss": 1.4513, "step": 3166 }, { "epoch": 2.32, "learning_rate": 2.5749059549508672e-06, "loss": 1.2901, "step": 3167 }, { "epoch": 2.32, "learning_rate": 2.5696103154399e-06, "loss": 1.5667, "step": 3168 }, { "epoch": 2.32, "learning_rate": 2.5643193242148524e-06, "loss": 1.4018, "step": 3169 }, { "epoch": 2.32, "learning_rate": 2.559032984585649e-06, "loss": 1.3725, "step": 3170 }, { "epoch": 2.32, "learning_rate": 2.553751299859308e-06, "loss": 1.4853, "step": 3171 }, { "epoch": 2.32, "learning_rate": 2.5484742733399325e-06, "loss": 1.417, "step": 3172 }, { "epoch": 2.32, "learning_rate": 2.5432019083287096e-06, "loss": 1.2773, "step": 3173 }, { "epoch": 2.33, "learning_rate": 2.5379342081239157e-06, "loss": 1.352, "step": 3174 }, { "epoch": 2.33, "learning_rate": 2.5326711760209043e-06, "loss": 1.5311, "step": 3175 }, { "epoch": 2.33, "learning_rate": 2.527412815312115e-06, "loss": 1.4, "step": 3176 }, { "epoch": 2.33, "learning_rate": 2.5221591292870595e-06, "loss": 1.4812, "step": 3177 }, { "epoch": 2.33, "learning_rate": 2.516910121232323e-06, "loss": 1.3729, "step": 3178 }, { "epoch": 2.33, "learning_rate": 2.5116657944315703e-06, "loss": 1.3462, "step": 3179 }, { "epoch": 2.33, "learning_rate": 2.5064261521655355e-06, "loss": 1.3666, "step": 3180 }, { "epoch": 2.33, "learning_rate": 2.501191197712022e-06, "loss": 1.5198, "step": 3181 }, { "epoch": 2.33, "learning_rate": 2.4959609343459014e-06, "loss": 1.2912, "step": 3182 }, { "epoch": 2.33, "learning_rate": 2.4907353653391062e-06, "loss": 1.361, "step": 3183 }, { "epoch": 2.33, "learning_rate": 2.485514493960638e-06, "loss": 1.4447, "step": 3184 }, { "epoch": 2.33, "learning_rate": 2.4802983234765566e-06, "loss": 1.4528, "step": 3185 }, { "epoch": 2.33, "learning_rate": 2.475086857149982e-06, "loss": 1.3916, "step": 3186 }, { "epoch": 2.33, "learning_rate": 2.4698800982410955e-06, "loss": 1.3789, "step": 3187 }, { "epoch": 2.34, "learning_rate": 2.464678050007122e-06, "loss": 1.4217, "step": 3188 }, { "epoch": 2.34, "learning_rate": 2.4594807157023525e-06, "loss": 1.618, "step": 3189 }, { "epoch": 2.34, "learning_rate": 2.4542880985781192e-06, "loss": 1.4221, "step": 3190 }, { "epoch": 2.34, "learning_rate": 2.449100201882808e-06, "loss": 1.366, "step": 3191 }, { "epoch": 2.34, "learning_rate": 2.443917028861853e-06, "loss": 1.4086, "step": 3192 }, { "epoch": 2.34, "learning_rate": 2.4387385827577303e-06, "loss": 1.3952, "step": 3193 }, { "epoch": 2.34, "learning_rate": 2.4335648668099644e-06, "loss": 1.5808, "step": 3194 }, { "epoch": 2.34, "learning_rate": 2.428395884255109e-06, "loss": 1.5121, "step": 3195 }, { "epoch": 2.34, "learning_rate": 2.4232316383267685e-06, "loss": 1.2448, "step": 3196 }, { "epoch": 2.34, "learning_rate": 2.41807213225558e-06, "loss": 1.4355, "step": 3197 }, { "epoch": 2.34, "learning_rate": 2.4129173692692155e-06, "loss": 1.4149, "step": 3198 }, { "epoch": 2.34, "learning_rate": 2.4077673525923807e-06, "loss": 1.5192, "step": 3199 }, { "epoch": 2.34, "learning_rate": 2.402622085446806e-06, "loss": 1.2765, "step": 3200 }, { "epoch": 2.35, "learning_rate": 2.39748157105126e-06, "loss": 1.3481, "step": 3201 }, { "epoch": 2.35, "learning_rate": 2.3923458126215326e-06, "loss": 1.3385, "step": 3202 }, { "epoch": 2.35, "learning_rate": 2.387214813370443e-06, "loss": 1.4389, "step": 3203 }, { "epoch": 2.35, "learning_rate": 2.3820885765078226e-06, "loss": 1.4336, "step": 3204 }, { "epoch": 2.35, "learning_rate": 2.3769671052405386e-06, "loss": 1.4234, "step": 3205 }, { "epoch": 2.35, "learning_rate": 2.371850402772462e-06, "loss": 1.2729, "step": 3206 }, { "epoch": 2.35, "learning_rate": 2.3667384723044918e-06, "loss": 1.3439, "step": 3207 }, { "epoch": 2.35, "learning_rate": 2.361631317034535e-06, "loss": 1.5002, "step": 3208 }, { "epoch": 2.35, "learning_rate": 2.3565289401575165e-06, "loss": 1.3664, "step": 3209 }, { "epoch": 2.35, "learning_rate": 2.3514313448653715e-06, "loss": 1.4492, "step": 3210 }, { "epoch": 2.35, "learning_rate": 2.346338534347036e-06, "loss": 1.4421, "step": 3211 }, { "epoch": 2.35, "learning_rate": 2.341250511788461e-06, "loss": 1.5551, "step": 3212 }, { "epoch": 2.35, "learning_rate": 2.3361672803725997e-06, "loss": 1.5069, "step": 3213 }, { "epoch": 2.35, "learning_rate": 2.33108884327941e-06, "loss": 1.3711, "step": 3214 }, { "epoch": 2.36, "learning_rate": 2.32601520368585e-06, "loss": 1.3009, "step": 3215 }, { "epoch": 2.36, "learning_rate": 2.320946364765869e-06, "loss": 1.3857, "step": 3216 }, { "epoch": 2.36, "learning_rate": 2.315882329690424e-06, "loss": 1.4002, "step": 3217 }, { "epoch": 2.36, "learning_rate": 2.310823101627462e-06, "loss": 1.4018, "step": 3218 }, { "epoch": 2.36, "learning_rate": 2.3057686837419246e-06, "loss": 1.5437, "step": 3219 }, { "epoch": 2.36, "learning_rate": 2.300719079195739e-06, "loss": 1.4742, "step": 3220 }, { "epoch": 2.36, "learning_rate": 2.295674291147829e-06, "loss": 1.5114, "step": 3221 }, { "epoch": 2.36, "learning_rate": 2.2906343227540973e-06, "loss": 1.4248, "step": 3222 }, { "epoch": 2.36, "learning_rate": 2.285599177167438e-06, "loss": 1.603, "step": 3223 }, { "epoch": 2.36, "learning_rate": 2.280568857537725e-06, "loss": 1.4743, "step": 3224 }, { "epoch": 2.36, "learning_rate": 2.2755433670118156e-06, "loss": 1.5087, "step": 3225 }, { "epoch": 2.36, "learning_rate": 2.2705227087335414e-06, "loss": 1.4459, "step": 3226 }, { "epoch": 2.36, "learning_rate": 2.2655068858437202e-06, "loss": 1.3397, "step": 3227 }, { "epoch": 2.36, "learning_rate": 2.260495901480132e-06, "loss": 1.4263, "step": 3228 }, { "epoch": 2.37, "learning_rate": 2.255489758777539e-06, "loss": 1.5479, "step": 3229 }, { "epoch": 2.37, "learning_rate": 2.2504884608676734e-06, "loss": 1.324, "step": 3230 }, { "epoch": 2.37, "learning_rate": 2.2454920108792354e-06, "loss": 1.4776, "step": 3231 }, { "epoch": 2.37, "learning_rate": 2.2405004119378936e-06, "loss": 1.4169, "step": 3232 }, { "epoch": 2.37, "learning_rate": 2.2355136671662782e-06, "loss": 1.405, "step": 3233 }, { "epoch": 2.37, "learning_rate": 2.2305317796839887e-06, "loss": 1.4182, "step": 3234 }, { "epoch": 2.37, "learning_rate": 2.2255547526075773e-06, "loss": 1.5783, "step": 3235 }, { "epoch": 2.37, "learning_rate": 2.220582589050565e-06, "loss": 1.386, "step": 3236 }, { "epoch": 2.37, "learning_rate": 2.2156152921234254e-06, "loss": 1.4222, "step": 3237 }, { "epoch": 2.37, "learning_rate": 2.2106528649335925e-06, "loss": 1.5479, "step": 3238 }, { "epoch": 2.37, "learning_rate": 2.205695310585443e-06, "loss": 1.4234, "step": 3239 }, { "epoch": 2.37, "learning_rate": 2.200742632180315e-06, "loss": 1.5145, "step": 3240 }, { "epoch": 2.37, "learning_rate": 2.1957948328164946e-06, "loss": 1.5176, "step": 3241 }, { "epoch": 2.38, "learning_rate": 2.190851915589215e-06, "loss": 1.4227, "step": 3242 }, { "epoch": 2.38, "learning_rate": 2.1859138835906557e-06, "loss": 1.4709, "step": 3243 }, { "epoch": 2.38, "learning_rate": 2.180980739909935e-06, "loss": 1.3884, "step": 3244 }, { "epoch": 2.38, "learning_rate": 2.17605248763312e-06, "loss": 1.2862, "step": 3245 }, { "epoch": 2.38, "learning_rate": 2.1711291298432157e-06, "loss": 1.4307, "step": 3246 }, { "epoch": 2.38, "learning_rate": 2.1662106696201647e-06, "loss": 1.3717, "step": 3247 }, { "epoch": 2.38, "learning_rate": 2.1612971100408484e-06, "loss": 1.5455, "step": 3248 }, { "epoch": 2.38, "learning_rate": 2.1563884541790747e-06, "loss": 1.4364, "step": 3249 }, { "epoch": 2.38, "learning_rate": 2.151484705105594e-06, "loss": 1.4117, "step": 3250 }, { "epoch": 2.38, "learning_rate": 2.146585865888078e-06, "loss": 1.4604, "step": 3251 }, { "epoch": 2.38, "learning_rate": 2.1416919395911327e-06, "loss": 1.4985, "step": 3252 }, { "epoch": 2.38, "learning_rate": 2.1368029292762902e-06, "loss": 1.4214, "step": 3253 }, { "epoch": 2.38, "learning_rate": 2.1319188380020085e-06, "loss": 1.54, "step": 3254 }, { "epoch": 2.38, "learning_rate": 2.1270396688236595e-06, "loss": 1.4014, "step": 3255 }, { "epoch": 2.39, "learning_rate": 2.1221654247935476e-06, "loss": 1.3727, "step": 3256 }, { "epoch": 2.39, "learning_rate": 2.117296108960888e-06, "loss": 1.4854, "step": 3257 }, { "epoch": 2.39, "learning_rate": 2.1124317243718184e-06, "loss": 1.3098, "step": 3258 }, { "epoch": 2.39, "learning_rate": 2.1075722740693893e-06, "loss": 1.3668, "step": 3259 }, { "epoch": 2.39, "learning_rate": 2.1027177610935655e-06, "loss": 1.299, "step": 3260 }, { "epoch": 2.39, "learning_rate": 2.097868188481217e-06, "loss": 1.434, "step": 3261 }, { "epoch": 2.39, "learning_rate": 2.093023559266132e-06, "loss": 1.3912, "step": 3262 }, { "epoch": 2.39, "learning_rate": 2.0881838764790007e-06, "loss": 1.4507, "step": 3263 }, { "epoch": 2.39, "learning_rate": 2.083349143147422e-06, "loss": 1.5234, "step": 3264 }, { "epoch": 2.39, "learning_rate": 2.0785193622958998e-06, "loss": 1.4134, "step": 3265 }, { "epoch": 2.39, "learning_rate": 2.073694536945833e-06, "loss": 1.5767, "step": 3266 }, { "epoch": 2.39, "learning_rate": 2.068874670115524e-06, "loss": 1.4095, "step": 3267 }, { "epoch": 2.39, "learning_rate": 2.0640597648201744e-06, "loss": 1.4459, "step": 3268 }, { "epoch": 2.39, "learning_rate": 2.0592498240718826e-06, "loss": 1.4613, "step": 3269 }, { "epoch": 2.4, "learning_rate": 2.054444850879641e-06, "loss": 1.4163, "step": 3270 }, { "epoch": 2.4, "learning_rate": 2.0496448482493346e-06, "loss": 1.3064, "step": 3271 }, { "epoch": 2.4, "learning_rate": 2.044849819183734e-06, "loss": 1.2841, "step": 3272 }, { "epoch": 2.4, "learning_rate": 2.040059766682504e-06, "loss": 1.3169, "step": 3273 }, { "epoch": 2.4, "learning_rate": 2.035274693742195e-06, "loss": 1.3533, "step": 3274 }, { "epoch": 2.4, "learning_rate": 2.0304946033562425e-06, "loss": 1.3588, "step": 3275 }, { "epoch": 2.4, "learning_rate": 2.0257194985149653e-06, "loss": 1.3454, "step": 3276 }, { "epoch": 2.4, "learning_rate": 2.020949382205558e-06, "loss": 1.3282, "step": 3277 }, { "epoch": 2.4, "learning_rate": 2.0161842574121017e-06, "loss": 1.3826, "step": 3278 }, { "epoch": 2.4, "learning_rate": 2.011424127115552e-06, "loss": 1.4504, "step": 3279 }, { "epoch": 2.4, "learning_rate": 2.00666899429374e-06, "loss": 1.5321, "step": 3280 }, { "epoch": 2.4, "learning_rate": 2.0019188619213723e-06, "loss": 1.39, "step": 3281 }, { "epoch": 2.4, "learning_rate": 1.9971737329700256e-06, "loss": 1.2957, "step": 3282 }, { "epoch": 2.41, "learning_rate": 1.992433610408142e-06, "loss": 1.4139, "step": 3283 }, { "epoch": 2.41, "learning_rate": 1.9876984972010395e-06, "loss": 1.3442, "step": 3284 }, { "epoch": 2.41, "learning_rate": 1.9829683963108992e-06, "loss": 1.5233, "step": 3285 }, { "epoch": 2.41, "learning_rate": 1.978243310696767e-06, "loss": 1.3286, "step": 3286 }, { "epoch": 2.41, "learning_rate": 1.9735232433145524e-06, "loss": 1.3366, "step": 3287 }, { "epoch": 2.41, "learning_rate": 1.9688081971170202e-06, "loss": 1.4679, "step": 3288 }, { "epoch": 2.41, "learning_rate": 1.9640981750538004e-06, "loss": 1.284, "step": 3289 }, { "epoch": 2.41, "learning_rate": 1.9593931800713774e-06, "loss": 1.4803, "step": 3290 }, { "epoch": 2.41, "learning_rate": 1.9546932151130913e-06, "loss": 1.4428, "step": 3291 }, { "epoch": 2.41, "learning_rate": 1.9499982831191345e-06, "loss": 1.4454, "step": 3292 }, { "epoch": 2.41, "learning_rate": 1.9453083870265556e-06, "loss": 1.5355, "step": 3293 }, { "epoch": 2.41, "learning_rate": 1.9406235297692434e-06, "loss": 1.4887, "step": 3294 }, { "epoch": 2.41, "learning_rate": 1.9359437142779415e-06, "loss": 1.4524, "step": 3295 }, { "epoch": 2.41, "learning_rate": 1.931268943480241e-06, "loss": 1.3953, "step": 3296 }, { "epoch": 2.42, "learning_rate": 1.926599220300569e-06, "loss": 1.5268, "step": 3297 }, { "epoch": 2.42, "learning_rate": 1.9219345476602036e-06, "loss": 1.2913, "step": 3298 }, { "epoch": 2.42, "learning_rate": 1.917274928477262e-06, "loss": 1.5512, "step": 3299 }, { "epoch": 2.42, "learning_rate": 1.9126203656666918e-06, "loss": 1.4932, "step": 3300 }, { "epoch": 2.42, "learning_rate": 1.9079708621402883e-06, "loss": 1.4915, "step": 3301 }, { "epoch": 2.42, "learning_rate": 1.9033264208066748e-06, "loss": 1.4181, "step": 3302 }, { "epoch": 2.42, "learning_rate": 1.8986870445713112e-06, "loss": 1.3932, "step": 3303 }, { "epoch": 2.42, "learning_rate": 1.8940527363364903e-06, "loss": 1.5136, "step": 3304 }, { "epoch": 2.42, "learning_rate": 1.8894234990013261e-06, "loss": 1.4605, "step": 3305 }, { "epoch": 2.42, "learning_rate": 1.8847993354617689e-06, "loss": 1.3926, "step": 3306 }, { "epoch": 2.42, "learning_rate": 1.8801802486105936e-06, "loss": 1.7461, "step": 3307 }, { "epoch": 2.42, "learning_rate": 1.8755662413373964e-06, "loss": 1.4532, "step": 3308 }, { "epoch": 2.42, "learning_rate": 1.8709573165286e-06, "loss": 1.4372, "step": 3309 }, { "epoch": 2.42, "learning_rate": 1.8663534770674396e-06, "loss": 1.5306, "step": 3310 }, { "epoch": 2.43, "learning_rate": 1.8617547258339775e-06, "loss": 1.5024, "step": 3311 }, { "epoch": 2.43, "learning_rate": 1.8571610657050921e-06, "loss": 1.4341, "step": 3312 }, { "epoch": 2.43, "learning_rate": 1.8525724995544692e-06, "loss": 1.3616, "step": 3313 }, { "epoch": 2.43, "learning_rate": 1.8479890302526171e-06, "loss": 1.4521, "step": 3314 }, { "epoch": 2.43, "learning_rate": 1.8434106606668522e-06, "loss": 1.4026, "step": 3315 }, { "epoch": 2.43, "learning_rate": 1.838837393661298e-06, "loss": 1.4205, "step": 3316 }, { "epoch": 2.43, "learning_rate": 1.834269232096888e-06, "loss": 1.4717, "step": 3317 }, { "epoch": 2.43, "learning_rate": 1.8297061788313652e-06, "loss": 1.4061, "step": 3318 }, { "epoch": 2.43, "learning_rate": 1.8251482367192707e-06, "loss": 1.4148, "step": 3319 }, { "epoch": 2.43, "learning_rate": 1.820595408611956e-06, "loss": 1.3364, "step": 3320 }, { "epoch": 2.43, "learning_rate": 1.8160476973575624e-06, "loss": 1.5109, "step": 3321 }, { "epoch": 2.43, "learning_rate": 1.8115051058010403e-06, "loss": 1.4017, "step": 3322 }, { "epoch": 2.43, "learning_rate": 1.8069676367841326e-06, "loss": 1.383, "step": 3323 }, { "epoch": 2.44, "learning_rate": 1.8024352931453791e-06, "loss": 1.396, "step": 3324 }, { "epoch": 2.44, "learning_rate": 1.7979080777201152e-06, "loss": 1.3543, "step": 3325 }, { "epoch": 2.44, "learning_rate": 1.7933859933404606e-06, "loss": 1.4088, "step": 3326 }, { "epoch": 2.44, "learning_rate": 1.788869042835335e-06, "loss": 1.453, "step": 3327 }, { "epoch": 2.44, "learning_rate": 1.784357229030438e-06, "loss": 1.4273, "step": 3328 }, { "epoch": 2.44, "learning_rate": 1.7798505547482614e-06, "loss": 1.4444, "step": 3329 }, { "epoch": 2.44, "learning_rate": 1.7753490228080795e-06, "loss": 1.2785, "step": 3330 }, { "epoch": 2.44, "learning_rate": 1.7708526360259514e-06, "loss": 1.4456, "step": 3331 }, { "epoch": 2.44, "learning_rate": 1.7663613972147175e-06, "loss": 1.4923, "step": 3332 }, { "epoch": 2.44, "learning_rate": 1.7618753091839924e-06, "loss": 1.3709, "step": 3333 }, { "epoch": 2.44, "learning_rate": 1.7573943747401755e-06, "loss": 1.5305, "step": 3334 }, { "epoch": 2.44, "learning_rate": 1.7529185966864381e-06, "loss": 1.3142, "step": 3335 }, { "epoch": 2.44, "learning_rate": 1.7484479778227281e-06, "loss": 1.3417, "step": 3336 }, { "epoch": 2.44, "learning_rate": 1.7439825209457672e-06, "loss": 1.357, "step": 3337 }, { "epoch": 2.45, "learning_rate": 1.73952222884904e-06, "loss": 1.4248, "step": 3338 }, { "epoch": 2.45, "learning_rate": 1.7350671043228072e-06, "loss": 1.3465, "step": 3339 }, { "epoch": 2.45, "learning_rate": 1.7306171501540981e-06, "loss": 1.4495, "step": 3340 }, { "epoch": 2.45, "learning_rate": 1.726172369126703e-06, "loss": 1.437, "step": 3341 }, { "epoch": 2.45, "learning_rate": 1.72173276402118e-06, "loss": 1.2505, "step": 3342 }, { "epoch": 2.45, "learning_rate": 1.7172983376148443e-06, "loss": 1.409, "step": 3343 }, { "epoch": 2.45, "learning_rate": 1.7128690926817726e-06, "loss": 1.5532, "step": 3344 }, { "epoch": 2.45, "learning_rate": 1.7084450319928037e-06, "loss": 1.3085, "step": 3345 }, { "epoch": 2.45, "learning_rate": 1.7040261583155316e-06, "loss": 1.4369, "step": 3346 }, { "epoch": 2.45, "learning_rate": 1.6996124744143039e-06, "loss": 1.4863, "step": 3347 }, { "epoch": 2.45, "learning_rate": 1.6952039830502253e-06, "loss": 1.4889, "step": 3348 }, { "epoch": 2.45, "learning_rate": 1.6908006869811455e-06, "loss": 1.4179, "step": 3349 }, { "epoch": 2.45, "learning_rate": 1.686402588961671e-06, "loss": 1.334, "step": 3350 }, { "epoch": 2.45, "learning_rate": 1.6820096917431527e-06, "loss": 1.3878, "step": 3351 }, { "epoch": 2.46, "learning_rate": 1.6776219980736895e-06, "loss": 1.4687, "step": 3352 }, { "epoch": 2.46, "learning_rate": 1.673239510698127e-06, "loss": 1.2689, "step": 3353 }, { "epoch": 2.46, "learning_rate": 1.6688622323580461e-06, "loss": 1.5341, "step": 3354 }, { "epoch": 2.46, "learning_rate": 1.6644901657917777e-06, "loss": 1.4164, "step": 3355 }, { "epoch": 2.46, "learning_rate": 1.6601233137343885e-06, "loss": 1.4613, "step": 3356 }, { "epoch": 2.46, "learning_rate": 1.6557616789176844e-06, "loss": 1.4229, "step": 3357 }, { "epoch": 2.46, "learning_rate": 1.6514052640702082e-06, "loss": 1.4894, "step": 3358 }, { "epoch": 2.46, "learning_rate": 1.6470540719172312e-06, "loss": 1.3616, "step": 3359 }, { "epoch": 2.46, "learning_rate": 1.642708105180768e-06, "loss": 1.6077, "step": 3360 }, { "epoch": 2.46, "learning_rate": 1.6383673665795519e-06, "loss": 1.4396, "step": 3361 }, { "epoch": 2.46, "learning_rate": 1.634031858829055e-06, "loss": 1.2858, "step": 3362 }, { "epoch": 2.46, "learning_rate": 1.6297015846414755e-06, "loss": 1.4567, "step": 3363 }, { "epoch": 2.46, "learning_rate": 1.6253765467257342e-06, "loss": 1.4252, "step": 3364 }, { "epoch": 2.47, "learning_rate": 1.6210567477874816e-06, "loss": 1.5388, "step": 3365 }, { "epoch": 2.47, "learning_rate": 1.6167421905290837e-06, "loss": 1.657, "step": 3366 }, { "epoch": 2.47, "learning_rate": 1.6124328776496323e-06, "loss": 1.4884, "step": 3367 }, { "epoch": 2.47, "learning_rate": 1.6081288118449367e-06, "loss": 1.4199, "step": 3368 }, { "epoch": 2.47, "learning_rate": 1.6038299958075266e-06, "loss": 1.3129, "step": 3369 }, { "epoch": 2.47, "learning_rate": 1.599536432226646e-06, "loss": 1.2818, "step": 3370 }, { "epoch": 2.47, "learning_rate": 1.5952481237882478e-06, "loss": 1.5465, "step": 3371 }, { "epoch": 2.47, "learning_rate": 1.5909650731750048e-06, "loss": 1.3678, "step": 3372 }, { "epoch": 2.47, "learning_rate": 1.5866872830662982e-06, "loss": 1.2705, "step": 3373 }, { "epoch": 2.47, "learning_rate": 1.5824147561382208e-06, "loss": 1.43, "step": 3374 }, { "epoch": 2.47, "learning_rate": 1.5781474950635633e-06, "loss": 1.5669, "step": 3375 }, { "epoch": 2.47, "learning_rate": 1.573885502511836e-06, "loss": 1.2925, "step": 3376 }, { "epoch": 2.47, "learning_rate": 1.5696287811492395e-06, "loss": 1.3999, "step": 3377 }, { "epoch": 2.47, "learning_rate": 1.565377333638688e-06, "loss": 1.3958, "step": 3378 }, { "epoch": 2.48, "learning_rate": 1.5611311626397906e-06, "loss": 1.3793, "step": 3379 }, { "epoch": 2.48, "learning_rate": 1.5568902708088573e-06, "loss": 1.4552, "step": 3380 }, { "epoch": 2.48, "learning_rate": 1.552654660798899e-06, "loss": 1.3146, "step": 3381 }, { "epoch": 2.48, "learning_rate": 1.5484243352596128e-06, "loss": 1.3419, "step": 3382 }, { "epoch": 2.48, "learning_rate": 1.5441992968373988e-06, "loss": 1.3978, "step": 3383 }, { "epoch": 2.48, "learning_rate": 1.5399795481753454e-06, "loss": 1.4063, "step": 3384 }, { "epoch": 2.48, "learning_rate": 1.535765091913236e-06, "loss": 1.4583, "step": 3385 }, { "epoch": 2.48, "learning_rate": 1.5315559306875406e-06, "loss": 1.5339, "step": 3386 }, { "epoch": 2.48, "learning_rate": 1.5273520671314113e-06, "loss": 1.335, "step": 3387 }, { "epoch": 2.48, "learning_rate": 1.5231535038746959e-06, "loss": 1.3976, "step": 3388 }, { "epoch": 2.48, "learning_rate": 1.51896024354392e-06, "loss": 1.2808, "step": 3389 }, { "epoch": 2.48, "learning_rate": 1.5147722887622961e-06, "loss": 1.5215, "step": 3390 }, { "epoch": 2.48, "learning_rate": 1.5105896421497113e-06, "loss": 1.3669, "step": 3391 }, { "epoch": 2.48, "learning_rate": 1.5064123063227397e-06, "loss": 1.5664, "step": 3392 }, { "epoch": 2.49, "learning_rate": 1.502240283894626e-06, "loss": 1.4557, "step": 3393 }, { "epoch": 2.49, "learning_rate": 1.4980735774752963e-06, "loss": 1.393, "step": 3394 }, { "epoch": 2.49, "learning_rate": 1.4939121896713482e-06, "loss": 1.4343, "step": 3395 }, { "epoch": 2.49, "learning_rate": 1.4897561230860536e-06, "loss": 1.5301, "step": 3396 }, { "epoch": 2.49, "learning_rate": 1.4856053803193548e-06, "loss": 1.5397, "step": 3397 }, { "epoch": 2.49, "learning_rate": 1.4814599639678663e-06, "loss": 1.3579, "step": 3398 }, { "epoch": 2.49, "learning_rate": 1.4773198766248642e-06, "loss": 1.3966, "step": 3399 }, { "epoch": 2.49, "learning_rate": 1.473185120880295e-06, "loss": 1.2797, "step": 3400 }, { "epoch": 2.49, "learning_rate": 1.4690556993207716e-06, "loss": 1.43, "step": 3401 }, { "epoch": 2.49, "learning_rate": 1.4649316145295668e-06, "loss": 1.4966, "step": 3402 }, { "epoch": 2.49, "learning_rate": 1.4608128690866185e-06, "loss": 1.4592, "step": 3403 }, { "epoch": 2.49, "learning_rate": 1.4566994655685173e-06, "loss": 1.3549, "step": 3404 }, { "epoch": 2.49, "learning_rate": 1.4525914065485225e-06, "loss": 1.3953, "step": 3405 }, { "epoch": 2.5, "learning_rate": 1.448488694596537e-06, "loss": 1.4636, "step": 3406 }, { "epoch": 2.5, "learning_rate": 1.4443913322791304e-06, "loss": 1.2959, "step": 3407 }, { "epoch": 2.5, "learning_rate": 1.4402993221595197e-06, "loss": 1.4844, "step": 3408 }, { "epoch": 2.5, "learning_rate": 1.4362126667975784e-06, "loss": 1.4074, "step": 3409 }, { "epoch": 2.5, "learning_rate": 1.4321313687498218e-06, "loss": 1.3426, "step": 3410 }, { "epoch": 2.5, "learning_rate": 1.4280554305694205e-06, "loss": 1.3812, "step": 3411 }, { "epoch": 2.5, "learning_rate": 1.4239848548061907e-06, "loss": 1.5571, "step": 3412 }, { "epoch": 2.5, "learning_rate": 1.4199196440065955e-06, "loss": 1.3373, "step": 3413 }, { "epoch": 2.5, "learning_rate": 1.4158598007137391e-06, "loss": 1.4509, "step": 3414 }, { "epoch": 2.5, "learning_rate": 1.411805327467367e-06, "loss": 1.3787, "step": 3415 }, { "epoch": 2.5, "learning_rate": 1.4077562268038691e-06, "loss": 1.2798, "step": 3416 }, { "epoch": 2.5, "learning_rate": 1.4037125012562702e-06, "loss": 1.3781, "step": 3417 }, { "epoch": 2.5, "learning_rate": 1.399674153354238e-06, "loss": 1.4928, "step": 3418 }, { "epoch": 2.5, "learning_rate": 1.3956411856240725e-06, "loss": 1.6148, "step": 3419 }, { "epoch": 2.51, "learning_rate": 1.3916136005887048e-06, "loss": 1.5888, "step": 3420 }, { "epoch": 2.51, "learning_rate": 1.387591400767706e-06, "loss": 1.2819, "step": 3421 }, { "epoch": 2.51, "learning_rate": 1.3835745886772711e-06, "loss": 1.3413, "step": 3422 }, { "epoch": 2.51, "learning_rate": 1.37956316683023e-06, "loss": 1.3301, "step": 3423 }, { "epoch": 2.51, "learning_rate": 1.3755571377360378e-06, "loss": 1.3068, "step": 3424 }, { "epoch": 2.51, "learning_rate": 1.3715565039007795e-06, "loss": 1.4888, "step": 3425 }, { "epoch": 2.51, "learning_rate": 1.3675612678271588e-06, "loss": 1.5707, "step": 3426 }, { "epoch": 2.51, "learning_rate": 1.3635714320145076e-06, "loss": 1.407, "step": 3427 }, { "epoch": 2.51, "learning_rate": 1.3595869989587785e-06, "loss": 1.3679, "step": 3428 }, { "epoch": 2.51, "learning_rate": 1.3556079711525439e-06, "loss": 1.4443, "step": 3429 }, { "epoch": 2.51, "learning_rate": 1.351634351084994e-06, "loss": 1.4169, "step": 3430 }, { "epoch": 2.51, "learning_rate": 1.347666141241939e-06, "loss": 1.3015, "step": 3431 }, { "epoch": 2.51, "learning_rate": 1.3437033441057989e-06, "loss": 1.4593, "step": 3432 }, { "epoch": 2.52, "learning_rate": 1.339745962155613e-06, "loss": 1.5416, "step": 3433 }, { "epoch": 2.52, "learning_rate": 1.335793997867032e-06, "loss": 1.491, "step": 3434 }, { "epoch": 2.52, "learning_rate": 1.3318474537123138e-06, "loss": 1.4245, "step": 3435 }, { "epoch": 2.52, "learning_rate": 1.3279063321603335e-06, "loss": 1.4278, "step": 3436 }, { "epoch": 2.52, "learning_rate": 1.3239706356765657e-06, "loss": 1.404, "step": 3437 }, { "epoch": 2.52, "learning_rate": 1.320040366723091e-06, "loss": 1.5656, "step": 3438 }, { "epoch": 2.52, "learning_rate": 1.3161155277586013e-06, "loss": 1.4496, "step": 3439 }, { "epoch": 2.52, "learning_rate": 1.3121961212383872e-06, "loss": 1.3656, "step": 3440 }, { "epoch": 2.52, "learning_rate": 1.3082821496143428e-06, "loss": 1.4938, "step": 3441 }, { "epoch": 2.52, "learning_rate": 1.304373615334964e-06, "loss": 1.3741, "step": 3442 }, { "epoch": 2.52, "learning_rate": 1.300470520845336e-06, "loss": 1.3605, "step": 3443 }, { "epoch": 2.52, "learning_rate": 1.2965728685871525e-06, "loss": 1.3999, "step": 3444 }, { "epoch": 2.52, "learning_rate": 1.2926806609986964e-06, "loss": 1.4886, "step": 3445 }, { "epoch": 2.52, "learning_rate": 1.2887939005148454e-06, "loss": 1.2436, "step": 3446 }, { "epoch": 2.53, "learning_rate": 1.2849125895670733e-06, "loss": 1.5468, "step": 3447 }, { "epoch": 2.53, "learning_rate": 1.281036730583437e-06, "loss": 1.3708, "step": 3448 }, { "epoch": 2.53, "learning_rate": 1.277166325988589e-06, "loss": 1.4725, "step": 3449 }, { "epoch": 2.53, "learning_rate": 1.2733013782037695e-06, "loss": 1.4596, "step": 3450 }, { "epoch": 2.53, "learning_rate": 1.2694418896468008e-06, "loss": 1.3907, "step": 3451 }, { "epoch": 2.53, "learning_rate": 1.2655878627320972e-06, "loss": 1.3917, "step": 3452 }, { "epoch": 2.53, "learning_rate": 1.2617392998706502e-06, "loss": 1.4724, "step": 3453 }, { "epoch": 2.53, "learning_rate": 1.2578962034700304e-06, "loss": 1.2678, "step": 3454 }, { "epoch": 2.53, "learning_rate": 1.2540585759343959e-06, "loss": 1.3825, "step": 3455 }, { "epoch": 2.53, "learning_rate": 1.250226419664483e-06, "loss": 1.2988, "step": 3456 }, { "epoch": 2.53, "learning_rate": 1.2463997370576008e-06, "loss": 1.3926, "step": 3457 }, { "epoch": 2.53, "learning_rate": 1.2425785305076387e-06, "loss": 1.4203, "step": 3458 }, { "epoch": 2.53, "learning_rate": 1.2387628024050557e-06, "loss": 1.4465, "step": 3459 }, { "epoch": 2.53, "learning_rate": 1.2349525551368858e-06, "loss": 1.495, "step": 3460 }, { "epoch": 2.54, "learning_rate": 1.2311477910867376e-06, "loss": 1.5688, "step": 3461 }, { "epoch": 2.54, "learning_rate": 1.227348512634784e-06, "loss": 1.4776, "step": 3462 }, { "epoch": 2.54, "learning_rate": 1.2235547221577693e-06, "loss": 1.4097, "step": 3463 }, { "epoch": 2.54, "learning_rate": 1.2197664220290074e-06, "loss": 1.478, "step": 3464 }, { "epoch": 2.54, "learning_rate": 1.215983614618369e-06, "loss": 1.4052, "step": 3465 }, { "epoch": 2.54, "learning_rate": 1.2122063022922969e-06, "loss": 1.5163, "step": 3466 }, { "epoch": 2.54, "learning_rate": 1.2084344874137942e-06, "loss": 1.3525, "step": 3467 }, { "epoch": 2.54, "learning_rate": 1.2046681723424214e-06, "loss": 1.6661, "step": 3468 }, { "epoch": 2.54, "learning_rate": 1.2009073594343023e-06, "loss": 1.3667, "step": 3469 }, { "epoch": 2.54, "learning_rate": 1.1971520510421197e-06, "loss": 1.3491, "step": 3470 }, { "epoch": 2.54, "learning_rate": 1.1934022495151064e-06, "loss": 1.4966, "step": 3471 }, { "epoch": 2.54, "learning_rate": 1.1896579571990575e-06, "loss": 1.4103, "step": 3472 }, { "epoch": 2.54, "learning_rate": 1.1859191764363175e-06, "loss": 1.3704, "step": 3473 }, { "epoch": 2.55, "learning_rate": 1.182185909565785e-06, "loss": 1.3299, "step": 3474 }, { "epoch": 2.55, "learning_rate": 1.1784581589229104e-06, "loss": 1.4968, "step": 3475 }, { "epoch": 2.55, "learning_rate": 1.1747359268396873e-06, "loss": 1.3864, "step": 3476 }, { "epoch": 2.55, "learning_rate": 1.171019215644662e-06, "loss": 1.4531, "step": 3477 }, { "epoch": 2.55, "learning_rate": 1.1673080276629268e-06, "loss": 1.3705, "step": 3478 }, { "epoch": 2.55, "learning_rate": 1.1636023652161187e-06, "loss": 1.3802, "step": 3479 }, { "epoch": 2.55, "learning_rate": 1.1599022306224184e-06, "loss": 1.3822, "step": 3480 }, { "epoch": 2.55, "learning_rate": 1.1562076261965438e-06, "loss": 1.4231, "step": 3481 }, { "epoch": 2.55, "learning_rate": 1.1525185542497586e-06, "loss": 1.4495, "step": 3482 }, { "epoch": 2.55, "learning_rate": 1.1488350170898676e-06, "loss": 1.3138, "step": 3483 }, { "epoch": 2.55, "learning_rate": 1.1451570170212024e-06, "loss": 1.2915, "step": 3484 }, { "epoch": 2.55, "learning_rate": 1.141484556344643e-06, "loss": 1.3375, "step": 3485 }, { "epoch": 2.55, "learning_rate": 1.1378176373575977e-06, "loss": 1.4204, "step": 3486 }, { "epoch": 2.55, "learning_rate": 1.1341562623540081e-06, "loss": 1.411, "step": 3487 }, { "epoch": 2.56, "learning_rate": 1.1305004336243486e-06, "loss": 1.4546, "step": 3488 }, { "epoch": 2.56, "learning_rate": 1.1268501534556242e-06, "loss": 1.3808, "step": 3489 }, { "epoch": 2.56, "learning_rate": 1.1232054241313707e-06, "loss": 1.3275, "step": 3490 }, { "epoch": 2.56, "learning_rate": 1.1195662479316483e-06, "loss": 1.4548, "step": 3491 }, { "epoch": 2.56, "learning_rate": 1.115932627133043e-06, "loss": 1.418, "step": 3492 }, { "epoch": 2.56, "learning_rate": 1.1123045640086683e-06, "loss": 1.3123, "step": 3493 }, { "epoch": 2.56, "learning_rate": 1.1086820608281579e-06, "loss": 1.4985, "step": 3494 }, { "epoch": 2.56, "learning_rate": 1.1050651198576713e-06, "loss": 1.3165, "step": 3495 }, { "epoch": 2.56, "learning_rate": 1.101453743359886e-06, "loss": 1.4888, "step": 3496 }, { "epoch": 2.56, "learning_rate": 1.097847933593995e-06, "loss": 1.3261, "step": 3497 }, { "epoch": 2.56, "learning_rate": 1.0942476928157175e-06, "loss": 1.4032, "step": 3498 }, { "epoch": 2.56, "learning_rate": 1.0906530232772783e-06, "loss": 1.4467, "step": 3499 }, { "epoch": 2.56, "learning_rate": 1.0870639272274263e-06, "loss": 1.3559, "step": 3500 }, { "epoch": 2.56, "learning_rate": 1.083480406911418e-06, "loss": 1.4496, "step": 3501 }, { "epoch": 2.57, "learning_rate": 1.0799024645710244e-06, "loss": 1.5535, "step": 3502 }, { "epoch": 2.57, "learning_rate": 1.0763301024445283e-06, "loss": 1.4824, "step": 3503 }, { "epoch": 2.57, "learning_rate": 1.0727633227667157e-06, "loss": 1.4798, "step": 3504 }, { "epoch": 2.57, "learning_rate": 1.0692021277688868e-06, "loss": 1.4915, "step": 3505 }, { "epoch": 2.57, "learning_rate": 1.0656465196788445e-06, "loss": 1.3677, "step": 3506 }, { "epoch": 2.57, "learning_rate": 1.0620965007208993e-06, "loss": 1.4202, "step": 3507 }, { "epoch": 2.57, "learning_rate": 1.058552073115865e-06, "loss": 1.4535, "step": 3508 }, { "epoch": 2.57, "learning_rate": 1.055013239081053e-06, "loss": 1.3697, "step": 3509 }, { "epoch": 2.57, "learning_rate": 1.0514800008302806e-06, "loss": 1.4798, "step": 3510 }, { "epoch": 2.57, "learning_rate": 1.0479523605738618e-06, "loss": 1.4554, "step": 3511 }, { "epoch": 2.57, "learning_rate": 1.0444303205186114e-06, "loss": 1.3818, "step": 3512 }, { "epoch": 2.57, "learning_rate": 1.0409138828678389e-06, "loss": 1.4327, "step": 3513 }, { "epoch": 2.57, "learning_rate": 1.0374030498213483e-06, "loss": 1.2978, "step": 3514 }, { "epoch": 2.58, "learning_rate": 1.0338978235754371e-06, "loss": 1.3731, "step": 3515 }, { "epoch": 2.58, "learning_rate": 1.0303982063228978e-06, "loss": 1.3346, "step": 3516 }, { "epoch": 2.58, "learning_rate": 1.0269042002530139e-06, "loss": 1.3382, "step": 3517 }, { "epoch": 2.58, "learning_rate": 1.0234158075515555e-06, "loss": 1.3629, "step": 3518 }, { "epoch": 2.58, "learning_rate": 1.0199330304007858e-06, "loss": 1.3714, "step": 3519 }, { "epoch": 2.58, "learning_rate": 1.0164558709794504e-06, "loss": 1.4117, "step": 3520 }, { "epoch": 2.58, "learning_rate": 1.012984331462783e-06, "loss": 1.3415, "step": 3521 }, { "epoch": 2.58, "learning_rate": 1.0095184140225011e-06, "loss": 1.4714, "step": 3522 }, { "epoch": 2.58, "learning_rate": 1.006058120826805e-06, "loss": 1.3938, "step": 3523 }, { "epoch": 2.58, "learning_rate": 1.0026034540403806e-06, "loss": 1.4056, "step": 3524 }, { "epoch": 2.58, "learning_rate": 9.991544158243848e-07, "loss": 1.4133, "step": 3525 }, { "epoch": 2.58, "learning_rate": 9.957110083364607e-07, "loss": 1.4251, "step": 3526 }, { "epoch": 2.58, "learning_rate": 9.922732337307283e-07, "loss": 1.3641, "step": 3527 }, { "epoch": 2.58, "learning_rate": 9.888410941577819e-07, "loss": 1.4765, "step": 3528 }, { "epoch": 2.59, "learning_rate": 9.85414591764694e-07, "loss": 1.2783, "step": 3529 }, { "epoch": 2.59, "learning_rate": 9.819937286950043e-07, "loss": 1.3422, "step": 3530 }, { "epoch": 2.59, "learning_rate": 9.78578507088731e-07, "loss": 1.4279, "step": 3531 }, { "epoch": 2.59, "learning_rate": 9.751689290823575e-07, "loss": 1.4235, "step": 3532 }, { "epoch": 2.59, "learning_rate": 9.717649968088405e-07, "loss": 1.4019, "step": 3533 }, { "epoch": 2.59, "learning_rate": 9.68366712397606e-07, "loss": 1.4497, "step": 3534 }, { "epoch": 2.59, "learning_rate": 9.649740779745431e-07, "loss": 1.5395, "step": 3535 }, { "epoch": 2.59, "learning_rate": 9.61587095662011e-07, "loss": 1.3106, "step": 3536 }, { "epoch": 2.59, "learning_rate": 9.58205767578827e-07, "loss": 1.4526, "step": 3537 }, { "epoch": 2.59, "learning_rate": 9.548300958402744e-07, "loss": 1.2625, "step": 3538 }, { "epoch": 2.59, "learning_rate": 9.514600825581e-07, "loss": 1.3236, "step": 3539 }, { "epoch": 2.59, "learning_rate": 9.48095729840508e-07, "loss": 1.5032, "step": 3540 }, { "epoch": 2.59, "learning_rate": 9.447370397921652e-07, "loss": 1.2442, "step": 3541 }, { "epoch": 2.59, "learning_rate": 9.413840145141873e-07, "loss": 1.4019, "step": 3542 }, { "epoch": 2.6, "learning_rate": 9.380366561041553e-07, "loss": 1.4424, "step": 3543 }, { "epoch": 2.6, "learning_rate": 9.346949666561023e-07, "loss": 1.4128, "step": 3544 }, { "epoch": 2.6, "learning_rate": 9.313589482605156e-07, "loss": 1.4624, "step": 3545 }, { "epoch": 2.6, "learning_rate": 9.280286030043306e-07, "loss": 1.3731, "step": 3546 }, { "epoch": 2.6, "learning_rate": 9.247039329709406e-07, "loss": 1.4809, "step": 3547 }, { "epoch": 2.6, "learning_rate": 9.21384940240182e-07, "loss": 1.5095, "step": 3548 }, { "epoch": 2.6, "learning_rate": 9.180716268883427e-07, "loss": 1.4819, "step": 3549 }, { "epoch": 2.6, "learning_rate": 9.147639949881593e-07, "loss": 1.4401, "step": 3550 }, { "epoch": 2.6, "learning_rate": 9.114620466088109e-07, "loss": 1.5089, "step": 3551 }, { "epoch": 2.6, "learning_rate": 9.081657838159253e-07, "loss": 1.3641, "step": 3552 }, { "epoch": 2.6, "learning_rate": 9.048752086715673e-07, "loss": 1.3582, "step": 3553 }, { "epoch": 2.6, "learning_rate": 9.015903232342493e-07, "loss": 1.6741, "step": 3554 }, { "epoch": 2.6, "learning_rate": 8.98311129558922e-07, "loss": 1.4183, "step": 3555 }, { "epoch": 2.61, "learning_rate": 8.950376296969754e-07, "loss": 1.3907, "step": 3556 }, { "epoch": 2.61, "learning_rate": 8.917698256962403e-07, "loss": 1.4745, "step": 3557 }, { "epoch": 2.61, "learning_rate": 8.885077196009783e-07, "loss": 1.3494, "step": 3558 }, { "epoch": 2.61, "learning_rate": 8.852513134518925e-07, "loss": 1.3178, "step": 3559 }, { "epoch": 2.61, "learning_rate": 8.820006092861177e-07, "loss": 1.4595, "step": 3560 }, { "epoch": 2.61, "learning_rate": 8.787556091372207e-07, "loss": 1.442, "step": 3561 }, { "epoch": 2.61, "learning_rate": 8.75516315035202e-07, "loss": 1.4051, "step": 3562 }, { "epoch": 2.61, "learning_rate": 8.722827290064929e-07, "loss": 1.3638, "step": 3563 }, { "epoch": 2.61, "learning_rate": 8.690548530739496e-07, "loss": 1.4189, "step": 3564 }, { "epoch": 2.61, "learning_rate": 8.658326892568602e-07, "loss": 1.4707, "step": 3565 }, { "epoch": 2.61, "learning_rate": 8.626162395709392e-07, "loss": 1.4218, "step": 3566 }, { "epoch": 2.61, "learning_rate": 8.594055060283268e-07, "loss": 1.4507, "step": 3567 }, { "epoch": 2.61, "learning_rate": 8.562004906375843e-07, "loss": 1.3349, "step": 3568 }, { "epoch": 2.61, "learning_rate": 8.530011954036998e-07, "loss": 1.3622, "step": 3569 }, { "epoch": 2.62, "learning_rate": 8.498076223280794e-07, "loss": 1.2825, "step": 3570 }, { "epoch": 2.62, "learning_rate": 8.466197734085502e-07, "loss": 1.3827, "step": 3571 }, { "epoch": 2.62, "learning_rate": 8.434376506393616e-07, "loss": 1.4661, "step": 3572 }, { "epoch": 2.62, "learning_rate": 8.402612560111767e-07, "loss": 1.5397, "step": 3573 }, { "epoch": 2.62, "learning_rate": 8.370905915110805e-07, "loss": 1.488, "step": 3574 }, { "epoch": 2.62, "learning_rate": 8.339256591225653e-07, "loss": 1.4157, "step": 3575 }, { "epoch": 2.62, "learning_rate": 8.307664608255461e-07, "loss": 1.1911, "step": 3576 }, { "epoch": 2.62, "learning_rate": 8.276129985963432e-07, "loss": 1.5097, "step": 3577 }, { "epoch": 2.62, "learning_rate": 8.244652744076941e-07, "loss": 1.5415, "step": 3578 }, { "epoch": 2.62, "learning_rate": 8.213232902287438e-07, "loss": 1.4447, "step": 3579 }, { "epoch": 2.62, "learning_rate": 8.181870480250509e-07, "loss": 1.4929, "step": 3580 }, { "epoch": 2.62, "learning_rate": 8.150565497585739e-07, "loss": 1.5528, "step": 3581 }, { "epoch": 2.62, "learning_rate": 8.11931797387685e-07, "loss": 1.3736, "step": 3582 }, { "epoch": 2.62, "learning_rate": 8.088127928671586e-07, "loss": 1.52, "step": 3583 }, { "epoch": 2.63, "learning_rate": 8.056995381481747e-07, "loss": 1.2744, "step": 3584 }, { "epoch": 2.63, "learning_rate": 8.025920351783189e-07, "loss": 1.2826, "step": 3585 }, { "epoch": 2.63, "learning_rate": 7.994902859015707e-07, "loss": 1.4421, "step": 3586 }, { "epoch": 2.63, "learning_rate": 7.963942922583167e-07, "loss": 1.4349, "step": 3587 }, { "epoch": 2.63, "learning_rate": 7.933040561853433e-07, "loss": 1.3704, "step": 3588 }, { "epoch": 2.63, "learning_rate": 7.902195796158319e-07, "loss": 1.5646, "step": 3589 }, { "epoch": 2.63, "learning_rate": 7.871408644793632e-07, "loss": 1.4026, "step": 3590 }, { "epoch": 2.63, "learning_rate": 7.840679127019124e-07, "loss": 1.3832, "step": 3591 }, { "epoch": 2.63, "learning_rate": 7.810007262058472e-07, "loss": 1.4976, "step": 3592 }, { "epoch": 2.63, "learning_rate": 7.779393069099328e-07, "loss": 1.4062, "step": 3593 }, { "epoch": 2.63, "learning_rate": 7.748836567293238e-07, "loss": 1.4593, "step": 3594 }, { "epoch": 2.63, "learning_rate": 7.718337775755691e-07, "loss": 1.3195, "step": 3595 }, { "epoch": 2.63, "learning_rate": 7.687896713566045e-07, "loss": 1.4337, "step": 3596 }, { "epoch": 2.64, "learning_rate": 7.657513399767525e-07, "loss": 1.4863, "step": 3597 }, { "epoch": 2.64, "learning_rate": 7.627187853367268e-07, "loss": 1.4177, "step": 3598 }, { "epoch": 2.64, "learning_rate": 7.596920093336258e-07, "loss": 1.515, "step": 3599 }, { "epoch": 2.64, "learning_rate": 7.566710138609323e-07, "loss": 1.3753, "step": 3600 }, { "epoch": 2.64, "learning_rate": 7.53655800808516e-07, "loss": 1.4119, "step": 3601 }, { "epoch": 2.64, "learning_rate": 7.506463720626267e-07, "loss": 1.4868, "step": 3602 }, { "epoch": 2.64, "learning_rate": 7.476427295058918e-07, "loss": 1.308, "step": 3603 }, { "epoch": 2.64, "learning_rate": 7.446448750173262e-07, "loss": 1.501, "step": 3604 }, { "epoch": 2.64, "learning_rate": 7.416528104723209e-07, "loss": 1.3877, "step": 3605 }, { "epoch": 2.64, "learning_rate": 7.386665377426438e-07, "loss": 1.5469, "step": 3606 }, { "epoch": 2.64, "learning_rate": 7.356860586964421e-07, "loss": 1.4213, "step": 3607 }, { "epoch": 2.64, "learning_rate": 7.327113751982362e-07, "loss": 1.4318, "step": 3608 }, { "epoch": 2.64, "learning_rate": 7.297424891089189e-07, "loss": 1.496, "step": 3609 }, { "epoch": 2.64, "learning_rate": 7.267794022857611e-07, "loss": 1.4027, "step": 3610 }, { "epoch": 2.65, "learning_rate": 7.238221165824033e-07, "loss": 1.558, "step": 3611 }, { "epoch": 2.65, "learning_rate": 7.208706338488591e-07, "loss": 1.2924, "step": 3612 }, { "epoch": 2.65, "learning_rate": 7.179249559315104e-07, "loss": 1.5597, "step": 3613 }, { "epoch": 2.65, "learning_rate": 7.149850846731043e-07, "loss": 1.5294, "step": 3614 }, { "epoch": 2.65, "learning_rate": 7.120510219127619e-07, "loss": 1.3897, "step": 3615 }, { "epoch": 2.65, "learning_rate": 7.091227694859649e-07, "loss": 1.4731, "step": 3616 }, { "epoch": 2.65, "learning_rate": 7.062003292245645e-07, "loss": 1.4132, "step": 3617 }, { "epoch": 2.65, "learning_rate": 7.032837029567741e-07, "loss": 1.4902, "step": 3618 }, { "epoch": 2.65, "learning_rate": 7.003728925071684e-07, "loss": 1.4275, "step": 3619 }, { "epoch": 2.65, "learning_rate": 6.974678996966866e-07, "loss": 1.5261, "step": 3620 }, { "epoch": 2.65, "learning_rate": 6.945687263426259e-07, "loss": 1.4805, "step": 3621 }, { "epoch": 2.65, "learning_rate": 6.91675374258649e-07, "loss": 1.3901, "step": 3622 }, { "epoch": 2.65, "learning_rate": 6.887878452547658e-07, "loss": 1.3966, "step": 3623 }, { "epoch": 2.65, "learning_rate": 6.859061411373557e-07, "loss": 1.4708, "step": 3624 }, { "epoch": 2.66, "learning_rate": 6.830302637091446e-07, "loss": 1.342, "step": 3625 }, { "epoch": 2.66, "learning_rate": 6.801602147692177e-07, "loss": 1.3314, "step": 3626 }, { "epoch": 2.66, "learning_rate": 6.772959961130154e-07, "loss": 1.2297, "step": 3627 }, { "epoch": 2.66, "learning_rate": 6.744376095323269e-07, "loss": 1.4614, "step": 3628 }, { "epoch": 2.66, "learning_rate": 6.715850568152982e-07, "loss": 1.331, "step": 3629 }, { "epoch": 2.66, "learning_rate": 6.687383397464187e-07, "loss": 1.3715, "step": 3630 }, { "epoch": 2.66, "learning_rate": 6.658974601065338e-07, "loss": 1.5185, "step": 3631 }, { "epoch": 2.66, "learning_rate": 6.63062419672833e-07, "loss": 1.4402, "step": 3632 }, { "epoch": 2.66, "learning_rate": 6.602332202188544e-07, "loss": 1.3313, "step": 3633 }, { "epoch": 2.66, "learning_rate": 6.574098635144832e-07, "loss": 1.3368, "step": 3634 }, { "epoch": 2.66, "learning_rate": 6.545923513259477e-07, "loss": 1.4523, "step": 3635 }, { "epoch": 2.66, "learning_rate": 6.517806854158204e-07, "loss": 1.5287, "step": 3636 }, { "epoch": 2.66, "learning_rate": 6.489748675430164e-07, "loss": 1.4011, "step": 3637 }, { "epoch": 2.67, "learning_rate": 6.461748994627937e-07, "loss": 1.399, "step": 3638 }, { "epoch": 2.67, "learning_rate": 6.433807829267491e-07, "loss": 1.4223, "step": 3639 }, { "epoch": 2.67, "learning_rate": 6.405925196828189e-07, "loss": 1.3866, "step": 3640 }, { "epoch": 2.67, "learning_rate": 6.378101114752799e-07, "loss": 1.446, "step": 3641 }, { "epoch": 2.67, "learning_rate": 6.350335600447433e-07, "loss": 1.4876, "step": 3642 }, { "epoch": 2.67, "learning_rate": 6.322628671281584e-07, "loss": 1.4052, "step": 3643 }, { "epoch": 2.67, "learning_rate": 6.294980344588075e-07, "loss": 1.2974, "step": 3644 }, { "epoch": 2.67, "learning_rate": 6.267390637663107e-07, "loss": 1.49, "step": 3645 }, { "epoch": 2.67, "learning_rate": 6.239859567766172e-07, "loss": 1.3019, "step": 3646 }, { "epoch": 2.67, "learning_rate": 6.212387152120092e-07, "loss": 1.6074, "step": 3647 }, { "epoch": 2.67, "learning_rate": 6.184973407910977e-07, "loss": 1.3366, "step": 3648 }, { "epoch": 2.67, "learning_rate": 6.157618352288286e-07, "loss": 1.4095, "step": 3649 }, { "epoch": 2.67, "learning_rate": 6.130322002364719e-07, "loss": 1.4441, "step": 3650 }, { "epoch": 2.67, "learning_rate": 6.103084375216273e-07, "loss": 1.368, "step": 3651 }, { "epoch": 2.68, "learning_rate": 6.075905487882172e-07, "loss": 1.2519, "step": 3652 }, { "epoch": 2.68, "learning_rate": 6.048785357364961e-07, "loss": 1.4003, "step": 3653 }, { "epoch": 2.68, "learning_rate": 6.021724000630347e-07, "loss": 1.3958, "step": 3654 }, { "epoch": 2.68, "learning_rate": 5.994721434607331e-07, "loss": 1.4354, "step": 3655 }, { "epoch": 2.68, "learning_rate": 5.967777676188103e-07, "loss": 1.5112, "step": 3656 }, { "epoch": 2.68, "learning_rate": 5.940892742228111e-07, "loss": 1.5008, "step": 3657 }, { "epoch": 2.68, "learning_rate": 5.914066649545935e-07, "loss": 1.4826, "step": 3658 }, { "epoch": 2.68, "learning_rate": 5.887299414923386e-07, "loss": 2.9757, "step": 3659 }, { "epoch": 2.68, "learning_rate": 5.860591055105446e-07, "loss": 1.4066, "step": 3660 }, { "epoch": 2.68, "learning_rate": 5.833941586800284e-07, "loss": 1.5339, "step": 3661 }, { "epoch": 2.68, "learning_rate": 5.80735102667922e-07, "loss": 1.4313, "step": 3662 }, { "epoch": 2.68, "learning_rate": 5.78081939137668e-07, "loss": 1.2941, "step": 3663 }, { "epoch": 2.68, "learning_rate": 5.754346697490287e-07, "loss": 1.4353, "step": 3664 }, { "epoch": 2.68, "learning_rate": 5.727932961580751e-07, "loss": 1.3333, "step": 3665 }, { "epoch": 2.69, "learning_rate": 5.701578200171942e-07, "loss": 1.5961, "step": 3666 }, { "epoch": 2.69, "learning_rate": 5.675282429750795e-07, "loss": 1.5154, "step": 3667 }, { "epoch": 2.69, "learning_rate": 5.64904566676735e-07, "loss": 1.3268, "step": 3668 }, { "epoch": 2.69, "learning_rate": 5.622867927634768e-07, "loss": 1.426, "step": 3669 }, { "epoch": 2.69, "learning_rate": 5.596749228729214e-07, "loss": 1.4435, "step": 3670 }, { "epoch": 2.69, "learning_rate": 5.570689586389988e-07, "loss": 1.4421, "step": 3671 }, { "epoch": 2.69, "learning_rate": 5.544689016919425e-07, "loss": 1.2758, "step": 3672 }, { "epoch": 2.69, "learning_rate": 5.518747536582891e-07, "loss": 1.4106, "step": 3673 }, { "epoch": 2.69, "learning_rate": 5.492865161608818e-07, "loss": 1.5214, "step": 3674 }, { "epoch": 2.69, "learning_rate": 5.467041908188608e-07, "loss": 1.422, "step": 3675 }, { "epoch": 2.69, "learning_rate": 5.441277792476729e-07, "loss": 1.499, "step": 3676 }, { "epoch": 2.69, "learning_rate": 5.415572830590632e-07, "loss": 1.3489, "step": 3677 }, { "epoch": 2.69, "learning_rate": 5.389927038610776e-07, "loss": 1.4226, "step": 3678 }, { "epoch": 2.7, "learning_rate": 5.364340432580605e-07, "loss": 1.4378, "step": 3679 }, { "epoch": 2.7, "learning_rate": 5.338813028506506e-07, "loss": 1.4398, "step": 3680 }, { "epoch": 2.7, "learning_rate": 5.31334484235786e-07, "loss": 1.4055, "step": 3681 }, { "epoch": 2.7, "learning_rate": 5.287935890066998e-07, "loss": 1.2958, "step": 3682 }, { "epoch": 2.7, "learning_rate": 5.262586187529195e-07, "loss": 1.4219, "step": 3683 }, { "epoch": 2.7, "learning_rate": 5.237295750602667e-07, "loss": 1.5355, "step": 3684 }, { "epoch": 2.7, "learning_rate": 5.212064595108546e-07, "loss": 1.3046, "step": 3685 }, { "epoch": 2.7, "learning_rate": 5.186892736830851e-07, "loss": 1.4261, "step": 3686 }, { "epoch": 2.7, "learning_rate": 5.161780191516552e-07, "loss": 1.5075, "step": 3687 }, { "epoch": 2.7, "learning_rate": 5.136726974875506e-07, "loss": 1.5305, "step": 3688 }, { "epoch": 2.7, "learning_rate": 5.111733102580429e-07, "loss": 1.358, "step": 3689 }, { "epoch": 2.7, "learning_rate": 5.086798590266961e-07, "loss": 1.3598, "step": 3690 }, { "epoch": 2.7, "learning_rate": 5.061923453533546e-07, "loss": 1.5671, "step": 3691 }, { "epoch": 2.7, "learning_rate": 5.037107707941524e-07, "loss": 1.4212, "step": 3692 }, { "epoch": 2.71, "learning_rate": 5.012351369015067e-07, "loss": 1.4613, "step": 3693 }, { "epoch": 2.71, "learning_rate": 4.987654452241187e-07, "loss": 1.4121, "step": 3694 }, { "epoch": 2.71, "learning_rate": 4.963016973069757e-07, "loss": 1.2549, "step": 3695 }, { "epoch": 2.71, "learning_rate": 4.938438946913382e-07, "loss": 1.3299, "step": 3696 }, { "epoch": 2.71, "learning_rate": 4.913920389147553e-07, "loss": 1.3907, "step": 3697 }, { "epoch": 2.71, "learning_rate": 4.889461315110522e-07, "loss": 1.3571, "step": 3698 }, { "epoch": 2.71, "learning_rate": 4.865061740103361e-07, "loss": 1.3993, "step": 3699 }, { "epoch": 2.71, "learning_rate": 4.840721679389893e-07, "loss": 1.4701, "step": 3700 }, { "epoch": 2.71, "learning_rate": 4.816441148196693e-07, "loss": 1.5276, "step": 3701 }, { "epoch": 2.71, "learning_rate": 4.792220161713157e-07, "loss": 1.4435, "step": 3702 }, { "epoch": 2.71, "learning_rate": 4.768058735091352e-07, "loss": 1.5172, "step": 3703 }, { "epoch": 2.71, "learning_rate": 4.7439568834461545e-07, "loss": 1.2428, "step": 3704 }, { "epoch": 2.71, "learning_rate": 4.719914621855137e-07, "loss": 1.4193, "step": 3705 }, { "epoch": 2.72, "learning_rate": 4.695931965358602e-07, "loss": 1.3842, "step": 3706 }, { "epoch": 2.72, "learning_rate": 4.6720089289595705e-07, "loss": 1.3731, "step": 3707 }, { "epoch": 2.72, "learning_rate": 4.6481455276237485e-07, "loss": 1.3734, "step": 3708 }, { "epoch": 2.72, "learning_rate": 4.624341776279551e-07, "loss": 1.3692, "step": 3709 }, { "epoch": 2.72, "learning_rate": 4.600597689818076e-07, "loss": 1.4728, "step": 3710 }, { "epoch": 2.72, "learning_rate": 4.576913283093098e-07, "loss": 1.3509, "step": 3711 }, { "epoch": 2.72, "learning_rate": 4.553288570921055e-07, "loss": 1.5037, "step": 3712 }, { "epoch": 2.72, "learning_rate": 4.5297235680810234e-07, "loss": 1.4073, "step": 3713 }, { "epoch": 2.72, "learning_rate": 4.506218289314757e-07, "loss": 1.4385, "step": 3714 }, { "epoch": 2.72, "learning_rate": 4.482772749326636e-07, "loss": 1.4835, "step": 3715 }, { "epoch": 2.72, "learning_rate": 4.4593869627836405e-07, "loss": 1.4077, "step": 3716 }, { "epoch": 2.72, "learning_rate": 4.4360609443154233e-07, "loss": 1.2876, "step": 3717 }, { "epoch": 2.72, "learning_rate": 4.412794708514223e-07, "loss": 1.3965, "step": 3718 }, { "epoch": 2.72, "learning_rate": 4.3895882699348636e-07, "loss": 1.3816, "step": 3719 }, { "epoch": 2.73, "learning_rate": 4.3664416430947766e-07, "loss": 1.3493, "step": 3720 }, { "epoch": 2.73, "learning_rate": 4.343354842473968e-07, "loss": 1.5108, "step": 3721 }, { "epoch": 2.73, "learning_rate": 4.320327882515041e-07, "loss": 1.3843, "step": 3722 }, { "epoch": 2.73, "learning_rate": 4.297360777623161e-07, "loss": 1.4018, "step": 3723 }, { "epoch": 2.73, "learning_rate": 4.2744535421659904e-07, "loss": 1.3489, "step": 3724 }, { "epoch": 2.73, "learning_rate": 4.251606190473823e-07, "loss": 1.4685, "step": 3725 }, { "epoch": 2.73, "learning_rate": 4.2288187368394353e-07, "loss": 1.252, "step": 3726 }, { "epoch": 2.73, "learning_rate": 4.2060911955181474e-07, "loss": 1.4688, "step": 3727 }, { "epoch": 2.73, "learning_rate": 4.18342358072783e-07, "loss": 1.3508, "step": 3728 }, { "epoch": 2.73, "learning_rate": 4.160815906648796e-07, "loss": 1.3249, "step": 3729 }, { "epoch": 2.73, "learning_rate": 4.138268187423922e-07, "loss": 1.4872, "step": 3730 }, { "epoch": 2.73, "learning_rate": 4.1157804371585786e-07, "loss": 1.3425, "step": 3731 }, { "epoch": 2.73, "learning_rate": 4.0933526699205475e-07, "loss": 1.3394, "step": 3732 }, { "epoch": 2.73, "learning_rate": 4.0709848997401823e-07, "loss": 1.4182, "step": 3733 }, { "epoch": 2.74, "learning_rate": 4.048677140610258e-07, "loss": 1.4313, "step": 3734 }, { "epoch": 2.74, "learning_rate": 4.026429406485988e-07, "loss": 1.3854, "step": 3735 }, { "epoch": 2.74, "learning_rate": 4.004241711285073e-07, "loss": 1.2742, "step": 3736 }, { "epoch": 2.74, "learning_rate": 3.9821140688876434e-07, "loss": 1.36, "step": 3737 }, { "epoch": 2.74, "learning_rate": 3.9600464931362494e-07, "loss": 1.628, "step": 3738 }, { "epoch": 2.74, "learning_rate": 3.938038997835869e-07, "loss": 1.3487, "step": 3739 }, { "epoch": 2.74, "learning_rate": 3.916091596753935e-07, "loss": 1.4138, "step": 3740 }, { "epoch": 2.74, "learning_rate": 3.894204303620197e-07, "loss": 1.4067, "step": 3741 }, { "epoch": 2.74, "learning_rate": 3.872377132126892e-07, "loss": 1.449, "step": 3742 }, { "epoch": 2.74, "learning_rate": 3.8506100959286085e-07, "loss": 1.2905, "step": 3743 }, { "epoch": 2.74, "learning_rate": 3.8289032086423095e-07, "loss": 1.5082, "step": 3744 }, { "epoch": 2.74, "learning_rate": 3.807256483847366e-07, "loss": 1.3746, "step": 3745 }, { "epoch": 2.74, "learning_rate": 3.7856699350854453e-07, "loss": 1.3141, "step": 3746 }, { "epoch": 2.75, "learning_rate": 3.7641435758606347e-07, "loss": 1.5215, "step": 3747 }, { "epoch": 2.75, "learning_rate": 3.742677419639329e-07, "loss": 1.4579, "step": 3748 }, { "epoch": 2.75, "learning_rate": 3.721271479850286e-07, "loss": 1.3912, "step": 3749 }, { "epoch": 2.75, "learning_rate": 3.6999257698845825e-07, "loss": 1.3352, "step": 3750 }, { "epoch": 2.75, "learning_rate": 3.678640303095626e-07, "loss": 1.2633, "step": 3751 }, { "epoch": 2.75, "learning_rate": 3.6574150927991216e-07, "loss": 1.2407, "step": 3752 }, { "epoch": 2.75, "learning_rate": 3.6362501522730797e-07, "loss": 1.4304, "step": 3753 }, { "epoch": 2.75, "learning_rate": 3.615145494757832e-07, "loss": 1.2547, "step": 3754 }, { "epoch": 2.75, "learning_rate": 3.5941011334559627e-07, "loss": 1.3881, "step": 3755 }, { "epoch": 2.75, "learning_rate": 3.5731170815323733e-07, "loss": 1.277, "step": 3756 }, { "epoch": 2.75, "learning_rate": 3.5521933521142085e-07, "loss": 1.3773, "step": 3757 }, { "epoch": 2.75, "learning_rate": 3.5313299582908744e-07, "loss": 1.3965, "step": 3758 }, { "epoch": 2.75, "learning_rate": 3.510526913114065e-07, "loss": 1.5507, "step": 3759 }, { "epoch": 2.75, "learning_rate": 3.4897842295976815e-07, "loss": 1.4802, "step": 3760 }, { "epoch": 2.76, "learning_rate": 3.46910192071791e-07, "loss": 1.5821, "step": 3761 }, { "epoch": 2.76, "learning_rate": 3.4484799994131345e-07, "loss": 1.5345, "step": 3762 }, { "epoch": 2.76, "learning_rate": 3.427918478583936e-07, "loss": 1.506, "step": 3763 }, { "epoch": 2.76, "learning_rate": 3.4074173710931804e-07, "loss": 1.4872, "step": 3764 }, { "epoch": 2.76, "learning_rate": 3.3869766897658753e-07, "loss": 1.4125, "step": 3765 }, { "epoch": 2.76, "learning_rate": 3.3665964473892807e-07, "loss": 1.3694, "step": 3766 }, { "epoch": 2.76, "learning_rate": 3.34627665671281e-07, "loss": 1.5119, "step": 3767 }, { "epoch": 2.76, "learning_rate": 3.3260173304480724e-07, "loss": 1.3376, "step": 3768 }, { "epoch": 2.76, "learning_rate": 3.3058184812688296e-07, "loss": 1.4613, "step": 3769 }, { "epoch": 2.76, "learning_rate": 3.2856801218110636e-07, "loss": 1.4207, "step": 3770 }, { "epoch": 2.76, "learning_rate": 3.265602264672862e-07, "loss": 1.2396, "step": 3771 }, { "epoch": 2.76, "learning_rate": 3.245584922414491e-07, "loss": 1.459, "step": 3772 }, { "epoch": 2.76, "learning_rate": 3.225628107558365e-07, "loss": 1.432, "step": 3773 }, { "epoch": 2.76, "learning_rate": 3.2057318325889877e-07, "loss": 1.5105, "step": 3774 }, { "epoch": 2.77, "learning_rate": 3.185896109953057e-07, "loss": 1.3697, "step": 3775 }, { "epoch": 2.77, "learning_rate": 3.1661209520593484e-07, "loss": 1.4213, "step": 3776 }, { "epoch": 2.77, "learning_rate": 3.146406371278754e-07, "loss": 1.252, "step": 3777 }, { "epoch": 2.77, "learning_rate": 3.126752379944276e-07, "loss": 1.524, "step": 3778 }, { "epoch": 2.77, "learning_rate": 3.1071589903510334e-07, "loss": 1.4868, "step": 3779 }, { "epoch": 2.77, "learning_rate": 3.0876262147561784e-07, "loss": 1.3404, "step": 3780 }, { "epoch": 2.77, "learning_rate": 3.068154065378992e-07, "loss": 1.3441, "step": 3781 }, { "epoch": 2.77, "learning_rate": 3.04874255440083e-07, "loss": 1.4694, "step": 3782 }, { "epoch": 2.77, "learning_rate": 3.029391693965089e-07, "loss": 1.3585, "step": 3783 }, { "epoch": 2.77, "learning_rate": 3.010101496177242e-07, "loss": 1.2613, "step": 3784 }, { "epoch": 2.77, "learning_rate": 2.990871973104792e-07, "loss": 1.3718, "step": 3785 }, { "epoch": 2.77, "learning_rate": 2.971703136777315e-07, "loss": 1.5077, "step": 3786 }, { "epoch": 2.77, "learning_rate": 2.952594999186398e-07, "loss": 1.4291, "step": 3787 }, { "epoch": 2.78, "learning_rate": 2.9335475722856667e-07, "loss": 1.477, "step": 3788 }, { "epoch": 2.78, "learning_rate": 2.91456086799079e-07, "loss": 1.5473, "step": 3789 }, { "epoch": 2.78, "learning_rate": 2.895634898179378e-07, "loss": 1.2483, "step": 3790 }, { "epoch": 2.78, "learning_rate": 2.876769674691127e-07, "loss": 1.3818, "step": 3791 }, { "epoch": 2.78, "learning_rate": 2.857965209327695e-07, "loss": 1.4782, "step": 3792 }, { "epoch": 2.78, "learning_rate": 2.8392215138527414e-07, "loss": 1.3881, "step": 3793 }, { "epoch": 2.78, "learning_rate": 2.8205385999918954e-07, "loss": 1.2765, "step": 3794 }, { "epoch": 2.78, "learning_rate": 2.8019164794327756e-07, "loss": 1.4162, "step": 3795 }, { "epoch": 2.78, "learning_rate": 2.7833551638249523e-07, "loss": 1.3491, "step": 3796 }, { "epoch": 2.78, "learning_rate": 2.7648546647799814e-07, "loss": 1.52, "step": 3797 }, { "epoch": 2.78, "learning_rate": 2.746414993871349e-07, "loss": 1.4551, "step": 3798 }, { "epoch": 2.78, "learning_rate": 2.7280361626345286e-07, "loss": 1.3829, "step": 3799 }, { "epoch": 2.78, "learning_rate": 2.7097181825668897e-07, "loss": 1.5097, "step": 3800 }, { "epoch": 2.78, "learning_rate": 2.6914610651277427e-07, "loss": 1.4629, "step": 3801 }, { "epoch": 2.79, "learning_rate": 2.673264821738353e-07, "loss": 1.3158, "step": 3802 }, { "epoch": 2.79, "learning_rate": 2.655129463781869e-07, "loss": 1.4057, "step": 3803 }, { "epoch": 2.79, "learning_rate": 2.637055002603373e-07, "loss": 1.5694, "step": 3804 }, { "epoch": 2.79, "learning_rate": 2.6190414495098404e-07, "loss": 1.4252, "step": 3805 }, { "epoch": 2.79, "learning_rate": 2.6010888157701587e-07, "loss": 1.373, "step": 3806 }, { "epoch": 2.79, "learning_rate": 2.5831971126150767e-07, "loss": 1.3046, "step": 3807 }, { "epoch": 2.79, "learning_rate": 2.565366351237264e-07, "loss": 1.4537, "step": 3808 }, { "epoch": 2.79, "learning_rate": 2.547596542791231e-07, "loss": 1.2729, "step": 3809 }, { "epoch": 2.79, "learning_rate": 2.529887698393374e-07, "loss": 1.4901, "step": 3810 }, { "epoch": 2.79, "learning_rate": 2.512239829121954e-07, "loss": 1.3541, "step": 3811 }, { "epoch": 2.79, "learning_rate": 2.4946529460171066e-07, "loss": 1.3703, "step": 3812 }, { "epoch": 2.79, "learning_rate": 2.477127060080753e-07, "loss": 1.2747, "step": 3813 }, { "epoch": 2.79, "learning_rate": 2.459662182276712e-07, "loss": 1.3579, "step": 3814 }, { "epoch": 2.79, "learning_rate": 2.442258323530633e-07, "loss": 1.5604, "step": 3815 }, { "epoch": 2.8, "learning_rate": 2.4249154947299734e-07, "loss": 1.3363, "step": 3816 }, { "epoch": 2.8, "learning_rate": 2.4076337067240217e-07, "loss": 1.3989, "step": 3817 }, { "epoch": 2.8, "learning_rate": 2.390412970323874e-07, "loss": 1.4126, "step": 3818 }, { "epoch": 2.8, "learning_rate": 2.3732532963024468e-07, "loss": 1.4729, "step": 3819 }, { "epoch": 2.8, "learning_rate": 2.3561546953944303e-07, "loss": 1.3791, "step": 3820 }, { "epoch": 2.8, "learning_rate": 2.3391171782963462e-07, "loss": 1.4462, "step": 3821 }, { "epoch": 2.8, "learning_rate": 2.322140755666491e-07, "loss": 1.4532, "step": 3822 }, { "epoch": 2.8, "learning_rate": 2.3052254381249138e-07, "loss": 1.3449, "step": 3823 }, { "epoch": 2.8, "learning_rate": 2.2883712362534838e-07, "loss": 1.1841, "step": 3824 }, { "epoch": 2.8, "learning_rate": 2.2715781605957886e-07, "loss": 1.1907, "step": 3825 }, { "epoch": 2.8, "learning_rate": 2.2548462216572143e-07, "loss": 1.3066, "step": 3826 }, { "epoch": 2.8, "learning_rate": 2.2381754299048764e-07, "loss": 1.3461, "step": 3827 }, { "epoch": 2.8, "learning_rate": 2.2215657957676774e-07, "loss": 1.3894, "step": 3828 }, { "epoch": 2.81, "learning_rate": 2.2050173296362164e-07, "loss": 1.4879, "step": 3829 }, { "epoch": 2.81, "learning_rate": 2.1885300418628353e-07, "loss": 1.5135, "step": 3830 }, { "epoch": 2.81, "learning_rate": 2.1721039427616164e-07, "loss": 1.3034, "step": 3831 }, { "epoch": 2.81, "learning_rate": 2.1557390426083736e-07, "loss": 1.3442, "step": 3832 }, { "epoch": 2.81, "learning_rate": 2.1394353516406285e-07, "loss": 1.3438, "step": 3833 }, { "epoch": 2.81, "learning_rate": 2.1231928800575897e-07, "loss": 1.303, "step": 3834 }, { "epoch": 2.81, "learning_rate": 2.1070116380201734e-07, "loss": 1.3207, "step": 3835 }, { "epoch": 2.81, "learning_rate": 2.0908916356510267e-07, "loss": 1.5011, "step": 3836 }, { "epoch": 2.81, "learning_rate": 2.074832883034461e-07, "loss": 1.478, "step": 3837 }, { "epoch": 2.81, "learning_rate": 2.0588353902164737e-07, "loss": 1.4784, "step": 3838 }, { "epoch": 2.81, "learning_rate": 2.042899167204726e-07, "loss": 1.3398, "step": 3839 }, { "epoch": 2.81, "learning_rate": 2.0270242239685768e-07, "loss": 1.4032, "step": 3840 }, { "epoch": 2.81, "learning_rate": 2.011210570439026e-07, "loss": 1.3935, "step": 3841 }, { "epoch": 2.81, "learning_rate": 1.9954582165087498e-07, "loss": 1.3895, "step": 3842 }, { "epoch": 2.82, "learning_rate": 1.9797671720320543e-07, "loss": 1.5207, "step": 3843 }, { "epoch": 2.82, "learning_rate": 1.964137446824932e-07, "loss": 1.323, "step": 3844 }, { "epoch": 2.82, "learning_rate": 1.9485690506649723e-07, "loss": 1.3238, "step": 3845 }, { "epoch": 2.82, "learning_rate": 1.9330619932914184e-07, "loss": 1.4221, "step": 3846 }, { "epoch": 2.82, "learning_rate": 1.9176162844051438e-07, "loss": 1.4007, "step": 3847 }, { "epoch": 2.82, "learning_rate": 1.90223193366863e-07, "loss": 1.4242, "step": 3848 }, { "epoch": 2.82, "learning_rate": 1.886908950706001e-07, "loss": 1.4182, "step": 3849 }, { "epoch": 2.82, "learning_rate": 1.871647345102967e-07, "loss": 1.4276, "step": 3850 }, { "epoch": 2.82, "learning_rate": 1.8564471264068241e-07, "loss": 1.34, "step": 3851 }, { "epoch": 2.82, "learning_rate": 1.8413083041265213e-07, "loss": 1.2858, "step": 3852 }, { "epoch": 2.82, "learning_rate": 1.8262308877325496e-07, "loss": 1.4327, "step": 3853 }, { "epoch": 2.82, "learning_rate": 1.811214886657009e-07, "loss": 1.3636, "step": 3854 }, { "epoch": 2.82, "learning_rate": 1.7962603102935848e-07, "loss": 1.4824, "step": 3855 }, { "epoch": 2.82, "learning_rate": 1.7813671679975165e-07, "loss": 1.5782, "step": 3856 }, { "epoch": 2.83, "learning_rate": 1.7665354690856175e-07, "loss": 1.3163, "step": 3857 }, { "epoch": 2.83, "learning_rate": 1.751765222836266e-07, "loss": 1.4818, "step": 3858 }, { "epoch": 2.83, "learning_rate": 1.737056438489404e-07, "loss": 1.3373, "step": 3859 }, { "epoch": 2.83, "learning_rate": 1.7224091252465158e-07, "loss": 1.5136, "step": 3860 }, { "epoch": 2.83, "learning_rate": 1.7078232922706495e-07, "loss": 1.3379, "step": 3861 }, { "epoch": 2.83, "learning_rate": 1.6932989486863616e-07, "loss": 1.5231, "step": 3862 }, { "epoch": 2.83, "learning_rate": 1.6788361035797506e-07, "loss": 1.4323, "step": 3863 }, { "epoch": 2.83, "learning_rate": 1.6644347659984571e-07, "loss": 1.298, "step": 3864 }, { "epoch": 2.83, "learning_rate": 1.6500949449516412e-07, "loss": 1.4258, "step": 3865 }, { "epoch": 2.83, "learning_rate": 1.6358166494099824e-07, "loss": 1.4183, "step": 3866 }, { "epoch": 2.83, "learning_rate": 1.621599888305636e-07, "loss": 1.3975, "step": 3867 }, { "epoch": 2.83, "learning_rate": 1.60744467053231e-07, "loss": 1.3589, "step": 3868 }, { "epoch": 2.83, "learning_rate": 1.5933510049451873e-07, "loss": 1.5754, "step": 3869 }, { "epoch": 2.84, "learning_rate": 1.57931890036096e-07, "loss": 1.472, "step": 3870 }, { "epoch": 2.84, "learning_rate": 1.565348365557795e-07, "loss": 1.4166, "step": 3871 }, { "epoch": 2.84, "learning_rate": 1.551439409275335e-07, "loss": 1.3604, "step": 3872 }, { "epoch": 2.84, "learning_rate": 1.5375920402147305e-07, "loss": 1.2815, "step": 3873 }, { "epoch": 2.84, "learning_rate": 1.5238062670385745e-07, "loss": 1.4458, "step": 3874 }, { "epoch": 2.84, "learning_rate": 1.5100820983709353e-07, "loss": 1.3446, "step": 3875 }, { "epoch": 2.84, "learning_rate": 1.496419542797356e-07, "loss": 1.3004, "step": 3876 }, { "epoch": 2.84, "learning_rate": 1.482818608864822e-07, "loss": 1.5169, "step": 3877 }, { "epoch": 2.84, "learning_rate": 1.4692793050817833e-07, "loss": 1.3253, "step": 3878 }, { "epoch": 2.84, "learning_rate": 1.4558016399181086e-07, "loss": 1.3548, "step": 3879 }, { "epoch": 2.84, "learning_rate": 1.4423856218051423e-07, "loss": 1.3435, "step": 3880 }, { "epoch": 2.84, "learning_rate": 1.429031259135627e-07, "loss": 1.3698, "step": 3881 }, { "epoch": 2.84, "learning_rate": 1.4157385602637685e-07, "loss": 1.3658, "step": 3882 }, { "epoch": 2.84, "learning_rate": 1.402507533505193e-07, "loss": 1.3499, "step": 3883 }, { "epoch": 2.85, "learning_rate": 1.3893381871369127e-07, "loss": 1.2652, "step": 3884 }, { "epoch": 2.85, "learning_rate": 1.376230529397371e-07, "loss": 1.5263, "step": 3885 }, { "epoch": 2.85, "learning_rate": 1.3631845684864642e-07, "loss": 1.2995, "step": 3886 }, { "epoch": 2.85, "learning_rate": 1.3502003125654085e-07, "loss": 1.4917, "step": 3887 }, { "epoch": 2.85, "learning_rate": 1.337277769756895e-07, "loss": 1.5336, "step": 3888 }, { "epoch": 2.85, "learning_rate": 1.3244169481449687e-07, "loss": 1.4078, "step": 3889 }, { "epoch": 2.85, "learning_rate": 1.3116178557750715e-07, "loss": 1.3862, "step": 3890 }, { "epoch": 2.85, "learning_rate": 1.2988805006540317e-07, "loss": 1.3313, "step": 3891 }, { "epoch": 2.85, "learning_rate": 1.2862048907500535e-07, "loss": 1.4178, "step": 3892 }, { "epoch": 2.85, "learning_rate": 1.273591033992716e-07, "loss": 1.3474, "step": 3893 }, { "epoch": 2.85, "learning_rate": 1.2610389382729738e-07, "loss": 1.3767, "step": 3894 }, { "epoch": 2.85, "learning_rate": 1.2485486114431233e-07, "loss": 1.3806, "step": 3895 }, { "epoch": 2.85, "learning_rate": 1.2361200613168366e-07, "loss": 1.3231, "step": 3896 }, { "epoch": 2.85, "learning_rate": 1.22375329566915e-07, "loss": 1.4478, "step": 3897 }, { "epoch": 2.86, "learning_rate": 1.2114483222364194e-07, "loss": 1.5652, "step": 3898 }, { "epoch": 2.86, "learning_rate": 1.1992051487163758e-07, "loss": 1.4003, "step": 3899 }, { "epoch": 2.86, "learning_rate": 1.1870237827680708e-07, "loss": 1.4443, "step": 3900 }, { "epoch": 2.86, "learning_rate": 1.1749042320118864e-07, "loss": 1.3378, "step": 3901 }, { "epoch": 2.86, "learning_rate": 1.1628465040295578e-07, "loss": 1.3899, "step": 3902 }, { "epoch": 2.86, "learning_rate": 1.1508506063641178e-07, "loss": 1.415, "step": 3903 }, { "epoch": 2.86, "learning_rate": 1.1389165465199415e-07, "loss": 1.353, "step": 3904 }, { "epoch": 2.86, "learning_rate": 1.1270443319627123e-07, "loss": 1.3749, "step": 3905 }, { "epoch": 2.86, "learning_rate": 1.1152339701194004e-07, "loss": 1.333, "step": 3906 }, { "epoch": 2.86, "learning_rate": 1.1034854683783291e-07, "loss": 1.4696, "step": 3907 }, { "epoch": 2.86, "learning_rate": 1.0917988340890861e-07, "loss": 1.514, "step": 3908 }, { "epoch": 2.86, "learning_rate": 1.0801740745625677e-07, "loss": 1.5374, "step": 3909 }, { "epoch": 2.86, "learning_rate": 1.0686111970709789e-07, "loss": 1.4684, "step": 3910 }, { "epoch": 2.87, "learning_rate": 1.0571102088477891e-07, "loss": 1.4801, "step": 3911 }, { "epoch": 2.87, "learning_rate": 1.0456711170877543e-07, "loss": 1.5062, "step": 3912 }, { "epoch": 2.87, "learning_rate": 1.0342939289469278e-07, "loss": 1.3383, "step": 3913 }, { "epoch": 2.87, "learning_rate": 1.0229786515426055e-07, "loss": 1.3525, "step": 3914 }, { "epoch": 2.87, "learning_rate": 1.0117252919533804e-07, "loss": 1.4496, "step": 3915 }, { "epoch": 2.87, "learning_rate": 1.0005338572191215e-07, "loss": 1.5404, "step": 3916 }, { "epoch": 2.87, "learning_rate": 9.894043543409171e-08, "loss": 1.4229, "step": 3917 }, { "epoch": 2.87, "learning_rate": 9.783367902811424e-08, "loss": 1.4865, "step": 3918 }, { "epoch": 2.87, "learning_rate": 9.673311719634148e-08, "loss": 1.4331, "step": 3919 }, { "epoch": 2.87, "learning_rate": 9.563875062726157e-08, "loss": 1.3305, "step": 3920 }, { "epoch": 2.87, "learning_rate": 9.455058000548467e-08, "loss": 1.417, "step": 3921 }, { "epoch": 2.87, "learning_rate": 9.346860601174623e-08, "loss": 1.4476, "step": 3922 }, { "epoch": 2.87, "learning_rate": 9.239282932290594e-08, "loss": 1.3185, "step": 3923 }, { "epoch": 2.87, "learning_rate": 9.132325061194325e-08, "loss": 1.3319, "step": 3924 }, { "epoch": 2.88, "learning_rate": 9.025987054796625e-08, "loss": 1.413, "step": 3925 }, { "epoch": 2.88, "learning_rate": 8.920268979619839e-08, "loss": 1.3858, "step": 3926 }, { "epoch": 2.88, "learning_rate": 8.815170901799174e-08, "loss": 1.4736, "step": 3927 }, { "epoch": 2.88, "learning_rate": 8.710692887081373e-08, "loss": 1.4988, "step": 3928 }, { "epoch": 2.88, "learning_rate": 8.606835000825598e-08, "loss": 1.498, "step": 3929 }, { "epoch": 2.88, "learning_rate": 8.503597308002986e-08, "loss": 1.2703, "step": 3930 }, { "epoch": 2.88, "learning_rate": 8.400979873196879e-08, "loss": 1.4666, "step": 3931 }, { "epoch": 2.88, "learning_rate": 8.298982760602481e-08, "loss": 1.4285, "step": 3932 }, { "epoch": 2.88, "learning_rate": 8.19760603402675e-08, "loss": 1.4632, "step": 3933 }, { "epoch": 2.88, "learning_rate": 8.096849756888736e-08, "loss": 1.5351, "step": 3934 }, { "epoch": 2.88, "learning_rate": 7.996713992219352e-08, "loss": 1.3717, "step": 3935 }, { "epoch": 2.88, "learning_rate": 7.897198802661266e-08, "loss": 1.4282, "step": 3936 }, { "epoch": 2.88, "learning_rate": 7.798304250469013e-08, "loss": 1.5729, "step": 3937 }, { "epoch": 2.88, "learning_rate": 7.700030397508773e-08, "loss": 1.4068, "step": 3938 }, { "epoch": 2.89, "learning_rate": 7.602377305258479e-08, "loss": 1.4533, "step": 3939 }, { "epoch": 2.89, "learning_rate": 7.50534503480771e-08, "loss": 1.3247, "step": 3940 }, { "epoch": 2.89, "learning_rate": 7.408933646857796e-08, "loss": 1.5716, "step": 3941 }, { "epoch": 2.89, "learning_rate": 7.313143201721384e-08, "loss": 1.2799, "step": 3942 }, { "epoch": 2.89, "learning_rate": 7.217973759322982e-08, "loss": 1.5811, "step": 3943 }, { "epoch": 2.89, "learning_rate": 7.123425379198412e-08, "loss": 1.2152, "step": 3944 }, { "epoch": 2.89, "learning_rate": 7.029498120494916e-08, "loss": 1.2491, "step": 3945 }, { "epoch": 2.89, "learning_rate": 6.936192041971491e-08, "loss": 1.3822, "step": 3946 }, { "epoch": 2.89, "learning_rate": 6.843507201998224e-08, "loss": 1.2595, "step": 3947 }, { "epoch": 2.89, "learning_rate": 6.751443658556733e-08, "loss": 1.4861, "step": 3948 }, { "epoch": 2.89, "learning_rate": 6.660001469239841e-08, "loss": 1.4531, "step": 3949 }, { "epoch": 2.89, "learning_rate": 6.569180691251786e-08, "loss": 1.5141, "step": 3950 }, { "epoch": 2.89, "learning_rate": 6.478981381408012e-08, "loss": 1.5072, "step": 3951 }, { "epoch": 2.9, "learning_rate": 6.389403596135268e-08, "loss": 1.3374, "step": 3952 }, { "epoch": 2.9, "learning_rate": 6.300447391471287e-08, "loss": 1.4895, "step": 3953 }, { "epoch": 2.9, "learning_rate": 6.212112823065109e-08, "loss": 1.33, "step": 3954 }, { "epoch": 2.9, "learning_rate": 6.124399946176974e-08, "loss": 1.4758, "step": 3955 }, { "epoch": 2.9, "learning_rate": 6.037308815677989e-08, "loss": 1.3572, "step": 3956 }, { "epoch": 2.9, "learning_rate": 5.9508394860504635e-08, "loss": 1.3684, "step": 3957 }, { "epoch": 2.9, "learning_rate": 5.8649920113876823e-08, "loss": 1.2664, "step": 3958 }, { "epoch": 2.9, "learning_rate": 5.779766445393908e-08, "loss": 1.4714, "step": 3959 }, { "epoch": 2.9, "learning_rate": 5.695162841384383e-08, "loss": 1.3595, "step": 3960 }, { "epoch": 2.9, "learning_rate": 5.6111812522852136e-08, "loss": 1.4, "step": 3961 }, { "epoch": 2.9, "learning_rate": 5.527821730633376e-08, "loss": 1.5646, "step": 3962 }, { "epoch": 2.9, "learning_rate": 5.445084328576711e-08, "loss": 1.465, "step": 3963 }, { "epoch": 2.9, "learning_rate": 5.362969097873927e-08, "loss": 1.4426, "step": 3964 }, { "epoch": 2.9, "learning_rate": 5.2814760898944886e-08, "loss": 1.4371, "step": 3965 }, { "epoch": 2.91, "learning_rate": 5.2006053556186155e-08, "loss": 1.4727, "step": 3966 }, { "epoch": 2.91, "learning_rate": 5.120356945637173e-08, "loss": 1.608, "step": 3967 }, { "epoch": 2.91, "learning_rate": 5.040730910151892e-08, "loss": 1.3399, "step": 3968 }, { "epoch": 2.91, "learning_rate": 4.9617272989748166e-08, "loss": 1.2039, "step": 3969 }, { "epoch": 2.91, "learning_rate": 4.883346161529079e-08, "loss": 1.2983, "step": 3970 }, { "epoch": 2.91, "learning_rate": 4.805587546848123e-08, "loss": 1.4641, "step": 3971 }, { "epoch": 2.91, "learning_rate": 4.728451503575815e-08, "loss": 1.4798, "step": 3972 }, { "epoch": 2.91, "learning_rate": 4.651938079967e-08, "loss": 1.3979, "step": 3973 }, { "epoch": 2.91, "learning_rate": 4.576047323886612e-08, "loss": 1.4008, "step": 3974 }, { "epoch": 2.91, "learning_rate": 4.5007792828102305e-08, "loss": 1.2601, "step": 3975 }, { "epoch": 2.91, "learning_rate": 4.4261340038238565e-08, "loss": 1.3422, "step": 3976 }, { "epoch": 2.91, "learning_rate": 4.352111533624026e-08, "loss": 1.2483, "step": 3977 }, { "epoch": 2.91, "learning_rate": 4.2787119185174756e-08, "loss": 1.4028, "step": 3978 }, { "epoch": 2.92, "learning_rate": 4.205935204421252e-08, "loss": 1.4076, "step": 3979 }, { "epoch": 2.92, "learning_rate": 4.133781436863049e-08, "loss": 1.4617, "step": 3980 }, { "epoch": 2.92, "learning_rate": 4.062250660980427e-08, "loss": 1.3614, "step": 3981 }, { "epoch": 2.92, "learning_rate": 3.991342921521591e-08, "loss": 1.3935, "step": 3982 }, { "epoch": 2.92, "learning_rate": 3.9210582628448345e-08, "loss": 1.4592, "step": 3983 }, { "epoch": 2.92, "learning_rate": 3.851396728918544e-08, "loss": 1.4163, "step": 3984 }, { "epoch": 2.92, "learning_rate": 3.7823583633216365e-08, "loss": 1.5975, "step": 3985 }, { "epoch": 2.92, "learning_rate": 3.7139432092427876e-08, "loss": 1.3826, "step": 3986 }, { "epoch": 2.92, "learning_rate": 3.646151309481094e-08, "loss": 1.3259, "step": 3987 }, { "epoch": 2.92, "learning_rate": 3.578982706445633e-08, "loss": 1.479, "step": 3988 }, { "epoch": 2.92, "learning_rate": 3.5124374421555696e-08, "loss": 1.3936, "step": 3989 }, { "epoch": 2.92, "learning_rate": 3.446515558240049e-08, "loss": 1.2443, "step": 3990 }, { "epoch": 2.92, "learning_rate": 3.381217095938416e-08, "loss": 1.2566, "step": 3991 }, { "epoch": 2.92, "learning_rate": 3.316542096099995e-08, "loss": 1.283, "step": 3992 }, { "epoch": 2.93, "learning_rate": 3.2524905991839775e-08, "loss": 1.1862, "step": 3993 }, { "epoch": 2.93, "learning_rate": 3.189062645259533e-08, "loss": 1.5381, "step": 3994 }, { "epoch": 2.93, "learning_rate": 3.1262582740057e-08, "loss": 1.4653, "step": 3995 }, { "epoch": 2.93, "learning_rate": 3.064077524711606e-08, "loss": 1.3525, "step": 3996 }, { "epoch": 2.93, "learning_rate": 3.002520436276135e-08, "loss": 1.3696, "step": 3997 }, { "epoch": 2.93, "learning_rate": 2.9415870472079276e-08, "loss": 1.3363, "step": 3998 }, { "epoch": 2.93, "learning_rate": 2.8812773956256034e-08, "loss": 1.3242, "step": 3999 }, { "epoch": 2.93, "learning_rate": 2.821591519257538e-08, "loss": 1.4326, "step": 4000 }, { "epoch": 2.93, "learning_rate": 2.7625294554418648e-08, "loss": 1.5076, "step": 4001 }, { "epoch": 2.93, "learning_rate": 2.7040912411265828e-08, "loss": 1.4436, "step": 4002 }, { "epoch": 2.93, "learning_rate": 2.646276912869228e-08, "loss": 1.2817, "step": 4003 }, { "epoch": 2.93, "learning_rate": 2.5890865068373128e-08, "loss": 1.4766, "step": 4004 }, { "epoch": 2.93, "learning_rate": 2.5325200588076636e-08, "loss": 1.5013, "step": 4005 }, { "epoch": 2.93, "learning_rate": 2.4765776041669742e-08, "loss": 1.4736, "step": 4006 }, { "epoch": 2.94, "learning_rate": 2.4212591779118056e-08, "loss": 1.4522, "step": 4007 }, { "epoch": 2.94, "learning_rate": 2.3665648146480315e-08, "loss": 1.4761, "step": 4008 }, { "epoch": 2.94, "learning_rate": 2.312494548591282e-08, "loss": 1.4396, "step": 4009 }, { "epoch": 2.94, "learning_rate": 2.2590484135666112e-08, "loss": 1.5583, "step": 4010 }, { "epoch": 2.94, "learning_rate": 2.2062264430087187e-08, "loss": 1.4818, "step": 4011 }, { "epoch": 2.94, "learning_rate": 2.1540286699619494e-08, "loss": 1.3193, "step": 4012 }, { "epoch": 2.94, "learning_rate": 2.102455127079961e-08, "loss": 1.3551, "step": 4013 }, { "epoch": 2.94, "learning_rate": 2.0515058466260563e-08, "loss": 1.552, "step": 4014 }, { "epoch": 2.94, "learning_rate": 2.001180860472962e-08, "loss": 1.4042, "step": 4015 }, { "epoch": 2.94, "learning_rate": 1.95148020010294e-08, "loss": 1.5752, "step": 4016 }, { "epoch": 2.94, "learning_rate": 1.902403896607563e-08, "loss": 1.3633, "step": 4017 }, { "epoch": 2.94, "learning_rate": 1.8539519806878293e-08, "loss": 1.4579, "step": 4018 }, { "epoch": 2.94, "learning_rate": 1.806124482654159e-08, "loss": 1.3711, "step": 4019 }, { "epoch": 2.95, "learning_rate": 1.758921432426397e-08, "loss": 1.4178, "step": 4020 }, { "epoch": 2.95, "learning_rate": 1.712342859533811e-08, "loss": 1.4181, "step": 4021 }, { "epoch": 2.95, "learning_rate": 1.6663887931148702e-08, "loss": 1.3, "step": 4022 }, { "epoch": 2.95, "learning_rate": 1.621059261917357e-08, "loss": 1.4116, "step": 4023 }, { "epoch": 2.95, "learning_rate": 1.5763542942983656e-08, "loss": 1.3938, "step": 4024 }, { "epoch": 2.95, "learning_rate": 1.532273918224414e-08, "loss": 1.4528, "step": 4025 }, { "epoch": 2.95, "learning_rate": 1.4888181612713327e-08, "loss": 1.3842, "step": 4026 }, { "epoch": 2.95, "learning_rate": 1.4459870506238204e-08, "loss": 1.3476, "step": 4027 }, { "epoch": 2.95, "learning_rate": 1.403780613076111e-08, "loss": 1.5026, "step": 4028 }, { "epoch": 2.95, "learning_rate": 1.3621988750317505e-08, "loss": 1.3819, "step": 4029 }, { "epoch": 2.95, "learning_rate": 1.3212418625032642e-08, "loss": 1.5541, "step": 4030 }, { "epoch": 2.95, "learning_rate": 1.2809096011123789e-08, "loss": 1.5523, "step": 4031 }, { "epoch": 2.95, "learning_rate": 1.2412021160902454e-08, "loss": 1.4509, "step": 4032 }, { "epoch": 2.95, "learning_rate": 1.2021194322767716e-08, "loss": 1.4887, "step": 4033 }, { "epoch": 2.96, "learning_rate": 1.1636615741212886e-08, "loss": 1.3769, "step": 4034 }, { "epoch": 2.96, "learning_rate": 1.1258285656822187e-08, "loss": 1.4085, "step": 4035 }, { "epoch": 2.96, "learning_rate": 1.0886204306269632e-08, "loss": 1.3491, "step": 4036 }, { "epoch": 2.96, "learning_rate": 1.052037192232236e-08, "loss": 1.582, "step": 4037 }, { "epoch": 2.96, "learning_rate": 1.0160788733836191e-08, "loss": 1.3678, "step": 4038 }, { "epoch": 2.96, "learning_rate": 9.807454965756747e-09, "loss": 1.4637, "step": 4039 }, { "epoch": 2.96, "learning_rate": 9.460370839124989e-09, "loss": 1.3902, "step": 4040 }, { "epoch": 2.96, "learning_rate": 9.119536571066123e-09, "loss": 1.4732, "step": 4041 }, { "epoch": 2.96, "learning_rate": 8.784952374800704e-09, "loss": 1.454, "step": 4042 }, { "epoch": 2.96, "learning_rate": 8.456618459634635e-09, "loss": 1.4124, "step": 4043 }, { "epoch": 2.96, "learning_rate": 8.134535030968061e-09, "loss": 1.336, "step": 4044 }, { "epoch": 2.96, "learning_rate": 7.818702290289804e-09, "loss": 1.426, "step": 4045 }, { "epoch": 2.96, "learning_rate": 7.509120435176264e-09, "loss": 1.5773, "step": 4046 }, { "epoch": 2.96, "learning_rate": 7.205789659294748e-09, "loss": 1.4435, "step": 4047 }, { "epoch": 2.97, "learning_rate": 6.908710152403464e-09, "loss": 1.4619, "step": 4048 }, { "epoch": 2.97, "learning_rate": 6.617882100348194e-09, "loss": 1.4422, "step": 4049 }, { "epoch": 2.97, "learning_rate": 6.333305685064517e-09, "loss": 1.4172, "step": 4050 }, { "epoch": 2.97, "learning_rate": 6.054981084576694e-09, "loss": 1.4461, "step": 4051 }, { "epoch": 2.97, "learning_rate": 5.782908472998783e-09, "loss": 1.2325, "step": 4052 }, { "epoch": 2.97, "learning_rate": 5.517088020533523e-09, "loss": 1.2786, "step": 4053 }, { "epoch": 2.97, "learning_rate": 5.25751989347123e-09, "loss": 1.4046, "step": 4054 }, { "epoch": 2.97, "learning_rate": 5.0042042541931235e-09, "loss": 1.3168, "step": 4055 }, { "epoch": 2.97, "learning_rate": 4.757141261167997e-09, "loss": 1.2061, "step": 4056 }, { "epoch": 2.97, "learning_rate": 4.516331068952218e-09, "loss": 1.3578, "step": 4057 }, { "epoch": 2.97, "learning_rate": 4.281773828190839e-09, "loss": 1.4586, "step": 4058 }, { "epoch": 2.97, "learning_rate": 4.053469685617595e-09, "loss": 1.4551, "step": 4059 }, { "epoch": 2.97, "learning_rate": 3.831418784056018e-09, "loss": 1.4248, "step": 4060 }, { "epoch": 2.98, "learning_rate": 3.615621262414992e-09, "loss": 1.5355, "step": 4061 }, { "epoch": 2.98, "learning_rate": 3.4060772556931965e-09, "loss": 1.2477, "step": 4062 }, { "epoch": 2.98, "learning_rate": 3.2027868949757733e-09, "loss": 1.2223, "step": 4063 }, { "epoch": 2.98, "learning_rate": 3.0057503074376602e-09, "loss": 1.5053, "step": 4064 }, { "epoch": 2.98, "learning_rate": 2.814967616339148e-09, "loss": 1.3497, "step": 4065 }, { "epoch": 2.98, "learning_rate": 2.6304389410314323e-09, "loss": 1.3534, "step": 4066 }, { "epoch": 2.98, "learning_rate": 2.4521643969510624e-09, "loss": 1.4346, "step": 4067 }, { "epoch": 2.98, "learning_rate": 2.2801440956210506e-09, "loss": 1.4534, "step": 4068 }, { "epoch": 2.98, "learning_rate": 2.1143781446553157e-09, "loss": 1.5184, "step": 4069 }, { "epoch": 2.98, "learning_rate": 1.9548666477531285e-09, "loss": 1.4234, "step": 4070 }, { "epoch": 2.98, "learning_rate": 1.8016097047002246e-09, "loss": 1.4286, "step": 4071 }, { "epoch": 2.98, "learning_rate": 1.654607411371023e-09, "loss": 1.4198, "step": 4072 }, { "epoch": 2.98, "learning_rate": 1.5138598597275178e-09, "loss": 1.4166, "step": 4073 }, { "epoch": 2.98, "learning_rate": 1.3793671378181662e-09, "loss": 1.401, "step": 4074 }, { "epoch": 2.99, "learning_rate": 1.2511293297778893e-09, "loss": 1.5138, "step": 4075 }, { "epoch": 2.99, "learning_rate": 1.1291465158302927e-09, "loss": 1.4171, "step": 4076 }, { "epoch": 2.99, "learning_rate": 1.013418772285446e-09, "loss": 1.4428, "step": 4077 }, { "epoch": 2.99, "learning_rate": 9.039461715376618e-10, "loss": 1.5129, "step": 4078 }, { "epoch": 2.99, "learning_rate": 8.007287820732679e-10, "loss": 1.305, "step": 4079 }, { "epoch": 2.99, "learning_rate": 7.037666684606148e-10, "loss": 1.4243, "step": 4080 }, { "epoch": 2.99, "learning_rate": 6.13059891358958e-10, "loss": 1.3877, "step": 4081 }, { "epoch": 2.99, "learning_rate": 5.286085075117963e-10, "loss": 1.387, "step": 4082 }, { "epoch": 2.99, "learning_rate": 4.504125697490924e-10, "loss": 1.5607, "step": 4083 }, { "epoch": 2.99, "learning_rate": 3.784721269894931e-10, "loss": 1.4367, "step": 4084 }, { "epoch": 2.99, "learning_rate": 3.1278722423699894e-10, "loss": 1.3784, "step": 4085 }, { "epoch": 2.99, "learning_rate": 2.5335790258207426e-10, "loss": 1.37, "step": 4086 }, { "epoch": 2.99, "learning_rate": 2.001841992038678e-10, "loss": 1.5109, "step": 4087 }, { "epoch": 2.99, "learning_rate": 1.532661473657715e-10, "loss": 1.3921, "step": 4088 }, { "epoch": 3.0, "learning_rate": 1.1260377641764131e-10, "loss": 1.4207, "step": 4089 }, { "epoch": 3.0, "learning_rate": 7.819711179912759e-11, "loss": 1.3476, "step": 4090 }, { "epoch": 3.0, "learning_rate": 5.004617503190368e-11, "loss": 1.2871, "step": 4091 }, { "epoch": 3.0, "learning_rate": 2.8150983728547632e-11, "loss": 1.467, "step": 4092 }, { "epoch": 3.0, "learning_rate": 1.2511551584770687e-11, "loss": 1.5542, "step": 4093 }, { "epoch": 3.0, "learning_rate": 3.127888384968358e-12, "loss": 1.3247, "step": 4094 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 1.3435, "step": 4095 }, { "epoch": 3.0, "step": 4095, "total_flos": 3.3907276121112576e+17, "train_loss": 1.9838683304478106, "train_runtime": 15624.1612, "train_samples_per_second": 1.048, "train_steps_per_second": 0.262 } ], "max_steps": 4095, "num_train_epochs": 3, "total_flos": 3.3907276121112576e+17, "trial_name": null, "trial_params": null }