{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5.128205128205128e-07,
      "loss": 1.2474,
      "step": 2
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.0256410256410257e-06,
      "loss": 1.1588,
      "step": 4
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.5384615384615387e-06,
      "loss": 1.1009,
      "step": 6
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0512820512820513e-06,
      "loss": 1.1041,
      "step": 8
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.564102564102564e-06,
      "loss": 1.0571,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 1.0064,
      "step": 12
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.58974358974359e-06,
      "loss": 1.0577,
      "step": 14
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.102564102564103e-06,
      "loss": 1.0429,
      "step": 16
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.615384615384616e-06,
      "loss": 1.0566,
      "step": 18
    },
    {
      "epoch": 0.03,
      "learning_rate": 5.128205128205128e-06,
      "loss": 1.025,
      "step": 20
    },
    {
      "epoch": 0.03,
      "learning_rate": 5.641025641025641e-06,
      "loss": 1.0348,
      "step": 22
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.153846153846155e-06,
      "loss": 1.0395,
      "step": 24
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.0705,
      "step": 26
    },
    {
      "epoch": 0.04,
      "learning_rate": 7.17948717948718e-06,
      "loss": 1.0066,
      "step": 28
    },
    {
      "epoch": 0.04,
      "learning_rate": 7.692307692307694e-06,
      "loss": 1.0498,
      "step": 30
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.205128205128205e-06,
      "loss": 1.0381,
      "step": 32
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.717948717948719e-06,
      "loss": 1.0386,
      "step": 34
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.230769230769232e-06,
      "loss": 1.0641,
      "step": 36
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.743589743589744e-06,
      "loss": 1.0387,
      "step": 38
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.0256410256410256e-05,
      "loss": 1.0826,
      "step": 40
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.076923076923077e-05,
      "loss": 1.0797,
      "step": 42
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.1282051282051283e-05,
      "loss": 1.1147,
      "step": 44
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.1794871794871796e-05,
      "loss": 1.0696,
      "step": 46
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.230769230769231e-05,
      "loss": 1.0447,
      "step": 48
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.2820512820512823e-05,
      "loss": 1.0405,
      "step": 50
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.1069,
      "step": 52
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3846153846153847e-05,
      "loss": 1.0522,
      "step": 54
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.435897435897436e-05,
      "loss": 1.1448,
      "step": 56
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.4871794871794874e-05,
      "loss": 1.0508,
      "step": 58
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 1.1121,
      "step": 60
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.5897435897435897e-05,
      "loss": 1.1292,
      "step": 62
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.641025641025641e-05,
      "loss": 1.1066,
      "step": 64
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.6923076923076924e-05,
      "loss": 1.1084,
      "step": 66
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.7435897435897438e-05,
      "loss": 1.1632,
      "step": 68
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.794871794871795e-05,
      "loss": 1.0844,
      "step": 70
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.8461538461538465e-05,
      "loss": 1.0884,
      "step": 72
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.8974358974358975e-05,
      "loss": 1.1601,
      "step": 74
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9487179487179488e-05,
      "loss": 1.1705,
      "step": 76
    },
    {
      "epoch": 0.1,
      "learning_rate": 2e-05,
      "loss": 1.1824,
      "step": 78
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.999991012628722e-05,
      "loss": 1.153,
      "step": 80
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999964050676434e-05,
      "loss": 1.1438,
      "step": 82
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999919114627769e-05,
      "loss": 1.1805,
      "step": 84
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999856205290442e-05,
      "loss": 1.137,
      "step": 86
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999775323795232e-05,
      "loss": 1.1703,
      "step": 88
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.999676471595962e-05,
      "loss": 1.142,
      "step": 90
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9995596504694764e-05,
      "loss": 1.1743,
      "step": 92
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.999424862515604e-05,
      "loss": 1.1679,
      "step": 94
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9992721101571238e-05,
      "loss": 1.1924,
      "step": 96
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.99910139613972e-05,
      "loss": 1.1704,
      "step": 98
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.998912723531933e-05,
      "loss": 1.1856,
      "step": 100
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9987060957251047e-05,
      "loss": 1.1808,
      "step": 102
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9984815164333163e-05,
      "loss": 1.2127,
      "step": 104
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.998238989693323e-05,
      "loss": 1.2006,
      "step": 106
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.997978519864481e-05,
      "loss": 1.1249,
      "step": 108
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9977001116286675e-05,
      "loss": 1.2025,
      "step": 110
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9974037699901993e-05,
      "loss": 1.1594,
      "step": 112
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9970895002757413e-05,
      "loss": 1.1506,
      "step": 114
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9967573081342103e-05,
      "loss": 1.2164,
      "step": 116
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9964071995366744e-05,
      "loss": 1.1398,
      "step": 118
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9960391807762462e-05,
      "loss": 1.2018,
      "step": 120
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9956532584679676e-05,
      "loss": 1.18,
      "step": 122
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.995249439548693e-05,
      "loss": 1.1726,
      "step": 124
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.994827731276963e-05,
      "loss": 1.1302,
      "step": 126
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.994388141232876e-05,
      "loss": 1.1494,
      "step": 128
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9939306773179498e-05,
      "loss": 1.2118,
      "step": 130
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9934553477549795e-05,
      "loss": 1.1385,
      "step": 132
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.992962161087893e-05,
      "loss": 1.1837,
      "step": 134
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9924511261815928e-05,
      "loss": 1.1921,
      "step": 136
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9919222522217998e-05,
      "loss": 1.1621,
      "step": 138
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9913755487148874e-05,
      "loss": 1.2225,
      "step": 140
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9908110254877107e-05,
      "loss": 1.139,
      "step": 142
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.990228692687429e-05,
      "loss": 1.1651,
      "step": 144
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9896285607813245e-05,
      "loss": 1.1357,
      "step": 146
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.989010640556614e-05,
      "loss": 1.16,
      "step": 148
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.988374943120254e-05,
      "loss": 1.1853,
      "step": 150
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9877214798987428e-05,
      "loss": 1.1863,
      "step": 152
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.9870502626379127e-05,
      "loss": 1.2025,
      "step": 154
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.9863613034027224e-05,
      "loss": 1.1802,
      "step": 156
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.985654614577036e-05,
      "loss": 1.2212,
      "step": 158
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9849302088634034e-05,
      "loss": 1.191,
      "step": 160
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9841880992828306e-05,
      "loss": 1.1702,
      "step": 162
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9834282991745465e-05,
      "loss": 1.1982,
      "step": 164
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9826508221957624e-05,
      "loss": 1.1503,
      "step": 166
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.981855682321427e-05,
      "loss": 1.1997,
      "step": 168
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.981042893843974e-05,
      "loss": 1.1724,
      "step": 170
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.980212471373068e-05,
      "loss": 1.1611,
      "step": 172
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.979364429835339e-05,
      "loss": 1.1512,
      "step": 174
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.978498784474115e-05,
      "loss": 1.2071,
      "step": 176
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9776155508491482e-05,
      "loss": 1.1767,
      "step": 178
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9767147448363366e-05,
      "loss": 1.162,
      "step": 180
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9757963826274357e-05,
      "loss": 1.1688,
      "step": 182
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.97486048072977e-05,
      "loss": 1.1622,
      "step": 184
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9739070559659347e-05,
      "loss": 1.1653,
      "step": 186
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.972936125473495e-05,
      "loss": 1.1141,
      "step": 188
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9719477067046768e-05,
      "loss": 1.1571,
      "step": 190
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9709418174260523e-05,
      "loss": 1.2207,
      "step": 192
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9699184757182225e-05,
      "loss": 1.1794,
      "step": 194
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9688776999754913e-05,
      "loss": 1.106,
      "step": 196
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9678195089055347e-05,
      "loss": 1.1521,
      "step": 198
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.966743921529065e-05,
      "loss": 1.1671,
      "step": 200
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.965650957179488e-05,
      "loss": 1.1579,
      "step": 202
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9645406355025565e-05,
      "loss": 1.2015,
      "step": 204
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.963412976456017e-05,
      "loss": 1.1538,
      "step": 206
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9622680003092503e-05,
      "loss": 1.1832,
      "step": 208
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9611057276429085e-05,
      "loss": 1.2082,
      "step": 210
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9599261793485432e-05,
      "loss": 1.1638,
      "step": 212
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.958729376628231e-05,
      "loss": 1.201,
      "step": 214
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.957515340994193e-05,
      "loss": 1.1826,
      "step": 216
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.956284094268407e-05,
      "loss": 1.1293,
      "step": 218
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.955035658582216e-05,
      "loss": 1.1935,
      "step": 220
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9537700563759303e-05,
      "loss": 1.1658,
      "step": 222
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9524873103984234e-05,
      "loss": 1.1698,
      "step": 224
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9511874437067243e-05,
      "loss": 1.1962,
      "step": 226
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.949870479665602e-05,
      "loss": 1.1148,
      "step": 228
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9485364419471454e-05,
      "loss": 1.1701,
      "step": 230
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9471853545303407e-05,
      "loss": 1.202,
      "step": 232
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9458172417006347e-05,
      "loss": 1.1923,
      "step": 234
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9444321280495045e-05,
      "loss": 1.1486,
      "step": 236
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9430300384740108e-05,
      "loss": 1.163,
      "step": 238
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9416109981763526e-05,
      "loss": 1.166,
      "step": 240
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9401750326634144e-05,
      "loss": 1.1528,
      "step": 242
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9387221677463064e-05,
      "loss": 1.1547,
      "step": 244
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9372524295399014e-05,
      "loss": 1.2095,
      "step": 246
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9357658444623655e-05,
      "loss": 1.0905,
      "step": 248
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9342624392346826e-05,
      "loss": 1.169,
      "step": 250
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9327422408801744e-05,
      "loss": 1.1979,
      "step": 252
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9312052767240153e-05,
      "loss": 1.1515,
      "step": 254
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.92965157439274e-05,
      "loss": 1.1893,
      "step": 256
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9280811618137486e-05,
      "loss": 1.174,
      "step": 258
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9264940672148018e-05,
      "loss": 1.2114,
      "step": 260
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9248903191235177e-05,
      "loss": 1.1677,
      "step": 262
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9232699463668543e-05,
      "loss": 1.1598,
      "step": 264
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9216329780705955e-05,
      "loss": 1.1887,
      "step": 266
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9199794436588244e-05,
      "loss": 1.15,
      "step": 268
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9183093728533966e-05,
      "loss": 1.1685,
      "step": 270
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.916622795673405e-05,
      "loss": 1.1508,
      "step": 272
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9149197424346405e-05,
      "loss": 1.1324,
      "step": 274
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.913200243749046e-05,
      "loss": 1.1601,
      "step": 276
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9114643305241678e-05,
      "loss": 1.1349,
      "step": 278
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9097120339625994e-05,
      "loss": 1.1617,
      "step": 280
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9079433855614203e-05,
      "loss": 1.1513,
      "step": 282
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9061584171116302e-05,
      "loss": 1.17,
      "step": 284
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9043571606975776e-05,
      "loss": 1.1732,
      "step": 286
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9025396486963827e-05,
      "loss": 1.1233,
      "step": 288
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.900705913777356e-05,
      "loss": 1.1701,
      "step": 290
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.89885598890141e-05,
      "loss": 1.161,
      "step": 292
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8969899073204687e-05,
      "loss": 1.1404,
      "step": 294
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.895107702576868e-05,
      "loss": 1.2028,
      "step": 296
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8932094085027534e-05,
      "loss": 1.1943,
      "step": 298
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.891295059219472e-05,
      "loss": 1.1376,
      "step": 300
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.88936468913696e-05,
      "loss": 1.1743,
      "step": 302
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.8874183329531222e-05,
      "loss": 1.1552,
      "step": 304
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.8854560256532098e-05,
      "loss": 1.1528,
      "step": 306
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.883477802509192e-05,
      "loss": 1.1543,
      "step": 308
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.88148369907912e-05,
      "loss": 1.1008,
      "step": 310
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.879473751206489e-05,
      "loss": 1.1573,
      "step": 312
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.877447995019596e-05,
      "loss": 1.1236,
      "step": 314
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.875406466930886e-05,
      "loss": 1.1546,
      "step": 316
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.8733492036363007e-05,
      "loss": 1.1547,
      "step": 318
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.8712762421146185e-05,
      "loss": 1.1608,
      "step": 320
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.8691876196267892e-05,
      "loss": 1.1708,
      "step": 322
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.867083373715264e-05,
      "loss": 1.1485,
      "step": 324
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.8649635422033218e-05,
      "loss": 1.213,
      "step": 326
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.862828163194388e-05,
      "loss": 1.137,
      "step": 328
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.8606772750713503e-05,
      "loss": 1.1987,
      "step": 330
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.8585109164958698e-05,
      "loss": 1.1287,
      "step": 332
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.8563291264076834e-05,
      "loss": 1.1653,
      "step": 334
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.8541319440239066e-05,
      "loss": 1.1305,
      "step": 336
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.851919408838327e-05,
      "loss": 1.186,
      "step": 338
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.8496915606206952e-05,
      "loss": 1.1371,
      "step": 340
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.847448439416009e-05,
      "loss": 1.1589,
      "step": 342
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.845190085543795e-05,
      "loss": 1.0932,
      "step": 344
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.842916539597382e-05,
      "loss": 1.1354,
      "step": 346
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.8406278424431737e-05,
      "loss": 1.1056,
      "step": 348
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.8383240352199118e-05,
      "loss": 1.1402,
      "step": 350
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.8360051593379383e-05,
      "loss": 1.1689,
      "step": 352
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.8336712564784506e-05,
      "loss": 1.1761,
      "step": 354
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.8313223685927507e-05,
      "loss": 1.101,
      "step": 356
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.8289585379014942e-05,
      "loss": 1.1944,
      "step": 358
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.8265798068939295e-05,
      "loss": 1.2067,
      "step": 360
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.8241862183271338e-05,
      "loss": 1.1291,
      "step": 362
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.821777815225245e-05,
      "loss": 1.1814,
      "step": 364
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.81935464087869e-05,
      "loss": 1.1324,
      "step": 366
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.8169167388434024e-05,
      "loss": 1.1929,
      "step": 368
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.8144641529400445e-05,
      "loss": 1.1131,
      "step": 370
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.8119969272532164e-05,
      "loss": 1.1646,
      "step": 372
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.8095151061306647e-05,
      "loss": 1.1554,
      "step": 374
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.8070187341824848e-05,
      "loss": 1.1406,
      "step": 376
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.8045078562803203e-05,
      "loss": 1.1385,
      "step": 378
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.8019825175565544e-05,
      "loss": 1.1865,
      "step": 380
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.7994427634035016e-05,
      "loss": 1.1631,
      "step": 382
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.7968886394725876e-05,
      "loss": 1.1112,
      "step": 384
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.7943201916735337e-05,
      "loss": 1.1238,
      "step": 386
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.791737466173527e-05,
      "loss": 1.1653,
      "step": 388
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.789140509396394e-05,
      "loss": 1.1407,
      "step": 390
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.7865293680217636e-05,
      "loss": 1.1344,
      "step": 392
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.7839040889842307e-05,
      "loss": 1.1513,
      "step": 394
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.7812647194725093e-05,
      "loss": 1.1392,
      "step": 396
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.7786113069285877e-05,
      "loss": 1.0903,
      "step": 398
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.7759438990468726e-05,
      "loss": 1.1068,
      "step": 400
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.7732625437733338e-05,
      "loss": 1.1482,
      "step": 402
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.7705672893046425e-05,
      "loss": 1.1475,
      "step": 404
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.767858184087304e-05,
      "loss": 1.1514,
      "step": 406
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.765135276816787e-05,
      "loss": 1.1395,
      "step": 408
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.7623986164366487e-05,
      "loss": 1.1084,
      "step": 410
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.7596482521376546e-05,
      "loss": 1.1748,
      "step": 412
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.7568842333568952e-05,
      "loss": 1.1709,
      "step": 414
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.7541066097768965e-05,
      "loss": 1.1961,
      "step": 416
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.7513154313247273e-05,
      "loss": 1.1287,
      "step": 418
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.7485107481711014e-05,
      "loss": 1.1484,
      "step": 420
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.7456926107294765e-05,
      "loss": 1.1195,
      "step": 422
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.742861069655148e-05,
      "loss": 1.1157,
      "step": 424
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.7400161758443377e-05,
      "loss": 1.1829,
      "step": 426
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.737157980433279e-05,
      "loss": 1.1937,
      "step": 428
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.7342865347972987e-05,
      "loss": 1.0915,
      "step": 430
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.7314018905498932e-05,
      "loss": 1.1741,
      "step": 432
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.7285040995418003e-05,
      "loss": 1.1407,
      "step": 434
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.7255932138600665e-05,
      "loss": 1.1625,
      "step": 436
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.7226692858271133e-05,
      "loss": 1.17,
      "step": 438
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.7197323679997943e-05,
      "loss": 1.1736,
      "step": 440
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.7167825131684516e-05,
      "loss": 1.1646,
      "step": 442
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.7138197743559656e-05,
      "loss": 1.1301,
      "step": 444
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.7108442048168038e-05,
      "loss": 1.1564,
      "step": 446
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.707855858036063e-05,
      "loss": 1.1653,
      "step": 448
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.7048547877285078e-05,
      "loss": 1.1335,
      "step": 450
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.7018410478376033e-05,
      "loss": 1.1478,
      "step": 452
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.6988146925345487e-05,
      "loss": 1.1436,
      "step": 454
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.695775776217301e-05,
      "loss": 1.1958,
      "step": 456
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.6927243535095995e-05,
      "loss": 1.1215,
      "step": 458
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.6896604792599813e-05,
      "loss": 1.1639,
      "step": 460
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.686584208540797e-05,
      "loss": 1.1605,
      "step": 462
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.6834955966472214e-05,
      "loss": 1.0812,
      "step": 464
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.6803946990962577e-05,
      "loss": 1.1427,
      "step": 466
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.6772815716257414e-05,
      "loss": 1.1497,
      "step": 468
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.6741562701933366e-05,
      "loss": 1.1058,
      "step": 470
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.671018850975533e-05,
      "loss": 1.095,
      "step": 472
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.6678693703666327e-05,
      "loss": 1.1272,
      "step": 474
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.664707884977739e-05,
      "loss": 1.1257,
      "step": 476
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.661534451635738e-05,
      "loss": 1.1318,
      "step": 478
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.6583491273822763e-05,
      "loss": 1.1335,
      "step": 480
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.655151969472738e-05,
      "loss": 1.1529,
      "step": 482
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.6519430353752138e-05,
      "loss": 1.1674,
      "step": 484
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.6487223827694673e-05,
      "loss": 1.1743,
      "step": 486
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.6454900695459e-05,
      "loss": 1.186,
      "step": 488
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.6422461538045104e-05,
      "loss": 1.1251,
      "step": 490
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.638990693853848e-05,
      "loss": 1.0951,
      "step": 492
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.6357237482099682e-05,
      "loss": 1.1618,
      "step": 494
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.6324453755953772e-05,
      "loss": 1.1283,
      "step": 496
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.6291556349379794e-05,
      "loss": 1.1174,
      "step": 498
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.6258545853700157e-05,
      "loss": 1.17,
      "step": 500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.622542286227003e-05,
      "loss": 1.1735,
      "step": 502
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6192187970466646e-05,
      "loss": 1.0973,
      "step": 504
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.615884177567863e-05,
      "loss": 1.1478,
      "step": 506
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6125384877295255e-05,
      "loss": 1.1254,
      "step": 508
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6091817876695655e-05,
      "loss": 1.1599,
      "step": 510
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.6058141377238026e-05,
      "loss": 1.1401,
      "step": 512
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.602435598424877e-05,
      "loss": 1.1241,
      "step": 514
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.599046230501163e-05,
      "loss": 1.1915,
      "step": 516
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.5956460948756765e-05,
      "loss": 1.1119,
      "step": 518
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5922352526649803e-05,
      "loss": 1.1226,
      "step": 520
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5888137651780847e-05,
      "loss": 1.1635,
      "step": 522
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.585381693915346e-05,
      "loss": 1.123,
      "step": 524
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.581939100567363e-05,
      "loss": 1.178,
      "step": 526
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5784860470138633e-05,
      "loss": 1.152,
      "step": 528
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5750225953225968e-05,
      "loss": 1.1314,
      "step": 530
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5715488077482152e-05,
      "loss": 1.1556,
      "step": 532
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.568064746731156e-05,
      "loss": 1.1427,
      "step": 534
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5645704748965193e-05,
      "loss": 1.1633,
      "step": 536
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5610660550529413e-05,
      "loss": 1.1237,
      "step": 538
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.557551550191467e-05,
      "loss": 1.1241,
      "step": 540
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.554027023484416e-05,
      "loss": 1.1212,
      "step": 542
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.550492538284249e-05,
      "loss": 1.1125,
      "step": 544
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.5469481581224274e-05,
      "loss": 1.1345,
      "step": 546
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.5433939467082713e-05,
      "loss": 1.0837,
      "step": 548
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.5398299679278172e-05,
      "loss": 1.1531,
      "step": 550
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.5362562858426655e-05,
      "loss": 1.179,
      "step": 552
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.5326729646888314e-05,
      "loss": 1.1123,
      "step": 554
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.5290800688755906e-05,
      "loss": 1.1616,
      "step": 556
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.5254776629843204e-05,
      "loss": 1.1236,
      "step": 558
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.5218658117673389e-05,
      "loss": 1.1254,
      "step": 560
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.518244580146742e-05,
      "loss": 1.0994,
      "step": 562
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.5146140332132359e-05,
      "loss": 1.0841,
      "step": 564
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.5109742362249673e-05,
      "loss": 1.1284,
      "step": 566
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.5073252546063493e-05,
      "loss": 1.108,
      "step": 568
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.5036671539468879e-05,
      "loss": 1.1494,
      "step": 570
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 1.1226,
      "step": 572
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.4963238586818346e-05,
      "loss": 1.0835,
      "step": 574
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.4926387960700843e-05,
      "loss": 1.1047,
      "step": 576
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.488944878402802e-05,
      "loss": 1.0861,
      "step": 578
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.4852421720772064e-05,
      "loss": 1.1066,
      "step": 580
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.4815307436484898e-05,
      "loss": 1.1382,
      "step": 582
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.4778106598286235e-05,
      "loss": 1.0789,
      "step": 584
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.4740819874851562e-05,
      "loss": 1.1367,
      "step": 586
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.4703447936400135e-05,
      "loss": 1.1017,
      "step": 588
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.4665991454682924e-05,
      "loss": 1.1009,
      "step": 590
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.4628451102970546e-05,
      "loss": 1.08,
      "step": 592
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.4590827556041158e-05,
      "loss": 1.1003,
      "step": 594
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.4553121490168335e-05,
      "loss": 1.1396,
      "step": 596
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4515333583108896e-05,
      "loss": 1.1184,
      "step": 598
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4477464514090745e-05,
      "loss": 1.1374,
      "step": 600
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.443951496380065e-05,
      "loss": 1.1114,
      "step": 602
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4401485614372009e-05,
      "loss": 1.0906,
      "step": 604
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4363377149372584e-05,
      "loss": 1.126,
      "step": 606
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4325190253792222e-05,
      "loss": 1.0907,
      "step": 608
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4286925614030542e-05,
      "loss": 1.126,
      "step": 610
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4248583917884595e-05,
      "loss": 1.1183,
      "step": 612
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.4210165854536495e-05,
      "loss": 1.1165,
      "step": 614
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.4171672114541042e-05,
      "loss": 1.1149,
      "step": 616
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.4133103389813302e-05,
      "loss": 1.1124,
      "step": 618
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.409446037361617e-05,
      "loss": 1.0864,
      "step": 620
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.4055743760547918e-05,
      "loss": 1.105,
      "step": 622
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.4016954246529697e-05,
      "loss": 1.1575,
      "step": 624
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.3978092528793032e-05,
      "loss": 1.1155,
      "step": 626
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.39391593058673e-05,
      "loss": 1.12,
      "step": 628
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.3900155277567157e-05,
      "loss": 1.1048,
      "step": 630
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.3861081144979975e-05,
      "loss": 1.1003,
      "step": 632
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.382193761045322e-05,
      "loss": 1.0671,
      "step": 634
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.378272537758185e-05,
      "loss": 1.0981,
      "step": 636
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.3743445151195658e-05,
      "loss": 1.132,
      "step": 638
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.37040976373466e-05,
      "loss": 1.1709,
      "step": 640
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.3664683543296114e-05,
      "loss": 1.1083,
      "step": 642
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.3625203577502384e-05,
      "loss": 1.1272,
      "step": 644
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.3585658449607632e-05,
      "loss": 1.0967,
      "step": 646
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.3546048870425356e-05,
      "loss": 1.0316,
      "step": 648
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.3506375551927546e-05,
      "loss": 1.0944,
      "step": 650
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.3466639207231882e-05,
      "loss": 1.1149,
      "step": 652
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.3426840550588933e-05,
      "loss": 1.0834,
      "step": 654
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.3386980297369308e-05,
      "loss": 1.0853,
      "step": 656
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.3347059164050796e-05,
      "loss": 1.0994,
      "step": 658
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3307077868205487e-05,
      "loss": 1.127,
      "step": 660
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3267037128486883e-05,
      "loss": 1.1307,
      "step": 662
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3226937664616977e-05,
      "loss": 1.1151,
      "step": 664
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3186780197373306e-05,
      "loss": 1.1228,
      "step": 666
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.3146565448576002e-05,
      "loss": 1.0869,
      "step": 668
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.3106294141074825e-05,
      "loss": 1.0714,
      "step": 670
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.3065966998736155e-05,
      "loss": 1.1025,
      "step": 672
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.302558474643e-05,
      "loss": 1.0993,
      "step": 674
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.2985148110016947e-05,
      "loss": 1.0594,
      "step": 676
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.2944657816335124e-05,
      "loss": 1.0898,
      "step": 678
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.2904114593187136e-05,
      "loss": 1.1221,
      "step": 680
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.2863519169326984e-05,
      "loss": 1.1194,
      "step": 682
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.2822872274446958e-05,
      "loss": 1.1323,
      "step": 684
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.2782174639164528e-05,
      "loss": 1.1366,
      "step": 686
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.2741426995009214e-05,
      "loss": 1.096,
      "step": 688
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.2700630074409427e-05,
      "loss": 1.0622,
      "step": 690
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.2659784610679318e-05,
      "loss": 1.1226,
      "step": 692
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.2618891338005574e-05,
      "loss": 1.0771,
      "step": 694
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.2577950991434249e-05,
      "loss": 1.1363,
      "step": 696
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.2536964306857526e-05,
      "loss": 1.0735,
      "step": 698
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.2495932021000516e-05,
      "loss": 1.0877,
      "step": 700
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.2454854871407993e-05,
      "loss": 1.0683,
      "step": 702
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.2413733596431141e-05,
      "loss": 1.1172,
      "step": 704
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2372568935214298e-05,
      "loss": 1.068,
      "step": 706
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2331361627681645e-05,
      "loss": 1.0995,
      "step": 708
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2290112414523927e-05,
      "loss": 1.0648,
      "step": 710
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2248822037185137e-05,
      "loss": 1.0978,
      "step": 712
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.2207491237849174e-05,
      "loss": 1.0318,
      "step": 714
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.2166120759426515e-05,
      "loss": 1.1007,
      "step": 716
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.2124711345540861e-05,
      "loss": 1.0964,
      "step": 718
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.2083263740515764e-05,
      "loss": 1.1559,
      "step": 720
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.2041778689361254e-05,
      "loss": 1.1272,
      "step": 722
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.2000256937760446e-05,
      "loss": 1.066,
      "step": 724
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.1958699232056135e-05,
      "loss": 1.0868,
      "step": 726
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.1917106319237386e-05,
      "loss": 1.0778,
      "step": 728
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.1875478946926094e-05,
      "loss": 1.0461,
      "step": 730
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.1833817863363563e-05,
      "loss": 1.0645,
      "step": 732
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.1792123817397041e-05,
      "loss": 1.1091,
      "step": 734
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.1750397558466273e-05,
      "loss": 1.1129,
      "step": 736
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.1708639836590024e-05,
      "loss": 1.0704,
      "step": 738
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.1666851402352587e-05,
      "loss": 1.0903,
      "step": 740
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.1625033006890316e-05,
      "loss": 1.0858,
      "step": 742
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.15831854018781e-05,
      "loss": 1.1189,
      "step": 744
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.154130933951587e-05,
      "loss": 1.0825,
      "step": 746
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.1499405572515059e-05,
      "loss": 1.0744,
      "step": 748
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.1457474854085095e-05,
      "loss": 1.0701,
      "step": 750
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.1415517937919846e-05,
      "loss": 1.0705,
      "step": 752
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.1373535578184083e-05,
      "loss": 1.0532,
      "step": 754
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.1331528529499909e-05,
      "loss": 1.1365,
      "step": 756
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.1289497546933212e-05,
      "loss": 1.0797,
      "step": 758
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.124744338598008e-05,
      "loss": 1.0715,
      "step": 760
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.1205366802553231e-05,
      "loss": 1.1339,
      "step": 762
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.1163268552968422e-05,
      "loss": 1.0962,
      "step": 764
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.112114939393085e-05,
      "loss": 1.075,
      "step": 766
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.1079010082521557e-05,
      "loss": 1.056,
      "step": 768
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.1036851376183812e-05,
      "loss": 1.05,
      "step": 770
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.0994674032709514e-05,
      "loss": 1.0404,
      "step": 772
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.095247881022555e-05,
      "loss": 1.0539,
      "step": 774
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.091026646718018e-05,
      "loss": 1.0554,
      "step": 776
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.0868037762329405e-05,
      "loss": 1.0717,
      "step": 778
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.0825793454723325e-05,
      "loss": 1.0589,
      "step": 780
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.0783534303692493e-05,
      "loss": 0.7134,
      "step": 782
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0741261068834266e-05,
      "loss": 0.6371,
      "step": 784
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0698974509999159e-05,
      "loss": 0.6415,
      "step": 786
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0656675387277183e-05,
      "loss": 0.6547,
      "step": 788
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0614364460984178e-05,
      "loss": 0.6518,
      "step": 790
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.057204249164815e-05,
      "loss": 0.672,
      "step": 792
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0529710239995606e-05,
      "loss": 0.669,
      "step": 794
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0487368466937866e-05,
      "loss": 0.6394,
      "step": 796
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0445017933557404e-05,
      "loss": 0.6664,
      "step": 798
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.0402659401094154e-05,
      "loss": 0.6451,
      "step": 800
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.036029363093183e-05,
      "loss": 0.6917,
      "step": 802
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.0317921384584245e-05,
      "loss": 0.6579,
      "step": 804
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.0275543423681622e-05,
      "loss": 0.6505,
      "step": 806
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.0233160509956893e-05,
      "loss": 0.636,
      "step": 808
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.0190773405232024e-05,
      "loss": 0.6344,
      "step": 810
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.014838287140431e-05,
      "loss": 0.6315,
      "step": 812
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.010598967043268e-05,
      "loss": 0.6549,
      "step": 814
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.0063594564324014e-05,
      "loss": 0.6585,
      "step": 816
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.0021198315119426e-05,
      "loss": 0.6358,
      "step": 818
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.97880168488058e-06,
      "loss": 0.6627,
      "step": 820
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.936405435675991e-06,
      "loss": 0.6451,
      "step": 822
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.894010329567322e-06,
      "loss": 0.6304,
      "step": 824
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.851617128595694e-06,
      "loss": 0.6271,
      "step": 826
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.809226594767979e-06,
      "loss": 0.6649,
      "step": 828
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.766839490043108e-06,
      "loss": 0.6811,
      "step": 830
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.724456576318383e-06,
      "loss": 0.6136,
      "step": 832
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.682078615415755e-06,
      "loss": 0.6876,
      "step": 834
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.63970636906817e-06,
      "loss": 0.6612,
      "step": 836
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.597340598905851e-06,
      "loss": 0.6553,
      "step": 838
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.554982066442601e-06,
      "loss": 0.6467,
      "step": 840
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.512631533062138e-06,
      "loss": 0.6705,
      "step": 842
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.470289760004398e-06,
      "loss": 0.6552,
      "step": 844
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.427957508351852e-06,
      "loss": 0.6369,
      "step": 846
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.385635539015824e-06,
      "loss": 0.6447,
      "step": 848
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.343324612722819e-06,
      "loss": 0.6566,
      "step": 850
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.301025490000843e-06,
      "loss": 0.6285,
      "step": 852
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.25873893116574e-06,
      "loss": 0.6479,
      "step": 854
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.216465696307513e-06,
      "loss": 0.6463,
      "step": 856
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.174206545276678e-06,
      "loss": 0.6277,
      "step": 858
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.131962237670599e-06,
      "loss": 0.6608,
      "step": 860
    },
    {
      "epoch": 1.11,
      "learning_rate": 9.089733532819825e-06,
      "loss": 0.6512,
      "step": 862
    },
    {
      "epoch": 1.11,
      "learning_rate": 9.047521189774456e-06,
      "loss": 0.6306,
      "step": 864
    },
    {
      "epoch": 1.11,
      "learning_rate": 9.005325967290489e-06,
      "loss": 0.6236,
      "step": 866
    },
    {
      "epoch": 1.11,
      "learning_rate": 8.963148623816191e-06,
      "loss": 0.653,
      "step": 868
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.920989917478446e-06,
      "loss": 0.656,
      "step": 870
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.878850606069152e-06,
      "loss": 0.6556,
      "step": 872
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.836731447031581e-06,
      "loss": 0.6574,
      "step": 874
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.79463319744677e-06,
      "loss": 0.6456,
      "step": 876
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.752556614019924e-06,
      "loss": 0.6353,
      "step": 878
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.710502453066791e-06,
      "loss": 0.6369,
      "step": 880
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.668471470500094e-06,
      "loss": 0.6316,
      "step": 882
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.626464421815919e-06,
      "loss": 0.6415,
      "step": 884
    },
    {
      "epoch": 1.14,
      "learning_rate": 8.584482062080154e-06,
      "loss": 0.6522,
      "step": 886
    },
    {
      "epoch": 1.14,
      "learning_rate": 8.542525145914907e-06,
      "loss": 0.6476,
      "step": 888
    },
    {
      "epoch": 1.14,
      "learning_rate": 8.500594427484946e-06,
      "loss": 0.625,
      "step": 890
    },
    {
      "epoch": 1.14,
      "learning_rate": 8.458690660484134e-06,
      "loss": 0.6545,
      "step": 892
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.416814598121901e-06,
      "loss": 0.6584,
      "step": 894
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.374966993109689e-06,
      "loss": 0.6388,
      "step": 896
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.333148597647414e-06,
      "loss": 0.6499,
      "step": 898
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.291360163409978e-06,
      "loss": 0.6556,
      "step": 900
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.249602441533727e-06,
      "loss": 0.6359,
      "step": 902
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.207876182602959e-06,
      "loss": 0.612,
      "step": 904
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.16618213663644e-06,
      "loss": 0.6158,
      "step": 906
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.12452105307391e-06,
      "loss": 0.6362,
      "step": 908
    },
    {
      "epoch": 1.17,
      "learning_rate": 8.082893680762619e-06,
      "loss": 0.6402,
      "step": 910
    },
    {
      "epoch": 1.17,
      "learning_rate": 8.041300767943867e-06,
      "loss": 0.6629,
      "step": 912
    },
    {
      "epoch": 1.17,
      "learning_rate": 7.999743062239557e-06,
      "loss": 0.6136,
      "step": 914
    },
    {
      "epoch": 1.17,
      "learning_rate": 7.958221310638749e-06,
      "loss": 0.6517,
      "step": 916
    },
    {
      "epoch": 1.18,
      "learning_rate": 7.916736259484239e-06,
      "loss": 0.6635,
      "step": 918
    },
    {
      "epoch": 1.18,
      "learning_rate": 7.875288654459144e-06,
      "loss": 0.6405,
      "step": 920
    },
    {
      "epoch": 1.18,
      "learning_rate": 7.833879240573487e-06,
      "loss": 0.6242,
      "step": 922
    },
    {
      "epoch": 1.18,
      "learning_rate": 7.792508762150833e-06,
      "loss": 0.6642,
      "step": 924
    },
    {
      "epoch": 1.19,
      "learning_rate": 7.751177962814867e-06,
      "loss": 0.6297,
      "step": 926
    },
    {
      "epoch": 1.19,
      "learning_rate": 7.709887585476075e-06,
      "loss": 0.6458,
      "step": 928
    },
    {
      "epoch": 1.19,
      "learning_rate": 7.668638372318359e-06,
      "loss": 0.6413,
      "step": 930
    },
    {
      "epoch": 1.19,
      "learning_rate": 7.627431064785705e-06,
      "loss": 0.6093,
      "step": 932
    },
    {
      "epoch": 1.2,
      "learning_rate": 7.5862664035688604e-06,
      "loss": 0.6088,
      "step": 934
    },
    {
      "epoch": 1.2,
      "learning_rate": 7.545145128592009e-06,
      "loss": 0.628,
      "step": 936
    },
    {
      "epoch": 1.2,
      "learning_rate": 7.504067978999484e-06,
      "loss": 0.6382,
      "step": 938
    },
    {
      "epoch": 1.21,
      "learning_rate": 7.463035693142473e-06,
      "loss": 0.6458,
      "step": 940
    },
    {
      "epoch": 1.21,
      "learning_rate": 7.422049008565757e-06,
      "loss": 0.638,
      "step": 942
    },
    {
      "epoch": 1.21,
      "learning_rate": 7.38110866199443e-06,
      "loss": 0.6704,
      "step": 944
    },
    {
      "epoch": 1.21,
      "learning_rate": 7.340215389320686e-06,
      "loss": 0.6049,
      "step": 946
    },
    {
      "epoch": 1.22,
      "learning_rate": 7.299369925590575e-06,
      "loss": 0.6124,
      "step": 948
    },
    {
      "epoch": 1.22,
      "learning_rate": 7.258573004990789e-06,
      "loss": 0.6417,
      "step": 950
    },
    {
      "epoch": 1.22,
      "learning_rate": 7.217825360835475e-06,
      "loss": 0.6359,
      "step": 952
    },
    {
      "epoch": 1.22,
      "learning_rate": 7.1771277255530456e-06,
      "loss": 0.6347,
      "step": 954
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.136480830673018e-06,
      "loss": 0.6138,
      "step": 956
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.095885406812866e-06,
      "loss": 0.6218,
      "step": 958
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.05534218366488e-06,
      "loss": 0.614,
      "step": 960
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.014851889983058e-06,
      "loss": 0.6245,
      "step": 962
    },
    {
      "epoch": 1.24,
      "learning_rate": 6.974415253570003e-06,
      "loss": 0.6275,
      "step": 964
    },
    {
      "epoch": 1.24,
      "learning_rate": 6.934033001263847e-06,
      "loss": 0.6227,
      "step": 966
    },
    {
      "epoch": 1.24,
      "learning_rate": 6.893705858925179e-06,
      "loss": 0.6287,
      "step": 968
    },
    {
      "epoch": 1.24,
      "learning_rate": 6.853434551424001e-06,
      "loss": 0.6074,
      "step": 970
    },
    {
      "epoch": 1.25,
      "learning_rate": 6.813219802626698e-06,
      "loss": 0.6325,
      "step": 972
    },
    {
      "epoch": 1.25,
      "learning_rate": 6.773062335383024e-06,
      "loss": 0.6666,
      "step": 974
    },
    {
      "epoch": 1.25,
      "learning_rate": 6.73296287151312e-06,
      "loss": 0.6135,
      "step": 976
    },
    {
      "epoch": 1.25,
      "learning_rate": 6.692922131794517e-06,
      "loss": 0.5956,
      "step": 978
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.652940835949208e-06,
      "loss": 0.5967,
      "step": 980
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.6130197026306945e-06,
      "loss": 0.6195,
      "step": 982
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.573159449411071e-06,
      "loss": 0.6283,
      "step": 984
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.533360792768122e-06,
      "loss": 0.6356,
      "step": 986
    },
    {
      "epoch": 1.27,
      "learning_rate": 6.4936244480724575e-06,
      "loss": 0.595,
      "step": 988
    },
    {
      "epoch": 1.27,
      "learning_rate": 6.453951129574644e-06,
      "loss": 0.6115,
      "step": 990
    },
    {
      "epoch": 1.27,
      "learning_rate": 6.4143415503923676e-06,
      "loss": 0.6164,
      "step": 992
    },
    {
      "epoch": 1.27,
      "learning_rate": 6.374796422497622e-06,
      "loss": 0.628,
      "step": 994
    },
    {
      "epoch": 1.28,
      "learning_rate": 6.335316456703891e-06,
      "loss": 0.6135,
      "step": 996
    },
    {
      "epoch": 1.28,
      "learning_rate": 6.295902362653401e-06,
      "loss": 0.6223,
      "step": 998
    },
    {
      "epoch": 1.28,
      "learning_rate": 6.256554848804343e-06,
      "loss": 0.6014,
      "step": 1000
    },
    {
      "epoch": 1.28,
      "learning_rate": 6.2172746224181524e-06,
      "loss": 0.6463,
      "step": 1002
    },
    {
      "epoch": 1.29,
      "learning_rate": 6.178062389546784e-06,
      "loss": 0.6153,
      "step": 1004
    },
    {
      "epoch": 1.29,
      "learning_rate": 6.138918855020028e-06,
      "loss": 0.6424,
      "step": 1006
    },
    {
      "epoch": 1.29,
      "learning_rate": 6.099844722432844e-06,
      "loss": 0.6166,
      "step": 1008
    },
    {
      "epoch": 1.29,
      "learning_rate": 6.060840694132701e-06,
      "loss": 0.6097,
      "step": 1010
    },
    {
      "epoch": 1.3,
      "learning_rate": 6.021907471206971e-06,
      "loss": 0.6262,
      "step": 1012
    },
    {
      "epoch": 1.3,
      "learning_rate": 5.983045753470308e-06,
      "loss": 0.6141,
      "step": 1014
    },
    {
      "epoch": 1.3,
      "learning_rate": 5.944256239452085e-06,
      "loss": 0.6047,
      "step": 1016
    },
    {
      "epoch": 1.31,
      "learning_rate": 5.905539626383831e-06,
      "loss": 0.6547,
      "step": 1018
    },
    {
      "epoch": 1.31,
      "learning_rate": 5.866896610186701e-06,
      "loss": 0.5995,
      "step": 1020
    },
    {
      "epoch": 1.31,
      "learning_rate": 5.82832788545896e-06,
      "loss": 0.6131,
      "step": 1022
    },
    {
      "epoch": 1.31,
      "learning_rate": 5.789834145463506e-06,
      "loss": 0.6484,
      "step": 1024
    },
    {
      "epoch": 1.32,
      "learning_rate": 5.7514160821154085e-06,
      "loss": 0.6497,
      "step": 1026
    },
    {
      "epoch": 1.32,
      "learning_rate": 5.713074385969457e-06,
      "loss": 0.6031,
      "step": 1028
    },
    {
      "epoch": 1.32,
      "learning_rate": 5.67480974620778e-06,
      "loss": 0.6294,
      "step": 1030
    },
    {
      "epoch": 1.32,
      "learning_rate": 5.63662285062742e-06,
      "loss": 0.6053,
      "step": 1032
    },
    {
      "epoch": 1.33,
      "learning_rate": 5.598514385627997e-06,
      "loss": 0.622,
      "step": 1034
    },
    {
      "epoch": 1.33,
      "learning_rate": 5.56048503619935e-06,
      "loss": 0.6325,
      "step": 1036
    },
    {
      "epoch": 1.33,
      "learning_rate": 5.522535485909258e-06,
      "loss": 0.6111,
      "step": 1038
    },
    {
      "epoch": 1.33,
      "learning_rate": 5.484666416891109e-06,
      "loss": 0.6061,
      "step": 1040
    },
    {
      "epoch": 1.34,
      "learning_rate": 5.446878509831668e-06,
      "loss": 0.5885,
      "step": 1042
    },
    {
      "epoch": 1.34,
      "learning_rate": 5.409172443958844e-06,
      "loss": 0.6158,
      "step": 1044
    },
    {
      "epoch": 1.34,
      "learning_rate": 5.371548897029457e-06,
      "loss": 0.6153,
      "step": 1046
    },
    {
      "epoch": 1.34,
      "learning_rate": 5.334008545317082e-06,
      "loss": 0.5985,
      "step": 1048
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.2965520635998676e-06,
      "loss": 0.6004,
      "step": 1050
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.259180125148442e-06,
      "loss": 0.6108,
      "step": 1052
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.22189340171377e-06,
      "loss": 0.6343,
      "step": 1054
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.184692563515104e-06,
      "loss": 0.6376,
      "step": 1056
    },
    {
      "epoch": 1.36,
      "learning_rate": 5.147578279227943e-06,
      "loss": 0.5997,
      "step": 1058
    },
    {
      "epoch": 1.36,
      "learning_rate": 5.110551215971981e-06,
      "loss": 0.6192,
      "step": 1060
    },
    {
      "epoch": 1.36,
      "learning_rate": 5.073612039299157e-06,
      "loss": 0.6276,
      "step": 1062
    },
    {
      "epoch": 1.36,
      "learning_rate": 5.036761413181659e-06,
      "loss": 0.6493,
      "step": 1064
    },
    {
      "epoch": 1.37,
      "learning_rate": 5.000000000000003e-06,
      "loss": 0.5993,
      "step": 1066
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.963328460531127e-06,
      "loss": 0.5804,
      "step": 1068
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.926747453936509e-06,
      "loss": 0.6444,
      "step": 1070
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.890257637750332e-06,
      "loss": 0.6287,
      "step": 1072
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.853859667867641e-06,
|
"loss": 0.5872, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.817554198532582e-06, |
|
"loss": 0.6015, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.781341882326615e-06, |
|
"loss": 0.5941, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.745223370156797e-06, |
|
"loss": 0.6406, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.709199311244098e-06, |
|
"loss": 0.6334, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.673270353111687e-06, |
|
"loss": 0.647, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.63743714157335e-06, |
|
"loss": 0.6168, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.6017003207218294e-06, |
|
"loss": 0.595, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.566060532917288e-06, |
|
"loss": 0.6195, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.530518418775734e-06, |
|
"loss": 0.6084, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.4950746171575135e-06, |
|
"loss": 0.5976, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.459729765155842e-06, |
|
"loss": 0.6086, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.424484498085335e-06, |
|
"loss": 0.5855, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.389339449470592e-06, |
|
"loss": 0.6503, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.354295251034811e-06, |
|
"loss": 0.6035, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.319352532688444e-06, |
|
"loss": 0.5885, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.284511922517853e-06, |
|
"loss": 0.5765, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.249774046774034e-06, |
|
"loss": 0.6631, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.2151395298613675e-06, |
|
"loss": 0.6236, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.180608994326371e-06, |
|
"loss": 0.6516, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.1461830608465385e-06, |
|
"loss": 0.6293, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.111862348219158e-06, |
|
"loss": 0.6361, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.077647473350201e-06, |
|
"loss": 0.6274, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.04353905124324e-06, |
|
"loss": 0.6109, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.009537694988372e-06, |
|
"loss": 0.5974, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.975644015751234e-06, |
|
"loss": 0.5849, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.941858622761975e-06, |
|
"loss": 0.6025, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.908182123304344e-06, |
|
"loss": 0.6323, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.8746151227047455e-06, |
|
"loss": 0.58, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.84115822432137e-06, |
|
"loss": 0.631, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.807812029533362e-06, |
|
"loss": 0.6424, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.7745771377299758e-06, |
|
"loss": 0.6061, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.7414541462998446e-06, |
|
"loss": 0.6172, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.708443650620206e-06, |
|
"loss": 0.6402, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.6755462440462288e-06, |
|
"loss": 0.5702, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.6427625179003223e-06, |
|
"loss": 0.6181, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.6100930614615204e-06, |
|
"loss": 0.6158, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.5775384619549e-06, |
|
"loss": 0.586, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.5450993045409997e-06, |
|
"loss": 0.6334, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.5127761723053313e-06, |
|
"loss": 0.5789, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.4805696462478634e-06, |
|
"loss": 0.6111, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.448480305272619e-06, |
|
"loss": 0.5845, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.41650872617724e-06, |
|
"loss": 0.6199, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.384655483642624e-06, |
|
"loss": 0.6317, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.352921150222612e-06, |
|
"loss": 0.6299, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.321306296333673e-06, |
|
"loss": 0.5978, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.2898114902446708e-06, |
|
"loss": 0.6216, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.2584372980666344e-06, |
|
"loss": 0.619, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.2271842837425917e-06, |
|
"loss": 0.6271, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.1960530090374277e-06, |
|
"loss": 0.5788, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.165044033527789e-06, |
|
"loss": 0.5523, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.134157914592032e-06, |
|
"loss": 0.5863, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.1033952074001882e-06, |
|
"loss": 0.6359, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.0727564649040066e-06, |
|
"loss": 0.6057, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.042242237826991e-06, |
|
"loss": 0.5813, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.011853074654515e-06, |
|
"loss": 0.6351, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.981589521623973e-06, |
|
"loss": 0.5861, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.951452122714926e-06, |
|
"loss": 0.608, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.9214414196393702e-06, |
|
"loss": 0.6156, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.8915579518319626e-06, |
|
"loss": 0.6082, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.861802256440348e-06, |
|
"loss": 0.6268, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.8321748683154893e-06, |
|
"loss": 0.5921, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.8026763200020557e-06, |
|
"loss": 0.5828, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.773307141728867e-06, |
|
"loss": 0.6255, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.744067861399333e-06, |
|
"loss": 0.5878, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.714959004582003e-06, |
|
"loss": 0.6113, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6859810945010687e-06, |
|
"loss": 0.6255, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6571346520270147e-06, |
|
"loss": 0.5998, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.628420195667214e-06, |
|
"loss": 0.607, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5998382415566258e-06, |
|
"loss": 0.5773, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.5713893034485216e-06, |
|
"loss": 0.5944, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.5430738927052346e-06, |
|
"loss": 0.6249, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.514892518288988e-06, |
|
"loss": 0.6379, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4868456867527315e-06, |
|
"loss": 0.5833, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.4589339022310386e-06, |
|
"loss": 0.5841, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.431157666431052e-06, |
|
"loss": 0.6359, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.403517478623456e-06, |
|
"loss": 0.575, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3760138356335172e-06, |
|
"loss": 0.6207, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.348647231832131e-06, |
|
"loss": 0.6009, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3214181591269603e-06, |
|
"loss": 0.6267, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.2943271069535754e-06, |
|
"loss": 0.6262, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.267374562266662e-06, |
|
"loss": 0.5881, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.240561009531281e-06, |
|
"loss": 0.5842, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.2138869307141266e-06, |
|
"loss": 0.5602, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.1873528052749094e-06, |
|
"loss": 0.596, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.1609591101576945e-06, |
|
"loss": 0.5942, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.1347063197823648e-06, |
|
"loss": 0.5955, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.1085949060360654e-06, |
|
"loss": 0.6148, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.0826253382647334e-06, |
|
"loss": 0.6484, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.056798083264667e-06, |
|
"loss": 0.5729, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.0311136052741274e-06, |
|
"loss": 0.6088, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.0055723659649907e-06, |
|
"loss": 0.6047, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.9801748244344587e-06, |
|
"loss": 0.5812, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.9549214371968008e-06, |
|
"loss": 0.5976, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.9298126581751542e-06, |
|
"loss": 0.6362, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.9048489386933545e-06, |
|
"loss": 0.5954, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.8800307274678364e-06, |
|
"loss": 0.6468, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.8553584705995564e-06, |
|
"loss": 0.6041, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.8308326115659757e-06, |
|
"loss": 0.5584, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.8064535912131032e-06, |
|
"loss": 0.6246, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.7822218477475496e-06, |
|
"loss": 0.6056, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.7581378167286655e-06, |
|
"loss": 0.6081, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.7342019310607062e-06, |
|
"loss": 0.5951, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.7104146209850591e-06, |
|
"loss": 0.5586, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.6867763140724969e-06, |
|
"loss": 0.6417, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.6632874352154982e-06, |
|
"loss": 0.5777, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.6399484066206183e-06, |
|
"loss": 0.5733, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.6167596478008817e-06, |
|
"loss": 0.6019, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.5937215755682667e-06, |
|
"loss": 0.6216, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.5708346040261812e-06, |
|
"loss": 0.6211, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.5480991445620541e-06, |
|
"loss": 0.6229, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.5255156058399124e-06, |
|
"loss": 0.6139, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.5030843937930485e-06, |
|
"loss": 0.6054, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.4808059116167306e-06, |
|
"loss": 0.5948, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.4586805597609333e-06, |
|
"loss": 0.5594, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.4367087359231668e-06, |
|
"loss": 0.6174, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.4148908350413048e-06, |
|
"loss": 0.5938, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3932272492864984e-06, |
|
"loss": 0.5763, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3717183680561253e-06, |
|
"loss": 0.5997, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3503645779667852e-06, |
|
"loss": 0.564, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3291662628473634e-06, |
|
"loss": 0.5961, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.308123803732111e-06, |
|
"loss": 0.627, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2872375788538171e-06, |
|
"loss": 0.6356, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.266507963636997e-06, |
|
"loss": 0.6069, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2459353306911438e-06, |
|
"loss": 0.6107, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.2255200498040432e-06, |
|
"loss": 0.606, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.2052624879351105e-06, |
|
"loss": 0.583, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.1851630092088051e-06, |
|
"loss": 0.5755, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.1652219749080817e-06, |
|
"loss": 0.5951, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.1454397434679022e-06, |
|
"loss": 0.5939, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.12581667046878e-06, |
|
"loss": 0.6146, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.1063531086304003e-06, |
|
"loss": 0.5748, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.0870494078052796e-06, |
|
"loss": 0.5867, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.067905914972468e-06, |
|
"loss": 0.5761, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.0489229742313223e-06, |
|
"loss": 0.592, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.0301009267953145e-06, |
|
"loss": 0.5793, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.0114401109859019e-06, |
|
"loss": 0.6008, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.929408622264448e-07, |
|
"loss": 0.5669, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.746035130361741e-07, |
|
"loss": 0.5875, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.564283930242258e-07, |
|
"loss": 0.6218, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.384158288836987e-07, |
|
"loss": 0.5938, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.205661443857994e-07, |
|
"loss": 0.5622, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.028796603740097e-07, |
|
"loss": 0.6239, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.853566947583259e-07, |
|
"loss": 0.5665, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.67997562509546e-07, |
|
"loss": 0.6079, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.508025756535987e-07, |
|
"loss": 0.5813, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.337720432659513e-07, |
|
"loss": 0.6135, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.169062714660347e-07, |
|
"loss": 0.5598, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.002055634117578e-07, |
|
"loss": 0.5624, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.836702192940493e-07, |
|
"loss": 0.6002, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.673005363314578e-07, |
|
"loss": 0.5882, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.510968087648262e-07, |
|
"loss": 0.6119, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.350593278519824e-07, |
|
"loss": 0.5976, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.19188381862519e-07, |
|
"loss": 0.5798, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.034842560726008e-07, |
|
"loss": 0.5791, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.879472327598502e-07, |
|
"loss": 0.5823, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.725775911982602e-07, |
|
"loss": 0.5993, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.573756076531779e-07, |
|
"loss": 0.5688, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.423415553763479e-07, |
|
"loss": 0.5925, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.274757046009871e-07, |
|
"loss": 0.6044, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.127783225369377e-07, |
|
"loss": 0.6185, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.982496733658582e-07, |
|
"loss": 0.5809, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.83890018236476e-07, |
|
"loss": 0.5961, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.696996152598966e-07, |
|
"loss": 0.6069, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.556787195049573e-07, |
|
"loss": 0.5802, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.418275829936537e-07, |
|
"loss": 0.6161, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.281464546965953e-07, |
|
"loss": 0.5966, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.146355805285452e-07, |
|
"loss": 0.6, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.012952033439844e-07, |
|
"loss": 0.578, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.881255629327608e-07, |
|
"loss": 0.5885, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.7512689601576843e-07, |
|
"loss": 0.5753, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.6229943624069963e-07, |
|
"loss": 0.6083, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.4964341417784165e-07, |
|
"loss": 0.6019, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.3715905731593233e-07, |
|
"loss": 0.6016, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.248465900580734e-07, |
|
"loss": 0.5873, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.127062337176935e-07, |
|
"loss": 0.5647, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.0073820651457043e-07, |
|
"loss": 0.5816, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.889427235709153e-07, |
|
"loss": 0.5444, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.773199969074959e-07, |
|
"loss": 0.6032, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.658702354398325e-07, |
|
"loss": 0.5779, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.5459364497443696e-07, |
|
"loss": 0.5919, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.4349042820512325e-07, |
|
"loss": 0.6086, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.325607847093537e-07, |
|
"loss": 0.5862, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.2180491094465414e-07, |
|
"loss": 0.6099, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.112230002450889e-07, |
|
"loss": 0.5925, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.0081524281777687e-07, |
|
"loss": 0.6217, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.905818257394799e-07, |
|
"loss": 0.5707, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.805229329532344e-07, |
|
"loss": 0.6038, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.706387452650494e-07, |
|
"loss": 0.5877, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.609294403406537e-07, |
|
"loss": 0.5912, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.513951927023017e-07, |
|
"loss": 0.6043, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.420361737256438e-07, |
|
"loss": 0.5947, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.3285255163663535e-07, |
|
"loss": 0.5927, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.2384449150851695e-07, |
|
"loss": 0.6107, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.1501215525885245e-07, |
|
"loss": 0.5427, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.063557016466111e-07, |
|
"loss": 0.5792, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.978752862693212e-07, |
|
"loss": 0.5775, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8957106156026084e-07, |
|
"loss": 0.5554, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8144317678573497e-07, |
|
"loss": 0.5464, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.7349177804237837e-07, |
|
"loss": 0.5882, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.6571700825453674e-07, |
|
"loss": 0.5718, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.5811900717169537e-07, |
|
"loss": 0.5631, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.506979113659679e-07, |
|
"loss": 0.6137, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.4345385422964043e-07, |
|
"loss": 0.5804, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.3638696597277678e-07, |
|
"loss": 0.5713, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.2949737362087156e-07, |
|
"loss": 0.5563, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.227852010125752e-07, |
|
"loss": 0.5885, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1625056879746133e-07, |
|
"loss": 0.5563, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0989359443386305e-07, |
|
"loss": 0.5924, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0371439218675671e-07, |
|
"loss": 0.613, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.771307312571254e-08, |
|
"loss": 0.5946, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.188974512289617e-08, |
|
"loss": 0.5728, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.624451285112689e-08, |
|
"loss": 0.5644, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.077747778200474e-08, |
|
"loss": 0.6211, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.54887381840752e-08, |
|
"loss": 0.5863, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.037838912107298e-08, |
|
"loss": 0.6035, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.544652245020433e-08, |
|
"loss": 0.6091, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.069322682050516e-08, |
|
"loss": 0.5979, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.611858767124001e-08, |
|
"loss": 0.6134, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.1722687230369995e-08, |
|
"loss": 0.5743, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.7505604513072845e-08, |
|
"loss": 0.5767, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.346741532032628e-08, |
|
"loss": 0.5905, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.96081922375402e-08, |
|
"loss": 0.576, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.592800463325663e-08, |
|
"loss": 0.5968, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.242691865790071e-08, |
|
"loss": 0.5451, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.9104997242590528e-08, |
|
"loss": 0.5999, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.5962300098008042e-08, |
|
"loss": 0.5615, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.2998883713326592e-08, |
|
"loss": 0.5677, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.0214801355192826e-08, |
|
"loss": 0.6002, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.761010306676969e-08, |
|
"loss": 0.6138, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.518483566683826e-08, |
|
"loss": 0.6095, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.2939042748955078e-08, |
|
"loss": 0.5596, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0872764680671666e-08, |
|
"loss": 0.6428, |
|
"step": 1538 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.986038602802894e-09, |
|
"loss": 0.5899, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.278898428764169e-09, |
|
"loss": 0.5952, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.751374843961932e-09, |
|
"loss": 0.5526, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.403495305237426e-09, |
|
"loss": 0.5829, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.2352840403804264e-09, |
|
"loss": 0.5848, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.246762047685147e-09, |
|
"loss": 0.5983, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.437947095582759e-09, |
|
"loss": 0.5923, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 8.088537223116533e-10, |
|
"loss": 0.5919, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.594932356654202e-10, |
|
"loss": 0.6125, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 8.987371278079693e-11, |
|
"loss": 0.5734, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.6057, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1560, |
|
"total_flos": 3.6540609644891996e+18, |
|
"train_loss": 0.8723082382709552, |
|
"train_runtime": 12247.4613, |
|
"train_samples_per_second": 8.152, |
|
"train_steps_per_second": 0.127 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 1560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 780, |
|
"total_flos": 3.6540609644891996e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |